Ejemplo n.º 1
0
 def __init__(self, num_hidden, question, facts):
     self.question = question
     self.facts = facts
     # transposing for attention
     self.question_transposed = tf.transpose(question)
     self.facts_transposed = [tf.transpose(c) for c in facts]
     # parameters
     self.w1 = weight('w1', [num_hidden, 7 * num_hidden])
     self.b1 = bias('b1', [num_hidden, 1])
     self.w2 = weight('w2', [1, num_hidden])
     self.b2 = bias('b2', [1, 1])
     self.gru = tf.nn.rnn_cell.GRUCell(num_hidden)
Ejemplo n.º 2
0
 def __init__(self, num_hidden, question, facts, is_training, bn):
     self.question = question
     self.facts = tf.unstack(tf.transpose(facts, [1, 2, 0]))  # F x [d, N]
     # transposing for attention
     self.question_transposed = tf.transpose(question)
     self.facts_transposed = [tf.transpose(f)
                              for f in self.facts]  # F x [N, d]
     # parameters
     self.w1 = weight('w1', [num_hidden, 4 * num_hidden])
     self.b1 = bias('b1', [num_hidden, 1])
     self.w2 = weight('w2', [1, num_hidden])
     self.b2 = bias('b2', [1, 1])
     self.gru = AttnGRU(num_hidden, is_training, bn)
Ejemplo n.º 3
0
    def __init__(self, num_hidden, question, facts):
        self.question = question
        self.facts = facts

        # transposing for attention
        self.question_transposed = tf.transpose(question)
        self.facts_transposed = [tf.transpose(c) for c in facts]

        # parameters
        self.w1 = weight('w1', [num_hidden, 7 * num_hidden])
        self.b1 = bias('b1', [num_hidden, 1])
        self.w2 = weight('w2', [1, num_hidden])
        self.b2 = bias('b2', [1, 1])
        self.gru = tf.nn.rnn_cell.GRUCell(num_hidden)
Ejemplo n.º 4
0
    def __init__(self, num_hidden, question, facts, is_training, bn):
        self.question = question
        self.facts = tf.unpack(tf.transpose(facts, [1, 2, 0]))  # F x [d, N]

        # transposing for attention
        self.question_transposed = tf.transpose(question)
        self.facts_transposed = [tf.transpose(f) for f in self.facts]  # F x [N, d]

        # parameters
        self.w1 = weight('w1', [num_hidden, 4 * num_hidden])
        self.b1 = bias('b1', [num_hidden, 1])
        self.w2 = weight('w2', [1, num_hidden])
        self.b2 = bias('b2', [1, 1])
        self.gru = AttnGRU(num_hidden, is_training, bn)
Ejemplo n.º 5
0
    def __init__(self, num_hidden, question, facts, is_training, bn):

        ##### num_hidden - Hidden size of a episodic memory unit   question- what we are asking  facts - context with positional encoding
        ##### bn - Whether to use BN or not
        #### Facts are the output of the bidrectional RNN (fusion layer)
        self.question = question
        self.facts = tf.unpack(tf.transpose(
            facts,
            [1, 2, 0]))  # F x [d, N]  fact counts * [hidden * bactch size]
        print("I am inside")
        # transposing for attention
        self.question_transposed = tf.transpose(question)
        self.facts_transposed = [tf.transpose(f)
                                 for f in self.facts]  # F x [N, d]
        # parameters
        self.w1 = weight('w1', [num_hidden, 4 * num_hidden])
        self.b1 = bias('b1', [num_hidden, 1])
        self.w2 = weight('w2', [1, num_hidden])
        self.b2 = bias('b2', [1, 1])
        self.gru = AttnGRU(
            num_hidden, is_training, bn
        )  #this is the arrention (modified)GRU gate implemented in DMN model
Ejemplo n.º 6
0
    def _linear(self, x, h, bias_default=0.0):
        I, D = x.get_shape().as_list()[1], self._num_units
        w = weight("W", [I, D])
        u = weight("U", [D, D])
        b = bias("b", D, bias_default)

        if self.batch_norm:
            with tf.variable_scope("Linear1"):
                x_w = batch_norm(tf.matmul(x, w), is_training=self.is_training)
            with tf.variable_scope("Linear2"):
                h_u = batch_norm(tf.matmul(h, u), is_training=self.is_training)
            return x_w + h_u + b
        else:
            return tf.matmul(x, w) + tf.matmul(h, u) + b
Ejemplo n.º 7
0
    def _linear(self,
                x,
                h,
                bias_default=0.0
                ):  #this is to multiply the internal things with gates
        I, D = x.get_shape().as_list()[1], self._num_units
        w = weight('W', [I, D])
        u = weight('U', [D, D])
        b = bias('b', D, bias_default)

        if self.batch_normx:  #batch norm
            with tf.variable_scope('Linear1'):
                x_w = batch_norm(tf.matmul(x, w), is_training=self.is_training)
            with tf.variable_scope('Linear2'):
                h_u = batch_norm(tf.matmul(h, u), is_training=self.is_training)
            return x_w + h_u + b
        else:
            return tf.matmul(x, w) + tf.matmul(h, u) + b
Ejemplo n.º 8
0
    def build(self):
        params = self.params
        N, L, Q, F = params.batch_size, params.max_sent_size, params.max_ques_size, params.max_fact_count
        V, d, A = params.embed_size, params.hidden_size, self.words.vocab_size

        # initialize self
        # placeholders
        input = tf.placeholder(
            'int32', shape=[N, F, L],
            name='x')  # [num_batch, fact_count, sentence_len]
        question = tf.placeholder('int32', shape=[N, Q],
                                  name='q')  # [num_batch, question_len]
        answer = tf.placeholder('int32', shape=[N],
                                name='y')  # [num_batch] - one word answer
        fact_counts = tf.placeholder('int64', shape=[N], name='fc')
        input_mask = tf.placeholder('float32', shape=[N, F, L, V], name='xm')
        is_training = tf.placeholder(tf.bool)
        self.att = tf.constant(0.)

        # Prepare parameters
        gru = rnn_cell.GRUCell(d)
        l = self.positional_encoding()
        embedding = weight('embedding', [A, V],
                           init='uniform',
                           range=3**(1 / 2))

        with tf.name_scope('SentenceReader'):
            input_list = tf.unpack(tf.transpose(input))  # L x [F, N]
            input_embed = []
            for facts in input_list:
                facts = tf.unpack(facts)
                embed = tf.pack([
                    tf.nn.embedding_lookup(embedding, w) for w in facts
                ])  # [F, N, V]
                input_embed.append(embed)

            # apply positional encoding
            input_embed = tf.transpose(tf.pack(input_embed),
                                       [2, 1, 0, 3])  # [N, F, L, V]
            encoded = l * input_embed * input_mask
            facts = tf.reduce_sum(encoded, 2)  # [N, F, V]

        # dropout time
        facts = dropout(facts, params.keep_prob, is_training)

        with tf.name_scope('InputFusion'):
            # Bidirectional RNN
            with tf.variable_scope('Forward'):
                forward_states, _ = tf.nn.dynamic_rnn(gru,
                                                      facts,
                                                      fact_counts,
                                                      dtype=tf.float32)

            with tf.variable_scope('Backward'):
                facts_reverse = tf.reverse_sequence(facts, fact_counts, 1)
                backward_states, _ = tf.nn.dynamic_rnn(gru,
                                                       facts_reverse,
                                                       fact_counts,
                                                       dtype=tf.float32)

            # Use forward and backward states both
            facts = forward_states + backward_states  # [N, F, d]

        with tf.variable_scope('Question'):
            ques_list = tf.unpack(tf.transpose(question))
            ques_embed = [
                tf.nn.embedding_lookup(embedding, w) for w in ques_list
            ]
            _, question_vec = tf.nn.rnn(gru, ques_embed, dtype=tf.float32)

        # Episodic Memory
        with tf.variable_scope('Episodic'):
            episode = EpisodeModule(d, question_vec, facts, is_training,
                                    params.batch_norm)
            memory = tf.identity(question_vec)

            for t in range(params.memory_step):
                with tf.variable_scope('Layer%d' % t) as scope:
                    if params.memory_update == 'gru':
                        memory = gru(episode.new(memory), memory)[0]
                    else:
                        # ReLU update
                        c = episode.new(memory)
                        concated = tf.concat(1, [memory, c, question_vec])

                        w_t = weight('w_t', [3 * d, d])
                        z = tf.matmul(concated, w_t)
                        if params.batch_norm:
                            z = batch_norm(z, is_training)
                        else:
                            b_t = bias('b_t', d)
                            z = z + b_t
                        memory = tf.nn.relu(z)  # [N, d]

                    scope.reuse_variables()

        # Regularizations
        if params.batch_norm:
            memory = batch_norm(memory, is_training=is_training)
        memory = dropout(memory, params.keep_prob, is_training)

        with tf.name_scope('Answer'):
            # Answer module : feed-forward version (for it is one word answer)
            w_a = weight('w_a', [d, A], init='xavier')
            logits = tf.matmul(memory, w_a)  # [N, A]

        with tf.name_scope('Loss'):
            # Cross-Entropy loss
            cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits, answer)
            loss = tf.reduce_mean(cross_entropy)
            total_loss = loss + params.weight_decay * tf.add_n(
                tf.get_collection('l2'))

        with tf.variable_scope('Accuracy'):
            # Accuracy
            predicts = tf.cast(tf.argmax(logits, 1), 'int32')
            corrects = tf.equal(predicts, answer)
            num_corrects = tf.reduce_sum(tf.cast(corrects, tf.float32))
            accuracy = tf.reduce_mean(tf.cast(corrects, tf.float32))

        # Training
        optimizer = tf.train.AdamOptimizer(params.learning_rate)
        opt_op = optimizer.minimize(total_loss, global_step=self.global_step)

        # placeholders
        self.x = input
        self.xm = input_mask
        self.q = question
        self.y = answer
        self.fc = fact_counts
        self.is_training = is_training

        # tensors
        self.total_loss = total_loss
        self.num_corrects = num_corrects
        self.accuracy = accuracy
        self.opt_op = opt_op
Ejemplo n.º 9
0
 def _linear(self, x, h, bias_default=0.0):
     I, D = x.get_shape().as_list()[1], self._num_units
     w = weight('W', [I, D])
     u = weight('U', [D, D])
     b = bias('b', D, bias_default)
     return tf.matmul(x, w) + tf.matmul(h, u) + b
Ejemplo n.º 10
0
 def _linear(self, x, h, bias_default=0.0):
     I, D = x.get_shape().as_list()[1], self._num_units
     w = weight('W', [I, D])
     u = weight('U', [D, D])
     b = bias('b', D, bias_default)
     return tf.matmul(x, w) + tf.matmul(h, u) + b
Ejemplo n.º 11
0
    def build(self):
        params = self.params
        N, L, Q, F = params.batch_size, params.max_sent_size, params.max_ques_size, params.max_fact_count
        V, d, A = params.embed_size, params.hidden_size, self.words.vocab_size

        # initialize self
        # placeholders
        input = tf.placeholder('int32', shape=[N, F, L], name='x')  # [num_batch, fact_count, sentence_len]
        question = tf.placeholder('int32', shape=[N, Q], name='q')  # [num_batch, question_len]
        answer = tf.placeholder('int32', shape=[N], name='y')  # [num_batch] - one word answer
        fact_counts = tf.placeholder('int64', shape=[N], name='fc')
        input_mask = tf.placeholder('float32', shape=[N, F, L, V], name='xm')
        is_training = tf.placeholder(tf.bool)
        self.att = tf.constant(0.)

        # Prepare parameters
        gru = rnn_cell.GRUCell(d)
        l = self.positional_encoding()
        embedding = weight('embedding', [A, V], init='uniform', range=3**(1/2))

        with tf.name_scope('SentenceReader'):
            input_list = tf.unpack(tf.transpose(input))  # L x [F, N]
            input_embed = []
            for facts in input_list:
                facts = tf.unpack(facts)
                embed = tf.pack([tf.nn.embedding_lookup(embedding, w) for w in facts])  # [F, N, V]
                input_embed.append(embed)

            # apply positional encoding
            input_embed = tf.transpose(tf.pack(input_embed), [2, 1, 0, 3])  # [N, F, L, V]
            encoded = l * input_embed * input_mask
            facts = tf.reduce_sum(encoded, 2)  # [N, F, V]

        # dropout time
        facts = dropout(facts, params.keep_prob, is_training)

        with tf.name_scope('InputFusion'):
            # Bidirectional RNN
            with tf.variable_scope('Forward'):
                forward_states, _ = tf.nn.dynamic_rnn(gru, facts, fact_counts, dtype=tf.float32)

            with tf.variable_scope('Backward'):
                facts_reverse = tf.reverse_sequence(facts, fact_counts, 1)
                backward_states, _ = tf.nn.dynamic_rnn(gru, facts_reverse, fact_counts, dtype=tf.float32)

            # Use forward and backward states both
            facts = forward_states + backward_states  # [N, F, d]

        with tf.variable_scope('Question'):
            ques_list = tf.unpack(tf.transpose(question))
            ques_embed = [tf.nn.embedding_lookup(embedding, w) for w in ques_list]
            _, question_vec = tf.nn.rnn(gru, ques_embed, dtype=tf.float32)

        # Episodic Memory
        with tf.variable_scope('Episodic'):
            episode = EpisodeModule(d, question_vec, facts, is_training, params.batch_norm)
            memory = tf.identity(question_vec)

            for t in range(params.memory_step):
                with tf.variable_scope('Layer%d' % t) as scope:
                    if params.memory_update == 'gru':
                        memory = gru(episode.new(memory), memory)[0]
                    else:
                        # ReLU update
                        c = episode.new(memory)
                        concated = tf.concat(1, [memory, c, question_vec])

                        w_t = weight('w_t', [3 * d, d])
                        z = tf.matmul(concated, w_t)
                        if params.batch_norm:
                            z = batch_norm(z, is_training)
                        else:
                            b_t = bias('b_t', d)
                            z = z + b_t
                        memory = tf.nn.relu(z)  # [N, d]

                    scope.reuse_variables()

        # Regularizations
        if params.batch_norm:
            memory = batch_norm(memory, is_training=is_training)
        memory = dropout(memory, params.keep_prob, is_training)

        with tf.name_scope('Answer'):
            # Answer module : feed-forward version (for it is one word answer)
            w_a = weight('w_a', [d, A], init='xavier')
            logits = tf.matmul(memory, w_a)  # [N, A]

        with tf.name_scope('Loss'):
            # Cross-Entropy loss
            cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, answer)
            loss = tf.reduce_mean(cross_entropy)
            total_loss = loss + params.weight_decay * tf.add_n(tf.get_collection('l2'))

        with tf.variable_scope('Accuracy'):
            # Accuracy
            predicts = tf.cast(tf.argmax(logits, 1), 'int32')
            corrects = tf.equal(predicts, answer)
            num_corrects = tf.reduce_sum(tf.cast(corrects, tf.float32))
            accuracy = tf.reduce_mean(tf.cast(corrects, tf.float32))

        # Training
        optimizer = tf.train.AdamOptimizer(params.learning_rate)
        opt_op = optimizer.minimize(total_loss, global_step=self.global_step)

        # placeholders
        self.x = input
        self.xm = input_mask
        self.q = question
        self.y = answer
        self.fc = fact_counts
        self.is_training = is_training

        # tensors
        self.total_loss = total_loss
        self.num_corrects = num_corrects
        self.accuracy = accuracy
        self.opt_op = opt_op