Esempio n. 1
0
 def __init__(self, num_hidden, question, facts):
     self.question = question
     self.facts = facts
     # transposing for attention
     self.question_transposed = tf.transpose(question)
     self.facts_transposed = [tf.transpose(c) for c in facts]
     # parameters
     self.w1 = weight('w1', [num_hidden, 7 * num_hidden])
     self.b1 = bias('b1', [num_hidden, 1])
     self.w2 = weight('w2', [1, num_hidden])
     self.b2 = bias('b2', [1, 1])
     self.gru = tf.nn.rnn_cell.GRUCell(num_hidden)
Esempio n. 2
0
 def __init__(self, num_hidden, question, facts, is_training, bn):
     self.question = question
     self.facts = tf.unstack(tf.transpose(facts, [1, 2, 0]))  # F x [d, N]
     # transposing for attention
     self.question_transposed = tf.transpose(question)
     self.facts_transposed = [tf.transpose(f)
                              for f in self.facts]  # F x [N, d]
     # parameters
     self.w1 = weight('w1', [num_hidden, 4 * num_hidden])
     self.b1 = bias('b1', [num_hidden, 1])
     self.w2 = weight('w2', [1, num_hidden])
     self.b2 = bias('b2', [1, 1])
     self.gru = AttnGRU(num_hidden, is_training, bn)
    def __init__(self, num_hidden, question, facts):
        self.question = question
        self.facts = facts

        # transposing for attention
        self.question_transposed = tf.transpose(question)
        self.facts_transposed = [tf.transpose(c) for c in facts]

        # parameters
        self.w1 = weight('w1', [num_hidden, 7 * num_hidden])
        self.b1 = bias('b1', [num_hidden, 1])
        self.w2 = weight('w2', [1, num_hidden])
        self.b2 = bias('b2', [1, 1])
        self.gru = tf.nn.rnn_cell.GRUCell(num_hidden)
    def __init__(self, num_hidden, question, facts, is_training, bn):
        self.question = question
        self.facts = tf.unpack(tf.transpose(facts, [1, 2, 0]))  # F x [d, N]

        # transposing for attention
        self.question_transposed = tf.transpose(question)
        self.facts_transposed = [tf.transpose(f) for f in self.facts]  # F x [N, d]

        # parameters
        self.w1 = weight('w1', [num_hidden, 4 * num_hidden])
        self.b1 = bias('b1', [num_hidden, 1])
        self.w2 = weight('w2', [1, num_hidden])
        self.b2 = bias('b2', [1, 1])
        self.gru = AttnGRU(num_hidden, is_training, bn)
Esempio n. 5
0
    def _linear(self, x, h, bias_default=0.0):
        I, D = x.get_shape().as_list()[1], self._num_units
        w = weight("W", [I, D])
        u = weight("U", [D, D])
        b = bias("b", D, bias_default)

        if self.batch_norm:
            with tf.variable_scope("Linear1"):
                x_w = batch_norm(tf.matmul(x, w), is_training=self.is_training)
            with tf.variable_scope("Linear2"):
                h_u = batch_norm(tf.matmul(h, u), is_training=self.is_training)
            return x_w + h_u + b
        else:
            return tf.matmul(x, w) + tf.matmul(h, u) + b
Esempio n. 6
0
    def _linear(self,
                x,
                h,
                bias_default=0.0
                ):  #this is to multiply the internal things with gates
        I, D = x.get_shape().as_list()[1], self._num_units
        w = weight('W', [I, D])
        u = weight('U', [D, D])
        b = bias('b', D, bias_default)

        if self.batch_normx:  #batch norm
            with tf.variable_scope('Linear1'):
                x_w = batch_norm(tf.matmul(x, w), is_training=self.is_training)
            with tf.variable_scope('Linear2'):
                h_u = batch_norm(tf.matmul(h, u), is_training=self.is_training)
            return x_w + h_u + b
        else:
            return tf.matmul(x, w) + tf.matmul(h, u) + b
Esempio n. 7
0
    def __init__(self, num_hidden, question, facts, is_training, bn):

        ##### num_hidden - Hidden size of a episodic memory unit   question- what we are asking  facts - context with positional encoding
        ##### bn - Whether to use BN or not
        #### Facts are the output of the bidrectional RNN (fusion layer)
        self.question = question
        self.facts = tf.unpack(tf.transpose(
            facts,
            [1, 2, 0]))  # F x [d, N]  fact counts * [hidden * bactch size]
        print("I am inside")
        # transposing for attention
        self.question_transposed = tf.transpose(question)
        self.facts_transposed = [tf.transpose(f)
                                 for f in self.facts]  # F x [N, d]
        # parameters
        self.w1 = weight('w1', [num_hidden, 4 * num_hidden])
        self.b1 = bias('b1', [num_hidden, 1])
        self.w2 = weight('w2', [1, num_hidden])
        self.b2 = bias('b2', [1, 1])
        self.gru = AttnGRU(
            num_hidden, is_training, bn
        )  #this is the arrention (modified)GRU gate implemented in DMN model
Esempio n. 8
0
    def build(self):
        params = self.params
        N, L, Q, F = params.batch_size, params.max_sent_size, params.max_ques_size, params.max_fact_count
        V, d, A = params.glove_size, params.hidden_size, self.words.vocab_size

        # initialize self
        # placeholders
        input = tf.placeholder(tf.float32, shape=[N, L, V], name='x')  # [num_batch, sentence_len, glove_dim]
        question = tf.placeholder(tf.float32, shape=[N, Q, V], name='q')  # [num_batch, sentence_len, glove_dim]
        answer = tf.placeholder(tf.int64, shape=[N], name='y')  # [num_batch] - one word answer
        input_mask = tf.placeholder(tf.bool, shape=[N, L], name='x_mask')  # [num_batch, sentence_len]
        is_training = tf.placeholder(tf.bool)

        # Prepare parameters
        gru = rnn_cell.GRUCell(d)

        # Input module
        with tf.variable_scope('input') as scope:
            input_list = self.make_decoder_batch_input(input)
            input_states, _ = seq2seq.rnn_decoder(input_list, gru.zero_state(N, tf.float32), gru)

            # Question module
            scope.reuse_variables()

            ques_list = self.make_decoder_batch_input(question)
            questions, _ = seq2seq.rnn_decoder(ques_list, gru.zero_state(N, tf.float32), gru)
            question_vec = questions[-1]  # use final state

        # Masking: to extract fact vectors at end of sentence. (details in paper)
        input_states = tf.transpose(tf.pack(input_states), [1, 0, 2])  # [N, L, D]
        facts = []
        for n in range(N):
            filtered = tf.boolean_mask(input_states[n, :, :], input_mask[n, :])  # [?, D]
            padding = tf.zeros(tf.pack([F - tf.shape(filtered)[0], d]))
            facts.append(tf.concat(0, [filtered, padding]))  # [F, D]

        facked = tf.pack(facts)  # packing for transpose... I hate TF so much
        facts = tf.unpack(tf.transpose(facked, [1, 0, 2]), num=F)  # F x [N, D]

        # Episodic Memory
        with tf.variable_scope('episodic') as scope:
            episode = EpisodeModule(d, question_vec, facts)

            memory = tf.identity(question_vec)
            for t in range(params.memory_step):
                memory = gru(episode.new(memory), memory)[0]
                scope.reuse_variables()

        # Regularizations
        if params.batch_norm:
            memory = batch_norm(memory, is_training=is_training)
        memory = dropout(memory, params.keep_prob, is_training)

        with tf.name_scope('Answer'):
            # Answer module : feed-forward version (for it is one word answer)
            w_a = weight('w_a', [d, A])
            logits = tf.matmul(memory, w_a)  # [N, A]

        with tf.name_scope('Loss'):
            # Cross-Entropy loss
            cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, answer)
            loss = tf.reduce_mean(cross_entropy)
            total_loss = loss + params.weight_decay * tf.add_n(tf.get_collection('l2'))

        with tf.variable_scope('Accuracy'):
            # Accuracy
            predicts = tf.cast(tf.argmax(logits, 1), 'int32')
            corrects = tf.equal(predicts, answer)
            num_corrects = tf.reduce_sum(tf.cast(corrects, tf.float32))
            accuracy = tf.reduce_mean(tf.cast(corrects, tf.float32))

        # Training
        optimizer = tf.train.AdadeltaOptimizer(params.learning_rate)
        opt_op = optimizer.minimize(total_loss, global_step=self.global_step)

        # placeholders
        self.x = input
        self.q = question
        self.y = answer
        self.mask = input_mask
        self.is_training = is_training

        # tensors
        self.total_loss = total_loss
        self.num_corrects = num_corrects
        self.accuracy = accuracy
        self.opt_op = opt_op
Esempio n. 9
0
    def build(self):
        params = self.params
        N, L, Q, F = params.batch_size, params.max_sent_size, params.max_ques_size, params.max_fact_count
        V, d, A = params.embed_size, params.hidden_size, self.words.vocab_size

        # initialize self
        # placeholders
        input = tf.placeholder(
            'int32', shape=[N, F, L],
            name='x')  # [num_batch, fact_count, sentence_len]
        question = tf.placeholder('int32', shape=[N, Q],
                                  name='q')  # [num_batch, question_len]
        answer = tf.placeholder('int32', shape=[N],
                                name='y')  # [num_batch] - one word answer
        fact_counts = tf.placeholder('int64', shape=[N], name='fc')
        input_mask = tf.placeholder('float32', shape=[N, F, L, V], name='xm')
        is_training = tf.placeholder(tf.bool)
        self.att = tf.constant(0.)

        # Prepare parameters
        gru = rnn_cell.GRUCell(d)
        l = self.positional_encoding()
        embedding = weight('embedding', [A, V],
                           init='uniform',
                           range=3**(1 / 2))

        with tf.name_scope('SentenceReader'):
            input_list = tf.unpack(tf.transpose(input))  # L x [F, N]
            input_embed = []
            for facts in input_list:
                facts = tf.unpack(facts)
                embed = tf.pack([
                    tf.nn.embedding_lookup(embedding, w) for w in facts
                ])  # [F, N, V]
                input_embed.append(embed)

            # apply positional encoding
            input_embed = tf.transpose(tf.pack(input_embed),
                                       [2, 1, 0, 3])  # [N, F, L, V]
            encoded = l * input_embed * input_mask
            facts = tf.reduce_sum(encoded, 2)  # [N, F, V]

        # dropout time
        facts = dropout(facts, params.keep_prob, is_training)

        with tf.name_scope('InputFusion'):
            # Bidirectional RNN
            with tf.variable_scope('Forward'):
                forward_states, _ = tf.nn.dynamic_rnn(gru,
                                                      facts,
                                                      fact_counts,
                                                      dtype=tf.float32)

            with tf.variable_scope('Backward'):
                facts_reverse = tf.reverse_sequence(facts, fact_counts, 1)
                backward_states, _ = tf.nn.dynamic_rnn(gru,
                                                       facts_reverse,
                                                       fact_counts,
                                                       dtype=tf.float32)

            # Use forward and backward states both
            facts = forward_states + backward_states  # [N, F, d]

        with tf.variable_scope('Question'):
            ques_list = tf.unpack(tf.transpose(question))
            ques_embed = [
                tf.nn.embedding_lookup(embedding, w) for w in ques_list
            ]
            _, question_vec = tf.nn.rnn(gru, ques_embed, dtype=tf.float32)

        # Episodic Memory
        with tf.variable_scope('Episodic'):
            episode = EpisodeModule(d, question_vec, facts, is_training,
                                    params.batch_norm)
            memory = tf.identity(question_vec)

            for t in range(params.memory_step):
                with tf.variable_scope('Layer%d' % t) as scope:
                    if params.memory_update == 'gru':
                        memory = gru(episode.new(memory), memory)[0]
                    else:
                        # ReLU update
                        c = episode.new(memory)
                        concated = tf.concat(1, [memory, c, question_vec])

                        w_t = weight('w_t', [3 * d, d])
                        z = tf.matmul(concated, w_t)
                        if params.batch_norm:
                            z = batch_norm(z, is_training)
                        else:
                            b_t = bias('b_t', d)
                            z = z + b_t
                        memory = tf.nn.relu(z)  # [N, d]

                    scope.reuse_variables()

        # Regularizations
        if params.batch_norm:
            memory = batch_norm(memory, is_training=is_training)
        memory = dropout(memory, params.keep_prob, is_training)

        with tf.name_scope('Answer'):
            # Answer module : feed-forward version (for it is one word answer)
            w_a = weight('w_a', [d, A], init='xavier')
            logits = tf.matmul(memory, w_a)  # [N, A]

        with tf.name_scope('Loss'):
            # Cross-Entropy loss
            cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits, answer)
            loss = tf.reduce_mean(cross_entropy)
            total_loss = loss + params.weight_decay * tf.add_n(
                tf.get_collection('l2'))

        with tf.variable_scope('Accuracy'):
            # Accuracy
            predicts = tf.cast(tf.argmax(logits, 1), 'int32')
            corrects = tf.equal(predicts, answer)
            num_corrects = tf.reduce_sum(tf.cast(corrects, tf.float32))
            accuracy = tf.reduce_mean(tf.cast(corrects, tf.float32))

        # Training
        optimizer = tf.train.AdamOptimizer(params.learning_rate)
        opt_op = optimizer.minimize(total_loss, global_step=self.global_step)

        # placeholders
        self.x = input
        self.xm = input_mask
        self.q = question
        self.y = answer
        self.fc = fact_counts
        self.is_training = is_training

        # tensors
        self.total_loss = total_loss
        self.num_corrects = num_corrects
        self.accuracy = accuracy
        self.opt_op = opt_op
Esempio n. 10
0
 def _linear(self, x, h, bias_default=0.0):
     I, D = x.get_shape().as_list()[1], self._num_units
     w = weight('W', [I, D])
     u = weight('U', [D, D])
     b = bias('b', D, bias_default)
     return tf.matmul(x, w) + tf.matmul(h, u) + b
Esempio n. 11
0
 def _linear(self, x, h, bias_default=0.0):
     I, D = x.get_shape().as_list()[1], self._num_units
     w = weight('W', [I, D])
     u = weight('U', [D, D])
     b = bias('b', D, bias_default)
     return tf.matmul(x, w) + tf.matmul(h, u) + b
Esempio n. 12
0
    def build(self):
        params = self.params
        N, L, Q, F = params.batch_size, params.max_sent_size, params.max_ques_size, params.max_fact_count
        V, d, A = params.embed_size, params.hidden_size, self.words.vocab_size

        # initialize self
        # placeholders
        input = tf.placeholder('int32', shape=[N, L], name='x')  # [num_batch, sentence_len]
        question = tf.placeholder('int32', shape=[N, Q], name='q')  # [num_batch, sentence_len]
        answer = tf.placeholder('int32', shape=[N], name='y')  # [num_batch] - one word answer
        input_mask = tf.placeholder(tf.bool, shape=[N, L], name='x_mask')  # [num_batch, sentence_len]
        is_training = tf.placeholder(tf.bool)

        # Prepare parameters
        gru = rnn_cell.GRUCell(d)

        # Input module
        with tf.variable_scope('input') as scope:
            input_list = tf.unpack(tf.transpose(input))
            input_states, _ = seq2seq.embedding_rnn_decoder(input_list, gru.zero_state(N, tf.float32), gru, A, V)

            # Question module
            scope.reuse_variables()

            ques_list = tf.unpack(tf.transpose(question))
            questions, _ = seq2seq.embedding_rnn_decoder(ques_list, gru.zero_state(N, tf.float32), gru, A, V)
            question_vec = questions[-1]  # use final state

        # Masking: to extract fact vectors at end of sentence. (details in paper)
        input_states = tf.transpose(tf.pack(input_states), [1, 0, 2])  # [N, L, D]
        facts = []
        for n in range(N):
            filtered = tf.boolean_mask(input_states[n, :, :], input_mask[n, :])  # [?, D]
            padding = tf.zeros(tf.pack([F - tf.shape(filtered)[0], d]))
            facts.append(tf.concat(0, [filtered, padding]))  # [F, D]

        facked = tf.pack(facts)  # packing for transpose... I hate TF so much
        facts = tf.unpack(tf.transpose(facked, [1, 0, 2]), num=F)  # F x [N, D]

        # Episodic Memory
        with tf.variable_scope('episodic') as scope:
            episode = EpisodeModule(d, question_vec, facts)

            memory = tf.identity(question_vec)
            for t in range(params.memory_step):
                memory = gru(episode.new(memory), memory)[0]
                scope.reuse_variables()

        # Regularizations
        if params.batch_norm:
            memory = batch_norm(memory, is_training=is_training)
        memory = dropout(memory, params.keep_prob, is_training)

        with tf.name_scope('Answer'):
            # Answer module : feed-forward version (for it is one word answer)
            w_a = weight('w_a', [d, A])
            logits = tf.matmul(memory, w_a)  # [N, A]

        with tf.name_scope('Loss'):
            # Cross-Entropy loss
            cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, answer)
            loss = tf.reduce_mean(cross_entropy)
            total_loss = loss + params.weight_decay * tf.add_n(tf.get_collection('l2'))

        with tf.variable_scope('Accuracy'):
            # Accuracy
            predicts = tf.cast(tf.argmax(logits, 1), 'int32')
            corrects = tf.equal(predicts, answer)
            num_corrects = tf.reduce_sum(tf.cast(corrects, tf.float32))
            accuracy = tf.reduce_mean(tf.cast(corrects, tf.float32))

        # Training
        optimizer = tf.train.AdadeltaOptimizer(params.learning_rate)
        opt_op = optimizer.minimize(total_loss, global_step=self.global_step)

        # placeholders
        self.x = input
        self.q = question
        self.y = answer
        self.mask = input_mask
        self.is_training = is_training

        # tensors
        self.total_loss = total_loss
        self.num_corrects = num_corrects
        self.accuracy = accuracy
        self.opt_op = opt_op
Esempio n. 13
0
    def build(self):
        params = self.params
        N, L, Q, F = params.batch_size, params.max_sent_size, params.max_ques_size, params.max_fact_count
        V, d, A = params.embed_size, params.hidden_size, self.words.vocab_size

        # initialize self
        # placeholders
        input = tf.placeholder('int32', shape=[N, F, L], name='x')  # [num_batch, fact_count, sentence_len]
        question = tf.placeholder('int32', shape=[N, Q], name='q')  # [num_batch, question_len]
        answer = tf.placeholder('int32', shape=[N], name='y')  # [num_batch] - one word answer
        fact_counts = tf.placeholder('int64', shape=[N], name='fc')
        input_mask = tf.placeholder('float32', shape=[N, F, L, V], name='xm')
        is_training = tf.placeholder(tf.bool)
        self.att = tf.constant(0.)

        # Prepare parameters
        gru = rnn_cell.GRUCell(d)
        l = self.positional_encoding()
        embedding = weight('embedding', [A, V], init='uniform', range=3**(1/2))

        with tf.name_scope('SentenceReader'):
            input_list = tf.unpack(tf.transpose(input))  # L x [F, N]
            input_embed = []
            for facts in input_list:
                facts = tf.unpack(facts)
                embed = tf.pack([tf.nn.embedding_lookup(embedding, w) for w in facts])  # [F, N, V]
                input_embed.append(embed)

            # apply positional encoding
            input_embed = tf.transpose(tf.pack(input_embed), [2, 1, 0, 3])  # [N, F, L, V]
            encoded = l * input_embed * input_mask
            facts = tf.reduce_sum(encoded, 2)  # [N, F, V]

        # dropout time
        facts = dropout(facts, params.keep_prob, is_training)

        with tf.name_scope('InputFusion'):
            # Bidirectional RNN
            with tf.variable_scope('Forward'):
                forward_states, _ = tf.nn.dynamic_rnn(gru, facts, fact_counts, dtype=tf.float32)

            with tf.variable_scope('Backward'):
                facts_reverse = tf.reverse_sequence(facts, fact_counts, 1)
                backward_states, _ = tf.nn.dynamic_rnn(gru, facts_reverse, fact_counts, dtype=tf.float32)

            # Use forward and backward states both
            facts = forward_states + backward_states  # [N, F, d]

        with tf.variable_scope('Question'):
            ques_list = tf.unpack(tf.transpose(question))
            ques_embed = [tf.nn.embedding_lookup(embedding, w) for w in ques_list]
            _, question_vec = tf.nn.rnn(gru, ques_embed, dtype=tf.float32)

        # Episodic Memory
        with tf.variable_scope('Episodic'):
            episode = EpisodeModule(d, question_vec, facts, is_training, params.batch_norm)
            memory = tf.identity(question_vec)

            for t in range(params.memory_step):
                with tf.variable_scope('Layer%d' % t) as scope:
                    if params.memory_update == 'gru':
                        memory = gru(episode.new(memory), memory)[0]
                    else:
                        # ReLU update
                        c = episode.new(memory)
                        concated = tf.concat(1, [memory, c, question_vec])

                        w_t = weight('w_t', [3 * d, d])
                        z = tf.matmul(concated, w_t)
                        if params.batch_norm:
                            z = batch_norm(z, is_training)
                        else:
                            b_t = bias('b_t', d)
                            z = z + b_t
                        memory = tf.nn.relu(z)  # [N, d]

                    scope.reuse_variables()

        # Regularizations
        if params.batch_norm:
            memory = batch_norm(memory, is_training=is_training)
        memory = dropout(memory, params.keep_prob, is_training)

        with tf.name_scope('Answer'):
            # Answer module : feed-forward version (for it is one word answer)
            w_a = weight('w_a', [d, A], init='xavier')
            logits = tf.matmul(memory, w_a)  # [N, A]

        with tf.name_scope('Loss'):
            # Cross-Entropy loss
            cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, answer)
            loss = tf.reduce_mean(cross_entropy)
            total_loss = loss + params.weight_decay * tf.add_n(tf.get_collection('l2'))

        with tf.variable_scope('Accuracy'):
            # Accuracy
            predicts = tf.cast(tf.argmax(logits, 1), 'int32')
            corrects = tf.equal(predicts, answer)
            num_corrects = tf.reduce_sum(tf.cast(corrects, tf.float32))
            accuracy = tf.reduce_mean(tf.cast(corrects, tf.float32))

        # Training
        optimizer = tf.train.AdamOptimizer(params.learning_rate)
        opt_op = optimizer.minimize(total_loss, global_step=self.global_step)

        # placeholders
        self.x = input
        self.xm = input_mask
        self.q = question
        self.y = answer
        self.fc = fact_counts
        self.is_training = is_training

        # tensors
        self.total_loss = total_loss
        self.num_corrects = num_corrects
        self.accuracy = accuracy
        self.opt_op = opt_op
Esempio n. 14
0
    def build(self):

        params = self.params
        N, L, Q, F = params.batch_size, params.max_sent_size, params.max_ques_size, params.max_fact_count
        V, d, A = params.embed_size, params.hidden_size, self.words.vocab_size

        # initialize self
        # placeholders
        input = tf.placeholder(
            'int32', shape=[N, F, L],
            name='x')  # [num_batch, fact_count, sentence_len]
        question = tf.placeholder('int32', shape=[N, Q],
                                  name='q')  # [num_batch, question_len]
        answer = tf.placeholder('int32', shape=[N],
                                name='y')  # [num_batch] - one word answer
        fact_counts = tf.placeholder(
            'int64', shape=[N], name='fc')  #how many facts for each question
        input_mask = tf.placeholder(
            'float32', shape=[N, F, L, V],
            name='xm')  #[num_batch, fact_count, sentence_len,embed_size]
        is_training = tf.placeholder(tf.bool)
        self.att = tf.constant(0.)

        # Prepare parameters
        gru = rnn_cell.GRUCell(
            d)  #building a GRU cell with d hidden dimentions
        l = self.positional_encoding(
        )  #This is a positional encoding matrix for each sentance. We can embed input words in each sentance by this
        embedding = weight(
            'embedding', [A, V], init='uniform',
            range=3**(1 / 2))  #embedding metric [vocanb size , embed size]

        with tf.name_scope('SentenceReader'):
            input_list = tf.unpack(
                tf.transpose(input)
            )  # L x [F, N]    #input it gives how many sentaces ,batch size and length of a sentence
            input_embed = []

            for facts in input_list:  #this will iterate till maximum sentance length
                facts = tf.unpack(
                    facts
                )  #in each sentance postion there can be 10 *128 words
                embed = tf.pack([
                    tf.nn.embedding_lookup(embedding, w) for w in facts
                ])  # [F, N, V]  #put them insid the ebedding metric
                input_embed.append(
                    embed)  #add the embeddings  for each senetence length

            # apply positional encoding
            input_embed = tf.transpose(
                tf.pack(input_embed),
                [2, 1, 0, 3])  # [N, F, L, V]  #embeddings for all words
            encoded = l * input_embed * input_mask  #again initialize them
            facts = tf.reduce_sum(
                encoded, 2
            )  # [N, F, V]   #this is like simming all the vectors in one sentance (total sentances)

#####################################Up to here all embedding has done##############################################################
# dropout time
        facts = dropout(facts, params.keep_prob,
                        is_training)  #impleent the dropout

        with tf.name_scope('InputFusion'):
            #Bidirectional RNN
            with tf.variable_scope('Forward'):
                forward_states, _ = tf.nn.dynamic_rnn(
                    gru, facts, fact_counts,
                    dtype=tf.float32)  #this creates a dynamic RNN
#This can be replaced with a biderectional dynamic RNN it's easy ###########
            with tf.variable_scope('Backward'):
                facts_reverse = tf.reverse_sequence(facts, fact_counts,
                                                    1)  #reversing the facts
                backward_states, _ = tf.nn.dynamic_rnn(
                    gru, facts_reverse, fact_counts,
                    dtype=tf.float32)  #fact counts for where to stop

            # Use forward and backward states both
            facts = forward_states + backward_states  # [N, F, d]  #sum them up

        with tf.variable_scope('Question'):
            ques_list = tf.unpack(tf.transpose(
                question))  #unpacking the a place holder to a list
            ques_embed = [
                tf.nn.embedding_lookup(embedding, w) for w in ques_list
            ]  #assign embeddings using embedding lookup
            _, question_vec = tf.nn.rnn(
                gru, ques_embed,
                dtype=tf.float32)  #send the over a RNN then take the