Ejemplo n.º 1
0
    def attend(self, contexts, output):
        """Compute soft-attention weights over the context regions.

        Both arguments first pass through ``nn.dropout`` (using
        ``self.dropout_rate`` and ``self.is_training``).  Each of the
        ``self.batch_size`` samples is then scored independently and the
        per-sample scores are stacked and normalised with a softmax.

        The sizes 196 and 512 are hard-coded: ``output[i]`` is reshaped to
        ``[512, 1]`` and every sample yields a score vector of length 196.
        Presumably ``contexts`` is ``[batch, 196, 512]`` and ``output`` is
        ``[batch, 512]`` -- TODO confirm against the caller.

        The dense layers are created once per sample under the same names
        ('fc_lc', 'fc_lo', 'fc_alpha'), so this method presumably has to be
        called inside a variable scope that permits reuse -- verify.

        Returns:
            Tensor of shape [batch_size, 196] produced by ``tf.nn.softmax``.
        """
        dropped_contexts = nn.dropout(contexts,
                                      self.dropout_rate,
                                      self.is_training,
                                      name='drop_c')
        dropped_output = nn.dropout(output,
                                    self.dropout_rate,
                                    self.is_training,
                                    name='drop_o')

        def score_sample(idx):
            # Project the context regions of this sample.
            context_proj = nn.dense(dropped_contexts[idx],
                                    units=196,
                                    activation=None,
                                    use_bias=False,
                                    name='fc_lc')  # shape = [196, 196]

            # Replicate the 512-d output vector once per region:
            # [512, 1] x [1, 196] -> [512, 196], transposed to [196, 512].
            column = tf.reshape(dropped_output[idx], shape=[512, 1])
            replicated = tf.transpose(
                tf.matmul(column, tf.ones([1, 196], tf.float32)))
            output_proj = nn.dense(replicated,
                                   units=196,
                                   activation=None,
                                   use_bias=False,
                                   name='fc_lo')  # shape = [196, 196]

            # One unnormalised score per region.
            scores = nn.dense(tf.tanh(context_proj + output_proj),
                              units=1,
                              activation=None,
                              use_bias=False,
                              name='fc_alpha')  # shape = [196, 1]
            return tf.reshape(scores, shape=[196])

        per_sample_scores = [
            score_sample(idx) for idx in range(self.batch_size)
        ]

        # Stack to [batch_size, 196] and normalise along the last axis.
        return tf.nn.softmax(tf.stack(per_sample_scores, axis=0))
Ejemplo n.º 2
0
    def build(self):
        """Build the model graph and expose its placeholders and tensors on self.

        Pipeline, in order: an input reader and a question reader that share
        one GRU cell and one variable scope, extraction of fact vectors from
        the input states via a boolean mask (zero-padded to ``F`` rows),
        ``params.memory_step`` episodic memory updates, optional batch norm
        plus dropout, a single-matrix answer layer, cross-entropy loss with
        weight decay over the 'l2' collection, accuracy, and an Adadelta
        training op.

        Sets on self:
            x, q, y, mask, is_training: the placeholders to feed.
            total_loss, num_corrects, accuracy, opt_op: the tensors/ops to run.
        """
        params = self.params
        N, L, Q, F = params.batch_size, params.max_sent_size, params.max_ques_size, params.max_fact_count
        V, d, A = params.glove_size, params.hidden_size, self.words.vocab_size

        # placeholders (local is named `inputs` to avoid shadowing the builtin `input`)
        inputs = tf.placeholder(tf.float32, shape=[N, L, V], name='x')  # [num_batch, sentence_len, glove_dim]
        question = tf.placeholder(tf.float32, shape=[N, Q, V], name='q')  # [num_batch, question_len, glove_dim]
        answer = tf.placeholder(tf.int64, shape=[N], name='y')  # [num_batch] - one word answer
        input_mask = tf.placeholder(tf.bool, shape=[N, L], name='x_mask')  # [num_batch, sentence_len]
        is_training = tf.placeholder(tf.bool)

        # Prepare parameters
        gru = rnn_cell.GRUCell(d)

        # Input module
        with tf.variable_scope('input') as scope:
            input_list = self.make_decoder_batch_input(inputs)
            input_states, _ = seq2seq.rnn_decoder(input_list, gru.zero_state(N, tf.float32), gru)

            # Question module: reuse the variables created by the input reader
            scope.reuse_variables()

            ques_list = self.make_decoder_batch_input(question)
            questions, _ = seq2seq.rnn_decoder(ques_list, gru.zero_state(N, tf.float32), gru)
            question_vec = questions[-1]  # use final state

        # Masking: to extract fact vectors at end of sentence. (details in paper)
        input_states = tf.transpose(tf.pack(input_states), [1, 0, 2])  # [N, L, D]
        facts = []
        for n in range(N):
            filtered = tf.boolean_mask(input_states[n, :, :], input_mask[n, :])  # [?, D]
            # Zero-pad every sample up to F fact rows so the batch can be packed.
            padding = tf.zeros(tf.pack([F - tf.shape(filtered)[0], d]))
            facts.append(tf.concat(0, [filtered, padding]))  # [F, D]

        # Pack to [N, F, D] only to be able to transpose to fact-major order.
        facked = tf.pack(facts)
        facts = tf.unpack(tf.transpose(facked, [1, 0, 2]), num=F)  # F x [N, D]

        # Episodic Memory
        with tf.variable_scope('episodic') as scope:
            episode = EpisodeModule(d, question_vec, facts)

            memory = tf.identity(question_vec)
            for t in range(params.memory_step):
                memory = gru(episode.new(memory), memory)[0]
                # Later passes share the variables created by the first pass.
                scope.reuse_variables()

        # Regularizations
        if params.batch_norm:
            memory = batch_norm(memory, is_training=is_training)
        memory = dropout(memory, params.keep_prob, is_training)

        with tf.name_scope('Answer'):
            # Answer module : feed-forward version (for it is one word answer)
            w_a = weight('w_a', [d, A])
            logits = tf.matmul(memory, w_a)  # [N, A]

        with tf.name_scope('Loss'):
            # Cross-Entropy loss
            cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, answer)
            loss = tf.reduce_mean(cross_entropy)
            total_loss = loss + params.weight_decay * tf.add_n(tf.get_collection('l2'))

        with tf.variable_scope('Accuracy'):
            # Accuracy
            # Cast to the label dtype: `answer` is int64 here and tf.equal
            # needs both operands to have the same dtype (the previous
            # hard-coded 'int32' cast did not match).
            predicts = tf.cast(tf.argmax(logits, 1), answer.dtype)
            corrects = tf.equal(predicts, answer)
            num_corrects = tf.reduce_sum(tf.cast(corrects, tf.float32))
            accuracy = tf.reduce_mean(tf.cast(corrects, tf.float32))

        # Training
        optimizer = tf.train.AdadeltaOptimizer(params.learning_rate)
        opt_op = optimizer.minimize(total_loss, global_step=self.global_step)

        # placeholders
        self.x = inputs
        self.q = question
        self.y = answer
        self.mask = input_mask
        self.is_training = is_training

        # tensors
        self.total_loss = total_loss
        self.num_corrects = num_corrects
        self.accuracy = accuracy
        self.opt_op = opt_op
Ejemplo n.º 3
0
    def build(self):
        """Build the model graph and expose its placeholders and tensors on self.

        Pipeline, in order: embed every word and reduce each sentence to one
        vector using the positional encoding and the input mask, fuse the
        fact vectors with a forward and a backward GRU pass, encode the
        question, run ``params.memory_step`` episodic memory updates (GRU or
        ReLU update, selected by ``params.memory_update``), apply optional
        batch norm plus dropout, project to vocabulary logits, and build the
        loss, accuracy and Adam training op.

        Sets on self:
            att: a constant 0. tensor.
            x, xm, q, y, fc, is_training: the placeholders to feed.
            total_loss, num_corrects, accuracy, opt_op: the tensors/ops to run.
        """
        params = self.params
        N, L, Q, F = params.batch_size, params.max_sent_size, params.max_ques_size, params.max_fact_count
        V, d, A = params.embed_size, params.hidden_size, self.words.vocab_size

        # placeholders (local is named `inputs` to avoid shadowing the builtin `input`)
        inputs = tf.placeholder(
            'int32', shape=[N, F, L],
            name='x')  # [num_batch, fact_count, sentence_len]
        question = tf.placeholder('int32', shape=[N, Q],
                                  name='q')  # [num_batch, question_len]
        answer = tf.placeholder('int32', shape=[N],
                                name='y')  # [num_batch] - one word answer
        fact_counts = tf.placeholder('int64', shape=[N], name='fc')
        input_mask = tf.placeholder('float32', shape=[N, F, L, V], name='xm')
        is_training = tf.placeholder(tf.bool)
        self.att = tf.constant(0.)

        # Prepare parameters
        gru = rnn_cell.GRUCell(d)
        position_weights = self.positional_encoding()
        # `3 ** 0.5` rather than `3 ** (1 / 2)`: with Python 2 integer
        # division the latter evaluates to 3 ** 0 == 1 instead of sqrt(3).
        embedding = weight('embedding', [A, V],
                           init='uniform',
                           range=3 ** 0.5)

        with tf.name_scope('SentenceReader'):
            input_list = tf.unpack(tf.transpose(inputs))  # L x [F, N]
            input_embed = []
            for word_slice in input_list:
                per_fact = tf.unpack(word_slice)  # F x [N]
                embed = tf.pack([
                    tf.nn.embedding_lookup(embedding, w) for w in per_fact
                ])  # [F, N, V]
                input_embed.append(embed)

            # apply positional encoding
            input_embed = tf.transpose(tf.pack(input_embed),
                                       [2, 1, 0, 3])  # [N, F, L, V]
            encoded = position_weights * input_embed * input_mask
            facts = tf.reduce_sum(encoded, 2)  # [N, F, V]

        # dropout time
        facts = dropout(facts, params.keep_prob, is_training)

        with tf.name_scope('InputFusion'):
            # Bidirectional RNN
            with tf.variable_scope('Forward'):
                forward_states, _ = tf.nn.dynamic_rnn(gru,
                                                      facts,
                                                      fact_counts,
                                                      dtype=tf.float32)

            with tf.variable_scope('Backward'):
                facts_reverse = tf.reverse_sequence(facts, fact_counts, 1)
                backward_states, _ = tf.nn.dynamic_rnn(gru,
                                                       facts_reverse,
                                                       fact_counts,
                                                       dtype=tf.float32)
                # The backward pass emits its outputs in reversed fact
                # order; flip them back so that index i of both passes
                # refers to the same fact before they are summed.
                backward_states = tf.reverse_sequence(backward_states,
                                                      fact_counts, 1)

            # Use forward and backward states both
            facts = forward_states + backward_states  # [N, F, d]

        with tf.variable_scope('Question'):
            ques_list = tf.unpack(tf.transpose(question))
            ques_embed = [
                tf.nn.embedding_lookup(embedding, w) for w in ques_list
            ]
            _, question_vec = tf.nn.rnn(gru, ques_embed, dtype=tf.float32)

        # Episodic Memory
        with tf.variable_scope('Episodic'):
            episode = EpisodeModule(d, question_vec, facts, is_training,
                                    params.batch_norm)
            memory = tf.identity(question_vec)

            for t in range(params.memory_step):
                # Each pass gets its own 'Layer<t>' variable scope.
                with tf.variable_scope('Layer%d' % t) as scope:
                    if params.memory_update == 'gru':
                        memory = gru(episode.new(memory), memory)[0]
                    else:
                        # ReLU update
                        c = episode.new(memory)
                        concated = tf.concat(1, [memory, c, question_vec])

                        w_t = weight('w_t', [3 * d, d])
                        z = tf.matmul(concated, w_t)
                        if params.batch_norm:
                            z = batch_norm(z, is_training)
                        else:
                            b_t = bias('b_t', d)
                            z = z + b_t
                        memory = tf.nn.relu(z)  # [N, d]

                    scope.reuse_variables()

        # Regularizations
        if params.batch_norm:
            memory = batch_norm(memory, is_training=is_training)
        memory = dropout(memory, params.keep_prob, is_training)

        with tf.name_scope('Answer'):
            # Answer module : feed-forward version (for it is one word answer)
            w_a = weight('w_a', [d, A], init='xavier')
            logits = tf.matmul(memory, w_a)  # [N, A]

        with tf.name_scope('Loss'):
            # Cross-Entropy loss
            cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits, answer)
            loss = tf.reduce_mean(cross_entropy)
            total_loss = loss + params.weight_decay * tf.add_n(
                tf.get_collection('l2'))

        with tf.variable_scope('Accuracy'):
            # Accuracy
            predicts = tf.cast(tf.argmax(logits, 1), 'int32')
            corrects = tf.equal(predicts, answer)
            num_corrects = tf.reduce_sum(tf.cast(corrects, tf.float32))
            accuracy = tf.reduce_mean(tf.cast(corrects, tf.float32))

        # Training
        optimizer = tf.train.AdamOptimizer(params.learning_rate)
        opt_op = optimizer.minimize(total_loss, global_step=self.global_step)

        # placeholders
        self.x = inputs
        self.xm = input_mask
        self.q = question
        self.y = answer
        self.fc = fact_counts
        self.is_training = is_training

        # tensors
        self.total_loss = total_loss
        self.num_corrects = num_corrects
        self.accuracy = accuracy
        self.opt_op = opt_op
Ejemplo n.º 4
0
    def build_rnn(self):
        """Build the attention-based word decoder, unrolled over the caption length.

        Creates the word-embedding matrix and an LSTM cell (wrapped in
        dropout when ``self.is_training``), initialises the LSTM state from
        the mean of ``self.visual_feats`` over axis 1, then unrolls
        ``self.max_caption_length`` steps.  Each step embeds the previous
        word, attends over the visual features, advances the LSTM and
        predicts the next word.  While training the previous word is the
        ground truth from ``self.sentences``; otherwise it is the model's
        own prediction.

        Sets on self:
            predictions: list with one argmax tensor per step.
            cross_entropies: list with one masked cross-entropy tensor per step.
            alphas: list of flattened masked attention weights (filled only
                while training).
            accuracy: masked fraction of correctly predicted words.
        """
        with tf.variable_scope("word_embedding"):
            word_embedding_matrix = tf.get_variable(
                name='weights',
                shape=[self.vocabulary_size, self.embedding_size],
                initializer=nn.kernel_initializer(),
                regularizer=nn.kernel_regularizer(),
                trainable=True)

        # 1. build Word LSTM
        WordLSTM = tf.nn.rnn_cell.LSTMCell(self.lstm_units,
                                           initializer=nn.kernel_initializer())
        if self.is_training:
            WordLSTM = tf.nn.rnn_cell.DropoutWrapper(
                WordLSTM,
                input_keep_prob=1.0 - self.lstm_drop_rate,
                output_keep_prob=1.0 - self.lstm_drop_rate,
                state_keep_prob=1.0 - self.lstm_drop_rate)

        # 2. initialize word lstm
        with tf.variable_scope("word_lstm_initialize"):
            context = tf.reduce_mean(self.visual_feats, axis=1)
            context_dropout = nn.dropout(context,
                                         self.dropout_rate,
                                         self.is_training,
                                         name='drop_c')
            initial_memory = nn.dense(context_dropout,
                                      units=self.lstm_units,
                                      activation=None,
                                      name='fc_m')
            initial_output = nn.dense(context_dropout,
                                      self.lstm_units,
                                      activation=None,
                                      name='fc_o')

        WordLSTM_last_state = initial_memory, initial_output
        WordLSTM_last_output = initial_output
        WordLSTM_last_word = tf.zeros(
            [self.batch_size], tf.int32)  # tf.zeros() means the '<S>' token

        predictions = []  # store predict word
        prediction_corrects = []  # store correct predict to compute accuracy
        cross_entropies = []  # store cross entropy loss
        alphas = []

        # 3. generate word step by step
        # (loop variable is `step`, not `id`, to avoid shadowing the builtin)
        for step in range(self.max_caption_length):
            with tf.variable_scope("word_embedding"):
                word_embedding = tf.nn.embedding_lookup(
                    word_embedding_matrix, WordLSTM_last_word)

            with tf.variable_scope("attend", reuse=tf.AUTO_REUSE):
                alpha = self.attend(self.visual_feats, WordLSTM_last_output)
                context = tf.reduce_sum(self.visual_feats *
                                        tf.expand_dims(alpha, axis=2),
                                        axis=1)
                if self.is_training:
                    # Zero the attention weights of padded time steps.
                    titled_masks = tf.tile(
                        tf.expand_dims(self.masks[:, step], axis=1), [1, 196])
                    masked_alpha = alpha * titled_masks
                    alphas.append(tf.reshape(masked_alpha, [-1]))

            with tf.variable_scope('WordLSTM'):
                inputs = tf.concat([context, word_embedding], axis=1)
                WordLSTM_current_output, WordLSTM_current_state = WordLSTM(
                    inputs, WordLSTM_last_state)

            with tf.variable_scope('decode'):
                expanded_output = tf.concat(
                    [context, word_embedding, WordLSTM_current_output], axis=1)
                expanded_output_dropout = nn.dropout(expanded_output,
                                                     self.dropout_rate,
                                                     self.is_training,
                                                     name='drop')
                logits = nn.dense(expanded_output_dropout,
                                  units=self.vocabulary_size,
                                  activation=None,
                                  name='fc')
                prediction = tf.argmax(logits, 1)
                predictions.append(prediction)

            # Every later step shares the variables created by the first one.
            tf.get_variable_scope().reuse_variables()

            # Carry both the state and the output forward.  The output has
            # to be propagated too; otherwise the attention at every step
            # would be conditioned on the initial output only.
            WordLSTM_last_state = WordLSTM_current_state
            WordLSTM_last_output = WordLSTM_current_output

            # use teacher policy
            if self.is_training:
                WordLSTM_last_word = self.sentences[:, step]
            else:
                WordLSTM_last_word = prediction

            # compute loss
            cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=self.sentences[:, step], logits=logits)
            masked_cross_entropy = cross_entropy * self.masks[:, step]
            cross_entropies.append(masked_cross_entropy)

            # compute accuracy: a hit contributes its mask value, a miss 0
            ground_truth = tf.cast(self.sentences[:, step], tf.int64)
            prediction_correct = tf.where(
                tf.equal(prediction, ground_truth),
                tf.cast(self.masks[:, step], tf.float32),
                tf.cast(tf.zeros_like(prediction), tf.float32))
            prediction_corrects.append(prediction_correct)

        # 4. compute accuracy
        prediction_corrects = tf.stack(prediction_corrects, axis=1)
        accuracy = tf.reduce_sum(prediction_corrects) / tf.reduce_sum(
            self.masks)

        self.predictions = predictions
        self.cross_entropies = cross_entropies
        self.alphas = alphas
        self.accuracy = accuracy
        print('rnn built.')
Ejemplo n.º 5
0
    def build(self):
        """Assemble the model graph and publish its placeholders and tensors on self.

        Stages, in order: an embedding decoder over the input words and one
        over the question words (same GRU cell, same variable scope), fact
        extraction from the input states through a boolean mask with zero
        padding up to ``F`` rows, ``params.memory_step`` episodic memory
        updates, optional batch norm plus dropout, a single-matrix answer
        layer, cross-entropy loss with weight decay over the 'l2'
        collection, accuracy, and an Adadelta training op.

        Sets on self:
            x, q, y, mask, is_training: the placeholders to feed.
            total_loss, num_corrects, accuracy, opt_op: the tensors/ops to run.
        """
        params = self.params
        N = params.batch_size
        L = params.max_sent_size
        Q = params.max_ques_size
        F = params.max_fact_count
        V = params.embed_size
        d = params.hidden_size
        A = self.words.vocab_size

        # ---- placeholders ----
        story_ph = tf.placeholder('int32', shape=[N, L], name='x')  # [num_batch, sentence_len]
        question_ph = tf.placeholder('int32', shape=[N, Q], name='q')  # [num_batch, question_len]
        answer_ph = tf.placeholder('int32', shape=[N], name='y')  # [num_batch] - one word answer
        mask_ph = tf.placeholder(tf.bool, shape=[N, L], name='x_mask')  # [num_batch, sentence_len]
        training_ph = tf.placeholder(tf.bool)

        # ---- shared recurrent cell ----
        gru = rnn_cell.GRUCell(d)

        # ---- input and question readers ----
        with tf.variable_scope('input') as reader_scope:
            story_words = tf.unpack(tf.transpose(story_ph))
            story_outputs, _ = seq2seq.embedding_rnn_decoder(
                story_words, gru.zero_state(N, tf.float32), gru, A, V)

            # The question reader reuses the variables created just above.
            reader_scope.reuse_variables()

            question_words = tf.unpack(tf.transpose(question_ph))
            question_outputs, _ = seq2seq.embedding_rnn_decoder(
                question_words, gru.zero_state(N, tf.float32), gru, A, V)
            question_vec = question_outputs[-1]  # output of the final step

        # ---- fact extraction: keep the masked positions, zero-pad to F rows ----
        batch_major_states = tf.transpose(tf.pack(story_outputs), [1, 0, 2])  # [N, L, D]

        def padded_facts(sample):
            kept = tf.boolean_mask(batch_major_states[sample, :, :], mask_ph[sample, :])  # [?, D]
            filler = tf.zeros(tf.pack([F - tf.shape(kept)[0], d]))
            return tf.concat(0, [kept, filler])  # [F, D]

        per_sample_facts = [padded_facts(sample) for sample in range(N)]

        # Pack to [N, F, D] so the batch can be transposed to fact-major order.
        fact_block = tf.pack(per_sample_facts)
        fact_list = tf.unpack(tf.transpose(fact_block, [1, 0, 2]), num=F)  # F x [N, D]

        # ---- episodic memory ----
        with tf.variable_scope('episodic') as episodic_scope:
            episode = EpisodeModule(d, question_vec, fact_list)

            memory = tf.identity(question_vec)
            for _ in range(params.memory_step):
                cell_result = gru(episode.new(memory), memory)
                memory = cell_result[0]
                # Passes after the first share the same variables.
                episodic_scope.reuse_variables()

        # ---- regularisation ----
        if params.batch_norm:
            memory = batch_norm(memory, is_training=training_ph)
        memory = dropout(memory, params.keep_prob, training_ph)

        # ---- answer: one feed-forward projection (single-word answer) ----
        with tf.name_scope('Answer'):
            answer_weights = weight('w_a', [d, A])
            logits = tf.matmul(memory, answer_weights)  # [N, A]

        # ---- loss: mean cross-entropy plus weight decay ----
        with tf.name_scope('Loss'):
            xent = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, answer_ph)
            mean_xent = tf.reduce_mean(xent)
            objective = mean_xent + params.weight_decay * tf.add_n(tf.get_collection('l2'))

        # ---- accuracy ----
        with tf.variable_scope('Accuracy'):
            predicted = tf.cast(tf.argmax(logits, 1), 'int32')
            hits = tf.equal(predicted, answer_ph)
            hit_count = tf.reduce_sum(tf.cast(hits, tf.float32))
            hit_rate = tf.reduce_mean(tf.cast(hits, tf.float32))

        # ---- training op ----
        train_op = tf.train.AdadeltaOptimizer(params.learning_rate).minimize(
            objective, global_step=self.global_step)

        # ---- public handles: placeholders ----
        self.x = story_ph
        self.q = question_ph
        self.y = answer_ph
        self.mask = mask_ph
        self.is_training = training_ph

        # ---- public handles: tensors ----
        self.total_loss = objective
        self.num_corrects = hit_count
        self.accuracy = hit_rate
        self.opt_op = train_op
Ejemplo n.º 6
0
    def build(self):
        """Build the model graph and expose its placeholders and tensors on self.

        Pipeline, in order: embed every word and reduce each sentence to one
        vector using the positional encoding and the input mask, fuse the
        fact vectors with a forward and a backward GRU pass, encode the
        question, run ``params.memory_step`` episodic memory updates (GRU or
        ReLU update, selected by ``params.memory_update``), apply optional
        batch norm plus dropout, project to vocabulary logits, and build the
        loss, accuracy and Adam training op.

        Sets on self:
            att: a constant 0. tensor.
            x, xm, q, y, fc, is_training: the placeholders to feed.
            total_loss, num_corrects, accuracy, opt_op: the tensors/ops to run.
        """
        params = self.params
        N, L, Q, F = params.batch_size, params.max_sent_size, params.max_ques_size, params.max_fact_count
        V, d, A = params.embed_size, params.hidden_size, self.words.vocab_size

        # placeholders (local is named `inputs` to avoid shadowing the builtin `input`)
        inputs = tf.placeholder('int32', shape=[N, F, L], name='x')  # [num_batch, fact_count, sentence_len]
        question = tf.placeholder('int32', shape=[N, Q], name='q')  # [num_batch, question_len]
        answer = tf.placeholder('int32', shape=[N], name='y')  # [num_batch] - one word answer
        fact_counts = tf.placeholder('int64', shape=[N], name='fc')
        input_mask = tf.placeholder('float32', shape=[N, F, L, V], name='xm')
        is_training = tf.placeholder(tf.bool)
        self.att = tf.constant(0.)

        # Prepare parameters
        gru = rnn_cell.GRUCell(d)
        position_weights = self.positional_encoding()
        # `3 ** 0.5` rather than `3**(1/2)`: with Python 2 integer division
        # the latter evaluates to 3 ** 0 == 1 instead of sqrt(3).
        embedding = weight('embedding', [A, V], init='uniform', range=3 ** 0.5)

        with tf.name_scope('SentenceReader'):
            input_list = tf.unpack(tf.transpose(inputs))  # L x [F, N]
            input_embed = []
            for word_slice in input_list:
                per_fact = tf.unpack(word_slice)  # F x [N]
                embed = tf.pack([tf.nn.embedding_lookup(embedding, w) for w in per_fact])  # [F, N, V]
                input_embed.append(embed)

            # apply positional encoding
            input_embed = tf.transpose(tf.pack(input_embed), [2, 1, 0, 3])  # [N, F, L, V]
            encoded = position_weights * input_embed * input_mask
            facts = tf.reduce_sum(encoded, 2)  # [N, F, V]

        # dropout time
        facts = dropout(facts, params.keep_prob, is_training)

        with tf.name_scope('InputFusion'):
            # Bidirectional RNN
            with tf.variable_scope('Forward'):
                forward_states, _ = tf.nn.dynamic_rnn(gru, facts, fact_counts, dtype=tf.float32)

            with tf.variable_scope('Backward'):
                facts_reverse = tf.reverse_sequence(facts, fact_counts, 1)
                backward_states, _ = tf.nn.dynamic_rnn(gru, facts_reverse, fact_counts, dtype=tf.float32)
                # The backward pass emits its outputs in reversed fact order;
                # flip them back so that index i of both passes refers to the
                # same fact before they are summed.
                backward_states = tf.reverse_sequence(backward_states, fact_counts, 1)

            # Use forward and backward states both
            facts = forward_states + backward_states  # [N, F, d]

        with tf.variable_scope('Question'):
            ques_list = tf.unpack(tf.transpose(question))
            ques_embed = [tf.nn.embedding_lookup(embedding, w) for w in ques_list]
            _, question_vec = tf.nn.rnn(gru, ques_embed, dtype=tf.float32)

        # Episodic Memory
        with tf.variable_scope('Episodic'):
            episode = EpisodeModule(d, question_vec, facts, is_training, params.batch_norm)
            memory = tf.identity(question_vec)

            for t in range(params.memory_step):
                # Each pass gets its own 'Layer<t>' variable scope.
                with tf.variable_scope('Layer%d' % t) as scope:
                    if params.memory_update == 'gru':
                        memory = gru(episode.new(memory), memory)[0]
                    else:
                        # ReLU update
                        c = episode.new(memory)
                        concated = tf.concat(1, [memory, c, question_vec])

                        w_t = weight('w_t', [3 * d, d])
                        z = tf.matmul(concated, w_t)
                        if params.batch_norm:
                            z = batch_norm(z, is_training)
                        else:
                            b_t = bias('b_t', d)
                            z = z + b_t
                        memory = tf.nn.relu(z)  # [N, d]

                    scope.reuse_variables()

        # Regularizations
        if params.batch_norm:
            memory = batch_norm(memory, is_training=is_training)
        memory = dropout(memory, params.keep_prob, is_training)

        with tf.name_scope('Answer'):
            # Answer module : feed-forward version (for it is one word answer)
            w_a = weight('w_a', [d, A], init='xavier')
            logits = tf.matmul(memory, w_a)  # [N, A]

        with tf.name_scope('Loss'):
            # Cross-Entropy loss
            cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, answer)
            loss = tf.reduce_mean(cross_entropy)
            total_loss = loss + params.weight_decay * tf.add_n(tf.get_collection('l2'))

        with tf.variable_scope('Accuracy'):
            # Accuracy
            predicts = tf.cast(tf.argmax(logits, 1), 'int32')
            corrects = tf.equal(predicts, answer)
            num_corrects = tf.reduce_sum(tf.cast(corrects, tf.float32))
            accuracy = tf.reduce_mean(tf.cast(corrects, tf.float32))

        # Training
        optimizer = tf.train.AdamOptimizer(params.learning_rate)
        opt_op = optimizer.minimize(total_loss, global_step=self.global_step)

        # placeholders
        self.x = inputs
        self.xm = input_mask
        self.q = question
        self.y = answer
        self.fc = fact_counts
        self.is_training = is_training

        # tensors
        self.total_loss = total_loss
        self.num_corrects = num_corrects
        self.accuracy = accuracy
        self.opt_op = opt_op
Ejemplo n.º 7
0
    def build(self):

        params = self.params
        N, L, Q, F = params.batch_size, params.max_sent_size, params.max_ques_size, params.max_fact_count
        V, d, A = params.embed_size, params.hidden_size, self.words.vocab_size

        # initialize self
        # placeholders
        input = tf.placeholder(
            'int32', shape=[N, F, L],
            name='x')  # [num_batch, fact_count, sentence_len]
        question = tf.placeholder('int32', shape=[N, Q],
                                  name='q')  # [num_batch, question_len]
        answer = tf.placeholder('int32', shape=[N],
                                name='y')  # [num_batch] - one word answer
        fact_counts = tf.placeholder(
            'int64', shape=[N], name='fc')  #how many facts for each question
        input_mask = tf.placeholder(
            'float32', shape=[N, F, L, V],
            name='xm')  #[num_batch, fact_count, sentence_len,embed_size]
        is_training = tf.placeholder(tf.bool)
        self.att = tf.constant(0.)

        # Prepare parameters
        gru = rnn_cell.GRUCell(
            d)  #building a GRU cell with d hidden dimentions
        l = self.positional_encoding(
        )  #This is a positional encoding matrix for each sentance. We can embed input words in each sentance by this
        embedding = weight(
            'embedding', [A, V], init='uniform',
            range=3**(1 / 2))  #embedding metric [vocanb size , embed size]

        with tf.name_scope('SentenceReader'):
            input_list = tf.unpack(
                tf.transpose(input)
            )  # L x [F, N]    #input it gives how many sentaces ,batch size and length of a sentence
            input_embed = []

            for facts in input_list:  #this will iterate till maximum sentance length
                facts = tf.unpack(
                    facts
                )  #in each sentance postion there can be 10 *128 words
                embed = tf.pack([
                    tf.nn.embedding_lookup(embedding, w) for w in facts
                ])  # [F, N, V]  #put them insid the ebedding metric
                input_embed.append(
                    embed)  #add the embeddings  for each senetence length

            # apply positional encoding
            input_embed = tf.transpose(
                tf.pack(input_embed),
                [2, 1, 0, 3])  # [N, F, L, V]  #embeddings for all words
            encoded = l * input_embed * input_mask  #again initialize them
            facts = tf.reduce_sum(
                encoded, 2
            )  # [N, F, V]   #this is like simming all the vectors in one sentance (total sentances)

#####################################Up to here all embedding has done##############################################################
# dropout time
        facts = dropout(facts, params.keep_prob,
                        is_training)  #impleent the dropout

        with tf.name_scope('InputFusion'):
            #Bidirectional RNN
            with tf.variable_scope('Forward'):
                forward_states, _ = tf.nn.dynamic_rnn(
                    gru, facts, fact_counts,
                    dtype=tf.float32)  #this creates a dynamic RNN
#This can be replaced with a biderectional dynamic RNN it's easy ###########
            with tf.variable_scope('Backward'):
                facts_reverse = tf.reverse_sequence(facts, fact_counts,
                                                    1)  #reversing the facts
                backward_states, _ = tf.nn.dynamic_rnn(
                    gru, facts_reverse, fact_counts,
                    dtype=tf.float32)  #fact counts for where to stop

            # Use forward and backward states both
            facts = forward_states + backward_states  # [N, F, d]  #sum them up

        with tf.variable_scope('Question'):
            ques_list = tf.unpack(tf.transpose(
                question))  #unpacking the a place holder to a list
            ques_embed = [
                tf.nn.embedding_lookup(embedding, w) for w in ques_list
            ]  #assign embeddings using embedding lookup
            _, question_vec = tf.nn.rnn(
                gru, ques_embed,
                dtype=tf.float32)  #send the over a RNN then take the