Example #1
    def encode_w_attn(self, inputs, mask, prev_states, scope="", reuse=None):

        with tf.variable_scope(scope, reuse=reuse):
            attn_cell = LSTMAttnCell(HIDDEN_DIM, prev_states)
            o, final_state = tf.nn.dynamic_rnn(attn_cell,
                                               inputs,
                                               dtype=tf.float32,
                                               sequence_length=mask)
        return (o, final_state)
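All of these examples assume an `LSTMAttnCell` class defined elsewhere in the project. A minimal sketch of what such a cell could look like, written against the same TF 0.12-era API as the examples; the constructor signature, variable names, and the bilinear attention scoring are assumptions, not the project's actual implementation:

    import tensorflow as tf

    class LSTMAttnCell(tf.nn.rnn_cell.LSTMCell):
        """Sketch: an LSTM cell that attends over a fixed set of encoder states."""

        def __init__(self, num_units, encoder_output, encoder_dim=None, **kwargs):
            # encoder_output: [batch, enc_len, encoder_dim] states to attend over;
            # encoder_dim defaults to num_units when the two sizes match.
            self.hs = encoder_output
            self.units = num_units
            self.enc_dim = encoder_dim or num_units
            super(LSTMAttnCell, self).__init__(num_units, **kwargs)

        def __call__(self, inputs, state, scope=None):
            lstm_out, lstm_state = super(LSTMAttnCell, self).__call__(inputs, state, scope)
            with tf.variable_scope("attention"):
                W_a = tf.get_variable('W_a', shape=(self.units, self.enc_dim),
                                      initializer=tf.contrib.layers.xavier_initializer())
                W_o = tf.get_variable('W_o', shape=(self.enc_dim, self.units),
                                      initializer=tf.contrib.layers.xavier_initializer())
                ht = tf.expand_dims(tf.matmul(lstm_out, W_a), axis=1)      # [batch, 1, enc_dim]
                scores = tf.reduce_sum(self.hs * ht, reduction_indices=2)  # [batch, enc_len]
                alpha = tf.expand_dims(tf.nn.softmax(scores), axis=2)      # attention weights
                context = tf.reduce_sum(alpha * self.hs, reduction_indices=1)
                out = tf.tanh(lstm_out + tf.matmul(context, W_o))          # blended output h~
            return out, lstm_state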
Example #2
    def encode_w_attn(self, inputs, mask, prev_states, scope="", reuse=None):

        with tf.variable_scope(scope, reuse=reuse):
            attn_cell = LSTMAttnCell(HIDDEN_DIM, prev_states, HIDDEN_DIM)
            outputs, final_state = tf.nn.dynamic_rnn(attn_cell,
                                                     inputs,
                                                     dtype=tf.float32,
                                                     sequence_length=mask)

            hidden_states_list = attn_cell.get_hidden_states()
            attn_cell.clear_hidden_states()
            print "length of hidden states list", len(hidden_states_list)

            packed_hidden_states = tf.pack(hidden_states_list, axis=2)
            packed_hidden_states = tf.transpose(
                packed_hidden_states,
                perm=[0, 2, 1])  # [batch size x SEQ LENGTH x HIDDEN]
            print "packed hidden states :", packed_hidden_states

        return (outputs, packed_hidden_states, final_state)
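Example #2 additionally assumes the cell records every timestep's hidden state via `get_hidden_states()` / `clear_hidden_states()`. A sketch of that bookkeeping, with method names taken from the call sites above. One caveat: inside `tf.nn.dynamic_rnn`'s while loop the cell body is traced only once, so a Python-side list like this collects per-timestep tensors only when the RNN is statically unrolled (e.g. with `tf.nn.rnn`):

    class RecordingLSTMAttnCell(LSTMAttnCell):
        """Hypothetical subclass that keeps a list of raw LSTM hidden states."""

        def __init__(self, *args, **kwargs):
            super(RecordingLSTMAttnCell, self).__init__(*args, **kwargs)
            self._recorded = []

        def __call__(self, inputs, state, scope=None):
            out, new_state = super(RecordingLSTMAttnCell, self).__call__(inputs, state, scope)
            self._recorded.append(new_state.h)  # pre-attention hidden state, [batch, units]
            return out, new_state

        def get_hidden_states(self):
            return self._recorded

        def clear_hidden_states(self):
            self._recorded = []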
Example #3
    def create_cell(self):

        encoder_output = tf.ones([TRAIN_BATCH_SIZE, PASSAGE_MAX_LENGTH, HIDDEN_DIM])  # dummy encoder states: [batch, seq, hidden]

        with tf.variable_scope("decode"):
            d_cell = LSTMAttnCell(
                HIDDEN_DIM,
                encoder_output)  # Make decoder cell with hidden dim

            # Make starter token input
            inp = tf.ones([TRAIN_BATCH_SIZE, VOCAB_SIZE])  # STARTER TOKEN, SHAPE: [BATCH, VOCAB_SIZE]

            # make initial state for LSTM cell
            h_0 = tf.ones([TRAIN_BATCH_SIZE, HIDDEN_DIM])  # dummy initial hidden state (stands in for the passage/question encoding)
            c_0 = tf.ones([TRAIN_BATCH_SIZE, HIDDEN_DIM])  # dummy memory cell, SHAPE: [BATCH, HIDDEN_DIM]
            h_t = tf.nn.rnn_cell.LSTMStateTuple(c_0, h_0)

            preds = []  # collect per-timestep logits
            for time_step in range(OUTPUT_MAX_LENGTH):
                o_t, h_t = d_cell(inp, h_t)

                U = tf.get_variable(
                    'U',
                    shape=(HIDDEN_DIM, VOCAB_SIZE),
                    initializer=tf.contrib.layers.xavier_initializer(),
                    dtype=tf.float32)
                b = tf.get_variable('b',
                                    shape=(VOCAB_SIZE, ),
                                    dtype=tf.float32)
                o_drop_t = tf.nn.dropout(o_t, self.dropout_placeholder)
                y_t = tf.matmul(o_drop_t, U) + b  # SHAPE: [BATCH, VOCAB_SIZE]

                inp = y_t

                preds.append(y_t)
                tf.get_variable_scope().reuse_variables()

            packed_preds = tf.pack(preds, axis=2)
            preds = tf.transpose(packed_preds, perm=[0, 2, 1])
        return preds
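With the return in place, a training loss can be attached to the stacked logits. A hedged sketch, where `answers` is an assumed int32 `[TRAIN_BATCH_SIZE, OUTPUT_MAX_LENGTH]` tensor of gold token ids:

    # preds: [BATCH, OUTPUT_MAX_LENGTH, VOCAB_SIZE] logits from create_cell()
    flat_logits = tf.reshape(preds, [-1, VOCAB_SIZE])
    flat_labels = tf.reshape(answers, [-1])
    loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=flat_logits,
                                                       labels=flat_labels))
    train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)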
Example #4
    def encode_w_attn(self, inputs, mask, prev_states_fw, prev_states_bw, scope="", reuse=None):
        self.attn_cell_fw = LSTMAttnCell(HIDDEN_DIM, prev_states_fw)
        self.attn_cell_bw = LSTMAttnCell(HIDDEN_DIM, prev_states_bw)
        with tf.variable_scope(scope, reuse=reuse):
            output_tuple, final_state = tf.nn.bidirectional_dynamic_rnn(
                self.attn_cell_fw,
                self.attn_cell_bw,
                inputs,
                dtype=tf.float32,
                sequence_length=mask)
        return (output_tuple, final_state)
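`tf.nn.bidirectional_dynamic_rnn` returns the outputs as a (forward, backward) pair, so a typical next step, sketched here as an assumption about how the caller uses it, is to concatenate the two streams:

    out_fw, out_bw = output_tuple
    encoding = tf.concat(2, [out_fw, out_bw])  # [batch, seq_len, 2*HIDDEN_DIM]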
Example #5
    def add_prediction_op(self):
        questions = self.add_embedding(self.questions_placeholder)
        passages = self.add_embedding(self.passages_placeholder)

        # Question encoder
        with tf.variable_scope("question"):
            q_cell = tf.nn.rnn_cell.LSTMCell(HIDDEN_DIM,
                                             activation=ACTIVATION_FUNC)
            q_outputs, q_final_tuple = tf.nn.dynamic_rnn(
                q_cell,
                questions,
                dtype=tf.float32,
                sequence_length=self.seq_length(self.questions_placeholder))
            q_final_c, q_final_h = q_final_tuple
            q_final_h = tf.expand_dims(q_final_h, axis=1)

        # Passage encoder with attention
        p_outputs, p_final_tuple = self.encode_w_attn(
            passages,
            self.seq_length(self.passages_placeholder),
            q_outputs,
            scope="passage_attn")
        p_final_c, p_final_h = p_final_tuple
        p_final_h = tf.expand_dims(p_final_h, axis=1)

        # Attention state encoder (Match LSTM layer variant)
        with tf.variable_scope("attention"):
            a_cell = tf.nn.rnn_cell.LSTMCell(HIDDEN_DIM,
                                             activation=ACTIVATION_FUNC)
            a_outputs, a_final_tuple = tf.nn.dynamic_rnn(
                a_cell,
                p_outputs,
                dtype=tf.float32,
                sequence_length=self.seq_length(self.passages_placeholder))
            a_final_c, a_final_h = a_final_tuple
            a_final_h = tf.expand_dims(a_final_h, axis=1)

        # Concatenation of all final hidden states
        q_p_a_hidden = tf.concat(2, [q_final_h, p_final_h, a_final_h
                                     ])  # SHAPE: [BATCH, 1, 3*HIDDEN_DIM]

        preds = list()

        with tf.variable_scope("decoder"):
            d_cell_dim = 3 * HIDDEN_DIM

            # Decoder with attention over the passage encodings (which themselves attend to the question)
            d_cell = LSTMAttnCell(d_cell_dim,
                                  p_outputs,
                                  HIDDEN_DIM,
                                  activation=ACTIVATION_FUNC)
            # d_cell = tf.nn.rnn_cell.LSTMCell(d_cell_dim) # Make decoder cell with hidden dim

            # Create first-time-step input to LSTM (starter token)
            inp = self.add_embedding(
                self.start_token_placeholder
            )  # STARTER TOKEN, SHAPE: [BATCH, EMBEDDING_DIM]

            # make initial state for LSTM cell
            # initial hidden state built from the question, passage, and attention encodings
            h_0 = tf.reshape(q_p_a_hidden, [-1, d_cell_dim])
            c_0 = tf.zeros_like(h_0)  # empty memory, SHAPE: [BATCH, 3*HIDDEN_DIM]
            h_t = tf.nn.rnn_cell.LSTMStateTuple(c_0, h_0)

            # U and b project the LSTM output to vocabulary logits
            U = tf.get_variable(
                'U',
                shape=(d_cell_dim, VOCAB_SIZE),
                initializer=tf.contrib.layers.xavier_initializer(),
                dtype=tf.float32)
            b = tf.get_variable('b', shape=(VOCAB_SIZE, ), dtype=tf.float32)

            for time_step in range(OUTPUT_MAX_LENGTH):
                o_t, h_t = d_cell(inp, h_t)

                o_drop_t = tf.nn.dropout(o_t, self.dropout_placeholder)
                y_t = tf.matmul(o_drop_t, U) + b  # SHAPE: [BATCH, VOCAB_SIZE]

                # restrict the vocabulary to words seen in the question or passage, plus frequent words
                mask = self.get_vocab_masks()
                y_t = tf.multiply(y_t, mask)

                inp = tf.nn.softmax(y_t)
                inp_index = tf.argmax(inp, 1)
                inp = tf.nn.embedding_lookup(self.pretrained_embeddings,
                                             inp_index)

                preds.append(y_t)
                tf.get_variable_scope().reuse_variables()

            packed_preds = tf.pack(preds, axis=2)
            preds = tf.transpose(packed_preds, perm=[0, 2, 1])
        return preds
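`self.get_vocab_masks()` is not shown in these examples. A sketch of what it might compute, inferred from the comment above; `NUM_POPULAR_WORDS` and the assumption that frequent words occupy the lowest vocabulary ids are hypothetical:

    def get_vocab_masks(self):
        # 0/1 float mask, SHAPE: [BATCH, VOCAB_SIZE] -- tokens appearing in the
        # question or passage, plus an assumed whitelist of frequent words.
        q_hot = tf.reduce_max(tf.one_hot(self.questions_placeholder, VOCAB_SIZE),
                              reduction_indices=1)
        p_hot = tf.reduce_max(tf.one_hot(self.passages_placeholder, VOCAB_SIZE),
                              reduction_indices=1)
        popular = tf.concat(0, [tf.ones((NUM_POPULAR_WORDS,)),
                                tf.zeros((VOCAB_SIZE - NUM_POPULAR_WORDS,))])
        return tf.minimum(q_hot + p_hot + popular, 1.0)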
Example #6
    def add_prediction_op(self): 
        questions = self.add_embedding(self.questions_placeholder)
        passages = self.add_embedding(self.passages_placeholder)

####### DEBUG PART #### CHECKS OUT
#        print "##### debugging input embeddings "
#        print "questions dims : should be [None, ", QUESTION_MAX_LENGTH, ", ", EMBEDDING_DIM, " :", questions
#        print "passages dims : should be [None, ", PASSAGE_MAX_LENGTH, ", ", EMBEDDING_DIM, " :", passages

        # Question encoder
        with tf.variable_scope("question"): 
            q_cell = tf.nn.rnn_cell.LSTMCell(HIDDEN_DIM, activation=ACTIVATION_FUNC)
            q_outputs, q_final_tuple = tf.nn.dynamic_rnn(q_cell, questions, dtype=tf.float32, sequence_length=self.seq_length(self.questions_placeholder))
            q_final_c, q_final_h = q_final_tuple
            q_final_h = tf.expand_dims(q_final_h, axis=1)


####### DEBUG PART ####
        print "shape of q_outputs : ", q_outputs
        print "shape of q_final_h : ", q_final_h
        print "\n\n##### debugging input embeddings "
        q_outputs = tf.Print(q_outputs, [q_outputs], message="q_outputs\n", summarize=QUESTION_MAX_LENGTH * HIDDEN_DIM * 3)
        print "\n\n"
        q_final_h = tf.Print(q_final_h, [q_final_h], message="q_final_h\n", summarize=TRAIN_BATCH_SIZE * HIDDEN_DIM)
        print "\n\n"
        q_final_c = tf.Print(q_final_c, [q_final_c], message="q_final_c\n", summarize=TRAIN_BATCH_SIZE * HIDDEN_DIM)
#######################

        # Passage encoder with attention
        p_outputs, p_final_tuple = self.encode_w_attn(passages, self.seq_length(self.passages_placeholder), q_outputs, scope="passage_attn")
        p_final_c, p_final_h = p_final_tuple
        p_final_h = tf.expand_dims(p_final_h, axis=1)
# # # # # # # # # # # #

        # Attention state encoder
        with tf.variable_scope("attention"): 
            a_cell = tf.nn.rnn_cell.LSTMCell(HIDDEN_DIM, activation=ACTIVATION_FUNC)
            a_outputs, a_final_tuple = tf.nn.dynamic_rnn(a_cell, p_outputs, dtype=tf.float32, sequence_length=self.seq_length(self.passages_placeholder))
            a_final_c, a_final_h = a_final_tuple
            a_final_h = tf.expand_dims(a_final_h, axis=1)

# # # # # # # # # # # #
        # q_last = tf.slice(q_outputs, [0, QUESTION_MAX_LENGTH - 1, 0], [-1, 1, -1])
        # p_last = tf.slice(p_outputs, [0, PASSAGE_MAX_LENGTH - 1, 0], [-1, 1, -1])
        # a_last = tf.slice(a_outputs, [0, PASSAGE_MAX_LENGTH - 1, 0], [-1, 1, -1])
        q_p_a_hidden = tf.concat(2, [q_final_h, p_final_h, a_final_h]) # SHAPE: [BATCH, 1, 3*HIDDEN_DIM]       

####### DEBUG PART ####
        # print "\n\n##### debugging sliced q_last "
        # print "sliced q_last shape: ", q_last
        # q_last = tf.Print(q_last, [q_last], message="q_last", summarize = 10)
        q_p_a_hidden = tf.Print(q_p_a_hidden, [q_p_a_hidden], message="q_p_a_hidden", summarize=TRAIN_BATCH_SIZE * HIDDEN_DIM * 3)
#######################

        preds = list()
        
        with tf.variable_scope("decoder"):
            d_cell_dim = 3 * HIDDEN_DIM
            
            # Decoder with attention over the passage encodings (which themselves attend to the question)
            d_cell = LSTMAttnCell(d_cell_dim, p_outputs, HIDDEN_DIM, activation=ACTIVATION_FUNC)
 
            # Create first-time-step input to LSTM (starter token)
            inp = self.add_embedding(self.start_token_placeholder) # STARTER TOKEN, SHAPE: [BATCH, EMBEDDING_DIM]


            # make initial state for LSTM cell
            # initial hidden state built from the question, passage, and attention encodings
            h_0 = tf.reshape(q_p_a_hidden, [-1, d_cell_dim])
            c_0 = tf.zeros_like(h_0) # empty memory, SHAPE: [BATCH, 3*HIDDEN_DIM]
            h_t = tf.nn.rnn_cell.LSTMStateTuple(c_0, h_0)
            
            # U and b project the LSTM output to vocabulary logits
            U = tf.get_variable('U', shape=(d_cell_dim, VOCAB_SIZE), initializer=tf.contrib.layers.xavier_initializer(), dtype=tf.float32)
            b = tf.get_variable('b', shape=(VOCAB_SIZE, ), dtype=tf.float32)
            

####### DEBUG PART #### THIS PART CHECKS OUT
#            print "\n\n##### debugging decoder "
#            inp = tf.Print(inp, [inp], message = "starter token input : \n", summarize = EMBEDDING_DIM + 50)
#            h_0 = tf.Print(h_0, [h_0], message = "h_0 : \n", summarize = TRAIN_BATCH_SIZE * 3 * HIDDEN_DIM)
#            print "U : ", U
#            print "b : ", b
#######################

            for time_step in range(OUTPUT_MAX_LENGTH):
                o_t, h_t = d_cell(inp, h_t)

                o_drop_t = tf.nn.dropout(o_t, self.dropout_placeholder)
                y_t = tf.matmul(o_drop_t, U) + b # SHAPE: [BATCH, VOCAB_SIZE]

                # y_t = tf.Print(y_t, [y_t], message="y_t : \n", summarize = 500)
                
                # restrict the vocabulary to words seen in the question or passage, plus frequent words
                mask = self.get_vocab_masks()

                # mask = tf.Print(mask, [mask], message="mask : \n", summarize = 500)
                
                y_t = tf.multiply(y_t, mask)
                
                y_t = tf.Print(y_t, [y_t], message="post mask y_t : \n", summarize = 500)

                y_t = tf.nn.softmax(y_t)

                y_t = tf.Print(y_t, [y_t], message="post softmax y_t : \n", summarize = 500)

                if self.predicting:
                    inp_index = tf.argmax(y_t, 1)
                    inp = tf.nn.embedding_lookup(self.pretrained_embeddings, inp_index)
                else: 
                    inp = tf.slice(self.answers_placeholder, [0, time_step], [-1, 1]) 
                    inp = tf.nn.embedding_lookup(self.pretrained_embeddings, inp)
                    inp = tf.reshape(inp, [-1, EMBEDDING_DIM])

                inp = tf.Print(inp, [inp], message="inp : \n", summarize=500)
                
                preds.append(y_t)
                tf.get_variable_scope().reuse_variables()

            packed_preds = tf.pack(preds, axis=2)
            preds = tf.transpose(packed_preds, perm=[0, 2, 1])
        return preds
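The `tf.Print` instrumentation above fires on every session run. One way to keep it switchable, sketched under the assumption of a module-level `DEBUG` flag:

    DEBUG = False  # hypothetical switch; the project may gate debugging differently

    def dbg(tensor, label, n=500):
        # Identity when DEBUG is off; otherwise mirrors the tf.Print calls above.
        if not DEBUG:
            return tensor
        return tf.Print(tensor, [tensor], message=label + " : \n", summarize=n)

    # usage: y_t = dbg(y_t, "post mask y_t")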
Example #7
    def add_prediction_op(self):
        questions = self.add_embedding(self.questions_placeholder)
        passages = self.add_embedding(self.passages_placeholder)

        # Question encoder
        with tf.variable_scope("question"):
            q_cell = tf.nn.rnn_cell.LSTMCell(HIDDEN_DIM)
            q_outputs, _ = tf.nn.dynamic_rnn(q_cell,
                                             questions,
                                             dtype=tf.float32,
                                             sequence_length=self.seq_length(
                                                 self.questions_placeholder))

        # Passage encoder with attention
        p_outputs, p_hs, _ = self.encode_w_attn(passages,
                                                self.seq_length(
                                                    self.passages_placeholder),
                                                q_outputs,
                                                scope="passage_attn")
        print "passage encoder with attention output shape :", p_outputs
        h_tilda_and_h = tf.concat(2, [p_outputs, p_hs])
        print "concatenated h tilda nad h :", h_tilda_and_h

        # with tf.variable_scope("passage"):
        #     p_cell = tf.nn.rnn_cell.LSTMCell(HIDDEN_DIM)
        #     p_outputs, p_state_tuple = tf.nn.dynamic_rnn(p_cell, passages, initial_state=q_state_tuple, dtype=tf.float32, sequence_length=self.seq_length(passages))

        # Attention state encoder
        with tf.variable_scope("attention"):
            a_cell = tf.nn.rnn_cell.LSTMCell(2 * HIDDEN_DIM)
            a_outputs, _ = tf.nn.dynamic_rnn(a_cell,
                                             h_tilda_and_h,
                                             dtype=tf.float32,
                                             sequence_length=self.seq_length(
                                                 self.passages_placeholder))

        q_last = tf.slice(q_outputs, [0, QUESTION_MAX_LENGTH - 1, 0],
                          [-1, 1, -1])  # HIDDEN
        p_last = tf.slice(p_outputs, [0, PASSAGE_MAX_LENGTH - 1, 0],
                          [-1, 1, -1])  # HIDDEN
        a_last = tf.slice(a_outputs, [0, PASSAGE_MAX_LENGTH - 1, 0],
                          [-1, 1, -1])  # 2 * HIDDEN
        q_p_a_hidden = tf.concat(
            2, [q_last, p_last, a_last])  # SHAPE: [BATCH, 1, 4*HIDDEN_DIM]

        preds = list()

        with tf.variable_scope("decoder"):
            d_cell_dim = 4 * HIDDEN_DIM

            # Decoder with attention over the passage encodings (which themselves attend to the question)
            d_cell = LSTMAttnCell(d_cell_dim, p_outputs, HIDDEN_DIM)
            # d_cell = tf.nn.rnn_cell.LSTMCell(d_cell_dim) # Make decoder cell with hidden dim

            # Create first-time-step input to LSTM (starter token)
            inp = self.add_embedding(
                self.start_token_placeholder
            )  # STARTER TOKEN, SHAPE: [BATCH, EMBEDDING_DIM]

            # make initial state for LSTM cell
            # initial hidden state built from the question, passage, and attention encodings
            h_0 = tf.reshape(q_p_a_hidden, [-1, d_cell_dim])
            c_0 = tf.zeros_like(h_0)  # empty memory, SHAPE: [BATCH, 4*HIDDEN_DIM]
            h_t = tf.nn.rnn_cell.LSTMStateTuple(c_0, h_0)

            # U and b project the LSTM output to vocabulary logits
            U = tf.get_variable(
                'U',
                shape=(d_cell_dim, VOCAB_SIZE),
                initializer=tf.contrib.layers.xavier_initializer(),
                dtype=tf.float32)
            b = tf.get_variable('b', shape=(VOCAB_SIZE, ), dtype=tf.float32)

            for time_step in range(OUTPUT_MAX_LENGTH):
                o_t, h_t = d_cell(inp, h_t)

                o_drop_t = tf.nn.dropout(o_t, self.dropout_placeholder)
                y_t = tf.matmul(o_drop_t, U) + b  # SHAPE: [BATCH, VOCAB_SIZE]
                y_t = tf.nn.softmax(y_t)

                if self.predicting:
                    inp_index = tf.argmax(y_t, 1)
                    inp = tf.nn.embedding_lookup(self.pretrained_embeddings,
                                                 inp_index)
                else:
                    inp = tf.slice(self.answers_placeholder, [0, time_step],
                                   [-1, 1])
                    inp = tf.nn.embedding_lookup(self.pretrained_embeddings,
                                                 inp)
                    inp = tf.reshape(inp, [-1, EMBEDDING_DIM])

                preds.append(y_t)
                tf.get_variable_scope().reuse_variables()

            packed_preds = tf.pack(preds, axis=2)
            preds = tf.transpose(packed_preds, perm=[0, 2, 1])
        return preds
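One caveat on the `tf.slice` calls above: `tf.nn.dynamic_rnn` zero-pads its outputs past `sequence_length`, so slicing at index `*_MAX_LENGTH - 1` returns zeros for any sequence shorter than the maximum. A sketch of gathering the true last output instead (`lengths` is the same int tensor passed as `sequence_length`; wrap the result in `tf.expand_dims(..., axis=1)` to recover the `[batch, 1, dim]` layout of the slices):

    def last_relevant(outputs, lengths, max_len, dim):
        # Flatten to [batch * max_len, dim], then pick row i * max_len + (lengths[i] - 1).
        batch = tf.shape(outputs)[0]
        index = tf.range(batch) * max_len + (lengths - 1)
        flat = tf.reshape(outputs, [-1, dim])
        return tf.gather(flat, index)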
Example #8
    def add_prediction_op(self): 
        questions = self.add_embedding(self.questions_placeholder)
        passages = self.add_embedding(self.passages_placeholder)

        # Question preprocessing encoder
        with tf.variable_scope("question"): 
            q_cell = tf.nn.rnn_cell.LSTMCell(HIDDEN_DIM)
            q_outputs, _ = tf.nn.dynamic_rnn(q_cell, questions, dtype=tf.float32, sequence_length=self.seq_length(self.questions_placeholder))

        # Passage preprocessing encoder
        with tf.variable_scope("passage"): 
            p_cell = tf.nn.rnn_cell.LSTMCell(HIDDEN_DIM)
            p_outputs, _ = tf.nn.dynamic_rnn(p_cell, passages, dtype=tf.float32, sequence_length=self.seq_length(self.passages_placeholder))

        # Match LSTM layer
        # Inputs: hidden states from the passage preprocessing encoder (p_outputs) [None x PASSAGE_MAX_LENGTH x HIDDEN_DIM]
        # Encoder inputs: hidden states from the question preprocessing encoder (q_outputs) [None x QUESTION_MAX_LENGTH x HIDDEN_DIM]
        # Dimension of the Match LSTM attention cell: HIDDEN_DIM
        # Dimension of the attended (question) encoder states: HIDDEN_DIM
        # Masking on the Match LSTM: same as the passage masking (seq_length(self.passages_placeholder))

        match_outputs, _ = self.encode_w_attn(p_outputs, self.seq_length(self.passages_placeholder), q_outputs, scope="match_LSTM_layer")
 
        # # Attention state encoder
        # with tf.variable_scope("attention"): 
        #     a_cell = tf.nn.rnn_cell.LSTMCell(HIDDEN_DIM)
        #     a_outputs, _ = tf.nn.dynamic_rnn(a_cell, p_outputs, dtype=tf.float32, sequence_length=self.seq_length(self.passages_placeholder))

        q_last = tf.slice(q_outputs, [0, QUESTION_MAX_LENGTH - 1, 0], [-1, 1, -1])
        p_last = tf.slice(p_outputs, [0, PASSAGE_MAX_LENGTH - 1, 0], [-1, 1, -1])
        match_last = tf.slice(match_outputs, [0, PASSAGE_MAX_LENGTH - 1, 0], [-1, 1, -1])
        q_p_match_hidden = tf.concat(2, [q_last, p_last, match_last]) # SHAPE: [BATCH, 1, 3*HIDDEN_DIM]
       
        preds = list()
        
        with tf.variable_scope("decoder"):
            d_cell_dim = 3 * HIDDEN_DIM
            
            # Run decoder with attention between DECODER and MATCH OUTPUTS
            d_cell = LSTMAttnCell(d_cell_dim, match_outputs, HIDDEN_DIM)

            # Create first-time-step input to LSTM (starter token)
            inp = self.add_embedding(self.start_token_placeholder) # STARTER TOKEN, SHAPE: [BATCH, EMBEDDING_DIM]

            # make initial state for LSTM cell
            # initial hidden state built from the question, passage, and match encodings
            h_0 = tf.reshape(q_p_match_hidden, [-1, d_cell_dim])
            c_0 = tf.zeros_like(h_0) # empty memory, SHAPE: [BATCH, 3*HIDDEN_DIM]
            h_t = tf.nn.rnn_cell.LSTMStateTuple(c_0, h_0)
            
            # U and b project the LSTM output to vocabulary logits
            U = tf.get_variable('U', shape=(d_cell_dim, VOCAB_SIZE), initializer=tf.contrib.layers.xavier_initializer(), dtype=tf.float32)
            b = tf.get_variable('b', shape=(VOCAB_SIZE, ), dtype=tf.float32)
            
            for time_step in range(OUTPUT_MAX_LENGTH):
                o_t, h_t = d_cell(inp, h_t)

                o_drop_t = tf.nn.dropout(o_t, self.dropout_placeholder)
                y_t = tf.matmul(o_drop_t, U) + b # SHAPE: [BATCH, VOCAB_SIZE]
                y_t = tf.nn.softmax(y_t)
                
                # if self.predicting:
                inp_index = tf.argmax(y_t, 1)
                inp = tf.nn.embedding_lookup(self.pretrained_embeddings, inp_index)
                # else: 
                #     inp = tf.slice(self.answers_placeholder, [0, time_step], [-1, 1]) 
                #     inp = tf.nn.embedding_lookup(self.pretrained_embeddings, inp)
                #     inp = tf.reshape(inp, [-1, EMBEDDING_DIM])

                preds.append(y_t)
                tf.get_variable_scope().reuse_variables()

            packed_preds = tf.pack(preds, axis=2)
            preds = tf.transpose(packed_preds, perm=[0, 2, 1])
        return preds
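Because this variant appends `y_t` after the softmax, `preds` holds probabilities rather than logits, so a loss needs a log-likelihood form instead of a softmax-cross-entropy op. A hedged sketch, assuming an int32 `answers_placeholder` of shape `[BATCH, OUTPUT_MAX_LENGTH]` and an assumed `LEARNING_RATE` constant:

    gold = tf.one_hot(self.answers_placeholder, VOCAB_SIZE)      # [BATCH, T, VOCAB_SIZE]
    cross_entropy = -tf.reduce_sum(gold * tf.log(preds + 1e-8),  # epsilon guards log(0)
                                   reduction_indices=2)
    loss = tf.reduce_mean(cross_entropy)
    train_op = tf.train.AdamOptimizer(LEARNING_RATE).minimize(loss)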