def encode_w_attn(self, inputs, mask, prev_states, scope="", reuse=False):
    with tf.variable_scope(scope, reuse=reuse):
        attn_cell = LSTMAttnCell(HIDDEN_DIM, prev_states)
        o, final_state = tf.nn.dynamic_rnn(attn_cell,
                                           inputs,
                                           dtype=tf.float32,
                                           sequence_length=mask)
    return (o, final_state)
def encode_w_attn(self, inputs, mask, prev_states, scope="", reuse=False):
    with tf.variable_scope(scope, reuse=reuse):
        attn_cell = LSTMAttnCell(HIDDEN_DIM, prev_states, HIDDEN_DIM)
        outputs, final_state = tf.nn.dynamic_rnn(attn_cell,
                                                 inputs,
                                                 dtype=tf.float32,
                                                 sequence_length=mask)
        hidden_states_list = attn_cell.get_hidden_states()
        attn_cell.clear_hidden_states()
        print "length of hidden states list", len(hidden_states_list)
        packed_hidden_states = tf.pack(hidden_states_list, axis=2)
        packed_hidden_states = tf.transpose(packed_hidden_states,
                                            perm=[0, 2, 1])  # [BATCH, SEQ_LENGTH, HIDDEN_DIM]
        print "packed hidden states :", packed_hidden_states
    return (outputs, packed_hidden_states, final_state)
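# NOTE: LSTMAttnCell itself is not part of this listing (and `import tensorflow as tf`
# is assumed at module level). The snippets here assume an attention wrapper around
# tf.nn.rnn_cell.LSTMCell that, at each step, scores the encoder states handed to the
# constructor against the current cell output and keeps the resulting context
# ("h_tilde") vectors. The sketch below is only a plausible reconstruction consistent
# with how the cell is called in these snippets (cell size, encoder states, optional
# encoder dimension, optional activation, get/clear_hidden_states); the variable names
# and the dot-product scoring are assumptions, not the original implementation.
class LSTMAttnCell(tf.nn.rnn_cell.LSTMCell):
    def __init__(self, num_units, encoder_output, encoder_dim=None, activation=tf.tanh):
        super(LSTMAttnCell, self).__init__(num_units, activation=activation)
        self.encoder_output = encoder_output   # [BATCH, ENC_LEN, ENC_DIM]
        self.encoder_dim = encoder_dim if encoder_dim is not None else num_units
        self.hidden_states = []                # attention context vectors, one per call

    def __call__(self, inputs, state, scope=None):
        lstm_out, lstm_state = super(LSTMAttnCell, self).__call__(inputs, state, scope)
        with tf.variable_scope("attn_weights"):
            W_a = tf.get_variable('W_a',
                                  shape=(self.output_size, self.encoder_dim),
                                  initializer=tf.contrib.layers.xavier_initializer(),
                                  dtype=tf.float32)
        # Dot-product attention over the encoder states
        query = tf.expand_dims(tf.matmul(lstm_out, W_a), 1)                           # [BATCH, 1, ENC_DIM]
        scores = tf.reduce_sum(self.encoder_output * query, reduction_indices=[2])    # [BATCH, ENC_LEN]
        alpha = tf.expand_dims(tf.nn.softmax(scores), 2)                              # [BATCH, ENC_LEN, 1]
        h_tilde = tf.reduce_sum(self.encoder_output * alpha, reduction_indices=[1])   # [BATCH, ENC_DIM]
        # Caveat: collecting states in a Python list yields one entry per graph-construction
        # call; inside tf.nn.dynamic_rnn the cell body is built only once, so this is mainly
        # useful when the cell is unrolled manually (as in the decoder loops below).
        self.hidden_states.append(h_tilde)
        return lstm_out, lstm_state

    def get_hidden_states(self):
        return self.hidden_states

    def clear_hidden_states(self):
        self.hidden_states = []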
def create_cell(self):
    encoder_output = tf.ones([TRAIN_BATCH_SIZE, PASSAGE_MAX_LENGTH])
    with tf.variable_scope("decode"):
        d_cell = LSTMAttnCell(HIDDEN_DIM, encoder_output)  # Make decoder cell with hidden dim

        # Make starter token input
        inp = tf.ones([TRAIN_BATCH_SIZE, 1000])  # STARTER TOKEN, SHAPE: [BATCH, VOCAB_SIZE]

        # Make initial state for the LSTM cell
        h_0 = tf.ones([TRAIN_BATCH_SIZE, HIDDEN_DIM])  # hidden state from passage and question
        c_0 = tf.ones([TRAIN_BATCH_SIZE, HIDDEN_DIM])  # empty memory, SHAPE: [BATCH, HIDDEN_DIM]
        h_t = tf.nn.rnn_cell.LSTMStateTuple(c_0, h_0)

        preds = []  # collect per-time-step outputs
        for time_step in range(OUTPUT_MAX_LENGTH):
            o_t, h_t = d_cell(inp, h_t)
            U = tf.get_variable('U',
                                shape=(2 * HIDDEN_DIM, VOCAB_SIZE),
                                initializer=tf.contrib.layers.xavier_initializer(),
                                dtype=tf.float32)
            b = tf.get_variable('b', shape=(VOCAB_SIZE, ), dtype=tf.float32)
            o_drop_t = tf.nn.dropout(o_t, self.dropout_placeholder)
            y_t = tf.matmul(o_drop_t, U) + b  # SHAPE: [BATCH, VOCAB_SIZE]
            inp = y_t
            preds.append(y_t)
            tf.get_variable_scope().reuse_variables()

        packed_preds = tf.pack(preds, axis=2)                 # [BATCH, VOCAB_SIZE, OUTPUT_MAX_LENGTH]
        preds = tf.transpose(packed_preds, perm=[0, 2, 1])    # [BATCH, OUTPUT_MAX_LENGTH, VOCAB_SIZE]
def encode_w_attn(self, inputs, mask, prev_states_fw, prev_states_bw, scope="", reuse=False):
    self.attn_cell_fw = LSTMAttnCell(HIDDEN_DIM, prev_states_fw)
    self.attn_cell_bw = LSTMAttnCell(HIDDEN_DIM, prev_states_bw)
    with tf.variable_scope(scope, reuse=reuse):
        output_tuple, final_state = tf.nn.bidirectional_dynamic_rnn(self.attn_cell_fw,
                                                                    self.attn_cell_bw,
                                                                    inputs,
                                                                    dtype=tf.float32,
                                                                    sequence_length=mask)
    return (output_tuple, final_state)
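# Usage sketch (not in the original listing): tf.nn.bidirectional_dynamic_rnn returns a
# pair of output tensors and a pair of final LSTMStateTuples. A typical caller merges
# them along the feature axis before any downstream layer; the helper name below is
# illustrative only.
def merge_bidirectional(output_tuple, final_state):
    out_fw, out_bw = output_tuple
    state_fw, state_bw = final_state
    bi_outputs = tf.concat(2, [out_fw, out_bw])          # [BATCH, SEQ_LEN, 2*HIDDEN_DIM]
    bi_final_h = tf.concat(1, [state_fw.h, state_bw.h])  # [BATCH, 2*HIDDEN_DIM]
    return bi_outputs, bi_final_h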
def add_prediction_op(self):
    questions = self.add_embedding(self.questions_placeholder)
    passages = self.add_embedding(self.passages_placeholder)

    # Question encoder
    with tf.variable_scope("question"):
        q_cell = tf.nn.rnn_cell.LSTMCell(HIDDEN_DIM, activation=ACTIVATION_FUNC)
        q_outputs, q_final_tuple = tf.nn.dynamic_rnn(
            q_cell, questions, dtype=tf.float32,
            sequence_length=self.seq_length(self.questions_placeholder))
        q_final_c, q_final_h = q_final_tuple
        q_final_h = tf.expand_dims(q_final_h, axis=1)

    # Passage encoder with attention
    p_outputs, p_final_tuple = self.encode_w_attn(
        passages, self.seq_length(self.passages_placeholder), q_outputs,
        scope="passage_attn")
    p_final_c, p_final_h = p_final_tuple
    p_final_h = tf.expand_dims(p_final_h, axis=1)

    # Attention state encoder (Match LSTM layer variant)
    with tf.variable_scope("attention"):
        a_cell = tf.nn.rnn_cell.LSTMCell(HIDDEN_DIM, activation=ACTIVATION_FUNC)
        a_outputs, a_final_tuple = tf.nn.dynamic_rnn(
            a_cell, p_outputs, dtype=tf.float32,
            sequence_length=self.seq_length(self.passages_placeholder))
        a_final_c, a_final_h = a_final_tuple
        a_final_h = tf.expand_dims(a_final_h, axis=1)

    # Concatenation of all final hidden states
    q_p_a_hidden = tf.concat(2, [q_final_h, p_final_h, a_final_h])  # SHAPE: [BATCH, 1, 3*HIDDEN_DIM]

    preds = list()
    with tf.variable_scope("decoder"):
        d_cell_dim = 3 * HIDDEN_DIM
        # Run decoder with attention between the DECODER and the PASSAGE-with-ATTENTION
        # outputs (attention between passage and question)
        d_cell = LSTMAttnCell(d_cell_dim, p_outputs, HIDDEN_DIM, activation=ACTIVATION_FUNC)
        # d_cell = tf.nn.rnn_cell.LSTMCell(d_cell_dim)  # plain decoder cell (no attention)

        # Create first-time-step input to LSTM (starter token)
        inp = self.add_embedding(self.start_token_placeholder)  # STARTER TOKEN, SHAPE: [BATCH, EMBEDDING_DIM]

        # Make initial state for the LSTM cell
        h_0 = tf.reshape(q_p_a_hidden, [-1, d_cell_dim])  # hidden state from question, passage, and attention encoders
        c_0 = tf.reshape(tf.zeros((d_cell_dim)), [-1, d_cell_dim])  # empty memory, SHAPE: [BATCH, 3*HIDDEN_DIM]
        h_t = tf.nn.rnn_cell.LSTMStateTuple(c_0, h_0)

        # U and b project the LSTM output to a logit over the vocabulary (LSTM output -> logit)
        U = tf.get_variable('U',
                            shape=(d_cell_dim, VOCAB_SIZE),
                            initializer=tf.contrib.layers.xavier_initializer(),
                            dtype=tf.float32)
        b = tf.get_variable('b', shape=(VOCAB_SIZE, ), dtype=tf.float32)

        for time_step in range(OUTPUT_MAX_LENGTH):
            o_t, h_t = d_cell(inp, h_t)
            o_drop_t = tf.nn.dropout(o_t, self.dropout_placeholder)
            y_t = tf.matmul(o_drop_t, U) + b  # SHAPE: [BATCH, VOCAB_SIZE]

            # Limit the vocabulary to words seen in the question or passage plus popular words
            mask = self.get_vocab_masks()
            y_t = tf.multiply(y_t, mask)

            # Feed the greedy prediction back in as the next input
            inp = tf.nn.softmax(y_t)
            inp_index = tf.argmax(inp, 1)
            inp = tf.nn.embedding_lookup(self.pretrained_embeddings, inp_index)

            preds.append(y_t)
            tf.get_variable_scope().reuse_variables()

        packed_preds = tf.pack(preds, axis=2)
        preds = tf.transpose(packed_preds, perm=[0, 2, 1])  # [BATCH, OUTPUT_MAX_LENGTH, VOCAB_SIZE]

    return preds
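# get_vocab_masks() is referenced above but not shown in this listing. Presumably it
# returns a [BATCH, VOCAB_SIZE] 0/1 tensor that keeps only words appearing in the
# question or passage plus a whitelist of common words. A minimal sketch under that
# assumption (the COMMON_WORD_IDS constant is hypothetical, not from the original code):
def get_vocab_masks(self):
    # 1 at every vocab id that occurs in the question or the passage
    q_mask = tf.reduce_max(tf.one_hot(self.questions_placeholder, VOCAB_SIZE),
                           reduction_indices=[1])      # [BATCH, VOCAB_SIZE]
    p_mask = tf.reduce_max(tf.one_hot(self.passages_placeholder, VOCAB_SIZE),
                           reduction_indices=[1])      # [BATCH, VOCAB_SIZE]
    # 1 at a fixed set of frequent word ids (same row for every batch element)
    common = tf.reduce_max(tf.one_hot(COMMON_WORD_IDS, VOCAB_SIZE),
                           reduction_indices=[0])      # [VOCAB_SIZE]
    # Note: if id 0 is the padding token it gets whitelisted too unless filtered out.
    return tf.minimum(q_mask + p_mask + common, 1.0)   # [BATCH, VOCAB_SIZE]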
def add_prediction_op(self):
    questions = self.add_embedding(self.questions_placeholder)
    passages = self.add_embedding(self.passages_placeholder)

    ####### DEBUG PART #### CHECKS OUT
    # print "##### debugging input embeddings "
    # print "questions dims : should be [None, ", QUESTION_MAX_LENGTH, ", ", EMBEDDING_DIM, " :", questions
    # print "passages dims : should be [None, ", PASSAGE_MAX_LENGTH, ", ", EMBEDDING_DIM, " :", passages

    # Question encoder
    with tf.variable_scope("question"):
        q_cell = tf.nn.rnn_cell.LSTMCell(HIDDEN_DIM, activation=ACTIVATION_FUNC)
        q_outputs, q_final_tuple = tf.nn.dynamic_rnn(
            q_cell, questions, dtype=tf.float32,
            sequence_length=self.seq_length(self.questions_placeholder))
        q_final_c, q_final_h = q_final_tuple
        q_final_h = tf.expand_dims(q_final_h, axis=1)

    ####### DEBUG PART ####
    print "shape of q_outputs : ", q_outputs
    print "shape of q_final_h : ", q_final_h
    print "\n\n##### debugging input embeddings "
    q_outputs = tf.Print(q_outputs, [q_outputs], message="q_outputs\n",
                         summarize=QUESTION_MAX_LENGTH * HIDDEN_DIM * 3)
    print "\n\n"
    q_final_h = tf.Print(q_final_h, [q_final_h], message="q_final_h\n",
                         summarize=TRAIN_BATCH_SIZE * HIDDEN_DIM)
    print "\n\n"
    q_final_c = tf.Print(q_final_c, [q_final_c], message="q_final_c\n",
                         summarize=TRAIN_BATCH_SIZE * HIDDEN_DIM)
    #######################

    # Passage encoder with attention
    p_outputs, p_final_tuple = self.encode_w_attn(
        passages, self.seq_length(self.passages_placeholder), q_outputs,
        scope="passage_attn")
    p_final_c, p_final_h = p_final_tuple
    p_final_h = tf.expand_dims(p_final_h, axis=1)

    # Attention state encoder
    with tf.variable_scope("attention"):
        a_cell = tf.nn.rnn_cell.LSTMCell(HIDDEN_DIM, activation=ACTIVATION_FUNC)
        a_outputs, a_final_tuple = tf.nn.dynamic_rnn(
            a_cell, p_outputs, dtype=tf.float32,
            sequence_length=self.seq_length(self.passages_placeholder))
        a_final_c, a_final_h = a_final_tuple
        a_final_h = tf.expand_dims(a_final_h, axis=1)

    # q_last = tf.slice(q_outputs, [0, QUESTION_MAX_LENGTH - 1, 0], [-1, 1, -1])
    # p_last = tf.slice(p_outputs, [0, PASSAGE_MAX_LENGTH - 1, 0], [-1, 1, -1])
    # a_last = tf.slice(a_outputs, [0, PASSAGE_MAX_LENGTH - 1, 0], [-1, 1, -1])

    q_p_a_hidden = tf.concat(2, [q_final_h, p_final_h, a_final_h])  # SHAPE: [BATCH, 1, 3*HIDDEN_DIM]

    ####### DEBUG PART ####
    # print "\n\n##### debugging sliced q_last "
    # print "sliced q_last shape: ", q_last
    # q_last = tf.Print(q_last, [q_last], message="q_last", summarize=10)
    q_p_a_hidden = tf.Print(q_p_a_hidden, [q_p_a_hidden], message="q_p_a_hidden",
                            summarize=TRAIN_BATCH_SIZE * HIDDEN_DIM * 3)
    #######################

    preds = list()
    with tf.variable_scope("decoder"):
        d_cell_dim = 3 * HIDDEN_DIM
        # Run decoder with attention between the DECODER and the PASSAGE-with-ATTENTION
        # outputs (attention between passage and question)
        d_cell = LSTMAttnCell(d_cell_dim, p_outputs, HIDDEN_DIM, activation=ACTIVATION_FUNC)

        # Create first-time-step input to LSTM (starter token)
        inp = self.add_embedding(self.start_token_placeholder)  # STARTER TOKEN, SHAPE: [BATCH, EMBEDDING_DIM]

        # Make initial state for the LSTM cell
        h_0 = tf.reshape(q_p_a_hidden, [-1, d_cell_dim])  # hidden state from question, passage, and attention encoders
        c_0 = tf.reshape(tf.zeros((d_cell_dim)), [-1, d_cell_dim])  # empty memory, SHAPE: [BATCH, 3*HIDDEN_DIM]
        h_t = tf.nn.rnn_cell.LSTMStateTuple(c_0, h_0)

        # U and b project the LSTM output to a logit over the vocabulary (LSTM output -> logit)
        U = tf.get_variable('U',
                            shape=(d_cell_dim, VOCAB_SIZE),
                            initializer=tf.contrib.layers.xavier_initializer(),
                            dtype=tf.float32)
        b = tf.get_variable('b', shape=(VOCAB_SIZE, ), dtype=tf.float32)

        ####### DEBUG PART #### THIS PART CHECKS OUT
        # print "\n\n##### debugging decoder "
        # inp = tf.Print(inp, [inp], message="starter token input : \n", summarize=EMBEDDING_DIM + 50)
        # h_0 = tf.Print(h_0, [h_0], message="h_0 : \n", summarize=TRAIN_BATCH_SIZE * 3 * HIDDEN_DIM)
        # print "U : ", U
        # print "b : ", b
        #######################

        for time_step in range(OUTPUT_MAX_LENGTH):
            o_t, h_t = d_cell(inp, h_t)
            o_drop_t = tf.nn.dropout(o_t, self.dropout_placeholder)
            y_t = tf.matmul(o_drop_t, U) + b  # SHAPE: [BATCH, VOCAB_SIZE]
            # y_t = tf.Print(y_t, [y_t], message="y_t : \n", summarize=500)

            # Limit the vocabulary to words seen in the question or passage plus popular words
            mask = self.get_vocab_masks()
            # mask = tf.Print(mask, [mask], message="mask : \n", summarize=500)
            y_t = tf.multiply(y_t, mask)
            y_t = tf.Print(y_t, [y_t], message="post mask y_t : \n", summarize=500)
            y_t = tf.nn.softmax(y_t)
            y_t = tf.Print(y_t, [y_t], message="post softmax y_t : \n", summarize=500)

            if self.predicting:
                # Greedy decoding: feed the argmax word embedding back in
                inp_index = tf.argmax(y_t, 1)
                inp = tf.nn.embedding_lookup(self.pretrained_embeddings, inp_index)
            else:
                # Teacher forcing: feed the gold answer token at this time step
                inp = tf.slice(self.answers_placeholder, [0, time_step], [-1, 1])
                inp = tf.nn.embedding_lookup(self.pretrained_embeddings, inp)
                inp = tf.reshape(inp, [-1, EMBEDDING_DIM])
            inp = tf.Print(inp, [inp], message="inp : \n", summarize=500)

            preds.append(y_t)
            tf.get_variable_scope().reuse_variables()

        packed_preds = tf.pack(preds, axis=2)
        preds = tf.transpose(packed_preds, perm=[0, 2, 1])  # [BATCH, OUTPUT_MAX_LENGTH, VOCAB_SIZE]

    return preds
def add_prediction_op(self):
    questions = self.add_embedding(self.questions_placeholder)
    passages = self.add_embedding(self.passages_placeholder)

    # Question encoder
    with tf.variable_scope("question"):
        q_cell = tf.nn.rnn_cell.LSTMCell(HIDDEN_DIM)
        q_outputs, _ = tf.nn.dynamic_rnn(
            q_cell, questions, dtype=tf.float32,
            sequence_length=self.seq_length(self.questions_placeholder))

    # Passage encoder with attention
    p_outputs, p_hs, _ = self.encode_w_attn(
        passages, self.seq_length(self.passages_placeholder), q_outputs,
        scope="passage_attn")
    print "passage encoder with attention output shape :", p_outputs

    h_tilda_and_h = tf.concat(2, [p_outputs, p_hs])
    print "concatenated h_tilda and h :", h_tilda_and_h

    # with tf.variable_scope("passage"):
    #     p_cell = tf.nn.rnn_cell.LSTMCell(HIDDEN_DIM)
    #     p_outputs, p_state_tuple = tf.nn.dynamic_rnn(p_cell, passages, initial_state=q_state_tuple, dtype=tf.float32, sequence_length=self.seq_length(passages))

    # Attention state encoder
    with tf.variable_scope("attention"):
        a_cell = tf.nn.rnn_cell.LSTMCell(2 * HIDDEN_DIM)
        a_outputs, _ = tf.nn.dynamic_rnn(
            a_cell, h_tilda_and_h, dtype=tf.float32,
            sequence_length=self.seq_length(self.passages_placeholder))

    q_last = tf.slice(q_outputs, [0, QUESTION_MAX_LENGTH - 1, 0], [-1, 1, -1])   # HIDDEN_DIM
    p_last = tf.slice(p_outputs, [0, PASSAGE_MAX_LENGTH - 1, 0], [-1, 1, -1])    # HIDDEN_DIM
    a_last = tf.slice(a_outputs, [0, PASSAGE_MAX_LENGTH - 1, 0], [-1, 1, -1])    # 2 * HIDDEN_DIM

    q_p_a_hidden = tf.concat(2, [q_last, p_last, a_last])  # SHAPE: [BATCH, 1, 4*HIDDEN_DIM]

    preds = list()
    with tf.variable_scope("decoder"):
        d_cell_dim = 4 * HIDDEN_DIM
        # Run decoder with attention between the DECODER and the PASSAGE-with-ATTENTION
        # outputs (attention between passage and question)
        d_cell = LSTMAttnCell(d_cell_dim, p_outputs, HIDDEN_DIM)
        # d_cell = tf.nn.rnn_cell.LSTMCell(d_cell_dim)  # plain decoder cell (no attention)

        # Create first-time-step input to LSTM (starter token)
        inp = self.add_embedding(self.start_token_placeholder)  # STARTER TOKEN, SHAPE: [BATCH, EMBEDDING_DIM]

        # Make initial state for the LSTM cell
        h_0 = tf.reshape(q_p_a_hidden, [-1, d_cell_dim])  # hidden state from question, passage, and attention encoders
        c_0 = tf.reshape(tf.zeros((d_cell_dim)), [-1, d_cell_dim])  # empty memory, SHAPE: [BATCH, 4*HIDDEN_DIM]
        h_t = tf.nn.rnn_cell.LSTMStateTuple(c_0, h_0)

        # U and b project the LSTM output to a logit over the vocabulary (LSTM output -> logit)
        U = tf.get_variable('U',
                            shape=(d_cell_dim, VOCAB_SIZE),
                            initializer=tf.contrib.layers.xavier_initializer(),
                            dtype=tf.float32)
        b = tf.get_variable('b', shape=(VOCAB_SIZE, ), dtype=tf.float32)

        for time_step in range(OUTPUT_MAX_LENGTH):
            o_t, h_t = d_cell(inp, h_t)
            o_drop_t = tf.nn.dropout(o_t, self.dropout_placeholder)
            y_t = tf.matmul(o_drop_t, U) + b  # SHAPE: [BATCH, VOCAB_SIZE]
            y_t = tf.nn.softmax(y_t)

            if self.predicting:
                # Greedy decoding: feed the argmax word embedding back in
                inp_index = tf.argmax(y_t, 1)
                inp = tf.nn.embedding_lookup(self.pretrained_embeddings, inp_index)
            else:
                # Teacher forcing: feed the gold answer token at this time step
                inp = tf.slice(self.answers_placeholder, [0, time_step], [-1, 1])
                inp = tf.nn.embedding_lookup(self.pretrained_embeddings, inp)
                inp = tf.reshape(inp, [-1, EMBEDDING_DIM])

            preds.append(y_t)
            tf.get_variable_scope().reuse_variables()

        packed_preds = tf.pack(preds, axis=2)
        preds = tf.transpose(packed_preds, perm=[0, 2, 1])  # [BATCH, OUTPUT_MAX_LENGTH, VOCAB_SIZE]

    return preds
def add_prediction_op(self):
    questions = self.add_embedding(self.questions_placeholder)
    passages = self.add_embedding(self.passages_placeholder)

    # Question preprocessing encoder
    with tf.variable_scope("question"):
        q_cell = tf.nn.rnn_cell.LSTMCell(HIDDEN_DIM)
        q_outputs, _ = tf.nn.dynamic_rnn(
            q_cell, questions, dtype=tf.float32,
            sequence_length=self.seq_length(self.questions_placeholder))

    # Passage preprocessing encoder
    with tf.variable_scope("passage"):
        p_cell = tf.nn.rnn_cell.LSTMCell(HIDDEN_DIM)
        p_outputs, _ = tf.nn.dynamic_rnn(
            p_cell, passages, dtype=tf.float32,
            sequence_length=self.seq_length(self.passages_placeholder))

    # Match LSTM layer
    # Inputs: hidden states from the passage preprocessing encoder (p_outputs) [None x PASSAGE_MAX_LENGTH x HIDDEN_DIM]
    # Encoder inputs: hidden states from the question preprocessing encoder (q_outputs) [None x QUESTION_MAX_LENGTH x HIDDEN_DIM]
    # Dimension of the Match LSTM attention cell: HIDDEN_DIM
    # Dimension of : HIDDEN_DIM
    # Masking on the Match LSTM: same as the passage masking (seq_length(self.passages_placeholder))
    match_outputs, _ = self.encode_w_attn(
        p_outputs, self.seq_length(self.passages_placeholder), q_outputs,
        scope="match_LSTM_layer")

    # # Attention state encoder
    # with tf.variable_scope("attention"):
    #     a_cell = tf.nn.rnn_cell.LSTMCell(HIDDEN_DIM)
    #     a_outputs, _ = tf.nn.dynamic_rnn(a_cell, p_outputs, dtype=tf.float32, sequence_length=self.seq_length(self.passages_placeholder))

    q_last = tf.slice(q_outputs, [0, QUESTION_MAX_LENGTH - 1, 0], [-1, 1, -1])
    p_last = tf.slice(p_outputs, [0, PASSAGE_MAX_LENGTH - 1, 0], [-1, 1, -1])
    match_last = tf.slice(match_outputs, [0, PASSAGE_MAX_LENGTH - 1, 0], [-1, 1, -1])

    q_p_match_hidden = tf.concat(2, [q_last, p_last, match_last])  # SHAPE: [BATCH, 1, 3*HIDDEN_DIM]

    preds = list()
    with tf.variable_scope("decoder"):
        d_cell_dim = 3 * HIDDEN_DIM
        # Run decoder with attention between the DECODER and the MATCH LSTM OUTPUTS
        d_cell = LSTMAttnCell(d_cell_dim, match_outputs, HIDDEN_DIM)

        # Create first-time-step input to LSTM (starter token)
        inp = self.add_embedding(self.start_token_placeholder)  # STARTER TOKEN, SHAPE: [BATCH, EMBEDDING_DIM]

        # Make initial state for the LSTM cell
        h_0 = tf.reshape(q_p_match_hidden, [-1, d_cell_dim])  # hidden state from question, passage, and match layers
        c_0 = tf.reshape(tf.zeros((d_cell_dim)), [-1, d_cell_dim])  # empty memory, SHAPE: [BATCH, 3*HIDDEN_DIM]
        h_t = tf.nn.rnn_cell.LSTMStateTuple(c_0, h_0)

        # U and b project the LSTM output to a logit over the vocabulary (LSTM output -> logit)
        U = tf.get_variable('U',
                            shape=(d_cell_dim, VOCAB_SIZE),
                            initializer=tf.contrib.layers.xavier_initializer(),
                            dtype=tf.float32)
        b = tf.get_variable('b', shape=(VOCAB_SIZE, ), dtype=tf.float32)

        for time_step in range(OUTPUT_MAX_LENGTH):
            o_t, h_t = d_cell(inp, h_t)
            o_drop_t = tf.nn.dropout(o_t, self.dropout_placeholder)
            y_t = tf.matmul(o_drop_t, U) + b  # SHAPE: [BATCH, VOCAB_SIZE]
            y_t = tf.nn.softmax(y_t)

            # Greedy decoding: feed the argmax word embedding back in
            # if self.predicting:
            inp_index = tf.argmax(y_t, 1)
            inp = tf.nn.embedding_lookup(self.pretrained_embeddings, inp_index)
            # else:
            #     inp = tf.slice(self.answers_placeholder, [0, time_step], [-1, 1])
            #     inp = tf.nn.embedding_lookup(self.pretrained_embeddings, inp)
            #     inp = tf.reshape(inp, [-1, EMBEDDING_DIM])

            preds.append(y_t)
            tf.get_variable_scope().reuse_variables()

        packed_preds = tf.pack(preds, axis=2)
        preds = tf.transpose(packed_preds, perm=[0, 2, 1])  # [BATCH, OUTPUT_MAX_LENGTH, VOCAB_SIZE]

    return preds
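# Loss sketch (not part of this listing): the decoders above that apply tf.nn.softmax
# return preds as per-step probabilities of shape [BATCH, OUTPUT_MAX_LENGTH, VOCAB_SIZE].
# A matching add_loss_op could be a masked negative log-likelihood over the gold answer
# ids; the assumption that id 0 is the padding token is hypothetical, not from the
# original code.
def add_loss_op(self, preds):
    gold = tf.one_hot(self.answers_placeholder, VOCAB_SIZE)        # [BATCH, OUT_LEN, VOCAB_SIZE]
    step_ll = tf.reduce_sum(gold * tf.log(preds + 1e-10),
                            reduction_indices=[2])                 # [BATCH, OUT_LEN]
    # Mask out padded time steps, assuming id 0 is the padding token
    mask = tf.cast(tf.sign(self.answers_placeholder), tf.float32)  # [BATCH, OUT_LEN]
    return -tf.reduce_sum(step_ll * mask) / tf.reduce_sum(mask)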