def sent_level_attention(self):
    with tf.variable_scope('sent-level') as scope:
        sent_inputs = tf.reshape(self.word_outputs,
                                 [-1, self.max_sent_length, 2 * self.cell_dim])

        # sentence encoder
        cell_fw = rnn.GRUCell(self.cell_dim, name='cell_fw')
        cell_bw = rnn.GRUCell(self.cell_dim, name='cell_bw')

        init_state_fw = tf.tile(tf.get_variable('init_state_fw',
                                                shape=[1, self.cell_dim],
                                                initializer=tf.constant_initializer(0)),
                                multiples=[get_shape(sent_inputs)[0], 1])
        init_state_bw = tf.tile(tf.get_variable('init_state_bw',
                                                shape=[1, self.cell_dim],
                                                initializer=tf.constant_initializer(0)),
                                multiples=[get_shape(sent_inputs)[0], 1])

        rnn_outputs, _ = bidirectional_rnn(cell_fw=cell_fw,
                                           cell_bw=cell_bw,
                                           inputs=sent_inputs,
                                           input_lengths=self.sent_lengths,
                                           initial_state_fw=init_state_fw,
                                           initial_state_bw=init_state_bw,
                                           scope=scope)

        sent_outputs, sent_att_weights = attention(inputs=rnn_outputs,
                                                   att_dim=self.att_dim,
                                                   sequence_lengths=self.sent_lengths)

        self.sent_outputs = tf.layers.dropout(sent_outputs, self.dropout_rate,
                                              training=self.is_training)
def _init_word_encoder(self):
    '''
    Build the word encoder as described in the paper.
    :return:
    '''
    with tf.variable_scope('word-encoder') as scope:
        # collapse the (num docs, num sentences) dimensions so the encoder sees
        # (num sentences, num words, embedding size); each sentence is treated
        # independently of its document and position
        word_inputs = tf.reshape(self.embedded_inputs,
                                 [-1, self.max_word_length, self.emb_size])

        # length of each sentence
        word_lengths = tf.reshape(self.word_lengths, [-1])

        # define forward and backward GRU cells
        cell_fw = rnn.GRUCell(self.cell_dim, name='cell_fw')
        cell_bw = rnn.GRUCell(self.cell_dim, name='cell_bw')

        # initialize the forward GRU state to zeros, tiled once per sentence in the batch
        init_state_fw = tf.tile(tf.get_variable('init_state_fw',
                                                shape=[1, self.cell_dim],
                                                initializer=tf.constant_initializer(0)),
                                multiples=[get_shape(word_inputs)[0], 1])
        # same for the backward GRU state
        init_state_bw = tf.tile(tf.get_variable('init_state_bw',
                                                shape=[1, self.cell_dim],
                                                initializer=tf.constant_initializer(0)),
                                multiples=[get_shape(word_inputs)[0], 1])

        # bidirectional_rnn returns (outputs, state); the per-timestep outputs are kept
        # (rather than the final hidden state) because attention is computed over them
        rnn_outputs, _ = bidirectional_rnn(cell_fw=cell_fw,
                                           cell_bw=cell_bw,
                                           inputs=word_inputs,
                                           input_lengths=word_lengths,
                                           initial_state_fw=init_state_fw,
                                           initial_state_bw=init_state_bw,
                                           scope=scope)
        # rnn_outputs.shape = [num sentences, num words, 2 * self.cell_dim]

        # word_outputs are the sentence vectors, word_att_weights the attention alphas;
        # word_outputs has shape (num sentences, 1, 2 * cell_dim): one vector per sentence, as in the paper
        word_outputs, word_att_weights = attention(inputs=rnn_outputs,
                                                   att_dim=self.att_dim,
                                                   sequence_lengths=word_lengths)

        # apply dropout, active only during training
        self.word_outputs = tf.layers.dropout(word_outputs, self.dropout_rate,
                                              training=self.is_training)
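# NOTE: the `attention` helper called above is not part of this snippet. The sketch below is a
# hypothetical stand-in (renamed `attention_sketch`), assuming a HAN-style additive attention with
# a trainable context vector, masked by `sequence_lengths`, over inputs of shape [batch, time, dim].
# It returns one pooled vector per sequence plus the attention weights; the real helper may keep a
# singleton time axis, as the comments above suggest.
def attention_sketch(inputs, att_dim, sequence_lengths, scope='attention'):
    with tf.variable_scope(scope):
        # one-layer MLP projection: u_it = tanh(W * h_it + b)
        projection = tf.layers.dense(inputs, att_dim, activation=tf.nn.tanh)
        # trainable context vector u_w
        context = tf.get_variable('context_vector', shape=[att_dim],
                                  initializer=tf.truncated_normal_initializer(stddev=0.1))
        scores = tf.tensordot(projection, context, axes=1)             # [batch, time]
        # mask out padded positions before the softmax
        mask = tf.sequence_mask(sequence_lengths, maxlen=tf.shape(inputs)[1])
        scores = tf.where(mask, scores, tf.fill(tf.shape(scores), -1e30))
        alphas = tf.nn.softmax(scores)                                  # [batch, time]
        # attention-weighted sum of the RNN outputs -> one vector per sequence
        outputs = tf.reduce_sum(inputs * tf.expand_dims(alphas, -1), axis=1)
        return outputs, alphas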
def _init_sent_encoder(self):
    '''
    Build the sentence encoder as described in the paper.
    :return:
    '''
    with tf.variable_scope('sent-encoder') as scope:
        # input shape: (num docs, max sentences per document, 2 * cell_dim)
        sent_inputs = tf.reshape(self.word_outputs,
                                 [-1, self.max_sent_length, 2 * self.cell_dim])

        # sentence encoder
        cell_fw = rnn.GRUCell(self.cell_dim, name='cell_fw')
        cell_bw = rnn.GRUCell(self.cell_dim, name='cell_bw')

        # zero initial states, tiled once per document in the batch
        init_state_fw = tf.tile(tf.get_variable('init_state_fw',
                                                shape=[1, self.cell_dim],
                                                initializer=tf.constant_initializer(0)),
                                multiples=[get_shape(sent_inputs)[0], 1])
        init_state_bw = tf.tile(tf.get_variable('init_state_bw',
                                                shape=[1, self.cell_dim],
                                                initializer=tf.constant_initializer(0)),
                                multiples=[get_shape(sent_inputs)[0], 1])

        rnn_outputs, _ = bidirectional_rnn(cell_fw=cell_fw,
                                           cell_bw=cell_bw,
                                           inputs=sent_inputs,
                                           input_lengths=self.sent_lengths,
                                           initial_state_fw=init_state_fw,
                                           initial_state_bw=init_state_bw,
                                           scope=scope)
        # rnn_outputs.shape = [num docs, num sentences, 2 * self.cell_dim]

        # attention returns the document vectors;
        # sent_outputs has shape (num docs, 1, 2 * cell_dim): one vector per document, as in the paper
        sent_outputs, sent_att_weights = attention(inputs=rnn_outputs,
                                                   att_dim=self.att_dim,
                                                   sequence_lengths=self.sent_lengths)

        # dropout
        self.sent_outputs = tf.layers.dropout(sent_outputs, self.dropout_rate,
                                              training=self.is_training)
def _init_inter_review_encoder(self):
    # review-level encoding
    with tf.variable_scope('inter-review-encoder') as scope:
        review_inputs = tf.reshape(self.sent_outputs,
                                   [-1, self.max_review_length, 4 * self.emb_size])

        # boolean masks: non-zero tokens -> valid sentences -> valid reviews
        sent_inputs_mask = tf.reduce_any(tf.cast(self.docs, tf.bool), axis=3)
        review_inputs_mask = tf.reduce_any(sent_inputs_mask, axis=2)

        # review-level GRU encoder
        cell_fw = rnn.GRUCell(self.cell_dim, name='cell_fw')
        cell_bw = rnn.GRUCell(self.cell_dim, name='cell_bw')

        init_state_fw = tf.tile(tf.get_variable('init_state_fw',
                                                shape=[1, self.cell_dim],
                                                initializer=tf.constant_initializer(0)),
                                multiples=[get_shape(review_inputs)[0], 1])
        init_state_bw = tf.tile(tf.get_variable('init_state_bw',
                                                shape=[1, self.cell_dim],
                                                initializer=tf.constant_initializer(0)),
                                multiples=[get_shape(review_inputs)[0], 1])

        rnn_outputs, _ = bidirectional_rnn(cell_fw=cell_fw,
                                           cell_bw=cell_bw,
                                           inputs=review_inputs,
                                           input_lengths=self.review_lengths,
                                           initial_state_fw=init_state_fw,
                                           initial_state_bw=init_state_bw,
                                           scope=scope)

        # DiSAN self-attention over the review encodings
        reviews_encoding = disan(rnn_outputs, review_inputs_mask, 'DiSAN',
                                 self.dropout_rate, self.is_training, 0., 'elu',
                                 None, 'reviews-encoding')

        self.review_outputs = reviews_encoding
def _init_sent_encoder(self):
    with tf.variable_scope('sentence') as scope:
        sentence_rnn_inputs = tf.reshape(self.word_outputs,
                                         [-1, self.max_num_sents, 2 * self.hidden_dim])

        # sentence encoder
        cell_fw = rnn.GRUCell(self.hidden_dim)
        cell_bw = rnn.GRUCell(self.hidden_dim)

        init_state_fw = tf.tile(tf.get_variable('init_state_fw',
                                                shape=[1, self.hidden_dim],
                                                initializer=tf.constant_initializer(1.0)),
                                multiples=[get_shape(sentence_rnn_inputs)[0], 1])
        init_state_bw = tf.tile(tf.get_variable('init_state_bw',
                                                shape=[1, self.hidden_dim],
                                                initializer=tf.constant_initializer(1.0)),
                                multiples=[get_shape(sentence_rnn_inputs)[0], 1])

        sentence_rnn_outputs, _ = bidirectional_rnn(cell_fw=cell_fw,
                                                    cell_bw=cell_bw,
                                                    inputs=sentence_rnn_inputs,
                                                    input_lengths=self.document_lengths,
                                                    initial_state_fw=init_state_fw,
                                                    initial_state_bw=init_state_bw,
                                                    scope=scope)

        # attend over sentence vectors jointly with the image features
        self.sentence_outputs, self.sent_att_weights, self.img_att_weights = visual_aspect_attention(
            text_input=sentence_rnn_outputs,
            visual_input=self.images,
            att_dim=self.att_dim,
            sequence_lengths=self.document_lengths)

        self.sentence_outputs = tf.nn.dropout(self.sentence_outputs,
                                              keep_prob=self.dropout_keep_prob)
def _init_word_encoder(self):
    with tf.variable_scope('word') as scope:
        word_rnn_inputs = tf.reshape(self.embedded_inputs,
                                     [-1, self.max_num_words, self.emb_size])
        sentence_lengths = tf.reshape(self.sentence_lengths, [-1])

        # word encoder
        cell_fw = rnn.GRUCell(self.hidden_dim)
        cell_bw = rnn.GRUCell(self.hidden_dim)

        init_state_fw = tf.tile(tf.get_variable('init_state_fw',
                                                shape=[1, self.hidden_dim],
                                                initializer=tf.constant_initializer(1.0)),
                                multiples=[get_shape(word_rnn_inputs)[0], 1])
        init_state_bw = tf.tile(tf.get_variable('init_state_bw',
                                                shape=[1, self.hidden_dim],
                                                initializer=tf.constant_initializer(1.0)),
                                multiples=[get_shape(word_rnn_inputs)[0], 1])

        word_rnn_outputs, _ = bidirectional_rnn(cell_fw=cell_fw,
                                                cell_bw=cell_bw,
                                                inputs=word_rnn_inputs,
                                                input_lengths=sentence_lengths,
                                                initial_state_fw=init_state_fw,
                                                initial_state_bw=init_state_bw,
                                                scope=scope)

        self.word_outputs, self.word_att_weights = text_attention(inputs=word_rnn_outputs,
                                                                  att_dim=self.att_dim,
                                                                  sequence_lengths=sentence_lengths)

        self.word_outputs = tf.nn.dropout(self.word_outputs,
                                          keep_prob=self.dropout_keep_prob)
def _init_word_encoder(self):
    with tf.variable_scope('word-encoder') as scope:
        word_inputs = tf.reshape(self.embedded_inputs,
                                 [-1, self.max_word_length, self.emb_size])
        word_lengths = tf.reshape(self.word_lengths, [-1])

        # word encoder
        cell_fw = rnn.GRUCell(self.cell_dim, name='cell_fw')
        cell_bw = rnn.GRUCell(self.cell_dim, name='cell_bw')

        init_state_fw = tf.tile(tf.get_variable('init_state_fw',
                                                shape=[1, self.cell_dim],
                                                initializer=tf.constant_initializer(0)),
                                multiples=[get_shape(word_inputs)[0], 1])
        init_state_bw = tf.tile(tf.get_variable('init_state_bw',
                                                shape=[1, self.cell_dim],
                                                initializer=tf.constant_initializer(0)),
                                multiples=[get_shape(word_inputs)[0], 1])

        rnn_outputs, _ = bidirectional_rnn(cell_fw=cell_fw,
                                           cell_bw=cell_bw,
                                           inputs=word_inputs,
                                           input_lengths=word_lengths,
                                           initial_state_fw=init_state_fw,
                                           initial_state_bw=init_state_bw,
                                           scope=scope)

        word_outputs, word_att_weights = attention(inputs=rnn_outputs,
                                                   att_dim=self.att_dim,
                                                   sequence_lengths=word_lengths)

        self.word_outputs = tf.layers.dropout(word_outputs, self.dropout_rate,
                                              training=self.is_training)
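# NOTE: `bidirectional_rnn` and `get_shape` are project helpers that are not included in these
# snippets. The two sketches below are hypothetical stand-ins (hence the `_sketch` suffix),
# assuming the wrapper simply delegates to tf.nn.bidirectional_dynamic_rnn and concatenates the
# forward/backward results, and that `get_shape` mixes static and dynamic shape components.
def bidirectional_rnn_sketch(cell_fw, cell_bw, inputs, input_lengths,
                             initial_state_fw=None, initial_state_bw=None, scope=None):
    (out_fw, out_bw), (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
        cell_fw=cell_fw,
        cell_bw=cell_bw,
        inputs=inputs,
        sequence_length=input_lengths,
        initial_state_fw=initial_state_fw,
        initial_state_bw=initial_state_bw,
        dtype=tf.float32,
        scope=scope)
    outputs = tf.concat([out_fw, out_bw], axis=2)     # [batch, time, 2 * cell_dim]
    state = tf.concat([state_fw, state_bw], axis=1)   # [batch, 2 * cell_dim] for GRU cells
    return outputs, state


def get_shape_sketch(tensor):
    # return a list of per-dimension sizes, falling back to dynamic tf.shape() where the
    # static shape is unknown (e.g. the batch dimension)
    static = tensor.shape.as_list()
    dynamic = tf.shape(tensor)
    return [dynamic[i] if size is None else size for i, size in enumerate(static)]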
def __init__(self, batch_size=None, learning_rate=None, load_glove=True, is_training=True):
    # if batch size / learning rate are not specified, default to the values in hyperparams.py
    self.batch_size = batch_size or Hp.batch_size
    self.learning_rate = learning_rate or Hp.learning_rate

    # TODO: implement handling of character embeddings

    # load the pre-trained GloVe dictionary and create the embedding matrix
    if 'word_matrix.npy' not in os.listdir('data'):
        word_glove = glove_dict(Hp.glove_word) if load_glove else {}
        word_matrix = embedding_matrix(word_glove, 'word')
    else:
        word_matrix = np.load('./data/word_matrix.npy')

    # input placeholders (integer-encoded sentences) & labels
    with tf.variable_scope('inputs'):
        self.p_word_inputs = tf.placeholder(tf.int32, [self.batch_size, Hp.max_p_words], 'p_words')
        self.q_word_inputs = tf.placeholder(tf.int32, [self.batch_size, Hp.max_q_words], 'q_words')
        self.labels = tf.placeholder(tf.int32, [self.batch_size, 2], 'labels')

    # sequence-length placeholders (actual non-padded length of each sequence in the batch;
    # these dictate how far each RNN is unrolled)
    with tf.variable_scope('seq_lengths'):
        self.p_word_lengths = tf.placeholder(tf.int32, [self.batch_size], 'p_words')
        self.q_word_lengths = tf.placeholder(tf.int32, [self.batch_size], 'q_words')

    # create a tensor for the word embedding matrix and look up the GloVe embeddings of the inputs
    with tf.variable_scope('initial_embeddings'):
        self.word_matrix = tf.Variable(tf.constant(word_matrix, dtype=tf.float32),
                                       trainable=False, name='word_matrix')
        self.p_word_embeds = tf.nn.embedding_lookup(self.word_matrix, self.p_word_inputs,
                                                    name='p_word_embeds')
        self.q_word_embeds = tf.nn.embedding_lookup(self.word_matrix, self.q_word_inputs,
                                                    name='q_word_embeds')

    # encode both the paragraph & the question with bidirectional RNNs
    with tf.variable_scope('p_encodings'):
        self.p_encodings, states = bidirectional_rnn(self.p_word_embeds, self.p_word_lengths,
                                                     Hp.rnn1_cell, Hp.rnn1_layers, Hp.rnn1_units,
                                                     Hp.rnn1_dropout, is_training)
    with tf.variable_scope('q_encodings'):
        self.q_encodings, _ = bidirectional_rnn(self.q_word_embeds, self.q_word_lengths,
                                                Hp.rnn1_cell, Hp.rnn1_layers, Hp.rnn1_units,
                                                Hp.rnn1_dropout, is_training)

    # "proofread" the question by letting it attend over itself
    with tf.variable_scope('q_proofread'):
        self.q_pr_out, _, self.q_pr_attn = attention_alignment(
            self.q_encodings, self.q_word_lengths, self.q_encodings, self.q_word_lengths,
            Hp.attn_layers, Hp.attn_units, Hp.attn_dropout, Hp.attn_cell, Hp.attn_mech, is_training)

    # create the question-aware paragraph encoding with an attention-based bidirectional RNN
    with tf.variable_scope('q_aware_encoding'):
        self.pq_encoding, _, self.p2q_attn = attention_alignment(
            self.p_encodings, self.p_word_lengths, self.q_pr_out, self.q_word_lengths,
            Hp.attn_layers, Hp.attn_units, Hp.attn_dropout, Hp.attn_cell, Hp.attn_mech, is_training)

    # create the paragraph encoding with self-matching attention
    # TODO: if the decoder is uni-directional, which hidden state from the BiRNN should be fed to its initial state?
    with tf.variable_scope('self_matching'):
        self.pp_encoding, _, self.p2p_attn = attention_alignment(
            self.pq_encoding, self.p_word_lengths, self.pq_encoding, self.p_word_lengths,
            Hp.attn_layers, Hp.attn_units, Hp.attn_dropout, Hp.attn_cell, Hp.attn_mech, is_training)

    # find pointers (into the paragraph) to the beginning and end of the answer to the question
    with tf.variable_scope('pointer_net'):
        self.pointer_prob = pointer_net(self.pp_encoding, self.p_word_lengths, 2,
                                        self.word_matrix, Hp.ptr_cell, Hp.ptr_layers,
                                        Hp.ptr_units, Hp.ptr_dropout, is_training)
        self.pointers = tf.unstack(tf.argmax(self.pointer_prob, axis=2, output_type=tf.int32))

    # compute the loss: softmax cross-entropy over the start and end pointer distributions
    with tf.variable_scope('loss'):
        loss = tf.zeros(())
        pointers = tf.unstack(self.pointer_prob)
        labels = tf.unstack(self.labels, axis=1)
        equal = []

        for i in range(2):
            loss += tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels[i],
                                                                   logits=pointers[i])
            equal.append(tf.equal(self.pointers[i], labels[i]))

        self.loss = tf.reduce_mean(loss)
        self.correct = tf.cast(tf.stack(equal), tf.float32)
        self.all_correct = tf.cast(tf.equal(tf.reduce_sum(self.correct, axis=0), 2), tf.float32)
        self.exact_match = tf.reduce_mean(self.all_correct)

    self.train_step = tf.train.AdamOptimizer(self.learning_rate).minimize(self.loss)
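# NOTE: a hypothetical usage sketch for the model built in the __init__ above, not taken from the
# original project. `QAModel` stands in for whatever the class is actually called, and `next_batch`
# is a stand-in for the project's data pipeline; only the placeholders and ops created in __init__
# (p_word_inputs, q_word_inputs, p_word_lengths, q_word_lengths, labels, train_step, loss,
# exact_match) are referenced.
def train_sketch(num_steps, next_batch):
    model = QAModel(batch_size=Hp.batch_size)  # hypothetical class name
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for step in range(num_steps):
            # next_batch() is assumed to yield padded int arrays plus per-sequence lengths
            p_words, q_words, p_lens, q_lens, labels = next_batch()
            feed = {model.p_word_inputs: p_words,
                    model.q_word_inputs: q_words,
                    model.p_word_lengths: p_lens,
                    model.q_word_lengths: q_lens,
                    model.labels: labels}
            _, loss, em = sess.run([model.train_step, model.loss, model.exact_match],
                                   feed_dict=feed)
            print('step {}  loss {:.4f}  exact match {:.4f}'.format(step, loss, em))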