def _encode(self):
    """
    Employs two Bi-LSTMs to encode passage and question separately
    """
    # hidden_size
    with tf.variable_scope('passage_encoding'):
        self.sep_p_encodes, _ = rnn('bi-oncell', self.p_emb, self.p_length,
                                    self.hidden_size)
    with tf.variable_scope('question_encoding'):
        self.sep_q_encodes, _ = rnn('bi-oncell', self.q_emb, self.q_length,
                                    self.hidden_size)
    self.sep_p_encodes1 = tf.concat([self.p_emb, self.sep_p_encodes], -1)
    self.sep_q_encodes1 = tf.concat([self.q_emb, self.sep_q_encodes], -1)
    self.sep_p_encodes = tc.layers.fully_connected(
        self.sep_p_encodes1,
        num_outputs=self.hidden_size * 2,
        activation_fn=tf.nn.elu)
    self.sep_q_encodes = tc.layers.fully_connected(
        self.sep_q_encodes1,
        num_outputs=self.hidden_size * 2,
        activation_fn=tf.nn.elu)
    if self.use_dropout:
        self.sep_p_encodes = tf.nn.dropout(self.sep_p_encodes,
                                           self.dropout_keep_prob)
        self.sep_q_encodes = tf.nn.dropout(self.sep_q_encodes,
                                           self.dropout_keep_prob)
    print(self.sep_p_encodes, tf.shape(self.sep_p_encodes),
          self.sep_q_encodes, tf.shape(self.sep_q_encodes))
def _fuse(self):
    """
    Employs Bi-LSTM again to fuse the context information after match layer
    """
    with tf.variable_scope('fusion'):
        self.fuse_passage_question_encodes, _ = rnn('bi-lstm', self.match_p_encodes,
                                                    self.p_length, self.hidden_size,
                                                    layer_num=1)
        match_layer2 = AttentionFlowMatchLayer2(self.hidden_size)
        self.passage_attention, _ = match_layer2.match(
            self.fuse_passage_question_encodes)
    with tf.variable_scope('self-attention'):
        self.fuse_p_encodes, _ = rnn('bi-lstm', self.passage_attention,
                                     self.p_length, self.hidden_size,
                                     layer_num=1)
        self.fuse_p_encodes = ln(self.fuse_p_encodes +
                                 self.fuse_passage_question_encodes)
    if self.use_dropout:
        self.fuse_p_encodes = tf.nn.dropout(self.fuse_p_encodes,
                                            1 - self.dropout)
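# The ln(...) call in the residual fusion above is not defined in this listing.
# It is presumably a small layer-normalization helper. A minimal sketch, assuming
# TF 1.x and that ln simply wraps tf.contrib.layers.layer_norm over the feature
# axis (the name and defaults here are assumptions, not the project's code):
import tensorflow as tf


def ln(inputs, scope='layer_norm'):
    """Hypothetical layer-norm wrapper for the residual connection above."""
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        # Normalize over the last (feature) axis and learn per-feature scale/shift.
        return tf.contrib.layers.layer_norm(inputs, begin_norm_axis=-1,
                                            begin_params_axis=-1)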
def _encode(self):
    """
    Employs two Bi-LSTMs to encode passage and question separately
    """
    with tf.variable_scope('passage_char_encoding'):
        # [batch_size, sequence_len_count_by_word, num_of_chars_in_cur_word, char_emb_dim]
        shapes = self.p_char_emb.get_shape().as_list()
        # rnn returns (hidden states at every time step, hidden state of the last time step)
        _, self.sep_p_char_encodes = rnn(
            'bi-gru',
            inputs=tf.reshape(self.p_char_emb, (shapes[0] * shapes[1], shapes[2], -1)),
            length=tf.reshape(self.p_char_length, (shapes[0] * shapes[1],)),
            hidden_size=self.char_hidden_size)
        self.sep_p_char_encodes = tf.reshape(self.sep_p_char_encodes,
                                             (shapes[0], shapes[1], -1))
    with tf.variable_scope('question_char_encoding'):
        shapes = self.q_char_emb.get_shape().as_list()
        _, self.sep_q_char_encodes = rnn(
            'bi-gru',
            inputs=tf.reshape(self.q_char_emb, (shapes[0] * shapes[1], shapes[2], -1)),
            length=tf.reshape(self.q_char_length, (shapes[0] * shapes[1],)),
            hidden_size=self.char_hidden_size)
        self.sep_q_char_encodes = tf.reshape(self.sep_q_char_encodes,
                                             (shapes[0], shapes[1], -1))
    self.p_emb = tf.concat([self.p_emb, self.sep_p_char_encodes], axis=2)
    self.q_emb = tf.concat([self.q_emb, self.sep_q_char_encodes], axis=2)
    with tf.variable_scope('passage_encoding'):
        self.sep_p_encodes, _ = rnn('bi-lstm', self.p_emb, self.p_length,
                                    self.hidden_size)
    with tf.variable_scope('question_encoding'):
        self.sep_q_encodes, _ = rnn('bi-lstm', self.q_emb, self.q_length,
                                    self.hidden_size)
    if self.use_dropout:
        self.sep_p_encodes = tf.nn.dropout(self.sep_p_encodes, self.dropout_keep_prob)
        self.sep_q_encodes = tf.nn.dropout(self.sep_q_encodes, self.dropout_keep_prob)
def _fuse(self):
    """
    Employs Bi-LSTM again to fuse the context information after match layer
    """
    with tf.variable_scope('fusion1'):
        self.fuse_p_encodes, _ = rnn('bi-lstm', self.match_p_encodes,
                                     self.p_length, self.hidden_size,
                                     layer_num=1)
    with tf.variable_scope('fusion2'):
        sim_matrix = tf.matmul(self.fuse_p_encodes, self.fuse_p_encodes,
                               transpose_b=True)
        sim_matrix /= self.hidden_size ** 0.5
        context2context_attn = tf.matmul(tf.nn.softmax(sim_matrix, -1),
                                         self.fuse_p_encodes)
        concat_outputs = tf.concat(
            [self.fuse_p_encodes, context2context_attn], -1)
        self.fuse_p_encodes2, _ = rnn('bi-lstm', concat_outputs, self.p_length,
                                      self.hidden_size, layer_num=1)
        self.fuse_p_encodes3 = tf.contrib.layers.layer_norm(
            self.fuse_p_encodes + self.fuse_p_encodes2)
    if self.use_dropout:
        self.fuse_p_encodes3 = tf.nn.dropout(self.fuse_p_encodes3,
                                             1 - self.dropout)
def _encode(self):
    """
    Employs two Bi-LSTMs to encode passage and question separately
    """
    with tf.variable_scope('encoding', reuse=tf.AUTO_REUSE):
        self.sep_p_encodes, _ = rnn('bi-lstm', self.p_emb, self.p_length,
                                    self.hidden_size, layer_num=2,
                                    dropout_keep_prob=1 - self.dropout)
        self.sep_q_encodes, _ = rnn('bi-lstm', self.q_emb, self.q_length,
                                    self.hidden_size, layer_num=2,
                                    dropout_keep_prob=1 - self.dropout)
def _char_encode(self):
    """
    Encoding char_embedding so as to align with word_embedding
    """
    with tf.variable_scope('passage_char_encoding'):
        shapes = tf.shape(self.p_char_emb)
        p_char_emb = tf.reshape(
            self.p_char_emb,
            (shapes[0] * shapes[1], shapes[2], self.char_vocab.embed_dim))
        p_char_length = tf.reshape(self.p_char_length, [-1])
        self.p_char_encodes, self.p_char_state = rnn(
            'bi-lstm', p_char_emb, p_char_length, self.hidden_size)
        self.p_char_state = tf.reshape(
            tf.concat(self.p_char_state, axis=1),
            (shapes[0], shapes[1], self.hidden_size * 2))
    with tf.variable_scope('question_char_encoding'):
        shapes = tf.shape(self.q_char_emb)
        q_char_emb = tf.reshape(
            self.q_char_emb,
            (shapes[0] * shapes[1], shapes[2], self.char_vocab.embed_dim))
        q_char_length = tf.reshape(self.q_char_length, [-1])
        self.q_char_encodes, self.q_char_state = rnn(
            'bi-lstm', q_char_emb, q_char_length, self.hidden_size)
        self.q_char_state = tf.reshape(
            tf.concat(self.q_char_state, axis=1),
            (shapes[0], shapes[1], self.hidden_size * 2))
    if self.use_dropout:
        self.p_char_state = tf.nn.dropout(self.p_char_state, self.dropout_keep_prob)
        self.q_char_state = tf.nn.dropout(self.q_char_state, self.dropout_keep_prob)
def _fuse(self):
    """
    Employs Bi-LSTM again to fuse the context information after match layer
    """
    with tf.variable_scope('fusion1'):
        self.fuse_p_encodes, _ = rnn('bi-lstm', self.match_p_encodes,
                                     self.p_length, self.hidden_size,
                                     layer_num=1)
    if self.use_dropout:
        self.fuse_p_encodes = tf.nn.dropout(self.fuse_p_encodes,
                                            1 - self.dropout)
    with tf.variable_scope('fusion2'):
        sim_matrix = tf.matmul(self.fuse_p_encodes, self.fuse_p_encodes,
                               transpose_b=True)
        sim_matrix /= self.hidden_size ** 0.5
        context2context_attn = tf.matmul(tf.nn.softmax(sim_matrix, -1),
                                         self.fuse_p_encodes)
        concat_outputs = tf.concat(
            [self.fuse_p_encodes, context2context_attn], -1)
        dim = concat_outputs.get_shape().as_list()[-1]
        gate = tf.nn.sigmoid(dense(concat_outputs, dim, use_bias=False))
        gate_output = concat_outputs * gate
        self.fuse_p_encodes2, _ = rnn('bi-lstm', gate_output, self.p_length,
                                      self.hidden_size, layer_num=1)
    if self.use_dropout:
        self.fuse_p_encodes2 = tf.nn.dropout(self.fuse_p_encodes2,
                                             1 - self.dropout)
def _fuse(self):
    """
    Employs Bi-LSTM again to fuse the context information after match layer
    """
    with tf.variable_scope('fusion1'):
        self.fuse_p_encodes, _ = rnn('bi-lstm', self.match_p_encodes,
                                     self.p_length, self.hidden_size,
                                     layer_num=1)
    with tf.variable_scope('fusion2'):
        d_inputs = dropout(self.fuse_p_encodes, keep_prob=1 - self.dropout)
        d_memory = dropout(self.fuse_p_encodes, keep_prob=1 - self.dropout)
        JX = tf.shape(self.fuse_p_encodes)[1]
        inputs_ = tf.nn.relu(
            dense(d_inputs, self.hidden_size, use_bias=False, scope="inputs"))
        memory_ = tf.nn.relu(
            dense(d_memory, self.hidden_size, use_bias=False, scope="memory"))
        sim_matrix = tf.matmul(inputs_, memory_, transpose_b=True)
        sim_matrix /= self.hidden_size ** 0.5
        mask_c = tf.tile(tf.expand_dims(self.c_mask, axis=1), [1, JX, 1])
        context2context_attn = tf.matmul(
            tf.nn.softmax(softmax_mask(sim_matrix, mask_c), -1),
            self.fuse_p_encodes)
        concat_outputs = tf.concat([self.fuse_p_encodes, context2context_attn], -1)
        dim = concat_outputs.get_shape().as_list()[-1]
        d_concat_outputs = dropout(concat_outputs, keep_prob=1 - self.dropout)
        gate = tf.nn.sigmoid(dense(d_concat_outputs, dim, use_bias=False))
        gate_output = concat_outputs * gate
        self.fuse_p_encodes2, _ = rnn('bi-lstm', gate_output, self.p_length,
                                      self.hidden_size, layer_num=1)
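# The dense(...), dropout(...) and softmax_mask(...) helpers used in the gated
# self-attention above are not defined in this listing. A minimal sketch, assuming
# they behave like the commonly used R-Net-style TF 1.x utilities; the names,
# defaults and constants below are assumptions, not the authors' exact code:
import tensorflow as tf

VERY_NEGATIVE_NUMBER = -1e30


def softmax_mask(val, mask):
    """Push masked (padding) positions toward -inf so softmax ignores them."""
    return val + VERY_NEGATIVE_NUMBER * (1.0 - tf.cast(mask, tf.float32))


def dropout(args, keep_prob=1.0):
    """Thin dropout wrapper; the original may also gate this on an is_train flag."""
    return tf.nn.dropout(args, keep_prob) if keep_prob < 1.0 else args


def dense(inputs, hidden, use_bias=True, scope='dense'):
    """Apply one shared linear projection to the last dimension of `inputs`."""
    with tf.variable_scope(scope):
        dim = inputs.get_shape().as_list()[-1]
        out_shape = tf.concat([tf.shape(inputs)[:-1], [hidden]], axis=0)
        flat = tf.reshape(inputs, [-1, dim])
        W = tf.get_variable('W', [dim, hidden])
        res = tf.matmul(flat, W)
        if use_bias:
            b = tf.get_variable('b', [hidden], initializer=tf.zeros_initializer())
            res = tf.nn.bias_add(res, b)
        return tf.reshape(res, out_shape)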
def _fuse(self):
    """
    Employs Bi-LSTM again to fuse the context information after match layer
    """
    with tf.variable_scope('p_fusion'):
        self.fuse_p_encodes, self.fuse_P = rnn('bi-lstm', self.match_p_encodes,
                                               self.p_length, self.hidden_size,
                                               layer_num=1)
        if self.use_dropout:
            self.fuse_p_encodes = tf.nn.dropout(self.fuse_p_encodes,
                                                self.dropout_keep_prob)
            self.fuse_P = tf.nn.dropout(self.fuse_P, self.dropout_keep_prob)
    with tf.variable_scope('t_fusion'):
        self.fuse_t_encodes, self.fuse_T = rnn('bi-lstm', self.match_t_encodes,
                                               self.t_length, self.hidden_size,
                                               layer_num=1)
        if self.use_dropout:
            self.fuse_t_encodes = tf.nn.dropout(self.fuse_t_encodes,
                                                self.dropout_keep_prob)
            self.fuse_T = tf.nn.dropout(self.fuse_T, self.dropout_keep_prob)
def _shared_paramater_encoder(self):
    with tf.variable_scope('shared_paramater_encoder', reuse=tf.AUTO_REUSE):
        self.shared_p_encodes, _ = rnn('bi-lstm', self.sep_p_encodes1,
                                       self.p_length, self.hidden_size)
        self.shared_q_encodes, _ = rnn('bi-lstm', self.sep_q_encodes1,
                                       self.q_length, self.hidden_size)
def _encode(self):
    """
    Employs two Bi-LSTMs to encode passage and question separately
    """
    with tf.variable_scope('passage_encoding'):
        rnn = cudnn_gru(num_layers=3, num_units=self.hidden_size,
                        batch_size=tf.shape(self.p_emb)[0],
                        input_size=self.p_emb.get_shape().as_list()[-1],
                        keep_prob=1 - self.dropout)
        self.sep_p_encodes = rnn(self.p_emb, seq_len=self.p_length)
        self.sep_q_encodes = rnn(self.q_emb, seq_len=self.q_length)
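# cudnn_gru above is a callable encoder wrapper that is constructed once and then
# applied to both passage and question. It is not defined in this listing; the
# real class presumably drives tf.contrib.cudnn_rnn kernels. Below is a hypothetical,
# CPU-friendly stand-in with the same constructor and call signature, sketched with
# tf.nn.bidirectional_dynamic_rnn; batch_size and input_size are accepted only for
# interface compatibility and the internals are an assumption, not the original code:
import tensorflow as tf


class cudnn_gru(object):
    """Hypothetical stand-in matching the wrapper interface used above."""

    def __init__(self, num_layers, num_units, batch_size=None, input_size=None,
                 keep_prob=1.0, scope='cudnn_gru'):
        self.num_layers = num_layers
        self.num_units = num_units
        self.keep_prob = keep_prob
        self.scope = scope

    def __call__(self, inputs, seq_len):
        outputs = inputs
        with tf.variable_scope(self.scope, reuse=tf.AUTO_REUSE):
            for layer in range(self.num_layers):
                with tf.variable_scope('layer_%d' % layer):
                    cell_fw = tf.nn.rnn_cell.GRUCell(self.num_units)
                    cell_bw = tf.nn.rnn_cell.GRUCell(self.num_units)
                    (out_fw, out_bw), _ = tf.nn.bidirectional_dynamic_rnn(
                        cell_fw, cell_bw, outputs, sequence_length=seq_len,
                        dtype=tf.float32)
                    # Concatenate both directions and apply dropout between layers.
                    outputs = tf.nn.dropout(tf.concat([out_fw, out_bw], axis=-1),
                                            self.keep_prob)
        return outputs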
def _encode(self):
    """
    Employs two Bi-LSTMs to encode passage and question separately
    """
    init = None
    if self.para_init:
        init_w = tf.constant_initializer(self.init1)
        init_b = tf.constant_initializer(self.init1)
    else:
        init_w = initializers.xavier_initializer()
        init_b = tf.zeros_initializer()
    if self.simple_net in [0, 1, 4]:
        with tf.variable_scope('passage_encoding'):
            self.sep_p_encodes = tc.layers.fully_connected(
                self.p_emb,
                num_outputs=2 * self.hidden_size,
                activation_fn=tf.nn.tanh,
                weights_initializer=init_w,
                biases_initializer=init_b)
        with tf.variable_scope('question_encoding'):
            self.sep_q_encodes = tc.layers.fully_connected(
                self.q_emb,
                num_outputs=2 * self.hidden_size,
                activation_fn=tf.nn.tanh,
                weights_initializer=init_w,
                biases_initializer=init_b)
    if self.simple_net in [2, 3, 5, 7, 8]:
        with tf.variable_scope('passage_encoding'):
            self.sep_p_encodes, self.seq_p_states, self.p_r = rnn(
                'bi-lstm', self.p_emb, self.p_length, self.hidden_size,
                self.init1, batch_size=self.batch_size, debug=self.para_init)
        with tf.variable_scope('question_encoding'):
            self.sep_q_encodes, self.seq_q_states, _ = rnn(
                'bi-lstm', self.q_emb, self.q_length, self.hidden_size,
                self.init1, batch_size=self.batch_size, debug=self.para_init)
    if self.use_dropout:
        self.sep_p_encodes = tf.nn.dropout(self.sep_p_encodes, self.dropout_keep_prob)
        self.sep_q_encodes = tf.nn.dropout(self.sep_q_encodes, self.dropout_keep_prob)
    # self.sep_p_encodes *= tf.expand_dims(self.passage_mask, -1)
    # self.sep_q_encodes *= tf.expand_dims(self.question_mask, -1)
    variable_summaries(self.sep_p_encodes)
    variable_summaries(self.sep_q_encodes)
def _encode(self):
    """
    Employs two Bi-LSTMs to encode passage and question separately
    """
    with tf.variable_scope('passage_encoding'):
        self.sep_p_encodes, _ = rnn('bi-lstm', self.p_emb, self.p_length,
                                    self.hidden_size)
    with tf.variable_scope('question_encoding'):
        self.sep_q_encodes, _ = rnn('bi-lstm', self.q_emb, self.q_length,
                                    self.hidden_size)
    if self.use_dropout:
        self.sep_p_encodes = tf.nn.dropout(self.sep_p_encodes, self.dropout_keep_prob)
        self.sep_q_encodes = tf.nn.dropout(self.sep_q_encodes, self.dropout_keep_prob)
def _encode_back(self):
    """
    Employs two Bi-LSTMs to encode passage and question separately
    """
    with tf.variable_scope('passage_encoding'):
        self.sep_p_encodes, _ = rnn('bi-lstm', self.p_emb, self.p_length,
                                    self.hidden_size)
    with tf.variable_scope('question_encoding'):
        self.sep_q_encodes, _ = rnn('bi-lstm', self.q_emb, self.q_length,
                                    self.hidden_size)
    if self.use_dropout:
        self.sep_p_encodes = tf.nn.dropout(self.sep_p_encodes, self.dropout_keep_prob)
        self.sep_q_encodes = tf.nn.dropout(self.sep_q_encodes, self.dropout_keep_prob)
def _encode(self):
    """
    Employs two Bi-LSTMs to encode passage and question separately
    """
    if self.use_dropout:
        self.p_emb = tf.nn.dropout(self.p_emb, self.dropout_keep_prob)
        self.q_emb = tf.nn.dropout(self.q_emb, self.dropout_keep_prob)
    with tf.variable_scope('passage_encoding'):
        # self.sep_p_encodes, _ = bilstm_layer(self.p_emb, self.p_length, self.hidden_size)
        self.sep_p_encodes, _ = rnn("bi-lstm", self.p_emb, self.p_length,
                                    self.hidden_size)
    with tf.variable_scope('question_encoding'):
        # self.sep_q_encodes, _ = bilstm_layer(self.q_emb, self.q_length, self.hidden_size)
        self.sep_q_encodes, _ = rnn("bi-lstm", self.q_emb, self.q_length,
                                    self.hidden_size)
def _encode(self):
    """
    Employs two Bi-LSTMs to encode passage and question separately
    """
    with tf.variable_scope('encoding'):
        self.sep_p_encodes, _ = rnn('bi-lstm', self.p_emb, self.p_length,
                                    self.hidden_size)
        tf.get_variable_scope().reuse_variables()
        self.sep_q_encodes, _ = rnn('bi-lstm', self.q_emb, self.q_length,
                                    self.hidden_size)
    if self.use_dropout:
        self.sep_p_encodes = tf.nn.dropout(self.sep_p_encodes, 1 - self.dropout)
        self.sep_q_encodes = tf.nn.dropout(self.sep_q_encodes, 1 - self.dropout)
def _encode(self):
    """
    Employs two Bi-LSTMs to encode passage and question separately
    """
    with tf.variable_scope('passage_encoding'):
        self.sep_p_encodes, self.sen_state = rnn('bi-lstm', self.p_all_emb,
                                                 self.p_all_length, self.hidden_size)
    with tf.variable_scope('question_encoding'):
        self.sep_q_encodes, self.q_state = rnn('bi-lstm', self.q_emb,
                                               self.q_length, self.hidden_size)
    if self.use_dropout:
        self.sep_p_encodes = tf.nn.dropout(self.sep_p_encodes, self.dropout_keep_prob)
        self.sep_q_encodes = tf.nn.dropout(self.sep_q_encodes, self.dropout_keep_prob)
        self.sen_state = tf.nn.dropout(self.sen_state, self.dropout_keep_prob)
        self.q_state = tf.nn.dropout(self.q_state, self.dropout_keep_prob)
    # passage encode
    self.passages = tf.reshape(
        self.sen_state,
        shape=[self.batch_size, self.pass_length, self.vocab.embed_dim])
def _fuse(self):
    """
    Employs Bi-LSTM again to fuse the context information after match layer
    """
    if self.simple_net in [0]:
        return
    if self.para_init:
        init_w = tf.constant_initializer(self.init1)
        init_b = tf.constant_initializer(self.init1)
    else:
        init_w = initializers.xavier_initializer()
        init_b = tf.zeros_initializer()
    with tf.variable_scope('fusion'):
        if self.simple_net in [1, 4]:
            self.fuse_p_encodes = tc.layers.fully_connected(
                self.match_p_encodes,
                num_outputs=2 * self.hidden_size,
                activation_fn=tf.nn.tanh,
                weights_initializer=init_w,
                biases_initializer=init_b)
        if self.simple_net in [2, 3, 8]:
            self.fuse_p_encodes, _, _ = rnn(
                'bi-lstm', self.match_p_encodes, self.p_length, self.hidden_size,
                self.init1, batch_size=self.batch_size, layer_num=1,
                debug=self.para_init)
        if self.use_dropout:
            self.fuse_p_encodes = tf.nn.dropout(self.fuse_p_encodes,
                                                self.dropout_keep_prob)
def _fuse(self):
    """
    After the match layer, employs a Bi-LSTM to fuse the context information
    """
    with tf.variable_scope('fusion'):
        self.fuse_p_encodes, _ = rnn('bi-lstm', self.match_p_encodes,
                                     self.p_length, self.hidden_size,
                                     layer_num=1)
        self.fuse_value = tf.reduce_mean(self.fuse_p_encodes)
        if self.use_dropout:
            self.fuse_p_encodes = tf.nn.dropout(self.fuse_p_encodes,
                                                self.dropout_keep_prob)
    with tf.variable_scope('self-matching'):
        match_layer = SelfMatchingLayer(self.hidden_size)
        tem_encodes = tf.identity(self.fuse_p_encodes)
        self_matching_encodes, _ = match_layer.match(
            self.fuse_p_encodes, tem_encodes, self.p_length)
        self.re_match_value = tf.reduce_mean(self_matching_encodes)
        if self.use_dropout:
            self.fuse_p_encodes = tf.nn.dropout(self_matching_encodes,
                                                self.dropout_keep_prob)
        else:
            self.fuse_p_encodes = self_matching_encodes
def _fuse(self):
    """
    Employs Bi-LSTM again to fuse the context information after match layer
    """
    with tf.variable_scope('fusion'):
        self.fuse_p_encodes, _ = rnn('bi-lstm', self.match_p_encodes,
                                     self.p_length, self.hidden_size,
                                     layer_num=1)
        if self.use_dropout:
            self.fuse_p_encodes = tf.nn.dropout(self.fuse_p_encodes,
                                                self.dropout_keep_prob)
    # Add a self-matching mechanism
    with tf.variable_scope('self-matching'):
        match_layer = SelfMatchingLayer(self.hidden_size)
        tem_encodes = tf.identity(self.fuse_p_encodes)
        self_matching_encodes, _ = match_layer.match(
            self.fuse_p_encodes, tem_encodes, self.p_length)
        self.re_match_value = tf.reduce_mean(self_matching_encodes)
        if self.use_dropout:
            self.fuse_p_encodes = tf.nn.dropout(self_matching_encodes,
                                                self.dropout_keep_prob)
        else:
            self.fuse_p_encodes = self_matching_encodes
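# SelfMatchingLayer is used in the two snippets above but never defined in this
# listing; its match(passage, passage_copy, p_length) call mirrors the other
# match layers and returns (encodes, state). A minimal sketch of one way such a
# layer could be written, using scaled dot-product self-attention with a length
# mask; this is an assumption about the class, not the authors' implementation
# (another snippet later also passes an explicit mask, which this sketch omits):
import tensorflow as tf


class SelfMatchingLayer(object):
    """Hypothetical dot-product self-matching layer with a match() interface."""

    def __init__(self, hidden_size):
        self.hidden_size = hidden_size

    def match(self, passage_encodes, whole_passage_encodes, p_length):
        with tf.variable_scope('self_matching'):
            # Build a [batch, passage_len] mask from the valid lengths.
            mask = tf.sequence_mask(p_length,
                                    maxlen=tf.shape(whole_passage_encodes)[1],
                                    dtype=tf.float32)
            # Scaled dot-product similarity between all passage positions.
            logits = tf.matmul(passage_encodes, whole_passage_encodes,
                               transpose_b=True) / (self.hidden_size ** 0.5)
            logits += (1.0 - tf.expand_dims(mask, 1)) * -1e30
            attn = tf.nn.softmax(logits, -1)
            attended = tf.matmul(attn, whole_passage_encodes)
            # Concatenate original and self-attended passage representations.
            outputs = tf.concat([passage_encodes, attended], -1)
            return outputs, None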
def _match(self):
    """
    The core of RC model, get the question-aware passage encoding with
    either BIDAF or MLSTM
    """
    with tf.variable_scope('match'):
        if self.algo == 'MLSTM':
            match_layer = MatchLSTMLayer(self.hidden_size)
        elif self.algo == 'BIDAF':
            match_layer = AttentionFlowMatchLayer(self.hidden_size)
        else:
            raise NotImplementedError(
                'The algorithm {} is not implemented.'.format(self.algo))
        self.match_p_encodes, _ = match_layer.match(
            self.sep_p_encodes, self.sep_q_encodes,
            self.p_t_length, self.q_t_length)
        self.match_p_encodes, _ = rnn('bi-lstm', self.match_p_encodes,
                                      self.p_t_length, self.hidden_size,
                                      layer_num=1)
        if self.use_dropout:
            self.match_p_encodes = tf.nn.dropout(self.match_p_encodes,
                                                 self.dropout_keep_prob)
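# AttentionFlowMatchLayer and MatchLSTMLayer are DuReader-baseline-style match
# layers that are not reproduced in this listing. For reference, a sketch of the
# attention-flow (BIDAF) variant, assuming the baseline's formulation of
# context-to-question and question-to-context attention; p_length and q_length
# are kept for interface parity but unused in this simplified form:
import tensorflow as tf


class AttentionFlowMatchLayer(object):
    """Sketch of a BIDAF-style attention flow match layer (assumed formulation)."""

    def __init__(self, hidden_size):
        self.hidden_size = hidden_size

    def match(self, passage_encodes, question_encodes, p_length, q_length):
        with tf.variable_scope('bidaf'):
            # Similarity between every passage and question position.
            sim_matrix = tf.matmul(passage_encodes, question_encodes,
                                   transpose_b=True)
            # Context-to-question attention: attend over question words.
            context2question_attn = tf.matmul(tf.nn.softmax(sim_matrix, -1),
                                              question_encodes)
            # Question-to-context attention: one distribution over passage words.
            b = tf.nn.softmax(tf.expand_dims(tf.reduce_max(sim_matrix, 2), 1), -1)
            question2context_attn = tf.tile(
                tf.matmul(b, passage_encodes),
                [1, tf.shape(passage_encodes)[1], 1])
            concat_outputs = tf.concat(
                [passage_encodes, context2question_attn,
                 passage_encodes * context2question_attn,
                 passage_encodes * question2context_attn], -1)
            return concat_outputs, None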
def _short_mem_encoder(self, scope_name, mem_name, att_q, mem, mem_length,
                       temporal_encoding_len=0):
    with tf.variable_scope(scope_name, reuse=tf.AUTO_REUSE):
        with tf.variable_scope('short_mem_encoder_%s' % mem_name, reuse=tf.AUTO_REUSE):
            # add input-aware attention to the rnn encoder
            # [batch_size * passage_window_size, self.mem_dim]
            smems, smem = rnn(self.rnn_type, mem, mem_length, self.mem_dim / 2,
                              dropout_keep_prob=self.dropout_keep_prob)
            if att_q is not None:
                # [batch_size, passage_window_size, max_len, self.mem_dim]
                smems = tf.reshape(smems, [self.batch_size, -1,
                                           tf.shape(smems)[-2], self.mem_dim])
                # [batch_size, n, 1, 1, mem_dim]
                att_q = tf.expand_dims(tf.expand_dims(att_q, 2), 2)
                # [batch_size, 1, passage_window_size, max_len, self.mem_dim]
                smems = tf.expand_dims(smems, 1)
                # [batch_size, n, n_of_mem, max_mem_len]
                w = tf.nn.softmax(tf.reduce_sum(att_q * smems, -1))
                # [batch_size, n, n_of_mem, max_mem_len, mem_dim]
                smems = smems * tf.expand_dims(w, -1)
                # [batch_size, n, n_of_mem, mem_dim]
                smem = tf.reduce_sum(smems, -2)
            else:
                smem = tf.reshape(smem, [self.batch_size, -1, self.mem_dim])
                smem = tf.expand_dims(smem, 1)
            if temporal_encoding_len:
                T = tf.get_variable(name='T',
                                    shape=[1, 1, temporal_encoding_len, self.mem_dim])
                smem += T
            return smem
def _fuse(self):
    """
    After the match layer, employs a Bi-LSTM to fuse the context information
    """
    # fuse the clustering (routing) information
    with tf.variable_scope('p-routing-fusion'):
        self.routing_p_encodes, _ = rnn('bi-lstm', self.tp_emb, self.p_length,
                                        self.hidden_size)
def _fuse(self):
    """
    Employs Bi-LSTM again to fuse the context information after match layer
    """
    with tf.variable_scope('fusion'):
        self.fuse_p_encodes, _ = rnn('bi-lstm', self.match_p_encodes,
                                     self.p_length, self.hidden_size)
        if self.use_dropout:
            self.fuse_p_encodes = tf.nn.dropout(self.fuse_p_encodes,
                                                1 - self.dropout)
def _fuse(self):
    """
    Employs Bi-LSTM again to fuse the context information after match layer
    """
    with tf.variable_scope('fusion'):
        self.fuse_p_encodes, _ = rnn('bi-lstm', self.match_p_encodes,
                                     self.p_length, self.hidden_size,
                                     layer_num=1)
        if self.use_dropout:
            self.fuse_p_encodes = tf.nn.dropout(self.fuse_p_encodes,
                                                self.dropout_keep_prob)
def _long_mem_encoder(self, scope_name, mem_name, att_q, mem, mem_length):
    with tf.variable_scope(scope_name, reuse=tf.AUTO_REUSE):
        with tf.variable_scope('long_mem_encoder_%s' % mem_name, reuse=tf.AUTO_REUSE):
            lmems, lmem = rnn(self.rnn_type, mem, mem_length, self.mem_dim / 2,
                              dropout_keep_prob=self.dropout_keep_prob)
            lmem = tf.expand_dims(tf.expand_dims(lmem, 0), 0)
            return lmem
def _encode(self):
    """
    Employs two Bi-LSTMs to encode passage and question separately
    """
    with tf.variable_scope('encode'):
        batch_size = tf.shape(self.start_label)[0]
        with tf.variable_scope('passage_encoding'):
            with tf.variable_scope('token_level'):
                sep_p_t_encodes, _ = rnn('bi-lstm', self.p_t_emb, self.p_t_length,
                                         self.hidden_size)
            if self.use_char_emb:
                with tf.variable_scope('char_level'):
                    _, sep_p_c_encodes = rnn('bi-lstm', self.p_c_emb,
                                             self.p_c_length, self.hidden_size)
                    sep_p_c_encodes = tf.reshape(
                        sep_p_c_encodes,
                        [batch_size, self.p_pad_len, self.hidden_size * 2])
                self.sep_p_encodes = tf.concat(
                    [sep_p_t_encodes, sep_p_c_encodes], axis=-1)
            else:
                self.sep_p_encodes = sep_p_t_encodes
        with tf.variable_scope('question_encoding'):
            with tf.variable_scope('token_level'):
                sep_q_t_encodes, _ = rnn('bi-lstm', self.q_t_emb, self.q_t_length,
                                         self.hidden_size)
            if self.use_char_emb:
                with tf.variable_scope('char_level'):
                    _, sep_q_c_encodes = rnn('bi-lstm', self.q_c_emb,
                                             self.q_c_length, self.hidden_size)
                    sep_q_c_encodes = tf.reshape(
                        sep_q_c_encodes,
                        [batch_size, self.q_pad_len, self.hidden_size * 2])
                self.sep_q_encodes = tf.concat(
                    [sep_q_t_encodes, sep_q_c_encodes], axis=-1)
            else:
                self.sep_q_encodes = sep_q_t_encodes
        if self.use_dropout:
            self.sep_p_encodes = tf.nn.dropout(self.sep_p_encodes,
                                               self.dropout_keep_prob)
            self.sep_q_encodes = tf.nn.dropout(self.sep_q_encodes,
                                               self.dropout_keep_prob)
def _fuse(self):
    """
    Employs Bi-LSTM again to fuse the context information after match layer
    """
    with tf.variable_scope('fusion'):
        self.fuse_p_encodes, _ = rnn('bi-lstm', self.match_p_encodes,
                                     self.p_length, self.hidden_size,
                                     layer_num=1)
        # self.fuse_p_encodes = multihead_attention(
        #     queries=self.fuse_p_encodes, keys=self.fuse_p_encodes,
        #     values=self.fuse_p_encodes, num_heads=1, dropout_rate=self.dropout,
        #     training=True, causality=False, scope="vanilla_attention")
        if self.use_dropout:
            self.fuse_p_encodes = tf.nn.dropout(self.fuse_p_encodes,
                                                1 - self.dropout)
def _fuse(self):
    """
    Employs Bi-LSTM again to fuse the context information after match layer.
    This corresponds to the modeling layer in the original paper, which uses
    two layers there.
    """
    with tf.variable_scope('fusion'):
        self.fuse_p_encodes, _ = rnn('bi-lstm', self.match_p_encodes,
                                     self.p_length, self.hidden_size,
                                     layer_num=1)
        if self.use_dropout:
            self.fuse_p_encodes = tf.nn.dropout(self.fuse_p_encodes,
                                                self.dropout_keep_prob)
def _fuse(self):
    """
    After the match layer, employs a Bi-LSTM to fuse the context information
    """
    # fuse the clustering (routing) information
    with tf.variable_scope('p-routing-fusion'):
        routing_p_encodes, _ = rnn('bi-lstm', self.tp_emb, self.p_length,
                                   self.hidden_size)
    with tf.variable_scope('self-match'):
        match_layer = SelfMatchingLayer(self.hidden_size)
        tem_encodes = tf.identity(routing_p_encodes)
        self.rou_p_encodes, _ = match_layer.match(routing_p_encodes, tem_encodes,
                                                  self.p_length, self.p_mask)
def _encode(self):
    """
    Employs several Bi-LSTMs to encode the question, the passage and the
    candidate answers separately. The question state flows into the passage
    and candidate-answer encoders as history information.
    """
    with tf.variable_scope('question_encoding'):
        self.sep_q_encodes, self.question_state = rnn('bi-lstm', self.q_emb,
                                                      self.q_length, self.hidden_size)
    with tf.variable_scope('passage_encoding'):
        self.sep_p_encodes, _ = rnn('bi-lstm', self.p_emb, self.p_length,
                                    self.hidden_size,
                                    state=self.question_state, history=True)
    with tf.variable_scope('answer_f_encoding'):
        self.sep_af_encodes, _ = rnn('bi-lstm', self.a_f_emb, self.a_f_length,
                                     self.hidden_size,
                                     state=self.question_state, history=True)
    with tf.variable_scope('answer_s_encoding'):
        self.sep_as_encodes, _ = rnn('bi-lstm', self.a_s_emb, self.a_s_length,
                                     self.hidden_size,
                                     state=self.question_state, history=True)
    with tf.variable_scope('answer_t_encoding'):
        self.sep_at_encodes, _ = rnn('bi-lstm', self.a_t_emb, self.a_t_length,
                                     self.hidden_size,
                                     state=self.question_state, history=True)
def _encode(self):
    """
    Employs two Bi-LSTMs to encode passage and question separately

    basic.rnn(rnn_type, inputs, length, hidden_size, layer_num=1,
              dropout_keep_prob=None, concat=True)
        rnn_type: the type of rnn
        inputs: padded inputs into rnn
        length: the valid length of the inputs
        hidden_size: the size of hidden units
        layer_num: multiple rnn layers are stacked if layer_num > 1
        dropout_keep_prob:
        concat: When the rnn is bidirectional, the forward outputs and backward
            outputs are concatenated if this is True, else we add them.
    """
    with tf.variable_scope('passage_encoding'):
        self.sep_p_encodes, _ = rnn('bi-lstm', self.p_emb, self.p_length,
                                    self.hidden_size)
    with tf.variable_scope('question_encoding'):
        self.sep_q_encodes, _ = rnn('bi-lstm', self.q_emb, self.q_length,
                                    self.hidden_size)
    if self.use_dropout:
        self.sep_p_encodes = tf.nn.dropout(self.sep_p_encodes, self.dropout_keep_prob)
        self.sep_q_encodes = tf.nn.dropout(self.sep_q_encodes, self.dropout_keep_prob)
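# The rnn(...) helper whose signature the docstring above describes is a thin
# wrapper around TF 1.x dynamic RNNs. A minimal sketch consistent with that
# docstring, covering only the single-layer 'bi-lstm' case used throughout this
# listing; this is an assumption about the helper, not the project's actual code:
import tensorflow as tf


def rnn(rnn_type, inputs, length, hidden_size, layer_num=1,
        dropout_keep_prob=None, concat=True):
    """Bi-LSTM wrapper returning (per-step outputs, final hidden-state summary)."""
    if not rnn_type.startswith('bi') or layer_num != 1:
        raise NotImplementedError('this sketch covers single-layer bi-lstm only')
    cell_fw = tf.nn.rnn_cell.LSTMCell(hidden_size, state_is_tuple=True)
    cell_bw = tf.nn.rnn_cell.LSTMCell(hidden_size, state_is_tuple=True)
    if dropout_keep_prob is not None:
        cell_fw = tf.nn.rnn_cell.DropoutWrapper(cell_fw,
                                                output_keep_prob=dropout_keep_prob)
        cell_bw = tf.nn.rnn_cell.DropoutWrapper(cell_bw,
                                                output_keep_prob=dropout_keep_prob)
    outputs, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
        cell_fw, cell_bw, inputs, sequence_length=length, dtype=tf.float32)
    # Keep only the LSTM hidden state h as the "state" summary of each direction.
    h_fw, h_bw = state_fw.h, state_bw.h
    if concat:
        # [batch, time, 2 * hidden_size] and [batch, 2 * hidden_size]
        return tf.concat(outputs, 2), tf.concat([h_fw, h_bw], 1)
    # Sum the two directions instead of concatenating them.
    return outputs[0] + outputs[1], h_fw + h_bw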