def build_encoder(self, index, input_lengths, *args, **kargs):
    reuse = kargs["reuse"]
    word_emb = self.build_emebdding(index, *args, **kargs)
    # Dropout is active only in training; tf.cond keeps one graph for both modes.
    dropout_rate = tf.cond(self.is_training,
                           lambda: self.config.dropout_rate,
                           lambda: 0.0)
    # TF1 tf.nn.dropout takes a keep probability, hence 1 - dropout_rate.
    word_emb = tf.nn.dropout(word_emb, 1 - dropout_rate)
    with tf.variable_scope(self.config.scope + "_input_highway", reuse=reuse):
        input_dim = word_emb.get_shape()[-1]
        sent_repres = match_utils.multi_highway_layer(
            word_emb, input_dim, self.config.highway_layer_num)
        if self.config.rnn == "lstm":
            # my_lstm_layer returns forward, backward and concatenated outputs.
            [sent_repres_fw, sent_repres_bw, sent_repres] = layer_utils.my_lstm_layer(
                sent_repres, self.config.context_lstm_dim,
                input_lengths=input_lengths,
                scope_name=self.config.scope,
                reuse=reuse,
                is_training=self.is_training,
                dropout_rate=dropout_rate,
                use_cudnn=self.config.use_cudnn)
        elif self.config.rnn == "slstm":
            # Project embeddings to the S-LSTM size; the projection doubles as
            # both the initial hidden and cell states.
            word_emb_proj = tf.layers.dense(word_emb,
                                            self.config.slstm_hidden_size)
            initial_hidden_states = word_emb_proj
            initial_cell_states = tf.identity(initial_hidden_states)
            [new_hidden_states, new_cell_states,
             dummynode_hidden_states] = slstm_utils.slstm_cell(
                self.config, self.config.scope,
                self.config.slstm_hidden_size, input_lengths,
                initial_hidden_states, initial_cell_states,
                self.config.slstm_layer_num, dropout_rate, reuse=reuse)
            sent_repres = new_hidden_states
    return sent_repres
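# A minimal, self-contained sketch of the tf.cond dropout gating used in the
# encoders above. `configured_rate` stands in for config.dropout_rate (an
# assumed name, not a repo identifier); TF1 graph mode is assumed, and TF1's
# tf.nn.dropout takes a keep probability, hence the 1 - rate below.
import tensorflow as tf

def dropout_gate_sketch(x, is_training, configured_rate=0.2):
    # Rate collapses to 0.0 at inference, so one graph serves train and eval.
    rate = tf.cond(is_training,
                   lambda: tf.constant(configured_rate),
                   lambda: tf.constant(0.0))
    return tf.nn.dropout(x, keep_prob=1.0 - rate)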
def build_encoder(self, sent_repres, input_lengths, *args, **kargs):
    reuse = kargs["reuse"]
    dropout_rate = tf.cond(self.is_training,
                           lambda: self.config.dropout_rate,
                           lambda: 0.0)
    with tf.variable_scope(self.config.scope + "_input_highway", reuse=reuse):
        if self.config.rnn == "lstm":
            [sent_repres_fw, sent_repres_bw, sent_repres] = layer_utils.my_lstm_layer(
                sent_repres, self.config.context_lstm_dim,
                input_lengths=input_lengths,
                scope_name=self.config.scope,
                reuse=reuse,
                is_training=self.is_training,
                dropout_rate=dropout_rate,
                use_cudnn=self.config.use_cudnn,
                lstm_type=self.config.lstm_type)
            # Forward and backward outputs are concatenated.
            match_dim = self.config.context_lstm_dim * 2
        elif self.config.rnn == "slstm":
            # This variant receives sent_repres (there is no word_emb in
            # scope), so project it to the S-LSTM hidden size.
            sent_repres_proj = tf.layers.dense(sent_repres,
                                               self.config.slstm_hidden_size)
            initial_hidden_states = sent_repres_proj
            initial_cell_states = tf.identity(initial_hidden_states)
            [new_hidden_states, new_cell_states,
             dummynode_hidden_states] = slstm_utils.slstm_cell(
                self.config, self.config.scope,
                self.config.slstm_hidden_size, input_lengths,
                initial_hidden_states, initial_cell_states,
                self.config.slstm_layer_num, dropout_rate, reuse=reuse)
            match_dim = self.config.slstm_hidden_size * 2
            sent_repres = new_hidden_states
    return sent_repres, match_dim
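# A hedged sketch of the reuse contract implied by kargs["reuse"]: the first
# call creates the encoder variables, the second call shares them so both
# sentences pass through identical weights. `model` and the input tensors are
# assumed names for illustration, not from the repo.
def encode_pair_sketch(model, sent1_repres, sent2_repres, sent1_len, sent2_len):
    sent1_enc, match_dim = model.build_encoder(sent1_repres, sent1_len,
                                               reuse=None)
    sent2_enc, _ = model.build_encoder(sent2_repres, sent2_len, reuse=True)
    return sent1_enc, sent2_enc, match_dim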
def build_interactor(self, sent1_repres, sent2_repres, sent1_len, sent2_len,
                     sent1_mask, sent2_mask, *args, **kargs):
    reuse = kargs["reuse"]
    input_dim = sent1_repres.get_shape()[-1]
    dropout_rate = tf.cond(self.is_training,
                           lambda: self.config.dropout_rate,
                           lambda: 0.0)
    with tf.variable_scope(self.config.scope + "_interaction_module",
                           reuse=reuse):
        if self.config.with_self_attention:
            # Shared multi-head self-attention over both sentences; the second
            # call reuses the first call's variables. dropout_rate is already
            # zeroed at inference by tf.cond, so is_training=True is inert there.
            v_1_attn = esim_utils.multihead_attention(
                sent1_repres, sent1_repres,
                num_units=None,
                num_heads=self.config.num_heads,
                dropout_rate=dropout_rate,
                is_training=True,
                causality=False,
                scope="multihead_attention",
                reuse=None)
            v_2_attn = esim_utils.multihead_attention(
                sent2_repres, sent2_repres,
                num_units=None,
                num_heads=self.config.num_heads,
                dropout_rate=dropout_rate,
                is_training=True,
                causality=False,
                scope="multihead_attention",
                reuse=True)
            sent1_repres = tf.concat([sent1_repres, v_1_attn], axis=-1)
            sent2_repres = tf.concat([sent2_repres, v_2_attn], axis=-1)

        # ESIM-style cross-attention between the two sentences.
        [query_attention_outputs,
         context_attention_outputs] = esim_utils.query_context_attention(
            sent1_repres, sent2_repres, sent1_len, sent2_len,
            sent1_mask, sent2_mask, dropout_rate,
            self.config.scope, reuse=reuse)

        if self.config.rnn == "lstm":
            [sent1_repres_fw, sent1_repres_bw, sent1_repres] = layer_utils.my_lstm_layer(
                query_attention_outputs, self.config.context_lstm_dim,
                input_lengths=sent1_len, scope_name=self.config.scope,
                reuse=None, is_training=self.is_training,
                dropout_rate=dropout_rate, use_cudnn=self.config.use_cudnn)
            [sent2_repres_fw, sent2_repres_bw, sent2_repres] = layer_utils.my_lstm_layer(
                context_attention_outputs, self.config.context_lstm_dim,
                input_lengths=sent2_len, scope_name=self.config.scope,
                reuse=True, is_training=self.is_training,
                dropout_rate=dropout_rate, use_cudnn=self.config.use_cudnn)
            # 2 (bi-direction) * 2 (avg + max pooling) * 2 (two sentences) = 8.
            match_dim = self.config.context_lstm_dim * 8
        elif self.config.rnn == "slstm":
            sent1_initial_hidden_states = tf.layers.dense(
                query_attention_outputs, self.config.slstm_hidden_size)
            sent1_initial_cell_states = tf.identity(sent1_initial_hidden_states)
            [new_sent1_hidden_states, new_sent1_cell_states,
             dummynode_sent1_hidden_states] = slstm_utils.slstm_cell(
                self.config, self.config.scope,
                self.config.slstm_hidden_size, sent1_len,
                sent1_initial_hidden_states, sent1_initial_cell_states,
                self.config.slstm_layer_num, dropout_rate, reuse=None)
            sent1_repres = new_sent1_hidden_states

            sent2_initial_hidden_states = tf.layers.dense(
                context_attention_outputs, self.config.slstm_hidden_size)
            sent2_initial_cell_states = tf.identity(sent2_initial_hidden_states)
            [new_sent2_hidden_states, new_sent2_cell_states,
             dummynode_sent2_hidden_states] = slstm_utils.slstm_cell(
                self.config, self.config.scope,
                self.config.slstm_hidden_size, sent2_len,
                sent2_initial_hidden_states, sent2_initial_cell_states,
                self.config.slstm_layer_num, dropout_rate, reuse=True)
            sent2_repres = new_sent2_hidden_states
            match_dim = self.config.slstm_hidden_size * 4

        # Length-normalized average pooling over all timesteps.
        v_1_sum = tf.reduce_sum(sent1_repres, 1)
        v_1_ave = tf.div(v_1_sum, tf.expand_dims(
            tf.cast(sent1_len, tf.float32) + EPSILON, -1))
        v_2_sum = tf.reduce_sum(sent2_repres, 1)
        v_2_ave = tf.div(v_2_sum, tf.expand_dims(
            tf.cast(sent2_len, tf.float32) + EPSILON, -1))

        # Masked max pooling: padded positions are pushed to a large negative
        # value before the max so they can never be selected.
        mask_q = tf.expand_dims(sent1_mask, -1)
        mask_c = tf.expand_dims(sent2_mask, -1)
        v_1_max = tf.reduce_max(
            qanet_layers.mask_logits(sent1_repres, mask_q), axis=1)
        v_2_max = tf.reduce_max(
            qanet_layers.mask_logits(sent2_repres, mask_c), axis=1)

        out1 = tf.concat([v_1_ave, v_1_max], axis=-1)
        out2 = tf.concat([v_2_ave, v_2_max], axis=-1)
        out = tf.concat([v_1_ave, v_2_ave, v_1_max, v_2_max], 1)
    return out1, out2, out, match_dim
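# A minimal classification head over the interactor's fused output `out`
# (width match_dim), sketched here as an illustration; `num_classes` is an
# assumed hyperparameter and this is not the repo's actual prediction head.
import tensorflow as tf

def classifier_head_sketch(out, match_dim, num_classes):
    hidden = tf.layers.dense(out, match_dim // 2, activation=tf.nn.relu)
    logits = tf.layers.dense(hidden, num_classes)
    return logits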
def build_encoder(self, input_lengths, input_mask, *args, **kargs):
    reuse = kargs["reuse"]
    word_emb = self.build_emebdding(*args, **kargs)
    dropout_rate = tf.cond(self.is_training,
                           lambda: self.config.dropout_rate,
                           lambda: 0.0)
    word_emb = tf.nn.dropout(word_emb, 1 - dropout_rate)
    with tf.variable_scope(self.config.scope + "_input_highway", reuse=reuse):
        input_dim = word_emb.get_shape()[-1]
        sent_repres = match_utils.multi_highway_layer(
            word_emb, input_dim, self.config.highway_layer_num)
        if self.config.rnn == "lstm":
            [sent_repres_fw, sent_repres_bw, sent_repres] = layer_utils.my_lstm_layer(
                sent_repres, self.config.context_lstm_dim,
                input_lengths=input_lengths,
                scope_name=self.config.scope,
                reuse=reuse,
                is_training=self.is_training,
                dropout_rate=dropout_rate,
                use_cudnn=self.config.use_cudnn)
            match_dim = self.config.context_lstm_dim * 6
        elif self.config.rnn == "slstm":
            word_emb_proj = tf.layers.dense(word_emb,
                                            self.config.slstm_hidden_size)
            initial_hidden_states = word_emb_proj
            initial_cell_states = tf.identity(initial_hidden_states)
            [new_hidden_states, new_cell_states,
             dummynode_hidden_states] = slstm_utils.slstm_cell(
                self.config, self.config.scope,
                self.config.slstm_hidden_size, input_lengths,
                initial_hidden_states, initial_cell_states,
                self.config.slstm_layer_num, dropout_rate, reuse=reuse)
            sent_repres = new_hidden_states
            match_dim = self.config.slstm_hidden_size * 3
        if self.config.multi_head:
            mask = tf.cast(input_mask, tf.float32)
            ignore_padding = (1 - mask)
            ignore_padding = label_network_utils.attention_bias_ignore_padding(
                ignore_padding)
            encoder_self_attention_bias = ignore_padding
            sent_repres = label_network_utils.multihead_attention_texar(
                sent_repres,
                memory=None,
                memory_attention_bias=encoder_self_attention_bias,
                num_heads=8,
                num_units=128,
                dropout_rate=dropout_rate,
                scope="multihead_attention")
        v_attn = self_attn.multi_dimensional_attention(
            sent_repres, input_mask,
            'multi_dim_attn_for_%s' % self.config.scope,
            1 - dropout_rate, self.is_training,
            self.config.weight_decay, "relu")
        mask = tf.expand_dims(input_mask, -1)
        v_sum = tf.reduce_sum(sent_repres * tf.cast(mask, tf.float32), 1)
        v_ave = tf.div(
            v_sum,
            tf.expand_dims(tf.cast(input_lengths, tf.float32) + EPSILON, -1))
        v_max = tf.reduce_max(qanet_layers.mask_logits(sent_repres, mask),
                              axis=1)
        v_last = esim_utils.last_relevant_output(sent_repres, input_lengths)
        out = tf.concat([v_ave, v_max, v_last, v_attn], axis=-1)
    return out, match_dim
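# esim_utils.last_relevant_output is external to this snippet; the usual
# semantics (a sketch under that assumption, not necessarily the repo's exact
# implementation) is to gather the hidden state at each sequence's final
# valid timestep.
import tensorflow as tf

def last_relevant_output_sketch(outputs, lengths):
    """outputs: [batch, time, dim]; lengths: [batch] int32 valid lengths."""
    batch_size = tf.shape(outputs)[0]
    # (i, lengths[i] - 1) indexes the last non-padded step of sequence i.
    indices = tf.stack([tf.range(batch_size), lengths - 1], axis=1)
    return tf.gather_nd(outputs, indices)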
def build_encoder(self, index, input_lengths, input_mask, *args, **kargs):
    reuse = kargs["reuse"]
    word_emb = self.build_emebdding(index, *args, **kargs)
    dropout_rate = tf.cond(self.is_training,
                           lambda: self.config.dropout_rate,
                           lambda: 0.0)
    word_emb = tf.nn.dropout(word_emb, 1 - dropout_rate)
    with tf.variable_scope(self.config.scope + "_input_highway", reuse=reuse):
        input_dim = word_emb.get_shape()[-1]
        sent_repres = match_utils.multi_highway_layer(
            word_emb, input_dim, self.config.highway_layer_num)
        if self.config.rnn == "lstm":
            [sent_repres_fw, sent_repres_bw, sent_repres] = layer_utils.my_lstm_layer(
                sent_repres, self.config.context_lstm_dim,
                input_lengths=input_lengths,
                scope_name=self.config.scope,
                reuse=reuse,
                is_training=self.is_training,
                dropout_rate=dropout_rate,
                use_cudnn=self.config.use_cudnn)
        elif self.config.rnn == "slstm":
            word_emb_proj = tf.layers.dense(word_emb,
                                            self.config.slstm_hidden_size)
            initial_hidden_states = word_emb_proj
            initial_cell_states = tf.identity(initial_hidden_states)
            [new_hidden_states, new_cell_states,
             dummynode_hidden_states] = slstm_utils.slstm_cell(
                self.config, self.config.scope,
                self.config.slstm_hidden_size, input_lengths,
                initial_hidden_states, initial_cell_states,
                self.config.slstm_layer_num, dropout_rate, reuse=reuse)
            sent_repres = new_hidden_states
        elif self.config.rnn == "base_transformer":
            sent_repres = base_transformer_utils.transformer_encoder(
                sent_repres,
                target_space=None,
                hparams=self.config,
                features=None,
                make_image_summary=False)
        elif self.config.rnn == "universal_transformer":
            sent_repres, act_loss = universal_transformer_utils.universal_transformer_encoder(
                sent_repres,
                target_space=None,
                hparams=self.config,
                features=None,
                make_image_summary=False)
        elif self.config.rnn == "highway":
            # Keep the highway output as the sentence representation.
            pass
        input_mask = tf.expand_dims(tf.cast(input_mask, tf.float32), axis=-1)
        sent_repres_sum = tf.reduce_sum(sent_repres * input_mask, axis=1)
        sent_repres_avr = tf.div(
            sent_repres_sum,
            tf.expand_dims(tf.cast(input_lengths, tf.float32) + EPSILON, -1))
        if self.config.metric == "Hyperbolic":
            # Clip the summed representation to stay inside the unit ball,
            # as required by the hyperbolic metric.
            sent_repres = tf.clip_by_norm(sent_repres_sum, 1.0 - EPSILON,
                                          axes=1)
        else:
            sent_repres = sent_repres_avr
    if self.config.rnn == "universal_transformer":
        return sent_repres, act_loss
    else:
        return sent_repres
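# The masked pooling above leans on qanet_layers.mask_logits, which is
# external to this snippet. A standard, self-contained equivalent (an
# assumption about mask_logits' behavior) adds a large negative value at
# padded positions so padding never wins the max, while the average divides
# by the true length plus EPSILON; the EPSILON value here is assumed.
import tensorflow as tf

EPSILON = 1e-8
VERY_NEGATIVE = -1e30

def masked_pool_sketch(repres, mask, lengths):
    """repres: [batch, time, dim]; mask: [batch, time] 0/1; lengths: [batch]."""
    mask = tf.cast(tf.expand_dims(mask, -1), tf.float32)  # [batch, time, 1]
    # Average over valid timesteps only.
    avg = tf.reduce_sum(repres * mask, axis=1) / tf.expand_dims(
        tf.cast(lengths, tf.float32) + EPSILON, -1)
    # Push padded timesteps far below any real activation before the max.
    maxed = tf.reduce_max(repres + (1.0 - mask) * VERY_NEGATIVE, axis=1)
    return avg, maxed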