import numpy as np
import tensorflow as tf

# NOTE: encode, attend, word_dropout and mlp_layer are assumed to be
# importable from the project's shared nn modules; they are not defined here.


def _interaction_semantic_feature_layer(self, seq_input_left, seq_input_right,
                                        seq_len_left, seq_len_right,
                                        granularity="word"):
    assert granularity in ["char", "word"]
    #### embed
    emb_matrix = self._get_embedding_matrix(granularity)
    emb_seq_left = tf.nn.embedding_lookup(emb_matrix, seq_input_left)
    emb_seq_right = tf.nn.embedding_lookup(emb_matrix, seq_input_right)

    #### dropout
    random_seed = np.random.randint(10000000)
    emb_seq_left = word_dropout(emb_seq_left,
                                training=self.training,
                                dropout=self.params["embedding_dropout"],
                                seed=random_seed)
    random_seed = np.random.randint(10000000)
    emb_seq_right = word_dropout(emb_seq_right,
                                 training=self.training,
                                 dropout=self.params["embedding_dropout"],
                                 seed=random_seed)

    #### encode
    enc_seq_left = encode(emb_seq_left,
                          method=self.params["encode_method"],
                          params=self.params,
                          sequence_length=seq_len_left,
                          mask_zero=self.params["embedding_mask_zero"],
                          scope_name=self.model_name + "enc_seq_%s" % granularity,
                          reuse=False)
    enc_seq_right = encode(emb_seq_right,
                           method=self.params["encode_method"],
                           params=self.params,
                           sequence_length=seq_len_right,
                           mask_zero=self.params["embedding_mask_zero"],
                           scope_name=self.model_name + "enc_seq_%s" % granularity,
                           reuse=True)

    #### attend
    # att_mat: [batchsize, s1, s2]
    att_mat = tf.einsum("abd,acd->abc", enc_seq_left, enc_seq_right)
    feature_dim = self.params["encode_dim"] + self.params["max_seq_len_%s" % granularity]
    att_seq_left = attend(enc_seq_left,
                          context=att_mat,
                          feature_dim=feature_dim,
                          method=self.params["attend_method"],
                          scope_name=self.model_name + "att_seq_%s" % granularity,
                          reuse=False)
    # transpose only the two sequence axes; a bare tf.transpose on a rank-3
    # tensor would also swap the batch axis
    att_seq_right = attend(enc_seq_right,
                           context=tf.transpose(att_mat, perm=[0, 2, 1]),
                           feature_dim=feature_dim,
                           method=self.params["attend_method"],
                           scope_name=self.model_name + "att_seq_%s" % granularity,
                           reuse=True)

    #### MLP nonlinear projection
    sem_seq_left = self._mlp_layer(att_seq_left,
                                   fc_type=self.params["fc_type"],
                                   hidden_units=self.params["fc_hidden_units"],
                                   dropouts=self.params["fc_dropouts"],
                                   scope_name=self.model_name + "sem_seq_%s" % granularity,
                                   reuse=False)
    sem_seq_right = self._mlp_layer(att_seq_right,
                                    fc_type=self.params["fc_type"],
                                    hidden_units=self.params["fc_hidden_units"],
                                    dropouts=self.params["fc_dropouts"],
                                    scope_name=self.model_name + "sem_seq_%s" % granularity,
                                    reuse=True)

    return emb_seq_left, enc_seq_left, att_seq_left, sem_seq_left, \
           emb_seq_right, enc_seq_right, att_seq_right, sem_seq_right
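
# A minimal, self-contained sanity check (NumPy only, not part of the model)
# of what the einsum above computes: att_mat[b, i, j] is the dot product
# between left token i and right token j, which is why the right-side
# context must be transposed with perm=[0, 2, 1].
def _check_interaction_matrix_semantics():
    batch, s1, s2, d = 2, 4, 5, 3
    left = np.random.rand(batch, s1, d)
    right = np.random.rand(batch, s2, d)
    att_mat = np.einsum("abd,acd->abc", left, right)   # [batch, s1, s2]
    assert np.allclose(att_mat[0, 1, 2], left[0, 1] @ right[0, 2])
    att_mat_t = np.transpose(att_mat, (0, 2, 1))       # [batch, s2, s1]
    assert att_mat_t.shape == (batch, s2, s1)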
def _base_feature_extractor(self, emb_seq, seq_len, name, reuse):
    #### encode
    input_dim = self.params["embedding_dim"]
    enc_seq = encode(emb_seq,
                     method=self.params["encode_method"],
                     input_dim=input_dim,
                     params=self.params,
                     sequence_length=seq_len,
                     mask_zero=self.params["embedding_mask_zero"],
                     scope_name=self.model_name + "_encode_%s" % name,
                     reuse=reuse,
                     training=self.training)

    #### attend
    feature_dim = self.params["encode_dim"]
    context = None
    att_seq = attend(enc_seq,
                     context=context,
                     encode_dim=self.params["encode_dim"],
                     feature_dim=feature_dim,
                     attention_dim=self.params["attention_dim"],
                     method=self.params["attend_method"],
                     scope_name=self.model_name + "_attention_%s" % name,
                     reuse=reuse,
                     num_heads=self.params["attention_num_heads"])
    return att_seq
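
# Hypothetical usage sketch: extract features for both sides of a text pair
# with shared weights by reusing the same variable scopes on the second call
# (the variable names below are illustrative, not from this file):
#
#   att_seq_left = self._base_feature_extractor(emb_seq_left, seq_len_left,
#                                               name="pair", reuse=False)
#   att_seq_right = self._base_feature_extractor(emb_seq_right, seq_len_right,
#                                                name="pair", reuse=True)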
def _semantic_feature_layer(self, seq_input, seq_len, granularity="word",
                            reuse=False):
    assert granularity in ["char", "word"]
    #### embed
    emb_matrix = self._get_embedding_matrix(granularity)
    emb_seq = tf.nn.embedding_lookup(emb_matrix, seq_input)

    #### dropout
    random_seed = np.random.randint(10000000)
    emb_seq = word_dropout(emb_seq,
                           training=self.training,
                           dropout=self.params["embedding_dropout"],
                           seed=random_seed)

    #### encode
    input_dim = self.params["embedding_dim"]
    enc_seq = encode(emb_seq,
                     method=self.params["encode_method"],
                     input_dim=input_dim,
                     params=self.params,
                     sequence_length=seq_len,
                     mask_zero=self.params["embedding_mask_zero"],
                     scope_name=self.model_name + "enc_seq_%s" % granularity,
                     reuse=reuse,
                     training=self.training)

    #### attend
    feature_dim = self.params["encode_dim"]
    context = None
    att_seq = attend(enc_seq,
                     context=context,
                     encode_dim=self.params["encode_dim"],
                     feature_dim=feature_dim,
                     attention_dim=self.params["attention_dim"],
                     method=self.params["attend_method"],
                     scope_name=self.model_name + "att_seq_%s" % granularity,
                     reuse=reuse,
                     num_heads=self.params["attention_num_heads"])

    #### MLP nonlinear projection
    sem_seq = mlp_layer(att_seq,
                        fc_type=self.params["fc_type"],
                        hidden_units=self.params["fc_hidden_units"],
                        dropouts=self.params["fc_dropouts"],
                        scope_name=self.model_name + "sem_seq_%s" % granularity,
                        reuse=reuse,
                        training=self.training,
                        seed=self.params["random_seed"])

    return emb_seq, enc_seq, att_seq, sem_seq
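
# word_dropout is assumed to drop whole token vectors rather than individual
# units. A minimal sketch of one plausible TF 1.x implementation, using a
# broadcast noise shape so a dropped word is zeroed across all embedding
# dimensions at once (not necessarily the project's actual helper):
def _word_dropout_sketch(x, training, dropout, seed):
    if dropout > 0.:
        input_shape = tf.shape(x)
        # noise shape [batch, seq_len, 1] ties the dropout mask to tokens
        noise_shape = [input_shape[0], input_shape[1], 1]
        x = tf.layers.dropout(x, rate=dropout, noise_shape=noise_shape,
                              seed=seed, training=training)
    return x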
def _semantic_feature_layer(self, seq_input, granularity="word", reuse=False,
                            return_enc=False):
    assert granularity in ["char", "word"]
    #### embed
    emb_matrix = self._get_embedding_matrix(granularity)
    emb_seq = tf.nn.embedding_lookup(emb_matrix, seq_input)

    #### dropout
    emb_seq = word_dropout(emb_seq,
                           training=self.training,
                           dropout=self.params["embedding_dropout"],
                           seed=self.params["random_seed"])

    #### encode
    enc_seq = encode(emb_seq,
                     method=self.params["encode_method"],
                     params=self.params,
                     scope_name=self.model_name + "enc_seq_%s" % granularity,
                     reuse=reuse)

    #### attend
    feature_dim = self.params["encode_dim"]
    context = None
    att_seq = attend(enc_seq,
                     context=context,
                     feature_dim=feature_dim,
                     method=self.params["attend_method"],
                     scope_name=self.model_name + "att_seq_%s" % granularity,
                     reuse=reuse)

    #### MLP nonlinear projection
    sem_seq = self._mlp_layer(att_seq,
                              fc_type=self.params["fc_type"],
                              hidden_units=self.params["fc_hidden_units"],
                              dropouts=self.params["fc_dropouts"],
                              scope_name=self.model_name + "sem_seq_%s" % granularity,
                              reuse=reuse)

    if return_enc:
        return sem_seq, enc_seq
    else:
        return sem_seq
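
# Hypothetical usage sketch of the return_enc flag: share one semantic tower
# across both sides of a pair and keep the encoder outputs for a separate
# interaction branch (the variable names below are illustrative):
#
#   sem_left, enc_left = self._semantic_feature_layer(
#       seq_word_left, granularity="word", reuse=False, return_enc=True)
#   sem_right, enc_right = self._semantic_feature_layer(
#       seq_word_right, granularity="word", reuse=True, return_enc=True)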
def _esim_semantic_feature_layer(self, emb_seq_left, emb_seq_right,
                                 seq_len_left, seq_len_right,
                                 granularity="word"):
    # The embed + dropout block below is kept commented out so that the
    # embeddings can be shared with another sub-graph:
    # #### embed
    # emb_matrix = self._get_embedding_matrix(granularity)
    # emb_seq_left = tf.nn.embedding_lookup(emb_matrix, seq_input_left)
    # emb_seq_right = tf.nn.embedding_lookup(emb_matrix, seq_input_right)
    #
    # #### dropout
    # random_seed = np.random.randint(10000000)
    # emb_seq_left = word_dropout(emb_seq_left,
    #                             training=self.training,
    #                             dropout=self.params["embedding_dropout"],
    #                             seed=random_seed)
    # random_seed = np.random.randint(10000000)
    # emb_seq_right = word_dropout(emb_seq_right,
    #                              training=self.training,
    #                              dropout=self.params["embedding_dropout"],
    #                              seed=random_seed)

    #### encode
    enc_seq_left = encode(emb_seq_left,
                          method=self.params["encode_method"],
                          params=self.params,
                          sequence_length=seq_len_left,
                          mask_zero=self.params["embedding_mask_zero"],
                          scope_name=self.model_name + "esim_enc_seq_%s" % granularity,
                          reuse=False)
    enc_seq_right = encode(emb_seq_right,
                           method=self.params["encode_method"],
                           params=self.params,
                           sequence_length=seq_len_right,
                           mask_zero=self.params["embedding_mask_zero"],
                           scope_name=self.model_name + "esim_enc_seq_%s" % granularity,
                           reuse=True)

    #### align
    ali_seq_left, ali_seq_right = self._soft_attention_alignment(
        enc_seq_left, enc_seq_right)

    #### compose
    com_seq_left = tf.concat([
        enc_seq_left,
        ali_seq_left,
        enc_seq_left * ali_seq_left,
        enc_seq_left - ali_seq_left,
    ], axis=-1)
    com_seq_right = tf.concat([
        enc_seq_right,
        ali_seq_right,
        enc_seq_right * ali_seq_right,
        enc_seq_right - ali_seq_right,
    ], axis=-1)
    compare_seq_left = encode(com_seq_left,
                              method=self.params["encode_method"],
                              params=self.params,
                              sequence_length=seq_len_left,
                              mask_zero=self.params["embedding_mask_zero"],
                              scope_name=self.model_name + "compare_seq_%s" % granularity,
                              reuse=False)
    compare_seq_right = encode(com_seq_right,
                               method=self.params["encode_method"],
                               params=self.params,
                               sequence_length=seq_len_right,
                               mask_zero=self.params["embedding_mask_zero"],
                               scope_name=self.model_name + "compare_seq_%s" % granularity,
                               reuse=True)

    #### attend
    feature_dim = self.params["encode_dim"]
    att_seq_left = attend(compare_seq_left,
                          context=None,
                          encode_dim=self.params["encode_dim"],
                          feature_dim=feature_dim,
                          attention_dim=self.params["attention_dim"],
                          method=self.params["attend_method"],
                          scope_name=self.model_name + "agg_seq_%s" % granularity,
                          reuse=False,
                          num_heads=self.params["attention_num_heads"])
    att_seq_right = attend(compare_seq_right,
                           context=None,
                           encode_dim=self.params["encode_dim"],
                           feature_dim=feature_dim,
                           attention_dim=self.params["attention_dim"],
                           method=self.params["attend_method"],
                           scope_name=self.model_name + "agg_seq_%s" % granularity,
                           reuse=True,
                           num_heads=self.params["attention_num_heads"])

    return tf.concat([att_seq_left, att_seq_right], axis=-1)
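
# _soft_attention_alignment is referenced above but not defined in this file.
# A minimal sketch of the standard ESIM-style soft alignment (Chen et al.,
# 2017), assuming inputs of shape [batch, s1, d] and [batch, s2, d]; the real
# method would likely also mask padding positions before the softmax:
def _soft_attention_alignment_sketch(seq_left, seq_right):
    # attention[b, i, j] = <left_i, right_j>
    attention = tf.matmul(seq_left, seq_right, transpose_b=True)  # [b, s1, s2]
    # normalize over the opposite sequence, then take weighted sums
    w_left = tf.nn.softmax(attention, axis=-1)
    w_right = tf.nn.softmax(tf.transpose(attention, perm=[0, 2, 1]), axis=-1)
    ali_seq_left = tf.matmul(w_left, seq_right)    # [b, s1, d], aligned to left
    ali_seq_right = tf.matmul(w_right, seq_left)   # [b, s2, d], aligned to right
    return ali_seq_left, ali_seq_right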