def _bcnn_semantic_feature_layer(self, seq_left, seq_right, dpool_index=None, granularity="word"):
    name = self.model_name + granularity
    seq_len = self.params["max_seq_len_%s" % granularity]

    # [batch, s, d] => [batch, s, d, 1]
    seq_left = tf.expand_dims(seq_left, axis=-1)
    seq_right = tf.expand_dims(seq_right, axis=-1)

    left_ap_list = [None] * (self.params["bcnn_num_layers"] + 1)
    right_ap_list = [None] * (self.params["bcnn_num_layers"] + 1)
    left_ap_list[0] = self._all_ap(x=seq_left, seq_len=seq_len,
                                   name=name + "global_pooling_input_left")
    right_ap_list[0] = self._all_ap(x=seq_right, seq_len=seq_len,
                                    name=name + "global_pooling_input_right")

    x1 = seq_left
    x2 = seq_right
    d = self.params["embedding_dim"]
    outputs = []
    for layer in range(self.params["bcnn_num_layers"]):
        x1, left_ap_list[layer + 1], x2, right_ap_list[layer + 1], att_pooled = \
            self._bcnn_cnn_layer(
                x1=x1, x2=x2, seq_len=seq_len, d=d,
                name=name + "cnn_layer_%d" % (layer + 1),
                dpool_index=dpool_index,
                granularity=granularity)
        d = self.params["bcnn_num_filters"]
        if self.params["bcnn_mp_att_pooling"] and att_pooled is not None:
            outputs.append(att_pooled)

    for l, r in zip(left_ap_list, right_ap_list):
        outputs.append(metrics.cosine_similarity(
            l, r, self.params["similarity_aggregation"]))
        outputs.append(metrics.dot_product(
            l, r, self.params["similarity_aggregation"]))
        outputs.append(metrics.euclidean_distance(
            l, r, self.params["similarity_aggregation"]))

    return tf.concat(outputs, axis=-1)
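For context, _all_ap above is BCNN's "all-ap" pooling: a global average over the full sequence axis, applied both to the raw embeddings and to each convolution layer's output, so that every depth contributes one pooled vector per side. Below is a minimal sketch of what such a pooling can look like in TF 1.x, assuming a [batch, s, d, 1] input as in the code above; the repository's actual _all_ap may differ (for instance, it presumably uses seq_len as the pooling window width or to mask padded positions):

import tensorflow as tf

def all_ap_sketch(x, seq_len, name):
    # x: [batch, s, d, 1]. Average over the sequence axis s to get one
    # pooled vector per example. seq_len is unused in this sketch; the
    # real implementation presumably uses it for the pooling width or masking.
    with tf.name_scope(name):
        pooled = tf.reduce_mean(x, axis=1)    # [batch, d, 1]
        return tf.squeeze(pooled, axis=-1)    # [batch, d]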
def _get_matching_features(self):
    with tf.name_scope(self.model_name):
        tf.set_random_seed(self.params["random_seed"])

        with tf.name_scope("word_network"):
            if self.params["attend_method"] == "context-attention":
                emb_seq_word_left, enc_seq_word_left, att_seq_word_left, sem_seq_word_left, \
                emb_seq_word_right, enc_seq_word_right, att_seq_word_right, sem_seq_word_right = \
                    self._interaction_semantic_feature_layer(
                        self.seq_word_left,
                        self.seq_word_right,
                        self.seq_len_word_left,
                        self.seq_len_word_right,
                        granularity="word")
            else:
                emb_seq_word_left, enc_seq_word_left, att_seq_word_left, sem_seq_word_left = \
                    self._semantic_feature_layer(
                        self.seq_word_left,
                        self.seq_len_word_left,
                        granularity="word",
                        reuse=False)
                emb_seq_word_right, enc_seq_word_right, att_seq_word_right, sem_seq_word_right = \
                    self._semantic_feature_layer(
                        self.seq_word_right,
                        self.seq_len_word_right,
                        granularity="word",
                        reuse=True)

            #### matching
            # match score
            sim_word = tf.concat([
                metrics.cosine_similarity(
                    sem_seq_word_left, sem_seq_word_right,
                    self.params["similarity_aggregation"]),
                metrics.dot_product(
                    sem_seq_word_left, sem_seq_word_right,
                    self.params["similarity_aggregation"]),
                metrics.euclidean_distance(
                    sem_seq_word_left, sem_seq_word_right,
                    self.params["similarity_aggregation"]),
                # self._canberra_score(sem_seq_word_left, sem_seq_word_right),
            ], axis=-1)

            # match pyramid
            match_matrix_word = self._get_match_matrix(
                self.seq_word_left, emb_seq_word_left, enc_seq_word_left,
                self.seq_word_right, emb_seq_word_right, enc_seq_word_right,
                granularity="word")
            mp_word = self._mp_semantic_feature_layer(
                match_matrix_word,
                self.dpool_index_word,
                granularity="word")

            # esim
            esim_word = self._esim_semantic_feature_layer(
                emb_seq_word_left,
                emb_seq_word_right,
                self.seq_len_word_left,
                self.seq_len_word_right,
                granularity="word")

            # bcnn
            bcnn_word = self._bcnn_semantic_feature_layer(
                emb_seq_word_left,
                emb_seq_word_right,
                granularity="word")

            # dense
            deep_in_word = tf.concat([sem_seq_word_left, sem_seq_word_right], axis=-1)
            deep_word = mlp_layer(
                deep_in_word,
                fc_type=self.params["fc_type"],
                hidden_units=self.params["fc_hidden_units"],
                dropouts=self.params["fc_dropouts"],
                scope_name=self.model_name + "deep_word",
                reuse=False,
                training=self.training,
                seed=self.params["random_seed"])

        with tf.name_scope("char_network"):
            if self.params["attend_method"] == "context-attention":
                emb_seq_char_left, enc_seq_char_left, att_seq_char_left, sem_seq_char_left, \
                emb_seq_char_right, enc_seq_char_right, att_seq_char_right, sem_seq_char_right = \
                    self._interaction_semantic_feature_layer(
                        self.seq_char_left,
                        self.seq_char_right,
                        self.seq_len_char_left,
                        self.seq_len_char_right,
                        granularity="char")
            else:
                emb_seq_char_left, enc_seq_char_left, att_seq_char_left, sem_seq_char_left = \
                    self._semantic_feature_layer(
                        self.seq_char_left,
                        self.seq_len_char_left,
                        granularity="char",
                        reuse=False)
                emb_seq_char_right, enc_seq_char_right, att_seq_char_right, sem_seq_char_right = \
                    self._semantic_feature_layer(
                        self.seq_char_right,
                        self.seq_len_char_right,
                        granularity="char",
                        reuse=True)

            # match score
            sim_char = tf.concat([
                metrics.cosine_similarity(
                    sem_seq_char_left, sem_seq_char_right,
                    self.params["similarity_aggregation"]),
                metrics.dot_product(
                    sem_seq_char_left, sem_seq_char_right,
                    self.params["similarity_aggregation"]),
                metrics.euclidean_distance(
                    sem_seq_char_left, sem_seq_char_right,
                    self.params["similarity_aggregation"]),
                # self._canberra_score(sem_seq_char_left, sem_seq_char_right),
            ], axis=-1)

            # match pyramid
            match_matrix_char = self._get_match_matrix(
                self.seq_char_left, emb_seq_char_left, enc_seq_char_left,
                self.seq_char_right, emb_seq_char_right, enc_seq_char_right,
                granularity="char")
            mp_char = self._mp_semantic_feature_layer(
                match_matrix_char,
                self.dpool_index_char,
                granularity="char")

            # esim
            esim_char = self._esim_semantic_feature_layer(
                emb_seq_char_left,
                emb_seq_char_right,
                self.seq_len_char_left,
                self.seq_len_char_right,
                granularity="char")

            # bcnn
            bcnn_char = self._bcnn_semantic_feature_layer(
                emb_seq_char_left,
                emb_seq_char_right,
                granularity="char")

            # dense
            deep_in_char = tf.concat([sem_seq_char_left, sem_seq_char_right], axis=-1)
            deep_char = mlp_layer(
                deep_in_char,
                fc_type=self.params["fc_type"],
                hidden_units=self.params["fc_hidden_units"],
                dropouts=self.params["fc_dropouts"],
                scope_name=self.model_name + "deep_char",
                reuse=False,
                training=self.training,
                seed=self.params["random_seed"])

        with tf.name_scope("matching_features"):
            matching_features_word = tf.concat([
                sim_word, mp_word, esim_word, bcnn_word, deep_word,
                # sem_seq_word_left, sem_seq_word_right,
            ], axis=-1)
            matching_features_char = tf.concat([
                sim_char, mp_char, esim_char, bcnn_char, deep_char,
                # sem_seq_char_left, sem_seq_char_right,
            ], axis=-1)

    return matching_features_word, matching_features_char
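The match-pyramid branch hinges on _get_match_matrix, which turns the two sequences into a 2-D interaction grid that _mp_semantic_feature_layer then convolves and dynamically pools. As a hedged illustration only (the name dot_match_matrix_sketch and the choice of a plain dot-product interaction are assumptions, not the repository's implementation), one common way to build such a grid is:

import tensorflow as tf

def dot_match_matrix_sketch(enc_left, enc_right):
    # enc_left:  [batch, s1, d], enc_right: [batch, s2, d].
    # Pairwise dot products yield a [batch, s1, s2] interaction grid;
    # a trailing channel axis makes it conv2d-ready: [batch, s1, s2, 1].
    mm = tf.matmul(enc_left, enc_right, transpose_b=True)
    return tf.expand_dims(mm, axis=-1)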
def _get_matching_features(self):
    with tf.name_scope(self.model_name):
        tf.set_random_seed(self.params["random_seed"])

        with tf.name_scope("word_network"):
            if self.params["attend_method"] == "context-attention":
                emb_seq_word_left, enc_seq_word_left, att_seq_word_left, sem_seq_word_left, \
                emb_seq_word_right, enc_seq_word_right, att_seq_word_right, sem_seq_word_right = \
                    self._interaction_semantic_feature_layer(
                        self.seq_word_left,
                        self.seq_word_right,
                        self.seq_len_word_left,
                        self.seq_len_word_right,
                        granularity="word")
            else:
                # This branch corresponds to DSSM's two-tower model.
                # Layer 1 of the semantic feature stack is the embedding of seq_word_left;
                # the embedding and the training flag then go through dropout
                # (embedding_dropout = 0.3 in the default config). Note that self.training
                # is passed down from the BaseModel into this first layer.
                # Layer 2 is the encoder, whose input dimension is the embedding dimension;
                # DSSM encodes with FastText, whose output dimension equals the embedding
                # dimension, and the reuse flag has no effect on FastText. (Why does
                # FastText simply return its input unchanged??)
                # Layer 3 is attention; DSSM applies several attention methods, so the
                # attention layer has multiple outputs, which are then tf.concat-ed.
                # Layer 4 is a plain multi-layer FC: the network is built from the number
                # of elements in params["fc_hidden_units"], paired with params["fc_dropouts"].
                emb_seq_word_left, enc_seq_word_left, att_seq_word_left, sem_seq_word_left = \
                    self._semantic_feature_layer(
                        self.seq_word_left,      # a tf.Variable, initialized by the BaseModel
                        self.seq_len_word_left,  # also a tf.Variable
                        granularity="word",
                        reuse=False)             # reuse=False should be the query tower
                emb_seq_word_right, enc_seq_word_right, att_seq_word_right, sem_seq_word_right = \
                    self._semantic_feature_layer(
                        self.seq_word_right,
                        self.seq_len_word_right,
                        granularity="word",
                        reuse=True)              # reuse=True should be the doc tower
                                                 # (one query is matched against multiple docs)

            # match score: the cosine similarity of the original paper. The author also
            # adds dot product and Euclidean distance, so the similarity is a
            # three-dimensional vector.
            sim_word = tf.concat([
                metrics.cosine_similarity(
                    sem_seq_word_left, sem_seq_word_right,
                    self.params["similarity_aggregation"]),
                metrics.dot_product(
                    sem_seq_word_left, sem_seq_word_right,
                    self.params["similarity_aggregation"]),
                metrics.euclidean_distance(
                    sem_seq_word_left, sem_seq_word_right,
                    self.params["similarity_aggregation"]),
                # self._canberra_score(sem_seq_word_left, sem_seq_word_right),
            ], axis=-1)

        with tf.name_scope("char_network"):
            if self.params["attend_method"] == "context-attention":
                emb_seq_char_left, enc_seq_char_left, att_seq_char_left, sem_seq_char_left, \
                emb_seq_char_right, enc_seq_char_right, att_seq_char_right, sem_seq_char_right = \
                    self._interaction_semantic_feature_layer(
                        self.seq_char_left,
                        self.seq_char_right,
                        self.seq_len_char_left,
                        self.seq_len_char_right,
                        granularity="char")
            else:
                emb_seq_char_left, enc_seq_char_left, att_seq_char_left, sem_seq_char_left = \
                    self._semantic_feature_layer(
                        self.seq_char_left,
                        self.seq_len_char_left,
                        granularity="char",
                        reuse=False)
                emb_seq_char_right, enc_seq_char_right, att_seq_char_right, sem_seq_char_right = \
                    self._semantic_feature_layer(
                        self.seq_char_right,
                        self.seq_len_char_right,
                        granularity="char",
                        reuse=True)

            # match score
            sim_char = tf.concat([
                metrics.cosine_similarity(
                    sem_seq_char_left, sem_seq_char_right,
                    self.params["similarity_aggregation"]),
                metrics.dot_product(
                    sem_seq_char_left, sem_seq_char_right,
                    self.params["similarity_aggregation"]),
                metrics.euclidean_distance(
                    sem_seq_char_left, sem_seq_char_right,
                    self.params["similarity_aggregation"]),
                # self._canberra_score(sem_seq_char_left, sem_seq_char_right),
            ], axis=-1)

        with tf.name_scope("matching_features"):
            matching_features_word = sim_word
            matching_features_char = sim_char

    return matching_features_word, matching_features_char
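The reuse=False / reuse=True pair above is the standard TF 1.x way to make the two towers share weights: the left (query) call creates the variables, the right (doc) call reuses them. A minimal self-contained sketch of that pattern (dense_tower and its layer sizes are made up for illustration):

import tensorflow as tf

def dense_tower(x, reuse):
    # The same variable_scope name on both calls means shared weights.
    with tf.variable_scope("tower", reuse=reuse):
        h = tf.layers.dense(x, 64, activation=tf.nn.relu, name="fc1")
        return tf.layers.dense(h, 32, name="fc2")

query = tf.placeholder(tf.float32, [None, 128])
doc = tf.placeholder(tf.float32, [None, 128])
sem_query = dense_tower(query, reuse=False)  # creates tower/fc1, tower/fc2
sem_doc = dense_tower(doc, reuse=True)       # reuses the same variables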