def _semantic_feature_layer(self, seq_input, seq_len, granularity="word", reuse=False):
    assert granularity in ["char", "word"]
    #### embed
    emb_matrix = self._get_embedding_matrix(granularity)
    emb_seq = tf.nn.embedding_lookup(emb_matrix, seq_input)

    #### dropout
    random_seed = np.random.randint(10000000)
    emb_seq = word_dropout(emb_seq,
                           training=self.training,
                           dropout=self.params["embedding_dropout"],
                           seed=random_seed)

    #### encode
    input_dim = self.params["embedding_dim"]
    enc_seq = encode(emb_seq,
                     method=self.params["encode_method"],
                     input_dim=input_dim,
                     params=self.params,
                     sequence_length=seq_len,
                     mask_zero=self.params["embedding_mask_zero"],
                     scope_name=self.model_name + "enc_seq_%s" % granularity,
                     reuse=reuse,
                     training=self.training)

    #### attend
    feature_dim = self.params["encode_dim"]
    context = None
    att_seq = attend(enc_seq,
                     context=context,
                     encode_dim=self.params["encode_dim"],
                     feature_dim=feature_dim,
                     attention_dim=self.params["attention_dim"],
                     method=self.params["attend_method"],
                     scope_name=self.model_name + "att_seq_%s" % granularity,
                     reuse=reuse,
                     num_heads=self.params["attention_num_heads"])

    #### MLP nonlinear projection
    sem_seq = mlp_layer(att_seq,
                        fc_type=self.params["fc_type"],
                        hidden_units=self.params["fc_hidden_units"],
                        dropouts=self.params["fc_dropouts"],
                        scope_name=self.model_name + "sem_seq_%s" % granularity,
                        reuse=reuse,
                        training=self.training,
                        seed=self.params["random_seed"])

    return emb_seq, enc_seq, att_seq, sem_seq
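# NOTE (illustrative sketch, not the repo's implementation): word_dropout is
# defined elsewhere in this codebase. A plausible reading, assumed here, is
# that it draws one keep/drop decision per token and broadcasts it across the
# embedding dimension, so a dropped word vanishes entirely instead of losing
# random coordinates. The helper name `word_dropout_sketch` is hypothetical.
def word_dropout_sketch(emb_seq, training, dropout, seed):
    # emb_seq: [batch, seq_len, emb_dim]; training: bool tensor/placeholder
    shape = tf.shape(emb_seq)
    noise_shape = [shape[0], shape[1], 1]  # one Bernoulli draw per token
    return tf.layers.dropout(emb_seq, rate=dropout, noise_shape=noise_shape,
                             seed=seed, training=training)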
def _get_prediction(self):
    with tf.name_scope(self.model_name + "/"):
        with tf.name_scope("prediction"):
            lst = []
            # "granularity" selects the word-based and/or char-based branch;
            # judging from the arguments in main, usually only one is specified.
            if "word" in self.params["granularity"]:
                lst.append(self.matching_features_word)
            if "char" in self.params["granularity"]:
                lst.append(self.matching_features_char)
            # "use_features" defaults to False in main, and DSSM does not
            # override it, so handcrafted features are off by default.
            if self.params["use_features"]:
                # mlp_layer is a multi-layer perceptron supporting "fc",
                # "dense", and "resnet" layer types.
                out_0 = mlp_layer(self.features,
                                  fc_type=self.params["fc_type"],
                                  hidden_units=self.params["fc_hidden_units"],
                                  dropouts=self.params["fc_dropouts"],
                                  scope_name=self.model_name + "mlp_features",
                                  reuse=False,
                                  training=self.training,
                                  seed=self.params["random_seed"])
                lst.append(out_0)
            out = tf.concat(lst, axis=-1)
            out = tf.layers.Dropout(self.params["final_dropout"])(out, training=self.training)
            # A second MLP (reusing the fc_hidden_units config) fuses the
            # concatenated matching features before the final projection.
            out = mlp_layer(out,
                            fc_type=self.params["fc_type"],
                            hidden_units=self.params["fc_hidden_units"],
                            dropouts=self.params["fc_dropouts"],
                            scope_name=self.model_name + "mlp",
                            reuse=False,
                            training=self.training,
                            seed=self.params["random_seed"])
            # The final single-unit dense layer has no activation function:
            # it outputs the raw logit, and sigmoid maps it to a match
            # probability.
            logits = tf.layers.dense(out, 1,
                                     activation=None,
                                     kernel_initializer=tf.glorot_uniform_initializer(
                                         seed=self.params["random_seed"]),
                                     name=self.model_name + "logits")
            logits = tf.squeeze(logits, axis=1)
            proba = tf.nn.sigmoid(logits)
            return logits, proba
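# NOTE (illustrative sketch): the loss is defined elsewhere in the repo; a
# typical pairing for the (logits, proba) returned above is sigmoid
# cross-entropy on the raw logits, which `proba = tf.nn.sigmoid(logits)`
# suggests. `sigmoid_loss_sketch` and its arguments are hypothetical names.
def sigmoid_loss_sketch(logits, labels):
    # logits: [batch] raw scores; labels: [batch] float 0/1 match indicators
    per_example = tf.nn.sigmoid_cross_entropy_with_logits(labels=labels,
                                                          logits=logits)
    return tf.reduce_mean(per_example)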
def _get_matching_features(self):
    with tf.name_scope(self.model_name):
        tf.set_random_seed(self.params["random_seed"])

        with tf.name_scope("word_network"):
            if self.params["attend_method"] == "context-attention":
                emb_seq_word_left, enc_seq_word_left, att_seq_word_left, sem_seq_word_left, \
                emb_seq_word_right, enc_seq_word_right, att_seq_word_right, sem_seq_word_right = \
                    self._interaction_semantic_feature_layer(
                        self.seq_word_left,
                        self.seq_word_right,
                        self.seq_len_word_left,
                        self.seq_len_word_right,
                        granularity="word")
            else:
                emb_seq_word_left, enc_seq_word_left, att_seq_word_left, sem_seq_word_left = \
                    self._semantic_feature_layer(
                        self.seq_word_left,
                        self.seq_len_word_left,
                        granularity="word", reuse=False)
                emb_seq_word_right, enc_seq_word_right, att_seq_word_right, sem_seq_word_right = \
                    self._semantic_feature_layer(
                        self.seq_word_right,
                        self.seq_len_word_right,
                        granularity="word", reuse=True)

            #### matching
            # match score
            sim_word = tf.concat([
                metrics.cosine_similarity(sem_seq_word_left, sem_seq_word_right,
                                          self.params["similarity_aggregation"]),
                metrics.dot_product(sem_seq_word_left, sem_seq_word_right,
                                    self.params["similarity_aggregation"]),
                metrics.euclidean_distance(sem_seq_word_left, sem_seq_word_right,
                                           self.params["similarity_aggregation"]),
                # self._canberra_score(sem_seq_word_left, sem_seq_word_right),
            ], axis=-1)

            # match pyramid
            match_matrix_word = self._get_match_matrix(
                self.seq_word_left, emb_seq_word_left, enc_seq_word_left,
                self.seq_word_right, emb_seq_word_right, enc_seq_word_right,
                granularity="word")
            mp_word = self._mp_semantic_feature_layer(
                match_matrix_word, self.dpool_index_word, granularity="word")

            # esim
            esim_word = self._esim_semantic_feature_layer(
                emb_seq_word_left, emb_seq_word_right,
                self.seq_len_word_left, self.seq_len_word_right,
                granularity="word")

            # bcnn
            bcnn_word = self._bcnn_semantic_feature_layer(
                emb_seq_word_left, emb_seq_word_right, granularity="word")

            # dense
            deep_in_word = tf.concat([sem_seq_word_left, sem_seq_word_right], axis=-1)
            deep_word = mlp_layer(deep_in_word,
                                  fc_type=self.params["fc_type"],
                                  hidden_units=self.params["fc_hidden_units"],
                                  dropouts=self.params["fc_dropouts"],
                                  scope_name=self.model_name + "deep_word",
                                  reuse=False,
                                  training=self.training,
                                  seed=self.params["random_seed"])

        with tf.name_scope("char_network"):
            if self.params["attend_method"] == "context-attention":
                emb_seq_char_left, enc_seq_char_left, att_seq_char_left, sem_seq_char_left, \
                emb_seq_char_right, enc_seq_char_right, att_seq_char_right, sem_seq_char_right = \
                    self._interaction_semantic_feature_layer(
                        self.seq_char_left,
                        self.seq_char_right,
                        self.seq_len_char_left,
                        self.seq_len_char_right,
                        granularity="char")
            else:
                emb_seq_char_left, enc_seq_char_left, att_seq_char_left, sem_seq_char_left = \
                    self._semantic_feature_layer(
                        self.seq_char_left,
                        self.seq_len_char_left,
                        granularity="char", reuse=False)
                emb_seq_char_right, enc_seq_char_right, att_seq_char_right, sem_seq_char_right = \
                    self._semantic_feature_layer(
                        self.seq_char_right,
                        self.seq_len_char_right,
                        granularity="char", reuse=True)

            # match score
            sim_char = tf.concat([
                metrics.cosine_similarity(sem_seq_char_left, sem_seq_char_right,
                                          self.params["similarity_aggregation"]),
                metrics.dot_product(sem_seq_char_left, sem_seq_char_right,
                                    self.params["similarity_aggregation"]),
                metrics.euclidean_distance(sem_seq_char_left, sem_seq_char_right,
                                           self.params["similarity_aggregation"]),
                # self._canberra_score(sem_seq_char_left, sem_seq_char_right),
            ], axis=-1)

            # match pyramid
            match_matrix_char = self._get_match_matrix(
                self.seq_char_left, emb_seq_char_left, enc_seq_char_left,
                self.seq_char_right, emb_seq_char_right, enc_seq_char_right,
                granularity="char")
            mp_char = self._mp_semantic_feature_layer(
                match_matrix_char, self.dpool_index_char, granularity="char")

            # esim
            esim_char = self._esim_semantic_feature_layer(
                emb_seq_char_left, emb_seq_char_right,
                self.seq_len_char_left, self.seq_len_char_right,
                granularity="char")

            # bcnn
            bcnn_char = self._bcnn_semantic_feature_layer(
                emb_seq_char_left, emb_seq_char_right, granularity="char")

            # dense
            deep_in_char = tf.concat([sem_seq_char_left, sem_seq_char_right], axis=-1)
            deep_char = mlp_layer(deep_in_char,
                                  fc_type=self.params["fc_type"],
                                  hidden_units=self.params["fc_hidden_units"],
                                  dropouts=self.params["fc_dropouts"],
                                  scope_name=self.model_name + "deep_char",
                                  reuse=False,
                                  training=self.training,
                                  seed=self.params["random_seed"])

        with tf.name_scope("matching_features"):
            matching_features_word = tf.concat([
                sim_word, mp_word, esim_word, bcnn_word, deep_word,
                # sem_seq_word_left, sem_seq_word_right,
            ], axis=-1)
            matching_features_char = tf.concat([
                sim_char, mp_char, esim_char, bcnn_char, deep_char,
                # sem_seq_char_left, sem_seq_char_right,
            ], axis=-1)

    return matching_features_word, matching_features_char
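# NOTE (illustrative sketch): metrics.cosine_similarity / dot_product /
# euclidean_distance come from the repo's metrics module, whose exact handling
# of "similarity_aggregation" is not shown here. The sketch below captures the
# core idea for the cosine case; `cosine_similarity_sketch` is hypothetical.
def cosine_similarity_sketch(v_left, v_right):
    # v_left, v_right: [batch, dim] semantic vectors from the two towers
    v_left = tf.nn.l2_normalize(v_left, axis=-1)
    v_right = tf.nn.l2_normalize(v_right, axis=-1)
    # keepdims so the score concatenates with the other match features
    return tf.reduce_sum(v_left * v_right, axis=-1, keepdims=True)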
def _interaction_semantic_feature_layer(self, seq_input_left, seq_input_right,
                                        seq_len_left, seq_len_right, granularity="word"):
    assert granularity in ["char", "word"]
    #### embed
    emb_matrix = self._get_embedding_matrix(granularity)
    emb_seq_left = tf.nn.embedding_lookup(emb_matrix, seq_input_left)
    emb_seq_right = tf.nn.embedding_lookup(emb_matrix, seq_input_right)

    #### dropout
    random_seed = np.random.randint(10000000)
    emb_seq_left = word_dropout(emb_seq_left,
                                training=self.training,
                                dropout=self.params["embedding_dropout"],
                                seed=random_seed)
    random_seed = np.random.randint(10000000)
    emb_seq_right = word_dropout(emb_seq_right,
                                 training=self.training,
                                 dropout=self.params["embedding_dropout"],
                                 seed=random_seed)

    #### encode
    input_dim = self.params["embedding_dim"]
    enc_seq_left = encode(emb_seq_left,
                          method=self.params["encode_method"],
                          input_dim=input_dim,
                          params=self.params,
                          sequence_length=seq_len_left,
                          mask_zero=self.params["embedding_mask_zero"],
                          scope_name=self.model_name + "enc_seq_%s" % granularity,
                          reuse=False,
                          training=self.training)
    enc_seq_right = encode(emb_seq_right,
                           method=self.params["encode_method"],
                           input_dim=input_dim,
                           params=self.params,
                           sequence_length=seq_len_right,
                           mask_zero=self.params["embedding_mask_zero"],
                           scope_name=self.model_name + "enc_seq_%s" % granularity,
                           reuse=True,
                           training=self.training)

    #### attend
    # att_mat: [batchsize, s1, s2]
    att_mat = tf.einsum("abd,acd->abc", enc_seq_left, enc_seq_right)
    feature_dim = self.params["encode_dim"] + self.params["max_seq_len_%s" % granularity]
    att_seq_left = attend(enc_seq_left,
                          context=att_mat,
                          feature_dim=feature_dim,
                          method=self.params["attend_method"],
                          scope_name=self.model_name + "att_seq_%s" % granularity,
                          reuse=False)
    # Transpose only the two time axes to get [batchsize, s2, s1]; a bare
    # tf.transpose(att_mat) would reverse all axes, including batch.
    att_seq_right = attend(enc_seq_right,
                           context=tf.transpose(att_mat, perm=[0, 2, 1]),
                           feature_dim=feature_dim,
                           method=self.params["attend_method"],
                           scope_name=self.model_name + "att_seq_%s" % granularity,
                           reuse=True)

    #### MLP nonlinear projection
    sem_seq_left = mlp_layer(att_seq_left,
                             fc_type=self.params["fc_type"],
                             hidden_units=self.params["fc_hidden_units"],
                             dropouts=self.params["fc_dropouts"],
                             scope_name=self.model_name + "sem_seq_%s" % granularity,
                             reuse=False,
                             training=self.training,
                             seed=self.params["random_seed"])
    sem_seq_right = mlp_layer(att_seq_right,
                              fc_type=self.params["fc_type"],
                              hidden_units=self.params["fc_hidden_units"],
                              dropouts=self.params["fc_dropouts"],
                              scope_name=self.model_name + "sem_seq_%s" % granularity,
                              reuse=True,
                              training=self.training,
                              seed=self.params["random_seed"])

    return emb_seq_left, enc_seq_left, att_seq_left, sem_seq_left, \
           emb_seq_right, enc_seq_right, att_seq_right, sem_seq_right
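# NOTE (illustrative sketch): the einsum above computes a dot product between
# every left timestep and every right timestep, i.e. a batched matmul with the
# right operand transposed. The sketch shows that equivalence plus the
# per-batch transpose fed to the right tower; the function name is hypothetical.
def attention_matrix_sketch(enc_seq_left, enc_seq_right):
    # enc_seq_left: [batch, s1, d], enc_seq_right: [batch, s2, d]
    att_mat = tf.einsum("abd,acd->abc", enc_seq_left, enc_seq_right)
    same_thing = tf.matmul(enc_seq_left, enc_seq_right, transpose_b=True)
    # context for the right sequence: swap only the two time axes
    att_mat_right = tf.transpose(att_mat, perm=[0, 2, 1])
    return att_mat, same_thing, att_mat_right  # [b,s1,s2], [b,s1,s2], [b,s2,s1]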