Example #1
    def _bcnn_semantic_feature_layer(self,
                                     seq_left,
                                     seq_right,
                                     dpool_index=None,
                                     granularity="word"):
        name = self.model_name + granularity
        seq_len = self.params["max_seq_len_%s" % granularity]
        # [batch, s, d] => [batch, s, d, 1]
        seq_left = tf.expand_dims(seq_left, axis=-1)
        seq_right = tf.expand_dims(seq_right, axis=-1)

        left_ap_list = [None] * (self.params["bcnn_num_layers"] + 1)
        right_ap_list = [None] * (self.params["bcnn_num_layers"] + 1)
        left_ap_list[0] = self._all_ap(x=seq_left,
                                       seq_len=seq_len,
                                       name=name + "global_pooling_input_left")
        right_ap_list[0] = self._all_ap(x=seq_right,
                                        seq_len=seq_len,
                                        name=name + "global_pooling_input_right")

        x1 = seq_left
        x2 = seq_right
        d = self.params["embedding_dim"]
        outputs = []
        for layer in range(self.params["bcnn_num_layers"]):
            (x1, left_ap_list[layer + 1], x2, right_ap_list[layer + 1],
             att_pooled) = self._bcnn_cnn_layer(
                    x1=x1,
                    x2=x2,
                    seq_len=seq_len,
                    d=d,
                    name=name + "cnn_layer_%d" % (layer + 1),
                    dpool_index=dpool_index,
                    granularity=granularity)
            d = self.params["bcnn_num_filters"]
            if self.params["bcnn_mp_att_pooling"] and att_pooled is not None:
                outputs.append(att_pooled)

        # pairwise similarities between the all-ap pooled representations of every layer
        for left_ap, right_ap in zip(left_ap_list, right_ap_list):
            outputs.append(
                metrics.cosine_similarity(
                    left_ap, right_ap, self.params["similarity_aggregation"]))
            outputs.append(
                metrics.dot_product(
                    left_ap, right_ap, self.params["similarity_aggregation"]))
            outputs.append(
                metrics.euclidean_distance(
                    left_ap, right_ap, self.params["similarity_aggregation"]))
        return tf.concat(outputs, axis=-1)
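
The metrics helpers and _all_ap used above are defined elsewhere in the project and not shown on this page. Below is a minimal sketch of what they could look like, assuming TF 1.x, assuming each metric returns a [batch, 1] tensor, and assuming the aggregation argument (taken from params["similarity_aggregation"]) can be ignored; _all_ap is assumed to follow the BCNN paper's all-ap, i.e. average pooling over the full sequence axis:

    import tensorflow as tf

    def cosine_similarity(v1, v2, aggregation=None):
        # aggregation mirrors the call sites above; assumed unused in this sketch
        v1_n = tf.nn.l2_normalize(v1, axis=-1)
        v2_n = tf.nn.l2_normalize(v2, axis=-1)
        return tf.reduce_sum(v1_n * v2_n, axis=-1, keepdims=True)

    def dot_product(v1, v2, aggregation=None):
        return tf.reduce_sum(v1 * v2, axis=-1, keepdims=True)

    def euclidean_distance(v1, v2, aggregation=None):
        # epsilon keeps the gradient of sqrt finite when the two vectors coincide
        return tf.sqrt(
            tf.reduce_sum(tf.square(v1 - v2), axis=-1, keepdims=True) + 1e-12)

    def all_ap(x, seq_len, name):
        # x: [batch, s, d, 1] -> [batch, d]; average over the whole sequence axis
        return tf.reduce_mean(tf.squeeze(x, axis=-1), axis=1, name=name)

Concatenating the three metrics along the last axis, as every example here does, yields a 3-dimensional similarity feature per pair of pooled vectors.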
Example #2
    def _get_matching_features(self):
        with tf.name_scope(self.model_name):
            tf.set_random_seed(self.params["random_seed"])

            with tf.name_scope("word_network"):
                if self.params["attend_method"] == "context-attention":
                    emb_seq_word_left, enc_seq_word_left, att_seq_word_left, sem_seq_word_left, \
                    emb_seq_word_right, enc_seq_word_right, att_seq_word_right, sem_seq_word_right = \
                        self._interaction_semantic_feature_layer(
                            self.seq_word_left,
                            self.seq_word_right,
                            self.seq_len_word_left,
                            self.seq_len_word_right,
                            granularity="word")
                else:
                    emb_seq_word_left, enc_seq_word_left, att_seq_word_left, sem_seq_word_left = \
                        self._semantic_feature_layer(
                            self.seq_word_left,
                            self.seq_len_word_left,
                            granularity="word", reuse=False)
                    emb_seq_word_right, enc_seq_word_right, att_seq_word_right, sem_seq_word_right = \
                        self._semantic_feature_layer(
                            self.seq_word_right,
                            self.seq_len_word_right,
                            granularity="word", reuse=True)

                #### matching
                # match score
                sim_word = tf.concat(
                    [
                        metrics.cosine_similarity(
                            sem_seq_word_left, sem_seq_word_right,
                            self.params["similarity_aggregation"]),
                        metrics.dot_product(
                            sem_seq_word_left, sem_seq_word_right,
                            self.params["similarity_aggregation"]),
                        metrics.euclidean_distance(
                            sem_seq_word_left, sem_seq_word_right,
                            self.params["similarity_aggregation"]),
                        # self._canberra_score(sem_seq_word_left, sem_seq_word_right),
                    ],
                    axis=-1)

                # match pyramid
                match_matrix_word = self._get_match_matrix(self.seq_word_left,
                                                           emb_seq_word_left,
                                                           enc_seq_word_left,
                                                           self.seq_word_right,
                                                           emb_seq_word_right,
                                                           enc_seq_word_right,
                                                           granularity="word")
                mp_word = self._mp_semantic_feature_layer(
                    match_matrix_word,
                    self.dpool_index_word,
                    granularity="word")

                # esim
                esim_word = self._esim_semantic_feature_layer(
                    emb_seq_word_left,
                    emb_seq_word_right,
                    self.seq_len_word_left,
                    self.seq_len_word_right,
                    granularity="word")

                # bcnn
                bcnn_word = self._bcnn_semantic_feature_layer(
                    emb_seq_word_left, emb_seq_word_right, granularity="word")

                # dense
                deep_in_word = tf.concat(
                    [sem_seq_word_left, sem_seq_word_right], axis=-1)
                deep_word = mlp_layer(
                    deep_in_word,
                    fc_type=self.params["fc_type"],
                    hidden_units=self.params["fc_hidden_units"],
                    dropouts=self.params["fc_dropouts"],
                    scope_name=self.model_name + "deep_word",
                    reuse=False,
                    training=self.training,
                    seed=self.params["random_seed"])

            with tf.name_scope("char_network"):
                if self.params["attend_method"] == "context-attention":
                    emb_seq_char_left, enc_seq_char_left, att_seq_char_left, sem_seq_char_left, \
                    emb_seq_char_right, enc_seq_char_right, att_seq_char_right, sem_seq_char_right = \
                        self._interaction_semantic_feature_layer(
                            self.seq_char_left,
                            self.seq_char_right,
                            self.seq_len_char_left,
                            self.seq_len_char_right,
                            granularity="char")
                else:
                    emb_seq_char_left, enc_seq_char_left, att_seq_char_left, sem_seq_char_left = \
                        self._semantic_feature_layer(
                            self.seq_char_left,
                            self.seq_len_char_left,
                            granularity="char", reuse=False)
                    emb_seq_char_right, enc_seq_char_right, att_seq_char_right, sem_seq_char_right = \
                        self._semantic_feature_layer(
                            self.seq_char_right,
                            self.seq_len_char_right,
                            granularity="char", reuse=True)

                # match score
                sim_char = tf.concat(
                    [
                        metrics.cosine_similarity(
                            sem_seq_char_left, sem_seq_char_right,
                            self.params["similarity_aggregation"]),
                        metrics.dot_product(
                            sem_seq_char_left, sem_seq_char_right,
                            self.params["similarity_aggregation"]),
                        metrics.euclidean_distance(
                            sem_seq_char_left, sem_seq_char_right,
                            self.params["similarity_aggregation"]),
                        # self._canberra_score(sem_seq_char_left, sem_seq_char_right),
                    ],
                    axis=-1)

                # match pyramid
                match_matrix_char = self._get_match_matrix(self.seq_char_left,
                                                           emb_seq_char_left,
                                                           enc_seq_char_left,
                                                           self.seq_char_right,
                                                           emb_seq_char_right,
                                                           enc_seq_char_right,
                                                           granularity="char")
                mp_char = self._mp_semantic_feature_layer(
                    match_matrix_char,
                    self.dpool_index_char,
                    granularity="char")

                # esim
                esim_char = self._esim_semantic_feature_layer(
                    emb_seq_char_left,
                    emb_seq_char_right,
                    self.seq_len_char_left,
                    self.seq_len_char_right,
                    granularity="char")

                # bcnn
                bcnn_char = self._bcnn_semantic_feature_layer(
                    emb_seq_char_left, emb_seq_char_right, granularity="char")

                # dense
                deep_in_char = tf.concat(
                    [sem_seq_char_left, sem_seq_char_right], axis=-1)
                deep_char = mlp_layer(
                    deep_in_char,
                    fc_type=self.params["fc_type"],
                    hidden_units=self.params["fc_hidden_units"],
                    dropouts=self.params["fc_dropouts"],
                    scope_name=self.model_name + "deep_char",
                    reuse=False,
                    training=self.training,
                    seed=self.params["random_seed"])

            with tf.name_scope("matching_features"):
                matching_features_word = tf.concat(
                    [
                        sim_word,
                        mp_word,
                        esim_word,
                        bcnn_word,
                        deep_word,  # sem_seq_word_left, sem_seq_word_right,
                    ],
                    axis=-1)
                matching_features_char = tf.concat(
                    [
                        sim_char,
                        mp_char,
                        esim_char,
                        bcnn_char,
                        deep_char,  # sem_seq_char_left, sem_seq_char_right,
                    ],
                    axis=-1)

        return matching_features_word, matching_features_char
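
mlp_layer is imported from elsewhere in the project. Below is a minimal sketch consistent with its call sites above (hidden_units and dropouts zipped into a stack of dense layers), assuming TF 1.x and ignoring whatever variants fc_type selects between:

    import tensorflow as tf

    def mlp_layer(x, fc_type, hidden_units, dropouts, scope_name,
                  reuse=False, training=False, seed=None):
        # Sketch only: fc_type presumably selects between FC block variants,
        # which are not reproduced here; each element of hidden_units becomes
        # one ReLU layer followed by dropout at the matching rate.
        with tf.variable_scope(scope_name, reuse=reuse):
            for i, (units, rate) in enumerate(zip(hidden_units, dropouts)):
                x = tf.layers.dense(
                    x, units, activation=tf.nn.relu,
                    kernel_initializer=tf.glorot_uniform_initializer(seed=seed),
                    name="fc_%d" % i)
                x = tf.layers.dropout(x, rate=rate, training=training)
        return x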
Example #3
    def _get_matching_features(self):
        with tf.name_scope(self.model_name):
            tf.set_random_seed(self.params["random_seed"])

            with tf.name_scope("word_network"):
                if self.params["attend_method"] == "context-attention":
                    emb_seq_word_left, enc_seq_word_left, att_seq_word_left, sem_seq_word_left, \
                    emb_seq_word_right, enc_seq_word_right, att_seq_word_right, sem_seq_word_right = \
                        self._interaction_semantic_feature_layer(
                            self.seq_word_left,
                            self.seq_word_right,
                            self.seq_len_word_left,
                            self.seq_len_word_right,
                            granularity="word")
                else:
                    # This branch should correspond to DSSM's two-tower model.

                    # The first stage of the semantic layer is the embedding of seq_word_left;
                    # the embedding and the training flag then feed a dropout step
                    # (default config: embedding_dropout = 0.3).
                    # Note that the input to this semantic feature composition layer receives
                    # the training flag from the base model via the self.training variable.
                    # The second stage is the encoding layer, whose input dimension is the
                    # embedding dimension. DSSM uses FastText encoding, whose output dimension
                    # equals the embedding dimension; the reuse flag has no effect on FastText.
                    # (Why does FastText simply return its input unchanged??)
                    # The third stage is the attention layer. DSSM uses several attention
                    # methods, so this layer has multiple outputs, which are then tf.concat-ed.
                    # The fourth stage is a plain multi-layer FC: the number of elements in
                    # params["fc_hidden_units"], together with params["fc_dropouts"],
                    # determines the fully connected stack that gets built.
                    emb_seq_word_left, enc_seq_word_left, att_seq_word_left, sem_seq_word_left = \
                        self._semantic_feature_layer(
                            self.seq_word_left,  # a tf.Variable, initialized by the base model
                            self.seq_len_word_left,  # also a tf.Variable
                            granularity="word", reuse=False)  # reuse=False should be the query tower
                    emb_seq_word_right, enc_seq_word_right, att_seq_word_right, sem_seq_word_right = \
                        self._semantic_feature_layer(
                            self.seq_word_right,
                            self.seq_len_word_right,
                            granularity="word", reuse=True) # reuse 为true的应该是doc塔(一个query对应多个塔)
                # Match score: the cosine similarity from the original paper. The author also
                # adds dot product and Euclidean distance, so the similarity is a 3-dim vector.
                sim_word = tf.concat(
                    [
                        metrics.cosine_similarity(
                            sem_seq_word_left, sem_seq_word_right,
                            self.params["similarity_aggregation"]),
                        metrics.dot_product(
                            sem_seq_word_left, sem_seq_word_right,
                            self.params["similarity_aggregation"]),
                        metrics.euclidean_distance(
                            sem_seq_word_left, sem_seq_word_right,
                            self.params["similarity_aggregation"]),
                        # self._canberra_score(sem_seq_word_left, sem_seq_word_right),
                    ],
                    axis=-1)

            with tf.name_scope("char_network"):
                if self.params["attend_method"] == "context-attention":
                    emb_seq_char_left, enc_seq_char_left, att_seq_char_left, sem_seq_char_left, \
                    emb_seq_char_right, enc_seq_char_right, att_seq_char_right, sem_seq_char_right = \
                        self._interaction_semantic_feature_layer(
                            self.seq_char_left,
                            self.seq_char_right,
                            self.seq_len_char_left,
                            self.seq_len_char_right,
                            granularity="char")
                else:
                    emb_seq_char_left, enc_seq_char_left, att_seq_char_left, sem_seq_char_left = \
                        self._semantic_feature_layer(
                            self.seq_char_left,
                            self.seq_len_char_left,
                            granularity="char", reuse=False)
                    emb_seq_char_right, enc_seq_char_right, att_seq_char_right, sem_seq_char_right = \
                        self._semantic_feature_layer(
                            self.seq_char_right,
                            self.seq_len_char_right,
                            granularity="char", reuse=True)
                # match score
                sim_char = tf.concat(
                    [
                        metrics.cosine_similarity(
                            sem_seq_char_left, sem_seq_char_right,
                            self.params["similarity_aggregation"]),
                        metrics.dot_product(
                            sem_seq_char_left, sem_seq_char_right,
                            self.params["similarity_aggregation"]),
                        metrics.euclidean_distance(
                            sem_seq_char_left, sem_seq_char_right,
                            self.params["similarity_aggregation"]),
                        # self._canberra_score(sem_seq_char_left, sem_seq_char_right),
                    ],
                    axis=-1)

            with tf.name_scope("matching_features"):
                matching_features_word = sim_word
                matching_features_char = sim_char

        return matching_features_word, matching_features_char
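
The translated comments in this example describe _semantic_feature_layer as a four-stage pipeline: embedding with dropout, a FastText-style encoder, attention pooling, and an FC stack. Below is a heavily simplified, hypothetical sketch of that shape, assuming TF 1.x, with the encoder as the identity (matching the FastText observation in the comments) and attention collapsed to masked mean pooling; names, shapes, and the pooling choice are assumptions, not the project's implementation:

    import tensorflow as tf

    def semantic_feature_layer(seq, seq_len, vocab_size, emb_dim, hidden_units,
                               training, scope, reuse=False, emb_dropout=0.3):
        # Hypothetical stand-in for _semantic_feature_layer.
        with tf.variable_scope(scope, reuse=reuse):
            emb_mat = tf.get_variable("emb_mat", [vocab_size, emb_dim])
            # stage 1: embedding + dropout
            emb_seq = tf.nn.embedding_lookup(emb_mat, seq)            # [B, T, d]
            emb_seq = tf.layers.dropout(emb_seq, rate=emb_dropout,
                                        training=training)
            # stage 2: FastText-style encoding is the identity over embeddings
            enc_seq = emb_seq
            # stage 3: attention, reduced here to masked mean pooling
            mask = tf.expand_dims(
                tf.sequence_mask(seq_len, tf.shape(seq)[1], dtype=tf.float32),
                axis=-1)                                              # [B, T, 1]
            att_seq = tf.reduce_sum(enc_seq * mask, axis=1) / (
                tf.reduce_sum(mask, axis=1) + 1e-8)                   # [B, d]
            # stage 4: FC stack, one layer per element of hidden_units
            sem_seq = att_seq
            for i, units in enumerate(hidden_units):
                sem_seq = tf.layers.dense(sem_seq, units,
                                          activation=tf.nn.relu,
                                          name="fc_%d" % i)
        return emb_seq, enc_seq, att_seq, sem_seq

Calling this with reuse=False for the left (query) tower and reuse=True for the right (doc) tower makes both towers share weights, which is the two-tower setup the comments point at.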