Example #1
    def _get_prediction(self):
        with tf.name_scope(self.model_name + "/"):
            with tf.name_scope("prediction"):
                lst = []
                if "word" in self.params["granularity"]:
                    lst.append(self.matching_features_word)
                if "char" in self.params["granularity"]:
                    lst.append(self.matching_features_char)
                if self.params["use_features"]:
                    out_0 = mlp_layer(self.features, fc_type=self.params["fc_type"],
                                      hidden_units=self.params["fc_hidden_units"],
                                      dropouts=self.params["fc_dropouts"],
                                      scope_name=self.model_name + "mlp_features",
                                      reuse=False,
                                      training=self.training,
                                      seed=self.params["random_seed"])
                    lst.append(out_0)
                out = tf.concat(lst, axis=-1)
                out = tf.layers.Dropout(self.params["final_dropout"])(out, training=self.training)
                out = mlp_layer(out, fc_type=self.params["fc_type"],
                                hidden_units=self.params["fc_hidden_units"],
                                dropouts=self.params["fc_dropouts"],
                                scope_name=self.model_name + "mlp",
                                reuse=False,
                                training=self.training,
                                seed=self.params["random_seed"])
                logits = tf.layers.dense(out, 1, activation=None,
                                         kernel_initializer=tf.glorot_uniform_initializer(
                                         seed=self.params["random_seed"]),
                                         name=self.model_name + "logits")
                logits = tf.squeeze(logits, axis=1)
                proba = tf.nn.sigmoid(logits)

        return logits, proba
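
The prediction head returns both the raw logits and the sigmoid probability; keeping the logits around lets the loss be computed in the numerically stable form. A minimal sketch (not from the source; get_loss is a hypothetical helper) of how the logits would typically feed a binary cross-entropy loss:

import tensorflow as tf

def get_loss(logits, labels):
    # Hypothetical helper, not part of the original class: the loss is
    # computed from the raw logits rather than from proba, which is the
    # numerically stable formulation of binary cross-entropy.
    return tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=logits))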
Example #2
    def _semantic_feature_layer(self,
                                seq_input,
                                seq_len,
                                granularity="word",
                                reuse=False):
        assert granularity in ["char", "word"]
        #### embed
        emb_matrix = self._get_embedding_matrix(granularity)
        emb_seq = tf.nn.embedding_lookup(emb_matrix, seq_input)

        #### dropout
        random_seed = np.random.randint(10000000)
        emb_seq = word_dropout(emb_seq,
                               training=self.training,
                               dropout=self.params["embedding_dropout"],
                               seed=random_seed)

        #### encode
        input_dim = self.params["embedding_dim"]
        enc_seq = encode(emb_seq,
                         method=self.params["encode_method"],
                         input_dim=input_dim,
                         params=self.params,
                         sequence_length=seq_len,
                         mask_zero=self.params["embedding_mask_zero"],
                         scope_name=self.model_name +
                         "enc_seq_%s" % granularity,
                         reuse=reuse,
                         training=self.training)

        #### attend
        feature_dim = self.params["encode_dim"]
        context = None
        att_seq = attend(enc_seq,
                         context=context,
                         encode_dim=self.params["encode_dim"],
                         feature_dim=feature_dim,
                         attention_dim=self.params["attention_dim"],
                         method=self.params["attend_method"],
                         scope_name=self.model_name +
                         "att_seq_%s" % granularity,
                         reuse=reuse,
                         num_heads=self.params["attention_num_heads"])

        #### MLP nonlinear projection
        sem_seq = mlp_layer(att_seq,
                            fc_type=self.params["fc_type"],
                            hidden_units=self.params["fc_hidden_units"],
                            dropouts=self.params["fc_dropouts"],
                            scope_name=self.model_name +
                            "sem_seq_%s" % granularity,
                            reuse=reuse,
                            training=self.training,
                            seed=self.params["random_seed"])

        return emb_seq, enc_seq, att_seq, sem_seq
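
word_dropout is a helper defined elsewhere in the repo; judging from its name and arguments, it presumably zeroes whole token embeddings rather than individual units. A minimal sketch under that assumption (not the repo's actual implementation):

import tensorflow as tf

def word_dropout(x, training, dropout=0.0, seed=None):
    # Sketch only: the noise shape broadcasts over the embedding axis,
    # so each token's entire vector is kept or dropped together.
    # x: [batch, seq_len, embedding_dim]
    noise_shape = [tf.shape(x)[0], tf.shape(x)[1], 1]
    return tf.layers.dropout(x, rate=dropout, noise_shape=noise_shape,
                             seed=seed, training=training)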
Example #3
    def _get_prediction(self):
        with tf.name_scope(self.model_name + "/"):
            with tf.name_scope("prediction"):
                lst = []
                # granularity selects the word-based or the char-based network. Judging from the arguments in main, presumably only one can be specified at a time
                if "word" in self.params["granularity"]:
                    lst.append(self.matching_features_word)
                if "char" in self.params["granularity"]:
                    lst.append(self.matching_features_char)
                # The config in main defaults to False, and dssm does not override this parameter, so use_features is off by default
                if self.params["use_features"]:
                    # mlp_layer is a multi-layer perceptron; it supports three layer types: fc, dense, and resnet
                    out_0 = mlp_layer(self.features, fc_type=self.params["fc_type"],
                                      hidden_units=self.params["fc_hidden_units"],
                                      dropouts=self.params["fc_dropouts"],
                                      scope_name=self.model_name + "mlp_features",
                                      reuse=False,
                                      training=self.training,
                                      seed=self.params["random_seed"])
                    lst.append(out_0)
                out = tf.concat(lst, axis=-1)
                out = tf.layers.Dropout(self.params["final_dropout"])(out, training=self.training)
                # Why is there another fully connected network with fc_hidden_units layers here??
                out = mlp_layer(out, fc_type=self.params["fc_type"],
                                hidden_units=self.params["fc_hidden_units"],
                                dropouts=self.params["fc_dropouts"],
                                scope_name=self.model_name + "mlp",
                                reuse=False,
                                training=self.training,
                                seed=self.params["random_seed"])
                # What is this final single-neuron layer in prediction, with no activation function, for?
                logits = tf.layers.dense(out, 1, activation=None,
                                         kernel_initializer=tf.glorot_uniform_initializer(
                                         seed=self.params["random_seed"]),
                                         name=self.model_name + "logits")
                logits = tf.squeeze(logits, axis=1)
                proba = tf.nn.sigmoid(logits)

        return logits, proba
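
On the two questions in the comments above: the second mlp_layer re-projects the concatenated word/char/feature vectors into a common space before scoring, and the final single-neuron dense layer with no activation produces the raw matching logit, which the sigmoid then maps to a probability. An illustrative shape walk-through (batch size and feature width are made up):

import tensorflow as tf

out = tf.zeros([32, 128])             # stand-in for the concatenated features
logits = tf.layers.dense(out, 1)      # [32, 1]: one raw score per pair
logits = tf.squeeze(logits, axis=1)   # [32]
proba = tf.nn.sigmoid(logits)         # [32]: match probabilities in (0, 1)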
Example #4
    def _get_matching_features(self):
        with tf.name_scope(self.model_name):
            tf.set_random_seed(self.params["random_seed"])

            with tf.name_scope("word_network"):
                if self.params["attend_method"] == "context-attention":
                    emb_seq_word_left, enc_seq_word_left, att_seq_word_left, sem_seq_word_left, \
                    emb_seq_word_right, enc_seq_word_right, att_seq_word_right, sem_seq_word_right = \
                        self._interaction_semantic_feature_layer(
                            self.seq_word_left,
                            self.seq_word_right,
                            self.seq_len_word_left,
                            self.seq_len_word_right,
                            granularity="word")
                else:
                    emb_seq_word_left, enc_seq_word_left, att_seq_word_left, sem_seq_word_left = \
                        self._semantic_feature_layer(
                            self.seq_word_left,
                            self.seq_len_word_left,
                            granularity="word", reuse=False)
                    emb_seq_word_right, enc_seq_word_right, att_seq_word_right, sem_seq_word_right = \
                        self._semantic_feature_layer(
                            self.seq_word_right,
                            self.seq_len_word_right,
                            granularity="word", reuse=True)

                #### matching
                # match score
                sim_word = tf.concat(
                    [
                        metrics.cosine_similarity(
                            sem_seq_word_left, sem_seq_word_right,
                            self.params["similarity_aggregation"]),
                        metrics.dot_product(
                            sem_seq_word_left, sem_seq_word_right,
                            self.params["similarity_aggregation"]),
                        metrics.euclidean_distance(
                            sem_seq_word_left, sem_seq_word_right,
                            self.params["similarity_aggregation"]),
                        # self._canberra_score(sem_seq_word_left, sem_seq_word_right),
                    ],
                    axis=-1)

                # match pyramid
                match_matrix_word = self._get_match_matrix(self.seq_word_left,
                                                           emb_seq_word_left,
                                                           enc_seq_word_left,
                                                           self.seq_word_right,
                                                           emb_seq_word_right,
                                                           enc_seq_word_right,
                                                           granularity="word")
                mp_word = self._mp_semantic_feature_layer(
                    match_matrix_word,
                    self.dpool_index_word,
                    granularity="word")

                # esim
                esim_word = self._esim_semantic_feature_layer(
                    emb_seq_word_left,
                    emb_seq_word_right,
                    self.seq_len_word_left,
                    self.seq_len_word_right,
                    granularity="word")

                # bcnn
                bcnn_word = self._bcnn_semantic_feature_layer(
                    emb_seq_word_left, emb_seq_word_right, granularity="word")

                # dense
                deep_in_word = tf.concat(
                    [sem_seq_word_left, sem_seq_word_right], axis=-1)
                deep_word = mlp_layer(
                    deep_in_word,
                    fc_type=self.params["fc_type"],
                    hidden_units=self.params["fc_hidden_units"],
                    dropouts=self.params["fc_dropouts"],
                    scope_name=self.model_name + "deep_word",
                    reuse=False,
                    training=self.training,
                    seed=self.params["random_seed"])

            with tf.name_scope("char_network"):
                if self.params["attend_method"] == "context-attention":
                    emb_seq_char_left, enc_seq_char_left, att_seq_char_left, sem_seq_char_left, \
                    emb_seq_char_right, enc_seq_char_right, att_seq_char_right, sem_seq_char_right = \
                        self._interaction_semantic_feature_layer(
                            self.seq_char_left,
                            self.seq_char_right,
                            self.seq_len_char_left,
                            self.seq_len_char_right,
                            granularity="char")
                else:
                    emb_seq_char_left, enc_seq_char_left, att_seq_char_left, sem_seq_char_left = \
                        self._semantic_feature_layer(
                            self.seq_char_left,
                            self.seq_len_char_left,
                            granularity="char", reuse=False)
                    emb_seq_char_right, enc_seq_char_right, att_seq_char_right, sem_seq_char_right = \
                        self._semantic_feature_layer(
                            self.seq_char_right,
                            self.seq_len_char_right,
                            granularity="char", reuse=True)

                # match score
                sim_char = tf.concat(
                    [
                        metrics.cosine_similarity(
                            sem_seq_char_left, sem_seq_char_right,
                            self.params["similarity_aggregation"]),
                        metrics.dot_product(
                            sem_seq_char_left, sem_seq_char_right,
                            self.params["similarity_aggregation"]),
                        metrics.euclidean_distance(
                            sem_seq_char_left, sem_seq_char_right,
                            self.params["similarity_aggregation"]),
                        # self._canberra_score(sem_seq_char_left, sem_seq_char_right),
                    ],
                    axis=-1)

                # match pyramid
                match_matrix_char = self._get_match_matrix(self.seq_char_left,
                                                           emb_seq_char_left,
                                                           enc_seq_char_left,
                                                           self.seq_char_right,
                                                           emb_seq_char_right,
                                                           enc_seq_char_right,
                                                           granularity="char")
                mp_char = self._mp_semantic_feature_layer(
                    match_matrix_char,
                    self.dpool_index_char,
                    granularity="char")

                # esim
                esim_char = self._esim_semantic_feature_layer(
                    emb_seq_char_left,
                    emb_seq_char_right,
                    self.seq_len_char_left,
                    self.seq_len_char_right,
                    granularity="char")

                # bcnn
                bcnn_char = self._bcnn_semantic_feature_layer(
                    emb_seq_char_left, emb_seq_char_right, granularity="char")

                # dense
                deep_in_char = tf.concat(
                    [sem_seq_char_left, sem_seq_char_right], axis=-1)
                deep_char = mlp_layer(
                    deep_in_char,
                    fc_type=self.params["fc_type"],
                    hidden_units=self.params["fc_hidden_units"],
                    dropouts=self.params["fc_dropouts"],
                    scope_name=self.model_name + "deep_char",
                    reuse=False,
                    training=self.training,
                    seed=self.params["random_seed"])

            with tf.name_scope("matching_features"):
                matching_features_word = tf.concat(
                    [
                        sim_word,
                        mp_word,
                        esim_word,
                        bcnn_word,
                        deep_word,  # sem_seq_word_left, sem_seq_word_right,
                    ],
                    axis=-1)
                matching_features_char = tf.concat(
                    [
                        sim_char,
                        mp_char,
                        esim_char,
                        bcnn_char,
                        deep_char,  # sem_seq_char_left, sem_seq_char_right,
                    ],
                    axis=-1)

        return matching_features_word, matching_features_char
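
metrics.cosine_similarity, metrics.dot_product, and metrics.euclidean_distance live in a separate module, and the exact effect of the similarity_aggregation flag is not visible here. An illustrative stand-in for the cosine term, assuming it yields a [batch, 1] column that concatenates with the other match features:

import tensorflow as tf

def cosine_similarity(v1, v2):
    # Sketch only, not the repo's metrics module: cosine of the two
    # semantic vectors, kept as a [batch, 1] column feature.
    v1 = tf.nn.l2_normalize(v1, axis=-1)
    v2 = tf.nn.l2_normalize(v2, axis=-1)
    return tf.reduce_sum(v1 * v2, axis=-1, keepdims=True)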
Example #5
    def _interaction_semantic_feature_layer(self, seq_input_left, seq_input_right,
                                            seq_len_left, seq_len_right,
                                            granularity="word"):
        assert granularity in ["char", "word"]
        #### embed
        emb_matrix = self._get_embedding_matrix(granularity)
        emb_seq_left = tf.nn.embedding_lookup(emb_matrix, seq_input_left)
        emb_seq_right = tf.nn.embedding_lookup(emb_matrix, seq_input_right)

        #### dropout
        random_seed = np.random.randint(10000000)
        emb_seq_left = word_dropout(emb_seq_left,
                                    training=self.training,
                                    dropout=self.params["embedding_dropout"],
                                    seed=random_seed)
        random_seed = np.random.randint(10000000)
        emb_seq_right = word_dropout(emb_seq_right,
                                     training=self.training,
                                     dropout=self.params["embedding_dropout"],
                                     seed=random_seed)

        #### encode
        input_dim = self.params["embedding_dim"]
        enc_seq_left = encode(emb_seq_left, method=self.params["encode_method"],
                              input_dim=input_dim,
                              params=self.params,
                              sequence_length=seq_len_left,
                              mask_zero=self.params["embedding_mask_zero"],
                              scope_name=self.model_name + "enc_seq_%s" % granularity, reuse=False,
                              training=self.training)
        enc_seq_right = encode(emb_seq_right, method=self.params["encode_method"],
                               input_dim=input_dim,
                               params=self.params,
                               sequence_length=seq_len_right,
                               mask_zero=self.params["embedding_mask_zero"],
                               scope_name=self.model_name + "enc_seq_%s" % granularity, reuse=True,
                               training=self.training)

        #### attend
        # att_mat: [batchsize, s1, s2] pairwise affinity between the two encoded sequences
        att_mat = tf.einsum("abd,acd->abc", enc_seq_left, enc_seq_right)
        feature_dim = self.params["encode_dim"] + self.params["max_seq_len_%s" % granularity]
        att_seq_left = attend(enc_seq_left, context=att_mat, feature_dim=feature_dim,
                              method=self.params["attend_method"],
                              scope_name=self.model_name + "att_seq_%s" % granularity,
                              reuse=False)
        # transpose only the last two axes ([batch, s1, s2] -> [batch, s2, s1]);
        # a bare tf.transpose would also swap the batch axis
        att_seq_right = attend(enc_seq_right, context=tf.transpose(att_mat, perm=[0, 2, 1]),
                               feature_dim=feature_dim,
                               method=self.params["attend_method"],
                               scope_name=self.model_name + "att_seq_%s" % granularity,
                               reuse=True)

        #### MLP nonlinear projection
        sem_seq_left = mlp_layer(att_seq_left, fc_type=self.params["fc_type"],
                                 hidden_units=self.params["fc_hidden_units"],
                                 dropouts=self.params["fc_dropouts"],
                                 scope_name=self.model_name + "sem_seq_%s" % granularity,
                                 reuse=False,
                                 training=self.training,
                                 seed=self.params["random_seed"])
        sem_seq_right = mlp_layer(att_seq_right, fc_type=self.params["fc_type"],
                                  hidden_units=self.params["fc_hidden_units"],
                                  dropouts=self.params["fc_dropouts"],
                                  scope_name=self.model_name + "sem_seq_%s" % granularity,
                                  reuse=True,
                                  training=self.training,
                                  seed=self.params["random_seed"])

        return emb_seq_left, enc_seq_left, att_seq_left, sem_seq_left, \
                emb_seq_right, enc_seq_right, att_seq_right, sem_seq_right
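
The einsum above is a batched matmul between the two encoded sequences, and it motivates the perm argument on the transpose: only the two sequence axes should be swapped, never the batch axis. A small demonstration with made-up shapes:

import tensorflow as tf

enc_left = tf.zeros([8, 20, 64])    # [batch, s1, encode_dim]
enc_right = tf.zeros([8, 30, 64])   # [batch, s2, encode_dim]

# "abd,acd->abc" contracts the shared encode_dim axis:
att_mat = tf.einsum("abd,acd->abc", enc_left, enc_right)   # [8, 20, 30]
same = tf.matmul(enc_left, enc_right, transpose_b=True)    # identical result

# The right-hand side needs the affinity matrix seen from the other side:
att_mat_t = tf.transpose(att_mat, perm=[0, 2, 1])          # [8, 30, 20]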