Example 1
    def _interaction_semantic_feature_layer(self, seq_input_left, seq_input_right, seq_len_left, seq_len_right, granularity="word"):
        assert granularity in ["char", "word"]
        #### embed
        emb_matrix = self._get_embedding_matrix(granularity)
        emb_seq_left = tf.nn.embedding_lookup(emb_matrix, seq_input_left)
        emb_seq_right = tf.nn.embedding_lookup(emb_matrix, seq_input_right)

        #### dropout
        random_seed = np.random.randint(10000000)
        emb_seq_left = word_dropout(emb_seq_left,
                                    training=self.training,
                                    dropout=self.params["embedding_dropout"],
                                    seed=random_seed)
        random_seed = np.random.randint(10000000)
        emb_seq_right = word_dropout(emb_seq_right,
                                     training=self.training,
                                     dropout=self.params["embedding_dropout"],
                                     seed=random_seed)

        #### encode
        enc_seq_left = encode(emb_seq_left,
                              method=self.params["encode_method"],
                              params=self.params,
                              sequence_length=seq_len_left,
                              mask_zero=self.params["embedding_mask_zero"],
                              scope_name=self.model_name + "enc_seq_%s" % granularity,
                              reuse=False)
        enc_seq_right = encode(emb_seq_right,
                               method=self.params["encode_method"],
                               params=self.params,
                               sequence_length=seq_len_right,
                               mask_zero=self.params["embedding_mask_zero"],
                               scope_name=self.model_name + "enc_seq_%s" % granularity,
                               reuse=True)

        #### attend
        # pairwise dot products between the two encoded sequences: [batch, s1, s2]
        att_mat = tf.einsum("abd,acd->abc", enc_seq_left, enc_seq_right)
        feature_dim = self.params["encode_dim"] + self.params["max_seq_len_%s" % granularity]
        att_seq_left = attend(enc_seq_left, context=att_mat, feature_dim=feature_dim,
                              method=self.params["attend_method"],
                              scope_name=self.model_name + "att_seq_%s" % granularity,
                              reuse=False)
        # swap only the two sequence axes ([batch, s1, s2] -> [batch, s2, s1]);
        # a bare tf.transpose would also move the batch axis
        att_seq_right = attend(enc_seq_right, context=tf.transpose(att_mat, perm=[0, 2, 1]),
                               feature_dim=feature_dim,
                               method=self.params["attend_method"],
                               scope_name=self.model_name + "att_seq_%s" % granularity,
                               reuse=True)

        #### MLP nonlinear projection
        sem_seq_left = self._mlp_layer(att_seq_left, fc_type=self.params["fc_type"],
                                       hidden_units=self.params["fc_hidden_units"],
                                       dropouts=self.params["fc_dropouts"],
                                       scope_name=self.model_name + "sem_seq_%s" % granularity,
                                       reuse=False)
        sem_seq_right = self._mlp_layer(att_seq_right, fc_type=self.params["fc_type"],
                                        hidden_units=self.params["fc_hidden_units"],
                                        dropouts=self.params["fc_dropouts"],
                                        scope_name=self.model_name + "sem_seq_%s" % granularity,
                                        reuse=True)

        return emb_seq_left, enc_seq_left, att_seq_left, sem_seq_left, \
                emb_seq_right, enc_seq_right, att_seq_right, sem_seq_right
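
The core of the `#### attend` block is the einsum that forms every pairwise dot product between the two encoded sequences, plus the axis swap that turns the left-to-right context into the right-to-left one. A minimal standalone sketch of those two operations (TF1-style; the batch and sequence sizes are illustrative, not from the source):

import tensorflow as tf

# enc_seq_left: [batch, s1, d], enc_seq_right: [batch, s2, d]
enc_seq_left = tf.random_normal([32, 10, 64])
enc_seq_right = tf.random_normal([32, 12, 64])

# att_mat[b, i, j] = dot(enc_seq_left[b, i], enc_seq_right[b, j])
att_mat = tf.einsum("abd,acd->abc", enc_seq_left, enc_seq_right)  # [32, 10, 12]

# context for the right sequence: swap only the two sequence axes
att_mat_t = tf.transpose(att_mat, perm=[0, 2, 1])                 # [32, 12, 10]
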
Example 2
    def _semantic_feature_layer(self,
                                seq_input,
                                seq_len,
                                granularity="word",
                                reuse=False):
        assert granularity in ["char", "word"]
        #### embed
        emb_matrix = self._get_embedding_matrix(granularity)
        emb_seq = tf.nn.embedding_lookup(emb_matrix, seq_input)

        #### dropout
        random_seed = np.random.randint(10000000)
        emb_seq = word_dropout(emb_seq,
                               training=self.training,
                               dropout=self.params["embedding_dropout"],
                               seed=random_seed)

        #### encode
        input_dim = self.params["embedding_dim"]
        enc_seq = encode(emb_seq,
                         method=self.params["encode_method"],
                         input_dim=input_dim,
                         params=self.params,
                         sequence_length=seq_len,
                         mask_zero=self.params["embedding_mask_zero"],
                         scope_name=self.model_name +
                         "enc_seq_%s" % granularity,
                         reuse=reuse,
                         training=self.training)

        #### attend
        feature_dim = self.params["encode_dim"]
        context = None
        att_seq = attend(enc_seq,
                         context=context,
                         encode_dim=self.params["encode_dim"],
                         feature_dim=feature_dim,
                         attention_dim=self.params["attention_dim"],
                         method=self.params["attend_method"],
                         scope_name=self.model_name +
                         "att_seq_%s" % granularity,
                         reuse=reuse,
                         num_heads=self.params["attention_num_heads"])

        #### MLP nonlinear projection
        sem_seq = mlp_layer(att_seq,
                            fc_type=self.params["fc_type"],
                            hidden_units=self.params["fc_hidden_units"],
                            dropouts=self.params["fc_dropouts"],
                            scope_name=self.model_name +
                            "sem_seq_%s" % granularity,
                            reuse=reuse,
                            training=self.training,
                            seed=self.params["random_seed"])

        return emb_seq, enc_seq, att_seq, sem_seq
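
`word_dropout` is referenced in all of these snippets but never shown. A common implementation drops whole token embeddings rather than individual units, by broadcasting a single dropout mask value per position across the embedding axis. A hypothetical sketch under that assumption (the project's actual helper may differ):

import tensorflow as tf

def word_dropout(x, training, dropout=0.0, seed=None):
    # x: [batch, seq_len, emb_dim]; noise_shape [batch, seq_len, 1] zeroes
    # entire token vectors, and tf.nn.dropout rescales the kept ones
    if dropout <= 0.0:
        return x
    shape = tf.shape(x)
    dropped = tf.nn.dropout(x,
                            keep_prob=1.0 - dropout,
                            noise_shape=[shape[0], shape[1], 1],
                            seed=seed)
    # apply the mask only when training
    return tf.cond(tf.cast(training, tf.bool),
                   lambda: dropped,
                   lambda: x)
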
Example 3
    def _semantic_feature_layer(self,
                                seq_input,
                                granularity="word",
                                reuse=False,
                                return_enc=False):
        assert granularity in ["char", "word"]
        #### embed
        emb_matrix = self._get_embedding_matrix(granularity)
        emb_seq = tf.nn.embedding_lookup(emb_matrix, seq_input)

        #### dropout
        emb_seq = word_dropout(emb_seq,
                               training=self.training,
                               dropout=self.params["embedding_dropout"],
                               seed=self.params["random_seed"])

        #### encode
        enc_seq = encode(emb_seq,
                         method=self.params["encode_method"],
                         params=self.params,
                         scope_name=self.model_name +
                         "enc_seq_%s" % granularity,
                         reuse=reuse)

        #### attend
        feature_dim = self.params["encode_dim"]
        context = None
        att_seq = attend(enc_seq,
                         context=context,
                         feature_dim=feature_dim,
                         method=self.params["attend_method"],
                         scope_name=self.model_name +
                         "att_seq_%s" % granularity,
                         reuse=reuse)

        #### MLP nonlinear projection
        sem_seq = self._mlp_layer(att_seq,
                                  fc_type=self.params["fc_type"],
                                  hidden_units=self.params["fc_hidden_units"],
                                  dropouts=self.params["fc_dropouts"],
                                  scope_name=self.model_name +
                                  "sem_seq_%s" % granularity,
                                  reuse=reuse)

        if return_enc:
            return sem_seq, enc_seq
        else:
            return sem_seq
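
Because every sub-layer keys off the `reuse` flag and a granularity-scoped name, the same method can encode both sides of a text pair with fully shared weights. A hypothetical caller (the attribute names `seq_word_left`/`seq_word_right` are assumptions, not from the source):

# inside the model's graph-building code
sem_left = self._semantic_feature_layer(self.seq_word_left,
                                        granularity="word",
                                        reuse=False)
sem_right = self._semantic_feature_layer(self.seq_word_right,
                                         granularity="word",
                                         reuse=True)  # share all weights
# e.g. a cosine-style matching score between the two semantic vectors
sim = tf.reduce_sum(tf.nn.l2_normalize(sem_left, axis=1) *
                    tf.nn.l2_normalize(sem_right, axis=1), axis=1)
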
Example 4
    def _build_task_graph(self, task_name):

        #### tf vars
        self.task_labels[task_name] = tf.placeholder(tf.int32,
                                                     shape=[None],
                                                     name="task_labels")
        self.labels[task_name] = tf.placeholder(tf.int32,
                                                shape=[None],
                                                name="labels")
        self.seq_word[task_name] = tf.placeholder(tf.int32,
                                                  shape=[None, None],
                                                  name="seq_word")

        #### embedding
        emb_seq = tf.nn.embedding_lookup(self.emb_matrix,
                                         self.seq_word[task_name])
        emb_seq = word_dropout(emb_seq,
                               training=self.training,
                               dropout=self.params["embedding_dropout"],
                               seed=self.params["random_seed"])

        #### features
        shared_features = self._shared_feature_extractor(emb_seq, seq_len=None)
        private_features = self._private_feature_extractor(emb_seq,
                                                           seq_len=None,
                                                           task_name=task_name)

        feature = tf.concat([shared_features, private_features], axis=1)
        feature = tf.layers.Dropout(self.params["fc_dropout"])(
            feature, training=self.training)

        #### task classifier
        # for mtl-dataset, label is 0/1 for all the tasks
        logits = tf.layers.dense(feature, 2)
        probas = tf.nn.softmax(logits)
        loss_task = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=self.labels[task_name], logits=logits)
        loss_task = tf.reduce_mean(loss_task)

        #### auxiliary losses
        loss_adv = self._adversarial_loss(shared_features,
                                          self.task_labels[task_name])
        loss_diff = self._difference_loss(shared_features, private_features)
        loss_domain = self._domain_loss(private_features,
                                        self.task_labels[task_name])

        #### overall loss
        loss = loss_task
        if "loss_adv_weight" in self.params and self.params[
                "loss_adv_weight"] > 0:
            loss += self.params["loss_adv_weight"] * loss_adv
        if "loss_diff_weight" in self.params and self.params[
                "loss_diff_weight"] > 0:
            loss += self.params["loss_diff_weight"] * loss_diff
        if "loss_domain_weight" in self.params and self.params[
                "loss_domain_weight"] > 0:
            loss += self.params["loss_domain_weight"] * loss_domain
        if "loss_l2_lambda" in self.params and self.params[
                "loss_l2_lambda"] > 0:
            l2_losses = tf.add_n([
                tf.nn.l2_loss(v) for v in tf.trainable_variables()
                if "bias" not in v.name
            ])
            loss += self.params["loss_l2_lambda"] * l2_losses

        #### accuracy
        preds = tf.cast(tf.argmax(logits, axis=1), tf.int32)
        acc = tf.cast(tf.equal(preds, self.labels[task_name]), tf.float32)
        acc = tf.reduce_mean(acc)

        return probas, loss, acc
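
`_adversarial_loss` is not shown. In shared-private multi-task models of this shape, the adversarial term is typically built on a gradient reversal layer: a discriminator is trained to identify the task from the shared features, while the reversed gradient pushes the shared extractor toward task-invariant features. A sketch under that assumption, not necessarily the author's exact implementation:

import tensorflow as tf

def flip_gradient(x, weight=1.0):
    # identity in the forward pass; scales the gradient by -weight on the
    # way back (gradient reversal layer)
    @tf.custom_gradient
    def _flip(x):
        def grad(dy):
            return -weight * dy
        return x, grad
    return _flip(x)

def adversarial_loss(shared_features, task_labels, num_tasks):
    # the discriminator tries to predict the task id; reversed gradients
    # train the shared extractor to confuse it
    feat = flip_gradient(shared_features)
    logits = tf.layers.dense(feat, num_tasks, name="task_discriminator")
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=task_labels, logits=logits)
    return tf.reduce_mean(loss)
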