Example #1
    def get_features(self, words, train=False, update=True):
        """
        get feature representations
        """
        # word embeddings
        wfeatures = np.array([
            self.get_w_repr(word, train=train, update=update) for word in words
        ])

        lex_features = []
        if self.dictionary and not self.type_constraint:
            # add lexicon features
            lex_features = np.array(
                [self.get_lex_repr(word) for word in words])
        # char embeddings
        if self.c_in_dim > 0:
            cfeatures = [self.get_c_repr(word, train=train) for word in words]
            if len(lex_features) > 0:
                lex_features = dynet.inputTensor(lex_features)
                features = [
                    dynet.concatenate([w, c, l])
                    for w, c, l in zip(wfeatures, cfeatures, lex_features)
                ]
            else:
                features = [
                    dynet.concatenate([w, c])
                    for w, c in zip(wfeatures, cfeatures)
                ]
        else:
            features = wfeatures
        if train:  # only do at training time
            features = [dynet.noise(fe, self.noise_sigma) for fe in features]
        return features
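The per-token feature here is just a concatenation: word embedding, char representation, and (optionally) lexicon features. A minimal numpy sketch of the resulting vector, with hypothetical sizes that are not taken from the model above:

    import numpy as np

    w_dim, c_dim, lex_dim = 64, 100, 12     # hypothetical embedding sizes
    w = np.random.randn(w_dim)              # word embedding
    c = np.random.randn(c_dim)              # char representation
    l = np.random.randn(lex_dim)            # lexicon features

    # mirrors dynet.concatenate([w, c, l]) above, but on plain arrays
    feature = np.concatenate([w, c, l])
    assert feature.shape == (w_dim + c_dim + lex_dim,)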
Example #2
    def predict(self,
                feature_vector,
                task_ids,
                train=False,
                soft_labels=False,
                temperature=None,
                dropout_rate=0.0,
                orthogonality_weight=0.0,
                domain_id=None):
        dynet.renew_cg()  # new graph

        feature_vector = feature_vector.toarray()
        feature_vector = np.squeeze(feature_vector, axis=0)

        # self.input = dynet.vecInput(self.vocab_size)
        # self.input.set(feature_vector)
        # TODO this takes too long; can we speed this up somehow?
        input = dynet.inputVector(feature_vector)
        for i in range(self.h_layers):
            if train:  # add some noise
                input = dynet.noise(input, self.noise_sigma)
                input = dynet.dropout(input, dropout_rate)
            input = self.layers[i](input)
        outputs = []
        for task_id in task_ids:
            output = self.output_layers_dict[task_id](input,
                                                      soft_labels=soft_labels,
                                                      temperature=temperature)
            outputs.append(output)

        constraint, adv_loss = 0, 0
        if orthogonality_weight != 0:
            # put the orthogonality constraint either directly on the
            # output layer or on the hidden layer if it's an MLP
            F0_layer = self.output_layers_dict["F0"]
            F1_layer = self.output_layers_dict["F1"]
            F0_param = F0_layer.W_mlp if self.add_hidden else F0_layer.W
            F1_param = F1_layer.W_mlp if self.add_hidden else F1_layer.W
            F0_W = dynet.parameter(F0_param)
            F1_W = dynet.parameter(F1_param)

            # calculate the matrix product of the task matrix with both others
            matrix_product = dynet.transpose(F0_W) * F1_W

            # take the squared Frobenius norm by squaring
            # every element and then summing them
            squared_frobenius_norm = dynet.sum_elems(
                dynet.square(matrix_product))
            constraint += squared_frobenius_norm
            # print('Constraint with first matrix:', squared_frobenius_norm.value())

        if domain_id is not None:
            # flip the gradient when back-propagating through here
            adv_input = dynet.flip_gradient(input)  # last state
            adv_output = self.adv_layer(adv_input)
            adv_loss = self.pick_neg_log(adv_output, domain_id)
            # print('Adversarial loss:', avg_adv_loss.value())
        return outputs, constraint, adv_loss
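The orthogonality term is the squared Frobenius norm of F0_W^T * F1_W, which is zero exactly when the columns of the two task-specific weight matrices are mutually orthogonal. A minimal numpy sketch of the same quantity (matrix sizes hypothetical):

    import numpy as np

    F0_W = np.random.randn(50, 10)    # hypothetical task-specific weights
    F1_W = np.random.randn(50, 10)

    product = F0_W.T @ F1_W                # dynet.transpose(F0_W) * F1_W
    constraint = np.sum(product ** 2)      # squared Frobenius norm
    # equivalent: np.linalg.norm(product, 'fro') ** 2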
Example #3
    def predict(self, seq, train=False, output_confidences=False, unk_tag=None, update_embeds=True):
        """
        predict tags for a sentence represented as char+word embeddings and compute losses for this instance
        """
        if not train:
            dynet.renew_cg()
        features = self.get_features(seq.words, train=train, update=update_embeds)

        output_expected_at_layer = self.predictors["task_expected_at"][seq.task_id]
        output_expected_at_layer -= 1

        # go through layers
        # input is now combination of w + char emb
        prev = features
        prev_rev = features
        num_layers = self.h_layers

        for i in range(num_layers):
            predictor = self.predictors["inner"][i]
            forward_sequence, backward_sequence = predictor.predict_sequence(prev, prev_rev)
            if i > 0 and self.activation:
                # activation between LSTM layers
                forward_sequence = [self.activation(s) for s in forward_sequence]
                backward_sequence = [self.activation(s) for s in backward_sequence]

            if i == output_expected_at_layer:
                output_predictor = self.predictors["output_layers_dict"][seq.task_id]
                concat_layer = [dynet.concatenate([f, b]) for f, b in zip(forward_sequence, reversed(backward_sequence))]

                if train and self.noise_sigma > 0.0:
                    concat_layer = [dynet.noise(fe, self.noise_sigma) for fe in concat_layer]
                # fill-in predictions and get loss per tag
                losses = output_predictor.predict_sequence(seq, concat_layer,
                                                           train=train, output_confidences=output_confidences,
                                                           unk_tag=unk_tag, dictionary=self.dictionary,
                                                           type_constraint=self.type_constraint)

            prev = forward_sequence
            prev_rev = backward_sequence

        if train:
            # return losses
            return losses
        else:
            return seq.pred_tags, seq.tag_confidences
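Note the reversed(backward_sequence) in the concatenation: predict_sequence returns the backward states in reverse token order, so reversing them restores token order before pairing with the forward states. A tiny sketch of that alignment, assuming the backward list is ordered last token first:

    # forward states in token order; backward states in reverse token order (assumed)
    forward = ["f0", "f1", "f2"]
    backward = ["b2", "b1", "b0"]

    pairs = list(zip(forward, reversed(backward)))
    assert pairs == [("f0", "b0"), ("f1", "b1"), ("f2", "b2")]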
Example #5
    def get_features(self, words, train=False, update=True):
        """
        get feature representations
        """
        # word embeddings
        wfeatures = np.array([self.get_w_repr(word, train=train, update=update) for word in words])

        lex_features = []
        if self.dictionary and not self.type_constraint:
            # add lexicon features
            lex_features = np.array([self.get_lex_repr(word) for word in words])
        # char embeddings
        if self.c_in_dim > 0:
            cfeatures = [self.get_c_repr(word, train=train) for word in words]
            if len(lex_features) > 0:
                lex_features = dynet.inputTensor(lex_features)
                features = [dynet.concatenate([w, c, l]) for w, c, l in zip(wfeatures, cfeatures, lex_features)]
            else:
                features = [dynet.concatenate([w, c]) for w, c in zip(wfeatures, cfeatures)]
        else:
            features = wfeatures
        if train:  # only do at training time
            features = [dynet.noise(fe, self.noise_sigma) for fe in features]
        return features
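At training time each feature vector gets zero-mean Gaussian noise with standard deviation self.noise_sigma, which is what dynet.noise does. A numpy equivalent of that regularizer (sigma and dimensions hypothetical):

    import numpy as np

    rng = np.random.default_rng(0)
    noise_sigma = 0.1                       # hypothetical; plays the role of self.noise_sigma
    feature = rng.standard_normal(164)      # one per-token feature vector

    # numpy equivalent of dynet.noise(feature, noise_sigma)
    noisy = feature + rng.normal(0.0, noise_sigma, size=feature.shape)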
Example #6
    def predict(self,
                feature_vector,
                train=False,
                soft_labels=False,
                temperature=None,
                dropout_rate=0.0):  # a None default would crash dynet.dropout at train time
        dynet.renew_cg()  # new graph

        feature_vector = feature_vector.toarray()
        feature_vector = np.squeeze(feature_vector, axis=0)

        # self.input = dynet.vecInput(self.vocab_size)
        # self.input.set(feature_vector)
        # TODO this takes too long; can we speed this up somehow?
        input = dynet.inputVector(feature_vector)
        for i in range(self.h_layers - 1):
            if train:  # add some noise
                input = dynet.noise(input, self.noise_sigma)
                input = dynet.dropout(input, dropout_rate)
            input = self.layers[i](input)
        output = self.layers[-1](input,
                                 soft_labels=soft_labels,
                                 temperature=temperature)
        return output
Example #7
    def predict(self,
                word_indices,
                char_indices,
                train=False,
                soft_labels=False,
                temperature=None):
        """
        predict tags for a sentence represented as char+word embeddings
        """
        dynet.renew_cg()  # new graph

        char_emb = []
        rev_char_emb = []

        wfeatures = [self.wembeds[w] for w in word_indices]

        if self.c_in_dim > 0:
            # get representation for words
            for chars_of_token in char_indices:
                char_feats = [self.cembeds[c] for c in chars_of_token]
                # use last state as word representation
                f_char, b_char = self.char_rnn.predict_sequence(
                    char_feats, char_feats)
                last_state = f_char[-1]
                rev_last_state = b_char[-1]
                char_emb.append(last_state)
                rev_char_emb.append(rev_last_state)

            features = [
                dynet.concatenate([w, c, rev_c])
                for w, c, rev_c in zip(wfeatures, char_emb, rev_char_emb)
            ]
        else:
            features = wfeatures

        if train:  # only do at training time
            features = [dynet.noise(fe, self.noise_sigma) for fe in features]

        output_expected_at_layer = self.h_layers
        output_expected_at_layer -= 1

        # go through layers
        prev = features
        prev_rev = features
        num_layers = self.h_layers
        for i in range(0, num_layers):
            predictor = self.predictors["inner"][i]
            forward_sequence, backward_sequence = predictor.predict_sequence(
                prev, prev_rev)
            if i > 0 and self.activation:
                # activation between LSTM layers
                forward_sequence = [
                    self.activation(s) for s in forward_sequence
                ]
                backward_sequence = [
                    self.activation(s) for s in backward_sequence
                ]

            if i == output_expected_at_layer:
                output_predictor = self.predictors["output_layers_dict"]
                concat_layer = [
                    dynet.concatenate([f, b]) for f, b in zip(
                        forward_sequence, reversed(backward_sequence))
                ]
                if train and self.noise_sigma > 0.0:
                    concat_layer = [
                        dynet.noise(fe, self.noise_sigma)
                        for fe in concat_layer
                    ]
                output = output_predictor.predict_sequence(
                    concat_layer,
                    soft_labels=soft_labels,
                    temperature=temperature)
                return output

            prev = forward_sequence
            prev_rev = backward_sequence

        raise Exception("oops should not be here")
        return None
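Each word representation here is the word embedding concatenated with the final forward and final backward char-RNN states (f_char[-1] and b_char[-1]). A numpy sketch of the shapes involved, with hypothetical sizes:

    import numpy as np

    w_dim, c_hidden = 64, 50                 # hypothetical dimensions
    w = np.random.randn(w_dim)               # word embedding
    f_last = np.random.randn(c_hidden)       # f_char[-1], last forward char state
    b_last = np.random.randn(c_hidden)       # b_char[-1], last backward char state

    # mirrors dynet.concatenate([w, c, rev_c]) above
    token_repr = np.concatenate([w, f_last, b_last])
    assert token_repr.shape == (w_dim + 2 * c_hidden,)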
Example #8
    def predict(self,
                word_indices,
                char_indices,
                task_id,
                train=False,
                soft_labels=False,
                temperature=None,
                orthogonality_weight=0.0,
                domain_id=None):
        """
        predict tags for a sentence represented as char+word embeddings
        :param domain_id: Predict adversarial loss if domain id is provided.
        """
        dynet.renew_cg()  # new graph

        char_emb = []
        rev_char_emb = []

        wfeatures = [self.wembeds[w] for w in word_indices]

        if self.c_in_dim > 0:
            # get representation for words
            for chars_of_token in char_indices:
                char_feats = [self.cembeds[c] for c in chars_of_token]
                # use last state as word representation
                f_char, b_char = self.char_rnn.predict_sequence(
                    char_feats, char_feats)
                last_state = f_char[-1]
                rev_last_state = b_char[-1]
                char_emb.append(last_state)
                rev_char_emb.append(rev_last_state)

            features = [
                dynet.concatenate([w, c, rev_c])
                for w, c, rev_c in zip(wfeatures, char_emb, rev_char_emb)
            ]
        else:
            features = wfeatures

        if train:  # only do at training time
            features = [dynet.noise(fe, self.noise_sigma) for fe in features]

        output_expected_at_layer = self.h_layers
        output_expected_at_layer -= 1

        # go through layers
        prev = features
        prev_rev = features
        num_layers = self.h_layers
        constraint = 0
        adv_loss = 0
        for i in range(0, num_layers):
            predictor = self.predictors["inner"][i]
            forward_sequence, backward_sequence = predictor.predict_sequence(
                prev, prev_rev)
            if i > 0 and self.activation:
                # activation between LSTM layers
                forward_sequence = [
                    self.activation(s) for s in forward_sequence
                ]
                backward_sequence = [
                    self.activation(s) for s in backward_sequence
                ]

            if i == output_expected_at_layer:

                concat_layer = [
                    dynet.concatenate([f, b]) for f, b in zip(
                        forward_sequence, reversed(backward_sequence))
                ]
                if train and self.noise_sigma > 0.0:
                    concat_layer = [
                        dynet.noise(fe, self.noise_sigma)
                        for fe in concat_layer
                    ]

                if task_id not in ["src", "trg"]:
                    output_predictor = self.predictors["output_layers_dict"][
                        task_id]
                    output = output_predictor.predict_sequence(
                        concat_layer,
                        soft_labels=soft_labels,
                        temperature=temperature)
                else:
                    # one src example for all three outputs
                    output = []  # in this case it is a list
                    for t_id in self.task_ids:
                        output_predictor = self.predictors[
                            "output_layers_dict"][t_id]
                        output_t = output_predictor.predict_sequence(
                            concat_layer,
                            soft_labels=soft_labels,
                            temperature=temperature)
                        output.append(output_t)

                if orthogonality_weight != 0 and task_id != "Ft":
                    # put the orthogonality constraint either directly on the
                    # output layer or on the hidden layer if it's an MLP
                    # use orthogonality_weight only between F0 and F1
                    builder = self.predictors["output_layers_dict"][
                        "F0"].network_builder
                    task_param = builder.W_mlp if self.add_hidden else builder.W
                    task_W = dynet.parameter(task_param)

                    builder = self.predictors["output_layers_dict"][
                        "F1"].network_builder
                    other_param = builder.W_mlp if self.add_hidden else builder.W
                    other_task_W = dynet.parameter(other_param)

                    # calculate the matrix product of the task matrix with the other
                    matrix_product_1 = dynet.transpose(task_W) * other_task_W

                    # take the squared Frobenius norm by squaring
                    # every element and then summing them
                    squared_frobenius_norm = dynet.sum_elems(
                        dynet.square(matrix_product_1))
                    constraint = squared_frobenius_norm

                    #print('Constraint with first matrix:', squared_frobenius_norm.value())

                if domain_id is not None:
                    # flip the gradient when back-propagating through here
                    adv_input = dynet.flip_gradient(
                        concat_layer[-1])  # last state
                    adv_output = self.adv_layer(adv_input)
                    adv_loss = self.pick_neg_log(adv_output, domain_id)
                    #print('Adversarial loss:', avg_adv_loss.value())

                # output is list if task_id = 'src'
                return output, constraint, adv_loss

            prev = forward_sequence
            prev_rev = backward_sequence

        raise Exception("oops should not be here")
        return None
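flip_gradient is the identity in the forward pass and negates the gradient in the backward pass, so the shared features are pushed to be domain-indistinguishable while adv_layer still learns to predict the domain. pick_neg_log is not shown above; a minimal numpy sketch under the usual reading, the negative log-probability of the gold domain (scores hypothetical):

    import numpy as np

    def pick_neg_log_sketch(scores, gold_id):
        # hypothetical stand-in for self.pick_neg_log: softmax cross-entropy
        probs = np.exp(scores - scores.max())
        probs /= probs.sum()
        return -np.log(probs[gold_id])

    adv_scores = np.array([1.2, -0.3, 0.5])   # hypothetical domain scores
    loss = pick_neg_log_sketch(adv_scores, gold_id=0)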