Example #1
    def pick_neg_log(self, pred, gold):
        # TODO make this a static function in both classes
        if not isinstance(gold, int) and not isinstance(gold, np.int64):
            # calculate cross-entropy loss against the whole vector
            dy_gold = dynet.inputVector(gold)
            return -dynet.sum_elems(dynet.cmult(dy_gold, dynet.log(pred)))
        return -dynet.log(dynet.pick(pred, gold))
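
The helper above returns the negative log-probability of a single gold class when gold is an integer, and a full cross-entropy against a soft label vector otherwise. A minimal standalone sketch of the same logic, with toy inputs and outside the original class:

import dynet
import numpy as np

def pick_neg_log(pred, gold):
    if not isinstance(gold, (int, np.integer)):
        # soft labels: cross-entropy against the whole distribution
        dy_gold = dynet.inputVector(gold)
        return -dynet.sum_elems(dynet.cmult(dy_gold, dynet.log(pred)))
    # hard label: negative log-probability of the gold class
    return -dynet.log(dynet.pick(pred, gold))

dynet.renew_cg()
pred = dynet.softmax(dynet.inputVector([0.1, 2.0, 0.3]))
print(pick_neg_log(pred, 1).value())                 # integer gold label
print(pick_neg_log(pred, [0.1, 0.8, 0.1]).value())   # soft label distribution
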
Example #2
    def predict(self,
                feature_vector,
                task_ids,
                train=False,
                soft_labels=False,
                temperature=None,
                dropout_rate=0.0,
                orthogonality_weight=0.0,
                domain_id=None):
        dynet.renew_cg()  # new graph

        feature_vector = feature_vector.toarray()
        feature_vector = np.squeeze(feature_vector, axis=0)

        # self.input = dynet.vecInput(self.vocab_size)
        # self.input.set(feature_vector)
        # TODO this takes too long; can we speed this up somehow?
        input = dynet.inputVector(feature_vector)
        for i in range(self.h_layers):
            if train:  # add some noise
                input = dynet.noise(input, self.noise_sigma)
                input = dynet.dropout(input, dropout_rate)
            input = self.layers[i](input)
        outputs = []
        for task_id in task_ids:
            output = self.output_layers_dict[task_id](input,
                                                      soft_labels=soft_labels,
                                                      temperature=temperature)
            outputs.append(output)

        constraint, adv_loss = 0, 0
        if orthogonality_weight != 0:
            # put the orthogonality constraint either directly on the
            # output layer or on the hidden layer if it's an MLP
            F0_layer = self.output_layers_dict["F0"]
            F1_layer = self.output_layers_dict["F1"]
            F0_param = F0_layer.W_mlp if self.add_hidden else F0_layer.W
            F1_param = F1_layer.W_mlp if self.add_hidden else F1_layer.W
            F0_W = dynet.parameter(F0_param)
            F1_W = dynet.parameter(F1_param)

            # calculate the matrix product of the task matrix with both others
            matrix_product = dynet.transpose(F0_W) * F1_W

            # take the squared Frobenius norm by squaring
            # every element and then summing them
            squared_frobenius_norm = dynet.sum_elems(
                dynet.square(matrix_product))
            constraint += squared_frobenius_norm
            # print('Constraint with first matrix:', squared_frobenius_norm.value())

        if domain_id is not None:
            # flip the gradient when back-propagating through here
            adv_input = dynet.flip_gradient(input)  # last state
            adv_output = self.adv_layer(adv_input)
            adv_loss = self.pick_neg_log(adv_output, domain_id)
            # print('Adversarial loss:', avg_adv_loss.value())
        return outputs, constraint, adv_loss
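
The orthogonality term above is the squared Frobenius norm of the product of the two task-specific weight matrices, which pushes F0 and F1 toward orthogonal feature subspaces. A small NumPy sketch with toy matrices (independent of the model) shows that squaring every element of the product and summing gives exactly that norm:

import numpy as np

rng = np.random.default_rng(0)
F0_W = rng.normal(size=(16, 8))   # toy stand-ins for the two task matrices
F1_W = rng.normal(size=(16, 8))

matrix_product = F0_W.T @ F1_W
constraint = np.sum(matrix_product ** 2)   # square every element, then sum
assert np.isclose(constraint, np.linalg.norm(matrix_product, "fro") ** 2)
# constraint is 0 only when every column of F0_W is orthogonal to every column of F1_W
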
Example #3
    def get_coverage(self, a_t, prev_coverage, training=True):
        # accumulate attention into a running coverage vector; at training
        # time also return the coverage penalty (sum of elementwise minima)
        if not self.coverage:
            if not training:
                return None
            return dy.scalarInput(0), None
        coverage = a_t + prev_coverage
        if training:
            return (
                dy.sum_elems(dy.min_dim(dy.concatenate([a_t, coverage], d=1), d=1)),
                coverage,
            )
        return coverage
Example #4
    def __call__(self, x, soft_labels=False, temperature=None, train=False):
        if self.mlp:
            act = self.mlp_activation
            x_in = act(self.W_mlp * x + self.b_mlp)
        else:
            x_in = x

        logits = self.W*x_in + self.b
        if soft_labels and temperature:
            # calculate the soft labels smoothed with the temperature
            # see Distilling the Knowledge in a Neural Network
            elems = dynet.exp(logits / temperature)
            return dynet.cdiv(elems, dynet.sum_elems(elems))
        if self.act:
            return self.act(logits)
        return logits
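
When soft_labels and temperature are set, the layer returns a temperature-smoothed softmax over the logits, following "Distilling the Knowledge in a Neural Network". A standalone sketch of that smoothing with toy logits, independent of the layer's parameters:

import dynet

dynet.renew_cg()
logits = dynet.inputVector([2.0, 1.0, 0.1])   # toy logits
temperature = 2.0

elems = dynet.exp(logits / temperature)
soft = dynet.cdiv(elems, dynet.sum_elems(elems))

print(dynet.softmax(logits).value())   # plain softmax: peaked
print(soft.value())                    # temperature-smoothed: flatter distribution
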
Example #5
    def get_loss_and_prediction(self, input, targets, epsilon=1e-10):
        layers = self.compute_output_layer(input)
        #dim = layers[-1].dim()[0][0]
        #ts = np.ones(dim)
        #for t in targets:
        #ts[t] = 0

        #e = dy.inputTensor(ts)
        #me = - e
        #last = dy.cmult(layers[-1], me) + e

        ys = dy.vecInput(self.dim_out)
        ys.set([1 if i in targets else 0 for i in range(self.dim_out)])
        loss = dy.binary_log_loss(layers[-1], ys)

        output = layers[-1].value()
        res = {i for i, v in enumerate(output) if v > 0.5}

        return dy.sum_elems(loss), res
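
get_loss_and_prediction builds a multi-hot target vector, scores the network's final layer against it with binary log loss, and thresholds the outputs at 0.5 to form the predicted label set. A minimal sketch of the target construction and thresholding, with toy sizes and a plain sigmoid vector standing in for layers[-1]:

import dynet as dy

dy.renew_cg()
dim_out = 4
targets = {0, 2}                       # gold label indices for a multi-label example

ys = dy.vecInput(dim_out)              # multi-hot gold vector
ys.set([1 if i in targets else 0 for i in range(dim_out)])

output = dy.logistic(dy.inputVector([2.0, -1.0, 0.5, -3.0]))  # stand-in for layers[-1]
loss = dy.binary_log_loss(output, ys)
predicted = {i for i, v in enumerate(output.value()) if v > 0.5}
print(loss.value(), predicted)
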
Example #6
    def __call__(self, x, soft_labels=False, temperature=None):
        if self.mlp:
            W_mlp = dynet.parameter(self.W_mlp)
            b_mlp = dynet.parameter(self.b_mlp)
            act = self.mlp_activation
            x_in = act(W_mlp * x + b_mlp)
        else:
            x_in = x
        # from params to expressions
        W = dynet.parameter(self.W)
        b = dynet.parameter(self.b)

        logits = (W * x_in + b) + dynet.scalarInput(1e-15)
        if soft_labels and temperature:
            # calculate the soft labels smoothed with the temperature
            # see Distilling the Knowledge in a Neural Network
            elems = dynet.exp(logits / temperature)
            return dynet.cdiv(elems, dynet.sum_elems(elems))
        return self.act(logits)
Example #7
    def get_loss(self, input, targets, epsilon=1e-10):
        layers = self.compute_output_layer(input)

        #ts = np.ones(dim)
        #for t in targets:
        #ts[t] = 0

        #print()
        #print("output", layers[-1].value())
        #print(targets)
        #e = dy.inputTensor(ts)
        #me = - e
        #last = dy.cmult(layers[-1], me) + e

        #print("gradient", last.value())
        #log_loss = dy.log(last + epsilon)
        #print(log_loss.value())
        ys = dy.vecInput(self.dim_out)
        ys.set([1 if i in targets else 0 for i in range(self.dim_out)])
        loss = dy.binary_log_loss(layers[-1], ys)
        return dy.sum_elems(loss)
Example #8
    def __call__(self, x, soft_labels=False, temperature=None, train=False):
        if self.mlp:
            W_mlp = dynet.parameter(self.W_mlp)
            b_mlp = dynet.parameter(self.b_mlp)
            act = self.mlp_activation
            x_in = act(W_mlp * x + b_mlp)
        else:
            x_in = x
        # from params to expressions
        W = dynet.parameter(self.W)
        b = dynet.parameter(self.b)

        logits = W*x_in + b
        if soft_labels and temperature:
            # calculate the soft labels smoothed with the temperature
            # see Distilling the Knowledge in a Neural Network
            elems = dynet.exp(logits / temperature)
            return dynet.cdiv(elems, dynet.sum_elems(elems))
        if self.act:
            return self.act(logits)
        return logits
Example #9
    def pick_neg_log(self, pred, gold):
        if hasattr(gold, "__len__"):
            # calculate cross-entropy loss against the whole vector
            dy_gold = dynet.inputVector(gold)
            return -dynet.sum_elems(dynet.cmult(dy_gold, dynet.log(pred)))
        return -dynet.log(dynet.pick(pred, gold))
Example #10
    def predict(self,
                word_indices,
                char_indices,
                task_id,
                train=False,
                soft_labels=False,
                temperature=None,
                orthogonality_weight=0.0,
                domain_id=None):
        """
        predict tags for a sentence represented as char+word embeddings
        :param domain_id: Predict adversarial loss if domain id is provided.
        """
        dynet.renew_cg()  # new graph

        char_emb = []
        rev_char_emb = []

        wfeatures = [self.wembeds[w] for w in word_indices]

        if self.c_in_dim > 0:
            # get representation for words
            for chars_of_token in char_indices:
                char_feats = [self.cembeds[c] for c in chars_of_token]
                # use last state as word representation
                f_char, b_char = self.char_rnn.predict_sequence(
                    char_feats, char_feats)
                last_state = f_char[-1]
                rev_last_state = b_char[-1]
                char_emb.append(last_state)
                rev_char_emb.append(rev_last_state)

            features = [
                dynet.concatenate([w, c, rev_c])
                for w, c, rev_c in zip(wfeatures, char_emb, rev_char_emb)
            ]
        else:
            features = wfeatures

        if train:  # only do at training time
            features = [dynet.noise(fe, self.noise_sigma) for fe in features]

        output_expected_at_layer = self.h_layers
        output_expected_at_layer -= 1

        # go through layers
        prev = features
        prev_rev = features
        num_layers = self.h_layers
        constraint = 0
        adv_loss = 0
        for i in range(0, num_layers):
            predictor = self.predictors["inner"][i]
            forward_sequence, backward_sequence = predictor.predict_sequence(
                prev, prev_rev)
            if i > 0 and self.activation:
                # activation between LSTM layers
                forward_sequence = [
                    self.activation(s) for s in forward_sequence
                ]
                backward_sequence = [
                    self.activation(s) for s in backward_sequence
                ]

            if i == output_expected_at_layer:

                concat_layer = [
                    dynet.concatenate([f, b]) for f, b in zip(
                        forward_sequence, reversed(backward_sequence))
                ]
                if train and self.noise_sigma > 0.0:
                    concat_layer = [
                        dynet.noise(fe, self.noise_sigma)
                        for fe in concat_layer
                    ]

                if task_id not in ["src", "trg"]:
                    output_predictor = self.predictors["output_layers_dict"][
                        task_id]
                    output = output_predictor.predict_sequence(
                        concat_layer,
                        soft_labels=soft_labels,
                        temperature=temperature)
                else:
                    # one src example for all three outputs
                    output = []  # in this case it is a list
                    for t_id in self.task_ids:
                        output_predictor = self.predictors[
                            "output_layers_dict"][t_id]
                        output_t = output_predictor.predict_sequence(
                            concat_layer,
                            soft_labels=soft_labels,
                            temperature=temperature)
                        output.append(output_t)

                if orthogonality_weight != 0 and task_id != "Ft":
                    # put the orthogonality constraint either directly on the
                    # output layer or on the hidden layer if it's an MLP
                    # use orthogonality_weight only between F0 and F1
                    builder = self.predictors["output_layers_dict"][
                        "F0"].network_builder
                    task_param = builder.W_mlp if self.add_hidden else builder.W
                    task_W = dynet.parameter(task_param)

                    builder = self.predictors["output_layers_dict"][
                        "F1"].network_builder
                    other_param = builder.W_mlp if self.add_hidden else builder.W
                    other_task_W = dynet.parameter(other_param)

                    # calculate the matrix product of the task matrix with the other
                    matrix_product_1 = dynet.transpose(task_W) * other_task_W

                    # take the squared Frobenius norm by squaring
                    # every element and then summing them
                    squared_frobenius_norm = dynet.sum_elems(
                        dynet.square(matrix_product_1))
                    constraint = squared_frobenius_norm

                    #print('Constraint with first matrix:', squared_frobenius_norm.value())

                if domain_id is not None:
                    # flip the gradient when back-propagating through here
                    adv_input = dynet.flip_gradient(
                        concat_layer[-1])  # last state
                    adv_output = self.adv_layer(adv_input)
                    adv_loss = self.pick_neg_log(adv_output, domain_id)
                    #print('Adversarial loss:', avg_adv_loss.value())

                # output is list if task_id = 'src'
                return output, constraint, adv_loss

            prev = forward_sequence
            prev_rev = backward_sequence

        raise Exception("oops should not be here")
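
The adversarial branch above passes the last hidden state through dynet.flip_gradient, which is the identity on the forward pass but negates the gradient on the backward pass, so minimising the domain classifier's loss pushes the shared features toward domain invariance. A small sketch (toy parameter vector, unrelated to the model above) showing the sign flip:

import dynet

model = dynet.ParameterCollection()
p = model.add_parameters(3)

dynet.renew_cg()
x = dynet.parameter(p)
loss = dynet.sum_elems(dynet.flip_gradient(x))   # identity forward, reversed gradient backward
loss.value()
loss.backward()
print(p.grad_as_array())   # [-1., -1., -1.] instead of [1., 1., 1.]
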
Example #11
    def pick_neg_log(self, pred, gold):
        if not isinstance(gold, int):
            # calculate cross-entropy loss against the whole vector
            dy_gold = dynet.inputVector(gold)
            return -dynet.sum_elems(dynet.cmult(dy_gold, dynet.log(pred)))
        return -dynet.log(dynet.pick(pred, gold))