Example #1
    def word_assoc_score(self, source_idx, target_idx, relation):
        """
        NOTE THAT DROPOUT IS BEING APPLIED HERE
        :param source_idx: embedding index of source atom
        :param target_idx: embedding index of target atom
        :param relation: relation type
        :return: score
        """
        # prepare
        s = self.embeddings[source_idx]
        if self.no_assoc:
            A = dy.const_parameter(self.word_assoc_weights[relation])
        else:
            A = dy.parameter(self.word_assoc_weights[relation])
        # dy.dropout returns a new expression; reassign it so dropout actually takes effect
        A = dy.dropout(A, self.dropout)
        t = self.embeddings[target_idx]

        # compute
        if self.mode == BILINEAR_MODE:
            return dy.transpose(s) * A * t
        elif self.mode == DIAG_RANK1_MODE:
            diag_A = dyagonalize(A[0])
            rank1_BC = A[1] * dy.transpose(A[2])
            ABC = diag_A + rank1_BC
            return dy.transpose(s) * ABC * t
        elif self.mode == TRANSLATIONAL_EMBED_MODE:
            return -dy.l2_norm(s - t + A)
        elif self.mode == DISTMULT:
            return dy.sum_elems(dy.cmult(dy.cmult(s, A), t))
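The no_assoc branch above wraps the relation weights in dy.const_parameter so they stay fixed while the rest of the model trains. Below is a minimal, self-contained sketch of that behaviour; the 3x3 matrix and the toy vectors are invented for illustration and are not part of the original model:

import dynet as dy

pc = dy.ParameterCollection()
trainer = dy.SimpleSGDTrainer(pc)
W = pc.add_parameters((3, 3))          # toy stand-in for one relation's weight matrix

dy.renew_cg()
s = dy.inputVector([0.1, 0.2, 0.3])
t = dy.inputVector([0.3, 0.1, 0.2])

A = dy.const_parameter(W)              # frozen, as in the no_assoc case
# A = dy.parameter(W)                  # trainable, as in the default case

score = dy.transpose(s) * A * t        # bilinear score, cf. BILINEAR_MODE above
loss = -score
loss.value()
loss.backward()
trainer.update()                       # W is left unchanged: no gradient reaches it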
Example #2
 def __call__(self, word, alternative=None, const=False):
     # look up the embedding index of `word`; index 0 is the fallback (unknown-word) entry
     idx = self.vocab.get(word, 0)
     # if the word is unknown, fall back to the first known alternative form
     if idx == 0 and alternative is not None:
         for word_i in alternative:
             idx = self.vocab.get(word_i, 0)
             if idx != 0:
                 break
     # with const=True, return the embedding from a constant (non-updated) copy of the table
     return self.lookup[idx] if not const else dn.transpose(
         dn.const_parameter(self.lookup))[idx]
Example #3
number_of_learning_cycles = 2

m = dy.Model()
input = dy.vecInput(sizes[0])
trainer = dy.SimpleSGDTrainer(m)
values = [0] * number_of_nodes
weight = [0] * maximum
bias = [0] * maximum
a = 0

for i in range(maximum):
    if i == 0:
        # first layer: reads directly from the input vector
        a = matrices[i]
        weight[i] = m.add_parameters(a.shape, init=a)
        bias = m.add_parameters((len(layer_nodes[i + 1]),))
        # constant copy of the initial matrix, used as a fixed elementwise mask
        con = dy.const_parameter(m.add_parameters(a.shape, init=a))
        weight[i] = dy.cmult(weight[i], con)
        result = weight[i] * input
        result = dy.logistic(result + bias)
        for j in range(len(layer_nodes[i + 1])):
            values[layer_nodes[i + 1][j]] = result[j]
    else:
        # later layers: gather the activations of their input nodes
        inp = []
        for node in input_layer[i]:
            inp.extend([values[node]])
        a = matrices[i]
        weight[i] = m.add_parameters(a.shape, init=a)
        inp = dy.concatenate(inp)
        bias = m.add_parameters((len(layer_nodes[i + 1]),))
        con = dy.const_parameter(m.add_parameters(a.shape, init=a))
        weight[i] = dy.cmult(weight[i], con)
        # forward step for this layer, mirroring the first-layer computation above
        result = dy.logistic(weight[i] * inp + bias)
        for j in range(len(layer_nodes[i + 1])):
            values[layer_nodes[i + 1][j]] = result[j]
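The example above multiplies each trainable weight matrix elementwise by a constant copy of its initial values, so the effective weights stay zero wherever the initial matrix has a zero (a fixed connectivity mask). A small sketch of just that pattern, with an invented 2x2 identity mask:

import dynet as dy
import numpy as np

m = dy.ParameterCollection()
dy.renew_cg()

a = np.array([[1.0, 0.0], [0.0, 1.0]])                        # fixed connectivity pattern
weight = m.add_parameters(a.shape, init=a)                     # trainable copy
con = dy.const_parameter(m.add_parameters(a.shape, init=a))    # frozen copy
masked = dy.cmult(weight, con)                                 # off-diagonal entries stay zero
print(masked.npvalue())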
Example #4
    def predict(self, features, task_name, train=False):
        """
        Steps through the computation graph and obtains predictions for the
        provided input features.
        :param features: a list of word  embeddings for every word in the sequence
        :param task_name: the name of the task that should be predicted
        :param train: if the model is training; apply noise in this case
        :return output: the output predictions
                penalty: the summed subspace penalty (0 if no constraint)
        """
        if train:  # noise is added only at training time
            features = [dynet.noise(fe, self.noise_sigma) for fe in features]

        # only if we use cross-stitch do we have a layer for each task;
        # otherwise we just have one layer for all tasks
        num_task_layers = len(self.task_names) if self.cross_stitch else 1
        num_layers = self.h_layers
        inputs = [features] * len(self.task_names)
        inputs_rev = [features] * len(self.task_names)

        target_task_id = self.task_names.index(
            task_name) if self.cross_stitch else 0

        # collect the forward and backward sequences for each task at every
        # layer for the layer connection units
        layer_forward_sequences = []
        layer_backward_sequences = []

        penalty = dynet.const_parameter(self.subspace_penalty)

        for i in range(0, num_layers):
            forward_sequences = []
            backward_sequences = []
            for j in range(num_task_layers):
                predictor = self.predictors['inner'][i][j]
                forward_sequence, backward_sequence = predictor.predict_sequence(
                    inputs[j], inputs_rev[j])
                if i > 0 and self.activation:
                    # activation between LSTM layers
                    forward_sequence = [
                        self.activation(s) for s in forward_sequence
                    ]
                    backward_sequence = [
                        self.activation(s) for s in backward_sequence
                    ]
                forward_sequences.append(forward_sequence)
                backward_sequences.append(backward_sequence)

                if self.num_subspaces == 2 and self.constraint_weight != 0:
                    # returns a list per layer, i.e. here a list with one item
                    lstm_parameters = \
                        predictor.builder.get_parameter_expressions()[0]

                    # lstm parameters consists of these weights:
                    # Wix,Wih,Wic,bi,Wox,Woh,Woc,bo,Wcx,Wch,bc
                    for param_idx in range(len(lstm_parameters)):
                        if param_idx in self.constrain_matrices:
                            W = lstm_parameters[param_idx]
                            W_shape = np.array(W.value()).shape

                            # treat vectors (e.g. biases) as single-column matrices
                            if len(W_shape) < 2:
                                W_shape = [W_shape[0], 1]

                            # split matrix into its two subspaces
                            W_subspaces = dynet.reshape(
                                W, (self.num_subspaces,
                                    W_shape[0] // self.num_subspaces,
                                    W_shape[1]))
                            subspace_1, subspace_2 = W_subspaces[
                                0], W_subspaces[1]

                            # calculate the matrix product of the two matrices
                            matrix_product = dynet.transpose(
                                subspace_1) * subspace_2

                            # take the squared Frobenius norm by squaring
                            # every element and then summing them
                            squared_frobenius_norm = dynet.sum_elems(
                                dynet.square(matrix_product))
                            penalty += squared_frobenius_norm

            if self.cross_stitch:
                # takes as input a list of input lists and produces a list of
                # outputs where the index indicates the task
                forward_sequences = self.predictors['cross_stitch'][i].stitch(
                    forward_sequences)
                backward_sequences = self.predictors['cross_stitch'][i].stitch(
                    backward_sequences)

            inputs = forward_sequences
            inputs_rev = backward_sequences
            layer_forward_sequences.append(forward_sequences)
            layer_backward_sequences.append(backward_sequences)

            if i == num_layers - 1:
                output_predictor = \
                    self.predictors['output_layers_dict'][task_name]

                # get the forward/backward states of all task layers
                task_forward_sequences = [
                    layer_seq_list[target_task_id][-1]
                    for layer_seq_list in layer_forward_sequences
                ]

                task_backward_sequences = [
                    layer_seq_list[target_task_id][0]
                    for layer_seq_list in layer_backward_sequences
                ]

                if (num_layers > 1):
                    forward_input = \
                        self.predictors['layer_stitch'][
                            target_task_id].stitch(task_forward_sequences)
                    backward_input = \
                        self.predictors['layer_stitch'][
                            target_task_id].stitch(task_backward_sequences)

                else:
                    forward_input = task_forward_sequences[0]
                    backward_input = task_backward_sequences[0]

                concat_layer = dynet.concatenate(
                    [forward_input, backward_input])

                if train and self.noise_sigma > 0.0:
                    concat_layer = dynet.noise(concat_layer, self.noise_sigma)

                output = []

                if 'sentiment' in task_name:  # multi-label: one output per label predictor
                    for k in range(len(output_predictor)):
                        output.append(output_predictor[k](concat_layer))
                else:
                    output.append(output_predictor(concat_layer))

                #output = output_predictor.predict_sequence(concat_layer)

                return output, penalty
        raise Exception('Error: This place should not be reached.')
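The subspace constraint computed in the loop above is the squared Frobenius norm of the product of the two subspaces of a constrained LSTM weight matrix. The following standalone sketch reproduces just that penalty term; the 4x3 input matrix and num_subspaces = 2 are toy values, not taken from the model:

import dynet
import numpy as np

dynet.renew_cg()
num_subspaces = 2
W = dynet.inputTensor(np.random.rand(4, 3))   # stand-in for one LSTM weight matrix

# split the matrix into its two subspaces along the first dimension
W_subspaces = dynet.reshape(W, (num_subspaces, 4 // num_subspaces, 3))
subspace_1, subspace_2 = W_subspaces[0], W_subspaces[1]

# squared Frobenius norm of subspace_1^T * subspace_2: square every element and sum
matrix_product = dynet.transpose(subspace_1) * subspace_2
penalty = dynet.sum_elems(dynet.square(matrix_product))
print(penalty.value())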
Example #5
    def fit(self, train_languages, num_epochs, patience, optimizer, train_dir,
            dev_dir):
        """
        Train the model, return the train and dev score
        :param train_language: the language used for training
        :param num_epochs: the max number of epochs the model should be trained
        :param patience: the patience to use for early stopping
        :param optimizer: the optimizer that should be used
        :param train_dir: the directory containing the training files
        :param dev_dir: the directory containing the development files

        """

        # check whether this is a loaded model: a fresh model has best_epoch == -1
        first_train = (self.best_epoch == -1)

        print("Reading training data from %s..." % train_dir, flush=True)
        train_X, train_Y, word2id = get_data(train_languages,
                                             self.task_names,
                                             word2id=self.word2id,
                                             task2label2id=self.task2label2id,
                                             data_dir=train_dir,
                                             train=first_train)
        print("Finished reading training data")

        print("Reading development data from %s..." % train_dir, flush=True)
        dev_X, dev_Y, _ = get_data(train_languages,
                                   self.task_names,
                                   word2id,
                                   self.task2label2id,
                                   data_dir=dev_dir,
                                   train=False)
        print("Finished reading development data")

        print('Length of training data:', len(train_X), flush=True)
        print('Length of development data:', len(dev_X), flush=True)

        if first_train:
            self.word2id = word2id

            print('Building the computation graph...', flush=True)
            self.predictors = self.build_computation_graph()

        if optimizer == SGD:
            trainer = dynet.SimpleSGDTrainer(self.model)
        elif optimizer == ADAM:
            trainer = dynet.AdamTrainer(self.model)
        else:
            raise ValueError('%s is not a valid optimizer.' % optimizer)

        train_data = list(zip(train_X, train_Y))

        num_iterations = 0
        num_epochs_no_improvement = 0

        train_score = {}
        dev_score = {}

        print('Training model with %s for %d epochs and patience of %d.' %
              (optimizer, num_epochs, patience))

        for epoch in range(self.best_epoch + 1, num_epochs):

            print('', flush=True)

            bar = Bar('Training epoch %d/%d...' % (epoch + 1, num_epochs),
                      max=len(train_data),
                      flush=True)

            # keep track of the # of updates, total loss, and total # of
            # predicted instances per task
            task2num_updates = {task: 0 for task in self.task_names}
            task2total_loss = {task: 0.0 for task in self.task_names}
            task2total_predicted = {task: 0.0 for task in self.task_names}
            total_loss = 0.0
            total_penalty = 0.0
            total_predicted = 0.0

            random.shuffle(train_data)

            # for every instance, we optimize the loss of the corresponding task
            for word_indices, task2label_id_seq in train_data:
                # get the concatenated word and char-based features for every
                # word in the sequence
                features = self.get_word_features(word_indices)

                for task, y in task2label_id_seq.items():

                    output, penalty = self.predict(features, task, train=True)

                    if task not in TASK_NAMES:
                        raise NotImplementedError('Task %s has not been '
                                                  'implemented yet.' % task)


                    loss = dynet.esum([pick_neg_log(o, gold) for \
                        o, gold in zip(output, y)])

                    lv = loss.value()

                    # sum the loss and the subspace constraint penalty

                    combined_loss = loss + dynet.const_parameter(
                        self.constraint_weight_param) * penalty

                    total_loss += lv
                    total_penalty += penalty.value()
                    total_predicted += 1
                    task2total_loss[task] += lv
                    task2total_predicted[task] += 1
                    task2num_updates[task] += 1

                    # back-propagate through the combined loss
                    combined_loss.backward()
                    trainer.update()
                bar.next()
                num_iterations += 1

            print(
                "\nEpoch %d. Loss per instance: %.3f. Penalty per instance: %.3f. "
                % (epoch + 1, total_loss / total_predicted,
                   total_penalty / total_predicted),
                end='',
                flush=True)

            print('Loss per instance by task: ')

            for task in task2total_loss.keys():
                print(
                    '%s: %.3f. ' %
                    (task, task2total_loss[task] / task2total_predicted[task]),
                    end='',
                    flush=True)
            print('', flush=True)

            # evaluate after every epoch

            # each item stores the avg train score (by task) for a particular language
            avg_train_score_by_task_list = []
            # each item stores the avg dev score (by task) for a particular language
            avg_dev_score_by_task_list = []
            # each item stores the size of a particular language's train set
            train_data_size_list = []
            # each item stores the size of a particular language's dev set
            dev_data_size_list = []

            for lang in train_languages:

                train_eval_X, train_eval_Y, _ = utils.get_data(
                    [lang],
                    self.task_names,
                    self.word2id,
                    self.task2label2id,
                    data_dir=train_dir,
                    train=False)
                train_data_size_list.append(len(train_eval_Y))

                dev_eval_X, dev_eval_Y, _ = utils.get_data(
                    [lang],
                    self.task_names,
                    self.word2id,
                    self.task2label2id,
                    data_dir=dev_dir,
                    train=False)
                dev_data_size_list.append(len(dev_eval_Y))

                train_score = self.evaluate(train_eval_X, train_eval_Y, lang,
                                            args.threshold)
                dev_score = self.evaluate(dev_eval_X, dev_eval_Y, lang,
                                          args.threshold)

                avg_train_score_by_task_list.append(
                    utils.average_by_task(train_score))
                avg_dev_score_by_task_list.append(
                    utils.average_by_task(dev_score))

                print('=' * 50)
                print('\tStart logging for {} in epoch {}'.format(
                    lang, epoch + 1))
                utils.log_fit(self.log_dir, epoch + 1, train_languages, lang,
                              self.task_names, train_score, dev_score)
                print('\tFinish logging for {} in epoch {}'.format(
                    lang, epoch + 1))

            # compute the weighted average over all languages and use it to
            # determine the overall performance of training
            total_train_size = len(train_Y)
            total_dev_size = len(dev_Y)

            avg_train_score = utils.average_by_lang(
                avg_train_score_by_task_list, train_data_size_list,
                total_train_size)
            avg_dev_score = utils.average_by_lang(avg_dev_score_by_task_list,
                                                  dev_data_size_list,
                                                  total_dev_size)

            if avg_dev_score > self.avg_dev_score:

                self.avg_dev_score = avg_dev_score
                self.avg_train_score = avg_train_score

                self.best_train_dict = train_score
                self.best_dev_dict = dev_score

                self.best_epoch = epoch
                num_epochs_no_improvement = 0
                print('Saving model to directory %s...' % self.model_dir,
                      flush=True)
                self.save()
            else:

                num_epochs_no_improvement += 1

            if num_epochs_no_improvement == patience:
                #dynet.load(self.model_file, self.model)
                break

        print('Finished training', flush=True)
        print('Loading the best performing model from %s...'\
                      % self.model_dir, flush=True)

        self.model.populate(self.model_file)

        return self.best_train_dict, self.best_dev_dict, self.avg_train_score, self.avg_dev_score
Example #6
def parameters(*params, **kwargs):
    # treat the parameters as trainable unless trainable=False is passed
    trainable = kwargs.get('trainable', True)
    yield tuple(dy.parameter(p) if trainable else dy.const_parameter(p)
                for p in params)
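A hypothetical usage sketch for the generator above (pW and pb are made-up parameters, and import dynet as dy is assumed, matching the dy prefix in the snippet). The generator yields the converted expressions once, as a single tuple:

pc = dy.ParameterCollection()
pW = pc.add_parameters((4, 8))
pb = pc.add_parameters((4,))

dy.renew_cg()
for W, b in parameters(pW, pb):                      # trainable expressions
    h = dy.tanh(W * dy.inputVector([0.0] * 8) + b)

W_fixed, b_fixed = next(parameters(pW, pb, trainable=False))   # constant expressions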