Example no. 1
    def test_parameter_updated_with_training(self):
        """Test whether the weight matrices are updated during training."""
        matrix_layer_before_training = self.access_named_parameter(self.model, "_matrix_layer.weight")
        sp_matrix_1_before_training = self.access_named_parameter(self.model, "_specific_matrix_1.weight")
        sp_matrix_2_before_training = self.access_named_parameter(self.model, "_specific_matrix_2.weight")
        for epoch in range(0, 5):
            self.optimizer.zero_grad()
            composed, rep_1, rep_2 = self.model(self.input_1)
            loss_1 = loss_functions.get_loss_cosine_distance(original_phrase=self.input_1["l"], composed_phrase=rep_1,
                                                             dim=1, normalize=False)
            composed, rep_1, rep_2 = self.model(self.input_2)
            loss_2 = loss_functions.get_loss_cosine_distance(original_phrase=self.input_2["l"], composed_phrase=rep_2,
                                                             dim=1, normalize=False)
            loss = loss_1 + loss_2
            loss.backward()
            self.optimizer.step()
            matrix_layer_after_training = self.access_named_parameter(self.model, "_matrix_layer.weight")
            sp_matrix_1_after_training = self.access_named_parameter(self.model, "_specific_matrix_1.weight")
            sp_matrix_2_after_training = self.access_named_parameter(self.model, "_specific_matrix_2.weight")
        difference_matrix_layer = torch.sum(
            matrix_layer_before_training - matrix_layer_after_training).item()
        difference_sp_matrix_1 = torch.sum(
            sp_matrix_1_before_training - sp_matrix_1_after_training).item()
        difference_sp_matrix_2 = torch.sum(
            sp_matrix_2_before_training - sp_matrix_2_after_training).item()

        np.testing.assert_equal(difference_matrix_layer != 0.0, True)
        np.testing.assert_equal(difference_sp_matrix_1 != 0.0, True)
        np.testing.assert_equal(difference_sp_matrix_2 != 0.0, True)

    def test_model_loss(self):
        """Test whether the model can be run and whether the loss is a non-negative number."""
        self.optimizer.zero_grad()
        composed, rep_1, rep_2 = self.model(self.input_1)
        loss_1 = loss_functions.get_loss_cosine_distance(original_phrase=self.input_1["l"], composed_phrase=rep_1,
                                                         dim=1, normalize=False).item()
        composed, rep_1, rep_2 = self.model(self.input_2)
        loss_2 = loss_functions.get_loss_cosine_distance(original_phrase=self.input_2["l"], composed_phrase=rep_2,
                                                         dim=1, normalize=False).item()
        np.testing.assert_equal(math.isnan(loss_1), False)
        np.testing.assert_equal(math.isnan(loss_2), False)

        np.testing.assert_equal(loss_1 >= 0, True)
        np.testing.assert_equal(loss_2 >= 0, True)
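
These before/after comparisons only tell us something if access_named_parameter returns a detached snapshot of the parameter rather than a live reference; otherwise both variables would point at the same tensor and every difference would be zero. The helper itself is not part of these excerpts, so the following is only a sketch of what it presumably does (written here as a plain function rather than a test-class method):

def access_named_parameter(model, parameter_name):
    # Hypothetical sketch: look up a parameter by name and return a detached
    # copy, so that later optimizer steps cannot mutate the snapshot in place.
    for name, parameter in model.named_parameters():
        if name == parameter_name:
            return parameter.clone().detach()
    raise KeyError("no parameter named %r" % parameter_name)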
Example no. 3
    def test_parameter_get_updated(self):
        """Test whether initial weight matrices are being updated during training. These parameters should be different
        after training vs before training."""
        tw_tensor_before_training = self.access_named_parameter(
            self.model, "_transformation_tensor")
        combining_tensor_1_before_training = self.access_named_parameter(
            self.model, "_combining_tensor_1")
        combining_tensor_2_before_training = self.access_named_parameter(
            self.model, "_combining_tensor_2")

        for epoch in range(0, 10):
            self.optimizer.zero_grad()
            composed, rep_1, rep_2 = self.model(self.input_1)
            loss_1 = loss_functions.get_loss_cosine_distance(
                original_phrase=self.input_1["l"],
                composed_phrase=rep_1,
                dim=1,
                normalize=False)
            composed, rep_1, rep_2 = self.model(self.input_2)
            loss_2 = loss_functions.get_loss_cosine_distance(
                original_phrase=self.input_2["l"],
                composed_phrase=rep_2,
                dim=1,
                normalize=False)
            loss = loss_1 + loss_2
            loss.backward()
            self.optimizer.step()
            tw_tensor_after_training = self.access_named_parameter(
                self.model, "_transformation_tensor")
            combining_tensor_1_after_training = self.access_named_parameter(
                self.model, "_combining_tensor_1")
            combining_tensor_2_after_training = self.access_named_parameter(
                self.model, "_combining_tensor_2")
        difference_combining_tensor_1 = torch.sum(
            combining_tensor_1_before_training -
            combining_tensor_1_after_training).item()
        difference_combining_tensor_2 = torch.sum(
            combining_tensor_2_before_training -
            combining_tensor_2_after_training).item()
        difference_combining_tensors = torch.sum(
            combining_tensor_1_after_training -
            combining_tensor_2_after_training).item()
        difference_tw_tensor = torch.sum(tw_tensor_before_training -
                                         tw_tensor_after_training).item()
        np.testing.assert_equal(difference_combining_tensor_1 != 0.0, True)
        np.testing.assert_equal(difference_combining_tensor_2 != 0.0, True)
        np.testing.assert_equal(difference_combining_tensors != 0.0, True)
        np.testing.assert_equal(difference_tw_tensor != 0.0, True)
def predict(test_loader, model, device):
    """
    predicts labels on unseen data (test set)
    :param test_loader: dataloader torch object with test- or validation data
    :param model: trained model
    :param device: the device
    :return: predictions and losses for the learned attribute and the final composed representation, the original
    phrases
    """
    test_loss_att = []
    test_loss_final = []
    test_loss_reconstructed = []
    predictions_final_rep = []
    predictions_attribute_rep = []
    predictions_reconstructed_rep = []
    orig_phrases = []
    model.to(device)
    model.eval()  # deterministic inference (e.g. disables dropout)
    for batch in test_loader:
        batch["device"] = device
        composed, rep1, rep2 = model(batch)
        composed = composed.squeeze().to("cpu")
        rep2 = rep2.squeeze().to("cpu")
        rep1 = rep1.squeeze().to("cpu")
        for pred in rep2:
            predictions_attribute_rep.append(pred.detach().numpy())
        for pred in composed:
            predictions_final_rep.append(pred.detach().numpy())
        for pred in rep1:
            predictions_reconstructed_rep.append(pred.detach().numpy())
        loss_att = get_loss_cosine_distance(composed_phrase=rep2,
                                            original_phrase=batch["l"])
        loss_reconstructed = get_loss_cosine_distance(
            composed_phrase=rep1, original_phrase=batch["l"])
        loss_final = get_loss_cosine_distance(composed_phrase=composed,
                                              original_phrase=batch["l"])
        test_loss_att.append(loss_att.item())
        test_loss_reconstructed.append(loss_reconstructed.item())
        test_loss_final.append(loss_final.item())
        orig_phrases.append(batch["label"])
    orig_phrases = [item for sublist in orig_phrases for item in sublist]
    predictions_final_rep = np.array(predictions_final_rep)
    predictions_attribute_rep = np.array(predictions_attribute_rep)
    predictions_reconstructed_rep = np.array(predictions_reconstructed_rep)
    return predictions_final_rep, predictions_attribute_rep, predictions_reconstructed_rep, np.average(
        test_loss_final), np.average(test_loss_att), np.average(
            test_loss_reconstructed), orig_phrases
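
For illustration, a hypothetical call to predict (the loader, model, and device are placeholders, not names from these excerpts):

(preds_final, preds_attribute, preds_reconstructed,
 loss_final, loss_attribute, loss_reconstructed, phrases) = predict(
    test_loader, model, device="cpu")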
Example no. 5
    def test_model_loss(self):
        """Test whether the model can be run and whether the loss is a non-negative number."""
        self.optimizer.zero_grad()
        composed = self.model(self.input)
        loss = loss_functions.get_loss_cosine_distance(original_phrase=self.input["l"], composed_phrase=composed,
                                                       dim=1, normalize=False).item()
        np.testing.assert_equal(math.isnan(loss), False)

        np.testing.assert_equal(loss >= 0, True)

    def test_parameter_updated_with_training(self):
        """Test whether the weight matrices are updated during training."""
        adj_1_before_training = self.access_named_parameter(self.model, "_adj_matrix_1")
        adj_2_before_training = self.access_named_parameter(self.model, "_adj_matrix_2")
        noun_1_before_training = self.access_named_parameter(self.model, "_noun_matrix_1")
        noun_2_before_training = self.access_named_parameter(self.model, "_noun_matrix_2")

        general_adj_weights_before_training = self.access_named_parameter(self.model, "_general_adj_matrix")
        general_noun_weights_before_training = self.access_named_parameter(self.model, "_general_noun_matrix")

        for epoch in range(0, 5):
            self.optimizer.zero_grad()
            composed, rep_1, rep_2 = self.model(self.input_1)
            loss_1 = loss_functions.get_loss_cosine_distance(original_phrase=self.input_1["l"], composed_phrase=rep_1,
                                                             dim=1, normalize=False)
            composed, rep_1, rep_2 = self.model(self.input_2)
            loss_2 = loss_functions.get_loss_cosine_distance(original_phrase=self.input_2["l"], composed_phrase=rep_2,
                                                             dim=1, normalize=False)
            loss = loss_1 + loss_2
            loss.backward()
            self.optimizer.step()
            adj_1_after_training = self.access_named_parameter(self.model, "_adj_matrix_1")
            adj_2_after_training = self.access_named_parameter(self.model, "_adj_matrix_2")
            noun_1_after_training = self.access_named_parameter(self.model, "_noun_matrix_1")
            noun_2_after_training = self.access_named_parameter(self.model, "_noun_matrix_2")
            general_adj_weights_after_training = self.access_named_parameter(self.model, "_general_adj_matrix")
            general_noun_weights_after_training = self.access_named_parameter(self.model, "_general_noun_matrix")
            
        difference_adj1_layer = torch.sum(
            adj_1_before_training - adj_1_after_training).item()
        difference_adj2_layer = torch.sum(
            adj_2_before_training - adj_2_after_training).item()
        difference_noun1_layer = torch.sum(
            noun_1_before_training - noun_1_after_training).item()
        difference_noun2_layer = torch.sum(
            noun_2_before_training - noun_2_after_training).item()

        difference_general_adj = torch.sum(general_adj_weights_before_training - general_adj_weights_after_training).item()
        difference_general_noun = torch.sum(general_noun_weights_before_training - general_noun_weights_after_training).item()

        np.testing.assert_equal(difference_adj1_layer != 0.0, True)
        np.testing.assert_equal(difference_adj2_layer != 0.0, True)
        np.testing.assert_equal(difference_noun1_layer != 0.0, True)
        np.testing.assert_equal(difference_noun2_layer != 0.0, True)
        np.testing.assert_equal(difference_general_adj != 0.0, True)
        np.testing.assert_equal(difference_general_noun != 0.0, True)
Example no. 7
    def train_matrix_pretrain(self):
        """Pretrain the matrix composition model for one pass over the data and save its weights."""
        optimizer = optim.Adam(self.model_pretrain.parameters())
        for batch in self.pretrain_loader:
            batch["device"] = "cpu"
            out = self.model_pretrain(batch).squeeze().to("cpu")
            loss = get_loss_cosine_distance(composed_phrase=out,
                                            original_phrase=batch["l"])
            optimizer.zero_grad()  # reset gradients so they do not accumulate across batches
            loss.backward()
            optimizer.step()
        torch.save(self.model_pretrain.state_dict(), "models/matrix_pretrain")
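
Restoring the saved weights into a fresh model later would look roughly like this; the sketch assumes the model is rebuilt with the same architecture (init_classifier from the training code below is one plausible constructor, but that is an assumption):

model = init_classifier(config)  # assumption: same architecture as self.model_pretrain
model.load_state_dict(torch.load("models/matrix_pretrain"))
model.eval()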
Example no. 8
    def test_matrix_transfer_ranking(self):
        """Test whether the transfer matrix ranking model can be called and whether the loss can be computed"""
        batch = next(iter(self.pretrain_loader))
        batch["device"] = "cpu"
        out = self.model_transfer_rank(batch).squeeze().to("cpu")
        loss = get_loss_cosine_distance(composed_phrase=out,
                                        original_phrase=batch["l"]).item()
        np.testing.assert_equal(out.shape, [4, 300])
        np.testing.assert_equal(math.isnan(loss), False)
        np.testing.assert_equal(loss >= 0, True)
Example no. 9
    def test_matrix_pretrain(self):
        """Test whether the matrix pretraining model can be used to compute the loss and whether the composed
        representation has the correct shape."""
        batch = next(iter(self.pretrain_loader))
        batch["device"] = "cpu"
        out = self.model_pretrain(batch).squeeze().to("cpu")
        loss = get_loss_cosine_distance(composed_phrase=out,
                                        original_phrase=batch["l"]).item()
        np.testing.assert_equal(out.shape, [4, 300])
        np.testing.assert_equal(math.isnan(loss), False)
        np.testing.assert_equal(loss >= 0, True)

    def test_cosine_distance(self):
        """Test whether the cosine distance is 0 for two equal batches of embeddings."""
        embedding_1 = torch.from_numpy(
            np.array([[0.1, 0.2, 0.3], [0.1, 0.2, 0.3]]))
        embedding_2 = torch.from_numpy(
            np.array([[0.1, 0.2, 0.3], [0.1, 0.2, 0.3]]))
        distance = loss_functions.get_loss_cosine_distance(
            original_phrase=embedding_1,
            composed_phrase=embedding_2,
            dim=1,
            normalize=True)
        np.testing.assert_equal(distance.item(), 0.0)
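
The tests above pin down the contract of get_loss_cosine_distance: zero for identical batches, non-negative, never NaN, with dim and normalize arguments. The actual implementation is not part of these excerpts; a minimal sketch consistent with that contract:

import torch
import torch.nn.functional as F


def get_loss_cosine_distance(original_phrase, composed_phrase, dim=1, normalize=False):
    # Sketch: mean cosine distance (1 - cosine similarity) over the batch.
    # The repository's version may differ, e.g. in the reduction it applies.
    if normalize:
        original_phrase = F.normalize(original_phrase, p=2, dim=dim)
        composed_phrase = F.normalize(composed_phrase, p=2, dim=dim)
    similarity = F.cosine_similarity(original_phrase, composed_phrase, dim=dim)
    return torch.mean(1.0 - similarity)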
Example no. 11
    def test_model_loss(self):
        """
        Test whether the composition model can be ran and whether the loss can be computed. The loss should be
        a number larger than zero and not NaN
        """
        self.optimizer.zero_grad()
        composed, rep_1, rep_2 = self.model(self.input_1)
        loss_1 = loss_functions.get_loss_cosine_distance(
            original_phrase=self.input_1["l"],
            composed_phrase=rep_1,
            dim=1,
            normalize=False).item()
        composed, rep_1, rep_2 = self.model(self.input_2)
        loss_2 = loss_functions.get_loss_cosine_distance(
            original_phrase=self.input_2["l"],
            composed_phrase=rep_2,
            dim=1,
            normalize=False).item()
        np.testing.assert_equal(math.isnan(loss_1), False)
        np.testing.assert_equal(math.isnan(loss_2), False)

        np.testing.assert_equal(loss_1 >= 0, True)
        np.testing.assert_equal(loss_2 >= 0, True)
Example no. 12
    def test_parameter_updated_with_training(self):
        """Test whether the weight matrices are updated during training."""
        adj_matrix_before_training = self.access_named_parameter(self.model, "_adj_matrix")
        noun_matrix_before_training = self.access_named_parameter(self.model, "_noun_matrix")
        for epoch in range(0, 5):
            self.optimizer.zero_grad()
            composed = self.model(self.input)
            loss = loss_functions.get_loss_cosine_distance(original_phrase=self.input["l"], composed_phrase=composed,
                                                           dim=1, normalize=False)
            loss.backward()
            self.optimizer.step()
            adj_matrix_after_training = self.access_named_parameter(self.model, "_adj_matrix")
            noun_matrix_after_training = self.access_named_parameter(self.model, "_noun_matrix")
        difference_noun_matrix = torch.sum(
            noun_matrix_before_training - noun_matrix_after_training).item()
        difference_adj_matrix = torch.sum(
            adj_matrix_before_training - adj_matrix_after_training).item()

        np.testing.assert_equal(difference_noun_matrix != 0.0, True)
        np.testing.assert_equal(difference_adj_matrix != 0.0, True)
Example no. 13
def pretrain(pretrain_loader, model, optimizer):
    """
    This function trains the model for one epoch on a given training set
    :param pretrain_loader: a dataloader that contains a specific training set
    :param model: the classifier
    :param optimizer: the optimizer
    :return: trained classifier and optimizer
    """
    pbar = trange(100, desc='Pretrain for one epoch...', leave=True)
    for batch in pretrain_loader:
        batch["device"] = device
        composed, rep1, rep2 = model(batch)
        phrase_loss = get_loss_cosine_distance(composed_phrase=rep1,
                                               original_phrase=batch["l"])
        phrase_loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        pbar.update(100 / len(pretrain_loader))
    return model, optimizer
def predict(test_loader, model, device):
    """
    predicts labels on unseen data (test set)
    :param test_loader: dataloader torch object with test data
    :param model: trained model
    :param device: the device
    :return: predictions for the given dataset and the average loss over the whole dataset
    """
    test_loss = []
    predictions = []
    model.to(device)
    model.eval()  # deterministic inference (e.g. disables dropout)
    for batch in test_loader:
        batch["device"] = device
        out = model(batch).squeeze().to("cpu")
        for pred in out:
            predictions.append(pred.detach().numpy())
        loss = get_loss_cosine_distance(composed_phrase=out,
                                        original_phrase=batch["l"])
        test_loss.append(loss.item())
    predictions = np.array(predictions)
    return predictions, np.average(test_loss)
def train(config, train_loader, valid_loader, model_path, device):
    """
        method to pretrain a composition model
        :param config: config json file
        :param train_loader: dataloader torch object with training data
        :param valid_loader: dataloader torch object with validation data
        :return: the trained model
        """
    model = init_classifier(config)
    model.to(device)
    optimizer = optim.Adam(model.parameters())
    current_patience = 0
    tolerance = 1e-5
    lowest_loss = float("inf")
    best_epoch = 1
    epoch = 1
    train_loss = 0.0
    for epoch in range(1, config["num_epochs"] + 1):
        # training loop over all batches
        model.train()
        # these store the losses and accuracies for each batch for one epoch
        train_losses = []
        valid_losses = []
        for batch in train_loader:
            batch["device"] = device
            out = model(batch).squeeze().to("cpu")
            loss = get_loss_cosine_distance(composed_phrase=out,
                                            original_phrase=batch["l"])
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            train_losses.append(loss.item())
        # validation loop over validation batches
        model.eval()
        for batch in valid_loader:
            batch["device"] = device
            out = model(batch).squeeze().to("cpu")
            loss = get_loss_cosine_distance(composed_phrase=out,
                                            original_phrase=batch["l"])
            valid_losses.append(loss.item())

        # calculate average loss and accuracy over an epoch
        train_loss = np.average(train_losses)
        valid_loss = np.average(valid_losses)

        if lowest_loss - valid_loss > tolerance:
            lowest_loss = valid_loss
            best_epoch = epoch
            current_patience = 0
            torch.save(model.state_dict(), model_path)
        else:
            current_patience += 1
        if current_patience > config["patience"]:
            break

        logger.info(
            "current patience: %d , epoch %d , train loss: %.5f, validation loss: %.5f"
            % (current_patience, epoch, train_loss, valid_loss))
    logger.info(
        "training finished after %d epochs, train loss: %.5f, best epoch: %d, best validation loss: %.5f"
        % (epoch, train_loss, best_epoch, lowest_loss))
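
train only reads config["num_epochs"] and config["patience"] directly; init_classifier(config) will typically need further keys. A hypothetical invocation, with all values illustrative:

config = {"num_epochs": 50, "patience": 5}  # init_classifier(config) likely needs more keys
train(config, train_loader, valid_loader, "models/composition_model", device="cpu")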
Example no. 16
def train(config, pretrain_1, pretrain_2, train_loader, valid_loader_1,
          valid_loader_2, model_path, device):
    """
    This method trains a composition model jointly on two tasks. Optionally, the model is first pretrained on one
    or both of the individual datasets.
    :param config: the main configuration with the settings that should be used for training
    :param pretrain_1: if True, pretrain the model on the first dataset before joint training
    :param pretrain_2: if True, pretrain the model on the second dataset before joint training
    :param train_loader: a trainloader that contains a "MultiRankingDataset". This returns two batches, each batch
    contains the instances of one of the two datasets
    :param valid_loader_1: the validation dataset loader of the first task (phrase reconstruction)
    :param valid_loader_2: the validation dataset loader of the second task (attribute composition)
    :param model_path: the path to save the model to
    :param device: the device type (CPU or GPU)
    """
    model = init_classifier(config)
    model.to(device)
    optimizer = optim.Adam(model.parameters())
    current_patience = 0
    tolerance = 1e-5
    lowest_loss = float("inf")
    best_epoch = 1
    epoch = 1
    train_loss = 0.0
    if pretrain_1:
        # use one training set for pretraining (dataset_train_1, dataset_train_2
        # and config_1 are assumed to be defined at module level)
        pretrain_loader = DataLoader(
            dataset_train_1,
            batch_size=config_1["iterator"]["batch_size"],
            shuffle=True,
            num_workers=0)
        model, optimizer = pretrain(pretrain_loader, model, optimizer)
    if pretrain_2:
        pretrain_loader = DataLoader(
            dataset_train_2,
            batch_size=config_1["iterator"]["batch_size"],
            shuffle=True,
            num_workers=0)
        model, optimizer = pretrain(pretrain_loader, model, optimizer)
    for epoch in range(1, config["num_epochs"] + 1):
        model.train()
        train_losses = []
        valid_losses_attribute = []
        valid_losses_phrase = []
        for batch_task_1, batch_task_2 in train_loader:
            batch_task_1["device"] = device
            batch_task_2["device"] = device

            composed, rep1, rep2 = model(batch_task_1)
            rep1 = rep1.squeeze().to("cpu")
            phrase_loss = get_loss_cosine_distance(
                composed_phrase=rep1, original_phrase=batch_task_1["l"])

            composed, rep1, rep2 = model(batch_task_2)
            rep2 = rep2.squeeze().to("cpu")
            attribute_loss = get_loss_cosine_distance(
                composed_phrase=rep2, original_phrase=batch_task_2["l"])

            loss = attribute_loss + phrase_loss
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            train_losses.append(loss.item())
        model.eval()
        for batch in valid_loader_1:
            batch["device"] = device
            _, out, _ = model(batch)
            out = out.squeeze().to("cpu")
            loss = get_loss_cosine_distance(composed_phrase=out,
                                            original_phrase=batch["l"])
            valid_losses_phrase.append(loss.item())
        for batch in valid_loader_2:
            batch["device"] = device
            _, _, out = model(batch)
            out = out.squeeze().to("cpu")
            loss = get_loss_cosine_distance(composed_phrase=out,
                                            original_phrase=batch["l"])
            valid_losses_attribute.append(loss.item())

        # calculate average loss and accuracy over an epoch
        train_loss = np.average(train_losses)
        valid_loss_label = np.average(valid_losses_attribute)
        valid_loss_phrase = np.average(valid_losses_phrase)
        total_valid_loss = (valid_loss_label + valid_loss_phrase) / 2

        if lowest_loss - total_valid_loss > tolerance:
            lowest_loss = total_valid_loss
            best_epoch = epoch
            current_patience = 0
            torch.save(model.state_dict(), model_path)
        else:
            current_patience += 1
        if current_patience > config["patience"]:
            break

        logger.info(
            "current patience: %d , epoch %d , train loss: %.3f, validation loss task 1: %.3f, validation loss task "
            "2: %.3f" % (current_patience, epoch, train_loss,
                         valid_loss_phrase, valid_loss_label))
    logger.info(
        "training finished after %d epochs, train loss: %.5f, best epoch: %d, best validation loss: %.5f"
        % (epoch, train_loss, best_epoch, lowest_loss))
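
The joint loop iterates with "for batch_task_1, batch_task_2 in train_loader", so the MultiRankingDataset mentioned in the docstring must yield one batch per task at a time. A minimal sketch of that pairing behaviour, assuming both loaders are equally long (the real class is not shown here and may handle unequal lengths differently):

def paired_batches(loader_task_1, loader_task_2):
    # Hypothetical stand-in for a MultiRankingDataset loader:
    # zip stops at the end of the shorter loader.
    for batch_task_1, batch_task_2 in zip(loader_task_1, loader_task_2):
        yield batch_task_1, batch_task_2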