def test_parameter_updated_with_training(self):
    """Test that the shared matrix layer and both task-specific matrices are updated during training."""
    matrix_layer_before_training = self.access_named_parameter(self.model, "_matrix_layer.weight")
    sp_matrix_1_before_training = self.access_named_parameter(self.model, "_specific_matrix_1.weight")
    sp_matrix_2_before_training = self.access_named_parameter(self.model, "_specific_matrix_2.weight")
    for epoch in range(5):
        self.optimizer.zero_grad()
        composed, rep_1, rep_2 = self.model(self.input_1)
        loss_1 = loss_functions.get_loss_cosine_distance(
            original_phrase=self.input_1["l"], composed_phrase=rep_1, dim=1, normalize=False)
        composed, rep_1, rep_2 = self.model(self.input_2)
        loss_2 = loss_functions.get_loss_cosine_distance(
            original_phrase=self.input_2["l"], composed_phrase=rep_2, dim=1, normalize=False)
        loss = loss_1 + loss_2
        loss.backward()
        self.optimizer.step()
    matrix_layer_after_training = self.access_named_parameter(self.model, "_matrix_layer.weight")
    sp_matrix_1_after_training = self.access_named_parameter(self.model, "_specific_matrix_1.weight")
    sp_matrix_2_after_training = self.access_named_parameter(self.model, "_specific_matrix_2.weight")
    difference_matrix_layer = torch.sum(matrix_layer_before_training - matrix_layer_after_training).item()
    difference_sp_matrix_1 = torch.sum(sp_matrix_1_before_training - sp_matrix_1_after_training).item()
    difference_sp_matrix_2 = torch.sum(sp_matrix_2_before_training - sp_matrix_2_after_training).item()
    np.testing.assert_equal(difference_matrix_layer != 0.0, True)
    np.testing.assert_equal(difference_sp_matrix_1 != 0.0, True)
    np.testing.assert_equal(difference_sp_matrix_2 != 0.0, True)
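# The parameter-update tests in this file rely on an `access_named_parameter`
# helper that is not part of this excerpt. A minimal sketch of the behaviour the
# tests assume: look up a parameter by name and return a detached copy, so that
# the "before training" snapshot does not alias the live tensor (otherwise the
# before/after difference would always be zero). The repository's actual helper
# may differ.
def access_named_parameter(model, parameter_name):
    for name, parameter in model.named_parameters():
        if name == parameter_name:
            return parameter.detach().clone()
    raise KeyError("model has no parameter named %s" % parameter_name)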
def test_model_loss(self):
    """Test that both task losses can be computed and are non-negative, finite numbers."""
    self.optimizer.zero_grad()
    composed, rep_1, rep_2 = self.model(self.input_1)
    loss_1 = loss_functions.get_loss_cosine_distance(
        original_phrase=self.input_1["l"], composed_phrase=rep_1, dim=1, normalize=False).item()
    composed, rep_1, rep_2 = self.model(self.input_2)
    loss_2 = loss_functions.get_loss_cosine_distance(
        original_phrase=self.input_2["l"], composed_phrase=rep_2, dim=1, normalize=False).item()
    np.testing.assert_equal(math.isnan(loss_1), False)
    np.testing.assert_equal(math.isnan(loss_2), False)
    np.testing.assert_equal(loss_1 >= 0, True)
    np.testing.assert_equal(loss_2 >= 0, True)
def test_parameter_get_updated(self):
    """Test whether the initial weight matrices are updated during training:
    these parameters should differ after training from their values before training."""
    tw_tensor_before_training = self.access_named_parameter(self.model, "_transformation_tensor")
    combining_tensor_1_before_training = self.access_named_parameter(self.model, "_combining_tensor_1")
    combining_tensor_2_before_training = self.access_named_parameter(self.model, "_combining_tensor_2")
    for epoch in range(10):
        self.optimizer.zero_grad()
        composed, rep_1, rep_2 = self.model(self.input_1)
        loss_1 = loss_functions.get_loss_cosine_distance(
            original_phrase=self.input_1["l"], composed_phrase=rep_1, dim=1, normalize=False)
        composed, rep_1, rep_2 = self.model(self.input_2)
        loss_2 = loss_functions.get_loss_cosine_distance(
            original_phrase=self.input_2["l"], composed_phrase=rep_2, dim=1, normalize=False)
        loss = loss_1 + loss_2
        loss.backward()
        self.optimizer.step()
    tw_tensor_after_training = self.access_named_parameter(self.model, "_transformation_tensor")
    combining_tensor_1_after_training = self.access_named_parameter(self.model, "_combining_tensor_1")
    combining_tensor_2_after_training = self.access_named_parameter(self.model, "_combining_tensor_2")
    difference_combining_tensor_1 = torch.sum(
        combining_tensor_1_before_training - combining_tensor_1_after_training).item()
    difference_combining_tensor_2 = torch.sum(
        combining_tensor_2_before_training - combining_tensor_2_after_training).item()
    difference_combining_tensors = torch.sum(
        combining_tensor_1_after_training - combining_tensor_2_after_training).item()
    difference_tw_tensor = torch.sum(tw_tensor_before_training - tw_tensor_after_training).item()
    np.testing.assert_equal(difference_combining_tensor_1 != 0.0, True)
    np.testing.assert_equal(difference_combining_tensor_2 != 0.0, True)
    np.testing.assert_equal(difference_combining_tensors != 0.0, True)
    np.testing.assert_equal(difference_tw_tensor != 0.0, True)
def predict(test_loader, model, device):
    """
    Predicts labels on unseen data (test set).
    :param test_loader: torch DataLoader with test or validation data
    :param model: trained model
    :param device: the device to run the model on
    :return: predictions and average losses for the final composed representation, the attribute
        representation and the reconstructed representation, plus the original phrases
    """
    test_loss_att = []
    test_loss_final = []
    test_loss_reconstructed = []
    predictions_final_rep = []
    predictions_attribute_rep = []
    predictions_reconstructed_rep = []
    orig_phrases = []
    model.to(device)
    for batch in test_loader:
        batch["device"] = device
        composed, rep1, rep2 = model(batch)
        composed = composed.squeeze().to("cpu")
        rep1 = rep1.squeeze().to("cpu")
        rep2 = rep2.squeeze().to("cpu")
        for pred in rep2:
            predictions_attribute_rep.append(pred.detach().numpy())
        for pred in composed:
            predictions_final_rep.append(pred.detach().numpy())
        for pred in rep1:
            predictions_reconstructed_rep.append(pred.detach().numpy())
        loss_att = get_loss_cosine_distance(composed_phrase=rep2, original_phrase=batch["l"])
        loss_reconstructed = get_loss_cosine_distance(composed_phrase=rep1, original_phrase=batch["l"])
        loss_final = get_loss_cosine_distance(composed_phrase=composed, original_phrase=batch["l"])
        test_loss_att.append(loss_att.item())
        test_loss_reconstructed.append(loss_reconstructed.item())
        test_loss_final.append(loss_final.item())
        orig_phrases.append(batch["label"])
    # flatten the list of per-batch label lists
    orig_phrases = [item for sublist in orig_phrases for item in sublist]
    predictions_final_rep = np.array(predictions_final_rep)
    predictions_attribute_rep = np.array(predictions_attribute_rep)
    predictions_reconstructed_rep = np.array(predictions_reconstructed_rep)
    return (predictions_final_rep, predictions_attribute_rep, predictions_reconstructed_rep,
            np.average(test_loss_final), np.average(test_loss_att),
            np.average(test_loss_reconstructed), orig_phrases)
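# A hypothetical usage sketch for `predict`; the dataset object, batch size and
# model below are illustrative assumptions, not repository code:
#
#     test_loader = DataLoader(test_data, batch_size=32, shuffle=False)
#     (final_rep, attribute_rep, reconstructed_rep, loss_final, loss_att,
#      loss_rec, phrases) = predict(test_loader, model, device="cpu")
#     print("final: %.5f, attribute: %.5f, reconstructed: %.5f" % (loss_final, loss_att, loss_rec))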
def test_model_loss(self):
    """Test that the loss of the composed representation is a non-negative, finite number."""
    self.optimizer.zero_grad()
    composed = self.model(self.input)
    loss = loss_functions.get_loss_cosine_distance(
        original_phrase=self.input["l"], composed_phrase=composed, dim=1, normalize=False).item()
    np.testing.assert_equal(math.isnan(loss), False)
    np.testing.assert_equal(loss >= 0, True)
def test_parameter_updated_with_training(self):
    """Test that the specific and general adjective/noun matrices are updated during training."""
    adj_1_before_training = self.access_named_parameter(self.model, "_adj_matrix_1")
    adj_2_before_training = self.access_named_parameter(self.model, "_adj_matrix_2")
    noun_1_before_training = self.access_named_parameter(self.model, "_noun_matrix_1")
    noun_2_before_training = self.access_named_parameter(self.model, "_noun_matrix_2")
    general_adj_weights_before_training = self.access_named_parameter(self.model, "_general_adj_matrix")
    general_noun_weights_before_training = self.access_named_parameter(self.model, "_general_noun_matrix")
    for epoch in range(5):
        self.optimizer.zero_grad()
        composed, rep_1, rep_2 = self.model(self.input_1)
        loss_1 = loss_functions.get_loss_cosine_distance(
            original_phrase=self.input_1["l"], composed_phrase=rep_1, dim=1, normalize=False)
        composed, rep_1, rep_2 = self.model(self.input_2)
        loss_2 = loss_functions.get_loss_cosine_distance(
            original_phrase=self.input_2["l"], composed_phrase=rep_2, dim=1, normalize=False)
        loss = loss_1 + loss_2
        loss.backward()
        self.optimizer.step()
    adj_1_after_training = self.access_named_parameter(self.model, "_adj_matrix_1")
    adj_2_after_training = self.access_named_parameter(self.model, "_adj_matrix_2")
    noun_1_after_training = self.access_named_parameter(self.model, "_noun_matrix_1")
    noun_2_after_training = self.access_named_parameter(self.model, "_noun_matrix_2")
    general_adj_weights_after_training = self.access_named_parameter(self.model, "_general_adj_matrix")
    general_noun_weights_after_training = self.access_named_parameter(self.model, "_general_noun_matrix")
    difference_adj1_layer = torch.sum(adj_1_before_training - adj_1_after_training).item()
    difference_adj2_layer = torch.sum(adj_2_before_training - adj_2_after_training).item()
    difference_noun1_layer = torch.sum(noun_1_before_training - noun_1_after_training).item()
    difference_noun2_layer = torch.sum(noun_2_before_training - noun_2_after_training).item()
    difference_general_adj = torch.sum(
        general_adj_weights_before_training - general_adj_weights_after_training).item()
    difference_general_noun = torch.sum(
        general_noun_weights_before_training - general_noun_weights_after_training).item()
    np.testing.assert_equal(difference_adj1_layer != 0.0, True)
    np.testing.assert_equal(difference_adj2_layer != 0.0, True)
    np.testing.assert_equal(difference_noun1_layer != 0.0, True)
    np.testing.assert_equal(difference_noun2_layer != 0.0, True)
    np.testing.assert_equal(difference_general_adj != 0.0, True)
    np.testing.assert_equal(difference_general_noun != 0.0, True)
def train_matrix_pretrain(self):
    optimizer = optim.Adam(self.model_pretrain.parameters())
    for batch in self.pretrain_loader:
        batch["device"] = "cpu"
        # zero the gradients before each step so they do not accumulate across batches
        optimizer.zero_grad()
        out = self.model_pretrain(batch).squeeze().to("cpu")
        loss = get_loss_cosine_distance(composed_phrase=out, original_phrase=batch["l"])
        loss.backward()
        optimizer.step()
    torch.save(self.model_pretrain.state_dict(), "models/matrix_pretrain")
def test_matrix_transfer_ranking(self):
    """Test whether the transfer matrix ranking model can be called and whether the loss can be computed."""
    batch = next(iter(self.pretrain_loader))
    batch["device"] = "cpu"
    out = self.model_transfer_rank(batch).squeeze().to("cpu")
    loss = get_loss_cosine_distance(composed_phrase=out, original_phrase=batch["l"]).item()
    np.testing.assert_equal(out.shape, [4, 300])
    np.testing.assert_equal(math.isnan(loss), False)
    np.testing.assert_equal(loss >= 0, True)
def test_matrix_pretrain(self):
    """Test whether the matrix pretraining model can be used to compute the loss and whether
    the composed representation has the correct shape."""
    batch = next(iter(self.pretrain_loader))
    batch["device"] = "cpu"
    out = self.model_pretrain(batch).squeeze().to("cpu")
    loss = get_loss_cosine_distance(composed_phrase=out, original_phrase=batch["l"]).item()
    np.testing.assert_equal(out.shape, [4, 300])
    np.testing.assert_equal(math.isnan(loss), False)
    np.testing.assert_equal(loss >= 0, True)
def test_cosine_distance(self):
    """Test whether the cosine distance is 0 for two equal batches of embeddings."""
    embedding_1 = torch.from_numpy(np.array([[0.1, 0.2, 0.3], [0.1, 0.2, 0.3]]))
    embedding_2 = torch.from_numpy(np.array([[0.1, 0.2, 0.3], [0.1, 0.2, 0.3]]))
    distance = loss_functions.get_loss_cosine_distance(
        original_phrase=embedding_1, composed_phrase=embedding_2, dim=1, normalize=True)
    np.testing.assert_equal(distance.item(), 0.0)
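# `get_loss_cosine_distance` itself is not shown in this excerpt. A minimal
# sketch of the behaviour the tests assume (zero distance for identical inputs,
# always non-negative): the mean cosine distance between the original and the
# composed phrase vectors, with `normalize` assumed to L2-normalize the inputs
# first. The repository's actual implementation may differ.
import torch
import torch.nn.functional as F

def get_loss_cosine_distance(original_phrase, composed_phrase, dim=1, normalize=False):
    if normalize:
        original_phrase = F.normalize(original_phrase, p=2, dim=dim)
        composed_phrase = F.normalize(composed_phrase, p=2, dim=dim)
    # cosine distance = 1 - cosine similarity, averaged over the batch
    return torch.mean(1.0 - F.cosine_similarity(original_phrase, composed_phrase, dim=dim))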
def test_model_loss(self): """ Test whether the composition model can be ran and whether the loss can be computed. The loss should be a number larger than zero and not NaN """ self.optimizer.zero_grad() composed, rep_1, rep_2 = self.model(self.input_1) loss_1 = loss_functions.get_loss_cosine_distance( original_phrase=self.input_1["l"], composed_phrase=rep_1, dim=1, normalize=False).item() composed, rep_1, rep_2 = self.model(self.input_2) loss_2 = loss_functions.get_loss_cosine_distance( original_phrase=self.input_1["l"], composed_phrase=rep_2, dim=1, normalize=False).item() np.testing.assert_equal(math.isnan(loss_1), False) np.testing.assert_equal(math.isnan(loss_2), False) np.testing.assert_equal(loss_1 >= 0, True) np.testing.assert_equal(loss_2 >= 0, True)
def test_parameter_updated_with_training(self):
    """Test that the adjective and noun matrices are updated during training."""
    adj_matrix_before_training = self.access_named_parameter(self.model, "_adj_matrix")
    noun_matrix_before_training = self.access_named_parameter(self.model, "_noun_matrix")
    for epoch in range(5):
        self.optimizer.zero_grad()
        composed = self.model(self.input)
        loss = loss_functions.get_loss_cosine_distance(
            original_phrase=self.input["l"], composed_phrase=composed, dim=1, normalize=False)
        loss.backward()
        self.optimizer.step()
    adj_matrix_after_training = self.access_named_parameter(self.model, "_adj_matrix")
    noun_matrix_after_training = self.access_named_parameter(self.model, "_noun_matrix")
    difference_adj_matrix = torch.sum(adj_matrix_before_training - adj_matrix_after_training).item()
    difference_noun_matrix = torch.sum(noun_matrix_before_training - noun_matrix_after_training).item()
    np.testing.assert_equal(difference_adj_matrix != 0.0, True)
    np.testing.assert_equal(difference_noun_matrix != 0.0, True)
def pretrain(pretrain_loader, model, optimizer, device):
    """
    Trains the model for one epoch on a given training set.
    :param pretrain_loader: a dataloader that contains a specific training set
    :param model: the composition model
    :param optimizer: the optimizer
    :param device: the device to train on
    :return: trained model and optimizer
    """
    pbar = trange(100, desc="Pretrain for one epoch...", leave=True)
    for batch in pretrain_loader:
        batch["device"] = device
        composed, rep1, rep2 = model(batch)
        phrase_loss = get_loss_cosine_distance(composed_phrase=rep1, original_phrase=batch["l"])
        phrase_loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        pbar.update(100 / len(pretrain_loader))
    pbar.close()
    return model, optimizer
def predict(test_loader, model, device): """ predicts labels on unseen data (test set) :param test_loader: dataloader torch object with test data :param model: trained model :param config: config: config json file :return: predictions for the given dataset, the loss and accuracy over the whole dataset """ test_loss = [] predictions = [] model.to(device) for batch in test_loader: batch["device"] = device out = model(batch).squeeze().to("cpu") for pred in out: predictions.append(pred.detach().numpy()) loss = get_loss_cosine_distance(composed_phrase=out, original_phrase=batch["l"]) test_loss.append(loss.item()) predictions = np.array(predictions) return predictions, np.average(test_loss)
def train(config, train_loader, valid_loader, model_path, device):
    """
    Pretrains a composition model with early stopping on the validation loss.
    :param config: config json file
    :param train_loader: torch DataLoader with training data
    :param valid_loader: torch DataLoader with validation data
    :param model_path: the path to save the best model to
    :param device: the device to train on
    :return: the trained model (saved to model_path)
    """
    model = init_classifier(config)
    model.to(device)
    optimizer = optim.Adam(model.parameters())
    current_patience = 0
    tolerance = 1e-5
    lowest_loss = float("inf")
    best_epoch = 1
    epoch = 1
    train_loss = 0.0
    for epoch in range(1, config["num_epochs"] + 1):
        # training loop over all batches
        model.train()
        # these store the losses for each batch of one epoch
        train_losses = []
        valid_losses = []
        for batch in train_loader:
            batch["device"] = device
            out = model(batch).squeeze().to("cpu")
            loss = get_loss_cosine_distance(composed_phrase=out, original_phrase=batch["l"])
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            train_losses.append(loss.item())
        # validation loop over validation batches; no gradients are needed here
        model.eval()
        with torch.no_grad():
            for batch in valid_loader:
                batch["device"] = device
                out = model(batch).squeeze().to("cpu")
                loss = get_loss_cosine_distance(composed_phrase=out, original_phrase=batch["l"])
                valid_losses.append(loss.item())
        # calculate the average losses over an epoch
        train_loss = np.average(train_losses)
        valid_loss = np.average(valid_losses)
        if lowest_loss - valid_loss > tolerance:
            lowest_loss = valid_loss
            best_epoch = epoch
            current_patience = 0
            torch.save(model.state_dict(), model_path)
        else:
            current_patience += 1
        if current_patience > config["patience"]:
            break
        logger.info(
            "current patience: %d , epoch %d , train loss: %.5f, validation loss: %.5f" %
            (current_patience, epoch, train_loss, valid_loss))
    logger.info(
        "training finished after %d epochs, train loss: %.5f, best epoch : %d , best validation loss: %.5f" %
        (epoch, train_loss, best_epoch, lowest_loss))
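# A hypothetical usage sketch for `train`; the config values and path are
# illustrative assumptions (only the keys "num_epochs" and "patience" are read
# directly by the function above, the rest is consumed by `init_classifier`):
#
#     config = {"num_epochs": 100, "patience": 5, ...}
#     train(config, train_loader, valid_loader, "models/composition_model", "cpu")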
def train(config, pretrain_1, pretrain_2, train_loader, valid_loader_1, valid_loader_2, model_path, device):
    """
    Trains a composition model jointly on two tasks, optionally pretraining it first
    on the more general / harder task.
    :param config: the main configuration with the settings that should be used for training
    :param pretrain_1: if True, pretrain the model on the first training set
    :param pretrain_2: if True, pretrain the model on the second training set
    :param train_loader: a dataloader that wraps a "MultiRankingDataset"; it returns two
        batches, each containing the instances of one of the two datasets
    :param valid_loader_1: the validation dataset loader of the first task (phrase reconstruction)
    :param valid_loader_2: the validation dataset loader of the second task (attribute composition)
    :param model_path: the path to save the model to
    :param device: the device type (CPU or GPU)
    """
    model = init_classifier(config)
    model.to(device)
    optimizer = optim.Adam(model.parameters())
    current_patience = 0
    tolerance = 1e-5
    lowest_loss = float("inf")
    best_epoch = 1
    epoch = 1
    train_loss = 0.0
    # dataset_train_1, dataset_train_2 and config_1 are defined at module level in the original script
    if pretrain_1:
        # use the first training set for pretraining
        pretrain_loader = DataLoader(dataset_train_1, batch_size=config_1["iterator"]["batch_size"],
                                     shuffle=True, num_workers=0)
        model, optimizer = pretrain(pretrain_loader, model, optimizer, device)
    if pretrain_2:
        pretrain_loader = DataLoader(dataset_train_2, batch_size=config_1["iterator"]["batch_size"],
                                     shuffle=True, num_workers=0)
        model, optimizer = pretrain(pretrain_loader, model, optimizer, device)
    for epoch in range(1, config["num_epochs"] + 1):
        model.train()
        train_losses = []
        valid_losses_attribute = []
        valid_losses_phrase = []
        for batch_task_1, batch_task_2 in train_loader:
            batch_task_1["device"] = device
            batch_task_2["device"] = device
            composed, rep1, rep2 = model(batch_task_1)
            rep1 = rep1.squeeze().to("cpu")
            phrase_loss = get_loss_cosine_distance(composed_phrase=rep1, original_phrase=batch_task_1["l"])
            composed, rep1, rep2 = model(batch_task_2)
            rep2 = rep2.squeeze().to("cpu")
            attribute_loss = get_loss_cosine_distance(composed_phrase=rep2, original_phrase=batch_task_2["l"])
            loss = attribute_loss + phrase_loss
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            train_losses.append(loss.item())
        model.eval()
        for batch in valid_loader_1:
            batch["device"] = device
            _, out, _ = model(batch)
            out = out.squeeze().to("cpu")
            loss = get_loss_cosine_distance(composed_phrase=out, original_phrase=batch["l"])
            valid_losses_phrase.append(loss.item())
        for batch in valid_loader_2:
            batch["device"] = device
            _, _, out = model(batch)
            out = out.squeeze().to("cpu")
            loss = get_loss_cosine_distance(composed_phrase=out, original_phrase=batch["l"])
            valid_losses_attribute.append(loss.item())
        # calculate the average losses over an epoch
        train_loss = np.average(train_losses)
        valid_loss_attribute = np.average(valid_losses_attribute)
        valid_loss_phrase = np.average(valid_losses_phrase)
        total_valid_loss = (valid_loss_attribute + valid_loss_phrase) / 2
        if lowest_loss - total_valid_loss > tolerance:
            lowest_loss = total_valid_loss
            best_epoch = epoch
            current_patience = 0
            torch.save(model.state_dict(), model_path)
        else:
            current_patience += 1
        if current_patience > config["patience"]:
            break
        logger.info(
            "current patience: %d , epoch %d , train loss: %.3f, validation loss phrase: %.3f, "
            "validation loss attribute: %.3f" %
            (current_patience, epoch, train_loss, valid_loss_phrase, valid_loss_attribute))
    logger.info(
        "training finished after %d epochs, train loss: %.5f, best epoch : %d , best validation loss: %.5f" %
        (epoch, train_loss, best_epoch, lowest_loss))
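# The joint training loop above iterates over pairs of batches, one per task. A
# minimal sketch of a paired dataset in the spirit of the "MultiRankingDataset"
# mentioned in the docstring (the class name and implementation here are
# illustrative; the repository's actual class may differ):
from torch.utils.data import Dataset

class PairedTaskDataset(Dataset):
    """Pairs two datasets so that a DataLoader yields (batch_task_1, batch_task_2) tuples."""

    def __init__(self, dataset_1, dataset_2):
        self._dataset_1 = dataset_1
        self._dataset_2 = dataset_2

    def __len__(self):
        # truncate to the shorter dataset so every index is valid for both
        return min(len(self._dataset_1), len(self._dataset_2))

    def __getitem__(self, index):
        # the default collate function batches each element of the tuple separately,
        # so iterating the loader yields one batch per task
        return self._dataset_1[index], self._dataset_2[index]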