def train(model, dataloader, optimizer, criterion, epoch_number, max_gradient_norm):
    """
    Train a model for one epoch on some input data with a given optimizer
    and criterion.

    Args:
        model: A torch module that must be trained on some input data. It
            must expose a `device` attribute and return a `(logits, probs)`
            pair when called.
        dataloader: A DataLoader object to iterate over the training data.
            Each batch is indexed with the keys "premise", "premise_length",
            "hypothesis", "hypothesis_length", "label" and "similarity".
        optimizer: A torch optimizer to use for training on the input model.
        criterion: A loss criterion to use for training.
        epoch_number: The number of the epoch for which training is
            performed. Not used by this function; kept so that existing
            callers keep working.
        max_gradient_norm: Max. norm for gradient norm clipping.

    Returns:
        epoch_time: The total time necessary to train the epoch.
        epoch_loss: The sum of the per-batch losses divided by
            `len(dataloader)`.
        epoch_accuracy: The accumulated `correct_predictions` results
            divided by `len(dataloader.dataset)`.
    """
    # Switch the model to train mode.
    model.train()
    device = model.device

    epoch_start = time.time()
    running_loss = 0.0
    correct_preds = 0

    for batch in dataloader:
        # Move input and output data to the GPU if it is used.
        premises = batch["premise"].to(device)
        premises_lengths = batch["premise_length"].to(device)
        hypotheses = batch["hypothesis"].to(device)
        hypotheses_lengths = batch["hypothesis_length"].to(device)
        labels = batch["label"].to(device)
        similarity = batch["similarity"].to(device)

        optimizer.zero_grad()

        logits, probs = model(premises,
                              premises_lengths,
                              hypotheses,
                              hypotheses_lengths,
                              similarity)
        loss = criterion(logits, labels)
        loss.backward()

        # Clip the gradients after backward() and before the optimizer
        # update, so the step uses the clipped values.
        nn.utils.clip_grad_norm_(model.parameters(), max_gradient_norm)
        optimizer.step()

        running_loss += loss.item()
        correct_preds += correct_predictions(probs, labels)

    epoch_time = time.time() - epoch_start
    epoch_loss = running_loss / len(dataloader)
    epoch_accuracy = correct_preds / len(dataloader.dataset)

    return epoch_time, epoch_loss, epoch_accuracy
def test(model, num_classes, dataloader, print_Confusion=False):
    """
    Test a model on some labelled dataset and report timings, the F1 score
    of label 1 and the accuracy.

    Args:
        model: The torch module on which testing must be performed. It must
            expose a `device` attribute and return a pair whose second
            element holds the class probabilities when called with
            (premises, premises_lengths, hypotheses, hypotheses_lengths).
        num_classes: The number of classes; the confusion matrix is
            `num_classes` x `num_classes`. Must be at least 2 because the
            returned F1 is read from index 1.
        dataloader: A DataLoader object to iterate over some dataset.
        print_Confusion: When true, print the confusion matrix and the
            per-label precision, recall and F1.

    Returns:
        batch_time: The average time to predict the classes of a batch.
        total_time: The total time to process the whole dataset.
        f1: The F1 score of label 1 (0.0 when it is undefined).
        accuracy: The accumulated `correct_predictions` results divided by
            `len(dataloader.dataset)`.
    """
    def _ratio(numerator, denominator):
        # An undefined ratio (empty row/column, or precision + recall == 0)
        # is reported as 0.0 instead of raising ZeroDivisionError.
        return numerator / denominator if denominator else 0.0

    # Switch the model to eval mode.
    model.eval()
    device = model.device

    time_start = time.time()
    batch_time = 0.0
    correct_preds = 0.0
    # confusion[i, j] counts the samples predicted as class i whose label
    # is class j.
    confusion = torch.zeros(num_classes, num_classes, dtype=torch.long)

    # Deactivate autograd for evaluation.
    with torch.no_grad():
        for batch in dataloader:
            batch_start = time.time()

            # Move input and output data to the GPU if one is used.
            premises = batch["premise"].to(device)
            premises_lengths = batch["premise_length"].to(device)
            hypotheses = batch["hypothesis"].to(device)
            hypotheses_lengths = batch["hypothesis_length"].to(device)
            labels = batch["label"].to(device)

            _, probs = model(premises,
                             premises_lengths,
                             hypotheses,
                             hypotheses_lengths)

            _, pred = probs.max(dim=1)
            # Convert once per batch to plain Python ints rather than
            # indexing the tensors element by element.
            for predicted, actual in zip(pred.tolist(), labels.tolist()):
                confusion[predicted, actual] += 1

            correct_preds += correct_predictions(probs, labels)
            batch_time += time.time() - batch_start

    batch_time /= len(dataloader)
    total_time = time.time() - time_start
    accuracy = correct_preds / len(dataloader.dataset)

    if print_Confusion:
        print("Confusion matrix:")
        print(confusion)
        print("Report precision, recall, and f1:")
        for i in range(confusion.size()[0]):
            true_positives = confusion[i, i].item()
            p = _ratio(true_positives, confusion[i, :].sum().item())
            r = _ratio(true_positives, confusion[:, i].sum().item())
            f1 = _ratio(2 * p * r, p + r)
            print("Label {}: {:.3f}, {:.3f}, {:.3f}".format(i, p, r, f1))

    # F1 of label 1 is what gets returned. Rows are predictions and columns
    # are labels, so the row sum yields precision and the column sum recall.
    true_positives = confusion[1, 1].item()
    p = _ratio(true_positives, confusion[1, :].sum().item())
    r = _ratio(true_positives, confusion[:, 1].sum().item())
    f1 = _ratio(2 * p * r, p + r)

    return batch_time, total_time, f1, accuracy
def test(model, dataloader):
    """
    Test the accuracy of a model on some dataset, print a confusion matrix
    and an F1 score, and dump the predictions to disk.

    Args:
        model: The torch module on which testing must be performed. It must
            expose a `device` attribute and return a pair whose second
            element holds the class probabilities.
        dataloader: A DataLoader object to iterate over some dataset. Each
            batch is indexed with the keys 'premise', 'premise_length',
            'hypothesis', 'hypothesis_length' and 'label'.

    Returns:
        batch_time: The average time to predict the classes of a batch.
        total_time: The total time to process the whole dataset.
        accuracy: The accumulated `correct_predictions` results divided by
            `len(dataloader.dataset)`.

    Side effects:
        Prints the confusion matrix and F1 score computed by `metrics`
        (presumably `sklearn.metrics` -- confirm against the file imports)
        and writes `[pred, true]` as a pickle to 'tmp.result' in the
        current working directory.
    """
    # Switch the model to eval mode.
    model.eval()
    device = model.device

    time_start = time.time()
    batch_time = 0.0
    accuracy = 0.0
    pred = []
    true = []

    # Deactivate autograd for evaluation.
    with torch.no_grad():
        for batch in dataloader:
            batch_start = time.time()

            # Move input and output data to the GPU if one is used.
            premises = batch['premise'].to(device)
            premises_lengths = batch['premise_length'].to(device)
            hypotheses = batch['hypothesis'].to(device)
            hypotheses_lengths = batch['hypothesis_length'].to(device)
            labels = batch['label'].to(device)

            _, probs = model(premises,
                             premises_lengths,
                             hypotheses,
                             hypotheses_lengths)

            accuracy += correct_predictions(probs, labels)
            batch_time += time.time() - batch_start

            # Collect predictions and gold labels as plain Python lists for
            # the metrics report; this is left out of the batch timing.
            _, out_classes = probs.max(dim=1)
            pred.extend(out_classes.cpu().tolist())
            true.extend(labels.cpu().tolist())

    batch_time /= len(dataloader)
    total_time = time.time() - time_start
    accuracy /= len(dataloader.dataset)

    print('=== confusion matrix ===')
    print(metrics.confusion_matrix(true, pred))
    print(metrics.f1_score(true, pred))

    # Use a context manager so the file is flushed and closed even if
    # pickling fails.
    with open('tmp.result', 'wb') as result_file:
        pickle.dump([pred, true], result_file)

    return batch_time, total_time, accuracy
def validate(model, dataloader, criterion):
    """
    Evaluate a model on a validation set, returning its loss and accuracy.

    Args:
        model: The torch module to evaluate. It is switched to eval mode,
            must expose a `device` attribute, and must return a
            `(logits, probs)` pair when called.
        dataloader: A DataLoader object to iterate over the validation
            data. Each batch is indexed with the keys "premise",
            "premise_length", "hypothesis", "hypothesis_length", "label"
            and "similarity".
        criterion: A loss criterion applied to `(logits, labels)`.

    Returns:
        epoch_time: The total time spent going through the validation set.
        epoch_loss: The sum of the per-batch losses divided by
            `len(dataloader)`.
        epoch_accuracy: The accumulated `correct_predictions` results
            divided by `len(dataloader.dataset)`.
    """
    model.eval()
    device = model.device
    batch_keys = ("premise", "premise_length", "hypothesis",
                  "hypothesis_length", "label", "similarity")

    started_at = time.time()
    loss_total = 0.0
    correct_total = 0.0

    # No gradients are needed while evaluating.
    with torch.no_grad():
        for batch in dataloader:
            # Move every tensor of the batch to the model's device, in the
            # order given by `batch_keys`.
            (premises, premises_lengths, hypotheses,
             hypotheses_lengths, labels, similarity) = (
                batch[key].to(device) for key in batch_keys)

            logits, probs = model(premises,
                                  premises_lengths,
                                  hypotheses,
                                  hypotheses_lengths,
                                  similarity)

            loss_total += criterion(logits, labels).item()
            correct_total += correct_predictions(probs, labels)

    epoch_time = time.time() - started_at
    epoch_loss = loss_total / len(dataloader)
    epoch_accuracy = correct_total / len(dataloader.dataset)
    return epoch_time, epoch_loss, epoch_accuracy
def test(model, dataloader):
    """
    Measure the accuracy of a model on a labelled test set, with timings.

    Args:
        model: The torch module to evaluate. It is switched to eval mode,
            must expose a `device` attribute, and must return a pair whose
            second element holds the class probabilities.
        dataloader: A DataLoader object to iterate over the test data. Each
            batch is indexed with the keys "premise", "premise_length",
            "hypothesis", "hypothesis_length" and "label".

    Returns:
        batch_time: The time spent in the batches divided by
            `len(dataloader)`.
        total_time: The total time to process the whole dataset.
        accuracy: The accumulated `correct_predictions` results divided by
            `len(dataloader.dataset)`.
    """
    model.eval()
    device = model.device
    input_keys = ("premise", "premise_length",
                  "hypothesis", "hypothesis_length")

    started_at = time.time()
    time_in_batches = 0.0
    correct_total = 0.0

    # No gradients are needed while evaluating.
    with torch.no_grad():
        for batch in dataloader:
            tick = time.time()

            # Move the model inputs, then the labels, to the model's device.
            inputs = [batch[key].to(device) for key in input_keys]
            labels = batch["label"].to(device)

            _, probs = model(*inputs)

            correct_total += correct_predictions(probs, labels)
            time_in_batches += time.time() - tick

    batch_time = time_in_batches / len(dataloader)
    total_time = time.time() - started_at
    accuracy = correct_total / len(dataloader.dataset)
    return batch_time, total_time, accuracy
def test(model, num_classes, dataloader, print_Confusion=False):
    """
    Test a model on some labelled test dataset and report timings, the F1
    score of label 1 and the accuracy.

    Args:
        model: The torch module on which testing must be performed. It must
            expose a `device` attribute and return a pair whose second
            element holds the class probabilities when called with
            (premises, premises_lengths, hypotheses, hypotheses_lengths,
            similarity).
        num_classes: The number of classes; the confusion matrix is
            `num_classes` x `num_classes`. Must be at least 2 because the
            returned F1 is read from index 1.
        dataloader: A DataLoader object to iterate over some dataset. Each
            batch is indexed with the keys "premise", "premise_length",
            "hypothesis", "hypothesis_length", "label" and "similarity".
        print_Confusion: When true, print the confusion matrix and the
            per-label precision, recall and F1.

    Returns:
        batch_time: The average time to predict the classes of a batch.
        total_time: The total time to process the whole dataset.
        f1: The F1 score of label 1 (0.0 when it is undefined).
        accuracy: The accumulated `correct_predictions` results divided by
            `len(dataloader.dataset)`.
    """
    def _ratio(numerator, denominator):
        # An undefined ratio (empty row/column, or precision + recall == 0)
        # is reported as 0.0 instead of raising ZeroDivisionError.
        return numerator / denominator if denominator else 0.0

    # Switch the model to eval mode.
    model.eval()
    device = model.device

    time_start = time.time()
    batch_time = 0.0
    correct_preds = 0.0
    # confusion[i, j] counts the samples predicted as class i whose label
    # is class j.
    confusion = torch.zeros(num_classes, num_classes, dtype=torch.long)

    # Deactivate autograd for evaluation.
    with torch.no_grad():
        for batch in dataloader:
            batch_start = time.time()

            # Move input and output data to the GPU if one is used.
            premises = batch["premise"].to(device)
            premises_lengths = batch["premise_length"].to(device)
            hypotheses = batch["hypothesis"].to(device)
            hypotheses_lengths = batch["hypothesis_length"].to(device)
            labels = batch["label"].to(device)
            similarity = batch["similarity"].to(device)

            _, probs = model(premises,
                             premises_lengths,
                             hypotheses,
                             hypotheses_lengths,
                             similarity)

            _, pred = probs.max(dim=1)
            # Convert once per batch to plain Python ints rather than
            # indexing the tensors element by element.
            for predicted, actual in zip(pred.tolist(), labels.tolist()):
                confusion[predicted, actual] += 1

            correct_preds += correct_predictions(probs, labels)
            batch_time += time.time() - batch_start

    batch_time /= len(dataloader)
    total_time = time.time() - time_start
    accuracy = correct_preds / len(dataloader.dataset)

    if print_Confusion:
        print("Confusion matrix:")
        print(confusion)
        print("Report precision, recall, and f1:")
        for i in range(confusion.size()[0]):
            true_positives = confusion[i, i].item()
            p = _ratio(true_positives, confusion[i, :].sum().item())
            r = _ratio(true_positives, confusion[:, i].sum().item())
            f1 = _ratio(2 * p * r, p + r)
            print("Label {}: {:.3f}, {:.3f}, {:.3f}".format(i, p, r, f1))

    # F1 of label 1 is what gets returned. Rows are predictions and columns
    # are labels, so the row sum yields precision and the column sum recall.
    true_positives = confusion[1, 1].item()
    p = _ratio(true_positives, confusion[1, :].sum().item())
    r = _ratio(true_positives, confusion[:, 1].sum().item())
    f1 = _ratio(2 * p * r, p + r)

    return batch_time, total_time, f1, accuracy
def train(model, dataloader, embeddings, embeddingString, optimizer, criterion,
          epoch_number, batch_size, max_gradient_norm, testing=True):
    """
    Run one training epoch with the given optimizer and criterion, showing
    progress with a tqdm bar.

    Args:
        model: The torch module to train. It is switched to train mode, must
            expose a `device` attribute, and must return a 3-tuple whose
            first two elements are `(logits, probs)`.
        dataloader: The iterable of training batches. Each batch is indexed
            with the keys "premises", "premises_lengths", "hypotheses",
            "hypotheses_lengths", "labels", "premise_polarities",
            "hypothesis_polarities", 'max_premise_length' and
            'max_hypothesis_length'.
        embeddings: An object whose `get_batch(offset)` result is passed to
            the model for the current batch.
        embeddingString: Passed through to the model unchanged.
        optimizer: The torch optimizer used to update the model.
        criterion: The loss criterion applied to `(logits, labels)`.
        epoch_number: The number of the current epoch. Not used by this
            function.
        batch_size: The amount by which the offset given to
            `embeddings.get_batch` advances after each batch.
        max_gradient_norm: Max. norm for gradient norm clipping.
        testing: When true (the default), stop after the first batch.

    Returns:
        epoch_time: The total time spent in the epoch.
        epoch_loss: The sum of the per-batch losses divided by
            `len(dataloader)`, including when `testing` cut the loop short.
        epoch_accuracy: The sum of the first element returned by
            `correct_predictions` for each batch, divided by
            `len(dataloader)`.
    """
    model.train()
    device = model.device

    started_at = time.time()
    time_in_batches = 0.0
    loss_total = 0.0
    correct_total = 0

    progress = tqdm(dataloader)
    offset = 0  # Position handed to embeddings.get_batch for this batch.

    for batches_done, batch in enumerate(progress, start=1):
        tick = time.time()

        # Premises and hypotheses are passed to the model as they are; only
        # the lengths, labels and polarities are turned into tensors on the
        # model's device.
        premises = batch["premises"]
        premises_lengths = torch.LongTensor(
            batch["premises_lengths"]).to(device)
        hypotheses = batch["hypotheses"]
        hypotheses_lengths = torch.LongTensor(
            batch["hypotheses_lengths"]).to(device)
        labels = torch.LongTensor(batch["labels"]).to(device)
        premise_polarities = get_tensor_of_tensor(
            batch["premise_polarities"],
            batch['max_premise_length']).to(device)
        hypothesis_polarities = get_tensor_of_tensor(
            batch["hypothesis_polarities"],
            batch['max_hypothesis_length']).to(device)

        optimizer.zero_grad()

        batch_embeddings = embeddings.get_batch(offset)
        logits, probs, _ = model(premises,
                                 premises_lengths,
                                 hypotheses,
                                 hypotheses_lengths,
                                 premise_polarities,
                                 hypothesis_polarities,
                                 batch_embeddings,
                                 embeddingString,
                                 batch['max_premise_length'],
                                 batch['max_hypothesis_length'])
        loss = criterion(logits, labels)
        loss.backward()

        # Clip the gradients before the optimizer update.
        nn.utils.clip_grad_norm_(model.parameters(), max_gradient_norm)
        optimizer.step()

        offset += batch_size

        time_in_batches += time.time() - tick
        loss_total += loss.item()
        batch_correct, _ = correct_predictions(probs, labels)
        correct_total += batch_correct

        # Show the running averages on the progress bar.
        progress.set_description(
            "Avg. batch proc. time: {:.4f}s, loss: {:.4f}".format(
                time_in_batches / batches_done, loss_total / batches_done))

        if testing:
            break

    epoch_time = time.time() - started_at
    epoch_loss = loss_total / len(dataloader)
    epoch_accuracy = correct_total / len(dataloader)
    return epoch_time, epoch_loss, epoch_accuracy
def validate(model, dataloader, embeddings, embeddingString, criterion,
             batch_size, testing):
    """
    Evaluate a model on a validation set, returning its loss and accuracy.

    Args:
        model: The torch module to evaluate. It is switched to eval mode,
            must expose a `device` attribute, and must return a 3-tuple
            whose first two elements are `(logits, probs)`.
        dataloader: The iterable of validation batches. Each batch is
            indexed with the keys "premises", "premises_lengths",
            "hypotheses", "hypotheses_lengths", "labels",
            "premise_polarities", "hypothesis_polarities",
            'max_premise_length' and 'max_hypothesis_length'.
        embeddings: An object whose `get_batch(offset)` result is passed to
            the model for the current batch.
        embeddingString: Passed through to the model unchanged.
        criterion: The loss criterion applied to `(logits, labels)`.
        batch_size: The amount by which the offset given to
            `embeddings.get_batch` advances after each batch.
        testing: When true, stop after the first batch.

    Returns:
        epoch_time: The total time spent going through the validation set.
        epoch_loss: The sum of the per-batch losses divided by
            `len(dataloader)`, including when `testing` cut the loop short.
        epoch_accuracy: The sum of the first element returned by
            `correct_predictions` for each batch, divided by
            `len(dataloader)`.
    """
    model.eval()
    device = model.device

    started_at = time.time()
    loss_total = 0.0
    accuracy_total = 0.0

    # No gradients are needed while evaluating.
    with torch.no_grad():
        offset = 0  # Position handed to embeddings.get_batch for this batch.
        for batch in dataloader:
            # Premises and hypotheses are passed to the model as they are;
            # only the lengths, labels and polarities are turned into
            # tensors on the model's device.
            premises = batch["premises"]
            premises_lengths = torch.LongTensor(
                batch["premises_lengths"]).to(device)
            hypotheses = batch["hypotheses"]
            hypotheses_lengths = torch.LongTensor(
                batch["hypotheses_lengths"]).to(device)
            labels = torch.LongTensor(batch["labels"]).to(device)
            premise_polarities = get_tensor_of_tensor(
                batch["premise_polarities"],
                batch['max_premise_length']).to(device)
            hypothesis_polarities = get_tensor_of_tensor(
                batch["hypothesis_polarities"],
                batch['max_hypothesis_length']).to(device)

            batch_embeddings = embeddings.get_batch(offset)
            logits, probs, _ = model(premises,
                                     premises_lengths,
                                     hypotheses,
                                     hypotheses_lengths,
                                     premise_polarities,
                                     hypothesis_polarities,
                                     batch_embeddings,
                                     embeddingString,
                                     batch['max_premise_length'],
                                     batch['max_hypothesis_length'])

            loss_total += criterion(logits, labels).item()
            batch_accuracy, _ = correct_predictions(probs, labels)
            accuracy_total += batch_accuracy

            offset += batch_size
            if testing:
                break

    epoch_time = time.time() - started_at
    epoch_loss = loss_total / len(dataloader)
    epoch_accuracy = accuracy_total / len(dataloader)
    return epoch_time, epoch_loss, epoch_accuracy
def _test(model, dataloader, embeddings, batch_size, testing):
    """
    Evaluate a model on a labelled test set and collect per-batch outputs.

    Args:
        model: The torch module to evaluate. It is switched to eval mode,
            must expose a `device` attribute, and must return a 3-tuple
            `(_, probs, similarity_matrix)`.
        dataloader: The iterable of test batches. Each batch is indexed
            with the keys "premises", "premises_lengths", "hypotheses",
            "hypotheses_lengths", "labels", "premise_polarities",
            "hypothesis_polarities", 'max_premise_length' and
            'max_hypothesis_length'.
        embeddings: An object whose `get_batch(offset)` result is passed to
            the model for the current batch.
        batch_size: The amount by which the offset given to
            `embeddings.get_batch` advances after each batch.
        testing: When true, stop after the first batch.

    Returns:
        A 9-tuple:
        batch_time: The time spent in the batches divided by
            `len(dataloader)`.
        total_time: The total time to process the dataset.
        running_accuracy: The sum of the first element returned by
            `correct_predictions` for each batch, divided by
            `len(dataloader)`.
        out_classes: One entry per batch, the second element returned by
            `correct_predictions`.
        labels: One label tensor per batch.
        similarity_matrices: One entry per batch, the third model output.
        probs: The probabilities of the LAST processed batch only.
        all_premises: One entry per batch, the batch's "premises".
        all_hypotheses: One entry per batch, the batch's "hypotheses".
    """
    model.eval()
    device = model.device

    started_at = time.time()
    time_in_batches = 0.0
    accuracy_total = 0.0

    # Per-batch outputs handed back to the caller.
    out_classes, labels = [], []
    all_premises, all_hypotheses = [], []
    similarity_matrices = []

    # No gradients are needed while evaluating.
    with torch.no_grad():
        offset = 0  # Position handed to embeddings.get_batch for this batch.
        for batch in dataloader:
            tick = time.time()

            # Premises and hypotheses are passed to the model as they are;
            # only the lengths, labels and polarities are turned into
            # tensors on the model's device.
            premises = batch["premises"]
            premises_lengths = torch.LongTensor(
                batch["premises_lengths"]).to(device)
            hypotheses = batch["hypotheses"]
            hypotheses_lengths = torch.LongTensor(
                batch["hypotheses_lengths"]).to(device)
            batch_labels = torch.LongTensor(batch["labels"]).to(device)
            premise_polarities = get_tensor_of_tensor(
                batch["premise_polarities"],
                batch['max_premise_length']).to(device)
            hypothesis_polarities = get_tensor_of_tensor(
                batch["hypothesis_polarities"],
                batch['max_hypothesis_length']).to(device)

            batch_embeddings = embeddings.get_batch(offset)
            # NOTE(review): unlike the sibling train/validate functions,
            # this call passes no `embeddingString` argument -- confirm
            # against the model's forward signature.
            _, probs, similarity_matrix = model(premises,
                                                premises_lengths,
                                                hypotheses,
                                                hypotheses_lengths,
                                                premise_polarities,
                                                hypothesis_polarities,
                                                batch_embeddings,
                                                batch['max_premise_length'],
                                                batch['max_hypothesis_length'])

            batch_accuracy, out_class = correct_predictions(probs,
                                                            batch_labels)
            time_in_batches += time.time() - tick
            accuracy_total += batch_accuracy

            out_classes.append(out_class)
            labels.append(batch_labels)
            all_premises.append(premises)
            all_hypotheses.append(hypotheses)
            similarity_matrices.append(similarity_matrix)

            offset += batch_size
            if testing:
                break

    batch_time = time_in_batches / len(dataloader)
    total_time = time.time() - started_at
    running_accuracy = accuracy_total / len(dataloader)

    # `probs` is whatever the last loop iteration left behind; it is unbound
    # if the dataloader yielded no batch.
    return (batch_time, total_time, running_accuracy, out_classes, labels,
            similarity_matrices, probs, all_premises, all_hypotheses)