def load_model(filename, using_GPU):
    # Rebuild the architecture with the training-time hyperparameters,
    # then restore the trained weights from the checkpoint file.
    RNNseq_model = RNNSequenceModel(num_classes=2, embedding_dim=300 + 1024,
                                    hidden_size=300, num_layers=1, bidir=True,
                                    dropout1=0.5, dropout2=0, dropout3=0.1)
    RNNseq_model.load_state_dict(torch.load(filename))
    if using_GPU:
        RNNseq_model.cuda()
    return RNNseq_model
def load_model(filename, embedding_dim):
    # NOTE: hidden_size, dropouts, and using_GPU are assumed to be
    # module-level globals in this variant.
    model = RNNSequenceModel(num_classes=2, embedding_dim=embedding_dim,
                             hidden_size=hidden_size, num_layers=1, bidir=True,
                             dropout1=dropouts[0], dropout2=dropouts[1],
                             dropout3=dropouts[2])
    model.load_state_dict(torch.load(filename))
    if using_GPU:
        model.cuda()
    return model
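# A minimal usage sketch for the first load_model variant above; the
# checkpoint path is a hypothetical placeholder.
using_GPU = torch.cuda.is_available()
model = load_model("models/vua_sequence.pt", using_GPU)
model.eval()  # disable the dropout layers before running inference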
""" 3. Model training """ ''' 3. 1 set up model, loss criterion, optimizer ''' # Instantiate the model # embedding_dim = glove + elmo + suffix indicator # dropout1: dropout on input to RNN # dropout2: dropout in RNN; would be used if num_layers!=1 # dropout3: dropout on hidden state of RNN to linear layer RNNseq_model = RNNSequenceModel(num_classes=2, embedding_dim=300 + 1024, hidden_size=300, num_layers=1, bidir=True, dropout1=0.5, dropout2=0, dropout3=0.1) # Move the model to the GPU if available if using_GPU: RNNseq_model = RNNseq_model.cuda() # Set up criterion for calculating loss loss_criterion = nn.NLLLoss() # Set up an optimizer for updating the parameters of the rnn_clf rnn_optimizer = optim.Adam(RNNseq_model.parameters(), lr=0.005) # Number of epochs (passes through the dataset) to train the model for. num_epochs = 10 ''' 3. 2 train model
def train_model(): """ 3. Model training """ ''' 3. 1 set up model, loss criterion, optimizer ''' # Instantiate the model # embedding_dim = glove + elmo + suffix indicator # dropout1: dropout on input to RNN # dropout2: dropout in RNN; would be used if num_layers!=1 # dropout3: dropout on hidden state of RNN to linear layer RNNseq_model = RNNSequenceModel(num_classes=2, embedding_dim=300 + 1024, hidden_size=300, num_layers=1, bidir=True, dropout1=0.5, dropout2=0, dropout3=0.1) # Move the model to the GPU if available if using_GPU: RNNseq_model = RNNseq_model.cuda() # Set up criterion for calculating loss loss_criterion = nn.NLLLoss() # Set up an optimizer for updating the parameters of the rnn_clf rnn_optimizer = optim.Adam(RNNseq_model.parameters(), lr=0.005) # Number of epochs (passes through the dataset) to train the model for. num_epochs = 10 ''' 3. 2 train model ''' train_loss = [] val_loss = [] performance_matrix = None val_f1s = [] train_f1s = [] # A counter for the number of gradient updates num_iter = 0 comparable = [] for epoch in tqdm(range(num_epochs)): # print("Starting epoch {}".format(epoch + 1)) for (__, example_text, example_lengths, labels) in train_dataloader_vua: example_text = Variable(example_text) example_lengths = Variable(example_lengths) labels = Variable(labels) if using_GPU: example_text = example_text.cuda() example_lengths = example_lengths.cuda() labels = labels.cuda() # predicted shape: (batch_size, seq_len, 2) predicted = RNNseq_model(example_text, example_lengths) batch_loss = loss_criterion(predicted.view(-1, 2), labels.view(-1)) rnn_optimizer.zero_grad() batch_loss.backward() rnn_optimizer.step() num_iter += 1 # Calculate validation and training set loss and accuracy every 200 gradient updates if num_iter % 200 == 0: avg_eval_loss, performance_matrix = evaluate( idx2pos, val_dataloader_vua, RNNseq_model, loss_criterion, using_GPU) val_loss.append(avg_eval_loss) val_f1s.append(performance_matrix[:, 2]) # print("Iteration {}. Validation Loss {}.".format(num_iter, avg_eval_loss)) # avg_eval_loss, performance_matrix = evaluate(idx2pos, train_dataloader_vua, RNNseq_model, # loss_criterion, using_GPU) # train_loss.append(avg_eval_loss) # train_f1s.append(performance_matrix[:, 2]) # print("Iteration {}. Training Loss {}.".format(num_iter, avg_eval_loss)) """ for additional training """ rnn_optimizer = optim.Adam(RNNseq_model.parameters(), lr=0.0001) for epoch in range(10): # print("Starting epoch {}".format(epoch + 1)) for (__, example_text, example_lengths, labels) in train_dataloader_vua: example_text = Variable(example_text) example_lengths = Variable(example_lengths) labels = Variable(labels) if using_GPU: example_text = example_text.cuda() example_lengths = example_lengths.cuda() labels = labels.cuda() # predicted shape: (batch_size, seq_len, 2) predicted = RNNseq_model(example_text, example_lengths) batch_loss = loss_criterion(predicted.view(-1, 2), labels.view(-1)) rnn_optimizer.zero_grad() batch_loss.backward() rnn_optimizer.step() num_iter += 1 # Calculate validation and training set loss and accuracy every 200 gradient updates if num_iter % 200 == 0: avg_eval_loss, performance_matrix = evaluate( idx2pos, val_dataloader_vua, RNNseq_model, loss_criterion, using_GPU) val_loss.append(avg_eval_loss) val_f1s.append(performance_matrix[:, 2]) # print("Iteration {}. 
Validation Loss {}.".format(num_iter, avg_eval_loss)) # avg_eval_loss, performance_matrix = evaluate(idx2pos, train_dataloader_vua, RNNseq_model, # loss_criterion, using_GPU) # train_loss.append(avg_eval_loss) # train_f1s.append(performance_matrix[:, 2]) # print("Iteration {}. Training Loss {}.".format(num_iter, avg_eval_loss)) # comparable.append(get_performance()) # print("Training done!") return RNNseq_model, loss_criterion
def train_model(train_dataloader, val_dataloader, embedding_dim,
                num_epochs=20, print_every=200, prefix=dataset, verbose=True):
    # NOTE: dataset, hidden_size, dropouts, class_weight, and using_GPU are
    # assumed to be module-level globals in this variant.
    model = RNNSequenceModel(num_classes=2, embedding_dim=embedding_dim,
                             hidden_size=hidden_size, num_layers=1, bidir=True,
                             dropout1=dropouts[0], dropout2=dropouts[1],
                             dropout3=dropouts[2])
    model = model.cuda() if using_GPU else model

    loss_weight = None
    if class_weight:
        loss_weight = torch.Tensor(class_weight)
        loss_weight = loss_weight.cuda() if using_GPU else loss_weight
    loss_criterion = nn.NLLLoss(weight=loss_weight,
                                reduction='sum' if class_weight else 'mean')
    rnn_optimizer = optim.Adam(model.parameters(), lr=0.005)

    optimal_score = [0, 0, 0, 0, 0]  # Iteration, Acc, Precision, Recall, F1
    optimal_state_dict = None
    num_iter = 0  # Number of gradient updates
    # Keep track of training performance - resets every print_every updates
    train_confusion_matrix = np.zeros((2, 2))

    for epoch in range(num_epochs):
        # Drop to a slower learning rate halfway through training
        # (integer division so this also triggers when num_epochs is odd)
        if epoch == num_epochs // 2:
            rnn_optimizer = optim.Adam(model.parameters(), lr=0.001)
        for (example_text, example_lengths, example_labels,
             example_features) in train_dataloader:
            example_text = Variable(example_text)
            example_lengths = Variable(example_lengths)
            example_labels = Variable(example_labels)
            if using_GPU:
                example_text = example_text.cuda()
                example_lengths = example_lengths.cuda()
                example_labels = example_labels.cuda()
            # predicted shape: (batch_size, seq_len, 2)
            predicted = model(example_text, example_lengths)
            batch_loss = loss_criterion(predicted.view(-1, 2),
                                        example_labels.view(-1))
            rnn_optimizer.zero_grad()
            batch_loss.backward()
            rnn_optimizer.step()
            num_iter += 1
            # Get predictions, update confusion matrix
            _, predicted_labels = torch.max(predicted.data, 2)
            train_confusion_matrix = eval_util.update_confusion_matrix(
                train_confusion_matrix, predicted_labels, example_labels.data)
            # Report training and validation performance every print_every gradient updates
            if num_iter % print_every == 0:
                if verbose:
                    train_performance = eval_util.print_info(train_confusion_matrix)
                    train_confusion_matrix = np.zeros((2, 2))
                    print(f"Iteration {num_iter}")
                    print(f"Trn Performance: {train_performance}, Loss {batch_loss.item()}")
                if val_dataloader is not None:
                    avg_eval_loss, performance = eval_util.evaluate(
                        val_dataloader, model, loss_criterion, using_GPU)
                    # Track the state dict with the best validation F1
                    if performance[-1] > optimal_score[-1]:
                        optimal_score = performance
                        optimal_state_dict = model.state_dict()
                    if verbose:
                        print(f"Val Performance: {performance}, Loss {avg_eval_loss}")
                    filename = f"models/{prefix}_iter_{num_iter}.pt"
                    torch.save(model.state_dict(), filename)
    # Restore the best validation checkpoint, if one was tracked
    if optimal_state_dict is not None:
        model.load_state_dict(optimal_state_dict)
    return model, optimal_score
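# A hedged usage sketch for this train_model variant; the dataloaders and
# module globals are assumptions, and the five-element score layout follows
# the optimal_score comment above. The periodic torch.save writes into
# models/, so that directory must exist.
import os

os.makedirs("models", exist_ok=True)
model, (it, acc, p, r, f1) = train_model(train_dataloader, val_dataloader,
                                         embedding_dim=300 + 1024)
print(f"Best validation F1 {f1} at iteration {it}")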
""" 3. Model training """ ''' 3. 1 set up model, loss criterion, optimizer ''' # Instantiate the model # embedding_dim = glove + elmo + pos indicator # dropout1: dropout on input to RNN # dropout2: dropout in RNN; would be used if num_layers!=1 # dropout3: dropout on hidden state of RNN to linear layer RNNseq_model = RNNSequenceModel(num_classes=2, embedding_dim=300 + 1024, hidden_size=300, num_layers=1, bidir=True, dropout1=0.2, dropout2=0.2, dropout3=0.2) # Move the model to the GPU if available if using_GPU: RNNseq_model = RNNseq_model.cuda() # Set up criterion for calculating loss loss_criterion = nn.NLLLoss() # Set up an optimizer for updating the parameters of the rnn_clf rnn_optimizer = optim.SGD(RNNseq_model.parameters(), lr=0.08, momentum=0.9) # Number of epochs (passes through the dataset) to train the model for. num_epochs = 15 ''' 3. 2 train model
3. Model training """ ''' 3. 1 set up model, loss criterion, optimizer ''' # Instantiate the model # embedding_dim = glove + elmo + suffix indicator # dropout1: dropout on input to RNN # dropout2: dropout in RNN; would be used if num_layers!=1 # dropout3: dropout on hidden state of RNN to linear layer RNNseq_model = RNNSequenceModel(num_classes=2, embedding_dim=300 + 1024 + 250 + 30, hidden_size=300, num_layers=1, bidir=True, char_vocab_size=len(c2idx), char_embed_dim=50, dropout1=0.5, dropout2=0, dropout3=0.1) Transformer_model = Transformer(emb=300 + 1024 + 250 + 30, k=300, heads=1, depth=1, num_classes=2, char_vocab_size=len(c2idx), char_embed_dim=50) transformer_parameters = sum(p.numel() for p in Transformer_model.parameters() if p.requires_grad)
def train_model(train_dataloader, val_dataloader, fold_num, idx2pos, using_GPU):
    optimal_f1s = []
    optimal_ps = []
    optimal_rs = []
    optimal_accs = []
    predictions_all = []
    RNNseq_model = RNNSequenceModel(num_classes=2, embedding_dim=300 + 1024,
                                    hidden_size=300, num_layers=1, bidir=True,
                                    dropout1=0.5, dropout2=0, dropout3=0.1)
    # Move the model to the GPU if available
    if using_GPU:
        RNNseq_model = RNNseq_model.cuda()
    # Set up criterion for calculating loss
    loss_criterion = nn.NLLLoss()
    # Set up an optimizer for updating the parameters of the rnn_clf
    rnn_optimizer = optim.Adam(RNNseq_model.parameters(), lr=0.005)
    # Number of epochs (passes through the dataset) to train the model for.
    num_epochs = 20

    ''' 3.2 train model '''
    train_loss = []
    val_loss = []
    performance_matrix = None
    val_f1s = []
    train_f1s = []
    # A counter for the number of gradient updates
    num_iter = 0
    comparable = []
    for epoch in range(num_epochs):
        # print("Starting epoch {}".format(epoch + 1))
        for (_, example_text, example_lengths, example_labels) in train_dataloader:
            example_text = Variable(example_text)
            example_lengths = Variable(example_lengths)
            example_labels = Variable(example_labels)
            if using_GPU:
                example_text = example_text.cuda()
                example_lengths = example_lengths.cuda()
                example_labels = example_labels.cuda()
            # predicted shape: (batch_size, seq_len, 2)
            predicted = RNNseq_model(example_text, example_lengths)
            # _, predicted_labels = torch.max(predicted.data, 2)
            # print("# pred M:", torch.sum(predicted_labels), "# actual M:", torch.sum(example_labels))
            batch_loss = loss_criterion(predicted.view(-1, 2), example_labels.view(-1))
            rnn_optimizer.zero_grad()
            batch_loss.backward()
            rnn_optimizer.step()
            num_iter += 1
            # Calculate validation set loss and accuracy every 100 gradient updates
            if num_iter % 100 == 0:
                avg_eval_loss, performance_matrix = evaluate(
                    idx2pos, val_dataloader, RNNseq_model,
                    loss_criterion, using_GPU)
                val_loss.append(avg_eval_loss)
                val_f1s.append(performance_matrix[:, 2])
                print("Iteration {}. Validation Loss {}. {}".format(
                    num_iter, avg_eval_loss, performance_matrix))
                filename = f"../models/sequence/TOEFL_fold_{fold_num}_iter_{num_iter}.pt"
                torch.save(RNNseq_model.state_dict(), filename)
                # avg_eval_loss, performance_matrix = evaluate(idx2pos, train_dataloader_vua, RNNseq_model,
                #                                              loss_criterion, using_GPU)
                # train_loss.append(avg_eval_loss)
                # train_f1s.append(performance_matrix[:, 2])
                # print("Iteration {}. Training Loss {}.".format(num_iter, avg_eval_loss))
    return RNNseq_model
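# This variant checkpoints every 100 updates instead of tracking the best
# state in memory, so a preferred iteration can be restored afterwards.
# A sketch; the fold and iteration numbers below are hypothetical.
best_model = RNNSequenceModel(num_classes=2, embedding_dim=300 + 1024,
                              hidden_size=300, num_layers=1, bidir=True,
                              dropout1=0.5, dropout2=0, dropout3=0.1)
best_model.load_state_dict(
    torch.load("../models/sequence/TOEFL_fold_0_iter_500.pt"))
best_model.eval()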
def train_model():
    ''' 2.3 10-fold cross validation '''
    # Separate the embedded sentences, POS tags, and labels into three lists,
    # in order to pass them into the TextDataset as arguments
    sentences = [example[0] for example in embedded_trofi]
    poss = [example[1] for example in embedded_trofi]
    labels = [example[2] for example in embedded_trofi]
    # ten_folds is a list of 10 tuples, each tuple is
    # (list_of_embedded_sentences, list_of_pos_tags, list_of_corresponding_labels)
    ten_folds = []
    fold_size = int(3737 / 10)
    for i in range(10):
        ten_folds.append((sentences[i * fold_size:(i + 1) * fold_size],
                          poss[i * fold_size:(i + 1) * fold_size],
                          labels[i * fold_size:(i + 1) * fold_size]))
    idx2pos = {0: 'words that are not focus verbs', 1: 'focus verb'}
    optimal_f1s = []
    optimal_ps = []
    optimal_rs = []
    optimal_accs = []
    predictions_all = []
    for i in tqdm(range(10)):
        ''' 2.3 set up Dataloader for batching '''
        training_sentences = []
        training_labels = []
        training_poss = []
        for j in range(10):
            if j != i:
                training_sentences.extend(ten_folds[j][0])
                training_poss.extend(ten_folds[j][1])
                training_labels.extend(ten_folds[j][2])
        training_dataset_trofi = TextDataset(training_sentences, training_poss,
                                             training_labels)
        val_dataset_trofi = TextDataset(ten_folds[i][0], ten_folds[i][1],
                                        ten_folds[i][2])

        # Data-related hyperparameters
        batch_size = 10
        # Set up a DataLoader for the training and validation datasets
        train_dataloader_trofi = DataLoader(dataset=training_dataset_trofi,
                                            batch_size=batch_size, shuffle=True,
                                            collate_fn=TextDataset.collate_fn)
        val_dataloader_trofi = DataLoader(dataset=val_dataset_trofi,
                                          batch_size=batch_size, shuffle=False,
                                          collate_fn=TextDataset.collate_fn)

        """ 3. Model training """
        ''' 3.1 set up model, loss criterion, optimizer '''
        # Instantiate the model
        # embedding_dim = glove + elmo + suffix indicator
        # dropout1: dropout on input to RNN
        # dropout2: dropout in RNN; would be used if num_layers != 1
        # dropout3: dropout on hidden state of RNN to linear layer
        RNNseq_model = RNNSequenceModel(num_classes=2, embedding_dim=300 + 1024,
                                        hidden_size=300, num_layers=1, bidir=True,
                                        dropout1=0.5, dropout2=0, dropout3=0.2)
        # Move the model to the GPU if available
        if using_GPU:
            RNNseq_model = RNNseq_model.cuda()
        # Set up criterion for calculating loss
        loss_criterion = nn.NLLLoss()
        # Set up an optimizer for updating the parameters of the rnn_clf
        rnn_optimizer = optim.Adam(RNNseq_model.parameters(), lr=0.001)
        # Number of epochs (passes through the dataset) to train the model for.
        num_epochs = 10

        ''' 3.2 train model '''
        train_loss = []
        val_loss = []
        performance_matrix = None
        val_f1 = []
        val_p = []
        val_r = []
        val_acc = []
        train_f1 = []
        # A counter for the number of gradient updates
        num_iter = 0
        model_index = 0
        comparable = []
        for epoch in range(num_epochs):
            # print("Starting epoch {}".format(epoch + 1))
            for (__, example_text, example_lengths, labels) in train_dataloader_trofi:
                example_text = Variable(example_text)
                example_lengths = Variable(example_lengths)
                labels = Variable(labels)
                if using_GPU:
                    example_text = example_text.cuda()
                    example_lengths = example_lengths.cuda()
                    labels = labels.cuda()
                # predicted shape: (batch_size, seq_len, 2)
                predicted = RNNseq_model(example_text, example_lengths)
                batch_loss = loss_criterion(predicted.view(-1, 2), labels.view(-1))
                rnn_optimizer.zero_grad()
                batch_loss.backward()
                rnn_optimizer.step()
                num_iter += 1
                # Calculate validation set loss and accuracy every 200 gradient updates
                if num_iter % 200 == 0:
                    avg_eval_loss, performance_matrix = evaluate(
                        idx2pos, val_dataloader_trofi, RNNseq_model,
                        loss_criterion, using_GPU)
                    val_loss.append(avg_eval_loss)
                    val_p.append(performance_matrix[1][0])
                    val_r.append(performance_matrix[1][1])
                    val_f1.append(performance_matrix[1][2])
                    val_acc.append(performance_matrix[1][3])
                    # print("Iteration {}. Validation Loss {}.".format(num_iter, avg_eval_loss))
                    # avg_eval_loss, performance_matrix = evaluate(idx2pos, train_dataloader_trofi, RNNseq_model,
                    #                                              loss_criterion, using_GPU)
                    # train_loss.append(avg_eval_loss)
                    # train_f1.append(performance_matrix[1][1])
                    # print("Iteration {}. Training Loss {}.".format(num_iter, avg_eval_loss))
        # print("Training done for fold {}".format(i))

        """ 3.3 plot the training process: MET F1 and losses for validation and training dataset """
        # plt.figure(0)
        # plt.title('F1 for TroFi dataset on fold ' + str(i))
        # plt.xlabel('iteration (unit:200)')
        # plt.ylabel('F1')
        # plt.plot(val_f1, 'g')
        # # plt.plot(train_f1, 'b')
        # plt.legend(['Validation F1', 'Training F1'], loc='upper right')
        # plt.show()
        # plt.figure(1)
        # plt.title('Loss for TroFi dataset on fold ' + str(i))
        # plt.xlabel('iteration (unit:200)')
        # plt.ylabel('Loss')
        # plt.plot(val_loss, 'g')
        # # plt.plot(train_loss, 'b')
        # plt.legend(['Validation loss', 'Training loss'], loc='upper right')
        # plt.show()

        """ store the best f1 """
        # print('val_f1: ', val_f1)
        # If the maximum is NaN (early iterations with no positive predictions),
        # skip the first few measurements when picking the best F1.
        if math.isnan(max(val_f1)):
            optimal_f1s.append(max(val_f1[6:]))
        else:
            optimal_f1s.append(max(val_f1))
        idx = val_f1.index(optimal_f1s[-1])
        optimal_ps.append(val_p[idx])
        optimal_rs.append(val_r[idx])
        optimal_accs.append(val_acc[idx])

    """ print out the performance; plot the performance on each fold """
    # print('F1 on TroFi by 10-fold = ', optimal_f1s)
    # print('Precision on TroFi = ', np.mean(np.array(optimal_ps)))
    # print('Recall on TroFi = ', np.mean(np.array(optimal_rs)))
    # print('F1 on TroFi = ', np.mean(np.array(optimal_f1s)))
    # print('Accuracy on TroFi = ', np.mean(np.array(optimal_accs)))
    return (optimal_f1s, np.mean(np.array(optimal_ps)),
            np.mean(np.array(optimal_rs)), np.mean(np.array(optimal_f1s)),
            np.mean(np.array(optimal_accs)))
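# A short usage sketch: consuming the cross-validation results returned
# above (per-fold F1 list plus mean precision, recall, F1, and accuracy).
fold_f1s, mean_p, mean_r, mean_f1, mean_acc = train_model()
print('F1 on TroFi by 10-fold = ', fold_f1s)
print('Precision/Recall/F1/Accuracy on TroFi = ', mean_p, mean_r, mean_f1, mean_acc)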