def train_model():
    """Train the VUA metaphor classifier; return (model, criterion)."""
    # embedding_dim = GloVe (300) + ELMo (1024) + suffix indicator (50)
    # dropout1: on RNN input; dropout2: inside RNN (only if num_layers != 1);
    # dropout3: on RNN hidden state feeding the linear layer.
    model = RNNSequenceClassifier(num_classes=2, embedding_dim=300 + 1024 + 50,
                                  hidden_size=300, num_layers=1, bidir=True,
                                  dropout1=0.3, dropout2=0.2, dropout3=0.2)
    # Move the model to the GPU if available.
    if using_GPU:
        model = model.cuda()
    criterion = nn.NLLLoss()
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
    # Number of passes through the dataset.
    num_epochs = 20
    '''
    3. 2
    train model
    '''
    # Metric histories (training_* kept for parity with the sibling scripts).
    training_loss, val_loss = [], []
    training_f1, val_f1 = [], []
    # Counts gradient updates across all epochs.
    num_iter = 0
    for _ in tqdm(range(num_epochs)):
        # print("Starting epoch {}".format(epoch + 1))
        for batch_text, batch_lengths, batch_labels in train_dataloader_vua:
            batch_text = Variable(batch_text)
            batch_lengths = Variable(batch_lengths)
            batch_labels = Variable(batch_labels)
            if using_GPU:
                batch_text = batch_text.cuda()
                batch_lengths = batch_lengths.cuda()
                batch_labels = batch_labels.cuda()
            # scores shape: (batch_size, 2)
            scores = model(batch_text, batch_lengths)
            loss = criterion(scores, batch_labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            num_iter += 1
            # Validate and checkpoint every 200 gradient updates only.
            if num_iter % 200 != 0:
                continue
            avg_eval_loss, eval_accuracy, precision, recall, f1, fus_f1 = evaluate(
                val_dataloader_vua, model, criterion, using_GPU)
            val_loss.append(avg_eval_loss)
            val_f1.append(f1)
            print(
                "Iteration {}. Validation Loss {}. Accuracy {}. Precision {}. Recall {}. F1 {}. class-wise F1 {}."
                .format(num_iter, avg_eval_loss, eval_accuracy, precision, recall, f1, fus_f1))
            filename = f'../models/classification/VUA_iter_{str(num_iter)}.pt'
            torch.save(model.state_dict(), filename)
    # print("Training done!")
    return model, criterion
set up model, loss criterion, optimizer ''' # Instantiate the model # embedding_dim = glove + elmo + suffix indicator # dropout1: dropout on input to RNN # dropout2: dropout in RNN; would be used if num_layers!=1 # dropout3: dropout on hidden state of RNN to linear layer rnn_clf = RNNSequenceClassifier(num_classes=2, embedding_dim=300+1024+50, hidden_size=300, num_layers=1, bidir=True, dropout1=0.2, dropout2=0, dropout3=0.2) # Move the model to the GPU if available if using_GPU: rnn_clf = rnn_clf.cuda() # Set up criterion for calculating loss nll_criterion = nn.NLLLoss() # Set up an optimizer for updating the parameters of the rnn_clf rnn_clf_optimizer = optim.SGD(rnn_clf.parameters(), lr=0.02, momentum=0.9) # Number of epochs (passes through the dataset) to train the model for. num_epochs = 30 ''' 3. 2 train model ''' training_loss = [] val_loss = [] training_f1 = [] val_f1 = [] # A counter for the number of gradient updates num_iter = 0 train_dataloader = train_dataloader_mohX val_dataloader = val_dataloader_mohX
rnn_clf = RNNSequenceClassifier(num_classes=2, embedding_dim=350, hidden_size=300, num_layers=1, bidir=True, dropout1=0.2, dropout2=0, dropout3=0) # Move the model to the GPU if available if using_GPU: rnn_clf = rnn_clf.cuda() # Set up criterion for calculating loss nll_criterion = nn.NLLLoss() # Set up an optimizer for updating the parameters of the rnn_clf rnn_clf_optimizer = optim.Adam(rnn_clf.parameters(), lr=0.001) # Number of epochs (passes through the dataset) to train the model for. num_epochs = 40 ''' 3. 2 train model ''' training_loss = [] val_loss = [] training_f1 = [] val_f1 = [] val_p = [] val_r = [] val_acc = [] # A counter for the number of gradient updates num_iter = 0
def train_model():
    """Run 10-fold cross-validation on the TroFi dataset.

    For each fold, trains a fresh RNNSequenceClassifier on the other nine
    folds, evaluates on the held-out fold every 200 gradient updates, and
    records precision/recall/accuracy at the checkpoint with the best
    validation F1.

    Returns:
        Tuple of (mean precision, mean recall, mean F1, mean accuracy)
        over the 10 folds, as numpy floats.
    """
    optimal_f1s = []
    optimal_ps = []
    optimal_rs = []
    optimal_accs = []
    # predictions_all = []
    for i in tqdm(range(10)):
        '''
        2. 3
        set up Dataloader for batching
        '''
        # Fold i is held out for validation; the other nine form the training set.
        training_sentences = []
        training_labels = []
        for j in range(10):
            if j != i:
                training_sentences.extend(ten_folds[j][0])
                training_labels.extend(ten_folds[j][1])
        training_dataset_trofi = TextDataset(training_sentences, training_labels)
        val_dataset_trofi = TextDataset(ten_folds[i][0], ten_folds[i][1])

        # Data-related hyperparameters
        batch_size = 10
        # Set up a DataLoader for the training and validation datasets
        train_dataloader_trofi = DataLoader(dataset=training_dataset_trofi,
                                            batch_size=batch_size, shuffle=True,
                                            collate_fn=TextDataset.collate_fn)
        val_dataloader_trofi = DataLoader(dataset=val_dataset_trofi,
                                          batch_size=batch_size, shuffle=False,
                                          collate_fn=TextDataset.collate_fn)
        """
        3. Model training
        """
        '''
        3. 1
        set up model, loss criterion, optimizer
        '''
        # embedding_dim = glove (300) + elmo (1024) + suffix indicator (50)
        # dropout1: dropout on input to RNN
        # dropout2: dropout in RNN; would be used if num_layers != 1
        # dropout3: dropout on hidden state of RNN to linear layer
        rnn_clf = RNNSequenceClassifier(num_classes=2, embedding_dim=300 + 1024 + 50,
                                        hidden_size=300, num_layers=1, bidir=True,
                                        dropout1=0.2, dropout2=0, dropout3=0)
        # Move the model to the GPU if available
        if using_GPU:
            rnn_clf = rnn_clf.cuda()
        # Set up criterion for calculating loss
        nll_criterion = nn.NLLLoss()
        # Set up an optimizer for updating the parameters of the rnn_clf
        rnn_clf_optimizer = optim.Adam(rnn_clf.parameters(), lr=0.001)
        # Number of epochs (passes through the dataset) to train the model for.
        num_epochs = 15
        '''
        3. 2
        train model
        '''
        training_loss = []
        val_loss = []
        training_f1 = []
        val_f1 = []
        val_p = []
        val_r = []
        val_acc = []
        # A counter for the number of gradient updates
        num_iter = 0
        train_dataloader = train_dataloader_trofi
        val_dataloader = val_dataloader_trofi
        model_index = 0
        for epoch in range(num_epochs):
            # print("Starting epoch {}".format(epoch + 1))
            for (example_text, example_lengths, labels) in train_dataloader:
                example_text = Variable(example_text)
                example_lengths = Variable(example_lengths)
                labels = Variable(labels)
                if using_GPU:
                    example_text = example_text.cuda()
                    example_lengths = example_lengths.cuda()
                    labels = labels.cuda()
                # predicted shape: (batch_size, 2)
                predicted = rnn_clf(example_text, example_lengths)
                batch_loss = nll_criterion(predicted, labels)
                rnn_clf_optimizer.zero_grad()
                batch_loss.backward()
                rnn_clf_optimizer.step()
                num_iter += 1
                # Evaluate on the held-out fold every 200 gradient updates.
                if num_iter % 200 == 0:
                    avg_eval_loss, eval_accuracy, precision, recall, f1, fus_f1 = evaluate(
                        val_dataloader, rnn_clf, nll_criterion, using_GPU)
                    val_loss.append(avg_eval_loss)
                    val_f1.append(f1)
                    val_p.append(precision)
                    val_r.append(recall)
                    val_acc.append(eval_accuracy.item())
                    model_index += 1
        # (Removed here: a large commented-out "additional training" phase at
        #  lr=0.0005 and matplotlib plotting of per-fold F1/loss curves.)
        """
        store the best f1
        """
        # FIX: the previous check `math.isnan(max(val_f1))` was unreliable —
        # Python's max() is order-dependent when NaN is present (every
        # comparison with NaN is False), so a NaN could go undetected or even
        # be returned as the "maximum". Detect NaNs explicitly and, as the
        # original intended, skip the warm-up checkpoints (first 6
        # evaluations) whenever any NaN occurred.
        scored = [(f, k) for k, f in enumerate(val_f1) if not math.isnan(f)]
        if len(scored) < len(val_f1):
            scored = [(f, k) for f, k in scored if k >= 6]
        # max() with key= returns the first maximal element, matching the old
        # val_f1.index(...) first-occurrence tie-breaking.
        best_f1, idx = max(scored, key=lambda pair: pair[0])
        optimal_f1s.append(best_f1)
        optimal_ps.append(val_p[idx])
        optimal_rs.append(val_r[idx])
        optimal_accs.append(val_acc[idx])
        # filename = '../models/LSTMSuffixElmoAtt_TroFi_fold_' + str(i) + '_epoch_' + str(idx) + '.pt'
        # temp_model = torch.load(filename)
        # print('best model: ', filename)
        # predictions_all.extend(test(val_dataloader_TroFi, temp_model, using_GPU))
    return np.mean(np.array(optimal_ps)), np.mean(np.array(optimal_rs)), \
        np.mean(np.array(optimal_f1s)), np.mean(np.array(optimal_accs))
def train_model(): optimal_f1s = [] optimal_ps = [] optimal_rs = [] optimal_accs = [] for i in tqdm(range(10)): ''' 2. 3 set up Dataloader for batching ''' training_sentences = [] training_labels = [] for j in range(10): if j != i: training_sentences.extend(ten_folds[j][0]) training_labels.extend(ten_folds[j][1]) training_dataset_mohX = TextDataset(training_sentences, training_labels) val_dataset_mohX = TextDataset(ten_folds[i][0], ten_folds[i][1]) # Data-related hyperparameters batch_size = 10 # Set up a DataLoader for the training, validation, and test dataset train_dataloader_mohX = DataLoader(dataset=training_dataset_mohX, batch_size=batch_size, shuffle=True, collate_fn=TextDataset.collate_fn) val_dataloader_mohX = DataLoader(dataset=val_dataset_mohX, batch_size=batch_size, shuffle=True, collate_fn=TextDataset.collate_fn) """ 3. Model training """ ''' 3. 1 set up model, loss criterion, optimizer ''' # Instantiate the model # embedding_dim = glove + elmo + suffix indicator # dropout1: dropout on input to RNN # dropout2: dropout in RNN; would be used if num_layers!=1 # dropout3: dropout on hidden state of RNN to linear layer rnn_clf = RNNSequenceClassifier(num_classes=2, embedding_dim=300+1024+50, hidden_size=300, num_layers=1, bidir=True, dropout1=0.2, dropout2=0, dropout3=0.2) # Move the model to the GPU if available if using_GPU: rnn_clf = rnn_clf.cuda() # Set up criterion for calculating loss nll_criterion = nn.NLLLoss() # Set up an optimizer for updating the parameters of the rnn_clf rnn_clf_optimizer = optim.SGD(rnn_clf.parameters(), lr=0.02, momentum=0.9) # Number of epochs (passes through the dataset) to train the model for. num_epochs = 30 ''' 3. 
2 train model ''' training_loss = [] val_loss = [] training_f1 = [] val_f1 = [] val_p = [] val_r = [] val_acc = [] # A counter for the number of gradient updates num_iter = 0 train_dataloader = train_dataloader_mohX val_dataloader = val_dataloader_mohX for epoch in range(num_epochs): # print("Starting epoch {}".format(epoch + 1)) for (example_text, example_lengths, labels) in train_dataloader: example_text = Variable(example_text) example_lengths = Variable(example_lengths) labels = Variable(labels) if using_GPU: example_text = example_text.cuda() example_lengths = example_lengths.cuda() labels = labels.cuda() # predicted shape: (batch_size, 2) predicted = rnn_clf(example_text, example_lengths) batch_loss = nll_criterion(predicted, labels) rnn_clf_optimizer.zero_grad() batch_loss.backward() rnn_clf_optimizer.step() num_iter += 1 # Calculate validation and training set loss and accuracy every 200 gradient updates if num_iter % 200 == 0: avg_eval_loss, eval_accuracy, precision, recall, f1, fus_f1 = evaluate(val_dataloader, rnn_clf, nll_criterion, using_GPU) val_loss.append(avg_eval_loss) val_f1.append(f1) val_p.append(precision) val_r.append(recall) val_acc.append(eval_accuracy.item()) # print( # "Iteration {}. Validation Loss {}. Validation Accuracy {}. Validation Precision {}. Validation Recall {}. Validation F1 {}. Validation class-wise F1 {}.".format( # num_iter, avg_eval_loss, eval_accuracy, precision, recall, f1, fus_f1)) # filename = f'../models/classification/MOHX_fold_{str(i)}_iter_{str(num_iter)}.pt' # torch.save(rnn_clf, filename) avg_eval_loss, eval_accuracy, precision, recall, f1, fus_f1 = evaluate(train_dataloader, rnn_clf, nll_criterion, using_GPU) training_loss.append(avg_eval_loss) training_f1.append(f1) # print( # "Iteration {}. Training Loss {}. Training Accuracy {}. Training Precision {}. Training Recall {}. Training F1 {}. 
Training class-wise F1 {}.".format( # num_iter, avg_eval_loss, eval_accuracy, precision, recall, f1, fus_f1)) # print("Training done for fold {}".format(i)) # store the best f1 idx = 0 if math.isnan(max(val_f1)): optimal_f1s.append(max(val_f1[6:])) idx = val_f1.index(optimal_f1s[-1]) optimal_ps.append(val_p[idx]) optimal_rs.append(val_r[idx]) optimal_accs.append(val_acc[idx]) else: optimal_f1s.append(max(val_f1)) idx = val_f1.index(optimal_f1s[-1]) optimal_ps.append(val_p[idx]) optimal_rs.append(val_r[idx]) optimal_accs.append(val_acc[idx]) return np.mean(np.array(optimal_ps)), np.mean(np.array(optimal_rs)), np.mean(np.array(optimal_f1s)), np.mean(np.array(optimal_accs)) # print('F1 on MOH-X by 10-fold = ', optimal_f1s) # print('F1 on MOH-X = ', np.mean(np.array(optimal_f1s))) """