def train(dim, args):
    """Train an LSTM classifier for one dimension and checkpoint the best weights.

    Args:
        dim: Name of the target dimension; selects the dataset split and the
            save directory 'weights/LSTM/<dim>'.
        args: Namespace-like object providing `hidden_dim`, `max_epochs`
            and `lr`.

    Side effects:
        Creates 'weights/LSTM/<dim>' if missing and writes
        'best-weights.pth' there each time the validation loss improves.
        Prints per-epoch train/validation losses.
    """
    # fix: `os` and `os.path.join` were used below but never imported
    import os
    from os.path import join

    import numpy as np
    import torch
    from torch import nn, optim
    from sklearn.utils import shuffle

    from features import ExtractWordEmbeddings
    from preprocess_data import batchify, padBatch
    from models.lstm import LSTMClassifier

    # hyperparameters
    embedding_dim = 300  # fixed by the GloVe embeddings loaded below
    hidden_dim = args.hidden_dim
    max_epochs = args.max_epochs
    is_cuda = True
    batch_size = 60
    lr = args.lr
    n_decreases = 10  # early-stopping patience (epochs without improvement)

    save_dir = 'weights/LSTM/%s' % dim
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    # X_*: lists of tokenized sentences (list of list of words)
    # y_*: lists of 0/1 labels
    # NOTE(review): loadDatasetForLSTM is not imported in this view — assumed
    # to be in scope at module level; confirm against the full file.
    X_tr, y_tr = loadDatasetForLSTM(dim, 'train')
    X_d, y_d = loadDatasetForLSTM(dim, 'dev')

    # load model and settings for training
    model = LSTMClassifier(embedding_dim=embedding_dim, hidden_dim=hidden_dim)
    if is_cuda:
        model.cuda()
    optimizer = optim.AdamW(model.parameters(), lr=lr)
    em = ExtractWordEmbeddings(emb_type='glove')
    loss_fn = nn.BCELoss()

    old_val = np.inf  # best validation loss seen so far
    epoch = 0
    cnt_decrease = 0  # consecutive epochs without validation improvement
    flag = True
    while flag:
        tr_loss = 0.0
        epoch += 1
        # fix: boolean `or` instead of bitwise `|` for the exit condition
        if (epoch > max_epochs) or (cnt_decrease > n_decreases):
            break

        # ---- training pass ----
        model.train()
        # reshuffle the training set each epoch before batching
        X_tr, y_tr = shuffle(X_tr, y_tr)
        tr_batches = batchify(X_tr, y_tr, batch_size)
        for X_b, y_b in tr_batches:
            optimizer.zero_grad()
            # obtain_vectors_from_sentence(sent, True): word list -> list of
            # embeddings (unknown words included); padBatch zero-pads every
            # sequence in the batch to the longest one.
            inputs = torch.tensor(
                padBatch([em.obtain_vectors_from_sentence(sent, True)
                          for sent in X_b])).float()  # (B, seq_len, dim)
            targets = torch.tensor(y_b, dtype=torch.float32)
            if is_cuda:
                inputs, targets = inputs.cuda(), targets.cuda()
            outputs = model(inputs)
            loss = loss_fn(outputs, targets)
            loss.backward()
            tr_loss += loss.item()
            optimizer.step()
        print("[Epoch %d] train loss: %1.3f" % (epoch, tr_loss))

        # ---- validation pass ----
        model.eval()
        current_loss = 0.0
        X_d, y_d = shuffle(X_d, y_d)
        val_batches = batchify(X_d, y_d, batch_size)
        with torch.no_grad():
            for X_b, y_b in val_batches:
                inputs = torch.tensor(
                    padBatch([em.obtain_vectors_from_sentence(sent, True)
                              for sent in X_b])).float()
                targets = torch.tensor(y_b, dtype=torch.float32)
                if is_cuda:
                    inputs, targets = inputs.cuda(), targets.cuda()
                outputs = model(inputs)
                loss = loss_fn(outputs, targets)
                current_loss += loss.item()
        print("[Epoch %d] validation loss: %1.3f" % (epoch, current_loss))

        if current_loss < old_val:
            # improved: checkpoint the model and reset the patience counter
            best_state = model.state_dict()
            torch.save(best_state, join(save_dir, 'best-weights.pth'))
            print("Updated model")
            old_val = current_loss
            cnt_decrease = 0
        else:
            # no improvement this epoch
            cnt_decrease += 1
            if cnt_decrease >= n_decreases:
                flag = False
    return
# NOTE(review): fragment of a larger distillation training loop — the matching
# `if`, the epoch loop, and the definitions of `trainloader`, `student`,
# `target_logist_list`, `optimizer`, `scheduler`, `device`, `validloader`,
# `testloader`, `epoch` and `best_valid_acc` are all outside this view.
# Indentation below is a reconstruction of the collapsed original; confirm
# nesting (in particular whether the save happens only on improvement)
# against the full file.
else:
    # Train the student against precomputed teacher logits at the configured
    # distillation temperature.
    train_with_teacher_logits(trainloader, student, target_logist_list,
                              Config.temp, optimizer, scheduler=scheduler,
                              device=device)
# evaluate it
# NOTE(review): `evalation` is presumably a project helper (name appears to be
# a typo for "evaluation") — keep as-is, it is defined elsewhere.
valid_acc = evalation(validloader, student, device=device, tag="Valid")
test_acc = evalation(testloader, student, device=device, tag="Test")
if valid_acc > best_valid_acc:
    # New best on validation: remember its weights and the test accuracy that
    # corresponds to this validation optimum, then checkpoint to disk.
    best_valid_acc = valid_acc
    best_student = student.state_dict()
    corresp_test_acc = test_acc
    saving_dict = {
        'epoch': epoch,
        'state_dict': best_student,
        'best_valid_acc': best_valid_acc,
        'corresp_test_acc': corresp_test_acc
    }
    # Checkpoint path is templated on the labeled-data fraction and the
    # distillation temperature.
    torch.save(
        saving_dict,
        Config.student_tar_fmt.format(plbl=Config.part_labeled,
                                      temp=Config.temp))
print("Finish training student!")
# evaluate on cpu predictions = np.array(predictions.cpu()) all_predictions = np.concatenate((all_predictions, predictions)) # Get results preprocess.plot_confusion_matrix(all_targets, all_predictions, classes=list(labels_dict.keys()), epoch=e, model_code=conf['code']) performance = preprocess.evaluate(all_targets, all_predictions) writer.add_scalars('metrics/performance', performance, iter) print('Test A: {acc} | P: {precision} | R: {recall} | F: {f1}\n\n'. format(**performance)) if performance['f1'] > best_f1: best_f1 = performance['f1'] # save model and results torch.save( { 'model': model.state_dict(), 'optimizer': optimizer.state_dict() }, 'saved_models/{}_{}_best_model.pth'.format( conf['code'], conf['operation'])) with open( 'saved_models/{}_{}_best_performance.pkl'.format( conf['code'], conf['operation']), 'wb') as f: pickle.dump(performance, f) writer.close()