def train(opt):
    if torch.cuda.is_available():
        torch.cuda.manual_seed(123)
    else:
        torch.manual_seed(123)
    # os.chdir("/data2/xuhuizh/graphM_project/HAMN")
    output_file = open(opt.saved_path + os.sep + "logs.txt", "w")
    output_file.write("Model's parameters: {}".format(vars(opt)))
    training_params = {"batch_size": opt.batch_size,
                       "shuffle": True,
                       "drop_last": True}
    test_params = {"batch_size": opt.batch_size,
                   "shuffle": False,
                   "drop_last": False}

    if opt.tune == 1:
        print('Fine tune the word embedding')
        freeze = False
    else:
        freeze = True

    if opt.max_sent_words == "5,5":
        max_word_length, max_sent_length = get_max_lengths(opt.train_set)
    else:
        max_word_length, max_sent_length = [int(x) for x in opt.max_sent_words.split(',')]
    print(max_word_length, max_sent_length, flush=True)

    training_set = MyDataset(opt.train_set, opt.word2vec_path, max_sent_length, max_word_length)
    training_generator = DataLoader(training_set, **training_params)
    test_set = MyDataset(opt.test_set, opt.word2vec_path, max_sent_length, max_word_length)
    test_generator = DataLoader(test_set, **test_params)

    if opt.graph == 1:
        print('use graph model')
        model = HierGraphNet(opt.word_hidden_size, opt.sent_hidden_size, opt.batch_size, freeze,
                             opt.word2vec_path, max_sent_length, max_word_length)
    elif opt.graph == 2:
        print('use deep graph model')
        model = DHierGraphNet(opt.word_hidden_size, opt.sent_hidden_size, opt.batch_size, freeze,
                              opt.word2vec_path, max_sent_length, max_word_length)
    else:
        model = HierAttNet(opt.word_hidden_size, opt.sent_hidden_size, opt.batch_size, freeze,
                           opt.word2vec_path, max_sent_length, max_word_length)
    # writer.add_graph(model, torch.zeros(opt.batch_size, max_sent_length, max_word_length))

    if torch.cuda.is_available():
        model.cuda()

    # m = nn.Sigmoid()
    # criterion = nn.CosineEmbeddingLoss()
    criterion = nn.BCELoss()
    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=opt.lr)
    best_loss = 1e5
    best_epoch = 0
    model.train()
    num_iter_per_epoch = len(training_generator)

    for epoch in range(opt.num_epoches):
        start_time = time.time()
        loss_ls = []
        te_label_ls = []
        te_pred_ls = []
        for iter, (feature1, feature2, label, pos) in enumerate(training_generator):
            num_sample = len(label)
            if torch.cuda.is_available():
                feature1 = feature1.cuda()
                feature2 = feature2.cuda()
                label = label.float().cuda()
            optimizer.zero_grad()
            model._init_hidden_state()
            predictions = model(feature1, feature2)
            # cosine: loss = criterion(output_1, output_2, label)
            # BCE:
            loss = criterion(predictions, label)
            loss.backward()
            optimizer.step()
            training_metrics = get_evaluation(label.cpu().numpy(),
                                              predictions.cpu().detach().numpy(),
                                              list_metrics=["accuracy"])
            print("Epoch: {}/{}, Iteration: {}/{}, Lr: {}, Loss: {}, Accuracy: {}".format(
                epoch + 1, opt.num_epoches, iter + 1, num_iter_per_epoch,
                optimizer.param_groups[0]['lr'], loss, training_metrics["accuracy"]), flush=True)
            print("--- %s seconds ---" % (time.time() - start_time))
            start_time = time.time()
            # writer.add_scalar('Train/Loss', loss, epoch * num_iter_per_epoch + iter)
            # writer.add_scalar('Train/Accuracy', training_metrics["accuracy"], epoch * num_iter_per_epoch + iter)
            # use .item() so the autograd graph is not kept alive for the whole epoch
            loss_ls.append(loss.item() * num_sample)
            te_label_ls.extend(label.clone().cpu())
            te_pred_ls.append(predictions.clone().cpu())

        sum_all = 0
        sum_updated = 0
        '''
        for name, param in model.named_parameters():
            print('All parameters')
            print(name, torch.numel(param.data))
            sum_all += torch.numel(param.data)
            if param.requires_grad:
                print('Updated parameters:')
                print(name, torch.numel(param.data))
                sum_updated += torch.numel(param.data)
        print('all', sum_all)
        print('update', sum_updated)
        '''

        # total train loss, averaged over the training set
        te_loss = sum(loss_ls) / training_set.__len__()
        te_pred = torch.cat(te_pred_ls, 0)
        te_label = np.array(te_label_ls)
        test_metrics = get_evaluation(te_label, te_pred.detach().numpy(),
                                      list_metrics=["accuracy", "confusion_matrix"])
        output_file.write(
            "Epoch: {}/{} \nTrain loss: {} Train accuracy: {} \nTrain confusion matrix: \n{}\n\n".format(
                epoch + 1, opt.num_epoches, te_loss,
                test_metrics["accuracy"], test_metrics["confusion_matrix"]))
        print("Epoch: {}/{}, Lr: {}, Loss: {}, Accuracy: {}".format(
            epoch + 1, opt.num_epoches, optimizer.param_groups[0]['lr'],
            te_loss, test_metrics["accuracy"]))

        if epoch % opt.test_interval == 0:
            model.eval()
            loss_ls = []
            te_label_ls = []
            te_pred_ls = []
            for te_feature1, te_feature2, te_label, cite_pos in test_generator:
                num_sample = len(te_label)
                if torch.cuda.is_available():
                    te_feature1 = te_feature1.cuda()
                    te_feature2 = te_feature2.cuda()
                    te_label = te_label.float().cuda()
                with torch.no_grad():
                    model._init_hidden_state(num_sample)
                    te_predictions = model(te_feature1, te_feature2)
                    # keep only the final output of the model
                    te_predictions = te_predictions[-1]
                te_loss = criterion(te_predictions, te_label)
                loss_ls.append(te_loss * num_sample)
                te_label_ls.extend(te_label.clone().cpu())
                te_pred_ls.append(te_predictions.clone().cpu())
            te_loss = sum(loss_ls) / test_set.__len__()
            te_pred = torch.cat(te_pred_ls, 0)
            te_label = np.array(te_label_ls)
            test_metrics = get_evaluation(te_label, te_pred.numpy(),
                                          list_metrics=["accuracy", "confusion_matrix"])
            output_file.write(
                "Epoch: {}/{} \nTest loss: {} Test accuracy: {} \nTest confusion matrix: \n{}\n\n".format(
                    epoch + 1, opt.num_epoches, te_loss,
                    test_metrics["accuracy"], test_metrics["confusion_matrix"]))
            print("Epoch: {}/{}, Lr: {}, Loss: {}, Accuracy: {}".format(
                epoch + 1, opt.num_epoches, optimizer.param_groups[0]['lr'],
                te_loss, test_metrics["accuracy"]))
            for name, param in model.named_parameters():
                if param.requires_grad and name == 'fd.weight':
                    print(name, param.data)
            # writer.add_scalar('Test/Loss', te_loss, epoch)
            # writer.add_scalar('Test/Accuracy', test_metrics["accuracy"], epoch)
            model.train()

            if te_loss + opt.es_min_delta < best_loss:
                best_loss = te_loss
                best_epoch = epoch
                torch.save(model, opt.saved_path + os.sep + opt.model_name)
                torch.save(model.state_dict(), opt.saved_path + os.sep + opt.model_name + '.pth')
            # Early stopping
            if epoch - best_epoch > opt.es_patience > 0:
                print("Stop training at epoch {}. The lowest loss achieved is {}".format(epoch, te_loss))
                break
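# --------------------------------------------------------------------------------------
# Hedged example: one way the `opt` namespace consumed by train() above could be built.
# The option names mirror the attributes the function actually reads; every default value
# below is an illustrative assumption, not the project's real configuration.
# --------------------------------------------------------------------------------------
import argparse


def get_args():
    parser = argparse.ArgumentParser("Hierarchical (graph) attention network trainer")
    parser.add_argument("--batch_size", type=int, default=32)          # assumed default
    parser.add_argument("--lr", type=float, default=1e-3)              # assumed default
    parser.add_argument("--num_epoches", type=int, default=20)         # assumed default
    parser.add_argument("--word_hidden_size", type=int, default=50)    # assumed default
    parser.add_argument("--sent_hidden_size", type=int, default=50)    # assumed default
    parser.add_argument("--test_interval", type=int, default=1)
    parser.add_argument("--es_min_delta", type=float, default=0.0)
    parser.add_argument("--es_patience", type=int, default=5)
    parser.add_argument("--tune", type=int, default=0, help="1 = fine-tune word embeddings")
    parser.add_argument("--graph", type=int, default=1, help="0 = HAN, 1 = graph, 2 = deep graph")
    parser.add_argument("--max_sent_words", type=str, default="5,5",
                        help='"5,5" = infer max lengths from the training set')
    parser.add_argument("--train_set", type=str, required=True)
    parser.add_argument("--test_set", type=str, required=True)
    parser.add_argument("--word2vec_path", type=str, required=True)
    parser.add_argument("--saved_path", type=str, default="trained_models")  # assumed default
    parser.add_argument("--model_name", type=str, default="hier_graph_net")  # assumed default
    return parser.parse_args()

# Usage sketch:  train(get_args())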
def test(opt):
    test_params = {"batch_size": opt.batch_size,
                   "shuffle": False,
                   "drop_last": False}
    if os.path.isdir(opt.output):
        shutil.rmtree(opt.output)
    os.makedirs(opt.output)

    if torch.cuda.is_available():
        model = torch.load(opt.model_path + os.sep + opt.model_type + "_model")
    else:
        model = torch.load(opt.model_path + os.sep + opt.model_type + "_model",
                           map_location=lambda storage, loc: storage)

    max_news_length, max_sent_length, max_word_length = get_max_lengths(opt.train_set)
    stock_length = 9
    test_set = MyDataset(data_path=opt.test_set,
                         dict_path=opt.word2vec_path,
                         max_news_length=max_news_length,
                         max_sent_length=max_sent_length,
                         max_word_length=max_word_length,
                         days_num=opt.days_num,
                         stock_length=stock_length)
    test_generator = DataLoader(test_set, **test_params)

    if torch.cuda.is_available():
        model.cuda()
    model.eval()

    te_label_ls = []
    te_pred_ls = []
    for te_days_news, te_days_stock, te_label in test_generator:
        num_sample = len(te_label)
        if torch.cuda.is_available():
            te_days_news = te_days_news.cuda()
            te_days_stock = te_days_stock.cuda()
            te_label = te_label.cuda()
        with torch.no_grad():
            if opt.model_type in ["ori_han", "sent_ori_han", "muil_han", "sent_muil_han"]:
                te_predictions = model(te_days_news)
            elif opt.model_type in ["muil_stock_han", "sent_muil_stock_han"]:
                te_predictions = model(te_days_news, te_days_stock)
            te_predictions = F.softmax(te_predictions, dim=1)
        te_label_ls.extend(te_label.clone().cpu())
        te_pred_ls.append(te_predictions.clone().cpu())

    te_pred = torch.cat(te_pred_ls, 0).numpy()
    te_label = np.array(te_label_ls)

    fieldnames = ['True label', 'Predicted label', 'Content']
    with open(opt.output + os.sep + "predictions.csv", 'w') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames, quoting=csv.QUOTE_NONNUMERIC)
        writer.writeheader()
        for i, j, k, _stock in zip(te_label, te_pred, test_set.newses, test_set.stocks):
            writer.writerow({'True label': i + 1,
                             'Predicted label': np.argmax(j) + 1,
                             'Content': k})

    test_metrics = get_evaluation(te_label, te_pred,
                                  list_metrics=["accuracy", "loss", "confusion_matrix"])
    print("Prediction:\nLoss: {} Accuracy: {} \nConfusion matrix: \n{}".format(
        test_metrics["loss"], test_metrics["accuracy"], test_metrics["confusion_matrix"]))
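# --------------------------------------------------------------------------------------
# Hedged sketch of the `get_evaluation` helper that the functions above rely on (its real
# implementation lives elsewhere in the repo and may differ).  Assumption: it receives
# ground-truth labels, raw model outputs and a list of metric names, and returns a dict
# keyed by those names, matching how its results are consumed above.  Only the metrics
# requested by the first two functions are sketched here.
# --------------------------------------------------------------------------------------
import numpy as np
from sklearn import metrics


def get_evaluation(y_true, y_prob, list_metrics):
    # Multi-class outputs: take the arg-max; single-column outputs: threshold at 0.5.
    y_pred = np.argmax(y_prob, -1) if y_prob.ndim > 1 else (y_prob > 0.5).astype(int)
    output = {}
    if "accuracy" in list_metrics:
        output["accuracy"] = metrics.accuracy_score(y_true, y_pred)
    if "loss" in list_metrics:
        try:
            output["loss"] = metrics.log_loss(y_true, y_prob)
        except ValueError:
            output["loss"] = -1
    if "confusion_matrix" in list_metrics:
        output["confusion_matrix"] = str(metrics.confusion_matrix(y_true, y_pred))
    return output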
def train(opt, train_data_path, test_data_path, valid_data_path):
    task = opt.task_name
    if torch.cuda.is_available():
        torch.cuda.manual_seed(2019)
    else:
        torch.manual_seed(2019)
    output_file = open(opt.saved_path + os.sep + "logs.txt", "w")
    output_file.write("Model's parameters: {}".format(vars(opt)))
    training_params = {"batch_size": opt.batch_size,
                       "shuffle": False,
                       "drop_last": True}
    test_params = {"batch_size": opt.batch_size,
                   "shuffle": False,
                   "drop_last": False}

    max_word_length, max_sent_length = get_max_lengths(opt.input_path)
    print("Max words: ", max_word_length, "Max sents: ", max_sent_length)

    # df_data = pd.read_csv(data_path, encoding='utf8', sep='\t')
    # df_data = df_data.sample(frac=1, random_state=2019)
    # print(df_data.shape)
    # y = df_data.readability.values
    # kf = model_selection.StratifiedKFold(n_splits=5)

    predicted_all_folds = []
    true_all_folds = []
    counter = 0
    accuracies_all_folds = []
    precision_all_folds = []
    recall_all_folds = []
    f1_all_folds = []
    qwk_all_folds = []

    # if os.path.exists(opt.vocab_path):
    #     os.remove(opt.vocab_path)

    df_train = pd.read_csv(train_data_path, encoding='utf8', sep='\t')
    df_test = pd.read_csv(test_data_path, encoding='utf8', sep='\t')
    df_valid = pd.read_csv(valid_data_path, encoding='utf8', sep='\t')

    training_set = MyDataset(df_train, opt.vocab_path, task, max_sent_length, max_word_length)
    training_generator = DataLoader(training_set, **training_params)
    test_set = MyDataset(df_test, opt.vocab_path, task, max_sent_length, max_word_length)
    test_generator = DataLoader(test_set, **test_params)
    valid_set = MyDataset(df_valid, opt.vocab_path, task, max_sent_length, max_word_length)
    valid_generator = DataLoader(valid_set, **test_params)

    model = HierAttNet(opt.word_hidden_size, opt.sent_hidden_size, opt.batch_size,
                       training_set.num_classes, opt.vocab_path, max_sent_length, max_word_length)
    if torch.cuda.is_available():
        model.cuda()

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr)
    best_loss = 1e5
    best_epoch = 0
    num_iter_per_epoch = len(training_generator)

    for epoch in range(opt.num_epoches):
        model.train()
        for iter, (feature, label) in enumerate(training_generator):
            if torch.cuda.is_available():
                feature = feature.cuda()
                label = label.cuda()
            optimizer.zero_grad()
            model._init_hidden_state()
            predictions = model(feature)
            loss = criterion(predictions, label)
            loss.backward()
            # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs;
            # it must run after backward() and before the optimizer step to have any effect.
            torch.nn.utils.clip_grad_norm_(model.parameters(), 0.25)
            optimizer.step()
            training_metrics = get_evaluation(label.cpu().numpy(),
                                              predictions.cpu().detach().numpy(),
                                              list_metrics=["accuracy"])
            print("Epoch: {}/{}, Iteration: {}/{}, Lr: {}, Loss: {}, Accuracy: {}".format(
                epoch + 1, opt.num_epoches, iter + 1, num_iter_per_epoch,
                optimizer.param_groups[0]['lr'], loss, training_metrics["accuracy"]))

        if epoch % opt.test_interval == 0:
            model.eval()
            loss_ls = []
            te_label_ls = []
            te_pred_ls = []
            for te_feature, te_label in valid_generator:
                num_sample = len(te_label)
                if torch.cuda.is_available():
                    te_feature = te_feature.cuda()
                    te_label = te_label.cuda()
                with torch.no_grad():
                    model._init_hidden_state(num_sample)
                    te_predictions = model(te_feature)
                te_loss = criterion(te_predictions, te_label)
                loss_ls.append(te_loss * num_sample)
                te_label_ls.extend(te_label.clone().cpu())
                te_pred_ls.append(te_predictions.clone().cpu())
            # average validation loss over the validation set
            te_loss = sum(loss_ls) / valid_set.__len__()
            te_pred = torch.cat(te_pred_ls, 0)
            te_label = np.array(te_label_ls)
            test_metrics = get_evaluation(te_label, te_pred.numpy(),
                                          list_metrics=["accuracy", "confusion_matrix", "qwk"])
            print("Epoch: {}/{}, Lr: {}, Loss: {}, Accuracy: {}".format(
                epoch + 1, opt.num_epoches, optimizer.param_groups[0]['lr'],
                te_loss, test_metrics["accuracy"]))

            if te_loss + opt.es_min_delta < best_loss:
                best_loss = te_loss
                best_epoch = epoch
                print('Saving model')
                torch.save(model, opt.saved_path + os.sep + "whole_model_han")
            # Early stopping
            if epoch - best_epoch > opt.es_patience > 0:
                print("Stop training at epoch {}. The lowest loss achieved is {}".format(epoch, best_loss))
                break

    print()
    print('Evaluation: ')
    print()
    # reload the best checkpoint saved above and switch it to eval mode
    model = torch.load(opt.saved_path + os.sep + "whole_model_han")
    model.eval()
    loss_ls = []
    te_label_ls = []
    te_pred_ls = []
    for te_feature, te_label in test_generator:
        num_sample = len(te_label)
        if torch.cuda.is_available():
            te_feature = te_feature.cuda()
            te_label = te_label.cuda()
        with torch.no_grad():
            model._init_hidden_state(num_sample)
            te_predictions = model(te_feature)
        te_loss = criterion(te_predictions, te_label)
        loss_ls.append(te_loss * num_sample)
        te_label_ls.extend(te_label.clone().cpu())
        te_pred_ls.append(te_predictions.clone().cpu())

    te_pred = torch.cat(te_pred_ls, 0)
    te_label = np.array(te_label_ls)
    test_metrics = get_evaluation(te_label, te_pred.numpy(),
                                  list_metrics=["accuracy", "precision", "recall", "f1",
                                                "confusion_matrix", "qwk"])
    true = te_label
    preds = np.argmax(te_pred.numpy(), -1)
    predicted_all_folds.extend(preds)
    true_all_folds.extend(true)

    print("Test set accuracy: {}".format(test_metrics["accuracy"]))
    print("Test set precision: {}".format(test_metrics["precision"]))
    print("Test set recall: {}".format(test_metrics["recall"]))
    print("Test set f1: {}".format(test_metrics["f1"]))
    print("Test set cm: {}".format(test_metrics["confusion_matrix"]))
    print("Test set qwk: {}".format(test_metrics["qwk"]))

    accuracies_all_folds.append(test_metrics["accuracy"])
    precision_all_folds.append(test_metrics["precision"])
    recall_all_folds.append(test_metrics["recall"])
    f1_all_folds.append(test_metrics["f1"])
    qwk_all_folds.append(test_metrics["qwk"])
    print()
    # if task in ['newsela', 'merlin', 'capito', 'apa']:
    #     break

    print()
    print("Task: ", task)
    print("Accuracy: ", accuracy_score(true_all_folds, predicted_all_folds))
    print("Precision: ", precision_score(true_all_folds, predicted_all_folds, average="weighted"))
    print("Recall: ", recall_score(true_all_folds, predicted_all_folds, average="weighted"))
    print("F1: ", f1_score(true_all_folds, predicted_all_folds, average="weighted"))
    print('Confusion matrix: ', confusion_matrix(true_all_folds, predicted_all_folds))
    print('QWK: ', cohen_kappa_score(true_all_folds, predicted_all_folds, weights="quadratic"))
    print('All folds accuracy: ', accuracies_all_folds)
    print('All folds precision: ', precision_all_folds)
    print('All folds recall: ', recall_all_folds)
    print('All folds f1: ', f1_all_folds)
    print('All folds QWK: ', qwk_all_folds)
def train(opt):
    if torch.cuda.is_available():
        torch.cuda.manual_seed(123)
    else:
        torch.manual_seed(123)
    output_file = open(opt.saved_path + os.sep + "logs.txt", "w")
    output_file.write("Model's parameters: {}".format(vars(opt)))
    training_params = {"batch_size": opt.batch_size,
                       "shuffle": True,
                       "drop_last": True}
    test_params = {"batch_size": opt.batch_size,
                   "shuffle": False,
                   "drop_last": False}

    max_word_length, max_sent_length = get_max_lengths(opt.train_set)
    training_set = MyDataset(opt.train_set, opt.word2vec_path, max_sent_length, max_word_length)
    training_generator = DataLoader(training_set, **training_params)
    test_set = MyDataset(opt.test_set, opt.word2vec_path, max_sent_length, max_word_length)
    test_generator = DataLoader(test_set, **test_params)

    model = HierAttNet(opt.word_hidden_size, opt.sent_hidden_size, opt.batch_size,
                       training_set.num_classes, opt.word2vec_path, max_sent_length, max_word_length)

    if os.path.isdir(opt.log_path):
        shutil.rmtree(opt.log_path)
    os.makedirs(opt.log_path)
    writer = SummaryWriter(opt.log_path)
    # writer.add_graph(model, torch.zeros(opt.batch_size, max_sent_length, max_word_length))

    if torch.cuda.is_available():
        model.cuda()

    criterion = nn.MSELoss()
    optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad, model.parameters()),
                                lr=opt.lr, momentum=opt.momentum)
    # optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=opt.lr)
    best_loss = 1e5
    best_epoch = 0
    model.train()
    num_iter_per_epoch = len(training_generator)

    for epoch in range(opt.num_epoches):
        for iter, (feature1, feature2, label) in enumerate(training_generator):
            if torch.cuda.is_available():
                feature1 = feature1.cuda()
                feature2 = feature2.cuda()
                label = label.cuda()
            optimizer.zero_grad()
            # Siamese setup: both documents are encoded by the same HAN and compared with
            # the exponential negative Manhattan distance.
            model._init_hidden_state()
            feature1 = model(feature1)
            model._init_hidden_state()
            feature2 = model(feature2)
            diff = exponent_neg_manhattan_distance(feature1, feature2)
            loss = criterion(diff, label.float())
            loss.backward()
            optimizer.step()
            training_metrics = get_evaluation(label.cpu().numpy(),
                                              diff.cpu().detach().numpy(),
                                              list_metrics=["accuracy"])
            print("Epoch: {}/{}, Iteration: {}/{}, Lr: {}, Loss: {}, Accuracy: {}".format(
                epoch + 1, opt.num_epoches, iter + 1, num_iter_per_epoch,
                optimizer.param_groups[0]['lr'], loss, training_metrics["accuracy"]))
            writer.add_scalar('Train/Loss', loss, epoch * num_iter_per_epoch + iter)
            writer.add_scalar('Train/Accuracy', training_metrics["accuracy"],
                              epoch * num_iter_per_epoch + iter)
        # torch.save(model, opt.saved_path + os.sep + "whole_model_han_{}".format(epoch))

        if epoch % opt.test_interval == 0:
            model.eval()
            loss_ls = []
            te_label_ls = []
            te_pred_ls = []
            for te_feature1, te_feature2, te_label in test_generator:
                num_sample = len(te_label)
                if torch.cuda.is_available():
                    te_feature1 = te_feature1.cuda()
                    te_feature2 = te_feature2.cuda()
                    te_label = te_label.cuda()
                with torch.no_grad():
                    model._init_hidden_state(num_sample)
                    te_feature1 = model(te_feature1)
                    model._init_hidden_state(num_sample)
                    te_feature2 = model(te_feature2)
                te_diff = exponent_neg_manhattan_distance(te_feature1, te_feature2)
                te_loss = criterion(te_diff, te_label.float())
                loss_ls.append(te_loss * num_sample)
                te_label_ls.extend(te_label.clone().cpu())
                te_pred_ls.append(te_diff.clone().cpu())
            te_loss = sum(loss_ls) / test_set.__len__()
            te_pred = torch.cat(te_pred_ls, 0)
            te_label = np.array(te_label_ls)
            test_metrics = get_evaluation(te_label, te_pred.numpy(),
                                          list_metrics=["accuracy", "confusion_matrix"])
            output_file.write(
                "Epoch: {}/{} \nTest loss: {} Test accuracy: {} \nTest confusion matrix: \n{}\n\n".format(
                    epoch + 1, opt.num_epoches, te_loss,
                    test_metrics["accuracy"], test_metrics["confusion_matrix"]))
            print("Epoch: {}/{}, Lr: {}, Loss: {}, Accuracy: {}".format(
                epoch + 1, opt.num_epoches, optimizer.param_groups[0]['lr'],
                te_loss, test_metrics["accuracy"]))
            writer.add_scalar('Test/Loss', te_loss, epoch)
            writer.add_scalar('Test/Accuracy', test_metrics["accuracy"], epoch)
            model.train()

            if te_loss + opt.es_min_delta < best_loss:
                best_loss = te_loss
                best_epoch = epoch
                torch.save(model, opt.saved_path + os.sep + "whole_model_han")
            # Early stopping
            if epoch - best_epoch > opt.es_patience > 0:
                print("Stop training at epoch {}. The lowest loss achieved is {}".format(epoch, te_loss))
                break
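# --------------------------------------------------------------------------------------
# Hedged sketch of exponent_neg_manhattan_distance(), used by the Siamese trainer above.
# Assumption: the standard Manhattan-LSTM similarity exp(-||h1 - h2||_1), which maps a
# pair of document encodings to a score in (0, 1] and is therefore compatible with the
# MSELoss against 0/1 labels; the repo's own helper may reduce over a different dimension.
# --------------------------------------------------------------------------------------
import torch


def exponent_neg_manhattan_distance(x1, x2):
    # x1, x2: (batch, hidden) encodings produced by the shared HierAttNet.
    return torch.exp(-torch.sum(torch.abs(x1 - x2), dim=1))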