def train(opt):
    if torch.cuda.is_available():
        torch.cuda.manual_seed(123)
        print("cuda...")
    else:
        torch.manual_seed(123)

    # training setting
    output_file = open(opt.saved_path + "logs.txt", "w")
    output_file.write("Model's parameters: {}".format(vars(opt)))
    training_params = {
        "batch_size": opt.batch_size,
        "num_workers": opt.num_workers,
        "shuffle": True,
        "pin_memory": True,
        "drop_last": True
    }
    test_params = {
        "batch_size": opt.batch_size,
        "num_workers": opt.num_workers,
        "shuffle": False,
        "pin_memory": True,
        "drop_last": False
    }

    # training dataset info
    # max_news_length, max_sent_length, max_word_length = get_max_lengths(opt.train_set)
    stock_length = 9  # number of stock features per day; required by the *_stock_han models below
    data_init_time = datetime.datetime.now()
    training_set = MyDataset(data_path=opt.train_set)
    training_generator = DataLoader(training_set, **training_params)
    test_set = MyDataset(data_path=opt.test_set)
    test_generator = DataLoader(test_set, **test_params)
    data_end_time = datetime.datetime.now()
    print("the data loading time is: {}s...".format((data_end_time - data_init_time).seconds))

    # model init
    model_init_time = datetime.datetime.now()
    if opt.model_type == "ori_han":
        model = Ori_HAN(days_num=opt.days_num,
                        days_hidden_size=opt.days_hidden_size,
                        news_hidden_size=opt.news_hidden_size,
                        num_classes=training_set.num_classes,
                        pretrained_word2vec_path=opt.word2vec_path,
                        dropout=opt.dropout)
    elif opt.model_type == "sent_ori_han":
        model = Sent_Ori_HAN(days_num=opt.days_num,
                             days_hidden_size=opt.days_hidden_size,
                             news_hidden_size=opt.news_hidden_size,
                             sent_hidden_size=opt.sent_hidden_size,
                             num_classes=training_set.num_classes,
                             pretrained_word2vec_path=opt.word2vec_path,
                             dropout=opt.dropout)
    elif opt.model_type == "muil_han":
        model = Muil_HAN(head_num=opt.head_num,
                         days_num=opt.days_num,
                         days_hidden_size=opt.days_hidden_size,
                         news_hidden_size=opt.news_hidden_size,
                         num_classes=training_set.num_classes,
                         pretrained_word2vec_path=opt.word2vec_path,
                         dropout=opt.dropout)
    elif opt.model_type == "sent_muil_han":
        model = Sent_Muil_HAN(head_num=opt.head_num,
                              days_num=opt.days_num,
                              days_hidden_size=opt.days_hidden_size,
                              news_hidden_size=opt.news_hidden_size,
                              sent_hidden_size=opt.sent_hidden_size,
                              num_classes=training_set.num_classes,
                              pretrained_word2vec_path=opt.word2vec_path,
                              dropout=opt.dropout)
    elif opt.model_type == "muil_stock_han":
        model = Muil_Stock_HAN(head_num=opt.head_num,
                               days_num=opt.days_num,
                               days_hidden_size=opt.days_hidden_size,
                               news_hidden_size=opt.news_hidden_size,
                               stock_hidden_size=opt.stock_hidden_size,
                               stock_length=stock_length,
                               num_classes=training_set.num_classes,
                               pretrained_word2vec_path=opt.word2vec_path,
                               dropout=opt.dropout)
    elif opt.model_type == "sent_muil_stock_han":
        model = Sent_Muil_Stock_HAN(head_num=opt.head_num,
                                    days_num=opt.days_num,
                                    days_hidden_size=opt.days_hidden_size,
                                    news_hidden_size=opt.news_hidden_size,
                                    sent_hidden_size=opt.sent_hidden_size,
                                    stock_hidden_size=opt.stock_hidden_size,
                                    stock_length=stock_length,
                                    num_classes=training_set.num_classes,
                                    pretrained_word2vec_path=opt.word2vec_path,
                                    dropout=opt.dropout)
    model_end_time = datetime.datetime.now()
    print("the model init time is: {}s...".format((model_end_time - model_init_time).seconds))

    # other setting
    if os.path.isdir(opt.log_path + opt.model_type):
        shutil.rmtree(opt.log_path + opt.model_type)  # recursively delete the old log folder and its subfolders
    os.makedirs(opt.log_path + opt.model_type)
    writer = SummaryWriter(opt.log_path + opt.model_type)

    # initialize training-related state
    if torch.cuda.is_available():
        model.cuda()
        print("model use cuda...")
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad, model.parameters()),
                                lr=opt.lr, momentum=opt.momentum)
    best_loss = 1e5
    best_epoch = 0
    model.train()
    num_iter_per_epoch = len(training_generator)

    # train the model
    print("start to train model...")
    for epoch in range(opt.num_epoches):
        dataloader_init_time = datetime.datetime.now()
        for iter, (days_news, days_stock, label) in enumerate(training_generator):
            dataloader_end_time = datetime.datetime.now()
            print("the dataloader loading time is: {}s...".format(
                (dataloader_end_time - dataloader_init_time).seconds))
            if torch.cuda.is_available():
                days_news = days_news.cuda()
                days_stock = days_stock.cuda()
                label = label.cuda()
                print("data use cuda...")
            training_init_time = datetime.datetime.now()
            optimizer.zero_grad()
            if opt.model_type in ["ori_han", "sent_ori_han", "muil_han", "sent_muil_han"]:
                predictions = model(days_news)
            elif opt.model_type in ["muil_stock_han", "sent_muil_stock_han"]:
                predictions = model(days_news, days_stock)
            loss = criterion(predictions, label)
            loss.backward()
            optimizer.step()
            training_metrics = get_evaluation(label.cpu().numpy(),
                                              predictions.cpu().detach().numpy(),
                                              list_metrics=["accuracy"])
            print("Epoch: {}/{}, Iteration: {}/{}, Lr: {}, Loss: {}, Accuracy: {}".format(
                epoch + 1, opt.num_epoches, iter + 1, num_iter_per_epoch,
                optimizer.param_groups[0]['lr'], loss, training_metrics["accuracy"]))
            writer.add_scalar('Train/Loss', loss, epoch * num_iter_per_epoch + iter)
            writer.add_scalar('Train/Accuracy', training_metrics["accuracy"],
                              epoch * num_iter_per_epoch + iter)
            training_end_time = datetime.datetime.now()
            print("the training time is: {}s...".format(
                (training_end_time - training_init_time).seconds))

        if epoch % opt.test_interval == 0:
            model.eval()
            loss_ls = []
            te_label_ls = []
            te_pred_ls = []
            for te_days_news, te_days_stock, te_label in test_generator:
                num_sample = len(te_label)
                if torch.cuda.is_available():
                    te_days_news = te_days_news.cuda()
                    te_days_stock = te_days_stock.cuda()
                    te_label = te_label.cuda()
                with torch.no_grad():
                    if opt.model_type in ["ori_han", "sent_ori_han", "muil_han", "sent_muil_han"]:
                        te_predictions = model(te_days_news)
                    elif opt.model_type in ["muil_stock_han", "sent_muil_stock_han"]:
                        te_predictions = model(te_days_news, te_days_stock)
                te_loss = criterion(te_predictions, te_label)
                loss_ls.append(te_loss * num_sample)
                te_label_ls.extend(te_label.clone().cpu())
                te_pred_ls.append(te_predictions.clone().cpu())
            te_loss = sum(loss_ls) / test_set.__len__()
            te_pred = torch.cat(te_pred_ls, 0)
            te_label = np.array(te_label_ls)
            test_metrics = get_evaluation(te_label, te_pred.numpy(),
                                          list_metrics=["accuracy", "confusion_matrix"])
            output_file.write(
                "Epoch: {}/{} \nTest loss: {} Test accuracy: {} \nTest confusion matrix: \n{}\n\n".format(
                    epoch + 1, opt.num_epoches, te_loss, test_metrics["accuracy"],
                    test_metrics["confusion_matrix"]))
            print("Epoch: {}/{}, Lr: {}, Loss: {}, Accuracy: {}".format(
                epoch + 1, opt.num_epoches, optimizer.param_groups[0]['lr'],
                te_loss, test_metrics["accuracy"]))
            writer.add_scalar('Test/Loss', te_loss, epoch)
            writer.add_scalar('Test/Accuracy', test_metrics["accuracy"], epoch)
            model.train()
            if te_loss + opt.es_min_delta < best_loss:
                best_loss = te_loss
                best_epoch = epoch
                torch.save(model, opt.saved_path + opt.model_type + "_model")
            # Early stopping
            if epoch - best_epoch > opt.es_patience > 0:
                print("Stop training at epoch {}. The lowest loss achieved is {}".format(epoch, te_loss))
                break
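
# All of the train() functions in this file call a get_evaluation() helper that is
# not shown here. The sketch below is only an illustration of what such a helper
# could look like, assuming it receives per-class scores/logits as y_prob and wraps
# sklearn.metrics; the helper in the original repositories may differ.
import numpy as np
from sklearn import metrics


def get_evaluation(y_true, y_prob, list_metrics):
    # Turn per-class scores into hard class predictions.
    y_pred = np.argmax(y_prob, -1)
    output = {}
    if "accuracy" in list_metrics:
        output["accuracy"] = metrics.accuracy_score(y_true, y_pred)
    if "confusion_matrix" in list_metrics:
        output["confusion_matrix"] = str(metrics.confusion_matrix(y_true, y_pred))
    return output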

def train(opt):
    if torch.cuda.is_available():
        torch.cuda.manual_seed(123)
    else:
        torch.manual_seed(123)
    if opt.dataset in ["agnews", "dbpedia", "yelp_review", "yelp_review_polarity", "amazon_review",
                       "amazon_polarity", "sogou_news", "yahoo_answers"]:
        opt.input, opt.output = get_default_folder(opt.dataset, opt.feature)
    if not os.path.exists(opt.output):
        os.makedirs(opt.output)
    output_file = open(opt.output + os.sep + "logs.txt", "w")
    output_file.write("Model's parameters: {}".format(vars(opt)))
    training_params = {"batch_size": opt.batch_size, "shuffle": True, "num_workers": 0}
    test_params = {"batch_size": opt.batch_size, "shuffle": False, "num_workers": 0}
    training_set = MyDataset(opt.input + os.sep + "train.csv", opt.max_length)
    test_set = MyDataset(opt.input + os.sep + "test.csv", opt.max_length)
    training_generator = DataLoader(training_set, **training_params)
    test_generator = DataLoader(test_set, **test_params)
    if opt.feature == "small":
        model = CharacterLevelCNN(input_length=opt.max_length, n_classes=training_set.num_classes,
                                  input_dim=len(opt.alphabet), n_conv_filters=256, n_fc_neurons=1024)
    elif opt.feature == "large":
        model = CharacterLevelCNN(input_length=opt.max_length, n_classes=training_set.num_classes,
                                  input_dim=len(opt.alphabet), n_conv_filters=1024, n_fc_neurons=2048)
    else:
        sys.exit("Invalid feature mode!")
    log_path = "{}_{}_{}".format(opt.log_path, opt.feature, opt.dataset)
    if os.path.isdir(log_path):
        shutil.rmtree(log_path)
    os.makedirs(log_path)
    writer = SummaryWriter(log_path)
    if torch.cuda.is_available():
        model.cuda()
    criterion = nn.CrossEntropyLoss()
    if opt.optimizer == "adam":
        optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr)
    elif opt.optimizer == "sgd":
        optimizer = torch.optim.SGD(model.parameters(), lr=opt.lr, momentum=0.9)
    best_loss = 1e5
    best_epoch = 0
    model.train()
    num_iter_per_epoch = len(training_generator)
    for epoch in range(opt.num_epochs):
        for iter, batch in enumerate(training_generator):
            feature, label = batch
            if torch.cuda.is_available():
                feature = feature.cuda()
                label = label.cuda()
            optimizer.zero_grad()
            predictions = model(feature)
            loss = criterion(predictions, label)
            loss.backward()
            optimizer.step()
            training_metrics = get_evaluation(label.cpu().numpy(), predictions.cpu().detach().numpy(),
                                              list_metrics=["accuracy"])
            print("Epoch: {}/{}, Iteration: {}/{}, Lr: {}, Loss: {}, Accuracy: {}".format(
                epoch + 1, opt.num_epochs, iter + 1, num_iter_per_epoch,
                optimizer.param_groups[0]['lr'], loss, training_metrics["accuracy"]))
            writer.add_scalar('Train/Loss', loss, epoch * num_iter_per_epoch + iter)
            writer.add_scalar('Train/Accuracy', training_metrics["accuracy"],
                              epoch * num_iter_per_epoch + iter)
        model.eval()
        loss_ls = []
        te_label_ls = []
        te_pred_ls = []
        for batch in test_generator:
            te_feature, te_label = batch
            num_sample = len(te_label)
            if torch.cuda.is_available():
                te_feature = te_feature.cuda()
                te_label = te_label.cuda()
            with torch.no_grad():
                te_predictions = model(te_feature)
            te_loss = criterion(te_predictions, te_label)
            loss_ls.append(te_loss * num_sample)
            te_label_ls.extend(te_label.clone().cpu())
            te_pred_ls.append(te_predictions.clone().cpu())
        te_loss = sum(loss_ls) / test_set.__len__()
        te_pred = torch.cat(te_pred_ls, 0)
        te_label = np.array(te_label_ls)
        test_metrics = get_evaluation(te_label, te_pred.numpy(),
                                      list_metrics=["accuracy", "confusion_matrix"])
        output_file.write(
            "Epoch: {}/{} \nTest loss: {} Test accuracy: {} \nTest confusion matrix: \n{}\n\n".format(
                epoch + 1, opt.num_epochs, te_loss, test_metrics["accuracy"],
                test_metrics["confusion_matrix"]))
        print("Epoch: {}/{}, Lr: {}, Loss: {}, Accuracy: {}".format(
            epoch + 1, opt.num_epochs, optimizer.param_groups[0]['lr'],
            te_loss, test_metrics["accuracy"]))
        writer.add_scalar('Test/Loss', te_loss, epoch)
        writer.add_scalar('Test/Accuracy', test_metrics["accuracy"], epoch)
        model.train()
        if te_loss + opt.es_min_delta < best_loss:
            best_loss = te_loss
            best_epoch = epoch
            torch.save(model, "{}/char-cnn_{}_{}".format(opt.output, opt.dataset, opt.feature))
        # Early stopping
        if epoch - best_epoch > opt.es_patience > 0:
            print("Stop training at epoch {}. The lowest loss achieved is {} at epoch {}".format(
                epoch, te_loss, best_epoch))
            break
        # Halve the SGD learning rate every 3 epochs
        if opt.optimizer == "sgd" and epoch % 3 == 0 and epoch > 0:
            current_lr = optimizer.state_dict()['param_groups'][0]['lr']
            current_lr /= 2
            for param_group in optimizer.param_groups:
                param_group['lr'] = current_lr

def train(opt):
    if torch.cuda.is_available():
        torch.cuda.manual_seed(123)
    else:
        torch.manual_seed(123)
    # os.chdir("/data2/xuhuizh/graphM_project/HAMN")
    output_file = open(opt.saved_path + os.sep + "logs.txt", "w")
    output_file.write("Model's parameters: {}".format(vars(opt)))
    training_params = {
        "batch_size": opt.batch_size,
        "shuffle": True,
        "drop_last": True
    }
    test_params = {
        "batch_size": opt.batch_size,
        "shuffle": False,
        "drop_last": False
    }
    if opt.tune == 1:
        print('Fine tune the word embedding')
        freeze = False
    else:
        freeze = True
    if opt.max_sent_words == "5,5":
        max_word_length, max_sent_length = get_max_lengths(opt.train_set)
    else:
        max_word_length, max_sent_length = [int(x) for x in opt.max_sent_words.split(',')]
    print(max_word_length, max_sent_length)
    training_set = MyDataset(opt.train_set, opt.word2vec_path, max_sent_length, max_word_length)
    training_generator = DataLoader(training_set, **training_params)
    test_set = MyDataset(opt.test_set, opt.word2vec_path, max_sent_length, max_word_length)
    test_generator = DataLoader(test_set, **test_params)
    if opt.graph == 1:
        print('use graph model')
        model = HierGraphNet(opt.word_hidden_size, opt.sent_hidden_size, opt.batch_size, freeze,
                             opt.word2vec_path, max_sent_length, max_word_length)
    elif opt.graph == 2:
        print('use deep graph model')
        model = DHierGraphNet(opt.word_hidden_size, opt.sent_hidden_size, opt.batch_size, freeze,
                              opt.word2vec_path, max_sent_length, max_word_length)
    else:
        model = HierAttNet(opt.word_hidden_size, opt.sent_hidden_size, opt.batch_size, freeze,
                           opt.word2vec_path, max_sent_length, max_word_length)
    # if os.path.isdir(opt.log_path):
    #     shutil.rmtree(opt.log_path)
    # os.makedirs(opt.log_path)
    # writer = SummaryWriter(opt.log_path)
    # writer.add_graph(model, torch.zeros(opt.batch_size, max_sent_length, max_word_length))
    if torch.cuda.is_available():
        model.cuda()
    criterion = nn.BCELoss()
    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=opt.lr)
    best_loss = 1e5
    best_epoch = 0
    model.train()
    num_iter_per_epoch = len(training_generator)
    for epoch in range(opt.num_epoches):
        start_time = time.time()
        loss_ls = []
        te_label_ls = []
        te_pred_ls = []
        for iter, (feature1, feature2, label) in enumerate(training_generator):
            num_sample = len(label)
            if torch.cuda.is_available():
                feature1 = feature1.cuda()
                feature2 = feature2.cuda()
                label = label.float().cuda()
            optimizer.zero_grad()
            model._init_hidden_state()
            predictions = model(feature1, feature2)
            loss = criterion(predictions, label)
            loss.backward()
            optimizer.step()
            training_metrics = get_evaluation(label.cpu().numpy(), predictions.cpu().detach().numpy(),
                                              list_metrics=["accuracy"])
            print("Epoch: {}/{}, Iteration: {}/{}, Lr: {}, Loss: {}, Accuracy: {}".format(
                epoch + 1, opt.num_epoches, iter + 1, num_iter_per_epoch,
                optimizer.param_groups[0]['lr'], loss, training_metrics["accuracy"]), flush=True)
            print("--- %s seconds ---" % (time.time() - start_time))
            start_time = time.time()
            # writer.add_scalar('Train/Loss', loss, epoch * num_iter_per_epoch + iter)
            # writer.add_scalar('Train/Accuracy', training_metrics["accuracy"], epoch * num_iter_per_epoch + iter)
            loss_ls.append(loss * num_sample)
            te_label_ls.extend(label.clone().cpu())
            te_pred_ls.append(predictions.clone().cpu())
        sum_all = 0
        sum_updated = 0
        '''
        for name, param in model.named_parameters():
            print('All parameters')
            print(name, torch.numel(param.data))
            sum_all += torch.numel(param.data)
            if param.requires_grad:
                print('Updated parameters:')
                print(name, torch.numel(param.data))
                sum_updated += torch.numel(param.data)
        print('all', sum_all)
        print('update', sum_updated)
        '''
        # print total train loss, averaged over the training set
        te_loss = sum(loss_ls) / training_set.__len__()
        te_pred = torch.cat(te_pred_ls, 0)
        te_label = np.array(te_label_ls)
        test_metrics = get_evaluation(te_label, te_pred.detach().numpy(),
                                      list_metrics=["accuracy", "confusion_matrix"])
        output_file.write(
            "Epoch: {}/{} \nTrain loss: {} Train accuracy: {} \nTrain confusion matrix: \n{}\n\n".format(
                epoch + 1, opt.num_epoches, te_loss, test_metrics["accuracy"],
                test_metrics["confusion_matrix"]))
        print("Epoch: {}/{}, Lr: {}, Loss: {}, Accuracy: {}".format(
            epoch + 1, opt.num_epoches, optimizer.param_groups[0]['lr'],
            te_loss, test_metrics["accuracy"]))
        if epoch % opt.test_interval == 0:
            model.eval()
            loss_ls = []
            te_label_ls = []
            te_pred_ls = []
            for te_feature1, te_feature2, te_label in test_generator:
                num_sample = len(te_label)
                # print(num_sample)
                if torch.cuda.is_available():
                    te_feature1 = te_feature1.cuda()
                    te_feature2 = te_feature2.cuda()
                    te_label = te_label.float().cuda()
                with torch.no_grad():
                    model._init_hidden_state(num_sample)
                    te_predictions = model(te_feature1, te_feature2)
                te_loss = criterion(te_predictions, te_label)
                loss_ls.append(te_loss * num_sample)
                te_label_ls.extend(te_label.clone().cpu())
                te_pred_ls.append(te_predictions.clone().cpu())
            te_loss = sum(loss_ls) / test_set.__len__()
            te_pred = torch.cat(te_pred_ls, 0)
            te_label = np.array(te_label_ls)
            test_metrics = get_evaluation(te_label, te_pred.numpy(),
                                          list_metrics=["accuracy", "confusion_matrix"])
            output_file.write(
                "Epoch: {}/{} \nTest loss: {} Test accuracy: {} \nTest confusion matrix: \n{}\n\n".format(
                    epoch + 1, opt.num_epoches, te_loss, test_metrics["accuracy"],
                    test_metrics["confusion_matrix"]))
            print("Epoch: {}/{}, Lr: {}, Loss: {}, Accuracy: {}".format(
                epoch + 1, opt.num_epoches, optimizer.param_groups[0]['lr'],
                te_loss, test_metrics["accuracy"]))
            for name, param in model.named_parameters():
                if param.requires_grad:
                    if name == 'fd.weight':
                        print(name, param.data)
            # writer.add_scalar('Test/Loss', te_loss, epoch)
            # writer.add_scalar('Test/Accuracy', test_metrics["accuracy"], epoch)
            model.train()
            if te_loss + opt.es_min_delta < best_loss:
                best_loss = te_loss
                best_epoch = epoch
                torch.save(model, opt.saved_path + os.sep + opt.model_name)
                logger.info('saved model')
            # Early stopping
            if epoch - best_epoch > opt.es_patience > 0:
                print("Stop training at epoch {}. The lowest loss achieved is {}".format(epoch, te_loss))
                break
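
# Several of the HAN-style trainers in this file (including the one above) call a
# get_max_lengths() helper to size the padded document tensors. One rough sketch of
# such a helper is shown below, assuming the data file is a CSV whose last column is
# the raw document text; the original helpers may instead return a high percentile
# rather than the strict maximum. Requires the NLTK 'punkt' tokenizer data.
import csv

from nltk.tokenize import sent_tokenize, word_tokenize


def get_max_lengths(data_path):
    word_lengths = []
    sent_lengths = []
    with open(data_path) as csv_file:
        reader = csv.reader(csv_file, quotechar='"')
        for row in reader:
            text = row[-1]
            sentences = sent_tokenize(text)
            sent_lengths.append(len(sentences))
            for sentence in sentences:
                word_lengths.append(len(word_tokenize(sentence)))
    # Return (max words per sentence, max sentences per document),
    # matching the unpacking order used by the callers above.
    return max(word_lengths), max(sent_lengths)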

def train(opt, train_data_path, test_data_path, valid_data_path):
    task = opt.task_name
    if torch.cuda.is_available():
        torch.cuda.manual_seed(2019)
    else:
        torch.manual_seed(2019)
    output_file = open(opt.saved_path + os.sep + "logs.txt", "w")
    output_file.write("Model's parameters: {}".format(vars(opt)))
    training_params = {
        "batch_size": opt.batch_size,
        "shuffle": False,
        "drop_last": True
    }
    test_params = {
        "batch_size": opt.batch_size,
        "shuffle": False,
        "drop_last": False
    }
    max_word_length, max_sent_length = get_max_lengths(opt.input_path)
    print("Max words: ", max_word_length, "Max sents: ", max_sent_length)
    # df_data = pd.read_csv(data_path, encoding='utf8', sep='\t')
    # df_data = df_data.sample(frac=1, random_state=2019)
    # print(df_data.shape)
    # y = df_data.readability.values
    # kf = model_selection.StratifiedKFold(n_splits=5)
    predicted_all_folds = []
    true_all_folds = []
    counter = 0
    accuracies_all_folds = []
    precision_all_folds = []
    recall_all_folds = []
    f1_all_folds = []
    qwk_all_folds = []
    # if os.path.exists(opt.vocab_path):
    #     os.remove(opt.vocab_path)
    df_train = pd.read_csv(train_data_path, encoding='utf8', sep='\t')
    df_test = pd.read_csv(test_data_path, encoding='utf8', sep='\t')
    df_valid = pd.read_csv(valid_data_path, encoding='utf8', sep='\t')
    training_set = MyDataset(df_train, opt.vocab_path, task, max_sent_length, max_word_length)
    training_generator = DataLoader(training_set, **training_params)
    test_set = MyDataset(df_test, opt.vocab_path, task, max_sent_length, max_word_length)
    test_generator = DataLoader(test_set, **test_params)
    valid_set = MyDataset(df_valid, opt.vocab_path, task, max_sent_length, max_word_length)
    valid_generator = DataLoader(valid_set, **test_params)
    model = HierAttNet(opt.word_hidden_size, opt.sent_hidden_size, opt.batch_size,
                       training_set.num_classes, opt.vocab_path, max_sent_length, max_word_length)
    if torch.cuda.is_available():
        model.cuda()
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr)
    best_loss = 1e5
    best_epoch = 0
    num_iter_per_epoch = len(training_generator)
    for epoch in range(opt.num_epoches):
        model.train()
        for iter, (feature, label) in enumerate(training_generator):
            if torch.cuda.is_available():
                feature = feature.cuda()
                label = label.cuda()
            optimizer.zero_grad()
            model._init_hidden_state()
            predictions = model(feature)
            loss = criterion(predictions, label)
            loss.backward()
            # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs;
            # it must run after backward() and before the optimizer update.
            torch.nn.utils.clip_grad_norm_(model.parameters(), 0.25)
            optimizer.step()
            training_metrics = get_evaluation(label.cpu().numpy(), predictions.cpu().detach().numpy(),
                                              list_metrics=["accuracy"])
            print("Epoch: {}/{}, Iteration: {}/{}, Lr: {}, Loss: {}, Accuracy: {}".format(
                epoch + 1, opt.num_epoches, iter + 1, num_iter_per_epoch,
                optimizer.param_groups[0]['lr'], loss, training_metrics["accuracy"]))
        if epoch % opt.test_interval == 0:
            model.eval()
            loss_ls = []
            te_label_ls = []
            te_pred_ls = []
            for te_feature, te_label in valid_generator:
                num_sample = len(te_label)
                if torch.cuda.is_available():
                    te_feature = te_feature.cuda()
                    te_label = te_label.cuda()
                with torch.no_grad():
                    model._init_hidden_state(num_sample)
                    te_predictions = model(te_feature)
                te_loss = criterion(te_predictions, te_label)
                loss_ls.append(te_loss * num_sample)
                te_label_ls.extend(te_label.clone().cpu())
                te_pred_ls.append(te_predictions.clone().cpu())
            # average over the validation set, which is what this loop iterates
            te_loss = sum(loss_ls) / valid_set.__len__()
            te_pred = torch.cat(te_pred_ls, 0)
            te_label = np.array(te_label_ls)
            test_metrics = get_evaluation(te_label, te_pred.numpy(),
                                          list_metrics=["accuracy", "confusion_matrix", "qwk"])
            print("Epoch: {}/{}, Lr: {}, Loss: {}, Accuracy: {}".format(
                epoch + 1, opt.num_epoches, optimizer.param_groups[0]['lr'],
                te_loss, test_metrics["accuracy"]))
            if te_loss + opt.es_min_delta < best_loss:
                best_loss = te_loss
                best_epoch = epoch
                print('Saving model')
                torch.save(model, opt.saved_path + os.sep + "whole_model_han")
            # Early stopping
            if epoch - best_epoch > opt.es_patience > 0:
                print("Stop training at epoch {}. The lowest loss achieved is {}".format(epoch, best_loss))
                break
    print()
    print('Evaluation: ')
    print()
    model.eval()
    model = torch.load(opt.saved_path + os.sep + "whole_model_han")
    loss_ls = []
    te_label_ls = []
    te_pred_ls = []
    for te_feature, te_label in test_generator:
        num_sample = len(te_label)
        if torch.cuda.is_available():
            te_feature = te_feature.cuda()
            te_label = te_label.cuda()
        with torch.no_grad():
            model._init_hidden_state(num_sample)
            te_predictions = model(te_feature)
        te_loss = criterion(te_predictions, te_label)
        loss_ls.append(te_loss * num_sample)
        te_label_ls.extend(te_label.clone().cpu())
        te_pred_ls.append(te_predictions.clone().cpu())
    te_pred = torch.cat(te_pred_ls, 0)
    te_label = np.array(te_label_ls)
    test_metrics = get_evaluation(te_label, te_pred.numpy(),
                                  list_metrics=["accuracy", "precision", "recall", "f1",
                                                "confusion_matrix", 'qwk'])
    true = te_label
    preds = np.argmax(te_pred.numpy(), -1)
    predicted_all_folds.extend(preds)
    true_all_folds.extend(true)
    print("Test set accuracy: {}".format(test_metrics["accuracy"]))
    print("Test set precision: {}".format(test_metrics["precision"]))
    print("Test set recall: {}".format(test_metrics["recall"]))
    print("Test set f1: {}".format(test_metrics["f1"]))
    print("Test set cm: {}".format(test_metrics["confusion_matrix"]))
    print("Test set qwk: {}".format(test_metrics["qwk"]))
    accuracies_all_folds.append(test_metrics["accuracy"])
    precision_all_folds.append(test_metrics["precision"])
    recall_all_folds.append(test_metrics["recall"])
    f1_all_folds.append(test_metrics["f1"])
    qwk_all_folds.append(test_metrics["qwk"])
    print()
    # if task in ['newsela', 'merlin', 'capito', 'apa']:
    #     break
    print()
    print("Task: ", task)
    print("Accuracy: ", accuracy_score(true_all_folds, predicted_all_folds))
    print("Precision: ", precision_score(true_all_folds, predicted_all_folds, average="weighted"))
    print("Recall: ", recall_score(true_all_folds, predicted_all_folds, average="weighted"))
    print("F1: ", f1_score(true_all_folds, predicted_all_folds, average="weighted"))
    print('Confusion matrix: ', confusion_matrix(true_all_folds, predicted_all_folds))
    print('QWK: ', cohen_kappa_score(true_all_folds, predicted_all_folds, weights="quadratic"))
    print('All folds accuracy: ', accuracies_all_folds)
    print('All folds precision: ', precision_all_folds)
    print('All folds recall: ', recall_all_folds)
    print('All folds f1: ', f1_all_folds)
    print('All folds QWK: ', qwk_all_folds)

def train(opt):
    if torch.cuda.is_available():
        torch.cuda.manual_seed(123)
    else:
        torch.manual_seed(123)
    output_file = open(opt.saved_path + os.sep + "logs.txt", "w")
    output_file.write("Model's parameters: {}".format(vars(opt)))
    training_params = {
        "batch_size": opt.batch_size,
        "shuffle": True,
        "drop_last": True
    }
    test_params = {
        "batch_size": opt.batch_size,
        "shuffle": False,
        "drop_last": False
    }
    max_word_length, max_sent_length = get_max_lengths(opt.train_set)
    training_set = MyDataset(opt.train_set, opt.word2vec_path, max_sent_length, max_word_length)
    training_generator = DataLoader(training_set, **training_params)
    test_set = MyDataset(opt.test_set, opt.word2vec_path, max_sent_length, max_word_length)
    test_generator = DataLoader(test_set, **test_params)
    model = HierAttNet(opt.word_hidden_size, opt.sent_hidden_size, opt.batch_size,
                       training_set.num_classes, opt.word2vec_path, max_sent_length, max_word_length)
    if os.path.isdir(opt.log_path):
        shutil.rmtree(opt.log_path)
    os.makedirs(opt.log_path)
    writer = SummaryWriter(opt.log_path)
    # writer.add_graph(model, torch.zeros(opt.batch_size, max_sent_length, max_word_length))
    if torch.cuda.is_available():
        model.cuda()
    criterion = nn.MSELoss()
    optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad, model.parameters()),
                                lr=opt.lr, momentum=opt.momentum)
    # optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=opt.lr)
    best_loss = 1e5
    best_epoch = 0
    model.train()
    num_iter_per_epoch = len(training_generator)
    for epoch in range(opt.num_epoches):
        for iter, (feature1, feature2, label) in enumerate(training_generator):
            if torch.cuda.is_available():
                feature1 = feature1.cuda()
                feature2 = feature2.cuda()
                label = label.cuda()
            optimizer.zero_grad()
            model._init_hidden_state()
            feature1 = model(feature1)
            model._init_hidden_state()
            feature2 = model(feature2)
            diff = exponent_neg_manhattan_distance(feature1, feature2)
            loss = criterion(diff, label.float())
            loss.backward()
            optimizer.step()
            training_metrics = get_evaluation(label.cpu().numpy(), diff.cpu().detach().numpy(),
                                              list_metrics=["accuracy"])
            print("Epoch: {}/{}, Iteration: {}/{}, Lr: {}, Loss: {}, Accuracy: {}".format(
                epoch + 1, opt.num_epoches, iter + 1, num_iter_per_epoch,
                optimizer.param_groups[0]['lr'], loss, training_metrics["accuracy"]))
            writer.add_scalar('Train/Loss', loss, epoch * num_iter_per_epoch + iter)
            writer.add_scalar('Train/Accuracy', training_metrics["accuracy"],
                              epoch * num_iter_per_epoch + iter)
        # torch.save(model, opt.saved_path + os.sep + "whole_model_han_{}".format(epoch))
        if epoch % opt.test_interval == 0:
            model.eval()
            loss_ls = []
            te_label_ls = []
            te_pred_ls = []
            for te_feature1, te_feature2, te_label in test_generator:
                num_sample = len(te_label)
                if torch.cuda.is_available():
                    te_feature1 = te_feature1.cuda()
                    te_feature2 = te_feature2.cuda()
                    te_label = te_label.cuda()
                with torch.no_grad():
                    model._init_hidden_state(num_sample)
                    te_feature1 = model(te_feature1)
                    model._init_hidden_state(num_sample)
                    te_feature2 = model(te_feature2)
                    te_diff = exponent_neg_manhattan_distance(te_feature1, te_feature2)
                te_loss = criterion(te_diff, te_label.float())
                loss_ls.append(te_loss * num_sample)
                te_label_ls.extend(te_label.clone().cpu())
                te_pred_ls.append(te_diff.clone().cpu())
            te_loss = sum(loss_ls) / test_set.__len__()
            te_pred = torch.cat(te_pred_ls, 0)
            te_label = np.array(te_label_ls)
            test_metrics = get_evaluation(te_label, te_pred.numpy(),
                                          list_metrics=["accuracy", "confusion_matrix"])
            output_file.write(
                "Epoch: {}/{} \nTest loss: {} Test accuracy: {} \nTest confusion matrix: \n{}\n\n".format(
                    epoch + 1, opt.num_epoches, te_loss, test_metrics["accuracy"],
                    test_metrics["confusion_matrix"]))
            print("Epoch: {}/{}, Lr: {}, Loss: {}, Accuracy: {}".format(
                epoch + 1, opt.num_epoches, optimizer.param_groups[0]['lr'],
                te_loss, test_metrics["accuracy"]))
            writer.add_scalar('Test/Loss', te_loss, epoch)
            writer.add_scalar('Test/Accuracy', test_metrics["accuracy"], epoch)
            model.train()
            if te_loss + opt.es_min_delta < best_loss:
                best_loss = te_loss
                best_epoch = epoch
                torch.save(model, opt.saved_path + os.sep + "whole_model_han")
            # Early stopping
            if epoch - best_epoch > opt.es_patience > 0:
                print("Stop training at epoch {}. The lowest loss achieved is {}".format(epoch, te_loss))
                break
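
# The Siamese trainer above scores a pair of document encodings with
# exponent_neg_manhattan_distance(), i.e. the MaLSTM-style similarity
# exp(-||h1 - h2||_1), which maps the L1 distance into (0, 1] so it can be regressed
# against a binary match label with MSELoss. A minimal sketch of that helper,
# assuming inputs of shape [batch, hidden]:
import torch


def exponent_neg_manhattan_distance(x1, x2):
    # Sum the absolute differences over the feature dimension, then negate and
    # exponentiate: identical encodings give 1.0, distant ones approach 0.
    return torch.exp(-torch.sum(torch.abs(x1 - x2), dim=1))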

def train(opt):
    if torch.cuda.is_available():
        torch.cuda.manual_seed(0)
    else:
        torch.manual_seed(0)
    output_file = open(opt.saved_path + os.sep + "logs.txt", "w")
    output_file.write("Model's parameters: {}".format(vars(opt)))
    training_params = {
        "batch_size": opt.batch_size,
        "shuffle": True,
        "drop_last": True
    }
    test_params = {
        "batch_size": opt.batch_size,
        "shuffle": False,
        "drop_last": False
    }
    train_idx, test_idx, label2idx = k_fold_split(opt.data_set)
    training_set = MyDataset(opt.data_set, opt.bert_path, train_idx[0], label2idx)
    training_generator = DataLoader(training_set, **training_params)
    test_set = MyDataset(opt.data_set, opt.bert_path, test_idx[0], label2idx)
    test_generator = DataLoader(test_set, **test_params)
    model = BertHierAttNet(opt.bert_size, opt.word_hidden_size, opt.sent_hidden_size,
                           len(label2idx), opt.bert_path)
    if os.path.isdir(opt.log_path):
        shutil.rmtree(opt.log_path)
    os.makedirs(opt.log_path)
    writer = SummaryWriter(opt.log_path)
    # writer.add_graph(model, torch.zeros(opt.batch_size, max_sent_length, max_word_length))
    if torch.cuda.is_available():
        model.cuda()
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad, model.parameters()),
                                lr=opt.lr, momentum=opt.momentum)
    best_loss = 1e5
    best_epoch = 0
    model.train()
    num_iter_per_epoch = len(training_generator)
    for epoch in range(opt.num_epoches):
        for i, (x, mask, label, id) in enumerate(training_generator):
            if torch.cuda.is_available():
                x = x.cuda()  # [batch, seq_num, seq_len]
                mask = mask.cuda()  # [batch, seq_num, seq_len]
                label = label.cuda()  # [batch]
            optimizer.zero_grad()
            predictions = model(x, mask)
            loss = criterion(predictions, label)
            loss.backward()
            optimizer.step()
            training_metrics = get_evaluation(label.cpu().numpy(),
                                              predictions.cpu().detach().numpy(),
                                              list_metrics=["accuracy"])
            print("Epoch: {}/{}, Iteration: {}/{}, Lr: {}, Loss: {}, Accuracy: {}".format(
                epoch + 1, opt.num_epoches, i + 1, num_iter_per_epoch,
                optimizer.param_groups[0]['lr'], loss, training_metrics["accuracy"]))
            writer.add_scalar('Train/Loss', loss, epoch * num_iter_per_epoch + i)
            writer.add_scalar('Train/Accuracy', training_metrics["accuracy"],
                              epoch * num_iter_per_epoch + i)
        if epoch % opt.test_interval == 0:
            model.eval()
            loss_ls = []
            te_label_ls = []
            te_pred_ls = []
            # the test loader yields the same (x, mask, label, id) tuples as the training loader
            for te_x, te_mask, te_label, te_id in test_generator:
                num_sample = len(te_label)
                if torch.cuda.is_available():
                    te_x = te_x.cuda()
                    te_mask = te_mask.cuda()
                    te_label = te_label.cuda()
                with torch.no_grad():
                    # mirror the training forward pass
                    te_predictions = model(te_x, te_mask)
                te_loss = criterion(te_predictions, te_label)
                loss_ls.append(te_loss * num_sample)
                te_label_ls.extend(te_label.clone().cpu())
                te_pred_ls.append(te_predictions.clone().cpu())
            te_loss = sum(loss_ls) / test_set.__len__()
            te_pred = torch.cat(te_pred_ls, 0)
            te_label = np.array(te_label_ls)
            test_metrics = get_evaluation(te_label, te_pred.numpy(),
                                          list_metrics=["accuracy", "confusion_matrix"])
            output_file.write(
                "Epoch: {}/{} \nTest loss: {} Test accuracy: {} \nTest confusion matrix: \n{}\n\n".format(
                    epoch + 1, opt.num_epoches, te_loss, test_metrics["accuracy"],
                    test_metrics["confusion_matrix"]))
            print("Epoch: {}/{}, Lr: {}, Loss: {}, Accuracy: {}".format(
                epoch + 1, opt.num_epoches, optimizer.param_groups[0]['lr'],
                te_loss, test_metrics["accuracy"]))
            writer.add_scalar('Test/Loss', te_loss, epoch)
            writer.add_scalar('Test/Accuracy', test_metrics["accuracy"], epoch)
            model.train()
            if te_loss + opt.es_min_delta < best_loss:
                best_loss = te_loss
                best_epoch = epoch
                torch.save(model, opt.saved_path + os.sep + "whole_model_han")
            # Early stopping
            if epoch - best_epoch > opt.es_patience > 0:
                print("Stop training at epoch {}. The lowest loss achieved is {}".format(epoch, te_loss))
                break

def train(opt):
    if use_cuda:
        torch.cuda.manual_seed(RANDOM_SEED)
    else:
        torch.manual_seed(RANDOM_SEED)
    np.random.seed(RANDOM_SEED)
    random.seed(RANDOM_SEED)
    output_file = open(opt.saved_path + os.sep + "logs.txt", "w")
    output_file.write("Model's parameters: {}".format(vars(opt)))
    training_params = {"batch_size": opt.batch_size, "shuffle": True, "drop_last": False}
    test_params = {"batch_size": opt.batch_size, "shuffle": True, "drop_last": False}
    mydataset = MyDataset(superspan_HANsFile, superspan_HANs_labelsFile, label_namesFile,
                          ImportanceFeatureMatsFile, model_gensim, max_vocab, training_inds,
                          childLabel2ParentLabelFile, None, dataset, descriptor_HANsFile, VvFile)
    training_generator = DataLoader(mydataset, **training_params)
    testing_inds = [i for i in range(len(mydataset.text_lines)) if i not in training_inds]
    if test_these_inds:
        testing_inds = test_these_inds
    test_set = MyDataset(superspan_HANsFile, superspan_HANs_labelsFile, label_namesFile,
                         ImportanceFeatureMatsFile, model_gensim, max_vocab, testing_inds,
                         None, test_this_label, None, None, None,
                         mydataset.max_length_sentences, mydataset.max_length_word)
    test_generator = DataLoader(test_set, **test_params)
    model = HierAttNet(opt.sent_feature_size, phrases2feature_vector_path, dictionary_path,
                       mydataset.max_length_sentences, mydataset.max_length_word,
                       model_save_path, Vv_embedding_path, path_semanticsFile, max_vocab,
                       use_cuda, mydataset, opt.num_bins)
    if os.path.isdir(log_path):
        shutil.rmtree(log_path)
    os.makedirs(log_path)
    writer = SummaryWriter(log_path)
    if use_cuda:
        model.cuda()
    criterion = nn.CrossEntropyLoss()
    # the attention sub-networks get a much larger learning rate than the rest of the model
    att_parameters = set(model.sent_att_net.parameters()) | set(model.word_att_net.parameters())
    optimizer = torch.optim.SGD([
        {'params': filter(lambda p: p.requires_grad, set(model.parameters()) - att_parameters)},
        {'params': filter(lambda p: p.requires_grad, att_parameters), 'lr': opt.lr * 1000},
    ], lr=opt.lr, momentum=opt.momentum)
    best_loss = 1e5
    best_epoch = 0
    model.train()
    num_iter_per_epoch = len(training_generator)
    classind2size = Counter()
    topk2classind2errorSize = {
        'top 1': Counter(),
        'top 3': Counter(),
        'top 5': Counter(),
    }
    stop_training = False
    all_labels_set = set([l for l in mydataset.labels_list if '.' in l])
    sampled_labels_set = set()
    for epoch in range(opt.num_epoches):
        for iter, (features, ImportanceFeatureMat, labels, indexes, addtional_info) in tqdm(
                enumerate(training_generator)):
            if use_cuda:
                features = features.cuda()
                ImportanceFeatureMat = ImportanceFeatureMat.cuda()
                labels = labels.cuda()
            optimizer.zero_grad()
            predictions, attn_score, similarity_w_attentions = model(
                features, ImportanceFeatureMat, get_concept_similarity)
            loss = criterion(predictions, labels)
            if not stop_training:
                loss.backward()
                optimizer.step()
            sampled_labels_set |= set(labels.cpu().numpy())
            if USE_TRAINING_METRICS:
                training_metrics = get_evaluation(
                    labels.cpu().numpy(), predictions.data.cpu().numpy(),
                    list_metrics=["accuracy", "top K accuracy",
                                  "top K classind2wrong_doc_ind", "top K tree score"],
                    childLabel2ParentLabel=mydataset.childLabel2ParentLabel,
                    labels_list=mydataset.labels_list)
                print("Epoch: {}/{}, Iteration: {}/{}, Lr: {}, Loss: {}, top K accuracy: {}, top K tree score: {}".format(
                    epoch + 1, opt.num_epoches, iter + 1, num_iter_per_epoch,
                    optimizer.param_groups[0]['lr'], loss,
                    training_metrics["top K accuracy"], training_metrics["top K tree score"]))
                writer.add_scalar('Train/Loss', loss, epoch * num_iter_per_epoch + iter)
                writer.add_scalar('Train/Accuracy', training_metrics["accuracy"],
                                  epoch * num_iter_per_epoch + iter)
            column_names = [model.dataset.labels_list.copy() for _ in range(labels.shape[0])]
            if iter % opt.log_interval == 1:
                if get_concept_similarity:
                    pickle.dump([pd.DataFrame(s, index=model.dataset.doc_tensor2doc(features[i]),
                                              columns=column_names[i])
                                 for i, s in enumerate(model.similarity_w_attentions)],
                                open('log/model.similarity_w_attentions{}.bin'.format(iter), 'wb'))
                    pickle.dump(pd.DataFrame(model.bin_weight_history),
                                open('log/model.bin_weight_history_{}.bin'.format(iter), 'wb'))
                pickle.dump(pd.DataFrame(model.sent_att_net.context_weight_history,
                                         columns=['position', 'length', 'inTitle']),
                            open('log/model.sent_att_net.context_weight_history_{}.bin'.format(iter), 'wb'))
                pickle.dump(pd.DataFrame(model.word_att_net.context_weight_history,
                                         columns=['meaningfulness', 'purity', 'targetness', 'completeness',
                                                  'nltk', 'spacy_np', 'spacy_entity', 'autophrase']),
                            open('log/model.word_att_net.context_weight_history_{}.bin'.format(iter), 'wb'))
        model.eval()
        loss_ls = []
        te_label_ls = []
        te_pred_ls = []
        for iter, (te_feature, ImportanceFeatureMat, te_label, indexes, addtional_info) in tqdm(
                enumerate(test_generator), total=len(test_generator)):
            num_sample = len(te_label)
            if use_cuda:
                te_feature = te_feature.cuda()
                ImportanceFeatureMat = ImportanceFeatureMat.cuda()
                te_label = te_label.cuda()
            with torch.no_grad():
                if test_these_inds or test_this_label:
                    te_predictions, te_attn_score, similarity_w_attentions = model(
                        te_feature, ImportanceFeatureMat, get_concept_similarity=get_concept_similarity)
                else:
                    te_predictions, te_attn_score, _ = model(te_feature, ImportanceFeatureMat)
            te_loss = criterion(te_predictions, te_label)
            loss_ls.append(te_loss * num_sample)
            te_label_ls.extend(te_label.clone().cpu())
            te_pred_ls.append(te_predictions.clone().cpu())
            if test_these_inds or test_this_label:
                column_names = [model.dataset.labels_list.copy() for _ in range(te_label.shape[0])]
                training_metrics = get_evaluation(
                    te_label.cpu().numpy(), te_predictions.data.cpu().numpy(),
                    list_metrics=["accuracy", "top K accuracy",
                                  "top K classind2wrong_doc_ind", "top K tree score"],
                    childLabel2ParentLabel=mydataset.childLabel2ParentLabel,
                    labels_list=mydataset.labels_list)
                for classind, doc_ind_in_batchs in training_metrics["top K classind2wrong_doc_ind"]['top 1'].items():
                    print('error for class: ', classind, mydataset.labels_list[classind])
                    for (doc_ind, preds) in doc_ind_in_batchs:
                        try:
                            print('doc_ind', doc_ind, 'predicted: ',
                                  [mydataset.labels_list[pred_classind] for pred_classind in preds],
                                  addtional_info[doc_ind])
                            for i, pred_classind in enumerate(preds):
                                column_names[doc_ind][pred_classind] += "@{}".format(i)
                            print(mydataset.doc_tensor2doc(te_feature[doc_ind]),
                                  'tensor index:', indexes[doc_ind])
                            sim_save_path = 'log/model.similarity_w_attentions_docindex_{}.bin'.format(
                                indexes.numpy()[doc_ind])
                            pickle.dump(pd.DataFrame(similarity_w_attentions[doc_ind],
                                                     index=model.dataset.doc_tensor2doc(te_feature[doc_ind]),
                                                     columns=column_names[doc_ind]),
                                        open(sim_save_path, 'wb'))
                        except Exception as e:
                            import ipdb
                            ipdb.set_trace()
                            raise e
            if iter % 10 == 1:
                print('test iter {}/{}'.format(iter, len(test_generator)))
        te_loss = sum(loss_ls) / test_set.__len__()
        te_pred = torch.cat(te_pred_ls, 0)
        te_label = np.array(te_label_ls)
        test_metrics = get_evaluation(
            te_label, te_pred.numpy(),
            list_metrics=["accuracy", "top K accuracy", "top K tree score",
                          "top K accuracy by class", "confusion_matrix"],
            childLabel2ParentLabel=mydataset.childLabel2ParentLabel,
            labels_list=mydataset.labels_list)
        with open(evaluationResultFile, 'w') as my_file:
            # print(str(test_metrics), file=my_file)
            pickle.dump(test_metrics, open(evaluationResultFile_bin, 'wb'))
        output_file.write(
            "Epoch: {}/{} \nTest loss: {} Test accuracy: {} \nTest confusion matrix: \n{}\n\n".format(
                epoch + 1, opt.num_epoches, te_loss, test_metrics["accuracy"],
                test_metrics["confusion_matrix"]))
        print("Epoch: {}/{}, Lr: {}, Loss: {}, Accuracy: {}".format(
            epoch + 1, opt.num_epoches, optimizer.param_groups[0]['lr'],
            te_loss, test_metrics["accuracy"]))
        writer.add_scalar('Test/Loss', te_loss, epoch)
        writer.add_scalar('Test/Accuracy', test_metrics["accuracy"], epoch)
        model.train()
        if te_loss + opt.es_min_delta < best_loss:
            best_loss = te_loss
            best_epoch = epoch
        # Early stopping
        if epoch - best_epoch > opt.es_patience > 0:
            print("Stop training at epoch {}. The lowest loss achieved is {}".format(epoch, te_loss))
            break
    pickle.dump(test_metrics, open(evaluationResultFileFinal_bin, 'wb'))


if __name__ == '__main__':
    if use_cuda:
        torch.cuda.set_device(dataset2device[dataset])
    train(args)

def train(opt):
    if torch.cuda.is_available():
        torch.cuda.manual_seed(123)
    else:
        torch.manual_seed(123)
    training_params = {"batch_size": opt.batch_size, "shuffle": True}
    test_params = {"batch_size": opt.batch_size, "shuffle": False}
    output_file = open(opt.saved_path + os.sep + "logs.txt", "w")
    output_file.write("Model's parameters: {}".format(vars(opt)))
    training_set = MyDataset(opt.data_path, opt.total_images_per_class, opt.ratio, "train")
    training_generator = DataLoader(training_set, **training_params)
    print("there are {} images for training phase".format(training_set.__len__()))
    test_set = MyDataset(opt.data_path, opt.total_images_per_class, opt.ratio, "test")
    test_generator = DataLoader(test_set, **test_params)
    print("there are {} images for test phase".format(test_set.__len__()))
    model = QuickDraw(num_classes=training_set.num_classes)
    if os.path.isdir(opt.log_path):
        shutil.rmtree(opt.log_path)
    os.makedirs(opt.log_path)
    writer = SummaryWriter(opt.log_path)
    # writer.add_graph(model, torch.rand(opt.batch_size, 1, 28, 28))
    if torch.cuda.is_available():
        model.cuda()
    criterion = nn.CrossEntropyLoss()
    if opt.optimizer == "adam":
        optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr)
    elif opt.optimizer == "sgd":
        optimizer = torch.optim.SGD(model.parameters(), lr=opt.lr, momentum=0.9)
    else:
        print("invalid optimizer")
        exit(0)
    best_loss = 1e5
    best_epoch = 0
    model.train()
    num_iter_per_epoch = len(training_generator)
    for epoch in range(opt.num_epochs):
        for iter, batch in enumerate(training_generator):
            images, labels = batch
            if torch.cuda.is_available():
                images = images.cuda()
                labels = labels.cuda()
            optimizer.zero_grad()
            predictions = model(images)
            loss = criterion(predictions, labels)
            loss.backward()
            optimizer.step()
            training_metrics = get_evaluation(labels.cpu().numpy(), predictions.cpu().detach().numpy(),
                                              list_metrics=["accuracy"])
            print("Epoch: {}/{}, Iteration: {}/{}, Lr: {}, Loss: {}, Accuracy: {}".format(
                epoch + 1, opt.num_epochs, iter + 1, num_iter_per_epoch,
                optimizer.param_groups[0]['lr'], loss, training_metrics["accuracy"]))
            writer.add_scalar('Train/Loss', loss, epoch * num_iter_per_epoch + iter)
            writer.add_scalar('Train/Accuracy', training_metrics["accuracy"],
                              epoch * num_iter_per_epoch + iter)
        model.eval()
        loss_ls = []
        te_label_ls = []
        te_pred_ls = []
        for idx, te_batch in enumerate(test_generator):
            te_images, te_labels = te_batch
            num_samples = te_labels.size()[0]
            if torch.cuda.is_available():
                te_images = te_images.cuda()
                te_labels = te_labels.cuda()
            with torch.no_grad():
                te_predictions = model(te_images)
            te_loss = criterion(te_predictions, te_labels)
            loss_ls.append(te_loss * num_samples)
            te_label_ls.extend(te_labels.clone().cpu())
            te_pred_ls.append(te_predictions.clone().cpu())
        te_loss = sum(loss_ls) / test_set.__len__()
        te_pred = torch.cat(te_pred_ls, 0)
        te_label = np.array(te_label_ls)
        test_metrics = get_evaluation(te_label, te_pred.numpy(),
                                      list_metrics=["accuracy", "confusion_matrix"])
        output_file.write(
            "Epoch: {}/{} \nTest loss: {} Test accuracy: {} \nTest confusion matrix: \n{}\n\n".format(
                epoch + 1, opt.num_epochs, te_loss, test_metrics["accuracy"],
                test_metrics["confusion_matrix"]))
        print("Epoch: {}/{}, Lr: {}, Loss: {}, Accuracy: {}".format(
            epoch + 1, opt.num_epochs, optimizer.param_groups[0]['lr'],
            te_loss, test_metrics["accuracy"]))
        writer.add_scalar('Test/Loss', te_loss, epoch)
        writer.add_scalar('Test/Accuracy', test_metrics["accuracy"], epoch)
        model.train()
        if te_loss + opt.es_min_delta < best_loss:
            best_loss = te_loss
            best_epoch = epoch
            torch.save(model, opt.saved_path + os.sep + "whole_model_quickdraw")
        if epoch - best_epoch > opt.es_patience > 0:
            print("Stop training at epoch {}. The lowest loss achieved is {}".format(epoch, te_loss))
            break
    writer.close()
    output_file.close()

def train(opt):
    # Check whether a GPU is available
    if torch.cuda.is_available():
        torch.cuda.manual_seed(123)
    else:
        torch.manual_seed(123)
    # Resolve the dataset paths
    if opt.dataset in ["csic2010", "agnews", "dbpedia", "yelp_review", "yelp_review_polarity",
                       "amazon_review", "amazon_polarity", "sogou_news", "yahoo_answers"]:
        opt.input, opt.output = get_default_folder(opt.dataset, opt.feature)
    # Create the output directory if it does not exist yet
    if not os.path.exists(opt.output):
        os.makedirs(opt.output)
    # Create the log file
    output_file = open(opt.output + os.sep + "logs.txt", "w")
    output_file.write("Model's parameters: {}".format(vars(opt)))
    # Data loader parameters
    training_params = {"batch_size": opt.batch_size, "shuffle": True, "num_workers": 0}
    test_params = {"batch_size": opt.batch_size, "shuffle": False, "num_workers": 0}
    # Load the datasets
    training_set = MyDataset(opt.input + os.sep + "train.csv", opt.max_length)
    test_set = MyDataset(opt.input + os.sep + "test.csv", opt.max_length)
    training_generator = DataLoader(training_set, **training_params)
    test_generator = DataLoader(test_set, **test_params)
    # Pick the small or large model configuration
    if opt.feature == "small":
        model = CharacterLevelCNN(input_length=opt.max_length, n_classes=training_set.num_classes,
                                  input_dim=len(opt.alphabet), n_conv_filters=256, n_fc_neurons=1024)
    elif opt.feature == "large":
        model = CharacterLevelCNN(input_length=opt.max_length, n_classes=training_set.num_classes,
                                  input_dim=len(opt.alphabet), n_conv_filters=1024, n_fc_neurons=2048)
    else:
        sys.exit("Invalid feature mode!")
    # Set up the TensorBoard log directory and writer
    log_path = "{}_{}_{}".format(opt.log_path, opt.feature, opt.dataset)
    if os.path.isdir(log_path):
        shutil.rmtree(log_path)
    os.makedirs(log_path)
    writer = SummaryWriter(log_path)
    # Move the model to the GPU if one is available
    if torch.cuda.is_available():
        model.cuda()
    # Use cross-entropy as the loss function
    criterion = nn.CrossEntropyLoss()
    # Choose the optimizer
    if opt.optimizer == "adam":
        optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr)
    elif opt.optimizer == "sgd":
        optimizer = torch.optim.SGD(model.parameters(), lr=opt.lr, momentum=0.9)
    # Early-stopping bookkeeping
    best_loss = 1e5
    best_epoch = 0
    # Switch to training mode
    model.train()
    # Number of iterations per epoch
    num_iter_per_epoch = len(training_generator)
    # Start training
    for epoch in range(opt.num_epochs):
        for iter, batch in enumerate(training_generator):
            # Split the batch into features and labels (train)
            feature, label = batch
            # Move the batch to the GPU if one is available
            if torch.cuda.is_available():
                feature = feature.cuda()
                label = label.cuda()
            # Reset the gradients
            optimizer.zero_grad()
            # Forward pass
            predictions = model(feature)
            # Compute the loss
            loss = criterion(predictions, label)
            # Backward pass
            loss.backward()
            # Update the parameters
            optimizer.step()
            # Evaluate on the current batch
            training_metrics = get_evaluation(label.cpu().numpy(), predictions.cpu().detach().numpy(),
                                              list_metrics=["accuracy"])
            # Report the current training status
            print("Epoch: {}/{}, Iteration: {}/{}, Lr: {}, Loss: {}, Accuracy: {}".format(
                epoch + 1, opt.num_epochs, iter + 1, num_iter_per_epoch,
                optimizer.param_groups[0]['lr'], loss, training_metrics["accuracy"]))
            writer.add_scalar('Train/Loss', loss, epoch * num_iter_per_epoch + iter)
            writer.add_scalar('Train/Accuracy', training_metrics["accuracy"],
                              epoch * num_iter_per_epoch + iter)
        # Switch to evaluation mode
        model.eval()
        loss_ls = []
        te_label_ls = []
        te_pred_ls = []
        for batch in test_generator:
            # Split the batch into features and labels (test)
            te_feature, te_label = batch
            # Number of samples in this batch
            num_sample = len(te_label)
            # Move the batch to the GPU if one is available
            if torch.cuda.is_available():
                te_feature = te_feature.cuda()
                te_label = te_label.cuda()
            # Disable gradient tracking for evaluation
            with torch.no_grad():
                te_predictions = model(te_feature)
            # Compute the test loss
            te_loss = criterion(te_predictions, te_label)
            # Accumulate the per-batch loss
            loss_ls.append(te_loss * num_sample)
            # Collect the test labels
            te_label_ls.extend(te_label.clone().cpu())
            # Collect the test predictions
            te_pred_ls.append(te_predictions.clone().cpu())
        # Average loss over the whole test set
        te_loss = sum(loss_ls) / test_set.__len__()
        # Concatenate the prediction tensors along the batch dimension
        te_pred = torch.cat(te_pred_ls, 0)
        # Convert the test labels to a numpy array
        te_label = np.array(te_label_ls)
        # Evaluate on the test set
        test_metrics = get_evaluation(te_label, te_pred.numpy(),
                                      list_metrics=["accuracy", "confusion_matrix"])
        # Write the results to the log file
        output_file.write(
            "Epoch: {}/{} \nTest loss: {} Test accuracy: {} \nTest confusion matrix: \n{}\n\n".format(
                epoch + 1, opt.num_epochs, te_loss, test_metrics["accuracy"],
                test_metrics["confusion_matrix"]))
        # Report the current test results
        print("Epoch: {}/{}, Lr: {}, Loss: {}, Accuracy: {}".format(
            epoch + 1, opt.num_epochs, optimizer.param_groups[0]['lr'],
            te_loss, test_metrics["accuracy"]))
        writer.add_scalar('Test/Loss', te_loss, epoch)
        writer.add_scalar('Test/Accuracy', test_metrics["accuracy"], epoch)
        # Switch back to training mode
        model.train()
        # Save the model whenever the test loss improves by more than the threshold
        if te_loss + opt.es_min_delta < best_loss:
            best_loss = te_loss
            best_epoch = epoch
            torch.save(model, "{}/char-cnn_{}_{}".format(opt.output, opt.dataset, opt.feature))
        # Early stopping
        if epoch - best_epoch > opt.es_patience > 0:
            print("Stop training at epoch {}. The lowest loss achieved is {} at epoch {}".format(
                epoch, te_loss, best_epoch))
            break
        # Halve the learning rate every 3 epochs when using SGD
        if opt.optimizer == "sgd" and epoch % 3 == 0 and epoch > 0:
            current_lr = optimizer.state_dict()['param_groups'][0]['lr']
            current_lr /= 2
            for param_group in optimizer.param_groups:
                param_group['lr'] = current_lr
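
# Design note: the manual learning-rate halving at the end of train() above is
# roughly equivalent to PyTorch's built-in StepLR schedule. A small self-contained
# sketch (the tiny model here is purely illustrative):
import torch


def _lr_halving_demo():
    model = torch.nn.Linear(4, 2)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.5)
    for epoch in range(10):
        # ... one training epoch would run here ...
        optimizer.step()
        scheduler.step()  # halves the learning rate every 3 epochs
        print(epoch, optimizer.param_groups[0]["lr"])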