def load_data(image_ids, image_folder_path, mode, vocab_file="", batch_size=10):
    """Wrap a ``MyDataset`` in a single-process ``DataLoader``.

    For ``mode`` equal to ``'train'`` or ``'val'`` the batches are produced by
    a ``BatchSampler`` over a ``SubsetRandomSampler`` fed with
    ``dataset.get_indices()``.  For every other mode a plain loader with
    ``shuffle=True`` is returned.  The batch size is always read back from the
    dataset (``dataset.batch_size``).
    """
    dataset = MyDataset(
        image_ids,
        image_folder_path,
        mode=mode,
        vocab_file=vocab_file,
        batch_size=batch_size,
    )
    # Loading happens in the main process (no worker processes).
    shared_kwargs = {"dataset": dataset, "num_workers": 0}

    if mode not in ('train', 'val'):
        # Test mode: no index sampler is needed.
        return data.DataLoader(
            batch_size=dataset.batch_size, shuffle=True, **shared_kwargs
        )

    subset_sampler = data.sampler.SubsetRandomSampler(indices=dataset.get_indices())
    batch_sampler = data.sampler.BatchSampler(
        sampler=subset_sampler,
        batch_size=dataset.batch_size,
        drop_last=False,
    )
    return data.DataLoader(batch_sampler=batch_sampler, **shared_kwargs)
def train(opt):
    """Re-score the hand-sorted samples and move the ones the model disagrees with.

    Loads the pickled model from ``opt.input`` and scores every ``*.txt`` file
    under ``/space/SP/bad`` and ``/space/SP/good``.  A file whose predicted
    class contradicts its folder (class 1 inside ``bad``, class 0 inside
    ``good``) is renamed into ``/space/SP/likely-good``, prefixed with the
    class-1 probability in thousandths, zero-padded to four digits.

    Args:
        opt: options object; ``opt.input`` (model path) and ``opt.max_length``
            (forwarded to ``MyDataset``) are read.
    """
    use_cuda = torch.cuda.is_available()
    # Without map_location a checkpoint saved on a GPU cannot be unpickled on
    # a CPU-only host.
    model = torch.load(opt.input, map_location=None if use_cuda else 'cpu')
    if use_cuda:
        model.cuda()
    model.eval()  # the mode never changes, so set it once

    candidates = sorted(glob.glob('/space/SP/bad/*.txt') + glob.glob('/space/SP/good/*.txt'))
    for fn in candidates:
        test_set = MyDataset(fn, opt.max_length)
        test_generator = DataLoader(test_set)
        for te_feature, _ in test_generator:
            if use_cuda:
                te_feature = te_feature.cuda()
            with torch.no_grad():
                te_predictions = model(te_feature)
            out = F.softmax(te_predictions, 1)
            weight = torch.argmax(out[0])
            weighti = int(out[0][1].item() * 1000)
            weighti = '%04d' % weighti
            if (weight == 1 and fn.find('/bad/') > 0) or (weight == 0 and fn.find('/good/') > 0):
                print(True if weight == 1 else False, weighti, fn)
                os.rename(fn, '/space/SP/likely-good/' + weighti + '-' + os.path.basename(fn))
                # The file no longer exists at ``fn``; a second rename from a
                # later batch would raise FileNotFoundError.
                break
def do_POST(self):
    """Classify the POSTed body and answer with a JSON verdict.

    The response is ``{"license": bool, "confidence": number}``.  ``license``
    is true when class 1 has the highest softmax score.  ``confidence`` is
    derived from the class-1 probability in thousandths (``weighti``):
    ``(weighti - 500) / 5`` when the verdict is true and
    ``(500 - weighti) / 5`` otherwise.

    Relies on a module-level ``model``.
    """
    self.send_response(200)
    self.send_header('Content-type', 'application/json')
    self.end_headers()

    content_length = int(self.headers['Content-Length'])  # size of the body
    post_data = self.rfile.read(content_length)  # the body itself

    test_set = MyDataset(None, data=post_data, max_length=1014)
    # The DataLoader is only used to obtain one batched feature tensor.
    test_generator = DataLoader(test_set)
    # ``iterator.next()`` is the Python 2 protocol; the builtin ``next`` works
    # on every Python 3 / PyTorch version.
    te_feature, _ = next(iter(test_generator))

    model.eval()
    if torch.cuda.is_available():
        te_feature = te_feature.cuda()
    with torch.no_grad():
        te_predictions = model(te_feature)
    out = F.softmax(te_predictions, 1)
    weight = torch.argmax(out[0])
    weighti = int(out[0][1].item() * 1000)
    is_license = bool(weight == 1)
    print(is_license, weighti)

    response = {'license': is_license}
    if is_license:
        response['confidence'] = (weighti - 500) / 5
    else:
        response['confidence'] = (500 - weighti) / 5
    self.wfile.write(json.dumps(response).encode('utf-8'))
def train(opt):
    """Score every dump file and symlink it under a score-prefixed name.

    Each ``/space/SP/dumps/*.txt`` file is run through the model loaded from
    ``opt.input``.  For every batch a symlink
    ``/space/SP/likely-good/<weighti>-<basename>`` pointing to
    ``../dumps/<basename>`` is created, where ``weighti`` is the class-1
    probability in thousandths, zero-padded to four digits.

    Args:
        opt: options object; ``opt.input`` and ``opt.max_length`` are read.
    """
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        model = torch.load(opt.input)
        # A checkpoint saved on the CPU is restored on the CPU; the inputs
        # below are moved to the GPU, so the model has to follow them.
        model.cuda()
    else:
        model = torch.load(opt.input, map_location='cpu')
    model.eval()

    for fn in glob.glob('/space/SP/dumps/*.txt'):
        test_set = MyDataset(fn, opt.max_length)
        test_generator = DataLoader(test_set)
        # Keep the full path in ``fn``; the original rebound it inside the
        # batch loop, so later batches saw only the basename.
        base_name = os.path.basename(fn)
        for te_feature, _ in test_generator:
            if use_cuda:
                te_feature = te_feature.cuda()
            with torch.no_grad():
                te_predictions = model(te_feature)
            out = F.softmax(te_predictions, 1)
            weight = torch.argmax(out[0])
            weighti = int(out[0][1].item() * 1000)
            weighti = '%04d' % weighti
            print(True if weight == 1 else False, weighti, fn)
            os.symlink('../dumps/' + base_name, '/space/SP/likely-good/' + weighti + '-' + base_name)
def test(opt):
    """Evaluate a saved pairwise model and dump its predictions to CSV.

    The output directory ``opt.output`` is wiped and recreated, the model at
    ``opt.pre_trained_model`` is run over the dataset at ``opt.data_path``,
    the raw outputs are thresholded at 0.5, and one row per sample is written
    to ``<opt.output>/predictions.csv``.  Loss, accuracy and confusion matrix
    from ``get_evaluation`` are printed at the end.

    Args:
        opt: options object; reads ``batch_size``, ``output``,
            ``pre_trained_model``, ``data_path`` and ``word2vec_path``.
    """
    test_params = {
        "batch_size": opt.batch_size,
        "shuffle": False,
        "drop_last": False
    }
    if os.path.isdir(opt.output):
        shutil.rmtree(opt.output)
    os.makedirs(opt.output)

    use_cuda = torch.cuda.is_available()
    if use_cuda:
        model = torch.load(opt.pre_trained_model)
    else:
        # Keep every storage on the CPU regardless of where it was saved.
        model = torch.load(opt.pre_trained_model,
                           map_location=lambda storage, loc: storage)

    # NOTE(review): 10 and 33 are presumably the maximum sentence and word
    # lengths expected by MyDataset - confirm against the training run.
    test_set = MyDataset(opt.data_path, opt.word2vec_path, 10, 33)
    test_generator = DataLoader(test_set, **test_params)
    if use_cuda:
        model.cuda()
    model.eval()

    te_label_ls = []
    te_pred_ls = []
    for te_feature1, te_feature2, te_label in test_generator:
        num_sample = len(te_label)
        print(num_sample)
        if use_cuda:
            te_feature1 = te_feature1.cuda()
            te_feature2 = te_feature2.cuda()
            te_label = te_label.cuda()
        with torch.no_grad():
            # The last batch may be smaller, so the hidden state is sized
            # per batch.
            model._init_hidden_state(num_sample)
            te_predictions = model(te_feature1, te_feature2)
        te_label_ls.extend(te_label.clone().cpu())
        te_pred_ls.append(te_predictions.clone().cpu())

    te_pred = torch.cat(te_pred_ls, 0).numpy()
    te_label = np.array(te_label_ls)
    te_pred = np.where(te_pred > 0.5, 1, 0)

    fieldnames = ['True label', 'Predicted label', 'Content1', 'Content2']
    # The csv module requires newline=''; without it Windows output gets an
    # extra blank line after every row and embedded newlines are mangled.
    with open(opt.output + os.sep + "predictions.csv", 'w', newline='') as csv_file:
        writer = csv.DictWriter(csv_file,
                                fieldnames=fieldnames,
                                quoting=csv.QUOTE_NONNUMERIC)
        writer.writeheader()
        for true_label, predicted, texts in zip(te_label, te_pred, test_set.texts):
            writer.writerow({
                'True label': true_label,
                'Predicted label': predicted,
                'Content1': texts[0],
                'Content2': texts[1]
            })

    test_metrics = get_evaluation(
        te_label,
        te_pred,
        list_metrics=["accuracy", "loss", "confusion_matrix"])
    print("Prediction:\nLoss: {} Accuracy: {} \nConfusion matrix: \n{}".format(
        test_metrics["loss"], test_metrics["accuracy"],
        test_metrics["confusion_matrix"]))
def inference(opt):
    """Run a saved classifier over ``opt.input`` and write predictions to CSV.

    Softmax probabilities are collected for the whole dataset, the arg-max
    class is written (1-based, next to the 1-based true label and the text)
    to ``opt.output``, and loss / accuracy / confusion matrix from
    ``get_evaluation`` are printed.

    Args:
        opt: options object; reads ``batch_size``, ``input``,
            ``trained_model``, ``gpu`` and ``output``.
    """
    test_params = {"batch_size": opt.batch_size, "shuffle": False, "num_workers": 0}
    test_set = MyDataset(opt.input)
    test_generator = DataLoader(test_set, **test_params)

    model = torch.load(opt.trained_model)
    model.eval()

    test_true = []
    test_prob = []
    # ``Variable(..., volatile=True)`` no longer disables autograd;
    # ``torch.no_grad()`` is the supported way to run inference without
    # building a graph.
    with torch.no_grad():
        for t_data, n_true_label in test_generator:
            if opt.gpu:
                t_data = t_data.cuda()
            t_predicted_label = model(t_data)
            # dim=1 is what the deprecated implicit-dim call picked for the
            # 2-D (batch, classes) output assumed by the argmax below.
            test_prob.append(F.softmax(t_predicted_label, dim=1))
            test_true.extend(n_true_label)

    test_prob = torch.cat(test_prob, 0).cpu().numpy()
    test_true = np.array(test_true)
    test_pred = np.argmax(test_prob, -1)

    fieldnames = ['True label', 'Predicted label', 'Content']
    # newline='' is required by the csv module for files it writes to.
    with open(opt.output, 'w', newline='') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames, quoting=csv.QUOTE_NONNUMERIC)
        writer.writeheader()
        for true_label, predicted, text in zip(test_true, test_pred, test_set.texts):
            # Labels are stored 0-based and reported 1-based.
            writer.writerow(
                {'True label': true_label + 1, 'Predicted label': predicted + 1, 'Content': text})

    test_metrics = get_evaluation(test_true, test_prob,
                                  list_metrics=["accuracy", "loss", "confusion_matrix"])
    print("Prediction:\nLoss: {} Accuracy: {} \nConfusion matrix: \n{}".format(test_metrics["loss"],
                                                                              test_metrics["accuracy"],
                                                                              test_metrics["confusion_matrix"]))
def train(opt):
    """Train a pairwise hierarchical model with BCE loss and early stopping.

    ``opt.graph`` selects the architecture (1: ``HierGraphNet``,
    2: ``DHierGraphNet``, anything else: ``HierAttNet``).  After every epoch
    the training loss, accuracy and confusion matrix are appended to
    ``<opt.saved_path>/logs.txt``.  Every ``opt.test_interval`` epochs the
    model is evaluated on ``opt.test_set``, saved to
    ``<opt.saved_path>/<opt.model_name>`` when the test loss improves by more
    than ``opt.es_min_delta``, and training stops once no improvement was seen
    for more than ``opt.es_patience`` epochs (patience 0 disables stopping).

    Args:
        opt: options object holding the hyper-parameters and paths read below.
    """
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        torch.cuda.manual_seed(123)
    else:
        torch.manual_seed(123)

    # The context manager guarantees the log is flushed and closed even when
    # training is interrupted.
    with open(opt.saved_path + os.sep + "logs.txt", "w") as output_file:
        output_file.write("Model's parameters: {}".format(vars(opt)))
        training_params = {
            "batch_size": opt.batch_size,
            "shuffle": True,
            "drop_last": True
        }
        test_params = {
            "batch_size": opt.batch_size,
            "shuffle": False,
            "drop_last": False
        }
        if opt.tune == 1:
            print('Fine tune the word embedding')
            freeze = False
        else:
            freeze = True

        # "5,5" acts as the sentinel meaning "derive the lengths from the data".
        if opt.max_sent_words == "5,5":
            max_word_length, max_sent_length = get_max_lengths(opt.train_set)
        else:
            max_word_length, max_sent_length = [
                int(x) for x in opt.max_sent_words.split(',')
            ]
        print(max_word_length, max_sent_length)

        training_set = MyDataset(opt.train_set, opt.word2vec_path,
                                 max_sent_length, max_word_length)
        training_generator = DataLoader(training_set, **training_params)
        test_set = MyDataset(opt.test_set, opt.word2vec_path, max_sent_length,
                             max_word_length)
        test_generator = DataLoader(test_set, **test_params)

        if opt.graph == 1:
            print('use graph model')
            model_cls = HierGraphNet
        elif opt.graph == 2:
            print('use deep graph model')
            model_cls = DHierGraphNet
        else:
            model_cls = HierAttNet
        model = model_cls(opt.word_hidden_size, opt.sent_hidden_size,
                          opt.batch_size, freeze, opt.word2vec_path,
                          max_sent_length, max_word_length)
        if use_cuda:
            model.cuda()

        criterion = nn.BCELoss()
        optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                            model.parameters()),
                                     lr=opt.lr)
        best_loss = 1e5
        best_epoch = 0
        model.train()
        num_iter_per_epoch = len(training_generator)

        for epoch in range(opt.num_epoches):
            start_time = time.time()
            train_loss_ls = []
            train_label_ls = []
            train_pred_ls = []
            num_train_samples = 0
            for step, (feature1, feature2, label) in enumerate(training_generator):
                num_sample = len(label)
                # BCELoss needs float targets on every device, not only on
                # the CUDA path.
                label = label.float()
                if use_cuda:
                    feature1 = feature1.cuda()
                    feature2 = feature2.cuda()
                    label = label.cuda()
                optimizer.zero_grad()
                model._init_hidden_state()
                predictions = model(feature1, feature2)
                loss = criterion(predictions, label)
                loss.backward()
                optimizer.step()
                training_metrics = get_evaluation(
                    label.cpu().numpy(),
                    predictions.cpu().detach().numpy(),
                    list_metrics=["accuracy"])
                print(
                    "Epoch: {}/{}, Iteration: {}/{}, Lr: {}, Loss: {}, Accuracy: {}"
                    .format(epoch + 1, opt.num_epoches, step + 1,
                            num_iter_per_epoch,
                            optimizer.param_groups[0]['lr'], loss,
                            training_metrics["accuracy"]),
                    flush=True)
                print("--- %s seconds ---" % (time.time() - start_time))
                start_time = time.time()
                # Store plain floats / detached tensors so the autograd graph
                # of every step is not kept alive for the whole epoch.
                train_loss_ls.append(loss.item() * num_sample)
                train_label_ls.extend(label.clone().cpu())
                train_pred_ls.append(predictions.detach().cpu())
                num_train_samples += num_sample

            # Average over the samples actually trained on (the original
            # divided by the size of the *test* set).
            train_loss = sum(train_loss_ls) / num_train_samples
            train_pred = torch.cat(train_pred_ls, 0)
            train_label = np.array(train_label_ls)
            train_metrics = get_evaluation(
                train_label,
                train_pred.numpy(),
                list_metrics=["accuracy", "confusion_matrix"])
            output_file.write(
                "Epoch: {}/{} \nTrain loss: {} Train accuracy: {} \nTrain confusion matrix: \n{}\n\n"
                .format(epoch + 1, opt.num_epoches, train_loss,
                        train_metrics["accuracy"],
                        train_metrics["confusion_matrix"]))
            print("Epoch: {}/{}, Lr: {}, Loss: {}, Accuracy: {}".format(
                epoch + 1, opt.num_epoches, optimizer.param_groups[0]['lr'],
                train_loss, train_metrics["accuracy"]))

            if epoch % opt.test_interval == 0:
                model.eval()
                loss_ls = []
                te_label_ls = []
                te_pred_ls = []
                for te_feature1, te_feature2, te_label in test_generator:
                    num_sample = len(te_label)
                    te_label = te_label.float()
                    if use_cuda:
                        te_feature1 = te_feature1.cuda()
                        te_feature2 = te_feature2.cuda()
                        te_label = te_label.cuda()
                    with torch.no_grad():
                        # The last test batch may be smaller than batch_size.
                        model._init_hidden_state(num_sample)
                        te_predictions = model(te_feature1, te_feature2)
                    te_loss = criterion(te_predictions, te_label)
                    loss_ls.append(te_loss.item() * num_sample)
                    te_label_ls.extend(te_label.clone().cpu())
                    te_pred_ls.append(te_predictions.clone().cpu())
                te_loss = sum(loss_ls) / test_set.__len__()
                te_pred = torch.cat(te_pred_ls, 0)
                te_label = np.array(te_label_ls)
                test_metrics = get_evaluation(
                    te_label,
                    te_pred.numpy(),
                    list_metrics=["accuracy", "confusion_matrix"])
                output_file.write(
                    "Epoch: {}/{} \nTest loss: {} Test accuracy: {} \nTest confusion matrix: \n{}\n\n"
                    .format(epoch + 1, opt.num_epoches, te_loss,
                            test_metrics["accuracy"],
                            test_metrics["confusion_matrix"]))
                print("Epoch: {}/{}, Lr: {}, Loss: {}, Accuracy: {}".format(
                    epoch + 1, opt.num_epoches,
                    optimizer.param_groups[0]['lr'], te_loss,
                    test_metrics["accuracy"]))
                for name, param in model.named_parameters():
                    if param.requires_grad:
                        if name == 'fd.weight':
                            print(name, param.data)
                model.train()

                # Model selection and early stopping use the *test* loss, so
                # they only run on epochs where it was just computed.
                if te_loss + opt.es_min_delta < best_loss:
                    best_loss = te_loss
                    best_epoch = epoch
                    torch.save(model, opt.saved_path + os.sep + opt.model_name)
                    logger.info('saved model')

                # Early stopping
                if epoch - best_epoch > opt.es_patience > 0:
                    print(
                        "Stop training at epoch {}. The lowest loss achieved is {}"
                        .format(epoch, best_loss))
                    break
def train(opt):
    """Train a siamese ``HierAttNet`` on text pairs with an MSE similarity loss.

    Both texts of a pair are encoded by the same network; their similarity is
    ``exponent_neg_manhattan_distance`` of the two encodings and is regressed
    onto the label.  Train/test scalars go to a ``SummaryWriter`` under
    ``opt.log_path`` (which is wiped first) and test summaries to
    ``<opt.saved_path>/logs.txt``.  Every ``opt.test_interval`` epochs the
    model is evaluated, saved as ``whole_model_han`` on improvement, and
    training stops when ``opt.es_patience`` (> 0) epochs pass without one.

    Args:
        opt: options object holding the hyper-parameters and paths read below.
    """
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        torch.cuda.manual_seed(123)
    else:
        torch.manual_seed(123)

    output_file = open(opt.saved_path + os.sep + "logs.txt", "w")
    writer = None
    try:
        output_file.write("Model's parameters: {}".format(vars(opt)))
        training_params = {
            "batch_size": opt.batch_size,
            "shuffle": True,
            "drop_last": True
        }
        test_params = {
            "batch_size": opt.batch_size,
            "shuffle": False,
            "drop_last": False
        }
        max_word_length, max_sent_length = get_max_lengths(opt.train_set)
        training_set = MyDataset(opt.train_set, opt.word2vec_path,
                                 max_sent_length, max_word_length)
        training_generator = DataLoader(training_set, **training_params)
        test_set = MyDataset(opt.test_set, opt.word2vec_path, max_sent_length,
                             max_word_length)
        test_generator = DataLoader(test_set, **test_params)

        model = HierAttNet(opt.word_hidden_size, opt.sent_hidden_size,
                           opt.batch_size, training_set.num_classes,
                           opt.word2vec_path, max_sent_length, max_word_length)

        # Start every run with an empty TensorBoard directory.
        if os.path.isdir(opt.log_path):
            shutil.rmtree(opt.log_path)
        os.makedirs(opt.log_path)
        writer = SummaryWriter(opt.log_path)

        if use_cuda:
            model.cuda()
        criterion = nn.MSELoss()
        optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad,
                                           model.parameters()),
                                    lr=opt.lr,
                                    momentum=opt.momentum)
        best_loss = 1e5
        best_epoch = 0
        model.train()
        num_iter_per_epoch = len(training_generator)

        for epoch in range(opt.num_epoches):
            for step, (feature1, feature2, label) in enumerate(training_generator):
                if use_cuda:
                    feature1 = feature1.cuda()
                    feature2 = feature2.cuda()
                    label = label.cuda()
                optimizer.zero_grad()
                # The hidden state is reset before each side of the pair.
                model._init_hidden_state()
                feature1 = model(feature1)
                model._init_hidden_state()
                feature2 = model(feature2)
                diff = exponent_neg_manhattan_distance(feature1, feature2)
                loss = criterion(diff, label.float())
                loss.backward()
                optimizer.step()
                training_metrics = get_evaluation(label.cpu().numpy(),
                                                  diff.cpu().detach().numpy(),
                                                  list_metrics=["accuracy"])
                print(
                    "Epoch: {}/{}, Iteration: {}/{}, Lr: {}, Loss: {}, Accuracy: {}"
                    .format(epoch + 1, opt.num_epoches, step + 1,
                            num_iter_per_epoch,
                            optimizer.param_groups[0]['lr'], loss,
                            training_metrics["accuracy"]))
                global_step = epoch * num_iter_per_epoch + step
                writer.add_scalar('Train/Loss', loss, global_step)
                writer.add_scalar('Train/Accuracy',
                                  training_metrics["accuracy"], global_step)

            if epoch % opt.test_interval == 0:
                model.eval()
                loss_ls = []
                te_label_ls = []
                te_pred_ls = []
                for te_feature1, te_feature2, te_label in test_generator:
                    num_sample = len(te_label)
                    if use_cuda:
                        te_feature1 = te_feature1.cuda()
                        te_feature2 = te_feature2.cuda()
                        te_label = te_label.cuda()
                    with torch.no_grad():
                        # The last test batch may be smaller than batch_size.
                        model._init_hidden_state(num_sample)
                        te_feature1 = model(te_feature1)
                        model._init_hidden_state(num_sample)
                        te_feature2 = model(te_feature2)
                    te_diff = exponent_neg_manhattan_distance(
                        te_feature1, te_feature2)
                    te_loss = criterion(te_diff, te_label.float())
                    loss_ls.append(te_loss * num_sample)
                    te_label_ls.extend(te_label.clone().cpu())
                    te_pred_ls.append(te_diff.clone().cpu())
                te_loss = sum(loss_ls) / test_set.__len__()
                te_pred = torch.cat(te_pred_ls, 0)
                te_label = np.array(te_label_ls)
                test_metrics = get_evaluation(
                    te_label,
                    te_pred.numpy(),
                    list_metrics=["accuracy", "confusion_matrix"])
                output_file.write(
                    "Epoch: {}/{} \nTest loss: {} Test accuracy: {} \nTest confusion matrix: \n{}\n\n"
                    .format(epoch + 1, opt.num_epoches, te_loss,
                            test_metrics["accuracy"],
                            test_metrics["confusion_matrix"]))
                print("Epoch: {}/{}, Lr: {}, Loss: {}, Accuracy: {}".format(
                    epoch + 1, opt.num_epoches,
                    optimizer.param_groups[0]['lr'], te_loss,
                    test_metrics["accuracy"]))
                writer.add_scalar('Test/Loss', te_loss, epoch)
                writer.add_scalar('Test/Accuracy', test_metrics["accuracy"],
                                  epoch)
                model.train()

                if te_loss + opt.es_min_delta < best_loss:
                    best_loss = te_loss
                    best_epoch = epoch
                    torch.save(model,
                               opt.saved_path + os.sep + "whole_model_han")

                # Early stopping
                if epoch - best_epoch > opt.es_patience > 0:
                    # Report the best loss, not the loss of the last epoch.
                    print(
                        "Stop training at epoch {}. The lowest loss achieved is {}"
                        .format(epoch, best_loss))
                    break
    finally:
        # Flush pending TensorBoard events and the text log on every exit path.
        if writer is not None:
            writer.close()
        output_file.close()
def run(path):
    """Build a ``MyDataset`` from ``path`` and iterate over it once.

    The items are discarded; the loop only forces every element to be
    produced.
    """
    for _item in MyDataset(path):
        continue
def train(opt):
    """Train the ``QuickDraw`` classifier with per-epoch evaluation.

    After every epoch the model is evaluated on the test split; the summary is
    appended to ``<opt.saved_path>/logs.txt`` and to TensorBoard under
    ``opt.log_path`` (wiped at start).  The model is saved as
    ``whole_model_quickdraw`` whenever the test loss improves by more than
    ``opt.es_min_delta``; training stops when more than ``opt.es_patience``
    (> 0) epochs pass without improvement.

    Args:
        opt: options object holding the hyper-parameters and paths read below.

    Raises:
        SystemExit: with a non-zero status when ``opt.optimizer`` is neither
            ``"adam"`` nor ``"sgd"``.
    """
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        torch.cuda.manual_seed(123)
    else:
        torch.manual_seed(123)

    training_params = {"batch_size": opt.batch_size, "shuffle": True}
    test_params = {"batch_size": opt.batch_size, "shuffle": False}

    output_file = open(opt.saved_path + os.sep + "logs.txt", "w")
    writer = None
    try:
        output_file.write("Model's parameters: {}".format(vars(opt)))

        training_set = MyDataset(opt.data_path, opt.total_images_per_class, opt.ratio, "train")
        training_generator = DataLoader(training_set, **training_params)
        print ("there are {} images for training phase".format(training_set.__len__()))
        test_set = MyDataset(opt.data_path, opt.total_images_per_class, opt.ratio, "test")
        test_generator = DataLoader(test_set, **test_params)
        print("there are {} images for test phase".format(test_set.__len__()))

        model = QuickDraw(num_classes=training_set.num_classes)

        # Start every run with an empty TensorBoard directory.
        if os.path.isdir(opt.log_path):
            shutil.rmtree(opt.log_path)
        os.makedirs(opt.log_path)
        writer = SummaryWriter(opt.log_path)

        if use_cuda:
            model.cuda()

        criterion = nn.CrossEntropyLoss()
        if opt.optimizer == "adam":
            optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr)
        elif opt.optimizer == "sgd":
            optimizer = torch.optim.SGD(model.parameters(), lr=opt.lr, momentum=0.9)
        else:
            print("invalid optimizer")
            # ``exit(0)`` reported success to the calling shell; a bad
            # configuration must produce a failing exit status.
            raise SystemExit(1)

        best_loss = 1e5
        best_epoch = 0
        model.train()
        num_iter_per_epoch = len(training_generator)

        for epoch in range(opt.num_epochs):
            for step, batch in enumerate(training_generator):
                images, labels = batch
                if use_cuda:
                    images = images.cuda()
                    labels = labels.cuda()
                optimizer.zero_grad()
                predictions = model(images)
                loss = criterion(predictions, labels)
                loss.backward()
                optimizer.step()
                training_metrics = get_evaluation(labels.cpu().numpy(), predictions.cpu().detach().numpy(),
                                                  list_metrics=["accuracy"])
                print("Epoch: {}/{}, Iteration: {}/{}, Lr: {}, Loss: {}, Accuracy: {}".format(
                    epoch + 1,
                    opt.num_epochs,
                    step + 1,
                    num_iter_per_epoch,
                    optimizer.param_groups[0]['lr'],
                    loss, training_metrics["accuracy"]))
                global_step = epoch * num_iter_per_epoch + step
                writer.add_scalar('Train/Loss', loss, global_step)
                writer.add_scalar('Train/Accuracy', training_metrics["accuracy"], global_step)

            model.eval()
            loss_ls = []
            te_label_ls = []
            te_pred_ls = []
            for te_images, te_labels in test_generator:
                num_samples = te_labels.size()[0]
                if use_cuda:
                    te_images = te_images.cuda()
                    te_labels = te_labels.cuda()
                with torch.no_grad():
                    te_predictions = model(te_images)
                te_loss = criterion(te_predictions, te_labels)
                # Weight by batch size so the mean below is per sample.
                loss_ls.append(te_loss * num_samples)
                te_label_ls.extend(te_labels.clone().cpu())
                te_pred_ls.append(te_predictions.clone().cpu())
            te_loss = sum(loss_ls) / test_set.__len__()
            te_pred = torch.cat(te_pred_ls, 0)
            te_label = np.array(te_label_ls)
            test_metrics = get_evaluation(te_label, te_pred.numpy(), list_metrics=["accuracy", "confusion_matrix"])
            output_file.write(
                "Epoch: {}/{} \nTest loss: {} Test accuracy: {} \nTest confusion matrix: \n{}\n\n".format(
                    epoch + 1, opt.num_epochs,
                    te_loss,
                    test_metrics["accuracy"],
                    test_metrics["confusion_matrix"]))
            print("Epoch: {}/{}, Lr: {}, Loss: {}, Accuracy: {}".format(
                epoch + 1,
                opt.num_epochs,
                optimizer.param_groups[0]['lr'],
                te_loss, test_metrics["accuracy"]))
            writer.add_scalar('Test/Loss', te_loss, epoch)
            writer.add_scalar('Test/Accuracy', test_metrics["accuracy"], epoch)
            model.train()

            if te_loss + opt.es_min_delta < best_loss:
                best_loss = te_loss
                best_epoch = epoch
                torch.save(model, opt.saved_path + os.sep + "whole_model_quickdraw")
            if epoch - best_epoch > opt.es_patience > 0:
                # Report the best loss, not the loss of the last epoch.
                print("Stop training at epoch {}. The lowest loss achieved is {}".format(epoch, best_loss))
                break
    finally:
        # Close on every exit path (normal end, early stop, error, SystemExit).
        if writer is not None:
            writer.close()
        output_file.close()
def train(opt):
    """Train one of the HAN variants on news (and optionally stock) sequences.

    ``opt.model_type`` selects the architecture.  The two ``*_stock_han``
    variants additionally receive the stock features of each sample.
    Train/test scalars are written to TensorBoard under
    ``opt.log_path + opt.model_type`` (wiped at start) and test summaries to
    ``opt.saved_path + "logs.txt"``.  Every ``opt.test_interval`` epochs the
    model is evaluated, saved as ``opt.saved_path + opt.model_type + "_model"``
    on improvement, and training stops when ``opt.es_patience`` (> 0) epochs
    pass without one.

    Args:
        opt: options object holding the hyper-parameters and paths read below.

    Raises:
        ValueError: if ``opt.model_type`` is not one of the supported names.
    """
    news_only_models = ["ori_han", "sent_ori_han", "muil_han", "sent_muil_han"]
    stock_models = ["muil_stock_han", "sent_muil_stock_han"]

    use_cuda = torch.cuda.is_available()
    if use_cuda:
        torch.cuda.manual_seed(123)
        print("cuda...")
    else:
        torch.manual_seed(123)

    # training setting
    output_file = open(opt.saved_path + "logs.txt", "w")
    writer = None
    try:
        output_file.write("Model's parameters: {}".format(vars(opt)))
        training_params = {
            "batch_size": opt.batch_size,
            "num_workers": opt.num_workers,
            "shuffle": True,
            "pin_memory": True,
            "drop_last": True
        }
        test_params = {
            "batch_size": opt.batch_size,
            "num_workers": opt.num_workers,
            "shuffle": False,
            "pin_memory": True,
            "drop_last": False
        }

        # The assignment of stock_length was commented out, so the stock
        # models crashed with NameError.  9 is the value from that
        # commented-out line; an ``opt.stock_length`` attribute overrides it.
        # NOTE(review): confirm 9 matches the dataset's stock feature width.
        stock_length = getattr(opt, "stock_length", 9)

        data_init_time = datetime.datetime.now()
        training_set = MyDataset(data_path=opt.train_set)
        training_generator = DataLoader(training_set, **training_params)
        test_set = MyDataset(data_path=opt.test_set)
        test_generator = DataLoader(test_set, **test_params)
        data_end_time = datetime.datetime.now()
        print("the data loading time is: {}s...".format(
            (data_end_time - data_init_time).seconds))

        # model init
        model_init_time = datetime.datetime.now()
        # Keyword arguments shared by every architecture.
        common_kwargs = dict(days_num=opt.days_num,
                             days_hidden_size=opt.days_hidden_size,
                             news_hidden_size=opt.news_hidden_size,
                             num_classes=training_set.num_classes,
                             pretrained_word2vec_path=opt.word2vec_path,
                             dropout=opt.dropout)
        if opt.model_type == "ori_han":
            model = Ori_HAN(**common_kwargs)
        elif opt.model_type == "sent_ori_han":
            model = Sent_Ori_HAN(sent_hidden_size=opt.sent_hidden_size,
                                 **common_kwargs)
        elif opt.model_type == "muil_han":
            model = Muil_HAN(head_num=opt.head_num, **common_kwargs)
        elif opt.model_type == "sent_muil_han":
            model = Sent_Muil_HAN(head_num=opt.head_num,
                                  sent_hidden_size=opt.sent_hidden_size,
                                  **common_kwargs)
        elif opt.model_type == "muil_stock_han":
            model = Muil_Stock_HAN(head_num=opt.head_num,
                                   stock_hidden_size=opt.stock_hidden_size,
                                   stock_length=stock_length,
                                   **common_kwargs)
        elif opt.model_type == "sent_muil_stock_han":
            model = Sent_Muil_Stock_HAN(head_num=opt.head_num,
                                        sent_hidden_size=opt.sent_hidden_size,
                                        stock_hidden_size=opt.stock_hidden_size,
                                        stock_length=stock_length,
                                        **common_kwargs)
        else:
            # Previously ``model`` stayed unbound and a NameError surfaced
            # much later.
            raise ValueError("unknown model_type: {!r}".format(opt.model_type))
        model_end_time = datetime.datetime.now()
        print("the model init time is: {}s...".format(
            (model_end_time - model_init_time).seconds))

        # other setting
        log_dir = opt.log_path + opt.model_type
        if os.path.isdir(log_dir):
            shutil.rmtree(log_dir)  # recursively delete the old log folder
        os.makedirs(log_dir)
        writer = SummaryWriter(log_dir)

        # initialise the training-related state
        if use_cuda:
            model.cuda()
            print("model use cuda...")
        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad,
                                           model.parameters()),
                                    lr=opt.lr,
                                    momentum=opt.momentum)
        best_loss = 1e5
        best_epoch = 0
        model.train()
        num_iter_per_epoch = len(training_generator)
        uses_stock = opt.model_type in stock_models

        # train the model
        print("start to train model...")
        for epoch in range(opt.num_epoches):
            dataloader_init_time = datetime.datetime.now()
            for step, (days_news, days_stock,
                       label) in enumerate(training_generator):
                dataloader_end_time = datetime.datetime.now()
                print("the dataloader loading time is: {}s...".format(
                    (dataloader_end_time - dataloader_init_time).seconds))
                if use_cuda:
                    days_news = days_news.cuda()
                    days_stock = days_stock.cuda()
                    label = label.cuda()
                    print("data use cuda...")
                training_init_time = datetime.datetime.now()
                optimizer.zero_grad()
                if uses_stock:
                    predictions = model(days_news, days_stock)
                else:
                    predictions = model(days_news)
                # ``label`` is already a tensor; wrapping it in torch.tensor
                # only copied it and raised a UserWarning.
                loss = criterion(predictions, label)
                loss.backward()
                optimizer.step()
                training_metrics = get_evaluation(
                    label.cpu().numpy(),
                    predictions.cpu().detach().numpy(),
                    list_metrics=["accuracy"])
                print(
                    "Epoch: {}/{}, Iteration: {}/{}, Lr: {}, Loss: {}, Accuracy: {}"
                    .format(epoch + 1, opt.num_epoches, step + 1,
                            num_iter_per_epoch,
                            optimizer.param_groups[0]['lr'], loss,
                            training_metrics["accuracy"]))
                global_step = epoch * num_iter_per_epoch + step
                writer.add_scalar('Train/Loss', loss, global_step)
                writer.add_scalar('Train/Accuracy',
                                  training_metrics["accuracy"], global_step)
                training_end_time = datetime.datetime.now()
                print("the training time is: {}s...".format(
                    (training_end_time - training_init_time).seconds))
                # Restart the timer so the next report covers only the wait
                # for the next batch, not the time since the epoch began.
                dataloader_init_time = datetime.datetime.now()

            if epoch % opt.test_interval == 0:
                model.eval()
                loss_ls = []
                te_label_ls = []
                te_pred_ls = []
                for te_days_news, te_days_stock, te_label in test_generator:
                    num_sample = len(te_label)
                    if use_cuda:
                        te_days_news = te_days_news.cuda()
                        te_days_stock = te_days_stock.cuda()
                        te_label = te_label.cuda()
                    with torch.no_grad():
                        if uses_stock:
                            te_predictions = model(te_days_news, te_days_stock)
                        else:
                            te_predictions = model(te_days_news)
                    te_loss = criterion(te_predictions, te_label)
                    # Weight by batch size so the mean below is per sample.
                    loss_ls.append(te_loss * num_sample)
                    te_label_ls.extend(te_label.clone().cpu())
                    te_pred_ls.append(te_predictions.clone().cpu())
                te_loss = sum(loss_ls) / test_set.__len__()
                te_pred = torch.cat(te_pred_ls, 0)
                te_label = np.array(te_label_ls)
                test_metrics = get_evaluation(
                    te_label,
                    te_pred.numpy(),
                    list_metrics=["accuracy", "confusion_matrix"])
                output_file.write(
                    "Epoch: {}/{} \nTest loss: {} Test accuracy: {} \nTest confusion matrix: \n{}\n\n"
                    .format(epoch + 1, opt.num_epoches, te_loss,
                            test_metrics["accuracy"],
                            test_metrics["confusion_matrix"]))
                print("Epoch: {}/{}, Lr: {}, Loss: {}, Accuracy: {}".format(
                    epoch + 1, opt.num_epoches,
                    optimizer.param_groups[0]['lr'], te_loss,
                    test_metrics["accuracy"]))
                writer.add_scalar('Test/Loss', te_loss, epoch)
                writer.add_scalar('Test/Accuracy', test_metrics["accuracy"],
                                  epoch)
                model.train()

                if te_loss + opt.es_min_delta < best_loss:
                    best_loss = te_loss
                    best_epoch = epoch
                    torch.save(model,
                               opt.saved_path + opt.model_type + "_model")

                # Early stopping
                if epoch - best_epoch > opt.es_patience > 0:
                    # Report the best loss, not the loss of the last epoch.
                    print(
                        "Stop training at epoch {}. The lowest loss achieved is {}"
                        .format(epoch, best_loss))
                    break
    finally:
        # Flush pending TensorBoard events and the text log on every exit path.
        if writer is not None:
            writer.close()
        output_file.close()
def train(opt, train_data_path, test_data_path, valid_data_path):
    """Train a ``HierAttNet`` classifier, then score the best checkpoint.

    Training uses the TSV at ``train_data_path``.  Every
    ``opt.test_interval`` epochs the model is scored on the validation TSV,
    saved as ``<opt.saved_path>/whole_model_han`` when the validation loss
    improves by more than ``opt.es_min_delta``, and training stops when
    ``opt.es_patience`` (> 0) epochs pass without improvement.  The saved
    checkpoint is then reloaded and evaluated on the test TSV; accuracy,
    precision, recall, F1, confusion matrix and quadratic weighted kappa are
    printed.

    Args:
        opt: options object holding the hyper-parameters and paths read below.
        train_data_path: tab-separated training file.
        test_data_path: tab-separated test file.
        valid_data_path: tab-separated validation file.
    """
    task = opt.task_name
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        torch.cuda.manual_seed(2019)
    else:
        torch.manual_seed(2019)

    # Only the run parameters are logged; the context manager closes the file
    # right away instead of leaking the handle.
    with open(opt.saved_path + os.sep + "logs.txt", "w") as output_file:
        output_file.write("Model's parameters: {}".format(vars(opt)))

    training_params = {
        "batch_size": opt.batch_size,
        "shuffle": False,
        "drop_last": True
    }
    test_params = {
        "batch_size": opt.batch_size,
        "shuffle": False,
        "drop_last": False
    }
    max_word_length, max_sent_length = get_max_lengths(opt.input_path)
    print("Max words: ", max_word_length, "Max sents: ", max_sent_length)

    # The *_all_folds lists keep the reporting format of a multi-fold run even
    # though a single train/valid/test split is evaluated here.
    predicted_all_folds = []
    true_all_folds = []
    accuracies_all_folds = []
    precision_all_folds = []
    recall_all_folds = []
    f1_all_folds = []
    qwk_all_folds = []

    df_train = pd.read_csv(train_data_path, encoding='utf8', sep='\t')
    df_test = pd.read_csv(test_data_path, encoding='utf8', sep='\t')
    df_valid = pd.read_csv(valid_data_path, encoding='utf8', sep='\t')

    training_set = MyDataset(df_train, opt.vocab_path, task, max_sent_length,
                             max_word_length)
    training_generator = DataLoader(training_set, **training_params)
    test_set = MyDataset(df_test, opt.vocab_path, task, max_sent_length,
                         max_word_length)
    test_generator = DataLoader(test_set, **test_params)
    valid_set = MyDataset(df_valid, opt.vocab_path, task, max_sent_length,
                          max_word_length)
    valid_generator = DataLoader(valid_set, **test_params)

    model = HierAttNet(opt.word_hidden_size, opt.sent_hidden_size,
                       opt.batch_size, training_set.num_classes,
                       opt.vocab_path, max_sent_length, max_word_length)
    if use_cuda:
        model.cuda()
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr)
    best_loss = 1e5
    best_epoch = 0
    num_iter_per_epoch = len(training_generator)

    for epoch in range(opt.num_epoches):
        model.train()
        for step, (feature, label) in enumerate(training_generator):
            if use_cuda:
                feature = feature.cuda()
                label = label.cuda()
            optimizer.zero_grad()
            model._init_hidden_state()
            predictions = model(feature)
            loss = criterion(predictions, label)
            loss.backward()
            # Clip *before* the update: clipping after optimizer.step() has no
            # effect because zero_grad() discards the clipped gradients.
            # `clip_grad_norm_` helps prevent exploding gradients in RNNs / LSTMs.
            torch.nn.utils.clip_grad_norm_(model.parameters(), 0.25)
            optimizer.step()
            training_metrics = get_evaluation(
                label.cpu().numpy(),
                predictions.cpu().detach().numpy(),
                list_metrics=["accuracy"])
            print(
                "Epoch: {}/{}, Iteration: {}/{}, Lr: {}, Loss: {}, Accuracy: {}"
                .format(epoch + 1, opt.num_epoches, step + 1,
                        num_iter_per_epoch, optimizer.param_groups[0]['lr'],
                        loss, training_metrics["accuracy"]))

        if epoch % opt.test_interval == 0:
            model.eval()
            loss_ls = []
            te_label_ls = []
            te_pred_ls = []
            for te_feature, te_label in valid_generator:
                num_sample = len(te_label)
                if use_cuda:
                    te_feature = te_feature.cuda()
                    te_label = te_label.cuda()
                with torch.no_grad():
                    # The last batch may be smaller than batch_size.
                    model._init_hidden_state(num_sample)
                    te_predictions = model(te_feature)
                te_loss = criterion(te_predictions, te_label)
                loss_ls.append(te_loss * num_sample)
                te_label_ls.extend(te_label.clone().cpu())
                te_pred_ls.append(te_predictions.clone().cpu())
            # The batches come from the validation set, so average over its
            # size (the original divided by the test-set size).
            te_loss = sum(loss_ls) / valid_set.__len__()
            te_pred = torch.cat(te_pred_ls, 0)
            te_label = np.array(te_label_ls)
            test_metrics = get_evaluation(
                te_label,
                te_pred.numpy(),
                list_metrics=["accuracy", "confusion_matrix", "qwk"])
            print("Epoch: {}/{}, Lr: {}, Loss: {}, Accuracy: {}".format(
                epoch + 1, opt.num_epoches, optimizer.param_groups[0]['lr'],
                te_loss, test_metrics["accuracy"]))

            if te_loss + opt.es_min_delta < best_loss:
                best_loss = te_loss
                best_epoch = epoch
                print('Saving model')
                torch.save(model, opt.saved_path + os.sep + "whole_model_han")

            # Early stopping
            if epoch - best_epoch > opt.es_patience > 0:
                print(
                    "Stop training at epoch {}. The lowest loss achieved is {}"
                    .format(epoch, best_loss))
                break

    print()
    print('Evaluation: ')
    print()

    # Reload the best checkpoint first, then switch *that* model to eval mode
    # (the original called eval() on the model it was about to discard).
    model = torch.load(opt.saved_path + os.sep + "whole_model_han")
    model.eval()

    te_label_ls = []
    te_pred_ls = []
    for te_feature, te_label in test_generator:
        num_sample = len(te_label)
        if use_cuda:
            te_feature = te_feature.cuda()
            te_label = te_label.cuda()
        with torch.no_grad():
            model._init_hidden_state(num_sample)
            te_predictions = model(te_feature)
        te_label_ls.extend(te_label.clone().cpu())
        te_pred_ls.append(te_predictions.clone().cpu())
    te_pred = torch.cat(te_pred_ls, 0)
    te_label = np.array(te_label_ls)
    test_metrics = get_evaluation(te_label,
                                  te_pred.numpy(),
                                  list_metrics=[
                                      "accuracy", "precision", "recall", "f1",
                                      "confusion_matrix", 'qwk'
                                  ])
    true = te_label
    preds = np.argmax(te_pred.numpy(), -1)
    predicted_all_folds.extend(preds)
    true_all_folds.extend(true)

    print("Test set accuracy: {}".format(test_metrics["accuracy"]))
    print("Test set precision: {}".format(test_metrics["precision"]))
    print("Test set recall: {}".format(test_metrics["recall"]))
    print("Test set f1: {}".format(test_metrics["f1"]))
    print("Test set cm: {}".format(test_metrics["confusion_matrix"]))
    print("Test set qwk: {}".format(test_metrics["qwk"]))

    accuracies_all_folds.append(test_metrics["accuracy"])
    precision_all_folds.append(test_metrics["precision"])
    recall_all_folds.append(test_metrics["recall"])
    f1_all_folds.append(test_metrics["f1"])
    qwk_all_folds.append(test_metrics["qwk"])
    print()

    print()
    print("Task: ", task)
    print("Accuracy: ", accuracy_score(true_all_folds, predicted_all_folds))
    print(
        "Precison: ",
        precision_score(true_all_folds,
                        predicted_all_folds,
                        average="weighted"))
    print(
        "Recall: ",
        recall_score(true_all_folds, predicted_all_folds, average="weighted"))
    print("F1: ",
          f1_score(true_all_folds, predicted_all_folds, average="weighted"))
    print('Confusion matrix: ',
          confusion_matrix(true_all_folds, predicted_all_folds))
    print(
        'QWK: ',
        cohen_kappa_score(true_all_folds,
                          predicted_all_folds,
                          weights="quadratic"))
    print('All folds accuracy: ', accuracies_all_folds)
    print('All folds precision: ', precision_all_folds)
    print('All folds recall: ', recall_all_folds)
    print('All folds f1: ', f1_all_folds)
    print('All folds QWK: ', qwk_all_folds)
def train(opt):
    """Train a HierAttNet model, evaluating on the held-out indices each epoch.

    Configuration comes from ``opt`` (``saved_path``, ``batch_size``,
    ``sent_feature_size``, ``num_bins``, ``lr``, ``momentum``,
    ``num_epoches``, ``log_interval``, ``es_min_delta``, ``es_patience``)
    and from many module-level names that are not defined in this function
    (``use_cuda``, ``RANDOM_SEED``, the ``*File`` / ``*_path`` values,
    ``training_inds``, ``test_these_inds``, ``test_this_label``,
    ``get_concept_similarity``, ``USE_TRAINING_METRICS``, ...).

    Side effects: writes ``logs.txt`` under ``opt.saved_path``, recreates
    ``log_path``, writes pickles under ``log/`` and pickles the evaluation
    metrics to the ``evaluationResultFile*`` paths.

    NOTE(review): this block was recovered from whitespace-collapsed text;
    the nesting below is a best-effort reconstruction and should be checked
    against the original file.
    """
    # Seed every RNG in use so runs are repeatable.
    if use_cuda:
        torch.cuda.manual_seed(RANDOM_SEED)
    else:
        torch.manual_seed(RANDOM_SEED)
    np.random.seed(RANDOM_SEED)
    random.seed(RANDOM_SEED)
    # NOTE(review): this handle is never closed in this function.
    output_file = open(opt.saved_path + os.sep + "logs.txt", "w")
    output_file.write("Model's parameters: {}".format(vars(opt)))
    training_params = {"batch_size": opt.batch_size, "shuffle": True, "drop_last": False}
    test_params = {"batch_size": opt.batch_size, "shuffle": True, "drop_last": False}
    mydataset = MyDataset(superspan_HANsFile, superspan_HANs_labelsFile, label_namesFile, ImportanceFeatureMatsFile, model_gensim,
                          max_vocab, training_inds, childLabel2ParentLabelFile, None, dataset, descriptor_HANsFile, VvFile)
    # NOTE(review): with the assignment below commented out,
    # ``training_generator`` must exist at module level or the later uses
    # raise NameError - confirm.
    # training_generator = DataLoader(mydataset, **training_params)
    # Everything that is not a training index is used for testing, unless
    # an explicit list of test indices was supplied.
    testing_inds = [i for i in range(len(mydataset.text_lines)) if i not in training_inds]
    if test_these_inds:
        testing_inds = test_these_inds
    test_set = MyDataset(superspan_HANsFile, superspan_HANs_labelsFile, label_namesFile, ImportanceFeatureMatsFile, model_gensim, max_vocab,
                         testing_inds, None, test_this_label, None, None, None, mydataset.max_length_sentences, mydataset.max_length_word)
    test_generator = DataLoader(test_set, **test_params)
    model = HierAttNet(opt.sent_feature_size, phrases2feature_vector_path, dictionary_path,
                       mydataset.max_length_sentences, mydataset.max_length_word,
                       model_save_path, Vv_embedding_path, path_semanticsFile, max_vocab, use_cuda, mydataset, opt.num_bins)
    # Start from an empty log directory.
    if os.path.isdir(log_path):
        shutil.rmtree(log_path)
    os.makedirs(log_path)
    writer = SummaryWriter(log_path)
    if use_cuda:
        model.cuda()
    criterion = nn.CrossEntropyLoss()
    # The two attention sub-networks get their own parameter group with a
    # learning rate 1000x the base rate.
    att_parameters = set(model.sent_att_net.parameters()) | set(model.word_att_net.parameters())
    optimizer = torch.optim.SGD([
        {'params': filter(lambda p: p.requires_grad, set(model.parameters()) - att_parameters)},
        {'params': filter(lambda p: p.requires_grad, att_parameters), 'lr': opt.lr * 1000},
    ], lr=opt.lr, momentum=opt.momentum)
    best_loss = 1e5
    best_epoch = 0
    model.train()
    num_iter_per_epoch = len(training_generator)
    # NOTE(review): classind2size, topk2classind2errorSize and
    # all_labels_set are assigned but not read again in this function.
    classind2size = Counter()
    topk2classind2errorSize = {
        'top 1': Counter(),
        'top 3': Counter(),
        'top 5': Counter(),
    }
    # Never set to True here, so the backward/step branch always runs.
    stop_training = False
    all_labels_set = set([l for l in mydataset.labels_list if '.' in l])
    sampled_labels_set = set()
    for epoch in range(opt.num_epoches):
        # ---- training pass ----
        for iter, (features, ImportanceFeatureMat, labels, indexes, addtional_info) in tqdm(enumerate(training_generator)):
            if use_cuda:
                features = features.cuda()
                ImportanceFeatureMat = ImportanceFeatureMat.cuda()
                labels = labels.cuda()
            optimizer.zero_grad()
            predictions, attn_score, similarity_w_attentions = model(
                features, ImportanceFeatureMat, get_concept_similarity)
            loss = criterion(predictions, labels)
            if not stop_training:
                loss.backward()
                optimizer.step()
            # Track which label ids have been seen in training batches.
            sampled_labels_set |= set(labels.cpu().numpy())
            if USE_TRAINING_METRICS:
                training_metrics = get_evaluation(labels.cpu().numpy(), predictions.data.cpu().numpy(), list_metrics=[
                    "accuracy", "top K accuracy", "top K classind2wrong_doc_ind", "top K tree score"],
                    childLabel2ParentLabel=mydataset.childLabel2ParentLabel, labels_list=mydataset.labels_list)
                print("Epoch: {}/{}, Iteration: {}/{}, Lr: {}, Loss: {}, top K accuracy: {}, top K tree score: {}".format(
                    epoch + 1,
                    opt.num_epoches,
                    iter + 1,
                    num_iter_per_epoch,
                    optimizer.param_groups[0]['lr'],
                    loss, training_metrics["top K accuracy"], training_metrics["top K tree score"]))
                writer.add_scalar('Train/Loss', loss, epoch * num_iter_per_epoch + iter)
                writer.add_scalar('Train/Accuracy', training_metrics["accuracy"], epoch * num_iter_per_epoch + iter)
            # One copy of the label list per document in the batch; used as
            # DataFrame column names in the dumps below.
            column_names = [model.dataset.labels_list.copy() for _ in range(labels.shape[0])]
            # Periodically dump model internals for offline inspection.
            # NOTE(review): the files opened for pickle.dump are never
            # explicitly closed.
            if iter % opt.log_interval == 1:
                if get_concept_similarity:
                    pickle.dump([pd.DataFrame(s, index=model.dataset.doc_tensor2doc(features[i]), columns=column_names[i]) for i, s in enumerate(
                        model.similarity_w_attentions)], open('log/model.similarity_w_attentions{}.bin'.format(iter), 'wb'))
                pickle.dump(pd.DataFrame(model.bin_weight_history), open(
                    'log/model.bin_weight_history_{}.bin'.format(iter), 'wb'))
                pickle.dump(pd.DataFrame(model.sent_att_net.context_weight_history, columns=['position', 'length', 'inTitle']),
                            open('log/model.sent_att_net.context_weight_history_{}.bin'.format(iter), 'wb'))
                pickle.dump(pd.DataFrame(model.word_att_net.context_weight_history, columns=['meaningfulness', 'purity', 'targetness', 'completeness', 'nltk', 'spacy_np', 'spacy_entity', 'autophrase']),
                            open('log/model.word_att_net.context_weight_history_{}.bin'.format(iter), 'wb')
                            )
        # ---- evaluation pass ----
        model.eval()
        loss_ls = []
        te_label_ls = []
        te_pred_ls = []
        for iter, (te_feature, ImportanceFeatureMat, te_label, indexes, addtional_info) in tqdm(enumerate(test_generator), total=len(test_generator)):
            num_sample = len(te_label)
            if use_cuda:
                te_feature = te_feature.cuda()
                ImportanceFeatureMat = ImportanceFeatureMat.cuda()
                te_label = te_label.cuda()
            with torch.no_grad():
                # The similarity output is only requested when a specific
                # subset (indices or label) is being inspected.
                if test_these_inds or test_this_label:
                    te_predictions, te_attn_score, similarity_w_attentions = model(
                        te_feature, ImportanceFeatureMat, get_concept_similarity=get_concept_similarity)
                else:
                    te_predictions, te_attn_score, _ = model(te_feature, ImportanceFeatureMat)
            te_loss = criterion(te_predictions, te_label)
            # Weight each batch loss by its size so the epoch loss below is
            # a per-sample average.
            loss_ls.append(te_loss * num_sample)
            te_label_ls.extend(te_label.clone().cpu())
            te_pred_ls.append(te_predictions.clone().cpu())
            if test_these_inds or test_this_label:
                # Inspection mode: print every top-1 error and dump its
                # similarity matrix.
                column_names = [model.dataset.labels_list.copy() for _ in range(te_label.shape[0])]
                training_metrics = get_evaluation(te_label.cpu().numpy(), te_predictions.data.cpu().numpy(), list_metrics=[
                    "accuracy", "top K accuracy", "top K classind2wrong_doc_ind", "top K tree score"],
                    childLabel2ParentLabel=mydataset.childLabel2ParentLabel, labels_list=mydataset.labels_list)
                for classind, doc_ind_in_batchs in training_metrics["top K classind2wrong_doc_ind"]['top 1'].items():
                    print('error for class: ', classind, mydataset.labels_list[classind])
                    for (doc_ind, preds) in doc_ind_in_batchs:
                        try:
                            print('doc_ind', doc_ind, 'predicted: ', [
                                mydataset.labels_list[pred_classind] for pred_classind in preds], addtional_info[doc_ind])
                            # Tag each predicted label column with its rank.
                            for i, pred_classind in enumerate(preds):
                                column_names[doc_ind][pred_classind] += "@{}".format(i)
                            print(mydataset.doc_tensor2doc(
                                te_feature[doc_ind]), 'tensor index:', indexes[doc_ind])
                            sim_save_path = 'log/model.similarity_w_attentions_docindex_{}.bin'.format(indexes.numpy()[
                                doc_ind])
                            pickle.dump(pd.DataFrame(similarity_w_attentions[doc_ind], index=model.dataset.doc_tensor2doc(
                                te_feature[doc_ind]), columns=column_names[doc_ind]), open(sim_save_path, 'wb'))
                        except Exception as e:
                            # NOTE(review): drops into an interactive
                            # debugger on any failure; will hang unattended
                            # runs.
                            import ipdb
                            ipdb.set_trace()
                            raise e
                continue
            if iter % 10 == 1:
                print('test iter {}/{}'.format(iter, len(test_generator)))
        te_loss = sum(loss_ls) / test_set.__len__()
        te_pred = torch.cat(te_pred_ls, 0)
        te_label = np.array(te_label_ls)
        test_metrics = get_evaluation(te_label, te_pred.numpy(), list_metrics=[
            "accuracy", "top K accuracy", "top K tree score", "top K accuracy by class", "confusion_matrix"],
            childLabel2ParentLabel=mydataset.childLabel2ParentLabel, labels_list=mydataset.labels_list)
        # Opening evaluationResultFile in 'w' mode truncates it; nothing is
        # written to it because the print below is commented out.
        with open(evaluationResultFile, 'w') as my_file:
            # print(str(test_metrics), file=my_file)
            pickle.dump(test_metrics, open(evaluationResultFile_bin, 'wb'))
        output_file.write(
            "Epoch: {}/{} \nTest loss: {} Test accuracy: {} \nTest confusion matrix: \n{}\n\n".format(
                epoch + 1, opt.num_epoches,
                te_loss,
                test_metrics["accuracy"],
                test_metrics["confusion_matrix"]))
        print("Epoch: {}/{}, Lr: {}, Loss: {}, Accuracy: {}".format(
            epoch + 1,
            opt.num_epoches,
            optimizer.param_groups[0]['lr'],
            te_loss, test_metrics["accuracy"]))
        writer.add_scalar('Test/Loss', te_loss, epoch)
        writer.add_scalar('Test/Accuracy', test_metrics["accuracy"], epoch)
        model.train()
        # An epoch counts as an improvement only if it beats the best loss
        # by more than es_min_delta.
        if te_loss + opt.es_min_delta < best_loss:
            best_loss = te_loss
            best_epoch = epoch
        # Early stopping
        # (chained comparison: patience must be positive and exceeded)
        if epoch - best_epoch > opt.es_patience > 0:
            # NOTE(review): prints the current te_loss, not best_loss.
            print("Stop training at epoch {}. The lowest loss achieved is {}".format(
                epoch,
                te_loss))
            break
    # Persist the metrics of the last evaluated epoch.
    pickle.dump(test_metrics, open(evaluationResultFileFinal_bin, 'wb'))


if __name__ == '__main__':
    # Select the GPU configured for the current dataset before training.
    if use_cuda:
        torch.cuda.set_device(dataset2device[dataset])
    train(args)
f.write("acc: " + str(acc) + "\n") f.write("f1_micro: " + str(f1_micro) + "\n") f.write("f1_macro: " + str(f1_macro) + "\n") f.close() return acc, f1_micro, f1_macro if __name__ == '__main__': opt = get_train_args() if opt.gpu: torch.cuda.manual_seed(0) else: torch.manual_seed(0) train_idx, test_idx, label2idx = k_fold_split(opt.data_path) trainset = MyDataset(opt.data_path, opt.bert_path, train_idx[0], label2idx) trainloader = torch.utils.data.DataLoader(trainset, batch_size=opt.batch_size, shuffle=True, num_workers=opt.num_workers) testset = MyDataset(opt.data_path, opt.bert_path, test_idx[0], label2idx) testloader = torch.utils.data.DataLoader(testset, batch_size=50, shuffle=False, num_workers=opt.num_workers) model = BertHierAttNet(len(label2idx), opt.bert_path) if opt.gpu: model = nn.DataParallel(model) model.cuda() optimizer = AdamW(model.parameters(), lr=opt.lr)
def train(opt):
    """Train a two-class character-level CNN and keep the most accurate model.

    Configuration is read from ``opt``: ``output``, ``batch_size``,
    ``max_length``, ``dumps``, ``alphabet``, ``f1``, ``f2``, ``log_path``,
    ``feature``, ``optimizer`` ("adam" or "sgd"), ``lr``, ``num_epochs`` and
    ``es_patience``.

    Side effects: creates ``opt.output`` if missing and writes ``logs.txt``
    into it, recreates the ``<log_path>_<feature>`` directory, and saves the
    whole model with ``torch.save`` each time evaluation accuracy improves.

    Raises:
        ValueError: if ``opt.optimizer`` is neither "adam" nor "sgd".
    """
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        torch.cuda.manual_seed(121)
    else:
        torch.manual_seed(121)
    if not os.path.exists(opt.output):
        os.makedirs(opt.output)

    training_params = {"batch_size": opt.batch_size, "shuffle": True, "num_workers": 0}
    test_params = {"batch_size": opt.batch_size, "shuffle": False, "num_workers": 0}

    # The context manager guarantees the log is flushed and closed even when
    # training stops early or raises.
    with open(opt.output + os.sep + "logs.txt", "w") as output_file:
        output_file.write("Model's parameters: {}".format(vars(opt)))

        training_set = MyDataset(None, None, opt.max_length, opt.dumps)
        print("Found", len(training_set), "for training")
        # NOTE(review): evaluation runs on the training data itself because
        # the random split below is disabled, so the reported accuracy is
        # not a held-out estimate.
        test_set = training_set
        #training_set, test_set = torch.utils.data.random_split(training_set, [len(training_set) - test_size, test_size])
        training_generator = DataLoader(training_set, **training_params)
        test_generator = DataLoader(test_set, **test_params)

        model = CharacterLevelCNN(input_length=opt.max_length, n_classes=2,
                                  input_dim=len(opt.alphabet),
                                  n_conv_filters=opt.f1, n_fc_neurons=opt.f2)

        # Start from an empty log directory.
        log_path = "{}_{}".format(opt.log_path, opt.feature)
        if os.path.isdir(log_path):
            shutil.rmtree(log_path)
        os.makedirs(log_path)
        #writer = SummaryWriter(log_path)

        if use_cuda:
            model.cuda()

        criterion = nn.CrossEntropyLoss()
        if opt.optimizer == "adam":
            optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr)
        elif opt.optimizer == "sgd":
            optimizer = torch.optim.SGD(model.parameters(), lr=opt.lr, momentum=0.9)
        else:
            # Previously this fell through and failed later with a
            # NameError on the first optimizer use.
            raise ValueError("Unsupported optimizer: {}".format(opt.optimizer))

        best_loss = 1e5
        best_accurancy = 0
        best_epoch = 0
        model.train()
        num_iter_per_epoch = len(training_generator)

        for epoch in range(opt.num_epochs):
            # ---- training pass ----
            for step, batch in enumerate(training_generator):
                feature, label = batch
                if use_cuda:
                    feature = feature.cuda()
                    label = label.cuda()
                optimizer.zero_grad()
                predictions = model(feature)
                loss = criterion(predictions, label)
                loss.backward()
                optimizer.step()

                training_metrics = get_evaluation(label.cpu().numpy(),
                                                  predictions.cpu().detach().numpy(),
                                                  list_metrics=["accuracy"])
                print("Epoch: {}/{}, Iteration: {}/{}, Lr: {}, Loss: {}, Accuracy: {}".format(
                    epoch + 1,
                    opt.num_epochs,
                    step + 1,
                    num_iter_per_epoch,
                    optimizer.param_groups[0]['lr'],
                    loss, training_metrics["accuracy"]))
                #writer.add_scalar('Train/Loss', loss, epoch * num_iter_per_epoch + iter)
                #writer.add_scalar('Train/Accuracy', training_metrics["accuracy"], epoch * num_iter_per_epoch + iter)

            # ---- evaluation pass ----
            model.eval()
            loss_ls = []
            te_label_ls = []
            te_pred_ls = []
            for batch in test_generator:
                te_feature, te_label = batch
                num_sample = len(te_label)
                if use_cuda:
                    te_feature = te_feature.cuda()
                    te_label = te_label.cuda()
                with torch.no_grad():
                    te_predictions = model(te_feature)
                te_loss = criterion(te_predictions, te_label)
                # Weight by batch size so the epoch loss is a per-sample
                # average even when the last batch is smaller.
                loss_ls.append(te_loss * num_sample)
                te_label_ls.extend(te_label.clone().cpu())
                te_pred_ls.append(te_predictions.clone().cpu())

            te_loss = sum(loss_ls) / len(test_set)
            te_pred = torch.cat(te_pred_ls, 0)
            te_label = np.array(te_label_ls)
            test_metrics = get_evaluation(te_label, te_pred.numpy(),
                                          list_metrics=["accuracy", "confusion_matrix"])
            output_file.write(
                "Epoch: {}/{} \nTest loss: {} Test accuracy: {} \nTest confusion matrix: \n{}\n\n".format(
                    epoch + 1, opt.num_epochs,
                    te_loss,
                    test_metrics["accuracy"],
                    test_metrics["confusion_matrix"]))
            print("Epoch: {}/{}, Lr: {}, Loss: {}, Accuracy: {}".format(
                epoch + 1,
                opt.num_epochs,
                optimizer.param_groups[0]['lr'],
                te_loss, test_metrics["accuracy"]))
            #writer.add_scalar('Test/Loss', te_loss, epoch)
            #writer.add_scalar('Test/Accuracy', test_metrics["accuracy"], epoch)
            model.train()

            # Checkpoint on accuracy (not loss) improvements.
            if test_metrics["accuracy"] > best_accurancy:
                best_loss = te_loss
                best_accurancy = test_metrics["accuracy"]
                best_epoch = epoch
                torch.save(model, "{}/char-cnn_{}".format(opt.output, opt.feature))

            # Early stopping: only active when es_patience is positive.
            if epoch - best_epoch > opt.es_patience > 0:
                print("Stop training at epoch {}. The highest accurancy is {} at epoch {}".format(epoch, best_accurancy, best_epoch))
                break

            # For SGD, decay the learning rate by 25% every third epoch.
            if opt.optimizer == "sgd" and epoch % 3 == 0 and epoch > 0:
                current_lr = optimizer.state_dict()['param_groups'][0]['lr']
                current_lr *= 0.75
                for param_group in optimizer.param_groups:
                    param_group['lr'] = current_lr
def test(opt):
    """Evaluate a saved news/stock model and export per-sample predictions.

    Configuration is read from ``opt``: ``batch_size``, ``output``,
    ``model_path``, ``model_type``, ``train_set``, ``test_set``,
    ``word2vec_path`` and ``days_num``.

    Side effects: deletes and recreates ``opt.output``, writes
    ``predictions.csv`` into it and prints loss, accuracy and the confusion
    matrix.

    Raises:
        ValueError: if ``opt.model_type`` is not one of the supported types.
    """
    news_only_types = ["ori_han", "sent_ori_han", "muil_han", "sent_muil_han"]
    news_and_stock_types = ["muil_stock_han", "sent_muil_stock_han"]
    # Fail before touching the output directory; previously an unknown type
    # surfaced as a NameError inside the evaluation loop.
    if opt.model_type not in news_only_types + news_and_stock_types:
        raise ValueError("Unsupported model type: {}".format(opt.model_type))

    test_params = {
        "batch_size": opt.batch_size,
        "shuffle": False,
        "drop_last": False
    }
    if os.path.isdir(opt.output):
        shutil.rmtree(opt.output)
    os.makedirs(opt.output)

    use_cuda = torch.cuda.is_available()
    model_file = opt.model_path + os.sep + opt.model_type + "_model"
    if use_cuda:
        model = torch.load(model_file)
    else:
        # Remap CUDA-saved storages onto the CPU.
        model = torch.load(model_file,
                           map_location=lambda storage, loc: storage)

    # Padding lengths are derived from the training set so that test inputs
    # are shaped like the ones the model was trained on.
    max_news_length, max_sent_length, max_word_length = get_max_lengths(
        opt.train_set)
    stock_length = 9
    test_set = MyDataset(data_path=opt.test_set,
                         dict_path=opt.word2vec_path,
                         max_news_length=max_news_length,
                         max_sent_length=max_sent_length,
                         max_word_length=max_word_length,
                         days_num=opt.days_num,
                         stock_length=stock_length)
    test_generator = DataLoader(test_set, **test_params)
    if use_cuda:
        model.cuda()
    model.eval()

    te_label_ls = []
    te_pred_ls = []
    for te_days_news, te_days_stock, te_label in test_generator:
        if use_cuda:
            # Rebind the name that is passed to the model; the original
            # stored the CUDA copy in an unused variable, so the model
            # received a CPU tensor.
            te_days_news = te_days_news.cuda()
            te_days_stock = te_days_stock.cuda()
            te_label = te_label.cuda()
        with torch.no_grad():
            if opt.model_type in news_only_types:
                te_predictions = model(te_days_news)
            else:
                te_predictions = model(te_days_news, te_days_stock)
            # Explicit class dimension; assumes (batch, classes) output,
            # which is what the per-row argmax below relies on.
            te_predictions = F.softmax(te_predictions, dim=1)
        te_label_ls.extend(te_label.clone().cpu())
        te_pred_ls.append(te_predictions.clone().cpu())

    te_pred = torch.cat(te_pred_ls, 0).numpy()
    te_label = np.array(te_label_ls)

    fieldnames = ['True label', 'Predicted label', 'Content']
    # newline='' is required by the csv module so that embedded newlines
    # and row terminators are written correctly on every platform.
    with open(opt.output + os.sep + "predictions.csv", 'w', newline='') as csv_file:
        writer = csv.DictWriter(csv_file,
                                fieldnames=fieldnames,
                                quoting=csv.QUOTE_NONNUMERIC)
        writer.writeheader()
        # Four sequences are zipped, so four names must be unpacked; the
        # stock series is not written to the CSV.
        for true_lb, probs, news, _stock in zip(te_label, te_pred,
                                                test_set.newses,
                                                test_set.stocks):
            writer.writerow({
                'True label': true_lb + 1,
                'Predicted label': np.argmax(probs) + 1,
                'Content': news
            })

    test_metrics = get_evaluation(
        te_label,
        te_pred,
        list_metrics=["accuracy", "loss", "confusion_matrix"])
    print("Prediction:\nLoss: {} Accuracy: {} \nConfusion matrix: \n{}".format(
        test_metrics["loss"], test_metrics["accuracy"],
        test_metrics["confusion_matrix"]))
def test(opt):
    """Score text pairs with a saved model and export the predictions.

    Configuration is read from ``opt``: ``batch_size``, ``output``,
    ``pre_trained_model``, ``data_path`` and ``word2vec_path``.

    Each side of a pair is encoded by the model separately; the two
    encodings are compared with ``exponent_neg_manhattan_distance`` and a
    score of at least 0.5 is written as label 1.

    Side effects: deletes and recreates ``opt.output``, writes
    ``predictions.csv`` into it and prints accuracy and the confusion
    matrix.
    """
    test_params = {
        "batch_size": opt.batch_size,
        "shuffle": False,
        "drop_last": False
    }
    if os.path.isdir(opt.output):
        shutil.rmtree(opt.output)
    os.makedirs(opt.output)

    use_cuda = torch.cuda.is_available()
    if use_cuda:
        model = torch.load(opt.pre_trained_model)
    else:
        # Remap CUDA-saved storages onto the CPU.
        model = torch.load(opt.pre_trained_model,
                           map_location=lambda storage, loc: storage)

    # Sequence lengths come from the loaded model so inputs are padded the
    # way the model expects.
    test_set = MyDataset(opt.data_path, opt.word2vec_path,
                         model.max_sent_length, model.max_word_length)
    test_generator = DataLoader(test_set, **test_params)
    if use_cuda:
        model.cuda()
    model.eval()

    te_label_ls = []
    te_pred_ls = []
    for te_feature1, te_feature2, te_label in test_generator:
        num_sample = len(te_label)
        print("processing {} pairs".format(num_sample))
        if use_cuda:
            te_feature1 = te_feature1.cuda()
            te_feature2 = te_feature2.cuda()
            te_label = te_label.cuda()
        with torch.no_grad():
            # The hidden state is re-initialised for the actual batch size
            # before each forward pass (the last batch may be smaller).
            model._init_hidden_state(num_sample)
            te_feature1 = model(te_feature1)
            model._init_hidden_state(num_sample)
            te_feature2 = model(te_feature2)
        te_diff = exponent_neg_manhattan_distance(te_feature1, te_feature2)
        te_label_ls.extend(te_label.clone().cpu())
        te_pred_ls.append(te_diff.clone().cpu())

    te_pred = torch.cat(te_pred_ls, 0).numpy()
    te_label = np.array(te_label_ls)

    fieldnames = ['True label', 'Predicted label', 'Argument1', 'Argument2']
    # newline='' is required by the csv module; without it, text fields
    # containing newlines are mangled and Windows output gains blank rows.
    with open(opt.output + os.sep + "predictions.csv", 'w', newline='') as csv_file:
        writer = csv.DictWriter(csv_file,
                                fieldnames=fieldnames,
                                quoting=csv.QUOTE_NONNUMERIC)
        writer.writeheader()
        for true_lb, score, text_1, text_2 in zip(te_label, te_pred,
                                                  test_set.texts_1,
                                                  test_set.texts_2):
            # Threshold the similarity score into a binary label.
            pred_lb = 1 if score >= 0.5 else 0
            writer.writerow({
                'True label': true_lb,
                'Predicted label': pred_lb,
                'Argument1': text_1,
                'Argument2': text_2
            })

    test_metrics = get_evaluation(
        te_label, te_pred, list_metrics=["accuracy", "confusion_matrix"])
    print("Prediction:\nAccuracy: {} \nConfusion matrix: \n{}".format(
        test_metrics["accuracy"], test_metrics["confusion_matrix"]))
def train(opt):
    """Train a VDCNN text classifier and keep the most accurate model.

    Configuration is read from ``opt``: ``dataset``, ``depth``, ``input``,
    ``output``, ``batch_size``, ``max_length``, ``alphabet``, ``shortcut``,
    ``gpu``, ``optimizer`` ("adam" or "sgd"), ``lr`` and ``num_epochs``.
    For the built-in dataset names, ``opt.input`` and ``opt.output`` are
    overwritten with the folders returned by ``get_default_folder``.

    Side effects: creates ``opt.output`` if missing, writes ``logs.txt``
    there and saves the whole model as ``trained_model`` each time test
    accuracy improves.

    Raises:
        ValueError: if ``opt.optimizer`` is neither "adam" nor "sgd".
    """
    known_datasets = [
        "agnews", "dbpedia", "yelp_review", "yelp_review_polarity",
        "amazon_review", "amazon_polarity", "sogou_news", "yahoo_answers"
    ]
    if opt.dataset in known_datasets:
        opt.input, opt.output = get_default_folder(opt.dataset, opt.depth)
    if not os.path.exists(opt.output):
        os.makedirs(opt.output)

    training_params = {
        "batch_size": opt.batch_size,
        "shuffle": True,
        "num_workers": 0
    }
    test_params = {
        "batch_size": opt.batch_size,
        "shuffle": False,
        "num_workers": 0
    }

    # The context manager guarantees the log is flushed and closed even if
    # training raises.
    with open(opt.output + os.sep + "logs.txt", "w") as output_file:
        output_file.write("Model's parameters: {}".format(vars(opt)))

        training_set = MyDataset(opt.input + os.sep + "train.csv",
                                 opt.input + os.sep + "classes.txt",
                                 opt.max_length)
        test_set = MyDataset(opt.input + os.sep + "test.csv",
                             opt.input + os.sep + "classes.txt",
                             opt.max_length)
        training_generator = DataLoader(training_set, **training_params)
        test_generator = DataLoader(test_set, **test_params)

        model = VDCNN(n_classes=training_set.num_classes,
                      num_embedding=len(opt.alphabet) + 1,
                      embedding_dim=16,
                      depth=opt.depth,
                      n_fc_neurons=2048,
                      shortcut=opt.shortcut)
        if opt.gpu:
            model.cuda()

        criterion = nn.CrossEntropyLoss()
        if opt.optimizer == "adam":
            optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr)
        elif opt.optimizer == "sgd":
            optimizer = torch.optim.SGD(model.parameters(),
                                        lr=opt.lr,
                                        momentum=0.9)
        else:
            # Previously this fell through and failed later with a
            # NameError on the first optimizer use.
            raise ValueError("Unsupported optimizer: {}".format(opt.optimizer))

        model.train()
        num_iter_per_epoch = len(training_generator)
        best_accuracy = 0

        for epoch in range(opt.num_epochs):
            # ---- training pass ----
            for step, batch in enumerate(training_generator):
                # Keep the CPU labels for metric computation.
                _, n_true_label = batch
                if opt.gpu:
                    batch = [record.cuda() for record in batch]
                t_data, t_true_label = batch
                optimizer.zero_grad()
                t_predicted_label = model(t_data)
                n_prob_label = t_predicted_label.detach().cpu().numpy()
                loss = criterion(t_predicted_label, t_true_label)
                loss.backward()
                optimizer.step()
                training_metrics = get_evaluation(
                    n_true_label,
                    n_prob_label,
                    list_metrics=["accuracy", "loss"])
                print(
                    "Training: Iteration: {}/{} Epoch: {}/{} Loss: {} Accuracy: {}"
                    .format(step + 1, num_iter_per_epoch, epoch + 1,
                            opt.num_epochs, training_metrics["loss"],
                            training_metrics["accuracy"]))

            # ---- evaluation pass ----
            model.eval()
            test_true = []
            test_prob = []
            # torch.no_grad() replaces the removed ``volatile=True`` flag,
            # which no longer disables gradient tracking.
            with torch.no_grad():
                for batch in test_generator:
                    _, n_true_label = batch
                    batch = [record.long() for record in batch]
                    if opt.gpu:
                        batch = [record.cuda() for record in batch]
                    t_data, _ = batch
                    t_predicted_label = model(t_data)
                    # Explicit class dimension; assumes (batch, classes)
                    # logits, matching what CrossEntropyLoss receives above.
                    t_predicted_label = F.softmax(t_predicted_label, dim=1)
                    test_prob.append(t_predicted_label)
                    test_true.extend(n_true_label)
            test_prob = torch.cat(test_prob, 0)
            test_prob = test_prob.cpu().numpy()
            test_true = np.array(test_true)
            model.train()

            test_metrics = get_evaluation(
                test_true,
                test_prob,
                list_metrics=["accuracy", "loss", "confusion_matrix"])
            # The training figures logged here are those of the last
            # training batch of the epoch, not an epoch average.
            output_file.write(
                "Epoch: {}/{} \nTraining loss: {} Training accuracy: {} \nTest loss: {} Test accuracy: {} \nTest confusion matrix: \n{}\n\n"
                .format(epoch + 1, opt.num_epochs, training_metrics["loss"],
                        training_metrics["accuracy"], test_metrics["loss"],
                        test_metrics["accuracy"],
                        test_metrics["confusion_matrix"]))
            print("\tTest:Epoch: {}/{} Loss: {} Accuracy: {}\r".format(
                epoch + 1, opt.num_epochs, test_metrics["loss"],
                test_metrics["accuracy"]))

            if test_metrics["accuracy"] > best_accuracy:
                best_accuracy = test_metrics["accuracy"]
                torch.save(model, opt.output + os.sep + "trained_model")

            # For SGD, halve the learning rate every third epoch.
            if opt.optimizer == "sgd" and epoch % 3 == 0 and epoch > 0:
                current_lr = optimizer.state_dict()['param_groups'][0]['lr']
                current_lr /= 2
                for param_group in optimizer.param_groups:
                    param_group['lr'] = current_lr
def train(opt):
    """Train a BERT hierarchical-attention classifier on the first CV fold.

    Configuration is read from ``opt``: ``saved_path``, ``batch_size``,
    ``data_set``, ``bert_path``, ``bert_size``, ``word_hidden_size``,
    ``sent_hidden_size``, ``log_path``, ``lr``, ``momentum``,
    ``num_epoches``, ``test_interval``, ``es_min_delta`` and
    ``es_patience``.

    Side effects: writes ``logs.txt`` under ``opt.saved_path``, recreates
    ``opt.log_path`` for the summary writer, and saves the whole model as
    ``whole_model_han`` whenever the test loss improves by more than
    ``opt.es_min_delta``.
    """
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        torch.cuda.manual_seed(0)
    else:
        torch.manual_seed(0)

    training_params = {
        "batch_size": opt.batch_size,
        "shuffle": True,
        "drop_last": True
    }
    test_params = {
        "batch_size": opt.batch_size,
        "shuffle": False,
        "drop_last": False
    }

    # The context manager guarantees the log is flushed and closed even
    # when training stops early or raises.
    with open(opt.saved_path + os.sep + "logs.txt", "w") as output_file:
        output_file.write("Model's parameters: {}".format(vars(opt)))

        # Only the first fold of the split is used.
        train_idx, test_idx, label2idx = k_fold_split(opt.data_set)
        training_set = MyDataset(opt.data_set, opt.bert_path, train_idx[0],
                                 label2idx)
        training_generator = DataLoader(training_set, **training_params)
        test_set = MyDataset(opt.data_set, opt.bert_path, test_idx[0],
                             label2idx)
        test_generator = DataLoader(test_set, **test_params)

        model = BertHierAttNet(opt.bert_size, opt.word_hidden_size,
                               opt.sent_hidden_size, len(label2idx),
                               opt.bert_path)

        # Start from an empty log directory.
        if os.path.isdir(opt.log_path):
            shutil.rmtree(opt.log_path)
        os.makedirs(opt.log_path)
        writer = SummaryWriter(opt.log_path)
        # writer.add_graph(model, torch.zeros(opt.batch_size, max_sent_length, max_word_length))

        if use_cuda:
            model.cuda()

        criterion = nn.CrossEntropyLoss()
        # Frozen parameters (requires_grad False) are not optimised.
        optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad,
                                           model.parameters()),
                                    lr=opt.lr,
                                    momentum=opt.momentum)
        best_loss = 1e5
        best_epoch = 0
        model.train()
        num_iter_per_epoch = len(training_generator)

        for epoch in range(opt.num_epoches):
            # ---- training pass ----
            for i, (x, mask, label, _doc_id) in enumerate(training_generator):
                if use_cuda:
                    x = x.cuda()  # [batch, seq_num, seq_len]
                    mask = mask.cuda()  # [batch, seq_num, seq_len]
                    label = label.cuda()  # [batch]
                optimizer.zero_grad()
                predictions = model(x, mask)
                loss = criterion(predictions, label)
                loss.backward()
                optimizer.step()
                training_metrics = get_evaluation(
                    label.cpu().numpy(),
                    predictions.cpu().detach().numpy(),
                    list_metrics=["accuracy"])
                print(
                    "Epoch: {}/{}, Iteration: {}/{}, Lr: {}, Loss: {}, Accuracy: {}"
                    .format(epoch + 1, opt.num_epoches, i + 1,
                            num_iter_per_epoch,
                            optimizer.param_groups[0]['lr'], loss,
                            training_metrics["accuracy"]))
                writer.add_scalar('Train/Loss', loss,
                                  epoch * num_iter_per_epoch + i)
                writer.add_scalar('Train/Accuracy',
                                  training_metrics["accuracy"],
                                  epoch * num_iter_per_epoch + i)

            # ---- evaluation pass (every test_interval epochs) ----
            if epoch % opt.test_interval == 0:
                model.eval()
                loss_ls = []
                te_label_ls = []
                te_pred_ls = []
                # The test set is built by the same dataset class as the
                # training set, so it yields the same four-item batches and
                # the model takes the same (x, mask) inputs. The original
                # two-item unpacking failed on the first batch.
                for te_x, te_mask, te_label, _doc_id in test_generator:
                    num_sample = len(te_label)
                    if use_cuda:
                        te_x = te_x.cuda()
                        te_mask = te_mask.cuda()
                        te_label = te_label.cuda()
                    with torch.no_grad():
                        # NOTE(review): the training loop never resets a
                        # hidden state, so this hook may not exist on
                        # BertHierAttNet; call it only when present.
                        init_hidden = getattr(model, "_init_hidden_state",
                                              None)
                        if init_hidden is not None:
                            init_hidden(num_sample)
                        te_predictions = model(te_x, te_mask)
                    te_loss = criterion(te_predictions, te_label)
                    # Weight by batch size so the epoch loss is a
                    # per-sample average.
                    loss_ls.append(te_loss * num_sample)
                    te_label_ls.extend(te_label.clone().cpu())
                    te_pred_ls.append(te_predictions.clone().cpu())

                te_loss = sum(loss_ls) / len(test_set)
                te_pred = torch.cat(te_pred_ls, 0)
                te_label = np.array(te_label_ls)
                test_metrics = get_evaluation(
                    te_label,
                    te_pred.numpy(),
                    list_metrics=["accuracy", "confusion_matrix"])
                output_file.write(
                    "Epoch: {}/{} \nTest loss: {} Test accuracy: {} \nTest confusion matrix: \n{}\n\n"
                    .format(epoch + 1, opt.num_epoches, te_loss,
                            test_metrics["accuracy"],
                            test_metrics["confusion_matrix"]))
                print("Epoch: {}/{}, Lr: {}, Loss: {}, Accuracy: {}".format(
                    epoch + 1, opt.num_epoches,
                    optimizer.param_groups[0]['lr'], te_loss,
                    test_metrics["accuracy"]))
                writer.add_scalar('Test/Loss', te_loss, epoch)
                writer.add_scalar('Test/Accuracy', test_metrics["accuracy"],
                                  epoch)
                model.train()

                # Checkpoint only on improvements larger than es_min_delta.
                if te_loss + opt.es_min_delta < best_loss:
                    best_loss = te_loss
                    best_epoch = epoch
                    torch.save(model,
                               opt.saved_path + os.sep + "whole_model_han")

                # Early stopping: only active when es_patience is positive.
                if epoch - best_epoch > opt.es_patience > 0:
                    # Report the best loss, not the current epoch's loss.
                    print(
                        "Stop training at epoch {}. The lowest loss achieved is {}"
                        .format(epoch, best_loss))
                    break
def train(opt):
    """Train a character-level CNN and keep the model with the lowest test loss.

    Configuration is read from ``opt``: ``dataset``, ``feature`` ("small"
    or "large"), ``input``, ``output``, ``batch_size``, ``max_length``,
    ``alphabet``, ``log_path``, ``optimizer`` ("adam" or "sgd"), ``lr``,
    ``num_epochs``, ``es_min_delta`` and ``es_patience``. For the built-in
    dataset names, ``opt.input`` and ``opt.output`` are overwritten with the
    folders returned by ``get_default_folder``.

    Side effects: creates ``opt.output`` if missing, writes ``logs.txt``
    there, recreates the ``<log_path>_<feature>_<dataset>`` directory for
    the summary writer, and saves the whole model whenever the test loss
    improves by more than ``opt.es_min_delta``. Exits the process via
    ``sys.exit`` when ``opt.feature`` is not recognised.

    Raises:
        ValueError: if ``opt.optimizer`` is neither "adam" nor "sgd".
    """
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        torch.cuda.manual_seed(123)
    else:
        torch.manual_seed(123)

    known_datasets = ["agnews", "dbpedia", "yelp_review", "yelp_review_polarity",
                      "amazon_review", "amazon_polarity", "sogou_news", "yahoo_answers"]
    if opt.dataset in known_datasets:
        opt.input, opt.output = get_default_folder(opt.dataset, opt.feature)
    if not os.path.exists(opt.output):
        os.makedirs(opt.output)

    training_params = {"batch_size": opt.batch_size, "shuffle": True, "num_workers": 0}
    test_params = {"batch_size": opt.batch_size, "shuffle": False, "num_workers": 0}

    # The context manager guarantees the log is flushed and closed even
    # when training stops early, exits or raises.
    with open(opt.output + os.sep + "logs.txt", "w") as output_file:
        output_file.write("Model's parameters: {}".format(vars(opt)))

        training_set = MyDataset(opt.input + os.sep + "train.csv", opt.max_length)
        test_set = MyDataset(opt.input + os.sep + "test.csv", opt.max_length)
        training_generator = DataLoader(training_set, **training_params)
        test_generator = DataLoader(test_set, **test_params)

        # The two feature modes differ only in layer widths.
        if opt.feature == "small":
            model = CharacterLevelCNN(input_length=opt.max_length, n_classes=training_set.num_classes,
                                      input_dim=len(opt.alphabet),
                                      n_conv_filters=256, n_fc_neurons=1024)
        elif opt.feature == "large":
            model = CharacterLevelCNN(input_length=opt.max_length, n_classes=training_set.num_classes,
                                      input_dim=len(opt.alphabet),
                                      n_conv_filters=1024, n_fc_neurons=2048)
        else:
            sys.exit("Invalid feature mode!")

        # Start from an empty log directory.
        log_path = "{}_{}_{}".format(opt.log_path, opt.feature, opt.dataset)
        if os.path.isdir(log_path):
            shutil.rmtree(log_path)
        os.makedirs(log_path)
        writer = SummaryWriter(log_path)

        if use_cuda:
            model.cuda()

        criterion = nn.CrossEntropyLoss()
        if opt.optimizer == "adam":
            optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr)
        elif opt.optimizer == "sgd":
            optimizer = torch.optim.SGD(model.parameters(), lr=opt.lr, momentum=0.9)
        else:
            # Previously this fell through and failed later with a
            # NameError on the first optimizer use.
            raise ValueError("Unsupported optimizer: {}".format(opt.optimizer))

        best_loss = 1e5
        best_epoch = 0
        model.train()
        num_iter_per_epoch = len(training_generator)

        for epoch in range(opt.num_epochs):
            # ---- training pass ----
            for step, batch in enumerate(training_generator):
                feature, label = batch
                if use_cuda:
                    feature = feature.cuda()
                    label = label.cuda()
                optimizer.zero_grad()
                predictions = model(feature)
                loss = criterion(predictions, label)
                loss.backward()
                optimizer.step()

                training_metrics = get_evaluation(label.cpu().numpy(),
                                                  predictions.cpu().detach().numpy(),
                                                  list_metrics=["accuracy"])
                print("Epoch: {}/{}, Iteration: {}/{}, Lr: {}, Loss: {}, Accuracy: {}".format(
                    epoch + 1,
                    opt.num_epochs,
                    step + 1,
                    num_iter_per_epoch,
                    optimizer.param_groups[0]['lr'],
                    loss, training_metrics["accuracy"]))
                writer.add_scalar('Train/Loss', loss, epoch * num_iter_per_epoch + step)
                writer.add_scalar('Train/Accuracy', training_metrics["accuracy"],
                                  epoch * num_iter_per_epoch + step)

            # ---- evaluation pass ----
            model.eval()
            loss_ls = []
            te_label_ls = []
            te_pred_ls = []
            for batch in test_generator:
                te_feature, te_label = batch
                num_sample = len(te_label)
                if use_cuda:
                    te_feature = te_feature.cuda()
                    te_label = te_label.cuda()
                with torch.no_grad():
                    te_predictions = model(te_feature)
                te_loss = criterion(te_predictions, te_label)
                # Weight by batch size so the epoch loss is a per-sample
                # average even when the last batch is smaller.
                loss_ls.append(te_loss * num_sample)
                te_label_ls.extend(te_label.clone().cpu())
                te_pred_ls.append(te_predictions.clone().cpu())

            te_loss = sum(loss_ls) / len(test_set)
            te_pred = torch.cat(te_pred_ls, 0)
            te_label = np.array(te_label_ls)
            test_metrics = get_evaluation(te_label, te_pred.numpy(),
                                          list_metrics=["accuracy", "confusion_matrix"])
            output_file.write(
                "Epoch: {}/{} \nTest loss: {} Test accuracy: {} \nTest confusion matrix: \n{}\n\n".format(
                    epoch + 1, opt.num_epochs,
                    te_loss,
                    test_metrics["accuracy"],
                    test_metrics["confusion_matrix"]))
            print("Epoch: {}/{}, Lr: {}, Loss: {}, Accuracy: {}".format(
                epoch + 1,
                opt.num_epochs,
                optimizer.param_groups[0]['lr'],
                te_loss, test_metrics["accuracy"]))
            writer.add_scalar('Test/Loss', te_loss, epoch)
            writer.add_scalar('Test/Accuracy', test_metrics["accuracy"], epoch)
            model.train()

            # Checkpoint only on improvements larger than es_min_delta.
            if te_loss + opt.es_min_delta < best_loss:
                best_loss = te_loss
                best_epoch = epoch
                torch.save(model, "{}/char-cnn_{}_{}".format(opt.output, opt.dataset, opt.feature))

            # Early stopping: only active when es_patience is positive.
            if epoch - best_epoch > opt.es_patience > 0:
                # Report the best loss, which is the one reached at
                # best_epoch; the current epoch's loss is by definition
                # not the lowest here.
                print("Stop training at epoch {}. The lowest loss achieved is {} at epoch {}".format(epoch, best_loss, best_epoch))
                break

            # For SGD, halve the learning rate every third epoch.
            if opt.optimizer == "sgd" and epoch % 3 == 0 and epoch > 0:
                current_lr = optimizer.state_dict()['param_groups'][0]['lr']
                current_lr /= 2
                for param_group in optimizer.param_groups:
                    param_group['lr'] = current_lr
def train(opt):
    """Train a character-level CNN classifier, evaluating on the test set each epoch.

    Reads the following attributes from ``opt``: ``dataset``, ``feature``,
    ``input``, ``output``, ``batch_size``, ``max_length``, ``alphabet``,
    ``log_path``, ``optimizer``, ``lr``, ``num_epochs``, ``es_min_delta`` and
    ``es_patience``.

    Side effects:
        * For the built-in dataset names, ``opt.input`` / ``opt.output`` are
          overwritten with the pair returned by ``get_default_folder``.
        * ``<opt.output>/logs.txt`` is (re)written with the run parameters and
          the per-epoch test loss, accuracy and confusion matrix.
        * The directory ``<opt.log_path>_<feature>_<dataset>`` is wiped and
          recreated, then filled with ``SummaryWriter`` scalars.
        * The whole model is saved with ``torch.save`` to
          ``<opt.output>/char-cnn_<dataset>_<feature>`` whenever the test loss
          improves on the best loss so far by more than ``opt.es_min_delta``.

    Exits via ``sys.exit`` when ``opt.feature`` is not "small"/"large" or
    ``opt.optimizer`` is not "adam"/"sgd".
    """
    use_cuda = torch.cuda.is_available()

    # Fix the random seed (CUDA generator when a GPU is available, CPU otherwise).
    if use_cuda:
        torch.cuda.manual_seed(123)
    else:
        torch.manual_seed(123)

    # Built-in datasets use their default input/output folders.
    if opt.dataset in ["csic2010", "agnews", "dbpedia", "yelp_review", "yelp_review_polarity",
                       "amazon_review", "amazon_polarity", "sogou_news", "yahoo_answers"]:
        opt.input, opt.output = get_default_folder(opt.dataset, opt.feature)

    # Create the output directory if it does not exist yet.
    if not os.path.exists(opt.output):
        os.makedirs(opt.output)

    # The context manager guarantees the log file is flushed and closed on
    # every exit path (normal return, early stop, sys.exit, exception).
    with open(opt.output + os.sep + "logs.txt", "w") as output_file:
        output_file.write("Model's parameters: {}".format(vars(opt)))

        # DataLoader settings: shuffle only the training data.
        training_params = {"batch_size": opt.batch_size,
                           "shuffle": True,
                           "num_workers": 0}
        test_params = {"batch_size": opt.batch_size,
                       "shuffle": False,
                       "num_workers": 0}

        # Load the datasets.
        training_set = MyDataset(opt.input + os.sep + "train.csv", opt.max_length)
        test_set = MyDataset(opt.input + os.sep + "test.csv", opt.max_length)
        training_generator = DataLoader(training_set, **training_params)
        test_generator = DataLoader(test_set, **test_params)

        # Pick the network size from the feature mode.
        if opt.feature == "small":
            model = CharacterLevelCNN(input_length=opt.max_length, n_classes=training_set.num_classes,
                                      input_dim=len(opt.alphabet),
                                      n_conv_filters=256, n_fc_neurons=1024)
        elif opt.feature == "large":
            model = CharacterLevelCNN(input_length=opt.max_length, n_classes=training_set.num_classes,
                                      input_dim=len(opt.alphabet),
                                      n_conv_filters=1024, n_fc_neurons=2048)
        else:
            sys.exit("Invalid feature mode!")

        # Start from an empty scalar-log directory for this configuration.
        log_path = "{}_{}_{}".format(opt.log_path, opt.feature, opt.dataset)
        if os.path.isdir(log_path):
            shutil.rmtree(log_path)
        os.makedirs(log_path)
        writer = SummaryWriter(log_path)

        try:
            if use_cuda:
                model.cuda()

            # Cross-entropy loss for classification.
            criterion = nn.CrossEntropyLoss()

            # Select the optimizer; fail fast instead of hitting a NameError
            # on the first training step.
            if opt.optimizer == "adam":
                optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr)
            elif opt.optimizer == "sgd":
                optimizer = torch.optim.SGD(model.parameters(), lr=opt.lr, momentum=0.9)
            else:
                sys.exit("Invalid optimizer!")

            # Early-stopping / checkpoint bookkeeping.
            best_loss = 1e5
            best_epoch = 0

            model.train()
            # Number of iterations (batches) per epoch.
            num_iter_per_epoch = len(training_generator)

            for epoch in range(opt.num_epochs):
                # ---- training pass ----
                for step, batch in enumerate(training_generator):
                    feature, label = batch
                    if use_cuda:
                        feature = feature.cuda()
                        label = label.cuda()

                    optimizer.zero_grad()
                    predictions = model(feature)
                    loss = criterion(predictions, label)
                    loss.backward()
                    optimizer.step()

                    training_metrics = get_evaluation(label.cpu().numpy(),
                                                      predictions.cpu().detach().numpy(),
                                                      list_metrics=["accuracy"])
                    print("Epoch: {}/{}, Iteration: {}/{}, Lr: {}, Loss: {}, Accuracy: {}".format(
                        epoch + 1,
                        opt.num_epochs,
                        step + 1,
                        num_iter_per_epoch,
                        optimizer.param_groups[0]['lr'],
                        loss, training_metrics["accuracy"]))
                    global_step = epoch * num_iter_per_epoch + step
                    writer.add_scalar('Train/Loss', loss, global_step)
                    writer.add_scalar('Train/Accuracy', training_metrics["accuracy"], global_step)

                # ---- evaluation pass ----
                model.eval()
                loss_ls = []
                te_label_ls = []
                te_pred_ls = []
                for batch in test_generator:
                    te_feature, te_label = batch
                    num_sample = len(te_label)
                    if use_cuda:
                        te_feature = te_feature.cuda()
                        te_label = te_label.cuda()
                    # No gradients are needed while evaluating.
                    with torch.no_grad():
                        te_predictions = model(te_feature)
                        te_loss = criterion(te_predictions, te_label)
                    # Weight each batch loss by its size so the final value is
                    # a per-sample mean even when the last batch is smaller.
                    loss_ls.append(te_loss * num_sample)
                    te_label_ls.append(te_label.cpu())
                    te_pred_ls.append(te_predictions.cpu())

                te_loss = sum(loss_ls) / len(test_set)
                # Stack per-batch predictions/labels along the batch dimension.
                te_pred = torch.cat(te_pred_ls, 0)
                te_label = torch.cat(te_label_ls, 0).numpy()

                test_metrics = get_evaluation(te_label, te_pred.numpy(),
                                              list_metrics=["accuracy", "confusion_matrix"])
                output_file.write(
                    "Epoch: {}/{} \nTest loss: {} Test accuracy: {} \nTest confusion matrix: \n{}\n\n".format(
                        epoch + 1, opt.num_epochs,
                        te_loss,
                        test_metrics["accuracy"],
                        test_metrics["confusion_matrix"]))
                print("Epoch: {}/{}, Lr: {}, Loss: {}, Accuracy: {}".format(
                    epoch + 1,
                    opt.num_epochs,
                    optimizer.param_groups[0]['lr'],
                    te_loss, test_metrics["accuracy"]))
                writer.add_scalar('Test/Loss', te_loss, epoch)
                writer.add_scalar('Test/Accuracy', test_metrics["accuracy"], epoch)

                # Back to training mode for the next epoch.
                model.train()

                # Checkpoint when the test loss beats the best one so far by
                # more than es_min_delta.
                if te_loss + opt.es_min_delta < best_loss:
                    best_loss = te_loss
                    best_epoch = epoch
                    torch.save(model, "{}/char-cnn_{}_{}".format(opt.output, opt.dataset, opt.feature))

                # Early stopping (disabled when es_patience <= 0).
                if epoch - best_epoch > opt.es_patience > 0:
                    # Report best_loss: the message is about the lowest loss,
                    # not the loss of the epoch at which training stopped.
                    print("Stop training at epoch {}. The lowest loss achieved is {} at epoch {}".format(
                        epoch, best_loss, best_epoch))
                    break

                # Learning-rate decay (not gradient clipping): with SGD, every
                # third epoch set all param groups to half the first group's LR.
                if opt.optimizer == "sgd" and epoch % 3 == 0 and epoch > 0:
                    current_lr = optimizer.param_groups[0]['lr'] / 2
                    for param_group in optimizer.param_groups:
                        param_group['lr'] = current_lr
        finally:
            # Flush pending scalar events and release the event file.
            writer.close()