# Training entry point for the bounding-box RCNN variant (CUDA required).
import numpy as np
import torch as t
import torch.optim as optim

from model import RCNN  # assumed local module; train_epoch is assumed to be defined alongside


def start_training(train_arguments, folder_index):
    rcnn = RCNN(train_arguments.pos_loss_method,
                train_arguments.loss_weight_lambda,
                train_arguments.prevent_overfitting_method).cuda()
    rcnn.train()  # train mode enables dropout

    npz_path = train_arguments.get_train_data_path(folder_index)
    npz = np.load(npz_path)
    print("\n\n\nload from: ", npz_path)
    train_arguments.train_sentences = npz['train_sentences']
    train_arguments.train_sentence_info = npz['train_sentence_info']
    train_arguments.train_roi = npz['train_roi']
    train_arguments.train_cls = npz['train_cls']
    if train_arguments.normalize:
        if train_arguments.dx_compute_method == "left_boundary":
            train_arguments.train_tbbox = npz["train_norm_lb_tbbox"]
        else:
            train_arguments.train_tbbox = npz["train_norm_tbbox"]
    else:
        train_arguments.train_tbbox = npz['train_tbbox']
    train_arguments.train_sentences = t.Tensor(train_arguments.train_sentences)
    # Shuffle the training indices once per run.
    train_arguments.train_set = np.random.permutation(train_arguments.train_sentences.size(0))

    if train_arguments.prevent_overfitting_method.lower() == "l2 regu":
        if train_arguments.partial_l2_penalty:
            # Apply the L2 penalty only to the fully connected heads,
            # leaving the convolutional layer unregularized.
            optimizer = optim.Adam([
                {"params": rcnn.conv1.parameters(), "weight_decay": 0},
                {"params": rcnn.cls_fc1.parameters(), "weight_decay": train_arguments.l2_beta},
                {"params": rcnn.cls_score.parameters(), "weight_decay": train_arguments.l2_beta},
                {"params": rcnn.bbox_fc1.parameters(), "weight_decay": train_arguments.l2_beta},
                {"params": rcnn.bbox.parameters(), "weight_decay": train_arguments.l2_beta}
            ], lr=train_arguments.learning_rate)
        else:
            optimizer = optim.Adam(rcnn.parameters(),
                                   lr=train_arguments.learning_rate,
                                   weight_decay=train_arguments.l2_beta)
    else:
        # Dropout handles regularization, so no weight decay is applied.
        optimizer = optim.Adam(rcnn.parameters(), lr=train_arguments.learning_rate)
    rcnn.optimizer = optimizer

    for epoch_time in range(train_arguments.max_iter_epoch):
        print('===========================================')
        print('[Training Epoch {}]'.format(epoch_time + 1))
        train_epoch(train_arguments, rcnn)
        if epoch_time >= train_arguments.start_save_epoch:
            save_directory = train_arguments.get_save_directory(folder_index)
            save_path = save_directory + "model_epoch" + str(epoch_time + 1) + ".pth"
            t.save(rcnn.state_dict(), save_path)
            print("Model saved to", save_path)
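# Usage sketch (hypothetical): `start_training` takes a per-fold index, which
# suggests the data is split into folders for cross-validation-style runs.
# `TrainArguments` and `n_folders` below are assumptions for illustration,
# not names from the original code:
#
#     train_arguments = TrainArguments()            # hypothetical config object
#     for folder_index in range(train_arguments.n_folders):
#         start_training(train_arguments, folder_index)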
# Training entry point for the text-classification RCNN variant.
import os
import logging

import torch
from torch.utils.data import DataLoader, random_split

# read_file, build_dictionary, CustomTextDataset, collate_fn, train and
# evaluate are assumed to come from this repo's local modules.
from model import RCNN

logger = logging.getLogger(__name__)


def main(args):
    model = RCNN(vocab_size=args.vocab_size,
                 embedding_dim=args.embedding_dim,
                 hidden_size=args.hidden_size,
                 hidden_size_linear=args.hidden_size_linear,
                 class_num=args.class_num,
                 dropout=args.dropout).to(args.device)

    if args.n_gpu > 1:
        model = torch.nn.DataParallel(model, dim=0)

    train_texts, train_labels = read_file(args.train_file_path)
    word2idx = build_dictionary(train_texts, vocab_size=args.vocab_size)
    logger.info('Dictionary Finished!')

    full_dataset = CustomTextDataset(train_texts, train_labels, word2idx)
    num_train_data = len(full_dataset) - args.num_val_data
    train_dataset, val_dataset = random_split(
        full_dataset, [num_train_data, args.num_val_data])
    train_dataloader = DataLoader(dataset=train_dataset,
                                  collate_fn=lambda x: collate_fn(x, args),
                                  batch_size=args.batch_size,
                                  shuffle=True)
    valid_dataloader = DataLoader(dataset=val_dataset,
                                  collate_fn=lambda x: collate_fn(x, args),
                                  batch_size=args.batch_size,
                                  shuffle=True)

    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    train(model, optimizer, train_dataloader, valid_dataloader, args)
    logger.info('******************** Train Finished ********************')

    # Test: evaluate the checkpoint that scored best on the validation set.
    if args.test_set:
        test_texts, test_labels = read_file(args.test_file_path)
        test_dataset = CustomTextDataset(test_texts, test_labels, word2idx)
        test_dataloader = DataLoader(dataset=test_dataset,
                                     collate_fn=lambda x: collate_fn(x, args),
                                     batch_size=args.batch_size,
                                     shuffle=True)
        model.load_state_dict(
            torch.load(os.path.join(args.model_save_path, "best.pt")))
        _, accuracy, precision, recall, f1, cm = evaluate(
            model, test_dataloader, args)
        logger.info('-' * 50)
        logger.info(
            f'|* TEST SET *| |ACC| {accuracy:>.4f} |PRECISION| {precision:>.4f} |RECALL| {recall:>.4f} |F1| {f1:>.4f}'
        )
        logger.info('-' * 50)
        logger.info('---------------- CONFUSION MATRIX ----------------')
        for row in cm:
            logger.info(row)
        logger.info('--------------------------------------------------')
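# The dataloaders above defer batching to a `collate_fn(batch, args)` that is
# not shown in this excerpt. A minimal sketch of what such a function usually
# does for variable-length text (pad every token sequence in the batch to one
# length and stack the labels); the pad index 0 and the batch item layout are
# assumptions, not the repo's actual implementation:
import torch


def collate_fn_sketch(batch, args):
    texts, labels = zip(*batch)  # assumed item layout: (list of token ids, label)
    max_len = max(len(seq) for seq in texts)
    padded = torch.zeros(len(texts), max_len, dtype=torch.long)  # 0 = assumed pad index
    for i, seq in enumerate(texts):
        padded[i, :len(seq)] = torch.tensor(seq, dtype=torch.long)
    return padded, torch.tensor(labels, dtype=torch.long)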
# Ten-run variant of the text-classification trainer: retrains from scratch
# ten times, averages the test metrics, and plots the per-run curves.
import os
from os import path, mkdir
import logging

import matplotlib.pyplot as plt
import pandas as pd
import torch
from torch.utils.data import DataLoader, random_split

# setup_data, read_file, build_dictionary, CustomTextDataset, collate_fn,
# train and evaluate are assumed to come from this repo's local modules.
from model import RCNN

logger = logging.getLogger(__name__)


def main(args):
    acc_list = []
    f1_score_list = []
    prec_list = []
    recall_list = []
    for run in range(10):
        setup_data()
        model = RCNN(vocab_size=args.vocab_size,
                     embedding_dim=args.embedding_dim,
                     hidden_size=args.hidden_size,
                     hidden_size_linear=args.hidden_size_linear,
                     class_num=args.class_num,
                     dropout=args.dropout).to(args.device)

        if args.n_gpu > 1:
            model = torch.nn.DataParallel(model, dim=0)

        train_texts, train_labels = read_file(args.train_file_path)
        word2idx, embedding = build_dictionary(train_texts, args.vocab_size,
                                               args.lexical, args.syntactic,
                                               args.semantic)
        logger.info('Dictionary Finished!')

        full_dataset = CustomTextDataset(train_texts, train_labels, word2idx, args)
        num_train_data = len(full_dataset) - args.num_val_data
        train_dataset, val_dataset = random_split(
            full_dataset, [num_train_data, args.num_val_data])
        train_dataloader = DataLoader(dataset=train_dataset,
                                      collate_fn=lambda x: collate_fn(x, args),
                                      batch_size=args.batch_size,
                                      shuffle=True)
        valid_dataloader = DataLoader(dataset=val_dataset,
                                      collate_fn=lambda x: collate_fn(x, args),
                                      batch_size=args.batch_size,
                                      shuffle=True)

        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
        train(model, optimizer, train_dataloader, valid_dataloader, embedding, args)
        logger.info('******************** Train Finished ********************')

        # Test: evaluate the checkpoint that scored best on the validation set.
        if args.test_set:
            test_texts, test_labels = read_file(args.test_file_path)
            test_dataset = CustomTextDataset(test_texts, test_labels, word2idx, args)
            test_dataloader = DataLoader(dataset=test_dataset,
                                         collate_fn=lambda x: collate_fn(x, args),
                                         batch_size=args.batch_size,
                                         shuffle=True)
            model.load_state_dict(
                torch.load(os.path.join(args.model_save_path, "best.pt")))
            _, accuracy, precision, recall, f1, cm = evaluate(
                model, test_dataloader, embedding, args)
            logger.info('-' * 50)
            logger.info(
                f'|* TEST SET *| |ACC| {accuracy:>.4f} |PRECISION| {precision:>.4f} |RECALL| {recall:>.4f} |F1| {f1:>.4f}'
            )
            logger.info('-' * 50)
            logger.info('---------------- CONFUSION MATRIX ----------------')
            for row in cm:
                logger.info(row)
            logger.info('--------------------------------------------------')
            acc_list.append(accuracy / 100)  # accuracy arrives in percent; store as a fraction
            prec_list.append(precision)
            recall_list.append(recall)
            f1_score_list.append(f1)

    avg_acc = sum(acc_list) / len(acc_list)
    avg_prec = sum(prec_list) / len(prec_list)
    avg_recall = sum(recall_list) / len(recall_list)
    avg_f1_score = sum(f1_score_list) / len(f1_score_list)
    logger.info('--------------------------------------------------')
    logger.info(
        f'|* TEST SET *| |Avg ACC| {avg_acc:>.4f} |Avg PRECISION| {avg_prec:>.4f} |Avg RECALL| {avg_recall:>.4f} |Avg F1| {avg_f1_score:>.4f}'
    )
    logger.info('--------------------------------------------------')

    # Plot the per-run metric curves.
    plot_df = pd.DataFrame({
        'x_values': range(10),
        'avg_acc': acc_list,
        'avg_prec': prec_list,
        'avg_recall': recall_list,
        'avg_f1_score': f1_score_list
    })
    plt.plot('x_values', 'avg_acc', data=plot_df, marker='o',
             markerfacecolor='blue', markersize=12, color='skyblue', linewidth=4)
    plt.plot('x_values', 'avg_prec', data=plot_df, marker='', color='olive', linewidth=2)
    plt.plot('x_values', 'avg_recall', data=plot_df, marker='', color='olive',
             linewidth=2, linestyle='dashed')
    plt.plot('x_values', 'avg_f1_score', data=plot_df, marker='', color='olive',
             linewidth=2, linestyle='dashed')
    plt.legend()

    # Name the output file after the feature combination that was enabled.
    if args.lexical and args.semantic and args.syntactic:
        fname = 'lexical-semantic-syntactic.png'
    elif args.semantic and args.syntactic:
        fname = 'semantic-syntactic.png'
    elif args.lexical and args.semantic:
        fname = 'lexical-semantic.png'
    elif args.lexical and args.syntactic:
        fname = 'lexical-syntactic.png'
    elif args.lexical:
        fname = 'lexical.png'
    elif args.syntactic:
        fname = 'syntactic.png'
    elif args.semantic:
        fname = 'semantic.png'
    else:
        fname = 'plain.png'

    if not path.exists('./images'):
        mkdir('./images')
    plt.savefig(path.join('./images', fname))
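# Neither `main(args)` above ships with its argument parser in this excerpt.
# A minimal sketch of a compatible entry point; the flag names mirror the
# attributes the code reads, but every default value here is an assumption:
import argparse
import torch


def parse_args_sketch():
    p = argparse.ArgumentParser()
    p.add_argument('--train_file_path', default='data/train.txt')
    p.add_argument('--test_file_path', default='data/test.txt')
    p.add_argument('--model_save_path', default='checkpoints')
    p.add_argument('--vocab_size', type=int, default=8000)
    p.add_argument('--embedding_dim', type=int, default=300)
    p.add_argument('--hidden_size', type=int, default=512)
    p.add_argument('--hidden_size_linear', type=int, default=512)
    p.add_argument('--class_num', type=int, default=2)
    p.add_argument('--dropout', type=float, default=0.3)
    p.add_argument('--batch_size', type=int, default=64)
    p.add_argument('--lr', type=float, default=1e-3)
    p.add_argument('--num_val_data', type=int, default=500)
    p.add_argument('--test_set', action='store_true')
    p.add_argument('--lexical', action='store_true')
    p.add_argument('--syntactic', action='store_true')
    p.add_argument('--semantic', action='store_true')
    args = p.parse_args()
    args.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    args.n_gpu = torch.cuda.device_count()
    return args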
# Fragment: the tail of an evaluation routine (its `def` line is not part of
# this excerpt), followed by the script entry point for the
# word2vec-initialized RCNN variant. The configuration names used below
# (seed, data_path, train_name, device, ...) are module-level settings
# defined elsewhere in the original file.
    model.train()  # restore train mode after evaluation
    return score


if __name__ == "__main__":
    # Fix every RNG so runs are reproducible.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    train_data = pickle.load(open(os.path.join(data_path, train_name), "rb"))
    dev_data = pickle.load(open(os.path.join(data_path, dev_name), "rb"))
    vocabulary = pickle.load(open(os.path.join(data_path, vocabulary_name), "rb"))
    print('dataset', len(train_data), len(dev_data))

    # Load the pretrained word2vec weight matrix.
    weight = pickle.load(open(os.path.join(data_path, weight_name), "rb"))

    # Model: fall back to CPU when CUDA is unavailable.
    train_device = torch.device(device if torch.cuda.is_available() else "cpu")
    model = RCNN(vocab_size=len(vocabulary),
                 embed_dim=embed_dim,
                 output_dim=class_num,
                 hidden_dim=hidden_dim,
                 num_layers=num_layers,
                 dropout=dropout,
                 weight=weight)
    model.to(train_device)
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=learning_rate,
                                 weight_decay=weight_decay)

    # Train, logging to TensorBoard.
    writer = SummaryWriter(log_dir=log_path)
    train()
    writer.close()
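# `train()` above takes no arguments, so it must read the module-level
# globals (model, optimizer, train_data, writer, ...). A minimal sketch of
# such a loop, written with explicit parameters for clarity; the epoch count,
# the (inputs, labels) batch layout, and the cross-entropy loss choice are
# assumptions, not the original implementation:
import torch.nn.functional as F


def train_sketch(model, optimizer, batches, writer, num_epochs=10):
    step = 0
    for epoch in range(num_epochs):
        model.train()
        for inputs, labels in batches:  # assumed batch layout
            optimizer.zero_grad()
            logits = model(inputs)
            loss = F.cross_entropy(logits, labels)
            loss.backward()
            optimizer.step()
            writer.add_scalar('train/loss', loss.item(), step)  # TensorBoard curve
            step += 1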
# Setup script for the MNIST RCNN variant.
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from model import RCNN
from dataloader_mnist import dataloader, batch_size, test_dataset_len, train_dataset_len

n_classes = 10
net = RCNN(n_classes=n_classes)

learning_rate = 1e-3
epoch = 30
criterion = nn.CrossEntropyLoss()
# optimizer = optim.SGD(net.parameters(), lr=learning_rate, momentum=0.9)
optimizer = optim.Adam(net.parameters(), lr=learning_rate)
# scheduler = optim.lr_scheduler.ReduceLROnPlateau(
#     optimizer, 'min',
#     factor=0.1,
#     patience=(train_dataset_len / batch_size) * 3,
#     verbose=True)

# Move the network to the GPU when one is available.
use_gpu = torch.cuda.is_available()
if use_gpu:
    net = net.cuda()

loss_trend = []
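# The MNIST script above stops after setup; `loss_trend` is never filled in
# this excerpt. A minimal sketch of the epoch loop the setup implies, under
# the assumption that `dataloader` yields (images, labels) batches:
for ep in range(epoch):
    running_loss = 0.0
    for images, labels in dataloader:
        if use_gpu:
            images, labels = images.cuda(), labels.cuda()
        optimizer.zero_grad()
        outputs = net(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    avg_loss = running_loss / (train_dataset_len / batch_size)
    loss_trend.append(avg_loss)  # track the per-epoch average loss
    print(f'epoch {ep + 1}/{epoch} loss {avg_loss:.4f}')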