import argparse

import numpy as np

# Project-local modules. The module paths below follow the usual layout of this
# repo family (data/loader.py, model/trainer.py, utils/vocab.py) and are assumptions;
# adjust them to match the actual package structure.
from data.loader import DataLoader
from model.trainer import GCNTrainer
from utils import torch_utils, helper
from utils.vocab import Vocab

parser = argparse.ArgumentParser()
parser.add_argument('--data_dir', type=str, default='dataset/Laptops')
parser.add_argument('--vocab_dir', type=str, default='dataset/Laptops')
parser.add_argument('--batch_size', type=int, default=32, help='Batch size.')
parser.add_argument('--lower', default=True, help='Lowercase all words.')
parser.add_argument('--model_dir', type=str, default='saved_models/best_model.pt',
                    help='Directory of the model.')
# The flags below are referenced later in the script but were missing from the
# parser; the names come from those references, the defaults are assumptions.
parser.add_argument('--save_dir', type=str, default='saved_models',
                    help='Root directory for saved models and logs.')
parser.add_argument('--log', type=str, default='logs.txt', help='Log file name.')
parser.add_argument('--num_epoch', type=int, default=100, help='Number of training epochs.')
parser.add_argument('--log_step', type=int, default=20,
                    help='Print training stats every k steps.')
args = parser.parse_args()

# restore a previously trained model and its configuration
print("Loading model from {}".format(args.model_dir))
opt = torch_utils.load_config(args.model_dir)
loaded_model = GCNTrainer(opt)
loaded_model.load(args.model_dir)

# load the vocabularies built during preprocessing
print("Loading vocab...")
token_vocab = Vocab.load_vocab(args.vocab_dir + '/vocab_tok.vocab')   # token
post_vocab = Vocab.load_vocab(args.vocab_dir + '/vocab_post.vocab')   # position
pos_vocab = Vocab.load_vocab(args.vocab_dir + '/vocab_pos.vocab')     # POS tag
dep_vocab = Vocab.load_vocab(args.vocab_dir + '/vocab_dep.vocab')     # dependency relation
pol_vocab = Vocab.load_vocab(args.vocab_dir + '/vocab_pol.vocab')     # polarity
vocab = (token_vocab, post_vocab, pos_vocab, dep_vocab, pol_vocab)
print("token_vocab: {}, post_vocab: {}, pos_vocab: {}, dep_vocab: {}, pol_vocab: {}".format(
    len(token_vocab), len(post_vocab), len(pos_vocab), len(dep_vocab), len(pol_vocab)))

print("Loading data from {} with batch size {}...".format(
print("Loading data from {} with batch size {}...".format( args.dataset, args.batch_size)) train_batch = DataLoader('./dataset/' + args.dataset + '/train.json', args.batch_size, args, dicts) test_batch = DataLoader('./dataset/' + args.dataset + '/test.json', args.batch_size, args, dicts) # create the folder for saving the best models and log file model_save_dir = args.save_dir helper.ensure_dir(model_save_dir, verbose=True) file_logger = helper.FileLogger( model_save_dir + '/' + args.log, header="# epoch\ttrain_loss\ttest_loss\ttrain_acc\ttest_acc\ttest_f1") # create the model trainer = GCNTrainer(args, emb_matrix=emb_matrix) # start training train_acc_history, train_loss_history, test_loss_history, f1_score_history = [], [], [], [0.] test_acc_history = [0.] for epoch in range(1, args.num_epoch + 1): train_loss, train_acc, train_step = 0., 0., 0 for i, batch in enumerate(train_batch): loss, acc = trainer.update(batch) train_loss += loss train_acc += acc train_step += 1 if train_step % args.log_step == 0: print("train_loss: {}, train_acc: {}".format( train_loss / train_step, train_acc / train_step))
    args.data_dir, args.batch_size))
train_batch = DataLoader(args.data_dir + '/train.json', args.batch_size, args, vocab)
test_batch = DataLoader(args.data_dir + '/test.json', args.batch_size, args, vocab)

# make sure the directory for saving the best models and the log file exists
model_save_dir = args.save_dir
helper.ensure_dir(model_save_dir, verbose=True)

# log file
file_logger = helper.FileLogger(
    model_save_dir + '/' + args.log,
    header="# epoch\ttrain_loss\ttest_loss\ttrain_acc\ttest_acc\ttest_f1")

# build the model; word_emb was undefined in the original snippet, so we assume
# pretrained word embeddings were saved as a numpy array during preprocessing
word_emb = np.load(args.vocab_dir + '/embedding.npy')  # file name is an assumption
trainer = GCNTrainer(args, emb_matrix=word_emb)

# start training
train_acc_history, train_loss_history, test_loss_history, f1_score_history = [], [], [], [0.]
test_acc_history = [0.]
for epoch in range(1, args.num_epoch + 1):
    train_loss, train_acc, train_step = 0., 0., 0
    for i, batch in enumerate(train_batch):
        loss, acc = trainer.update(batch)
        train_loss += loss
        train_acc += acc
        train_step += 1
        if train_step % args.log_step == 0:
            print("train_loss: {}, train_acc: {}".format(
                train_loss / train_step, train_acc / train_step))
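
# A minimal usage sketch, assuming this script is saved as train.py at the repo
# root (the script name is an assumption; the flag values shown are the parser
# defaults, so they can be omitted):
#
#   python train.py --data_dir dataset/Laptops --vocab_dir dataset/Laptops --batch_size 32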