print("Loading data from {} with batch size {}...".format( args.dataset, args.batch_size)) train_batch = DataLoader('./dataset/' + args.dataset + '/train.json', args.batch_size, args, dicts) test_batch = DataLoader('./dataset/' + args.dataset + '/test.json', args.batch_size, args, dicts) # create the folder for saving the best models and log file model_save_dir = args.save_dir helper.ensure_dir(model_save_dir, verbose=True) file_logger = helper.FileLogger( model_save_dir + '/' + args.log, header="# epoch\ttrain_loss\ttest_loss\ttrain_acc\ttest_acc\ttest_f1") # create the model trainer = GCNTrainer(args, emb_matrix=emb_matrix) # start training train_acc_history, train_loss_history, test_loss_history, f1_score_history = [], [], [], [0.] test_acc_history = [0.] for epoch in range(1, args.num_epoch + 1): train_loss, train_acc, train_step = 0., 0., 0 for i, batch in enumerate(train_batch): loss, acc = trainer.update(batch) train_loss += loss train_acc += acc train_step += 1 if train_step % args.log_step == 0: print("train_loss: {}, train_acc: {}".format( train_loss / train_step, train_acc / train_step))
assert emb_matrix.shape[1] == args.emb_dim
args.token_vocab_size = len(token_vocab['i2w'])
args.post_vocab_size = len(dicts['post'])
args.pos_vocab_size = len(dicts['pos'])
dicts['token'] = token_vocab['w2i']

# load the test set
print("Loading data from {} with batch size {}...".format(args.dataset, args.batch_size))
test_batch = DataLoader('./dataset/' + args.dataset + '/test.json',
                        args.batch_size, args, dicts)

# create the model and restore the saved weights
trainer = GCNTrainer(args, emb_matrix=emb_matrix)
print("Loading model from {}".format(args.save_dir))
trainer.load(args.save_dir)

# evaluate on the test set
print("Evaluating...")
predictions, labels = [], []
test_loss, test_acc, test_step = 0., 0., 0
for i, batch in enumerate(test_batch):
    loss, acc, pred, label, _, _ = trainer.predict(batch)
    test_loss += loss
    test_acc += acc
    predictions += pred
    labels += label
    test_step += 1
f1_score = metrics.f1_score(labels, predictions, average='macro')  # macro-F1 via sklearn.metrics
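# Reporting sketch (assumed continuation, not shown in the excerpt above; assumes
# test_step > 0): average the accumulated loss/accuracy over the test batches and
# print them alongside the macro-F1 computed above.
print("test_loss: {}, test_acc: {}, test_f1: {}".format(
    test_loss / test_step, test_acc / test_step, f1_score))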
print("Loading data from {} with batch size {}...".format( args.dataset, args.batch_size)) train_batch = DataLoader('./dataset/' + args.dataset + '/train_v1.json', args.batch_size, args, dicts) test_batch = DataLoader('./dataset/' + args.dataset + '/test_v1.json', args.batch_size, args, dicts) # create the folder for saving the best models and log file model_save_dir = args.save_dir helper.ensure_dir(model_save_dir, verbose=True) file_logger = helper.FileLogger( model_save_dir + '/' + args.log, header="# epoch\ttrain_loss\ttest_loss\ttrain_acc\ttest_acc\ttest_f1") # create the model trainer = GCNTrainer(args, emb_matrix=emb_matrix) # start training train_acc_history, train_loss_history, test_loss_history, f1_score_history = [], [], [], [0.] test_acc_history = [0.] if args.wandb == True: if args.model_type == 'CDT': run = wandb.init(project='paper', entity='wuharlem', name=f"CDT-DG-{args.seed}", tags=["original", "Glove", "CDT"]) else: run = wandb.init(project='paper', entity='wuharlem', name=f"AHGCN-DG-{args.model_type}-{args.seed}",
import argparse

from utils import torch_utils, helper
# Vocab, GCNTrainer and DataLoader come from the project's own modules; their imports
# (and the pre-trained embedding matrix word_emb) are not shown in this excerpt.

parser = argparse.ArgumentParser()
parser.add_argument('--data_dir', type=str, default='dataset/Laptops')
parser.add_argument('--vocab_dir', type=str, default='dataset/Laptops')
parser.add_argument('--batch_size', type=int, default=32, help='Batch size.')
parser.add_argument('--lower', default=True, help='Lowercase all words.')
parser.add_argument('--model_dir', type=str, default='saved_models/best_model.pt',
                    help='Path to the saved model.')
args = parser.parse_args()

print("Loading model from {}".format(args.model_dir))
opt = torch_utils.load_config(args.model_dir)
loaded_model = GCNTrainer(opt)
loaded_model.load(args.model_dir)

print("Loading vocab...")
token_vocab = Vocab.load_vocab(args.vocab_dir + '/vocab_tok.vocab')   # token
post_vocab = Vocab.load_vocab(args.vocab_dir + '/vocab_post.vocab')   # position
pos_vocab = Vocab.load_vocab(args.vocab_dir + '/vocab_pos.vocab')     # POS
dep_vocab = Vocab.load_vocab(args.vocab_dir + '/vocab_dep.vocab')     # deprel
pol_vocab = Vocab.load_vocab(args.vocab_dir + '/vocab_pol.vocab')     # polarity
vocab = (token_vocab, post_vocab, pos_vocab, dep_vocab, pol_vocab)
print("token_vocab: {}, post_vocab: {}, pos_vocab: {}, dep_vocab: {}, pol_vocab: {}".format(
    len(token_vocab), len(post_vocab), len(pos_vocab), len(dep_vocab), len(pol_vocab)))

print("Loading data from {} with batch size {}...".format(args.data_dir, args.batch_size))
train_batch = DataLoader(args.data_dir + '/train.json', args.batch_size, args, vocab)
test_batch = DataLoader(args.data_dir + '/test.json', args.batch_size, args, vocab)

# check the saved_models directory
model_save_dir = args.save_dir
helper.ensure_dir(model_save_dir, verbose=True)

# log file
file_logger = helper.FileLogger(
    model_save_dir + '/' + args.log,
    header="# epoch\ttrain_loss\ttest_loss\ttrain_acc\ttest_acc\ttest_f1")

# build model
trainer = GCNTrainer(args, emb_matrix=word_emb)

# start training
train_acc_history, train_loss_history, test_loss_history, f1_score_history = [], [], [], [0.]
test_acc_history = [0.]
for epoch in range(1, args.num_epoch + 1):
    train_loss, train_acc, train_step = 0., 0., 0
    for i, batch in enumerate(train_batch):
        loss, acc = trainer.update(batch)
        train_loss += loss
        train_acc += acc
        train_step += 1
        if train_step % args.log_step == 0:
            print("train_loss: {}, train_acc: {}".format(
                train_loss / train_step, train_acc / train_step))