Example no. 1
print("Loading data from {} with batch size {}...".format(
    args.dataset, args.batch_size))
train_batch = DataLoader('./dataset/' + args.dataset + '/train.json',
                         args.batch_size, args, dicts)
test_batch = DataLoader('./dataset/' + args.dataset + '/test.json',
                        args.batch_size, args, dicts)

# create the folder for saving the best models and log file
model_save_dir = args.save_dir
helper.ensure_dir(model_save_dir, verbose=True)
file_logger = helper.FileLogger(
    model_save_dir + '/' + args.log,
    header="# epoch\ttrain_loss\ttest_loss\ttrain_acc\ttest_acc\ttest_f1")

# create the model
trainer = GCNTrainer(args, emb_matrix=emb_matrix)

# start training
train_acc_history, train_loss_history, test_loss_history, f1_score_history = [], [], [], [0.]
test_acc_history = [0.]
for epoch in range(1, args.num_epoch + 1):
    train_loss, train_acc, train_step = 0., 0., 0
    for i, batch in enumerate(train_batch):
        loss, acc = trainer.update(batch)
        train_loss += loss
        train_acc += acc
        train_step += 1
        if train_step % args.log_step == 0:
            print("train_loss: {}, train_acc: {}".format(
                train_loss / train_step, train_acc / train_step))
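Example no. 1 breaks off mid-epoch: test_batch, file_logger, and the history lists are created but never used in the excerpt. Based on the evaluation loop shown in Example no. 2, the end of each epoch plausibly looks like the following sketch; the file_logger.log call and its exact format are assumptions matched to the header above.

    # End-of-epoch evaluation (sketch; mirrors the predict loop in Example no. 2).
    # Assumes `from sklearn import metrics` at module level.
    predictions, labels = [], []
    test_loss, test_acc, test_step = 0., 0., 0
    for i, batch in enumerate(test_batch):
        loss, acc, pred, label, _, _ = trainer.predict(batch)
        test_loss += loss
        test_acc += acc
        predictions += pred
        labels += label
        test_step += 1
    f1 = metrics.f1_score(labels, predictions, average='macro')

    # Track history and write one line matching the logger header (assumed format).
    train_loss_history.append(train_loss / train_step)
    test_loss_history.append(test_loss / test_step)
    train_acc_history.append(train_acc / train_step)
    test_acc_history.append(test_acc / test_step)
    f1_score_history.append(f1)
    file_logger.log("{}\t{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}".format(
        epoch, train_loss / train_step, test_loss / test_step,
        train_acc / train_step, test_acc / test_step, f1))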
Example no. 2
from sklearn import metrics  # assumed import; `metrics.f1_score` is used below

assert emb_matrix.shape[1] == args.emb_dim

args.token_vocab_size = len(token_vocab['i2w'])
args.post_vocab_size = len(dicts['post'])
args.pos_vocab_size = len(dicts['pos'])

dicts['token'] = token_vocab['w2i']

# load training set and test set
print("Loading data from {} with batch size {}...".format(
    args.dataset, args.batch_size))
test_batch = DataLoader('./dataset/' + args.dataset + '/test.json',
                        args.batch_size, args, dicts)

# create the model
trainer = GCNTrainer(args, emb_matrix=emb_matrix)

print("Loading model from {}".format(args.save_dir))
trainer.load(args.save_dir)

print("Evaluating...")
predictions, labels = [], []
test_loss, test_acc, test_step = 0., 0., 0
for i, batch in enumerate(test_batch):
    loss, acc, pred, label, _, _ = trainer.predict(batch)
    test_loss += loss
    test_acc += acc
    predictions += pred
    labels += label
    test_step += 1
f1_score = metrics.f1_score(labels, predictions, average='macro')
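For reference, average='macro' computes F1 independently per class and takes the unweighted mean, so rare polarity classes weigh as much as frequent ones. A tiny self-contained illustration:

from sklearn import metrics

y_true = [0, 0, 1, 2]
y_pred = [0, 1, 1, 2]
# Per-class F1: class 0 -> 2/3, class 1 -> 2/3, class 2 -> 1.0
print(metrics.f1_score(y_true, y_pred, average='macro'))  # ~0.778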
print("Loading data from {} with batch size {}...".format(
    args.dataset, args.batch_size))
train_batch = DataLoader('./dataset/' + args.dataset + '/train_v1.json',
                         args.batch_size, args, dicts)
test_batch = DataLoader('./dataset/' + args.dataset + '/test_v1.json',
                        args.batch_size, args, dicts)

# create the folder for saving the best models and log file
model_save_dir = args.save_dir
helper.ensure_dir(model_save_dir, verbose=True)
file_logger = helper.FileLogger(
    model_save_dir + '/' + args.log,
    header="# epoch\ttrain_loss\ttest_loss\ttrain_acc\ttest_acc\ttest_f1")

# create the model
trainer = GCNTrainer(args, emb_matrix=emb_matrix)

# start training
train_acc_history, train_loss_history, test_loss_history, f1_score_history = [], [], [], [0.]
test_acc_history = [0.]

if args.wandb:  # assumes `import wandb` (Weights & Biases) at module level
    if args.model_type == 'CDT':
        run = wandb.init(project='paper',
                         entity='wuharlem',
                         name=f"CDT-DG-{args.seed}",
                         tags=["original", "Glove", "CDT"])
    else:
        run = wandb.init(project='paper',
                         entity='wuharlem',
                         name=f"AHGCN-DG-{args.model_type}-{args.seed}",
Example no. 4
import argparse

from utils import torch_utils, helper

parser = argparse.ArgumentParser()
parser.add_argument('--data_dir', type=str, default='dataset/Laptops')
parser.add_argument('--vocab_dir', type=str, default='dataset/Laptops')
parser.add_argument('--batch_size', type=int, default=32, help='Batch size.')
parser.add_argument('--lower', default=True, help='Lowercase all words.')
parser.add_argument('--model_dir',
                    type=str,
                    default='saved_models/best_model.pt',
                    help='Directory of the model.')
args = parser.parse_args()
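For context, this standalone evaluation script would be invoked roughly as follows (the script name is hypothetical; the flags and defaults come from the argparse block above):

# python eval.py --data_dir dataset/Laptops --vocab_dir dataset/Laptops \
#                --model_dir saved_models/best_model.pt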

print("Loading model from {}".format(args.model_dir))
opt = torch_utils.load_config(args.model_dir)
loaded_model = GCNTrainer(opt)
loaded_model.load(args.model_dir)

print("Loading vocab...")
token_vocab = Vocab.load_vocab(args.vocab_dir + '/vocab_tok.vocab')  # token
post_vocab = Vocab.load_vocab(args.vocab_dir + '/vocab_post.vocab')  # position
pos_vocab = Vocab.load_vocab(args.vocab_dir + '/vocab_pos.vocab')  # POS
dep_vocab = Vocab.load_vocab(args.vocab_dir + '/vocab_dep.vocab')  # deprel
pol_vocab = Vocab.load_vocab(args.vocab_dir + '/vocab_pol.vocab')  # polarity
vocab = (token_vocab, post_vocab, pos_vocab, dep_vocab, pol_vocab)
print(
    "token_vocab: {}, post_vocab: {}, pos_vocab: {}, dep_vocab: {}, pol_vocab: {}"
    .format(len(token_vocab), len(post_vocab), len(pos_vocab), len(dep_vocab),
            len(pol_vocab)))

print("Loading data from {} with batch size {}...".format(
Esempio n. 5
0
    args.data_dir, args.batch_size))
train_batch = DataLoader(args.data_dir + '/train.json', args.batch_size, args,
                         vocab)
test_batch = DataLoader(args.data_dir + '/test.json', args.batch_size, args,
                        vocab)

# check the saved_models directory
model_save_dir = args.save_dir
helper.ensure_dir(model_save_dir, verbose=True)
# log
file_logger = helper.FileLogger(
    model_save_dir + '/' + args.log,
    header="# epoch\ttrain_loss\ttest_loss\ttrain_acc\ttest_acc\ttest_f1")

# build model
trainer = GCNTrainer(args, emb_matrix=word_emb)

# start training
train_acc_history, train_loss_history, test_loss_history, f1_score_history = [], [], [], [0.]
test_acc_history = [0.]
for epoch in range(1, args.num_epoch + 1):
    train_loss, train_acc, train_step = 0., 0., 0
    for i, batch in enumerate(train_batch):
        loss, acc = trainer.update(batch)
        train_loss += loss
        train_acc += acc
        train_step += 1
        if train_step % args.log_step == 0:
            print("train_loss: {}, train_acc: {}".format(
                train_loss / train_step, train_acc / train_step))
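Example no. 5 breaks off at the same point as Example no. 1. The [0.] seeds in test_acc_history and f1_score_history suggest the script keeps the best checkpoint seen so far; a sketch of that bookkeeping, assuming GCNTrainer exposes a save(path) counterpart to the load() used in Examples no. 2 and no. 4:

    # After evaluating this epoch (sketch; test_acc, test_step, and f1 computed
    # as in the evaluation loop of Example no. 2):
    if f1 > max(f1_score_history):
        trainer.save(model_save_dir + '/best_model.pt')  # save() is an assumption
        print("New best model saved at epoch {}.".format(epoch))
    test_acc_history.append(test_acc / test_step)
    f1_score_history.append(f1)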