Example #1
opt['model_save_dir'] = model_save_dir
helper.ensure_dir(model_save_dir, verbose=True)

# save config
helper.save_config(opt, model_save_dir + '/config.json', verbose=True)
vocab.save(model_save_dir + '/vocab.pkl')
file_logger = helper.FileLogger(
    model_save_dir + '/' + opt['log'],
    header="# epoch\ttrain_loss\tdev_loss\tdev_score\tbest_dev_score")

# print model info
helper.print_config(opt)

# model
if not opt['load']:
    trainer = GCNTrainer(opt, emb_matrix=emb_matrix)
else:
    # load pretrained model
    model_file = opt['model_file']
    print("Loading model from {}".format(model_file))
    model_opt = torch_utils.load_config(model_file)
    model_opt['optim'] = opt['optim']
    trainer = GCNTrainer(model_opt)
    trainer.load(model_file)

id2label = dict([(v, k) for k, v in label2id.items()])
dev_score_history = []
current_lr = opt['lr']

global_step = 0
global_start_time = time.time()
parser.add_argument('--cuda', type=bool, default=torch.cuda.is_available())
parser.add_argument('--cpu', action='store_true')
args = parser.parse_args()

torch.manual_seed(args.seed)
random.seed(1234)
if args.cpu:
    args.cuda = False
elif args.cuda:
    torch.cuda.manual_seed(args.seed)

# load opt
model_file = args.model_dir + '/' + args.model
print("Loading model from {}".format(model_file))
opt = torch_utils.load_config(model_file)
trainer = GCNTrainer(opt)
trainer.load(model_file)

# load vocab
vocab_file = args.model_dir + '/vocab.pkl'
vocab = Vocab(vocab_file, load=True)
assert opt['vocab_size'] == vocab.size, "Vocab size must match that in the saved model."

# load data
data_file = opt['data_dir'] + '/{}.json'.format(args.dataset)
print("Loading data from {} with batch size {}...".format(
    data_file, opt['batch_size']))
batch = DataLoader(data_file, opt['batch_size'], opt, vocab, evaluation=True)

helper.print_config(opt)
Example #3
    # print(torch.cuda.device_count())
    # with torch.cuda.device(1):
    #     for subj in SUBJ_LIST:
    #         for obj in OBJ_LIST:
    # print("eval samples of subj:"+subj+" obj:"+obj)
    # args.model_dir = 'saved_models/02'
    # if os.path.exists(args.model_dir+'/'+subj+"_"+obj+"_"+"best_model.pt"):
    #     model_file = args.model_dir +'/'+subj+"_"+obj+"_"+"best_model.pt"
    # else:
    #     model_file = args.model_dir + '/best_model.pt'
    model_file = args.model_dir + '/best_model.pt'
    print("Loading model from {}".format(model_file))
    opt = torch_utils.load_config(model_file)
    data_file = opt['data_dir'] + '/{}.json'.format(args.dataset)
    trainer = GCNTrainer(opt, lbstokens=lbstokens)
    trainer.load(model_file)
    batch = DataLoader([data_file],
                       opt['batch_size'],
                       opt,
                       vocab,
                       evaluation=True,
                       corefresolve=True)
    batch_iter = tqdm(batch)

    predictions = []
    all_probs = []
    samples = []
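    # run the trained model over every evaluation batch, accumulating predictions and probabilities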
    for i, b in enumerate(batch_iter):
        preds, probs, _, sample = trainer.predict(b)
        predictions += preds
        all_probs += probs
Example #4
parser.add_argument('--cuda', type=bool, default=torch.cuda.is_available())
parser.add_argument('--cpu', action='store_true')
args = parser.parse_args()

torch.manual_seed(args.seed)
random.seed(1234)
if args.cpu:
    args.cuda = False
elif args.cuda:
    torch.cuda.manual_seed(args.seed)

# load opt
model_file = args.model_dir + '/' + args.model
print("Loading model from {}".format(model_file))
opt = torch_utils.load_config(model_file)
trainer = GCNTrainer(opt)
trainer.load(model_file)

# load vocab
vocab_file = opt['vocab_dir'] + '/vocab.pkl'
vocab = Vocab(vocab_file, load=True)
assert opt['vocab_size'] == vocab.size, "Vocab size must match that in the saved model."

# load data
data_file = opt['data_dir'] + '/test.json'
print("Loading data from {} with batch size {}...".format(data_file, opt['batch_size']))
data = read_file(data_file, vocab, opt, False)
batch = DataLoader(data, opt['batch_size'], opt, evaluation=True)

helper.print_config(opt)
label2id = constant.LABEL_TO_ID
Example #5
if args.cpu:
    args.cuda = False
elif args.cuda:
    torch.cuda.manual_seed(args.seed)

model_file_list = [
    'saved_models/01', 'saved_models/02', 'saved_models/03', 'saved_models/04',
    'saved_models/00'
]

prob_list = []
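# load each saved checkpoint in model_file_list and evaluate it on the same dataset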
for j in range(len(model_file_list)):
    model_file = model_file_list[j] + '/' + args.model
    print("Loading model from {}".format(model_file))
    opt = torch_utils.load_config(model_file)  # load hyperparameters
    trainer = GCNTrainer(opt)  # build the model
    trainer.load(model_file)  # load the best checkpoint
    # load vocab
    vocab_file = model_file_list[j] + '/vocab.pkl'
    vocab = Vocab(vocab_file, load=True)
    assert opt['vocab_size'] == vocab.size, "Vocab size must match that in the saved model."

    # load data
    data_file = opt['data_dir'] + '/{}.json'.format(args.dataset)
    print("Loading data from {} with batch size {}...".format(
        data_file, opt['batch_size']))
    batch = DataLoader(data_file,
                       opt['batch_size'],
                       opt,
                       vocab,
                       evaluation=True)
Example #6
opt['model_save_dir'] = model_save_dir
helper.ensure_dir(model_save_dir, verbose=True)

# save config
helper.save_config(opt, model_save_dir + '/config.json', verbose=True)
vocab.save(model_save_dir + '/vocab.pkl')
file_logger = helper.FileLogger(
    model_save_dir + '/' + opt['log'],
    header="# epoch\ttrain_loss\tdev_loss\tdev_score\tbest_dev_score")

# print model info
helper.print_config(opt)

# model
if not opt['load']:
    trainer = GCNTrainer(opt, emb_matrix=emb_matrix)
else:
    # load pretrained model
    model_file = opt['model_file']
    print("Loading model from {}".format(model_file))
    model_opt = torch_utils.load_config(model_file)
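    # keep the optimizer choice from the current run; all other hyperparameters come from the saved config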
    model_opt['optim'] = opt['optim']
    trainer = GCNTrainer(model_opt)
    trainer.load(model_file)

id2label = dict([(v, k) for k, v in label2id.items()])
dev_score_history = []
current_lr = opt['lr']
lr_change = True

global_step = 0
Example #7
opt['model_save_dir'] = model_save_dir
helper.ensure_dir(model_save_dir, verbose=True)

# save config
helper.save_config(opt, model_save_dir + '/config.json', verbose=True)
vocab.save(model_save_dir + '/vocab.pkl')
file_logger = helper.FileLogger(
    model_save_dir + '/' + opt['log'],
    header="# epoch\ttrain_loss\tdev_loss\tdev_score\tbest_dev_score")

# print model info
helper.print_config(opt)

# model
if not opt['load']:
    trainer = GCNTrainer(opt, emb_matrix=emb_matrix)
else:
    # load pretrained model
    model_file = opt['model_file']
    print("Loading model from {}".format(model_file))
    model_opt = torch_utils.load_config(model_file)
    model_opt['optim'] = opt['optim']
    trainer = GCNTrainer(model_opt)
    trainer.load(model_file)

id2label = dict([(v, k) for k, v in label2id.items()])
dev_score_history = []
current_lr = opt['lr']

global_step = 0
global_start_time = time.time()
Example #8
    header="# epoch\ttrain_loss\ttest_loss\ttrain_score\tbest_train_score")

test_save_dir = os.path.join(opt['test_save_dir'], opt['id'])
os.makedirs(test_save_dir, exist_ok=True)
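# output files for the test records, confusion matrices, and deprel embeddings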
test_save_file = os.path.join(test_save_dir, 'test_records.pkl')
test_confusion_save_file = os.path.join(test_save_dir,
                                        'test_confusion_matrix.pkl')
train_confusion_save_file = os.path.join(test_save_dir,
                                         'train_confusion_matrix.pkl')
deprel_save_file = os.path.join(test_save_dir, 'deprel_embs.pkl')
# print model info
helper.print_config(opt)

# model
if not opt['load']:
    trainer = GCNTrainer(opt, emb_matrix=emb_matrix)
else:
    # load pretrained model
    model_file = opt['model_file']
    print("Loading model from {}".format(model_file))
    model_opt = torch_utils.load_config(model_file)
    model_opt['optim'] = opt['optim']
    trainer = GCNTrainer(model_opt)
    trainer.load(model_file)

id2label = dict([(v, k) for k, v in label2id.items()])
train_score_history = []
current_lr = opt['lr']

global_step = 0
global_start_time = time.time()
Example #9
# if not os.path.exists(model_file):
#     model_file="saved_models/02/"+"best_model_aug.pt"
train_batch = DataLoader([opt['data_dir'] + '/train_coref.json'], opt['batch_size'],
                         opt, vocab, evaluation=False, is_aug=False, corefresolve=True)
dev_batch = DataLoader([opt['data_dir'] + '/dev_rev_coref.json'], opt['batch_size'],
                       opt, vocab, evaluation=True, corefresolve=True)
test_batch = DataLoader([opt['data_dir'] + '/test_rev_coref.json'], opt['batch_size'],
                        opt, vocab, evaluation=True, corefresolve=True)
# if dev_batch.num_examples==0 or test_batch.num_examples==0:
#     continue
max_steps = len(train_batch) * opt['num_epoch']
global_step = 0
global_start_time = time.time()
format_str = '{}: step {}/{} (epoch {}/{}), loss = {:.6f} ({:.3f} sec/batch), lr: {:.6f}'
score_history = []
test_score_history = []
current_lr = opt['lr']
if not opt['load']:
    trainer = GCNTrainer(opt, lbstokens=lbstokens, emb_matrix=emb_matrix)
else:
    # load pretrained model
    model_file = opt['model_file']
    print("Loading model from {}".format(model_file))
    model_opt = torch_utils.load_config(model_file)
    model_opt['optim'] = opt['optim']
    model_opt['lr'] = opt['lr']
    model_opt['lr_decay'] = opt['lr_decay']
    trainer = GCNTrainer(model_opt, lbstokens=lbstokens)
    trainer.load(model_file)
    # print("Evaluating on dev set...")
    # predictions = []
    # dev_loss = 0
    # for i, batch in enumerate(dev_batch):
    #     preds, probs, loss, samples = trainer.predict(batch)
    help='When provided, predictions and gold will be outputted in kind')

args = parser.parse_args()

torch.manual_seed(args.seed)
random.seed(args.seed)
if args.cpu:
    args.cuda = False
elif args.cuda:
    torch.cuda.manual_seed(args.seed)

# load opt
model_file = args.model_dir + '/' + args.model
print("Loading model from {}".format(model_file))
opt = torch_utils.load_config(model_file)
trainer = GCNTrainer(opt)
trainer.load(model_file)

# load vocab
vocab_file = args.model_dir + '/vocab.pkl'
vocab = Vocab(vocab_file, load=True)
assert opt['vocab_size'] == vocab.size, "Vocab size must match that in the saved model."

# UCCA Embedding?
ucca_embedding = None
if opt['ucca_embedding_dim'] > 0:
    embedding_file = opt['ucca_embedding_dir'] + '/' + opt['ucca_embedding_file']
    index_file = opt['ucca_embedding_dir'] + '/' + opt['ucca_embedding_index_file']
Example #11
args = parser.parse_args()
args.cpu = True
torch.manual_seed(args.seed)
random.seed(1234)
if args.cpu:
    args.cuda = False
elif args.cuda:
    torch.cuda.manual_seed(args.seed)

# load opt
model_file = args.model_dir + '/' + args.model
print("Loading model from {}".format(model_file))
opt = torch_utils.load_config(model_file)
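# override the saved device flags so the checkpoint runs on whatever hardware is available here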
opt['cuda'] = torch.cuda.is_available()
opt['cpu'] = not torch.cuda.is_available()
trainer = GCNTrainer(opt)
trainer.load(model_file)
trainer.opt['cuda'] = torch.cuda.is_available()
trainer.opt['cpu'] = not torch.cuda.is_available()
# load vocab
vocab_file = args.model_dir + '/vocab.pkl'
vocab = Vocab(vocab_file, load=True)
assert opt['vocab_size'] == vocab.size, "Vocab size must match that in the saved model."

# load data
data_file = args.data_dir + '/{}.json'.format(args.dataset)
print("Loading data from {} with batch size {}...".format(
    data_file, opt['batch_size']))
batch = DataLoader(data_file, opt['batch_size'], opt, vocab, evaluation=True)
Example #12
#model_id = opt['id'] if len(opt['id']) > 1 else '0' + opt['id']
#model_save_dir = opt['save_dir'] + '/' + model_id
#opt['model_save_dir'] = model_save_dir
#helper.ensure_dir(model_save_dir, verbose=True)

# save config
#helper.save_config(opt, model_save_dir + '/config.json', verbose=True)
#vocab.save(model_save_dir + '/vocab.pkl')
#file_logger = helper.FileLogger(model_save_dir + '/' + opt['log'], header="# epoch\ttrain_loss\tdev_loss\tdev_precision\tdev recall\tdev f1\tbest f1")

# print model info
#helper.print_config(opt)
#print(np.array(meshvec_table))
# model
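# when not resuming, the trainer is built from the pre-computed knowledge and word embedding tables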
if not opt['load']:
    trainer = GCNTrainer(opt,
                         knowledge_emb=np.array(meshvec_table),
                         word_emb=np.array(wordvec_table))
    # print(trainer)
else:
    # load pretrained model
    model_file = opt['model_file']
    print("Loading model from {}".format(model_file))
    model_opt = torch_utils.load_config(model_file)
    model_opt['optim'] = opt['optim']
    trainer = GCNTrainer(model_opt)
    trainer.load(model_file)


    print("Evaluating on dev set...")
    gold_answer = []
    predictions = []
    doc_id_list = []
helper.ensure_dir(model_save_dir, verbose=True)

# save config
helper.save_config(opt, model_save_dir + '/config.json', verbose=True)
vocab.save(model_save_dir + '/vocab.pkl')
file_logger = helper.FileLogger(
    model_save_dir + '/' + opt['log'],
    header="# epoch\ttrain_loss\tdev_loss\tdev_score\tbest_dev_score")

# print model info
helper.print_config(opt)

# model
if not opt['load']:
    trainer = GCNTrainer(opt,
                         emb_matrix=emb_matrix,
                         ucca_embedding_matrix=ucca_embedding.embedding_matrix
                         if ucca_embedding else None)
else:
    # load pretrained model
    model_file = opt['model_file']
    print("Loading model from {}".format(model_file))
    model_opt = torch_utils.load_config(model_file)
    model_opt['optim'] = opt['optim']
    trainer = GCNTrainer(model_opt)
    trainer.load(model_file)

# The id2label[0] = 'no_relation' assignment is needed when --binary_classification is active
id2label = dict([(v, k) for k, v in label2id.items()])
id2label[0] = 'no_relation'

dev_score_history = []
args = parser.parse_args()


torch.manual_seed(args.seed)
random.seed(args.seed)
if args.cpu:
    args.cuda = False
elif args.cuda:
    torch.cuda.manual_seed(args.seed)

# load opt
model_file = args.model_dir + '/' + args.model
print("Loading model from {}".format(model_file))
opt = torch_utils.load_config(model_file)
trainer = GCNTrainer(opt)
trainer.load(model_file)

# load vocab
vocab_file = args.model_dir + '/vocab.pkl'
vocab = Vocab(vocab_file, load=True)
assert opt['vocab_size'] == vocab.size, "Vocab size must match that in the saved model."

# UCCA Embedding?
ucca_embedding = None
if opt['ucca_embedding_dim'] > 0:
    embedding_file = opt['ucca_embedding_dir'] + '/' + opt['ucca_embedding_file']
    index_file = opt['ucca_embedding_dir'] + '/' + opt['ucca_embedding_index_file']
    ucca_embedding = UccaEmbedding(opt['ucca_embedding_dim'], index_file, embedding_file)