import os
from datetime import datetime

# RobertaModel comes from HuggingFace (transformers, or pytorch_transformers in
# older setups). The repo-local classes used below (DropBatchGen,
# NumericallyAugmentedBertNet, DropBertModel) are assumed to be imported at
# module level alongside it.
from transformers import RobertaModel


def main():
    best_result = float("-inf")

    logger.info("Loading data...")
    train_itr = DropBatchGen(args, data_mode="train", tokenizer=tokenizer)
    dev_itr = DropBatchGen(args, data_mode="dev", tokenizer=tokenizer)
    num_train_steps = int(args.max_epoch * len(train_itr) / args.gradient_accumulation_steps)
    logger.info("Num update steps {}!".format(num_train_steps))

    logger.info("Build bert model.")
    bert_model = RobertaModel.from_pretrained(args.roberta_model)

    logger.info("Build Drop model.")
    network = NumericallyAugmentedBertNet(bert_model,
                                          hidden_size=bert_model.config.hidden_size,
                                          dropout_prob=args.dropout,
                                          use_gcn=args.use_gcn,
                                          gcn_steps=args.gcn_steps)

    logger.info("Build optimizer etc...")
    model = DropBertModel(args, network, num_train_step=num_train_steps)

    train_start = datetime.now()
    first = True
    for epoch in range(1, args.max_epoch + 1):
        model.avg_reset()
        # The batch generator is exhausted after one pass; rewind it on every
        # epoch after the first.
        if not first:
            train_itr.reset()
        first = False

        logger.info('At epoch {}'.format(epoch))
        for step, batch in enumerate(train_itr):
            model.update(batch)
            if model.step % (args.log_per_updates * args.gradient_accumulation_steps) == 0 \
                    or model.step == 1:
                logger.info(
                    "Updates[{0:6}] train loss[{1:.5f}] train em[{2:.5f}] f1[{3:.5f}] remaining[{4}]".format(
                        model.updates, model.train_loss.avg, model.em_avg.avg, model.f1_avg.avg,
                        str((datetime.now() - train_start) / (step + 1)
                            * (num_train_steps - step - 1)).split('.')[0]))
                model.avg_reset()

        total_num, eval_loss, eval_em, eval_f1 = model.evaluate(dev_itr)
        logger.info("Eval {} examples, result in epoch {}, eval loss {}, eval em {} eval f1 {}.".format(
            total_num, epoch, eval_loss, eval_em, eval_f1))

        # Keep only the checkpoint with the best dev F1 so far.
        if eval_f1 > best_result:
            save_prefix = os.path.join(args.save_dir, "checkpoint_best")
            model.save(save_prefix, epoch)
            best_result = eval_f1
            logger.info("Best eval F1 {} at epoch {}".format(best_result, epoch))

    logger.info("done training in {} seconds!".format((datetime.now() - train_start).seconds))
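
# main() above reads module-level globals (args, logger, tokenizer) that fall
# outside this excerpt. A minimal sketch of the setup that typically precedes
# it, assuming the repo's options helpers; add_train_args and the exact flag
# names are assumptions, not taken from the source:
import argparse
import logging

import torch
from transformers import RobertaTokenizer

import options  # repo-local argument definitions (import path assumed)

parser = argparse.ArgumentParser("Bert training task.")
options.add_bert_args(parser)
options.add_model_args(parser)
options.add_train_args(parser)  # hypothetical, mirrors add_inference_args below
args = parser.parse_args()
args.cuda = torch.cuda.device_count() > 0

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

tokenizer = RobertaTokenizer.from_pretrained(args.roberta_model)

if __name__ == "__main__":
    main()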
# --- Separate script excerpt: single-model inference ---
parser = argparse.ArgumentParser("Bert inference task.")
options.add_bert_args(parser)
options.add_model_args(parser)
options.add_inference_args(parser)
args = parser.parse_args()
args.cuda = torch.cuda.device_count() > 0

print("Build bert model.")
# The original built a throwaway config instance just to call a classmethod
# (RobertaConfig().from_pretrained(...)); call it on the class instead.
bert_model = RobertaModel(RobertaConfig.from_pretrained(args.roberta_model))

print("Build Drop model.")
network = NumericallyAugmentedBertNet(bert_model,
                                      hidden_size=bert_model.config.hidden_size,
                                      dropout_prob=0.0,
                                      use_gcn=args.use_gcn,
                                      gcn_steps=args.gcn_steps)
if args.cuda:
    network.cuda()

print("Load from pre path {}.".format(args.pre_path))
# map_location keeps a GPU-saved checkpoint loadable on a CPU-only machine.
network.load_state_dict(torch.load(args.pre_path,
                                   map_location=None if args.cuda else "cpu"))

print("Load data from {}.".format(args.inf_path))
tokenizer = RobertaTokenizer.from_pretrained(args.roberta_model)
inf_iter = DropBatchGen(args, tokenizer,
                        DropReader(tokenizer,
                                   passage_length_limit=463,
                                   question_length_limit=46)._read(args.inf_path))

print("Start inference...")
result = {}
network.eval()
with torch.no_grad():
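    # The loop body is truncated in the source. A minimal sketch of what such
    # a loop typically does, assuming the network returns predicted answers
    # keyed by question id; the output key names are assumptions, not the
    # repo's confirmed API:
    for batch in inf_iter:
        output_dict = network(**batch)
        for qid, answer in zip(output_dict["question_id"], output_dict["answer"]):
            result[qid] = answer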
# --- Separate script excerpt: inference with tag_mspan / multilingual options ---
# BertModel, AutoTokenizer and the T*-prefixed tag_mspan classes are assumed to
# be imported at module level.
if args.eng == 0:
    bert_model = BertModel.from_pretrained(args.roberta_model)
else:
    bert_model = RobertaModel.from_pretrained(args.roberta_model)

print("Build Drop model.")
if args.tag_mspan:
    network = TNumericallyAugmentedBertNet(bert_model,
                                           hidden_size=bert_model.config.hidden_size,
                                           dropout_prob=0.0,
                                           use_gcn=args.use_gcn,
                                           gcn_steps=args.gcn_steps,
                                           is_eng=args.eng)
else:
    network = NumericallyAugmentedBertNet(bert_model,
                                          hidden_size=bert_model.config.hidden_size,
                                          dropout_prob=0.0,
                                          use_gcn=args.use_gcn,
                                          gcn_steps=args.gcn_steps)
if args.cuda:
    network.cuda()

print("Load from pre path {}.".format(args.pre_path))
# map_location keeps a GPU-saved checkpoint loadable on a CPU-only machine.
network.load_state_dict(torch.load(args.pre_path,
                                   map_location=None if args.cuda else "cpu"))

print("Load data from {}.".format(args.inf_path))
if args.eng != 0:
    tokenizer = RobertaTokenizer.from_pretrained(args.roberta_model)
else:
    tokenizer = AutoTokenizer.from_pretrained(args.roberta_model)

if args.tag_mspan:
    inf_iter = TDropBatchGen(args, tokenizer,
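                             # The call is cut off in the source. By analogy
                             # with the DropBatchGen/DropReader pairing in the
                             # excerpt above, it presumably continues with the
                             # tag_mspan reader; a hypothetical reconstruction:
                             TDropReader(tokenizer,
                                         passage_length_limit=463,
                                         question_length_limit=46)._read(args.inf_path))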