def main(output_model_file='./models/bert-base-uncased.bin', load=False, mode='tensors',
         batch_size=12, num_epoch=1, gradient_accumulation_steps=1,
         lr1=1e-4, lr2=1e-4, alpha=0.2):
    BERT_MODEL = 'bert-base-uncased'  # bert-large is too large for ordinary GPU on task #2
    tokenizer = BertTokenizer.from_pretrained(BERT_MODEL, do_lower_case=True)
    with open('./hotpot_train_v1.1_refined.json', 'r') as fin:
        dataset = json.load(fin)
    # Convert each question into a training bundle; skip malformed samples.
    bundles = []
    for data in tqdm(dataset):
        try:
            bundles.append(convert_question_to_samples_bundle(tokenizer, data))
        except ValueError:
            pass
        # except Exception as err:
        #     traceback.print_exc()
        #     pass
    device = torch.device('cpu') if not torch.cuda.is_available() else torch.device('cuda')

    if load:
        # Resume from a saved checkpoint: 'params1' holds the BERT (System 1)
        # weights, 'params2' the GNN (System 2) weights.
        print('Loading model from {}'.format(output_model_file))
        model_state_dict = torch.load(output_model_file)
        model1 = BertForMultiHopQuestionAnswering.from_pretrained(
            BERT_MODEL, state_dict=model_state_dict['params1'])
        model2 = CognitiveGNN(model1.config.hidden_size)
        model2.load_state_dict(model_state_dict['params2'])
    else:
        model1 = BertForMultiHopQuestionAnswering.from_pretrained(
            BERT_MODEL,
            cache_dir=PYTORCH_PRETRAINED_BERT_CACHE / 'distributed_{}'.format(-1))
        model2 = CognitiveGNN(model1.config.hidden_size)

    print('Start Training... on {} GPUs'.format(torch.cuda.device_count()))
    model1 = torch.nn.DataParallel(model1, device_ids=range(torch.cuda.device_count()))
    model1, model2 = train(bundles, model1=model1, device=device, mode=mode, model2=model2,
                           # Then pass hyperparams
                           batch_size=batch_size, num_epoch=num_epoch,
                           gradient_accumulation_steps=gradient_accumulation_steps,
                           lr1=lr1, lr2=lr2, alpha=alpha)

    print('Saving model to {}'.format(output_model_file))
    saved_dict = {'params1': model1.module.state_dict()}
    saved_dict['params2'] = model2.state_dict()
    torch.save(saved_dict, output_model_file)
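# Entry-point sketch (an assumption, not shown above): training scripts in this
# style are commonly exposed as a CLI with python-fire, which turns main()'s
# keyword arguments into command-line flags,
# e.g. `python train.py --batch_size=6 --num_epoch=2 --lr1=1e-5`.
if __name__ == '__main__':
    import fire
    fire.Fire(main)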
def main(output_model_file='./models/bert-base-uncased.bin', load=False, mode='tensors',
         batch_size=12, lr=1e-4):
    BERT_MODEL = 'bert-base-uncased'
    tokenizer = BertTokenizer.from_pretrained(BERT_MODEL, do_lower_case=True)
    with open('./hotpot_train_v1.1_refined3.json', 'r') as fin:
        dataset = json.load(fin)
    # Only the first 100 questions are used here (a debugging-sized subset).
    bundles = []
    for data in tqdm(dataset[:100]):
        try:
            bundles.append(convert_question_to_samples_bundle(tokenizer, data))
        except Exception:
            # traceback.print_exc()
            # print(data['question'])
            pass
    device = torch.device('cpu') if not torch.cuda.is_available() else torch.device('cuda')

    if load:
        print('Loading model from {}'.format(output_model_file))
        model_state_dict = torch.load(output_model_file)
        model = BertForMultiHopQuestionAnswering.from_pretrained(
            BERT_MODEL, state_dict=model_state_dict['bert-params'])
        model_cg = CognitiveGraph(model.config.hidden_size)
        # model_cg.load_state_dict(model_state_dict['cg-params'])
    else:
        model = BertForMultiHopQuestionAnswering.from_pretrained(
            BERT_MODEL,
            cache_dir=PYTORCH_PRETRAINED_BERT_CACHE / 'distributed_{}'.format(-1))
        model_cg = CognitiveGraph(model.config.hidden_size)

    print('Start Training... on {} GPUs'.format(torch.cuda.device_count()))
    model = torch.nn.DataParallel(model, device_ids=range(torch.cuda.device_count()))
    model, model_cg = train(bundles, model, device, batch_size=batch_size,
                            model_cg=model_cg, mode=mode, lr=lr)

    print('Saving model to {}'.format(output_model_file))
    saved_dict = {'bert-params': model.module.state_dict()}
    saved_dict['cg-params'] = model_cg.state_dict()
    torch.save(saved_dict, output_model_file)
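# Compatibility note: this variant saves its checkpoint under 'bert-params' /
# 'cg-params', while the entry points above and below expect 'params1' /
# 'params2'. A minimal one-off key migration (hypothetical helper; it assumes
# the System 2 architectures are actually state_dict-compatible):
def convert_legacy_checkpoint(old_file, new_file):
    import torch
    old = torch.load(old_file, map_location='cpu')
    torch.save({'params1': old['bert-params'], 'params2': old['cg-params']}, new_file)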
def main(BERT_MODEL='bert-base-uncased', model_file='./models/bert-base-uncased.bin',
         data_file='./hotpot_dev_distractor_v1.json', max_new_nodes=5):
    # Infer the evaluation setting from the data file name.
    setting = 'distractor' if data_file.find('distractor') >= 0 else 'fullwiki'
    with open(data_file, 'r') as fin:
        dataset = json.load(fin)
    tokenizer = BertTokenizer.from_pretrained(BERT_MODEL, do_lower_case=True)
    device = torch.device('cpu') if not torch.cuda.is_available() else torch.device('cuda')

    print('Loading model from {}'.format(model_file))
    model_state_dict = torch.load(model_file)
    model1 = BertForMultiHopQuestionAnswering.from_pretrained(
        BERT_MODEL, state_dict=model_state_dict['params1'])
    model2 = CognitiveGNN(model1.config.hidden_size)
    model2.load_state_dict(model_state_dict['params2'])

    sp, answer, graphs = {}, {}, {}
    print('Start Predicting... on {} GPUs'.format(torch.cuda.device_count()))
    model1 = torch.nn.DataParallel(model1, device_ids=range(torch.cuda.device_count()))
    model1.to(device).eval()
    model2.to(device).eval()
    with torch.no_grad():
        for data in tqdm(dataset):
            # Expand the cognitive graph for each question and collect the
            # supporting facts, answer, and traversal trace.
            gold, ans, graph_ret, ans_nodes = cognitive_graph_propagate(
                tokenizer, data, model1, model2, device,
                setting=setting, max_new_nodes=max_new_nodes)
            sp[data['_id']] = list(gold)
            answer[data['_id']] = ans
            graphs[data['_id']] = graph_ret + ['answer_nodes: ' + ', '.join(ans_nodes)]

    pred_file = data_file.replace('.json', '_pred.json')
    with open(pred_file, 'w') as fout:
        json.dump({'answer': answer, 'sp': sp, 'graphs': graphs}, fout)
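# Output sketch (illustrative; file layout taken from the json.dump above): the
# prediction file maps each question _id to its answer, supporting facts, and
# traversed graph. 'answer' and 'sp' follow the HotpotQA prediction format, so
# the file should be scorable with the official HotpotQA evaluation script.
import json
with open('./hotpot_dev_distractor_v1_pred.json') as f:  # default pred path from above
    pred = json.load(f)
qid = next(iter(pred['answer']))
print(pred['answer'][qid])   # predicted answer string
print(pred['sp'][qid])       # supporting facts for the same question
print(pred['graphs'][qid])   # cognitive-graph trace, ending with the answer nodes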
            train_bundles.append(
                convert_question_to_samples_bundle(tokenizer, data))
        except ValueError:
            pass
    valid_bundles = []
    # for data in tqdm(valid_data):
    #     try:
    #         valid_bundles.append(convert_question_to_samples_bundle(tokenizer, data))
    #     except ValueError as err:
    #         pass

    if not args.load:  # Task #1
        model1 = BertForMultiHopQuestionAnswering.from_pretrained(
            args.bert_model,
            cache_dir=PYTORCH_PRETRAINED_BERT_CACHE / "distributed_{}".format(-1),
        )
        model2 = CognitiveGNN(model1.config.hidden_size, model1.config, 'gcn')
    else:  # Task #2
        print("Loading model from {}".format(args.load_path))
        model_state_dict = torch.load(args.load_path)
        model1 = BertForMultiHopQuestionAnswering.from_pretrained(
            args.bert_model, state_dict=model_state_dict["params1"])
        hidden_size = model1.config.hidden_size
        model2 = CognitiveGNN(hidden_size, model1.config, args.sys2)
        model2.load_state_dict(model_state_dict["params2"])
        if args.sys2 == "xattn":
            from model import XAttn
            model2.gcn = XAttn(model1.config.hidden_size,