Example #1
import json

import torch
from tqdm import tqdm
from pytorch_pretrained_bert.tokenization import BertTokenizer
from pytorch_pretrained_bert.file_utils import PYTORCH_PRETRAINED_BERT_CACHE

# The remaining names used below (BertForMultiHopQuestionAnswering,
# CognitiveGNN, convert_question_to_samples_bundle, train) are defined in the
# CogQA repository; the exact module paths are an assumption:
# from model import BertForMultiHopQuestionAnswering, CognitiveGNN
# from data import convert_question_to_samples_bundle


def main(output_model_file='./models/bert-base-uncased.bin',
         load=False,
         mode='tensors',
         batch_size=12,
         num_epoch=1,
         gradient_accumulation_steps=1,
         lr1=1e-4,
         lr2=1e-4,
         alpha=0.2):

    BERT_MODEL = 'bert-base-uncased'  # bert-large is too large for ordinary GPU on task #2
    tokenizer = BertTokenizer.from_pretrained(BERT_MODEL, do_lower_case=True)
    with open('./hotpot_train_v1.1_refined.json', 'r') as fin:
        dataset = json.load(fin)
    bundles = []
    for data in tqdm(dataset):
        try:
            bundles.append(convert_question_to_samples_bundle(tokenizer, data))
        except ValueError:
            pass  # skip questions that cannot be converted into sample bundles
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    if load:
        print('Loading model from {}'.format(output_model_file))
        model_state_dict = torch.load(output_model_file)
        model1 = BertForMultiHopQuestionAnswering.from_pretrained(
            BERT_MODEL, state_dict=model_state_dict['params1'])
        model2 = CognitiveGNN(model1.config.hidden_size)
        model2.load_state_dict(model_state_dict['params2'])

    else:
        model1 = BertForMultiHopQuestionAnswering.from_pretrained(
            BERT_MODEL,
            cache_dir=PYTORCH_PRETRAINED_BERT_CACHE /
            'distributed_{}'.format(-1))
        model2 = CognitiveGNN(model1.config.hidden_size)

    print('Start Training... on {} GPUs'.format(torch.cuda.device_count()))
    model1 = torch.nn.DataParallel(model1,
                                   device_ids=range(torch.cuda.device_count()))
    model1, model2 = train(
        bundles,
        model1=model1,
        device=device,
        mode=mode,
        model2=model2,  # remaining keyword arguments are hyperparameters
        batch_size=batch_size,
        num_epoch=num_epoch,
        gradient_accumulation_steps=gradient_accumulation_steps,
        lr1=lr1,
        lr2=lr2,
        alpha=alpha)

    print('Saving model to {}'.format(output_model_file))
    saved_dict = {'params1': model1.module.state_dict()}
    saved_dict['params2'] = model2.state_dict()
    torch.save(saved_dict, output_model_file)
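
How this function is invoked is not shown in the excerpt. Its keyword-argument signature suggests a CLI wrapper such as python-fire; a minimal sketch, assuming main() lives in train.py (the wrapper and the file name are assumptions, not part of the example):

# Hypothetical entry point: fire.Fire turns every keyword argument of main()
# into a command-line flag, e.g.
#   python train.py --batch_size 6 --gradient_accumulation_steps 2
import fire

if __name__ == '__main__':
    fire.Fire(main)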
Example #2
def main(output_model_file='./models/bert-base-uncased.bin',
         load=False,
         mode='tensors',
         batch_size=12,
         lr=1e-4):
    BERT_MODEL = 'bert-base-uncased'
    tokenizer = BertTokenizer.from_pretrained(BERT_MODEL, do_lower_case=True)
    with open('./hotpot_train_v1.1_refined3.json', 'r') as fin:
        dataset = json.load(fin)
    bundles = []
    for data in tqdm(dataset[:100]):  # only the first 100 questions (small debug subset)
        try:
            bundles.append(convert_question_to_samples_bundle(tokenizer, data))
        except Exception:
            pass  # silently skip any question that fails to convert
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    if load:
        print('Loading model from {}'.format(output_model_file))
        model_state_dict = torch.load(output_model_file)
        model = BertForMultiHopQuestionAnswering.from_pretrained(
            BERT_MODEL, state_dict=model_state_dict['bert-params'])
        model_cg = CognitiveGraph(model.config.hidden_size)
        # Restore the saved CognitiveGraph parameters as well; the checkpoint
        # written below stores them under 'cg-params'.
        model_cg.load_state_dict(model_state_dict['cg-params'])

    else:
        model = BertForMultiHopQuestionAnswering.from_pretrained(
            BERT_MODEL,
            cache_dir=PYTORCH_PRETRAINED_BERT_CACHE /
            'distributed_{}'.format(-1))
        model_cg = CognitiveGraph(model.config.hidden_size)

    print('Start Training... on {} GPUs'.format(torch.cuda.device_count()))
    model = torch.nn.DataParallel(model,
                                  device_ids=range(torch.cuda.device_count()))
    model, model_cg = train(bundles,
                            model,
                            device,
                            batch_size=batch_size,
                            model_cg=model_cg,
                            mode=mode,
                            lr=lr)
    print('Saving model to {}'.format(output_model_file))
    saved_dict = {'bert-params': model.module.state_dict()}
    saved_dict['cg-params'] = model_cg.state_dict()
    torch.save(saved_dict, output_model_file)
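
A hedged sketch of reloading the checkpoint written above, assuming the model classes from this example are importable; the key names ('bert-params', 'cg-params') come straight from saved_dict:

import torch

state = torch.load('./models/bert-base-uncased.bin', map_location='cpu')
model = BertForMultiHopQuestionAnswering.from_pretrained(
    'bert-base-uncased', state_dict=state['bert-params'])
model_cg = CognitiveGraph(model.config.hidden_size)
model_cg.load_state_dict(state['cg-params'])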
Example #3
def main(BERT_MODEL='bert-base-uncased',
         model_file='./models/bert-base-uncased.bin',
         data_file='./hotpot_dev_distractor_v1.json',
         max_new_nodes=5):
    setting = 'distractor' if 'distractor' in data_file else 'fullwiki'
    with open(data_file, 'r') as fin:
        dataset = json.load(fin)
    tokenizer = BertTokenizer.from_pretrained(BERT_MODEL, do_lower_case=True)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print('Loading model from {}'.format(model_file))
    model_state_dict = torch.load(model_file)
    model1 = BertForMultiHopQuestionAnswering.from_pretrained(BERT_MODEL, state_dict=model_state_dict['params1'])
    model2 = CognitiveGNN(model1.config.hidden_size)
    model2.load_state_dict(model_state_dict['params2'])
    sp, answer, graphs = {}, {}, {}
    print('Start Predicting... on {} GPUs'.format(torch.cuda.device_count()))
    model1 = torch.nn.DataParallel(model1, device_ids=range(torch.cuda.device_count()))
    model1.to(device).eval()  # inference only: disable dropout etc.
    model2.to(device).eval()

    with torch.no_grad():
        for data in tqdm(dataset):
            gold, ans, graph_ret, ans_nodes = cognitive_graph_propagate(
                tokenizer, data, model1, model2, device,
                setting=setting, max_new_nodes=max_new_nodes)
            sp[data['_id']] = list(gold)
            answer[data['_id']] = ans
            graphs[data['_id']] = graph_ret + ['answer_nodes: ' + ', '.join(ans_nodes)]
    pred_file = data_file.replace('.json', '_pred.json')
    with open(pred_file, 'w') as fout:
        json.dump({'answer': answer, 'sp': sp, 'graphs': graphs}, fout)
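
The prediction file written here follows the HotpotQA submission format ('answer' and 'sp' keys); the extra 'graphs' key is only for inspection. A hedged sketch of scoring it with the official evaluation script, assuming hotpot_evaluate_v1.py from the HotpotQA release is on hand:

# Assumed usage of the official HotpotQA scorer: prediction file, then gold file.
#   python hotpot_evaluate_v1.py hotpot_dev_distractor_v1_pred.json \
#       hotpot_dev_distractor_v1.json
import subprocess

subprocess.run(['python', 'hotpot_evaluate_v1.py',
                'hotpot_dev_distractor_v1_pred.json',  # pred_file from main()
                'hotpot_dev_distractor_v1.json'])      # original data_file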
Example #4
    train_bundles = []
    for data in tqdm(train_data):  # train_data: the loaded training set (assumed name; excerpt starts mid-function)
        try:
            train_bundles.append(
                convert_question_to_samples_bundle(tokenizer, data))
        except ValueError:
            pass  # skip questions that cannot be converted
    valid_bundles = []
    # for data in tqdm(valid_data):
    #     try:
    #         valid_bundles.append(convert_question_to_samples_bundle(tokenizer, data))
    #     except ValueError as err:
    #         pass

    if not args.load:
        # Task #1
        model1 = BertForMultiHopQuestionAnswering.from_pretrained(
            args.bert_model,
            cache_dir=PYTORCH_PRETRAINED_BERT_CACHE /
            "distributed_{}".format(-1),
        )
        model2 = CognitiveGNN(model1.config.hidden_size, model1.config, 'gcn')
    else:
        # Task #2
        print("Loading model from {}".format(args.load_path))
        model_state_dict = torch.load(args.load_path)
        model1 = BertForMultiHopQuestionAnswering.from_pretrained(
            args.bert_model, state_dict=model_state_dict["params1"])
        hidden_size = model1.config.hidden_size
        model2 = CognitiveGNN(hidden_size, model1.config, args.sys2)
        model2.load_state_dict(model_state_dict["params2"])
        if args.sys2 == "xattn":
            from model import XAttn
            model2.gcn = XAttn(model1.config.hidden_size,