# Example 1 (Esempio n. 1) — scraped snippet; the fragment below is the tail
# of a run_model()-style setup function whose beginning is not included.
                                order_field=ORDER,
                                graph_field=GRAPH)

        dev_data = DocDataset(path=args.valid,
                              text_field=DOC,
                              order_field=ORDER,
                              graph_field=GRAPH)

        DOC.vocab = torch.load(args.vocab)
        print('vocab {} loaded'.format(args.vocab))
        args.__dict__.update({'doc_vocab': len(DOC.vocab)})

        train_flag = True
        train_real = DocIter(train_data,
                             args.batch_size,
                             device="cuda" if args.gpu else "cpu",
                             train=train_flag,
                             shuffle=train_flag,
                             sort_key=lambda x: len(x.doc))

        devbatch = 1
        dev_real = DocIter(dev_data,
                           devbatch,
                           device="cuda" if args.gpu else "cpu",
                           batch_size_fn=None,
                           train=False,
                           repeat=False,
                           shuffle=False,
                           sort=False)

        args_str = json.dumps(args.__dict__, indent=4, sort_keys=True)
        print(args_str)
# Example 2 (Esempio n. 2)
def train(args, train_iter, dev, fields, checkpoint):
    """Epoch-based training for CqaNet/PointerNet, then evaluate the best model.

    Args:
        args: argparse namespace; reads lr, early_stop, test, maximum_steps,
            model_path, model, loss among others.
        train_iter: iterator yielding training batches (batch.doc, batch.order,
            batch.doc_len, batch.e_words, batch.elocs, batch.labels,
            batch.answer_types).
        dev: validation-set iterator, forwarded to validate()/valid_model().
        fields: (DOC, ORDER, GRAPH) field objects; DOC.vocab must carry
            pretrained embedding vectors for load_pretrained_emb().
        checkpoint: forwarded to validate(); not loaded here.
    """
    # Pick the architecture per task; both expose the same training interface.
    if is_cqa_task():
        model = CqaNet(args)
    else:
        model = PointerNet(args)
    # NOTE(review): CUDA is assumed unconditionally (no args.gpu guard here,
    # unlike the args.gpu-guarded train() variant elsewhere in this file).
    model.cuda()

    DOC, ORDER, GRAPH = fields
    # Sanity print: token at vocab index 1 — presumably a special token; confirm.
    print('1:', DOC.vocab.itos[1])
    model.load_pretrained_emb(DOC.vocab.vectors)

    print_params(model)
    print(model)

    # Adadelta with a small L2 weight decay.
    wd = 1e-5
    opt = torch.optim.Adadelta(model.parameters(), lr=args.lr, rho=0.95, weight_decay=wd)

    # Best validation scores so far; validate() updates and returns them.
    best_answer_type_score = -np.inf
    best_entity_score = -np.inf
    best_iter = 0
    offset = 0  # NOTE(review): unused in this function

    # Per-element NLL loss; reduction is presumably done inside the model
    # after equip() — TODO confirm against the model implementation.
    criterion = nn.NLLLoss(reduction='none')
    model.equip(criterion)

    start = time.time()

    early_stop = args.early_stop  # NOTE(review): unused; stopping is signalled by validate()

    # Test split is constructed up front, but only `dev` is evaluated below.
    test_data = DocDataset(path=args.test, text_field=DOC, order_field=ORDER, graph_field=GRAPH)
    test_real = DocIter(test_data, 1, device='cuda', batch_size_fn=None,
                        train=False, repeat=False, shuffle=False, sort=False)

    # Optional sanity validation pass before any training (disabled by default).
    fake_epc=-1
    is_validate_before_train=False
    timer=Timer()
    if is_validate_before_train:
        validate(args,   dev,  checkpoint,model,DOC,fake_epc,best_answer_type_score,best_iter,is_validate_before_train,best_entity_score,timer)
    
    
    # NOTE(review): args.maximum_steps is treated as an epoch count here.
    for epc in range(args.maximum_steps):
        for iters, batch in enumerate(train_iter):
            model.train()

            model.zero_grad()

            t1 = time.time()

            # Forward pass returns the training loss directly.
            loss = model(batch.doc, batch.order, batch.doc_len, batch.e_words, batch.elocs,batch.labels,batch.answer_types)

            loss.backward()
            opt.step()

            t2 = time.time()
            print('epc:{} iter:{} loss:{:.2f} t:{:.2f} lr:{:.1e}'.format(epc, iters + 1, loss, t2 - t1,
                                                                         opt.param_groups[0]['lr']))

        # Skip validation during the first 5 warm-up epochs.
        if epc < 5:
            print(f"finish epoch {epc}, lastest loss{loss}")
            continue

        # validate() tracks best scores, handles checkpointing, and signals early stop.
        best_answer_type_score,best_iter,best_entity_score,is_early_stop=validate(args,   dev,  checkpoint,model,DOC,epc,best_answer_type_score,best_iter,False,best_entity_score,timer)
        if is_early_stop:
            break

    # Report total wall-clock training time.
    print('\n*******Train Done********{}'.format(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())))
    minutes = (time.time() - start) // 60
    if minutes < 60:
        print('best:{:.2f}, iter:{}, time:{} mins, lr:{:.1e}, '.format(best_answer_type_score, best_iter, minutes,
                                                                       opt.param_groups[0]['lr']))
    else:
        hours = minutes / 60
        print('best:{:.2f}, iter:{}, time:{:.1f} hours, lr:{:.1e}, '.format(best_answer_type_score, best_iter, hours,
                                                                            opt.param_groups[0]['lr']))

    # Reload the best checkpoint (presumably written by validate() — confirm)
    # and re-evaluate on `dev`.
    checkpoint = torch.load('{}/{}.best.pt'.format(args.model_path, args.model), map_location='cpu')
    model.load_state_dict(checkpoint['model'])

    with torch.no_grad():
        if args.loss:
            entity_score = valid_model(args, model, dev, DOC, 'loss')
            print('epc:{}, loss:{:.2f} best:{:.2f}\n'.format(epc, entity_score, best_answer_type_score))
        else:
            entity_acc, answer_type_acc, ktau, pm  = valid_model(args, model, dev, DOC)
            print('test entity_acc:{:.4%} answer_type_acc:{:.2%} ktau:{:.4f} pm:{:.2%}'.format(entity_acc, answer_type_acc, ktau, pm))
def train(args, train_iter, dev, fields, checkpoint):
    """Step-based PointerNet training loop with periodic validation.

    Counts optimizer steps against args.maximum_steps (rather than epochs),
    prints a running-average loss every args.report steps, and validates and
    saves a checkpoint every args.valid_steps steps.

    Args:
        args: argparse namespace; reads gpu, test, report, valid_steps,
            maximum_steps, loss, model_path among others.
        train_iter: iterator yielding training batches.
        dev: validation-set iterator, passed to valid_model().
        fields: (DOC, ORDER, GRAPH) field objects; pretrained vectors on
            DOC.vocab are optional in this variant.
        checkpoint: rebound locally when saving; the incoming value is unused.
    """
    model = PointerNet(args)

    # Move to GPU only when requested (CPU-safe).
    if args.gpu:
        model = model.cuda()

    DOC, ORDER, GRAPH = fields
    # Sanity print: token at vocab index 1.
    print('1:', DOC.vocab.itos[1])
    if DOC.vocab.vectors:
        model.load_pretrained_emb(DOC.vocab.vectors)

    print_params(model)
    print(model)

    # Optimizer (and schedule) built from args by the project's wrapper class.
    opt = Optimizer.from_opt(model, args)
    # wd = 1e-5
    # opt = torch.optim.Adadelta(model.parameters(), lr=args.lr, rho=0.95, weight_decay=wd)

    best_score = -np.inf
    best_iter = 0
    offset = 0  # NOTE(review): unused in this function

    # Per-element NLL loss; the model consumes it via equip().
    criterion = nn.NLLLoss(reduction='none')
    model.equip(criterion)

    start = time.time()

    early_stop = args.early_stop  # NOTE(review): unused — the early-stop check below is commented out

    # Test split is constructed but never evaluated in this variant.
    test_data = DocDataset(path=args.test,
                           text_field=DOC,
                           order_field=ORDER,
                           graph_field=GRAPH)
    test_real = DocIter(test_data,
                        1,
                        device="cuda" if args.gpu else "cpu",
                        batch_size_fn=None,
                        train=False,
                        repeat=False,
                        shuffle=False,
                        sort=False)

    steps = 0        # optimizer steps taken so far
    total_loss = 0   # loss accumulated since the last report
    epc = 0          # epoch counter, for logging only

    t1 = time.time()
    # for epc in range(args.maximum_steps):
    while True:
        for iters, batch in enumerate(train_iter):
            model.train()

            model.zero_grad()

            # print("batch.doc:", batch.doc)

            loss = model(batch.doc, batch.order, batch.doc_len, batch.e_words,
                         batch.elocs)

            loss.backward()
            opt.step()

            steps += 1
            total_loss += loss
            # Report the mean loss over the last args.report steps.
            if steps % args.report == 0:
                t2 = time.time()
                total_loss /= args.report
                print(
                    'epc:{} steps:{}/{} loss:{:.4f} t:{:.2f} lr:{:.8f}'.format(
                        epc, steps, args.maximum_steps, total_loss, t2 - t1,
                        opt.learning_rate()))
                total_loss = 0
                t1 = t2
            # Periodic validation + checkpointing (one file per validation point).
            if steps % args.valid_steps == 0:

                with torch.no_grad():
                    print('valid..............')
                    if args.loss:
                        score = valid_model(args, model, dev, DOC, 'loss')
                        print('epc:{}, loss:{:.2f} best:{:.2f}\n'.format(
                            epc, score, best_score))
                    else:
                        score, pmr, ktau, _ = valid_model(
                            args, model, dev, DOC)
                        # print('epc:{}, val acc:{:.4f} best:{:.4f} pmr:{:.2f} ktau:{:.4f}'.format(epc, score, best_score,
                        #                                                                       pmr, ktau))
                        print('Val acc:{:.4f} pmr:{:.2f} ktau:{:.4f}'.format(
                            score, pmr, ktau))

                        # NOTE(review): checkpoint is saved unconditionally
                        # (not only on improvement) and best_score is never
                        # updated here, so 'loss' stays -inf — confirm intent.
                        checkpoint = {
                            'model': model.state_dict(),
                            'args': args,
                            'loss': best_score
                        }
                        torch.save(checkpoint,
                                   '{}/{}.pt'.format(args.model_path, steps))
            # if early_stop and (epc - best_iter) >= early_stop:
            #     print('early stop at epc {}'.format(epc))
            #     break
        epc += 1
        # Stop once the step budget is exhausted (checked once per epoch).
        if steps >= args.maximum_steps:
            break
    print('\n*******Train Done********{}'.format(
        time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())))
# Example 4 (Esempio n. 4)
def train(args, train_iter, dev, fields, checkpoint, permutation_file,
          permutation_length):
    """Epoch-based training for the permutation (or sentence-ordering) task.

    Trains a PermutationPredictor when is_permutation_task() else a
    PointerNet, keeps the best checkpoint by validation score, reloads it
    and evaluates on the test set.

    Args:
        args: argparse namespace; reads lr, early_stop, test, maximum_steps,
            model_path, model, loss, permutation_number among others.
        train_iter: iterator yielding training batches (including
            batch.permutation_lables).
        dev: validation-set iterator.
        fields: (DOC, ORDER, GRAPH) field objects; DOC.vocab supplies
            pretrained vectors.
        checkpoint: optional warm-start state dict wrapper; loaded with
            strict=False so architectures may differ partially.
        permutation_file: forwarded to the test DocIter.
        permutation_length: forwarded to dataset and iterator construction.

    Returns:
        Tuple (best_score, mean_training_loss).
    """
    if is_permutation_task():
        model = PermutationPredictor(args)
    else:
        model = PointerNet(args)
    # Warm start from a prior checkpoint when provided (partial load allowed).
    if checkpoint != None:
        model.load_state_dict(checkpoint['model'], strict=False)
    model.cuda()

    DOC, ORDER, GRAPH = fields
    # Sanity print: token at vocab index 1.
    print('1:', DOC.vocab.itos[1])
    model.load_pretrained_emb(DOC.vocab.vectors)

    print_params(model)
    print(model)

    # Adadelta with a small L2 weight decay.
    wd = 1e-5
    opt = torch.optim.Adadelta(model.parameters(),
                               lr=args.lr,
                               rho=0.95,
                               weight_decay=wd)

    best_score = -np.inf
    best_iter = 0
    offset = 0  # NOTE(review): unused in this function

    # Per-element NLL loss; the model consumes it via equip().
    criterion = nn.NLLLoss(reduction='none')
    model.equip(criterion)

    start = time.time()

    early_stop = args.early_stop

    test_data = DocDataset(path=args.test,
                           text_field=DOC,
                           order_field=ORDER,
                           graph_field=GRAPH,
                           permutation_length=permutation_length,
                           permutation_number=args.permutation_number)
    test_real = DocIter(test_data,
                        1,
                        permutation_file,
                        permutation_length,
                        device='cuda',
                        batch_size_fn=None,
                        train=False,
                        repeat=False,
                        shuffle=False,
                        sort=False)

    # Per-epoch metric history; "loss"/"val_acc" are declared but not filled.
    history = {"acc": [], "loss": [], "val_acc": [], "epoch": []}
    val_loss = 0.0   # NOTE(review): despite the name, accumulates TRAINING loss
    val_steps = 0
    for epc in range(args.maximum_steps):

        for iters, batch in enumerate(train_iter):
            model.train()

            model.zero_grad()

            t1 = time.time()

            loss = model(batch.doc, batch.order, batch.doc_len, batch.e_words,
                         batch.elocs, batch.permutation_lables)

            loss.backward()
            opt.step()

            t2 = time.time()
            print('epc:{} iter:{} loss:{:.2f} t:{:.2f} lr:{:.1e}'.format(
                epc, iters + 1, loss, t2 - t1, opt.param_groups[0]['lr']))
            val_loss += loss.detach().cpu().numpy()
            val_steps += 1
        # Skip validation during the first 5 warm-up epochs.
        if epc < 5:
            continue

        with torch.no_grad():
            print('valid..............')
            if args.loss:
                score = valid_model(args, model, dev, DOC, 'loss')
                print('epc:{}, loss:{:.2f} best:{:.2f}\n'.format(
                    epc, score, best_score))
            else:
                score, pmr, ktau, _ = valid_model(args, model, dev, DOC)
                history["epoch"].append(epc)
                history["acc"].append(score)
                print(
                    'epc:{}, val acc:{:.4f} best:{:.4f} pmr:{:.2f} ktau:{:.4f}'
                    .format(epc, score, best_score, pmr, ktau))

            # Checkpoint only on validation improvement.
            if score > best_score:
                best_score = score
                best_iter = epc

                print('save best model at epc={}'.format(epc))
                checkpoint = {
                    'model': model.state_dict(),
                    'args': args,
                    'loss': best_score
                }

                # NOTE(review): `path` is defined only inside this branch;
                # torch.load(path) below raises NameError if no epoch ever
                # improved — confirm maximum_steps/warm-up guarantee a save.
                if is_permutation_task():
                    path = '{}/{}.best.pt'.format(args.model_path, args.model)
                else:
                    path = '{}/{}.sentence_order_best.pt'.format(
                        args.model_path, args.model)
                torch.save(checkpoint, path)

            # Early stop after `early_stop` epochs without improvement.
            if early_stop and (epc - best_iter) >= early_stop:
                print('early stop at epc {}'.format(epc))
                break

    # Report total wall-clock training time.
    print('\n*******Train Done********{}'.format(
        time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())))
    minutes = (time.time() - start) // 60
    if minutes < 60:
        print('best:{:.2f}, iter:{}, time:{} mins, lr:{:.1e}, '.format(
            best_score, best_iter, minutes, opt.param_groups[0]['lr']))
    else:
        hours = minutes / 60
        print('best:{:.2f}, iter:{}, time:{:.1f} hours, lr:{:.1e}, '.format(
            best_score, best_iter, hours, opt.param_groups[0]['lr']))

    # Reload the best checkpoint and evaluate once on the test set.
    checkpoint = torch.load(path, map_location='cpu')
    model.load_state_dict(checkpoint['model'])

    with torch.no_grad():
        acc, pmr, ktau, pm = valid_model(args,
                                         model,
                                         test_real,
                                         DOC,
                                         shuflle_times=1)
        print('test acc:{:.4%} pmr:{:.2%} ktau:{:.4f} pm:{:.2%}'.format(
            acc, pmr, ktau, pm))

    # Mean training loss over all optimizer steps.
    loss = val_loss / val_steps
    return best_score, loss
# Example 5 (Esempio n. 5)
def run_model(args):
    """Entry point: train or decode depending on args.mode.

    In 'train'/'example' mode: optionally resume from a checkpoint, build the
    train/dev datasets and iterators, then call train(). In any other mode:
    load the best checkpoint, rebuild the fields and the test iterator, and
    call decode().

    Args:
        args: argparse namespace; reads mode, load_from, resume, main_path,
            model_path, decoding_path, model, seed, corpus, valid, test,
            vocab, batch_size, gpu among others.
    """
    if args.mode == 'train' or args.mode=='example':
        # A single --load_from path means "initialize or resume from it".
        if args.load_from is not None and len(args.load_from) == 1:
            load_from = args.load_from[0]
            print('{} load the checkpoint from {} for initilize or resume'.
                  format(curtime(), load_from))
            checkpoint = torch.load(load_from, map_location='cpu')
        else:
            checkpoint = None

        # if not resume(initilize), only need model parameters
        if args.resume:
            print('update args from checkpoint')
            load_dict = checkpoint['args'].__dict__
            # These flags keep their command-line values when resuming.
            except_name = ['mode', 'resume', 'maximum_steps']
            override(args, load_dict, tuple(except_name))

        # Resolve and create output directories under the main path.
        main_path = Path(args.main_path)
        model_path = main_path / args.model_path
        decoding_path = main_path / args.decoding_path

        for path in [model_path, decoding_path]:
            path.mkdir(parents=True, exist_ok=True)

        args.model_path = str(model_path)
        args.decoding_path = str(decoding_path)

        # '[time]' placeholder becomes a timestamped model name.
        if args.model == '[time]':
            args.model = time.strftime("%m.%d_%H.%M.", time.gmtime())

        # setup random seeds
        set_seeds(args.seed)

        # special process, shuffle each document
        # DOC = DocField(batch_first=True, include_lengths=True, eos_token='<eos>', init_token='<bos>')
        DOC = DocField(batch_first=True, include_lengths=True)
        ORDER = data.Field(batch_first=True, include_lengths=True, pad_token=0, use_vocab=False,
                           sequential=True)

        GRAPH = GraphField(batch_first=True)
        LABEL_FIELD=data.Field(batch_first=True, include_lengths=True, use_vocab=False)  # NOTE(review): unused below
        train_data = DocDataset(path=args.corpus, text_field=DOC, order_field=ORDER, graph_field=GRAPH)

        dev_data = DocDataset(path=args.valid, text_field=DOC, order_field=ORDER, graph_field=GRAPH)

        # Vocabulary is pre-built and loaded from disk, not rebuilt here.
        DOC.vocab = torch.load(args.vocab)
        print('vocab {} loaded'.format(args.vocab))
        args.__dict__.update({'doc_vocab': len(DOC.vocab)})

        # Training iterator: shuffled, sorted by document length for batching.
        # NOTE(review): device='cuda' is hard-coded here, unlike the
        # "cuda if args.gpu else cpu" pattern used elsewhere — confirm.
        train_flag = True
        train_real = DocIter(train_data, args.batch_size, device='cuda',
                             train=train_flag,
                             shuffle=train_flag,
                             sort_key=lambda x: len(x.doc))

        # Validation iterator: batch size 1, fixed order.
        devbatch = 1
        dev_real = DocIter(dev_data, devbatch, device='cuda', batch_size_fn=None,
                           train=False, repeat=False, shuffle=False, sort=False)

        # Dump the fully-resolved configuration for reproducibility.
        args_str = json.dumps(args.__dict__, indent=4, sort_keys=True)
        print(args_str)

        print('{} Start training'.format(curtime()))
        train(args, train_real, dev_real, (DOC, ORDER, GRAPH), checkpoint)
    else:
        # Decode mode requires exactly one checkpoint to load.
        # NOTE(review): the '.coqa_best.pt' suffix is appended to the given
        # path, so --load_from must be the path prefix — confirm convention.
        if len(args.load_from) == 1:
            load_from = '{}.coqa_best.pt'.format(args.load_from[0])
            print('{} load the best checkpoint from {}'.format(curtime(), load_from))
            checkpoint = torch.load(load_from, map_location='cpu')
        else:
            raise RuntimeError('must load model')

        # when translate load_dict update args except some
        print('update args from checkpoint')
        load_dict = checkpoint['args'].__dict__
        # Decode-time settings keep their command-line values.
        except_name = ['mode', 'load_from', 'test', 'writetrans', 'beam_size', 'batch_size']
        override(args, load_dict, tuple(except_name))

        print('{} Load test set'.format(curtime()))

        # Rebuild the same fields used at training time.
        DOC = DocField(batch_first=True, include_lengths=True)
        ORDER = data.Field(batch_first=True, include_lengths=True, pad_token=0, use_vocab=False,
                           sequential=True)
        GRAPH = GraphField(batch_first=True)

        DOC.vocab = torch.load(args.vocab)
        print('vocab {} loaded'.format(args.vocab))
        args.__dict__.update({'doc_vocab': len(DOC.vocab)})

        args_str = json.dumps(args.__dict__, indent=4, sort_keys=True)
        print(args_str)

        # Test iterator: batch size 1, fixed order.
        test_data = DocDataset(path=args.test, text_field=DOC, order_field=ORDER, graph_field=GRAPH)
        test_real = DocIter(test_data, 1, device='cuda', batch_size_fn=None,
                            train=False, repeat=False, shuffle=False, sort=False)

        print('{} Load data done'.format(curtime()))
        start = time.time()
        decode(args, test_real, (DOC, ORDER), checkpoint)
        print('{} Decode done, time {} mins'.format(curtime(), (time.time() - start) / 60))