コード例 #1
0
def _test(model, test_dataloader, mse, optimizer, args, results,
          lang_availables):
    """Evaluate ``model`` on ``test_dataloader`` and store the outcome.

    The model is switched to eval mode and every batch is passed through
    ``run_model`` (with ``getVector=True``) under ``torch.no_grad()``.
    Everything collected is written into ``results['test']``:

    * ``loss``    -- ``np.mean`` of the per-batch losses
    * ``pred``    -- all predictions, in dataloader order
    * ``lang``    -- the ``'lang'`` entry of every batch, concatenated
    * ``raw_src`` / ``raw_ref`` / ``raw_hyp`` / ``sid`` -- the matching
      batch fields, concatenated
    * ``vector``  -- the fourth value returned by ``run_model``
      (presumably hidden-state vectors -- confirm against ``run_model``)

    When ``args.darr`` is falsy, ``true`` and ``pearson`` are stored as
    well, plus ``{lang}_pred`` / ``{lang}_true`` / ``{lang}_pearson`` for
    every language in ``lang_availables``.

    Args:
        model: object exposing ``eval()``; forwarded to ``run_model``.
        test_dataloader: iterable of batch mappings providing the keys
            ``'lang'``, ``'raw_src'``, ``'raw_ref'``, ``'raw_hyp'`` and
            ``'sid'``.
        mse: forwarded to ``run_model`` unchanged.
        optimizer: forwarded to ``run_model`` unchanged.
        args: namespace providing ``langs`` and ``darr``.
        results: mapping with an existing ``'test'`` sub-mapping; it is
            mutated in place.
        lang_availables: list of language tags; languages seen in the
            test data are appended in place when missing.

    Returns:
        The tuple ``(model, test_dataloader, mse, optimizer, args,
        results, lang_availables)``.

    Raises:
        KeyError: in the non-``darr`` branch, if a batch language or an
            entry of ``lang_availables`` is not listed in ``args.langs``.
    """
    model.eval()

    # Per-language buckets are pre-created for args.langs only; 'all'
    # holds the results in dataloader order.
    preds_ls = {lang: [] for lang in args.langs}
    preds_ls['all'] = []
    trues_ls = {lang: [] for lang in args.langs}
    trues_ls['all'] = []
    losses = []
    langs_ls = []
    raw_srcs = []
    raw_refs = []
    raw_hyps = []
    sids = []
    vectors = []

    for batch_data in test_dataloader:
        with torch.no_grad():
            loss, preds, labels, hs = run_model(model,
                                                batch_data,
                                                args,
                                                mse,
                                                optimizer,
                                                getVector=True)
        langs_ls.extend(batch_data['lang'])
        losses.append(loss)
        preds_ls['all'].extend(preds)
        trues_ls['all'].extend(labels)
        raw_srcs.extend(batch_data['raw_src'])
        raw_refs.extend(batch_data['raw_ref'])
        raw_hyps.extend(batch_data['raw_hyp'])
        sids.extend(batch_data['sid'])
        vectors.extend(hs)

    test_results = results['test']
    test_results['loss'] = np.mean(losses)
    test_results['pred'] = preds_ls['all']
    # The language list used to be written to 'raw_src' and was then
    # overwritten by the raw sources on the next line, so it was never
    # saved. It now gets its own key.
    test_results['lang'] = langs_ls
    test_results['raw_src'] = raw_srcs
    test_results['raw_ref'] = raw_refs
    test_results['raw_hyp'] = raw_hyps
    test_results['sid'] = sids
    test_results['vector'] = vectors

    if not args.darr:
        test_results['pearson'] = utils.calc_pearson(
            preds_ls['all'], trues_ls['all'])
        test_results['true'] = trues_ls['all']

        # Distribute every sample into the bucket of its language.
        for lang, pred, true in zip(langs_ls, preds_ls['all'],
                                    trues_ls['all']):
            preds_ls[lang].append(pred)
            trues_ls[lang].append(true)
            if lang not in lang_availables:
                lang_availables.append(lang)

        for lang in lang_availables:
            test_results['{}_pred'.format(lang)] = preds_ls[lang]
            test_results['{}_true'.format(lang)] = trues_ls[lang]
            test_results['{}_pearson'.format(lang)] = utils.calc_pearson(
                preds_ls[lang], trues_ls[lang])

    return model, test_dataloader, mse, optimizer, args, results, lang_availables
コード例 #2
0
def _train(model, train_dataloader, mse, optimizer, args, results):
    """Run one pass over ``train_dataloader`` in train mode and log metrics.

    For every batch the optimizer gradients are zeroed and ``run_model``
    is called with ``train=True``. Any backward pass or optimizer step is
    not visible here and presumably happens inside ``run_model``.

    After the pass, one entry is appended to each of the lists ``loss``
    (``np.mean`` of the batch losses), ``pearson``, ``pred``, ``true``,
    ``raw_src``, ``raw_ref``, ``raw_hyp`` and ``sid`` found under
    ``results['train'][args.optimizer]['batch=<batch_size>'][n_trial - 1]``.

    Args:
        model: object exposing ``train()``; forwarded to ``run_model``.
        train_dataloader: iterable of batch mappings providing the keys
            ``'raw_src'``, ``'raw_ref'``, ``'raw_hyp'`` and ``'sid'``.
        mse: forwarded to ``run_model`` unchanged.
        optimizer: object exposing ``zero_grad()``; forwarded to
            ``run_model``.
        args: namespace providing ``debug``, ``logger``, ``optimizer``,
            ``batch_size`` and ``n_trial``.
        results: nested result structure, mutated in place.

    Returns:
        The tuple ``(model, train_dataloader, mse, optimizer, args,
        results)``.
    """
    model.train()
    losses = []
    preds_ls = []
    trues_ls = []
    raw_srcs = []
    raw_refs = []
    raw_hyps = []
    sids = []

    for n_iter, batch_data in enumerate(train_dataloader):
        if args.debug:
            # Logger methods do not take print()'s ``end`` keyword; a
            # standard logging.Logger raises TypeError on it as soon as
            # DEBUG is enabled, so it is not passed.
            args.logger.debug('\rnumber of iteration = {}'.format(n_iter))
        optimizer.zero_grad()
        loss, preds, labels = run_model(model,
                                        batch_data,
                                        args,
                                        mse,
                                        optimizer,
                                        train=True)
        losses.append(loss)
        preds_ls.extend(preds)
        trues_ls.extend(labels)
        raw_srcs.extend(batch_data['raw_src'])
        raw_refs.extend(batch_data['raw_ref'])
        raw_hyps.extend(batch_data['raw_hyp'])
        sids.extend(batch_data['sid'])

    # Resolve the per-trial record once instead of on every append.
    record = results['train'][args.optimizer]['batch={}'.format(
        args.batch_size)][args.n_trial - 1]
    record['loss'].append(np.mean(losses))
    record['pearson'].append(utils.calc_pearson(preds_ls, trues_ls))
    record['pred'].append(preds_ls)
    record['true'].append(trues_ls)
    record['raw_src'].append(raw_srcs)
    record['raw_ref'].append(raw_refs)
    record['raw_hyp'].append(raw_hyps)
    record['sid'].append(sids)

    return model, train_dataloader, mse, optimizer, args, results
コード例 #3
0
ファイル: ___test.py プロジェクト: kosuketa/myscripts
def _valid(model, valid_dataloader, mse, optimizer, args, results,
           best_valid_loss, best_valid_pearson, n_epoch):
    """Validate ``model``, record metrics, adapt the LR and checkpoint.

    Steps, in order:

    1. Switch the model to eval mode and run every batch through
       ``run_model`` under ``torch.no_grad()``.
    2. Append ``loss`` (``np.mean`` of the batch losses), ``pearson``,
       ``pred``, ``true``, ``raw_src``, ``raw_ref`` and ``raw_hyp`` to
       ``results['valid'][args.optimizer]['batch=<batch_size>'][n_trial - 1]``.
    3. If the mean loss is lower than ``best_valid_loss``, it becomes the
       new best; otherwise the optimizer is replaced by
       ``update_lr(optimizer, args)``.
    4. If the new Pearson value exceeds ``best_valid_pearson['pearson']``,
       update ``best_valid_pearson`` and save a checkpoint to
       ``<args.tmp_path>/best_valid_checkpoint.pth`` (including the apex
       AMP state when ``args.amp`` is truthy), registering the file in
       ``args.tmp_files``.

    Returns:
        The tuple ``(model, valid_dataloader, mse, optimizer, args,
        results, best_valid_loss, best_valid_pearson)``.
    """
    model.eval()

    batch_losses, predictions, targets = [], [], []
    sources, references, hypotheses = [], [], []
    for batch in valid_dataloader:
        with torch.no_grad():
            loss, preds, labels = run_model(model, batch, args, mse,
                                            optimizer)
        batch_losses.append(loss)
        predictions.extend(preds)
        targets.extend(labels)
        sources.extend(batch['raw_src'])
        references.extend(batch['raw_ref'])
        hypotheses.extend(batch['raw_hyp'])

    # Record for the current optimizer / batch size / trial.
    batch_key = 'batch={}'.format(args.batch_size)
    record = results['valid'][args.optimizer][batch_key][args.n_trial - 1]

    mean_loss = np.mean(batch_losses)
    record['loss'].append(mean_loss)
    record['pearson'].append(utils.calc_pearson(predictions, targets))
    for field, collected in (('pred', predictions),
                             ('true', targets),
                             ('raw_src', sources),
                             ('raw_ref', references),
                             ('raw_hyp', hypotheses)):
        record[field].append(collected)

    # Update lr: only when the validation loss did not improve.
    if best_valid_loss > mean_loss:
        best_valid_loss = mean_loss
    else:
        optimizer = update_lr(optimizer, args)

    outcome = (model, valid_dataloader, mse, optimizer, args, results,
               best_valid_loss, best_valid_pearson)

    # Save model: only when the Pearson correlation beats the best so far.
    current_pearson = record['pearson'][-1]
    if not (best_valid_pearson['pearson'] < current_pearson):
        return outcome

    best_valid_pearson['pearson'] = current_pearson
    best_valid_pearson['optimizer'] = args.optimizer
    best_valid_pearson['batch_size'] = args.batch_size
    best_valid_pearson['epoch'] = n_epoch

    args.logger.info('saving a model!')
    checkpoint = {
        'model': model.state_dict(),
        'optimizer': optimizer.state_dict(),
    }
    if args.amp:
        checkpoint['amp'] = apex.amp.state_dict()

    checkpoint_file = os.path.join(args.tmp_path, 'best_valid_checkpoint.pth')
    torch.save(checkpoint, checkpoint_file)
    if checkpoint_file not in args.tmp_files:
        args.tmp_files.append(checkpoint_file)
    args.logger.info('finished saving!')

    return outcome