Code Example #1
import numpy as np
from sklearn.model_selection import KFold

# load_data, get_split and fetch_dali_loader are helpers from the surrounding project.
def get_dataloader_fn(*, data_dir: str, batch_size: int, precision: str):
    kwargs = {
        "dim": 3,
        "gpus": 1,
        "seed": 0,
        "num_workers": 8,
        "meta": None,
        "oversampling": 0,
        "benchmark": False,
        "patch_size": [128, 128, 128],
    }

    # Load paired image/label .npy files and keep only fold 2's validation split.
    imgs, lbls = load_data(data_dir, "*_x.npy"), load_data(data_dir, "*_y.npy")
    kfold = KFold(n_splits=5, shuffle=True, random_state=12345)
    _, val_idx = list(kfold.split(imgs))[2]
    imgs, lbls = get_split(imgs, val_idx), get_split(lbls, val_idx)
    dataloader = fetch_dali_loader(imgs, lbls, batch_size, "bermuda", **kwargs)

    def _dataloader_fn():
        # Generator yielding (filenames, named inputs, named labels) for each batch.
        for i, batch in enumerate(dataloader):
            fname = [f"{i}_{j}" for j in range(batch_size)]
            img = batch["image"].numpy()
            if "fp16" in precision:
                img = img.astype(np.half)  # cast inputs to half precision when requested
            img = {"INPUT__0": img}
            lbl = {"OUTPUT__0": batch["label"].squeeze(1).numpy().astype(int)}
            yield fname, img, lbl

    return _dataloader_fn
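A minimal usage sketch for this generator factory (the data directory, batch size and precision string below are placeholder values, not taken from the original code):

# Hypothetical usage: build the factory, then iterate the (fname, img, lbl) triples it yields.
dataloader_fn = get_dataloader_fn(data_dir="/data/val", batch_size=2, precision="fp16")
for fname, img, lbl in dataloader_fn():
    print(fname, img["INPUT__0"].shape, lbl["OUTPUT__0"].shape)
    break  # inspect a single batch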
Code Example #2
    # setup() hook of the project's data module; load_data, get_test_fnames,
    # get_split and is_main_process are project helpers.
    def setup(self, stage=None):
        imgs = load_data(self.data_path, "*_x.npy")
        lbls = load_data(self.data_path, "*_y.npy")

        self.test_imgs, self.kwargs["meta"] = get_test_fnames(
            self.args, self.data_path, self.kwargs["meta"])
        if self.args.exec_mode != "predict" or self.args.benchmark:
            train_idx, val_idx = list(self.kfold.split(imgs))[self.args.fold]
            self.train_imgs = get_split(imgs, train_idx)
            self.train_lbls = get_split(lbls, train_idx)
            self.val_imgs = get_split(imgs, val_idx)
            self.val_lbls = get_split(lbls, val_idx)
            if is_main_process():
                ntrain, nval = len(self.train_imgs), len(self.val_imgs)
                print(f"Number of examples: Train {ntrain} - Val {nval}")
        elif is_main_process():
            print(f"Number of test examples: {len(self.test_imgs)}")
Code Example #3
import codecs
import json

# EvalInfo, get_split and eval_func are helpers defined elsewhere in the project.
def eval_main(opt, fn_model):
    dev_eval_info_list = [
        EvalInfo(it) for it in json.load(
            open('%s.%s.eval' % (fn_model, get_split(opt)), 'r'))
    ]
    # read source tokens
    with codecs.open(opt.src, 'r', encoding='utf-8') as f_in:
        for i, l in enumerate(f_in):
            dev_eval_info_list[i].src = l.strip().split(' ')
    # filter: src_len >= 2
    dev_eval_info_list = list(
        filter(lambda x: len(x.src) >= 2, dev_eval_info_list))

    # start evaluation
    r = eval_func(
        ('noise:enc_word:mul:exp:miu_norm', dev_eval_info_list, opt.metric))
    print(opt.metric)
    print('ATT: %f' % r['att-noise:enc_word:mul:miu_norm'])
    print('BP: %f' % r['bp-noise:enc_word:mul:miu_norm'])
    print('')
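Example #3 only requires EvalInfo to wrap each record loaded from the .eval JSON file and to accept a src attribute afterwards; a hypothetical minimal version (the real class in the project likely carries more fields and methods) could look like:

class EvalInfo:
    # Hypothetical minimal wrapper over one per-example dict from the .eval file.
    def __init__(self, info):
        self.info = info  # e.g. pred_score, gold_score, confidence, ...
        self.src = None   # source tokens, attached later in eval_main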
Code Example #4
File: evaluate.py  Project: seanhtchoi/confidence
# Python 2-style code (iteritems, xrange, izip); it assumes evaluate.py imports
# codecs, glob, itertools, json, os, numpy as np, kenlm, onmt, tqdm, pprint and Path
# elsewhere, along with the project's helper functions used below.
def evaluate_main(opt):
    # read language model
    if 'lm' in opt.confidence.split(','):
        if Path(opt.lm_path).exists():
            lm = kenlm.Model(opt.lm_path)
        else:
            print('==== LM does not exist: ' + opt.lm_path)

    for fn_model in tqdm(glob.glob(opt.model_path)):
        print(fn_model)
        opt_train = json.load(
            open(os.path.join(Path(fn_model).dirname(), 'opt.json'), 'r'))
        opt.model = fn_model
        translator = onmt.Translator(opt)
        # reset dropout rate
        translator.reset_dropout_rate(opt.dropout_rate)
        outF = codecs.open('%s.%s.sample' %
                           (opt.model, get_split(opt)), 'w', encoding='utf-8')
        tgtF = codecs.open(opt.tgt, 'r', encoding='utf-8')
        srcBatch, tgtBatch = [], []
        count = 0
        eval_info_list = []

        if opt.dump_beam:
            translator.initBeamAccum()

        for line in line_iter(codecs.open(opt.src, 'r', encoding='utf-8')):
            if line is not None:
                srcTokens = line.split()
                srcBatch += [srcTokens]
                tgtTokens = tgtF.readline().split()
                tgtBatch += [tgtTokens]

                if len(srcBatch) < opt.batch_size:
                    continue
            else:
                # at the end of file, check last batch
                if len(srcBatch) == 0:
                    break

            predBatch, predScore, goldScore, attn = translator.translate(
                srcBatch, tgtBatch)
            conf, conf_bp_src, conf_att_src, conf_tgt, conf_each_word = {}, {}, {}, {}, {}
            if opt.confidence != 'none':
                for confidence_method in set(opt.confidence.split(',')):
                    if confidence_method == 'lm':
                        conf[confidence_method] = list(
                            map(lambda x: lm.score(' '.join(x[0])), srcBatch))
                    else:
                        d, w = translator.confidence(srcBatch, list(
                            map(lambda x: x[0], predBatch)), confidence_method, opt)
                        for k, v in d.iteritems():
                            conf[k] = v
                        if (opt.conf_bp != 'none') and (len(w) > 0):
                            for k, v in w.iteritems():
                                conf_bp_src[k], conf_att_src[k], conf_tgt[k] = translator.confidence_bp(
                                    srcBatch, list(map(lambda x: x[0], predBatch)), v, opt_train)
            if opt.conf_each_word != 'none':
                for confidence_method in set(opt.conf_each_word.split(',')):
                    d = translator.confidence_each_word(srcBatch, list(
                        map(lambda x: x[0], predBatch)), confidence_method, opt)
                    for k, v in d.iteritems():
                        conf_each_word[k] = v

            for b in range(len(predBatch)):
                count += 1
                if opt.metric == 'django':
                    # post-process: copy <unk> tokens from srcBatch
                    def copy_unk(src, pred, attn_score):
                        post = []
                        for i, pred_token in enumerate(pred):
                            if pred_token == '<unk>':
                                _, ids = attn_score[i].sort(0, descending=True)
                                post.append(src[ids[0]])
                            else:
                                post.append(pred_token)
                        return post
                    outF.write(
                        " ".join(copy_unk(srcBatch[b], predBatch[b][0], attn[b][0])) + '\n')
                else:
                    outF.write(" ".join(predBatch[b][0]) + '\n')
                outF.flush()

                info = {'id': count, 'pred_score': predScore[b][0], 'pred_len': len(predBatch[b][0]), 'gold_score': goldScore[b], 'gold_len': len(
                    tgtBatch[b]), 'src_len': len(srcBatch[b]), 'src_unk': sum([0 if (translator.src_dict.lookup(w, None) is not None) else 1 for w in srcBatch[b]])}
                if opt.confidence != 'none':
                    info['confidence'] = dict(
                        [(k, v[b]) for k, v in conf.iteritems()])
                if opt.conf_bp != 'none':
                    info['conf_bp_src'] = dict([(k, v[b])
                                                for k, v in conf_bp_src.iteritems()])
                    info['conf_att_src'] = dict([(k, v[b])
                                                 for k, v in conf_att_src.iteritems()])
                    info['conf_tgt'] = dict([(k, v[b])
                                             for k, v in conf_tgt.iteritems()])
                if opt.conf_each_word != 'none':
                    info['conf_each_word'] = dict(
                        [(k, v[b]) for k, v in conf_each_word.iteritems()])
                eval_info_list.append(info)

                if opt.verbose:
                    print('')
                    # show attention score
                    print(" ".join(predBatch[b][0]))
                    for i, w in enumerate(predBatch[b][0]):
                        print(w)
                        _, ids = attn[b][0][i].sort(0, descending=True)
                        for j in ids[:5].tolist():
                            w_src = translator.src_dict.getLabel(translator.src_dict.lookup(
                                srcBatch[b][j], default=translator.src_dict.lookup(onmt.Constants.UNK_WORD)))
                            print("\t%s\t%d\t%3f" %
                                  (w_src, j, attn[b][0][i][j]))

                    srcSent = ' '.join(srcBatch[b])
                    if translator.tgt_dict.lower:
                        srcSent = srcSent.lower()
                    print('SENT %d: %s' % (count, srcSent))
                    print('PRED %d: %s' % (count, " ".join(predBatch[b][0])))
                    print("PRED SCORE: %.4f" % predScore[b][0])

                    tgtSent = ' '.join(tgtBatch[b])
                    if translator.tgt_dict.lower:
                        tgtSent = tgtSent.lower()
                    print('GOLD %d: %s ' % (count, tgtSent))
                    print("GOLD SCORE: %.4f" % goldScore[b])

                    if opt.n_best > 1:
                        print('\nBEST HYP:')
                        for n in range(opt.n_best):
                            print("[%.4f] %s" % (predScore[b][n],
                                                 " ".join(predBatch[b][n])))
                    print('')

            srcBatch, tgtBatch = [], []
        outF.close()
        tgtF.close()

        if opt.dump_beam:
            json.dump(translator.beam_accum, open('%s.%s.beam' %
                                                  (opt.model, get_split(opt)), 'w'))

        # read golden results and predictions
        with codecs.open(opt.tgt, 'r', encoding='utf-8') as f_in:
            gold_tgt_list = [l.strip() for l in f_in]
        with codecs.open('%s.%s.sample' % (opt.model, get_split(opt)), 'r', encoding='utf-8') as f_in:
            pred_tgt_list = [l.strip() for l in f_in]
        assert len(gold_tgt_list) == len(pred_tgt_list), '%d\t%d' % (
            len(gold_tgt_list), len(pred_tgt_list))

        # tree-level accuracy
        if opt.metric == 'tree_acc':
            for i, gold_tgt, pred_tgt in izip(itertools.count(), gold_tgt_list, pred_tgt_list):
                eval_info_list[i]['acc'] = 1 if is_tree_eq(
                    gold_tgt, pred_tgt, translator.tgt_dict) else 0
            m = {'acc': np.mean(list(map(lambda x: x['acc'], eval_info_list)))}
            pprint(m)
        elif opt.metric == 'django':
            for i, gold_tgt, pred_tgt in izip(itertools.count(), gold_tgt_list, pred_tgt_list):
                eval_info_list[i]['acc'] = 1 if is_py_eq(
                    gold_tgt, pred_tgt) else 0
            m = {'acc': np.mean(list(map(lambda x: x['acc'], eval_info_list)))}
            pprint(m)
        elif opt.metric == 'ifttt':
            if get_split(opt) == 'test':
                # read test category
                with codecs.open(opt.ifttt_eval_category, 'r', encoding='utf-8') as f_in:
                    eval_category_list = [
                        int(l.strip().split('\t')[1]) for l in f_in]
                assert len(gold_tgt_list) == len(eval_category_list), '%d\t%d' % (
                    len(gold_tgt_list), len(eval_category_list))
            for i, gold_tgt, pred_tgt in izip(itertools.count(), gold_tgt_list, pred_tgt_list):
                gold_decomp = decompose_ifttt_tgt(gold_tgt)
                pred_decomp = decompose_ifttt_tgt(pred_tgt)

                eval_info_list[i]['channel_acc'] = 1 if all(map(lambda k: gold_decomp[k] == pred_decomp[
                    k], ('if_channel', 'then_channel'))) else 0
                eval_info_list[i]['func_acc'] = 1 if all(map(lambda k: gold_decomp[k] == pred_decomp[
                    k], ('if_channel', 'then_channel', 'if_func', 'then_func'))) else 0
                eval_info_list[i]['f1'] = ifttt_f1_metric(
                    gold_decomp, pred_decomp)
                if get_split(opt) == 'test':
                    eval_info_list[i]['eval_category'] = eval_category_list[i]

            m = {}
            for eval_type in ('omit_non_eng', '+unintel', 'only>=3'):
                if get_split(opt) == 'test':
                    valid_eval_info_list = [
                        eval_info for eval_info in eval_info_list if is_valid_by_eval_type_ifttt(eval_info, eval_type)]
                else:
                    valid_eval_info_list = eval_info_list
                for metric_type in ('channel_acc', 'func_acc', 'f1'):
                    m['%s:%s' % (eval_type, metric_type)] = np.mean(
                        list(map(lambda x: x[metric_type], valid_eval_info_list)))
            # print the table of results
            table = []
            for eval_type in ('omit_non_eng', '+unintel', 'only>=3'):
                row = [eval_type]
                row += [m['%s:%s' % (eval_type, metric_type)]
                        for metric_type in ('channel_acc', 'func_acc', 'f1')]
                table.append(row)
            # print(tabulate(table, headers=[
            #       '', 'channel_acc', 'func_acc', 'f1']))
        elif opt.metric == 'word_f1':
            for i, gold_tgt, pred_tgt in izip(itertools.count(), gold_tgt_list, pred_tgt_list):
                tk_gold_tgt, tk_pred_tgt = gold_tgt.split(
                    ' '), pred_tgt.split(' ')
                min_len = min(len(tk_gold_tgt), len(tk_pred_tgt))
                c = sum([1 for it in xrange(min_len)
                         if tk_gold_tgt[it] == tk_pred_tgt[it]])
                if c == 0:
                    eval_info_list[i]['f1'] = 0
                else:
                    p = float(c) / float(len(tk_pred_tgt))
                    r = float(c) / float(len(tk_gold_tgt))
                    eval_info_list[i]['f1'] = 2 * p * r / (p + r)
            m = {'f1': np.mean(list(map(lambda x: x['f1'], eval_info_list)))}
            pprint(m)
        else:
            raise NotImplementedError

        with codecs.open('%s.%s.eval' % (opt.model, get_split(opt)), 'w', encoding='utf-8') as evalF:
            json.dump(eval_info_list, evalF, indent=2)
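The position-wise word F1 computed in the 'word_f1' branch above can be restated as a small standalone function (a sketch for illustration, not code from the project):

def word_f1(gold_tokens, pred_tokens):
    # Count positions, up to the shorter length, where gold and prediction agree.
    min_len = min(len(gold_tokens), len(pred_tokens))
    c = sum(1 for i in range(min_len) if gold_tokens[i] == pred_tokens[i])
    if c == 0:
        return 0.0
    p = c / len(pred_tokens)  # precision over predicted tokens
    r = c / len(gold_tokens)  # recall over gold tokens
    return 2 * p * r / (p + r)

print(word_f1("a b c d".split(), "a b x d".split()))  # 0.75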