def get_dataloader_fn(*, data_dir: str, batch_size: int, precision: str):
    # DALI loader settings for 3D volumes on a single GPU
    kwargs = {
        "dim": 3,
        "gpus": 1,
        "seed": 0,
        "num_workers": 8,
        "meta": None,
        "oversampling": 0,
        "benchmark": False,
        "patch_size": [128, 128, 128],
    }
    imgs, lbls = load_data(data_dir, "*_x.npy"), load_data(data_dir, "*_y.npy")
    # keep only the validation split of fold 2 from a fixed 5-fold split
    kfold = KFold(n_splits=5, shuffle=True, random_state=12345)
    _, val_idx = list(kfold.split(imgs))[2]
    imgs, lbls = get_split(imgs, val_idx), get_split(lbls, val_idx)
    dataloader = fetch_dali_loader(imgs, lbls, batch_size, "bermuda", **kwargs)

    def _dataloader_fn():
        for i, batch in enumerate(dataloader):
            fname = [f"{i}_{j}" for j in range(batch_size)]
            img = batch["image"].numpy()
            if "fp16" in precision:
                img = img.astype(np.half)
            img = {"INPUT__0": img}
            lbl = {"OUTPUT__0": batch["label"].squeeze(1).numpy().astype(int)}
            yield fname, img, lbl

    return _dataloader_fn
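A minimal usage sketch of get_dataloader_fn; the directory path, batch size, and precision string below are illustrative placeholders, not values from the original configuration.

dataloader_fn = get_dataloader_fn(data_dir="/data/preprocessed", batch_size=2, precision="fp16")
for fname, img, lbl in dataloader_fn():
    # img/lbl are dicts keyed by the Triton-style tensor names used above
    print(fname, img["INPUT__0"].shape, img["INPUT__0"].dtype, lbl["OUTPUT__0"].shape)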
def setup(self, stage=None):
    imgs = load_data(self.data_path, "*_x.npy")
    lbls = load_data(self.data_path, "*_y.npy")
    self.test_imgs, self.kwargs["meta"] = get_test_fnames(
        self.args, self.data_path, self.kwargs["meta"])
    if self.args.exec_mode != "predict" or self.args.benchmark:
        # split images and labels into train/val subsets for the requested fold
        train_idx, val_idx = list(self.kfold.split(imgs))[self.args.fold]
        self.train_imgs = get_split(imgs, train_idx)
        self.train_lbls = get_split(lbls, train_idx)
        self.val_imgs = get_split(imgs, val_idx)
        self.val_lbls = get_split(lbls, val_idx)
        if is_main_process():
            ntrain, nval = len(self.train_imgs), len(self.val_imgs)
            print(f"Number of examples: Train {ntrain} - Val {nval}")
    elif is_main_process():
        print(f"Number of test examples: {len(self.test_imgs)}")
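Both functions rely on the same deterministic sklearn KFold split; a small illustration of how the fold indices are produced (the file list is made up for this example):

from sklearn.model_selection import KFold

files = [f"case_{i}_x.npy" for i in range(10)]
kfold = KFold(n_splits=5, shuffle=True, random_state=12345)
train_idx, val_idx = list(kfold.split(files))[0]  # indices for fold 0
print(len(train_idx), len(val_idx))  # 8 training cases, 2 validation cases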
def eval_main(opt, fn_model):
    dev_eval_info_list = [
        EvalInfo(it)
        for it in json.load(open('%s.%s.eval' % (fn_model, get_split(opt)), 'r'))
    ]

    # read source tokens
    with codecs.open(opt.src, 'r', encoding='utf-8') as f_in:
        for i, l in enumerate(f_in):
            dev_eval_info_list[i].src = l.strip().split(' ')

    # filter: src_len >= 2
    dev_eval_info_list = list(
        filter(lambda x: len(x.src) >= 2, dev_eval_info_list))

    # start evaluation
    r = eval_func(
        ('noise:enc_word:mul:exp:miu_norm', dev_eval_info_list, opt.metric))
    print(opt.metric)
    print('ATT: %f' % r['att-noise:enc_word:mul:miu_norm'])
    print('BP: %f' % r['bp-noise:enc_word:mul:miu_norm'])
    print('')
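For context, the *.eval file loaded here is the JSON list of per-example records dumped at the end of evaluate_main below; a sketch of one record (the field values and the confidence entry are invented for illustration):

example_record = {
    "id": 1,
    "pred_score": -0.42, "pred_len": 7,
    "gold_score": -0.55, "gold_len": 7,
    "src_len": 9, "src_unk": 0,
    "confidence": {"noise:enc_word:mul:exp:miu_norm": 0.87},  # hypothetical method/value
}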
def evaluate_main(opt):
    # read language model
    if 'lm' in opt.confidence.split(','):
        if Path(opt.lm_path).exists():
            lm = kenlm.Model(opt.lm_path)
        else:
            print('==== LM does not exist: ' + opt.lm_path)

    for fn_model in tqdm(glob.glob(opt.model_path)):
        print(fn_model)
        opt_train = json.load(
            open(os.path.join(Path(fn_model).dirname(), 'opt.json'), 'r'))
        opt.model = fn_model
        translator = onmt.Translator(opt)
        # reset dropout rate
        translator.reset_dropout_rate(opt.dropout_rate)

        outF = codecs.open('%s.%s.sample' % (opt.model, get_split(opt)),
                           'w', encoding='utf-8')
        tgtF = codecs.open(opt.tgt, 'r', encoding='utf-8')

        srcBatch, tgtBatch = [], []
        count = 0
        eval_info_list = []

        if opt.dump_beam:
            translator.initBeamAccum()

        for line in line_iter(codecs.open(opt.src, 'r', encoding='utf-8')):
            if line is not None:
                srcTokens = line.split()
                srcBatch += [srcTokens]
                tgtTokens = tgtF.readline().split()
                tgtBatch += [tgtTokens]
                if len(srcBatch) < opt.batch_size:
                    continue
            else:
                # at the end of file, check last batch
                if len(srcBatch) == 0:
                    break

            predBatch, predScore, goldScore, attn = translator.translate(
                srcBatch, tgtBatch)

            conf, conf_bp_src, conf_att_src, conf_tgt, conf_each_word = {}, {}, {}, {}, {}
            if opt.confidence != 'none':
                for confidence_method in set(opt.confidence.split(',')):
                    if confidence_method == 'lm':
                        conf[confidence_method] = list(
                            map(lambda x: lm.score(' '.join(x[0])), srcBatch))
                    else:
                        d, w = translator.confidence(
                            srcBatch, list(map(lambda x: x[0], predBatch)),
                            confidence_method, opt)
                        for k, v in d.iteritems():
                            conf[k] = v
                        if (opt.conf_bp != 'none') and (len(w) > 0):
                            for k, v in w.iteritems():
                                conf_bp_src[k], conf_att_src[k], conf_tgt[k] = translator.confidence_bp(
                                    srcBatch, list(map(lambda x: x[0], predBatch)), v, opt_train)
            if opt.conf_each_word != 'none':
                for confidence_method in set(opt.conf_each_word.split(',')):
                    d = translator.confidence_each_word(
                        srcBatch, list(map(lambda x: x[0], predBatch)),
                        confidence_method, opt)
                    for k, v in d.iteritems():
                        conf_each_word[k] = v

            for b in range(len(predBatch)):
                count += 1
                if opt.metric == 'django':
                    # post-process: copy <unk> tokens from srcBatch
                    def copy_unk(src, pred, attn_score):
                        post = []
                        for i, pred_token in enumerate(pred):
                            if pred_token == '<unk>':
                                _, ids = attn_score[i].sort(0, descending=True)
                                post.append(src[ids[0]])
                            else:
                                post.append(pred_token)
                        return post
                    outF.write(
                        " ".join(copy_unk(srcBatch[b], predBatch[b][0], attn[b][0])) + '\n')
                else:
                    outF.write(" ".join(predBatch[b][0]) + '\n')
                outF.flush()

                info = {'id': count,
                        'pred_score': predScore[b][0],
                        'pred_len': len(predBatch[b][0]),
                        'gold_score': goldScore[b],
                        'gold_len': len(tgtBatch[b]),
                        'src_len': len(srcBatch[b]),
                        'src_unk': sum([0 if (translator.src_dict.lookup(w, None) is not None) else 1
                                        for w in srcBatch[b]])}
                if opt.confidence != 'none':
                    info['confidence'] = dict(
                        [(k, v[b]) for k, v in conf.iteritems()])
                    if opt.conf_bp != 'none':
                        info['conf_bp_src'] = dict(
                            [(k, v[b]) for k, v in conf_bp_src.iteritems()])
                        info['conf_att_src'] = dict(
                            [(k, v[b]) for k, v in conf_att_src.iteritems()])
                        info['conf_tgt'] = dict(
                            [(k, v[b]) for k, v in conf_tgt.iteritems()])
                if opt.conf_each_word != 'none':
                    info['conf_each_word'] = dict(
                        [(k, v[b]) for k, v in conf_each_word.iteritems()])
                eval_info_list.append(info)

                if opt.verbose:
                    print('')
                    # show attention score
                    print(" ".join(predBatch[b][0]))
                    for i, w in enumerate(predBatch[b][0]):
                        print(w)
                        _, ids = attn[b][0][i].sort(0, descending=True)
                        for j in ids[:5].tolist():
                            w_src = translator.src_dict.getLabel(
                                translator.src_dict.lookup(
                                    srcBatch[b][j],
                                    default=translator.src_dict.lookup(onmt.Constants.UNK_WORD)))
                            print("\t%s\t%d\t%3f" % (w_src, j, attn[b][0][i][j]))

                    srcSent = ' '.join(srcBatch[b])
                    if translator.tgt_dict.lower:
                        srcSent = srcSent.lower()
                    print('SENT %d: %s' % (count, srcSent))
                    print('PRED %d: %s' % (count, " ".join(predBatch[b][0])))
                    print("PRED SCORE: %.4f" % predScore[b][0])

                    tgtSent = ' '.join(tgtBatch[b])
                    if translator.tgt_dict.lower:
                        tgtSent = tgtSent.lower()
                    print('GOLD %d: %s ' % (count, tgtSent))
                    print("GOLD SCORE: %.4f" % goldScore[b])

                    if opt.n_best > 1:
                        print('\nBEST HYP:')
                        for n in range(opt.n_best):
                            print("[%.4f] %s" % (predScore[b][n],
                                                 " ".join(predBatch[b][n])))
                    print('')

            srcBatch, tgtBatch = [], []

        outF.close()
        tgtF.close()

        if opt.dump_beam:
            json.dump(translator.beam_accum,
                      open('%s.%s.beam' % (opt.model, get_split(opt)), 'w'))

        # read golden results and predictions
        with codecs.open(opt.tgt, 'r', encoding='utf-8') as f_in:
            gold_tgt_list = [l.strip() for l in f_in]
        with codecs.open('%s.%s.sample' % (opt.model, get_split(opt)), 'r', encoding='utf-8') as f_in:
            pred_tgt_list = [l.strip() for l in f_in]
        assert len(gold_tgt_list) == len(pred_tgt_list), '%d\t%d' % (
            len(gold_tgt_list), len(pred_tgt_list))

        # tree-level accuracy
        if opt.metric == 'tree_acc':
            for i, gold_tgt, pred_tgt in izip(itertools.count(), gold_tgt_list, pred_tgt_list):
                eval_info_list[i]['acc'] = 1 if is_tree_eq(
                    gold_tgt, pred_tgt, translator.tgt_dict) else 0
            m = {'acc': np.mean(list(map(lambda x: x['acc'], eval_info_list)))}
            pprint(m)
        elif opt.metric == 'django':
            for i, gold_tgt, pred_tgt in izip(itertools.count(), gold_tgt_list, pred_tgt_list):
                eval_info_list[i]['acc'] = 1 if is_py_eq(gold_tgt, pred_tgt) else 0
            m = {'acc': np.mean(list(map(lambda x: x['acc'], eval_info_list)))}
            pprint(m)
        elif opt.metric == 'ifttt':
            if get_split(opt) == 'test':
                # read test category
                with codecs.open(opt.ifttt_eval_category, 'r', encoding='utf-8') as f_in:
                    eval_category_list = [
                        int(l.strip().split('\t')[1]) for l in f_in]
                assert len(gold_tgt_list) == len(eval_category_list), '%d\t%d' % (
                    len(gold_tgt_list), len(eval_category_list))
            for i, gold_tgt, pred_tgt in izip(itertools.count(), gold_tgt_list, pred_tgt_list):
                gold_decomp = decompose_ifttt_tgt(gold_tgt)
                pred_decomp = decompose_ifttt_tgt(pred_tgt)
                eval_info_list[i]['channel_acc'] = 1 if all(map(
                    lambda k: gold_decomp[k] == pred_decomp[k],
                    ('if_channel', 'then_channel'))) else 0
                eval_info_list[i]['func_acc'] = 1 if all(map(
                    lambda k: gold_decomp[k] == pred_decomp[k],
                    ('if_channel', 'then_channel', 'if_func', 'then_func'))) else 0
                eval_info_list[i]['f1'] = ifttt_f1_metric(gold_decomp, pred_decomp)
                if get_split(opt) == 'test':
                    eval_info_list[i]['eval_category'] = eval_category_list[i]
            m = {}
            for eval_type in ('omit_non_eng', '+unintel', 'only>=3'):
                if get_split(opt) == 'test':
                    valid_eval_info_list = [
                        eval_info for eval_info in eval_info_list
                        if is_valid_by_eval_type_ifttt(eval_info, eval_type)]
                else:
                    valid_eval_info_list = eval_info_list
                for metric_type in ('channel_acc', 'func_acc', 'f1'):
                    m['%s:%s' % (eval_type, metric_type)] = np.mean(
                        list(map(lambda x: x[metric_type], valid_eval_info_list)))
            # print the table of results
            table = []
            for eval_type in ('omit_non_eng', '+unintel', 'only>=3'):
                row = [eval_type]
                row += [m['%s:%s' % (eval_type, metric_type)]
                        for metric_type in ('channel_acc', 'func_acc', 'f1')]
                table.append(row)
            # print(tabulate(table, headers=[
            #     '', 'channel_acc', 'func_acc', 'f1']))
        elif opt.metric == 'word_f1':
            for i, gold_tgt, pred_tgt in izip(itertools.count(), gold_tgt_list, pred_tgt_list):
                tk_gold_tgt, tk_pred_tgt = gold_tgt.split(' '), pred_tgt.split(' ')
                min_len = min(len(tk_gold_tgt), len(tk_pred_tgt))
                c = sum([1 for it in xrange(min_len)
                         if tk_gold_tgt[it] == tk_pred_tgt[it]])
                if c == 0:
                    eval_info_list[i]['f1'] = 0
                else:
                    p = float(c) / float(len(tk_pred_tgt))
                    r = float(c) / float(len(tk_gold_tgt))
                    eval_info_list[i]['f1'] = 2 * p * r / (p + r)
            m = {'f1': np.mean(list(map(lambda x: x['f1'], eval_info_list)))}
            pprint(m)
        else:
            raise NotImplementedError

        with codecs.open('%s.%s.eval' % (opt.model, get_split(opt)), 'w', encoding='utf-8') as evalF:
            json.dump(eval_info_list, evalF, indent=2)
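The per-example score computed in the 'word_f1' branch is a token-level F1 over position-aligned tokens; a small standalone sketch (the helper name word_f1 is not from the original code, which computes this inline):

def word_f1(gold_tokens, pred_tokens):
    # count positions where gold and predicted tokens agree
    min_len = min(len(gold_tokens), len(pred_tokens))
    c = sum(1 for i in range(min_len) if gold_tokens[i] == pred_tokens[i])
    if c == 0:
        return 0.0
    p = float(c) / len(pred_tokens)  # precision over predicted tokens
    r = float(c) / len(gold_tokens)  # recall over gold tokens
    return 2 * p * r / (p + r)

# word_f1("a b c".split(), "a b d".split()) == 2/3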