def compute_overlap(pd_arg, t_arg):
    """Return the F1 overlap between a predicted span and a true span.

    Both arguments are (start, end) offsets.
    """
    start, end = pd_arg
    t_start, t_end = t_arg
    tp = max(0, min(end, t_end) - max(start, t_start))
    prec = tp / (end - start)
    recall = tp / (t_end - t_start)
    return evaluate.f1(recall, prec)

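
# compute_overlap above delegates the final score to evaluate.f1(recall, prec).
# That helper is not shown in this section; a minimal sketch of what such a
# two-argument F1 helper typically looks like (an assumption, not the project's
# verified implementation) follows. The name f1_sketch is illustrative only.
def f1_sketch(recall, prec):
    # Harmonic mean of precision and recall; return 0 when both are 0 to avoid
    # division by zero.
    if prec + recall == 0:
        return 0.0
    return 2 * prec * recall / (prec + recall)
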
def get_CV_results():
    folder = './type_f1_file/'
    model_string = 'attention_'
    flag_string = '0_0_1_1_'
    folder_log = './log_files/'
    with open('data/test_type_count.pkl', 'r') as f:
        b = pickle.load(f)
    final_test_dict = {}
    final_auc_dict = {}
    total_F1_num = np.zeros(3)
    example_f1s = []
    for i in range(20, 30):
        cv_id = i % 10
        cv = str(i)
        test_dict, test_F1_num, auc_dict = get_one_CV_results_test_F1s(
            folder, folder_log, model_string, flag_string, cv)
        final_test_dict.update(test_dict)
        final_auc_dict.update(auc_dict)
        total_F1_num += test_F1_num
        log_file_path = folder_log + model_string + flag_string + cv + '.txt'
        example_f1s.append(parse_results.tail(log_file_path, 2)[0][0])
    count_sum = 0
    for k in final_test_dict:
        count_sum += b[int(k)][1]
    type_F1_weighted = 0.0
    for k in final_test_dict:
        type_F1_weighted += (b[int(k)][1] / count_sum) * final_test_dict[k]
    print 'OpenNET type weighted average Macro {}'.format(type_F1_weighted)
    precision = total_F1_num[0] / total_F1_num[1]
    recall = total_F1_num[0] / total_F1_num[2]
    print 'OpenNET type micro {}'.format(evaluate.f1(precision, recall))
    auc_array = []
    for k in final_auc_dict:
        auc_array.append((b[int(k)][1] / count_sum) * final_auc_dict[k])
    print 'average auc = {}'.format(np.sum(auc_array))
    print '---popularity---'
    print 'id---name---freq---auc---'
    dicts = joblib.load(
        '/home/zys133/knowledge_base/NFGEC/data/Wiki/dicts_figer.pkl')
    for k in final_auc_dict:
        print '{}\t{}\t{}\t{:.4f}'.format(k, dicts['id2label'][k], b[k][1],
                                          final_auc_dict[k])

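
# get_CV_results above accumulates a 3-element counter total_F1_num across folds
# and derives micro precision/recall from it. Judging from the divisions, the
# layout is assumed to be [true positives, predicted count, gold count]; a minimal
# sketch of that micro-averaged aggregation (micro_f1_sketch and per_fold_counts
# are illustrative names, not from the project):
import numpy as np

def micro_f1_sketch(per_fold_counts):
    """per_fold_counts: iterable of [tp, n_predicted, n_gold] arrays, one per fold."""
    total = np.sum(per_fold_counts, axis=0)
    precision = total[0] / total[1]
    recall = total[0] / total[2]
    return 2 * precision * recall / (precision + recall)
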
def eval_f1(self):
    ''' Get test set F1 score:
        2 * (precision * recall) / (precision + recall)
        where an F1 score is calculated for each reply and the mean is returned
        Note: this can take some time

        Returns
        -------
        float
            mean F1 score
    '''
    get_reply = (lambda persona, msg: generate_reply_transformer(
        persona, msg, self.tokenizer, self.transformer,
        self.out_seq_length)[0])
    return evaluate.f1(get_reply)

def eval_f1(self):
    ''' Get test set F1 score:
        2 * (precision * recall) / (precision + recall)
        where an F1 score is calculated for each reply and the mean is returned
        Note: this can take some time

        Returns
        -------
        float
            mean F1 score
    '''
    get_reply = (lambda persona, msg: generate_reply_seq2seq(
        self.encoder, self.decoder, self.tokenizer,
        (pre.START_SEQ_TOKEN + ' ' + persona + ' ' + pre.SEP_SEQ_TOKEN
         + ' ' + msg + ' ' + pre.END_SEQ_TOKEN),
        self.persona_length + self.msg_length,
        self.reply_length)[0])
    return evaluate.f1(get_reply)

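
# Both eval_f1 methods above pass a reply-generation callback to evaluate.f1,
# which, per their docstrings, scores each reply and returns the mean. The scoring
# code itself is not shown here; a common choice for dialogue evaluation is
# token-overlap F1 between the generated and reference reply. A minimal sketch
# under that assumption (reply_token_f1_sketch is an illustrative name, and
# whitespace tokenization is a placeholder for the project's tokenizer):
from collections import Counter

def reply_token_f1_sketch(generated, reference):
    gen_tokens = generated.split()
    ref_tokens = reference.split()
    if not gen_tokens or not ref_tokens:
        return 0.0
    # Count tokens shared by the two replies, clipped to the smaller count.
    common = Counter(gen_tokens) & Counter(ref_tokens)
    overlap = sum(common.values())
    if overlap == 0:
        return 0.0
    precision = overlap / len(gen_tokens)
    recall = overlap / len(ref_tokens)
    return 2 * precision * recall / (precision + recall)
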
pool = Pool(args.ap_workers)
jobs = []
for iou_idx, min_overlap in enumerate(iou_range):
    for cls in range(num_class):
        jobs.append(
            pool.apply_async(eval_ap,
                             args=(
                                 [min_overlap],
                                 iou_idx,
                                 cls,
                                 gt_by_class[cls],
                                 prediction_by_class[cls],
                             ),
                             callback=callback))
    f1 = evaluate.f1(prediction, min_overlap, gt)
    f1_values[iou_idx] = f1
pool.close()
pool.join()
print("Evaluation done.\n\n")

map_iou = ap_values.mean(axis=0)
mar = ar_values.mean(axis=0)
display_title = "Detection Performance on {}".format(args.dataset)
display_data = [["IoU thresh"], ["mean AP"], ["mean AR"], ["F1 criterion"]]
for i in range(len(iou_range)):
    display_data[0].append("{:.02f}".format(iou_range[i]))
    display_data[1].append("{:.04f}".format(map_iou[i]))
    display_data[2].append("{:.04f}".format(mar[i]))
    display_data[3].append("{:.04f}".format(f1_values[i]))

# calculate
ious = np.arange(0.1, 1.0, 0.1)
aps = np.zeros((len(ious), label_count))
ars = np.zeros((len(ious), label_count))
f1s = np.zeros((len(ious), ))
miou = evaluate.miou_per_v(all_prediction, all_groundtruth)
for i, iou in enumerate(ious):
    for cls in range(label_count):
        ap, ar = evaluate.ap(prediction_by_cls[cls], iou,
                             groundtruth_by_cls[cls])
        aps[i][cls] = ap
        ars[i][cls] = ar
    f1 = evaluate.f1(all_prediction, iou, all_groundtruth)
    f1s[i] = f1
map_ = np.mean(aps, axis=1)
mar = np.mean(ars, axis=1)
print("Criteria solved.")

# print
title = "C3D Detection Performance"
datas = [["IoU threshold"], ["mean AP"], ["mean AR"], ["F1 criterion"]]
for i, iou in enumerate(ious):
    datas[0].append("{:.2f}".format(iou))
    datas[1].append("{:.4f}".format(map_[i]))
    datas[2].append("{:.4f}".format(mar[i]))
    datas[3].append("{:.4f}".format(f1s[i]))

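
# In the detection snippets above, evaluate.f1(predictions, iou, groundtruth)
# returns a single F1 value per IoU threshold. Its internals are not shown; a
# minimal sketch of one standard formulation (greedy one-to-one matching of
# predicted segments to ground-truth segments at the given IoU), offered as an
# assumption rather than the project's code, with illustrative names:
def detection_f1_sketch(predictions, iou_threshold, groundtruth):
    """predictions / groundtruth: lists of (start, end) segments."""
    def temporal_iou(a, b):
        inter = max(0.0, min(a[1], b[1]) - max(a[0], b[0]))
        union = (a[1] - a[0]) + (b[1] - b[0]) - inter
        return inter / union if union > 0 else 0.0

    matched_gt = set()
    tp = 0
    for pred in predictions:
        # Greedily match each prediction to its best still-unmatched ground truth.
        best_iou, best_idx = 0.0, None
        for idx, gt in enumerate(groundtruth):
            if idx in matched_gt:
                continue
            overlap = temporal_iou(pred, gt)
            if overlap > best_iou:
                best_iou, best_idx = overlap, idx
        if best_idx is not None and best_iou >= iou_threshold:
            matched_gt.add(best_idx)
            tp += 1
    precision = tp / len(predictions) if predictions else 0.0
    recall = tp / len(groundtruth) if groundtruth else 0.0
    if precision + recall == 0:
        return 0.0
    return 2 * precision * recall / (precision + recall)
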
for iou_idx, min_overlap in enumerate(iou_range):
    # for iou_idx, min_overlap in enumerate([0.6]):
    for cls in range(num_class):
        # for cls in [304]:
        # jobs.append(pool.apply_async(eval_ap, args=([min_overlap], iou_idx, cls, gt_by_cls[cls], plain_detections[cls],), callback=callback))
        jobs.append(
            pool.apply_async(eval_ap,
                             args=(
                                 [min_overlap],
                                 iou_idx,
                                 cls,
                                 pku_gt_by_class[cls],
                                 pku_prediction_by_class[cls],
                             ),
                             callback=callback))
    f1 = evaluate.f1(pku_prediction, min_overlap, pku_gt)
    f1_values[iou_idx] = f1
pool.close()
pool.join()
print("Evaluation done.\n\n")

"""for zdy_i, zdy_iou in enumerate(iou_range):
    with open("accuracy_per_cls/cls_pku{:f}.txt".format(zdy_iou), "w") as zdy_f:
        for zdy_cls in range(num_class):
            zdy_f.write("{:d}\t{:.04f}\n".format(zdy_cls, ap_values[zdy_cls][zdy_i]))"""
# map_iou = ap_values[1:, :].mean(axis=0)
# mar = ar_values[1:, :].mean(axis=0)
map_iou = ap_values.mean(axis=0)
mar = ar_values.mean(axis=0)
display_title = "Detection Performance on {}".format(args.dataset)

def train_network():
    init_epoch = 0
    best_f1 = 0
    total_steps = 0
    train_dir = ct.TRAIN_TXT
    val_dir = ct.VAL_TXT
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    torch.backends.cudnn.benchmark = True

    train_data = OSCD_TRAIN(train_dir)
    train_dataloader = DataLoader(train_data, batch_size=ct.BATCH_SIZE,
                                  shuffle=True)
    val_data = OSCD_TEST(val_dir)
    val_dataloader = DataLoader(val_data, batch_size=1, shuffle=False)

    netg = NetG(ct.ISIZE, ct.NC * 2, ct.NZ, ct.NDF,
                ct.EXTRALAYERS).to(device=device)
    netd = NetD(ct.ISIZE, ct.GT_C, 1, ct.NGF, ct.EXTRALAYERS).to(device=device)
    netg.apply(weights_init)
    netd.apply(weights_init)

    if ct.RESUME:
        # Resume both generator and discriminator from the latest checkpoints.
        assert os.path.exists(os.path.join(ct.WEIGHTS_SAVE_DIR, 'current_netG.pth')) \
            and os.path.exists(os.path.join(ct.WEIGHTS_SAVE_DIR, 'current_netD.pth')), \
            'No saved generator/discriminator weights found'
        print("\nLoading pre-trained networks.")
        init_epoch = torch.load(
            os.path.join(ct.WEIGHTS_SAVE_DIR, 'current_netG.pth'))['epoch']
        netg.load_state_dict(
            torch.load(os.path.join(ct.WEIGHTS_SAVE_DIR,
                                    'current_netG.pth'))['model_state_dict'])
        netd.load_state_dict(
            torch.load(os.path.join(ct.WEIGHTS_SAVE_DIR,
                                    'current_netD.pth'))['model_state_dict'])
        with open(os.path.join(ct.OUTPUTS_DIR, 'f1_score.txt')) as f:
            lines = f.readlines()
            best_f1 = float(lines[-2].strip().split(':')[-1])
        print("\tDone.\n")

    l_adv = l2_loss
    l_con = nn.L1Loss()
    l_enc = l2_loss
    l_bce = nn.BCELoss()
    l_cos = cos_loss
    dice = DiceLoss()
    optimizer_d = optim.Adam(netd.parameters(), lr=ct.LR, betas=(0.5, 0.999))
    optimizer_g = optim.Adam(netg.parameters(), lr=ct.LR, betas=(0.5, 0.999))

    start_time = time.time()
    for epoch in range(init_epoch + 1, ct.EPOCH):
        loss_g = []
        loss_d = []
        netg.train()
        netd.train()
        epoch_iter = 0
        for i, data in enumerate(train_dataloader):
            INPUT_SIZE = [ct.ISIZE, ct.ISIZE]
            x1, x2, gt = data
            x1 = x1.to(device, dtype=torch.float)
            x2 = x2.to(device, dtype=torch.float)
            gt = gt.to(device, dtype=torch.float)
            gt = gt[:, 0, :, :].unsqueeze(1)
            x = torch.cat((x1, x2), 1)
            epoch_iter += ct.BATCH_SIZE
            total_steps += ct.BATCH_SIZE
            real_label = torch.ones(size=(x1.shape[0], ),
                                    dtype=torch.float32, device=device)
            fake_label = torch.zeros(size=(x1.shape[0], ),
                                     dtype=torch.float32, device=device)

            # forward
            fake = netg(x)
            pred_real = netd(gt)
            pred_fake = netd(fake).detach()
            err_d_fake = l_bce(pred_fake, fake_label)
            err_g = l_con(fake, gt)
            err_g_total = ct.G_WEIGHT * err_g + ct.D_WEIGHT * err_d_fake

            pred_fake_ = netd(fake.detach())
            err_d_real = l_bce(pred_real, real_label)
            err_d_fake_ = l_bce(pred_fake_, fake_label)
            err_d_total = (err_d_real + err_d_fake_) * 0.5

            # backward
            optimizer_g.zero_grad()
            err_g_total.backward(retain_graph=True)
            optimizer_g.step()

            optimizer_d.zero_grad()
            err_d_total.backward()
            optimizer_d.step()

            errors = utils.get_errors(err_d_total, err_g_total)
            loss_g.append(err_g_total.item())
            loss_d.append(err_d_total.item())

            counter_ratio = float(epoch_iter) / len(train_dataloader.dataset)
            if (i % ct.DISPOLAY_STEP == 0 and i > 0):
                print('epoch:', epoch, 'iteration:', i,
                      ' G|D loss is {}|{}'.format(np.mean(loss_g[-51:]),
                                                  np.mean(loss_d[-51:])))
                if ct.DISPLAY:
                    utils.plot_current_errors(epoch, counter_ratio, errors, vis)
                    utils.display_current_images(gt.data, fake.data, vis)
                utils.save_current_images(epoch, gt.data, fake.data,
                                          ct.IM_SAVE_DIR,
                                          'training_output_images')

        with open(os.path.join(ct.OUTPUTS_DIR, 'train_loss.txt'), 'a') as f:
            f.write(
                'after %s epoch, loss is %g,loss1 is %g,loss2 is %g,loss3 is %g'
                % (epoch, np.mean(loss_g), np.mean(loss_d), np.mean(loss_g),
                   np.mean(loss_d)))
            f.write('\n')

        if not os.path.exists(ct.WEIGHTS_SAVE_DIR):
            os.makedirs(ct.WEIGHTS_SAVE_DIR)
        utils.save_weights(epoch, netg, optimizer_g, ct.WEIGHTS_SAVE_DIR,
                           'netG')
        utils.save_weights(epoch, netd, optimizer_d, ct.WEIGHTS_SAVE_DIR,
                           'netD')
        duration = time.time() - start_time
        print('training duration is %g' % duration)

        # val phase
        print('Validating.................')
        pretrained_dict = torch.load(
            os.path.join(ct.WEIGHTS_SAVE_DIR,
                         'current_netG.pth'))['model_state_dict']
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        net = NetG(ct.ISIZE, ct.NC * 2, ct.NZ, ct.NDF,
                   ct.EXTRALAYERS).to(device=device)
        net.load_state_dict(pretrained_dict, False)
        # Switch to eval mode and disable gradient tracking for validation.
        net.eval()
        with torch.no_grad():
            TP = 0
            FN = 0
            FP = 0
            TN = 0
            for k, data in enumerate(val_dataloader):
                x1, x2, label = data
                x1 = x1.to(device, dtype=torch.float)
                x2 = x2.to(device, dtype=torch.float)
                label = label.to(device, dtype=torch.float)
                label = label[:, 0, :, :].unsqueeze(1)
                x = torch.cat((x1, x2), 1)
                time_i = time.time()
                v_fake = net(x)

                tp, fp, tn, fn = eva.f1(v_fake, label)
                TP += tp
                FN += fn
                TN += tn
                FP += fp

        precision = TP / (TP + FP + 1e-8)
        oa = (TP + TN) / (TP + FN + TN + FP + 1e-8)
        recall = TP / (TP + FN + 1e-8)
        f1 = 2 * precision * recall / (precision + recall + 1e-8)

        if not os.path.exists(ct.BEST_WEIGHT_SAVE_DIR):
            os.makedirs(ct.BEST_WEIGHT_SAVE_DIR)
        if f1 > best_f1:
            best_f1 = f1
            shutil.copy(
                os.path.join(ct.WEIGHTS_SAVE_DIR, 'current_netG.pth'),
                os.path.join(ct.BEST_WEIGHT_SAVE_DIR, 'netG.pth'))
        print('current F1: {}'.format(f1))
        print('best f1: {}'.format(best_f1))
        with open(os.path.join(ct.OUTPUTS_DIR, 'f1_score.txt'), 'a') as f:
            f.write('current epoch:{},current f1:{},best f1:{}'.format(
                epoch, f1, best_f1))
            f.write('\n')

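
# The validation loop above expects eva.f1(prediction, label) to return raw
# confusion counts (tp, fp, tn, fn) for one change map, which are summed over the
# whole set before precision/recall/F1 are computed. That helper is not shown
# here; a minimal pixel-wise sketch, assuming the prediction is binarized at 0.5
# (the threshold and the name confusion_counts_sketch are assumptions, not
# project code):
import torch

def confusion_counts_sketch(prediction, label, thresh=0.5):
    pred_bin = (prediction >= thresh)
    label_bin = (label >= 0.5)
    tp = torch.sum(pred_bin & label_bin).item()
    fp = torch.sum(pred_bin & ~label_bin).item()
    tn = torch.sum(~pred_bin & ~label_bin).item()
    fn = torch.sum(~pred_bin & label_bin).item()
    return tp, fp, tn, fn
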
def test_network():
    threshold = ct.THRESHOLD
    test_dir = ct.TEST_TXT
    path = os.path.join(ct.BEST_WEIGHT_SAVE_DIR, 'netG.pth')
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    pretrained_dict = torch.load(
        path, map_location=torch.device(device))['model_state_dict']

    test_data = OSCD_TEST(test_dir)
    test_dataloader = DataLoader(test_data, batch_size=1, shuffle=False)

    net = NetG(ct.ISIZE, ct.NC * 2, ct.NZ, ct.NDF, ct.EXTRALAYERS).to(device)
    # net = nn.DataParallel(net)
    net.load_state_dict(pretrained_dict, False)
    net.eval()

    i = 0
    TP = 0
    FN = 0
    FP = 0
    TN = 0
    # A bare torch.no_grad() call has no effect; use it as a context manager.
    with torch.no_grad():
        for i, data in enumerate(test_dataloader):
            INPUT_SIZE = [ct.ISIZE, ct.ISIZE]
            x1, x2, gt = data
            x1 = x1.to(device, dtype=torch.float)
            x2 = x2.to(device, dtype=torch.float)
            gt = gt.to(device, dtype=torch.float)
            gt = gt[:, 0, :, :].unsqueeze(1)
            x = torch.cat((x1, x2), 1)
            fake = net(x)

            save_path = os.path.join(ct.IM_SAVE_DIR, 'test_output_images')
            if not os.path.isdir(save_path):
                os.makedirs(save_path)
            if ct.SAVE_TEST_IAMGES:
                vutils.save_image(x1.data,
                                  os.path.join(save_path, '%d_x1.png' % i),
                                  normalize=True)
                vutils.save_image(x2.data,
                                  os.path.join(save_path, '%d_x2.png' % i),
                                  normalize=True)
                vutils.save_image(fake.data,
                                  os.path.join(save_path, '%d_gt_fake.png' % i),
                                  normalize=True)
                vutils.save_image(gt,
                                  os.path.join(save_path, '%d_gt.png' % i),
                                  normalize=True)

            tp, fp, tn, fn = eva.f1(fake, gt)
            TP += tp
            FN += fn
            TN += tn
            FP += fp
            i += 1
            print('testing {}th images'.format(i))

    iou = TP / (FN + TP + FP + 1e-8)
    precision = TP / (TP + FP + 1e-8)
    oa = (TP + TN) / (TP + FN + TN + FP + 1e-8)
    recall = TP / (TP + FN + 1e-8)
    f1 = 2 * precision * recall / (precision + recall + 1e-8)
    P = ((TP + FP) * (TP + FN) + (FN + TN) * (FP + TN)) / \
        ((TP + TN + FP + FN)**2 + 1e-8)
    Kappa = (oa - P) / (1 - P + 1e-8)
    results = {
        'iou': iou,
        'precision': precision,
        'oa': oa,
        'recall': recall,
        'f1': f1,
        'kappa': Kappa
    }
    with open(os.path.join(ct.OUTPUTS_DIR, 'test_score.txt'), 'a') as f:
        f.write('-----test results on the best model {}-----'.format(
            time.strftime('%Y-%m-%d %H:%M:%S')))
        f.write('\n')
        for key, value in results.items():
            print(key, value)
            f.write('{}: {}'.format(key, value))
            f.write('\n')

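
# A tiny worked example of the metric formulas used in test_network above
# (the counts are made up purely for illustration): with TP=50, FP=10, TN=30,
# FN=10, precision = recall = 50/60 ~ 0.833, overall accuracy = 80/100 = 0.8,
# expected agreement P = (60*60 + 40*40) / 100**2 = 0.52, and
# Kappa = (0.8 - 0.52) / (1 - 0.52) ~ 0.583.
TP, FP, TN, FN = 50, 10, 30, 10
precision = TP / (TP + FP)                              # 0.833...
recall = TP / (TP + FN)                                 # 0.833...
oa = (TP + TN) / (TP + TN + FP + FN)                    # 0.8
f1 = 2 * precision * recall / (precision + recall)      # 0.833...
P = ((TP + FP) * (TP + FN) + (FN + TN) * (FP + TN)) / (TP + TN + FP + FN) ** 2  # 0.52
kappa = (oa - P) / (1 - P)                              # 0.583...
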
def test(fhelper, train_args, test_args, corpus_file, linkings, train_path,
         test_path, model_path, crf, log_path, keep_boundary=False,
         use_baseline=False, use_feature=None, reverse_select=False,
         rstats=False, threshold=[0.7]):
    train_args.init_truth(corpus_file)
    data_set, test_set = extract_features(corpus_file, linkings, train_args,
                                          test_args,
                                          use_feature=use_feature,
                                          reverse_select=reverse_select)

    predictor = Predictor(crf, model_path, train_path, test_path)

    processes = []
    test_crf_data = []
    for i in fhelper.folds():
        crf_data = extract_crf_data(fhelper.train_set(i), data_set)
        if keep_boundary:
            crf_ranges = get_ranges(crf_data)
        else:
            crf_ranges = None
        test_crf_data.append(extract_crf_data(fhelper.test_set(i), test_set))
        processes.append(predictor.train(i, crf_data, crf_ranges))

    print('training...', end='', flush=True)
    for i, p in enumerate(processes):
        p.wait()
        print(i, end='', flush=True)
    print()

    print('start testing')
    processes = []
    if keep_boundary:
        test_crf_ranges = [get_ranges(crf_data) for crf_data in test_crf_data]
    else:
        test_crf_ranges = [None for crf_data in test_crf_data]
    for i in fhelper.folds():
        processes.append(
            predictor.test(i, test_crf_data[i], test_crf_ranges[i]))

    preds = []
    pred_probs = []
    wow_count = 0
    for crf_data, ranges, p in zip(test_crf_data, test_crf_ranges, processes):
        p.wait()
        arg_spans, probs = load_predict(p.stdout, ranges, use_baseline,
                                        crf_data)
        assert (len(arg_spans) == len(crf_data))
        preds.append(arg_spans)
        pred_probs.append(probs)

    # evaluation
    cv_stats = defaultdict(list)
    log_stats = defaultdict(int)
    if rstats:
        r_stats = defaultdict(int)
    with open(log_path, 'w') as log_out:
        for i, crf_data, pds in zip(fhelper.folds(), test_crf_data, preds):
            correct = 0
            tp = fp = total = 0
            i_tp = i_fp = i_total = 0
            p_i_tp = [0] * len(threshold)
            p_i_fp = [0] * len(threshold)
            for arg_span, item in zip(pds, crf_data):
                label, cindices, *_ = item

                s = set()
                if cindices in train_args.argument[label]:
                    rtype = train_args.argument[label][cindices][1]
                    pd_rtype = test_args.argument[label][cindices][1]
                    if rtype == pd_rtype:
                        s = train_args.edu_truth[label][cindices]

                log_error(log_out, s, arg_span, item, corpus_file,
                          stats=log_stats)

                truth_boundaries = set()
                for start, end in s:
                    truth_boundaries.add(start)
                    truth_boundaries.add(end)
                assert (len(truth_boundaries) == len(s) + 1
                        or len(s) == len(truth_boundaries) == 0)
                pd_boundaries = set()
                for start, end in arg_span:
                    pd_boundaries.add(start)
                    pd_boundaries.add(end)
                assert (len(pd_boundaries) == len(arg_span) + 1
                        or len(arg_span) == len(pd_boundaries) == 0)

                tp += len(truth_boundaries & pd_boundaries)
                fp += len(pd_boundaries - truth_boundaries)

                if s == arg_span:
                    correct += 1
                    if len(s) > 0:
                        i_tp += 1
                elif len(arg_span) > 0:
                    i_fp += 1

                # if predicted
                if len(arg_span) > 0:
                    partial = [False] * len(threshold)
                    # if num of args the same
                    if len(s) == len(arg_span):
                        EDU_offsets = corpus_file.edu_corpus[label]
                        overlap_scores = []
                        for pd_arg, t_arg in zip(sorted(arg_span), sorted(s)):
                            start = EDU_offsets[pd_arg[0]][0]
                            end = EDU_offsets[pd_arg[-1] - 1][-1]
                            t_start = EDU_offsets[t_arg[0]][0]
                            t_end = EDU_offsets[t_arg[-1] - 1][-1]
                            overlap_scores.append(
                                compute_overlap((start, end),
                                                (t_start, t_end)))
                        for j, th in enumerate(threshold):
                            partial[j] = sum(overlap_scores) / len(
                                overlap_scores) >= th
                    for j in range(len(threshold)):
                        if partial[j]:
                            p_i_tp[j] += 1
                        else:
                            p_i_fp[j] += 1

                if rstats and len(s) > 0:
                    def count_rstats(item, is_correct):
                        r_stats[item] += 1
                        if is_correct:
                            r_stats[item + '-correct'] += 1

                    # count connective length
                    count_rstats('clen-{}'.format(len(cindices)),
                                 s == arg_span)
                    # count argument length
                    count_rstats('alen-{}'.format(len(s)), s == arg_span)
                    # count front
                    count_rstats('front',
                                 min(pd_boundaries) == min(truth_boundaries))
                    # count back
                    count_rstats('back',
                                 max(pd_boundaries) == max(truth_boundaries))
                    # count middle
                    middle_truth = set(truth_boundaries)
                    middle_truth.remove(min(truth_boundaries))
                    middle_truth.remove(max(truth_boundaries))
                    middle_pd = set(pd_boundaries)
                    middle_pd.remove(min(pd_boundaries))
                    middle_pd.remove(max(pd_boundaries))
                    count_rstats('middle', middle_pd == middle_truth)
                    # count over/underpredict
                    count_rstats('over', len(s) < len(arg_span))
                    count_rstats('under', len(s) > len(arg_span))
                    # count match or not
                    if len(s) == len(cindices):
                        count_rstats('match', s == arg_span)
                    else:
                        count_rstats('notmatch', s == arg_span)
                    # types [ <]>
                    # types [ <> ]
                    # types < [] >
                    if s != arg_span:
                        tl, tr = min(truth_boundaries), max(truth_boundaries)
                        pl, pr = min(pd_boundaries), max(pd_boundaries)
                        if pl == tl and pr == tr:
                            count_rstats('exact', False)
                        else:
                            if pl >= tl and pr <= tr:
                                count_rstats('toosmall', False)
                            elif pl <= tl and pr >= tr:
                                count_rstats('toobig', False)
                            else:
                                count_rstats('cross', False)
                            if pl == tl or pr == tr:
                                count_rstats('at least one', False)
                            else:
                                count_rstats('both incorrect', False)

            for l in fhelper.test_set(i):
                d = train_args.edu_truth[l]
                for s in d.values():
                    if len(s) == 0:
                        continue
                    total += len(s) + 1
                    i_total += 1
            assert (sum(len(pd) + 1 if len(pd) != 0 else 0
                        for pd in pds) == tp + fp)

            cv_stats['Total'].append(total)
            cv_stats['iTotal'].append(i_total)
            cv_stats['Propose'].append(tp + fp)
            cv_stats['iPropose'].append(i_tp + i_fp)
            for j in range(len(threshold)):
                cv_stats['piPropose{}'.format(j)].append(p_i_tp[j] + p_i_fp[j])

            accuracy = correct / len(pds) if len(pds) > 0 else 1
            recall = tp / total
            prec = tp / (tp + fp) if (tp + fp) > 0 else 1
            f1 = evaluate.f1(recall, prec)
            cv_stats['Accuracy'].append(accuracy)
            cv_stats['Recall'].append(recall)
            cv_stats['Prec'].append(prec)
            cv_stats['F1'].append(f1)

            recall = i_tp / i_total
            prec = i_tp / (i_tp + i_fp) if (i_tp + i_fp) > 0 else 1
            f1 = evaluate.f1(recall, prec)
            cv_stats['iRecall'].append(recall)
            cv_stats['iPrec'].append(prec)
            cv_stats['iF1'].append(f1)

            for j in range(len(threshold)):
                recall = p_i_tp[j] / i_total
                prec = p_i_tp[j] / (p_i_tp[j] + p_i_fp[j]) if (
                    p_i_tp[j] + p_i_fp[j]) > 0 else 1
                f1 = evaluate.f1(recall, prec)
                cv_stats['piRecall{}'.format(j)].append(recall)
                cv_stats['piPrec{}'.format(j)].append(prec)
                cv_stats['piF1{}'.format(j)].append(f1)

    print('prec\trecall\tF1')
    print('{:.2%}\t{:.2%}\t{:.2%}'.format(
        np.mean(cv_stats['Prec']),
        np.mean(cv_stats['Recall']),
        np.mean(cv_stats['F1']),
    ))
    print('Fold Prec', cv_stats['Prec'])
    print('Fold Recall', cv_stats['Recall'])
    print('Fold F1', cv_stats['F1'])
    print('Instance')
    print('prec\trecall\tF1\tAccuracy')
    print('{:.2%}\t{:.2%}\t{:.2%}\t{:.2%}'.format(
        np.mean(cv_stats['iPrec']), np.mean(cv_stats['iRecall']),
        np.mean(cv_stats['iF1']), np.mean(cv_stats['Accuracy'])))
    print('pprec\tprecall\tpF1\tthreshold')
    for j, th in enumerate(threshold):
        print('{:.2%}\t{:.2%}\t{:.2%}\t{}'.format(
            np.mean(cv_stats['piPrec{}'.format(j)]),
            np.mean(cv_stats['piRecall{}'.format(j)]),
            np.mean(cv_stats['piF1{}'.format(j)]), th))
    print('Fold Prec', cv_stats['iPrec'])
    print('Fold Recall', cv_stats['iRecall'])
    print('Fold F1', cv_stats['iF1'])
    print('Fold Accuracy', cv_stats['Accuracy'])
    print('Totally {} arguments for all'.format(sum(cv_stats['Total'])))
    print('Totally {} instances for all'.format(sum(cv_stats['iTotal'])))
    print('Totally {} arguments predicted for all'.format(
        sum(cv_stats['Propose'])))
    print('Totally {} instances predicted for all'.format(
        sum(cv_stats['iPropose'])))
    for j, th in enumerate(threshold):
        print('{} with threshold = {}'.format(j, th))
        print('Fold pPrec', cv_stats['piPrec{}'.format(j)])
        print('Fold pRecall', cv_stats['piRecall{}'.format(j)])
        print('Fold pF1', cv_stats['piF1{}'.format(j)])
        print('Totally {} partial instances predicted for all'.format(
            sum(cv_stats['piPropose{}'.format(j)])))
    print('log stats:')
    # evaluate.print_counts(log_stats)
    if rstats:
        evaluate.print_counts(r_stats)
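
# The partial-match ("pi*") statistics in test above count a prediction as correct
# when the mean compute_overlap score over its arguments reaches the threshold
# (0.7 by default). A small usage example of compute_overlap with made-up offsets
# (the numbers are illustrative only): a predicted span (0, 10) against a true
# span (2, 12) overlaps by 8 characters, so precision = recall = 0.8 and the
# returned F1 is 0.8, which counts as a partial match at threshold 0.7 but not
# at threshold 0.9.
score = compute_overlap((0, 10), (2, 12))  # expected to be 0.8
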