def eval_verification(descr, split):
    print('>> Evaluating %s task' % green('verification'))
    start = time.time()
    # .as_matrix() was removed in pandas 1.0; use .to_numpy() instead
    pos = pd.read_csv('utils/tasks/verif_pos_split-' + split['name'] + '.csv').to_numpy()
    neg_intra = pd.read_csv('utils/tasks/verif_neg_intra_split-' + split['name'] + '.csv').to_numpy()
    neg_inter = pd.read_csv('utils/tasks/verif_neg_inter_split-' + split['name'] + '.csv').to_numpy()

    d_pos = get_verif_dists(descr, pos, 1)
    d_neg_intra = get_verif_dists(descr, neg_intra, 2)
    d_neg_inter = get_verif_dists(descr, neg_inter, 3)

    results = defaultdict(lambda: defaultdict(lambda: defaultdict(dict)))
    for t in tp:
        l = np.vstack((np.zeros_like(d_pos[t]), np.ones_like(d_pos[t])))
        d_intra = np.vstack((d_neg_intra[t], d_pos[t]))
        d_inter = np.vstack((d_neg_inter[t], d_pos[t]))

        # Balanced protocol: 1M positives vs. 1M negatives
        fpr, tpr, auc = metrics.roc(-d_intra, l)
        results[t]['intra']['balanced']['fpr'] = fpr
        results[t]['intra']['balanced']['tpr'] = tpr
        results[t]['intra']['balanced']['auc'] = auc

        fpr, tpr, auc = metrics.roc(-d_inter, l)
        results[t]['inter']['balanced']['fpr'] = fpr
        results[t]['inter']['balanced']['tpr'] = tpr
        results[t]['inter']['balanced']['auc'] = auc

        # Imbalanced protocol: 0.2M positives vs. 1M negatives.
        # Negatives are stacked first, so the first N_imb rows keep
        # all 1M negatives plus the first 0.2M positives.
        N_imb = d_pos[t].shape[0] + int(d_pos[t].shape[0] * 0.2)
        pr, rc, ap = metrics.pr(-d_intra[0:N_imb], l[0:N_imb])
        results[t]['intra']['imbalanced']['pr'] = pr
        results[t]['intra']['imbalanced']['rc'] = rc
        results[t]['intra']['imbalanced']['ap'] = ap

        pr, rc, ap = metrics.pr(-d_inter[0:N_imb], l[0:N_imb])
        results[t]['inter']['imbalanced']['pr'] = pr
        results[t]['inter']['imbalanced']['rc'] = rc
        results[t]['inter']['imbalanced']['ap'] = ap

    end = time.time()
    print(">> %s task finished in %.0f secs" % (green('Verification'), end - start))
    return results
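# A tiny worked example (toy sizes, not the real 1M-pair lists) of the
# imbalanced slicing in eval_verification above: negatives come first
# in the vstack, so the first N_imb rows keep every negative plus the
# first 20% of the positives.
import numpy as np

d_pos = np.arange(10).reshape(-1, 1)       # 10 "positive" distances
d_neg = np.arange(10, 20).reshape(-1, 1)   # 10 "negative" distances
d = np.vstack((d_neg, d_pos))
N_imb = d_pos.shape[0] + int(d_pos.shape[0] * 0.2)  # 10 + 2 = 12
assert d[0:N_imb].shape[0] == 12           # 10 negatives + 2 positives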
def plot_roc(y, f, label="", show=True, save=None):
    tprs, fprs = metrics.roc(y, f)
    plt.plot(fprs, tprs, label=label)
    plt.gca().set_aspect(1)
    plt.title("ROC")
    plt.xlabel("False positive rate")
    plt.ylabel("True positive rate")
    plt.legend()
    if save is not None:
        plt.savefig(save)
    if show:
        plt.show()
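# A minimal usage sketch for plot_roc, assuming metrics.roc(y, f)
# returns (tprs, fprs) as the function above expects; the labels and
# scores here are synthetic and purely illustrative.
import numpy as np

rng = np.random.default_rng(0)
y = rng.integers(0, 2, size=1000)           # binary ground truth
f = y + rng.normal(scale=0.8, size=1000)    # noisy decision scores
plot_roc(y, f, label="toy model", show=True, save="roc_toy.png")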
def evaluate(self,
             experiment_path: Path,
             task: str = 'aurora_clean',
             model_resolution=0.02,
             time_resolution=0.02,
             threshold=(0.5, 0.1),
             **kwargs):
    EVALUATION_DATA = {
        'aurora_clean': {
            'data': 'data/evaluation/hdf5/aurora_clean.h5',
            'label': 'data/evaluation/labels/aurora_clean_labels.tsv',
        },
        'aurora_noisy': {
            'data': 'data/evaluation/hdf5/aurora_noisy.h5',
            'label': 'data/evaluation/labels/aurora_noisy_labels.tsv'
        },
        'dihard_dev': {
            'data': 'data/evaluation/hdf5/dihard_dev.h5',
            'label': 'data/evaluation/labels/dihard_dev.csv'
        },
        'dihard_eval': {
            'data': 'data/evaluation/hdf5/dihard_eval.h5',
            'label': 'data/evaluation/labels/dihard_eval.csv'
        },
        'aurora_snr_20': {
            'data': 'data/evaluation/hdf5/aurora_noisy_musan_snr_20.0.hdf5',
            'label': 'data/evaluation/labels/musan_labels.tsv'
        },
        'aurora_snr_15': {
            'data': 'data/evaluation/hdf5/aurora_noisy_musan_snr_15.0.hdf5',
            'label': 'data/evaluation/labels/musan_labels.tsv'
        },
        'aurora_snr_10': {
            'data': 'data/evaluation/hdf5/aurora_noisy_musan_snr_10.0.hdf5',
            'label': 'data/evaluation/labels/musan_labels.tsv'
        },
        'aurora_snr_5': {
            'data': 'data/evaluation/hdf5/aurora_noisy_musan_snr_5.0.hdf5',
            'label': 'data/evaluation/labels/musan_labels.tsv'
        },
        'aurora_snr_0': {
            'data': 'data/evaluation/hdf5/aurora_noisy_musan_snr_0.0.hdf5',
            'label': 'data/evaluation/labels/musan_labels.tsv'
        },
        'aurora_snr_-5': {
            'data': 'data/evaluation/hdf5/aurora_noisy_musan_snr_-5.0.hdf5',
            'label': 'data/evaluation/labels/musan_labels.tsv'
        },
        'dcase18': {
            'data': 'data/evaluation/hdf5/dcase18.h5',
            'label': 'data/evaluation/labels/dcase18.tsv',
        },
    }
    assert task in EVALUATION_DATA, f"--task {'|'.join(list(EVALUATION_DATA.keys()))}"
    experiment_path = Path(experiment_path)
    if experiment_path.is_file():  # Model is given
        model_path = experiment_path
        experiment_path = experiment_path.parent
    else:
        model_path = next(Path(experiment_path).glob("run_model*"))
    config = torch.load(next(Path(experiment_path).glob("run_config*")),
                        map_location='cpu')
    logger = utils.getfile_outlogger(None)
    # Use previous config, but update data such as kwargs
    config_parameters = dict(config, **kwargs)
    # Default columns to search for in data
    model_parameters = torch.load(
        model_path, map_location=lambda storage, loc: storage)
    encoder = torch.load('labelencoders/vad.pth')
    data = EVALUATION_DATA[task]['data']
    # raw string for the regex separator (plain '\s+' is an invalid escape)
    label_df = pd.read_csv(EVALUATION_DATA[task]['label'], sep=r'\s+')
    label_df['filename'] = label_df['filename'].apply(lambda x: Path(x).name)
    logger.info(f"Label_df shape is {label_df.shape}")

    dset = dataset.EvalH5Dataset(data,
                                 fnames=np.unique(label_df['filename'].values))
    dataloader = torch.utils.data.DataLoader(dset,
                                             batch_size=1,
                                             num_workers=4,
                                             shuffle=False)
    model = getattr(models, config_parameters['model'])(
        inputdim=dataloader.dataset.datadim,
        outputdim=len(encoder.classes_),
        **config_parameters['model_args'])
    model.load_state_dict(model_parameters)
    model = model.to(DEVICE).eval()

    ## VAD preprocessing data
    vad_label_helper_df = label_df.copy()
    vad_label_helper_df['onset'] = np.ceil(vad_label_helper_df['onset'] /
                                           model_resolution).astype(int)
    vad_label_helper_df['offset'] = np.ceil(vad_label_helper_df['offset'] /
                                            model_resolution).astype(int)
    vad_label_helper_df = vad_label_helper_df.groupby(['filename']).agg({
        'onset': tuple,
        'offset': tuple,
        'event_label': tuple
    }).reset_index()
    logger.trace(model)

    output_dfs = []
    speech_label_idx = np.where('Speech' == encoder.classes_)[0].squeeze()
    speech_frame_predictions, speech_frame_ground_truth, speech_frame_prob_predictions = [], [], []
    # Use plain binarization when a single threshold is given,
    # otherwise double thresholding (no filtering in either case)
    if len(threshold) == 1:
        postprocessing_method = utils.binarize
    else:
        postprocessing_method = utils.double_threshold
    with torch.no_grad(), tqdm(total=len(dataloader),
                               leave=False,
                               unit='clip') as pbar:
        for feature, filename in dataloader:
            feature = torch.as_tensor(feature).to(DEVICE)
            # PANNs output a dict instead of 2 values
            prediction_tag, prediction_time = model(feature)
            prediction_tag = prediction_tag.to('cpu')
            prediction_time = prediction_time.to('cpu')

            if prediction_time is not None:  # Some models do not predict timestamps
                cur_filename = filename[0]
                thresholded_prediction = postprocessing_method(
                    prediction_time, *threshold)

                ## VAD predictions
                speech_frame_prob_predictions.append(
                    prediction_time[..., speech_label_idx].squeeze())
                ### Thresholded speech predictions
                speech_prediction = thresholded_prediction[
                    ..., speech_label_idx].squeeze()
                speech_frame_predictions.append(speech_prediction)
                targets = vad_label_helper_df[
                    vad_label_helper_df['filename'] == cur_filename][[
                        'onset', 'offset'
                    ]].values[0]
                target_arr = np.zeros_like(speech_prediction)
                for start, end in zip(*targets):
                    target_arr[start:end] = 1
                speech_frame_ground_truth.append(target_arr)

                #### SED predictions
                labelled_predictions = utils.decode_with_timestamps(
                    encoder, thresholded_prediction)
                pred_label_df = pd.DataFrame(
                    labelled_predictions[0],
                    columns=['event_label', 'onset', 'offset'])
                if not pred_label_df.empty:
                    pred_label_df['filename'] = cur_filename
                    pred_label_df['onset'] *= model_resolution
                    pred_label_df['offset'] *= model_resolution
                    pbar.set_postfix(labels=','.join(
                        np.unique(pred_label_df['event_label'].values)))
                pbar.update()
                output_dfs.append(pred_label_df)

    full_prediction_df = pd.concat(output_dfs)
    prediction_df = full_prediction_df[full_prediction_df['event_label'] ==
                                       'Speech']
    assert set(['onset', 'offset', 'filename', 'event_label'
                ]).issubset(prediction_df.columns), "Format is wrong"
    assert set(['onset', 'offset', 'filename', 'event_label'
                ]).issubset(label_df.columns), "Format is wrong"

    logger.info("Calculating VAD measures ...")
    speech_frame_ground_truth = np.concatenate(speech_frame_ground_truth,
                                               axis=0)
    speech_frame_predictions = np.concatenate(speech_frame_predictions,
                                              axis=0)
    speech_frame_prob_predictions = np.concatenate(
        speech_frame_prob_predictions, axis=0)

    vad_results = []
    tn, fp, fn, tp = metrics.confusion_matrix(
        speech_frame_ground_truth, speech_frame_predictions).ravel()
    fer = 100 * ((fp + fn) / len(speech_frame_ground_truth))
    acc = 100 * ((tp + tn) / (len(speech_frame_ground_truth)))
    p_miss = 100 * (fn / (fn + tp))
    p_fa = 100 * (fp / (fp + tn))
    for i in [0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 0.7, 0.9]:
        mp_fa, mp_miss = metrics.obtain_error_rates(
            speech_frame_ground_truth, speech_frame_prob_predictions, i)
        tn, fp, fn, tp = metrics.confusion_matrix(
            speech_frame_ground_truth,
            speech_frame_prob_predictions > i).ravel()
        sub_fer = 100 * ((fp + fn) / len(speech_frame_ground_truth))
        logger.info(
            f"PFa {100*mp_fa:.2f} Pmiss {100*mp_miss:.2f} FER {sub_fer:.2f} t: {i:.2f}"
        )
    auc = metrics.roc(speech_frame_ground_truth,
                      speech_frame_prob_predictions) * 100
    for avgtype in ('micro', 'macro', 'binary'):
        precision, recall, f1, _ = metrics.precision_recall_fscore_support(
            speech_frame_ground_truth,
            speech_frame_predictions,
            average=avgtype)
        vad_results.append((avgtype, 100 * precision, 100 * recall, 100 * f1))

    logger.info("Calculating segment based metric ...")
    # Change column order just for better printing in the output file
    prediction_df = prediction_df[[
        'filename', 'onset', 'offset', 'event_label'
    ]]
    metric = metrics.segment_based_evaluation_df(
        label_df, prediction_df, time_resolution=time_resolution)
    logger.info("Calculating event based metric ...")
    event_metric = metrics.event_based_evaluation_df(label_df, prediction_df)

    prediction_df.to_csv(experiment_path / f'speech_predictions_{task}.tsv',
                         sep='\t',
                         index=False)
    full_prediction_df.to_csv(experiment_path / f'predictions_{task}.tsv',
                              sep='\t',
                              index=False)

    with open(experiment_path / f'evaluation_{task}.txt', 'w') as fp:
        for k, v in config_parameters.items():
            print(f"{k}:{v}", file=fp)
        print(metric, file=fp)
        print(event_metric, file=fp)
        for avgtype, precision, recall, f1 in vad_results:
            print(
                f"VAD {avgtype} F1: {f1:<10.3f} Pre: {precision:<10.3f} Recall: {recall:<10.3f}",
                file=fp)
        print(f"FER: {fer:.2f}", file=fp)
        print(f"AUC: {auc:.2f}", file=fp)
        print(f"Pfa: {p_fa:.2f}", file=fp)
        print(f"Pmiss: {p_miss:.2f}", file=fp)
        print(f"ACC: {acc:.2f}", file=fp)
    logger.info(f"Results are at {experiment_path}")
    for avgtype, precision, recall, f1 in vad_results:
        print(
            f"VAD {avgtype:<10} F1: {f1:<10.3f} Pre: {precision:<10.3f} Recall: {recall:<10.3f}"
        )
    print(f"FER: {fer:.2f}")
    print(f"AUC: {auc:.2f}")
    print(f"Pfa: {p_fa:.2f}")
    print(f"Pmiss: {p_miss:.2f}")
    print(f"ACC: {acc:.2f}")
    print(event_metric)
    print(metric)
    if mask_file is not None:
        # zero out predictions outside the field-of-view mask
        # (vectorized form of the original per-pixel loop)
        pred[msk != 255] = 0
    return pred, out  # out may be used in roc


if __name__ == '__main__':
    model = MF_U_Net()
    model.eval()
    model.load_state_dict(
        torch.load('mf_unet2_400.pkl', map_location=torch.device('cpu')))
    pred, out = seg_img(model, '../data/DRIVE/test/proc_imgs/01_test.tif',
                        '../data/DRIVE/test/mask/01_test_mask.gif')
    '''
    pred, out = seg_img(model, '../data/Image_01L.jpg')
    '''
    cv.imshow('img', pred)
    cv.waitKey(0)
    cv.destroyAllWindows()

    label_file = '../data/DRIVE/test/1st_manual/01_manual1.gif'
    # label_file = '../data/Image_01L_1stHO.png'
    target = cv.imread(label_file, cv.IMREAD_GRAYSCALE)
    if target is None:  # cv2 cannot read GIFs
        target = imageio.mimread(label_file)[0]  # but imageio can
    met = metrics(pred, target)
    auroc = roc(target, out)
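# Hedged sketch of the pixel-wise ROC helper that roc(target, out)
# above appears to assume: flatten the label image and the probability
# map, then delegate to scikit-learn. The project's own roc() may
# differ, e.g. in argument order or field-of-view mask handling.
import numpy as np
from sklearn.metrics import roc_auc_score

def pixel_roc_sketch(target, out):
    y_true = (np.asarray(target).ravel() > 0).astype(int)  # binarize labels
    y_score = np.asarray(out, dtype=float).ravel()         # per-pixel scores
    return roc_auc_score(y_true, y_score)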
def computeROC(self):
    fpr, tpr, auc = metrics.roc(self.scores, self.labels)
    logger.info(f"Area under ROC: {auc}")
    return fpr, tpr, auc
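# Hedged sketch of a metrics.roc with the (scores, labels) ->
# (fpr, tpr, auc) contract that computeROC above relies on, built on
# scikit-learn; the actual project helper may differ in details.
from sklearn.metrics import auc as sk_auc, roc_curve

def roc_sketch(scores, labels):
    fpr, tpr, _ = roc_curve(labels, scores)  # sklearn takes labels first
    return fpr, tpr, sk_auc(fpr, tpr)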
predict_path = ''
label_path = ''
mask_path = ''
out_path = ''
#######################################

# get the file names from the paths
pred_files = os.listdir(predict_path)
label_files = os.listdir(label_path)
mask_files = os.listdir(mask_path)

# sort the file name lists so predictions, labels and masks pair up
pred_files.sort()
label_files.sort()
mask_files.sort()

# combine the directory paths with the file names
pred_files = [predict_path + f for f in pred_files]
label_files = [label_path + f for f in label_files]
mask_files = [mask_path + f for f in mask_files]

# load the images into arrays
pred_imgs = get_imarr(pred_files)
label_imgs = get_imarr(label_files)
# mask_imgs = get_imarr(mask_files)  # not needed
outs = np.load(out_path)
print('data loaded.')

met = metrics(pred_imgs, label_imgs)
auroc = roc(label_imgs, outs)
print(met)
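# Note: the concatenations above rely on each *_path ending with a
# separator. os.path.join avoids that assumption; a drop-in
# alternative for the list comprehensions would be, e.g.:
#   pred_files = [os.path.join(predict_path, f) for f in pred_files]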
def evaluate_tagging(self,
                     experiment_path: str,
                     tag_file='tagging_predictions_{}.txt',
                     **kwargs):
    exppath = Path(experiment_path)
    if exppath.is_file():  # Best model passed!
        model_parameters = torch.load(
            str(exppath), map_location=lambda storage, loc: storage)
        experiment_path = exppath.parent  # Just set upper path as default
    else:
        model_parameters = torch.load(
            glob.glob("{}/run_model*".format(experiment_path))[0],
            map_location=lambda storage, loc: storage)
    config = torch.load(glob.glob(
        "{}/run_config*".format(experiment_path))[0],
                        map_location=lambda storage, loc: storage)
    logger = utils.getfile_outlogger(None)
    # Use previous config, but update data such as kwargs
    config_parameters = dict(config, **kwargs)
    # Default columns to search for in data
    config_parameters.setdefault('colname', ('filename', 'encoded'))
    encoder = torch.load(glob.glob(
        '{}/run_encoder*'.format(experiment_path))[0],
                         map_location=lambda storage, loc: storage)
    test_data_filename = os.path.splitext(
        os.path.basename(config_parameters['label']))[0]
    # raw string for the regex separator (plain '\s+' is an invalid escape)
    strong_labels_df = pd.read_csv(config_parameters['label'], sep=r'\s+')
    # Evaluation is done via the filenames, not full paths
    if not np.issubdtype(strong_labels_df['filename'].dtype, np.number):
        strong_labels_df['filename'] = strong_labels_df['filename'].apply(
            os.path.basename)
    if 'audiofilepath' in strong_labels_df.columns:
        # In the AVE dataset, audiofilepath is the main column
        strong_labels_df['audiofilepath'] = strong_labels_df[
            'audiofilepath'].apply(os.path.basename)
        colname = 'audiofilepath'  # AVE
    else:
        colname = 'filename'  # DCASE etc.
    weak_labels_df = strong_labels_df.groupby(
        colname)['event_label'].unique().apply(
            tuple).to_frame().reset_index()
    if "event_labels" in strong_labels_df.columns:
        assert False, "Data with the column event_labels are used for training, not evaluation"
    weak_labels_array, encoder = utils.encode_labels(
        labels=weak_labels_df['event_label'], encoder=encoder)
    # assert (weak_labels_df['encoded'].apply(lambda x: sum(x)) >
    #         0).all(), "No targets found, is the encoder maybe not right?"
    for k, v in config_parameters.items():
        logger.info(f"{k}:{v}")
    dataloader = dataset.getdataloader(
        {
            'filename': weak_labels_df['filename'].values,
            'encoded': weak_labels_array
        },
        config_parameters['data'],
        batch_size=1,
        shuffle=False,
        colname=config_parameters['colname'],  # For other datasets with different key names
        num_workers=3,
    )
    model = getattr(models, config_parameters['model'])(
        inputdim=dataloader.dataset.datadim,
        outputdim=len(encoder.classes_),
        **config_parameters['model_args'])
    model.load_state_dict(model_parameters)
    model = model.to(DEVICE).eval()

    y_pred, y_true = [], []
    with torch.no_grad():
        for batch in tqdm(dataloader, unit='file', leave=False):
            _, target, filenames = batch
            clip_pred, _, _ = self._forward(model, batch)
            clip_pred = clip_pred.cpu().detach().numpy()
            y_pred.append(clip_pred)
            y_true.append(target.numpy())
    y_pred = np.concatenate(y_pred)
    y_true = np.concatenate(y_true)
    mAP = np.nan_to_num(metrics.mAP(y_true, y_pred))
    auc = np.nan_to_num(metrics.roc(y_true, y_pred))
    with open(
            os.path.join(experiment_path,
                         tag_file.format(test_data_filename)), 'w') as wp:
        print(f"mAP:{mAP.mean():.3f}", file=wp)
        print(f"mAP:\n{mAP.mean():.3f}")
        print(f"AUC:{auc.mean():.3f}", file=wp)
        print(f"AUC:\n{auc.mean():.3f}")
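# Hedged sketch of the per-class tagging metrics assumed above
# (metrics.mAP and metrics.roc on multi-label clip predictions), built
# on scikit-learn; the repo's own helpers may treat edge cases such as
# classes without positives or NaNs differently (hence the
# np.nan_to_num guard in evaluate_tagging).
from sklearn.metrics import average_precision_score, roc_auc_score

def mAP_sketch(y_true, y_pred):
    # average=None keeps one average precision per class, so .mean()
    # over the result gives the mAP value printed above
    return average_precision_score(y_true, y_pred, average=None)

def auc_sketch(y_true, y_pred):
    # one ROC-AUC per class; .mean() gives the reported AUC
    return roc_auc_score(y_true, y_pred, average=None)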