Example #1
def eval_verification(descr,split):
    print('>> Evaluating %s task' % green('verification'))

    start = time.time()
    # load the positive / negative patch-pair indices for this split
    pos = pd.read_csv('utils/tasks/verif_pos_split-'+split['name']+'.csv').to_numpy()
    neg_intra = pd.read_csv('utils/tasks/verif_neg_intra_split-'+split['name']+'.csv').to_numpy()
    neg_inter = pd.read_csv('utils/tasks/verif_neg_inter_split-'+split['name']+'.csv').to_numpy()

    d_pos = get_verif_dists(descr,pos,1)
    d_neg_intra = get_verif_dists(descr,neg_intra,2)
    d_neg_inter = get_verif_dists(descr,neg_inter,3)

    results = defaultdict(lambda: defaultdict(lambda:defaultdict(dict)))
    
    for t in tp:  # tp: variant identifiers, defined elsewhere in the benchmark
        l = np.vstack((np.zeros_like(d_pos[t]),np.ones_like(d_pos[t])))
        d_intra = np.vstack((d_neg_intra[t],d_pos[t]))
        d_inter = np.vstack((d_neg_inter[t],d_pos[t]))

        # get results for the balanced protocol: 1M Positives - 1M Negatives
        fpr,tpr,auc = metrics.roc(-d_intra,l)        
        results[t]['intra']['balanced']['fpr'] = fpr
        results[t]['intra']['balanced']['tpr'] = tpr
        results[t]['intra']['balanced']['auc'] = auc

        fpr,tpr,auc = metrics.roc(-d_inter,l)
        results[t]['inter']['balanced']['fpr'] = fpr
        results[t]['inter']['balanced']['tpr'] = tpr
        results[t]['inter']['balanced']['auc'] = auc

        # get results for the imbalanced protocol: 0.2M Positives - 1M Negatives        
        N_imb = d_pos[t].shape[0] + int(d_pos[t].shape[0]*0.2) # 1M + 0.2*1M
        pr,rc,ap = metrics.pr(-d_intra[0:N_imb],l[0:N_imb])
        results[t]['intra']['imbalanced']['pr'] = pr
        results[t]['intra']['imbalanced']['rc'] = rc
        results[t]['intra']['imbalanced']['ap'] = ap
        
        pr,rc,ap = metrics.pr(-d_inter[0:N_imb],l[0:N_imb])
        results[t]['inter']['imbalanced']['pr'] = pr
        results[t]['inter']['imbalanced']['rc'] = rc
        results[t]['inter']['imbalanced']['ap'] = ap
    end = time.time()
    print(">> %s task finished in %.0f secs  " % (green('Verification'),end-start))
    return results
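A minimal sketch of how the nested results dict returned above might be read, assuming descr and split are prepared as the benchmark expects; the outer keys are whatever variant identifiers tp holds (an assumption here):

results = eval_verification(descr, split)
for t in results:
    # balanced-protocol AUC and imbalanced-protocol AP, per variant
    print(t, results[t]['intra']['balanced']['auc'], results[t]['intra']['imbalanced']['ap'])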
Example #2
def plot_roc(y, f, label="", show=True, save=None):
    tprs, fprs = metrics.roc(y, f)
    plt.plot(fprs, tprs, label=label)
    plt.gca().set_aspect(1)
    plt.title("ROC")
    plt.xlabel("False positive rate")
    plt.ylabel("True positive rate")
    plt.legend()
    if save is not None:
        plt.savefig(save)
    if show:
        plt.show()
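A small usage sketch for plot_roc with made-up labels and scores (the arrays below are illustrative only; metrics.roc is whatever module this example imports):

import numpy as np

y_true = np.array([0, 0, 1, 1, 0, 1])               # hypothetical binary labels
scores = np.array([0.1, 0.4, 0.35, 0.8, 0.2, 0.9])  # hypothetical classifier scores
plot_roc(y_true, scores, label='toy model', show=True, save='roc_toy.png')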
Example #3
    def evaluate(self,
                 experiment_path: Path,
                 task: str = 'aurora_clean',
                 model_resolution=0.02,
                 time_resolution=0.02,
                 threshold=(0.5, 0.1),
                 **kwargs):
        EVALUATION_DATA = {
            'aurora_clean': {
                'data': 'data/evaluation/hdf5/aurora_clean.h5',
                'label': 'data/evaluation/labels/aurora_clean_labels.tsv',
            },
            'aurora_noisy': {
                'data': 'data/evaluation/hdf5/aurora_noisy.h5',
                'label': 'data/evaluation/labels/aurora_noisy_labels.tsv'
            },
            'dihard_dev': {
                'data': 'data/evaluation/hdf5/dihard_dev.h5',
                'label': 'data/evaluation/labels/dihard_dev.csv'
            },
            'dihard_eval': {
                'data': 'data/evaluation/hdf5/dihard_eval.h5',
                'label': 'data/evaluation/labels/dihard_eval.csv'
            },
            'aurora_snr_20': {
                'data': 'data/evaluation/hdf5/aurora_noisy_musan_snr_20.0.hdf5',
                'label': 'data/evaluation/labels/musan_labels.tsv'
            },
            'aurora_snr_15': {
                'data': 'data/evaluation/hdf5/aurora_noisy_musan_snr_15.0.hdf5',
                'label': 'data/evaluation/labels/musan_labels.tsv'
            },
            'aurora_snr_10': {
                'data': 'data/evaluation/hdf5/aurora_noisy_musan_snr_10.0.hdf5',
                'label': 'data/evaluation/labels/musan_labels.tsv'
            },
            'aurora_snr_5': {
                'data': 'data/evaluation/hdf5/aurora_noisy_musan_snr_5.0.hdf5',
                'label': 'data/evaluation/labels/musan_labels.tsv'
            },
            'aurora_snr_0': {
                'data': 'data/evaluation/hdf5/aurora_noisy_musan_snr_0.0.hdf5',
                'label': 'data/evaluation/labels/musan_labels.tsv'
            },
            'aurora_snr_-5': {
                'data': 'data/evaluation/hdf5/aurora_noisy_musan_snr_-5.0.hdf5',
                'label': 'data/evaluation/labels/musan_labels.tsv'
            },
            'dcase18': {
                'data': 'data/evaluation/hdf5/dcase18.h5',
                'label': 'data/evaluation/labels/dcase18.tsv',
            },
        }
        assert task in EVALUATION_DATA, f"--task {'|'.join(list(EVALUATION_DATA.keys()))}"
        experiment_path = Path(experiment_path)
        if experiment_path.is_file():  # Model is given
            model_path = experiment_path
            experiment_path = experiment_path.parent
        else:
            model_path = next(experiment_path.glob("run_model*"))
        config = torch.load(next(experiment_path.glob("run_config*")),
                            map_location='cpu')
        logger = utils.getfile_outlogger(None)
        # Use previous config, but update data such as kwargs
        config_parameters = dict(config, **kwargs)
        # Default columns to search for in data
        model_parameters = torch.load(
            model_path, map_location=lambda storage, loc: storage)
        encoder = torch.load('labelencoders/vad.pth')
        data = EVALUATION_DATA[task]['data']
        label_df = pd.read_csv(EVALUATION_DATA[task]['label'], sep=r'\s+')
        label_df['filename'] = label_df['filename'].apply(
            lambda x: Path(x).name)
        logger.info(f"Label_df shape is {label_df.shape}")

        dset = dataset.EvalH5Dataset(data,
                                     fnames=np.unique(
                                         label_df['filename'].values))

        dataloader = torch.utils.data.DataLoader(dset,
                                                 batch_size=1,
                                                 num_workers=4,
                                                 shuffle=False)

        model = getattr(models, config_parameters['model'])(
            inputdim=dataloader.dataset.datadim,
            outputdim=len(encoder.classes_),
            **config_parameters['model_args'])

        model.load_state_dict(model_parameters)
        model = model.to(DEVICE).eval()

        ## VAD preprocessing data
        vad_label_helper_df = label_df.copy()
        vad_label_helper_df['onset'] = np.ceil(vad_label_helper_df['onset'] /
                                               model_resolution).astype(int)
        vad_label_helper_df['offset'] = np.ceil(vad_label_helper_df['offset'] /
                                                model_resolution).astype(int)

        vad_label_helper_df = vad_label_helper_df.groupby(['filename']).agg({
            'onset': tuple,
            'offset': tuple,
            'event_label': tuple
        }).reset_index()
        logger.trace(model)

        output_dfs = []

        speech_label_idx = np.where('Speech' == encoder.classes_)[0].squeeze()
        speech_frame_predictions, speech_frame_ground_truth, speech_frame_prob_predictions = [], [], []
        # Using only binary thresholding without filter
        if len(threshold) == 1:
            postprocessing_method = utils.binarize
        else:
            postprocessing_method = utils.double_threshold
        with torch.no_grad(), tqdm(total=len(dataloader),
                                   leave=False,
                                   unit='clip') as pbar:
            for feature, filename in dataloader:
                feature = torch.as_tensor(feature).to(DEVICE)
                # PANNS output a dict instead of 2 values
                prediction_tag, prediction_time = model(feature)
                prediction_tag = prediction_tag.to('cpu')
                prediction_time = prediction_time.to('cpu')

                if prediction_time is not None:  # Some models do not predict timestamps

                    cur_filename = filename[0]

                    thresholded_prediction = postprocessing_method(
                        prediction_time, *threshold)

                    ## VAD predictions
                    speech_frame_prob_predictions.append(
                        prediction_time[..., speech_label_idx].squeeze())
                    ### Thresholded speech predictions
                    speech_prediction = thresholded_prediction[
                        ..., speech_label_idx].squeeze()
                    speech_frame_predictions.append(speech_prediction)
                    targets = vad_label_helper_df[
                        vad_label_helper_df['filename'] == cur_filename][[
                            'onset', 'offset'
                        ]].values[0]
                    target_arr = np.zeros_like(speech_prediction)
                    for start, end in zip(*targets):
                        target_arr[start:end] = 1
                    speech_frame_ground_truth.append(target_arr)

                    #### SED predictions

                    labelled_predictions = utils.decode_with_timestamps(
                        encoder, thresholded_prediction)
                    pred_label_df = pd.DataFrame(
                        labelled_predictions[0],
                        columns=['event_label', 'onset', 'offset'])
                    if not pred_label_df.empty:
                        pred_label_df['filename'] = cur_filename
                        pred_label_df['onset'] *= model_resolution
                        pred_label_df['offset'] *= model_resolution
                        pbar.set_postfix(labels=','.join(
                            np.unique(pred_label_df['event_label'].values)))
                        pbar.update()
                        output_dfs.append(pred_label_df)

        full_prediction_df = pd.concat(output_dfs)
        prediction_df = full_prediction_df[full_prediction_df['event_label'] ==
                                           'Speech']
        assert set(['onset', 'offset', 'filename', 'event_label'
                    ]).issubset(prediction_df.columns), "Format is wrong"
        assert set(['onset', 'offset', 'filename', 'event_label'
                    ]).issubset(label_df.columns), "Format is wrong"
        logger.info("Calculating VAD measures ... ")
        speech_frame_ground_truth = np.concatenate(speech_frame_ground_truth,
                                                   axis=0)
        speech_frame_predictions = np.concatenate(speech_frame_predictions,
                                                  axis=0)
        speech_frame_prob_predictions = np.concatenate(
            speech_frame_prob_predictions, axis=0)

        vad_results = []
        tn, fp, fn, tp = metrics.confusion_matrix(
            speech_frame_ground_truth, speech_frame_predictions).ravel()
        fer = 100 * ((fp + fn) / len(speech_frame_ground_truth))
        acc = 100 * ((tp + tn) / (len(speech_frame_ground_truth)))

        p_miss = 100 * (fn / (fn + tp))
        p_fa = 100 * (fp / (fp + tn))
        for i in [0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 0.7, 0.9]:
            mp_fa, mp_miss = metrics.obtain_error_rates(
                speech_frame_ground_truth, speech_frame_prob_predictions, i)
            tn, fp, fn, tp = metrics.confusion_matrix(
                speech_frame_ground_truth,
                speech_frame_prob_predictions > i).ravel()
            sub_fer = 100 * ((fp + fn) / len(speech_frame_ground_truth))
            logger.info(
                f"PFa {100*mp_fa:.2f} Pmiss {100*mp_miss:.2f} FER {sub_fer:.2f} t: {i:.2f}"
            )

        auc = metrics.roc(speech_frame_ground_truth,
                          speech_frame_prob_predictions) * 100
        for avgtype in ('micro', 'macro', 'binary'):
            precision, recall, f1, _ = metrics.precision_recall_fscore_support(
                speech_frame_ground_truth,
                speech_frame_predictions,
                average=avgtype)
            vad_results.append(
                (avgtype, 100 * precision, 100 * recall, 100 * f1))

        logger.info("Calculating segment based metric .. ")
        # Change order just for better printing in file
        prediction_df = prediction_df[[
            'filename', 'onset', 'offset', 'event_label'
        ]]
        metric = metrics.segment_based_evaluation_df(
            label_df, prediction_df, time_resolution=time_resolution)
        logger.info("Calculating event based metric .. ")
        event_metric = metrics.event_based_evaluation_df(
            label_df, prediction_df)

        prediction_df.to_csv(experiment_path /
                             f'speech_predictions_{task}.tsv',
                             sep='\t',
                             index=False)
        full_prediction_df.to_csv(experiment_path / f'predictions_{task}.tsv',
                                  sep='\t',
                                  index=False)
        with open(experiment_path / f'evaluation_{task}.txt', 'w') as fp:
            for k, v in config_parameters.items():
                print(f"{k}:{v}", file=fp)
            print(metric, file=fp)
            print(event_metric, file=fp)
            for avgtype, precision, recall, f1 in vad_results:
                print(
                    f"VAD {avgtype} F1: {f1:<10.3f} {precision:<10.3f} Recall: {recall:<10.3f}",
                    file=fp)
            print(f"FER: {fer:.2f}", file=fp)
            print(f"AUC: {auc:.2f}", file=fp)
            print(f"Pfa: {p_fa:.2f}", file=fp)
            print(f"Pmiss: {p_miss:.2f}", file=fp)
            print(f"ACC: {acc:.2f}", file=fp)
        logger.info(f"Results are at {experiment_path}")
        for avgtype, precision, recall, f1 in vad_results:
            print(
                f"VAD {avgtype:<10} F1: {f1:<10.3f} Pre: {precision:<10.3f} Recall: {recall:<10.3f}"
            )
        print(f"FER: {fer:.2f}")
        print(f"AUC: {auc:.2f}")
        print(f"Pfa: {p_fa:.2f}")
        print(f"Pmiss: {p_miss:.2f}")
        print(f"ACC: {acc:.2f}")
        print(event_metric)
        print(metric)
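The postprocessing step above picks utils.binarize for a single threshold and utils.double_threshold for a pair. Those helpers are not shown in this example; as a rough illustration of the double-threshold (hysteresis) idea only, not the repository's implementation, a region above the low threshold is kept whenever it contains at least one frame above the high threshold:

import numpy as np
from scipy import ndimage

def double_threshold_sketch(probs, high=0.5, low=0.1):
    # probs: (frames,) speech probabilities; returns a 0/1 frame mask (sketch only).
    soft = probs >= low
    hard = probs >= high
    regions, n = ndimage.label(soft)      # connected runs of frames above the low threshold
    keep = np.zeros_like(soft)
    for r in range(1, n + 1):
        if hard[regions == r].any():      # region is anchored by at least one confident frame
            keep |= regions == r
    return keep.astype(int)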
Example #4
    if mask_file is not None:
        # zero out predictions outside the field-of-view mask
        pred[msk != 255] = 0

    return pred, out  # out may be used in roc


if __name__ == '__main__':
    model = MF_U_Net()
    model.eval()
    model.load_state_dict(
        torch.load('mf_unet2_400.pkl', map_location=torch.device('cpu')))

    pred, out = seg_img(model, '../data/DRIVE/test/proc_imgs/01_test.tif',
                        '../data/DRIVE/test/mask/01_test_mask.gif')
    '''
    pred, out = seg_img(model,
                        '../data/Image_01L.jpg')
    '''
    cv.imshow('img', pred)
    cv.waitKey(0)
    cv.destroyAllWindows()
    label_file = '../data/DRIVE/test/1st_manual/01_manual1.gif'
    # label_file = '../data/Image_01L_1stHO.png'
    target = cv.imread(label_file, cv.IMREAD_GRAYSCALE)
    if target is None:  # cv2 cannot read gif
        target = imageio.mimread(label_file)[0]  # but imageio can
    met = metrics(pred, target)
    auroc = roc(target, out)
Example #5
    def computeROC(self):
        fpr, tpr, auc = metrics.roc(self.scores, self.labels)
        logger.info(f"Area under ROC: {auc}")
        return fpr, tpr, auc
Example #6
predict_path = ''
label_path = ''
mask_path = ''
out_path = ''
#######################################

# get the file names from the paths
pred_files = os.listdir(predict_path)
label_files = os.listdir(label_path)
mask_files = os.listdir(mask_path)

# sort the file name lists
pred_files.sort()
label_files.sort()
mask_files.sort()

# prepend the directory paths to the file names
pred_files = [predict_path + f for f in pred_files]
label_files = [label_path + f for f in label_files]
mask_files = [mask_path + f for f in mask_files]

# load the images into arrays
pred_imgs = get_imarr(pred_files)
label_imgs = get_imarr(label_files)
# mask_imgs = get_imarr(mask_files)  # not needed
outs = np.load(out_path)
print('data loaded.')

met = metrics(pred_imgs, label_imgs)
auroc = roc(label_imgs, outs)
print(met)
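get_imarr is not defined in this snippet; a plausible stand-in (an assumption, not the example's actual helper) that reads each file as a grayscale image and stacks the results:

import cv2 as cv
import numpy as np

def get_imarr(files):
    # Assumes all images share the same size, as the DRIVE images do.
    return np.stack([cv.imread(f, cv.IMREAD_GRAYSCALE) for f in files])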
Example #7
    def evaluate_tagging(self,
                         experiment_path: str,
                         tag_file='tagging_predictions_{}.txt',
                         **kwargs):
        exppath = Path(experiment_path)
        if exppath.is_file(): # Best model passed!
            model_parameters = torch.load(
                str(exppath),
                map_location=lambda storage, loc: storage)
            experiment_path = exppath.parent # Just set upper path as default
        else:
            model_parameters = torch.load(
                glob.glob("{}/run_model*".format(experiment_path))[0],
                map_location=lambda storage, loc: storage)
        config = torch.load(glob.glob(
            "{}/run_config*".format(experiment_path))[0],
                            map_location=lambda storage, loc: storage)
        logger = utils.getfile_outlogger(None)
        # Use previous config, but update data such as kwargs
        config_parameters = dict(config, **kwargs)
        # Default columns to search for in data
        config_parameters.setdefault('colname', ('filename', 'encoded'))
        encoder = torch.load(glob.glob(
            '{}/run_encoder*'.format(experiment_path))[0],
                             map_location=lambda storage, loc: storage)

        test_data_filename = os.path.splitext(
            os.path.basename(config_parameters['label']))[0]
        strong_labels_df = pd.read_csv(config_parameters['label'], sep=r'\s+')
        # Evaluation is done via the filenames, not full paths
        if not np.issubdtype(strong_labels_df['filename'].dtype, np.number):
            strong_labels_df['filename'] = strong_labels_df['filename'].apply(
                os.path.basename)
        if 'audiofilepath' in strong_labels_df.columns:  # for the AVE dataset, audiofilepath is the main column
            strong_labels_df['audiofilepath'] = strong_labels_df[
                'audiofilepath'].apply(os.path.basename)
            colname = 'audiofilepath'  # AVE
        else:
            colname = 'filename'  # Dcase etc.
        weak_labels_df = strong_labels_df.groupby(
            colname)['event_label'].unique().apply(
                tuple).to_frame().reset_index()
        if "event_labels" in strong_labels_df.columns:
            assert False, "Data with an event_labels column is for training, not evaluation"
        weak_labels_array, encoder = utils.encode_labels(
            labels=weak_labels_df['event_label'], encoder=encoder)
        # assert (weak_labels_df['encoded'].apply(lambda x: sum(x)) >
        # 0).all(), "No targets found, is the encoder maybe not right?"
        for k, v in config_parameters.items():
            logger.info(f"{k}:{v}")
        dataloader = dataset.getdataloader(
            {
                'filename': weak_labels_df[colname].values,  # colname is 'audiofilepath' for AVE, 'filename' otherwise
                'encoded': weak_labels_array
            },
            config_parameters['data'],
            batch_size=1,
            shuffle=False,
            colname=config_parameters[
                'colname'],  # For other datasets with different key names
            num_workers=3,
        )
        model = getattr(models, config_parameters['model'])(
            inputdim=dataloader.dataset.datadim,
            outputdim=len(encoder.classes_),
            **config_parameters['model_args'])
        model.load_state_dict(model_parameters)
        model = model.to(DEVICE).eval()
        y_pred, y_true = [], []
        with torch.no_grad():
            for batch in tqdm(dataloader, unit='file', leave=False):
                _, target, filenames = batch
                clip_pred, _, _ = self._forward(model, batch)
                clip_pred = clip_pred.cpu().detach().numpy()
                y_pred.append(clip_pred)
                y_true.append(target.numpy())
        y_pred = np.concatenate(y_pred)
        y_true = np.concatenate(y_true)
        mAP = np.nan_to_num(metrics.mAP(y_true, y_pred))
        auc = np.nan_to_num(metrics.roc(y_true, y_pred))
        with open(
                os.path.join(experiment_path,
                             tag_file.format(test_data_filename)), 'w') as wp:
            print(f"mAP:{mAP.mean():.3f}", file=wp)
            print(f"mAP:\n{mAP.mean():.3f}")
            print(f"AuC:{auc.mean():.3f}", file=wp)
            print(f"AuC:\n{auc.mean():.3f}")