import os

import librosa
import numpy as np
import numpy.testing as npt
import yaml
from tabulate import tabulate

from pyannote.core import Annotation, Segment, Timeline
from pyannote.metrics.detection import (DetectionAccuracy,
                                        DetectionErrorRate,
                                        DetectionPrecision,
                                        DetectionRecall)

# Project-local helpers used below (voice_segments, get_reports, reindex,
# f_measure, Binarize, SequenceLabeling, SequenceLabelingAggregation,
# YaafeMFCC, get_database, dump_to) are assumed to be importable from the
# surrounding package.


def vad_metrics(predictions,
                reference_segments,
                sr=22050,
                window_length=int(np.floor(0.032 * 22050)),
                hop_length=int(np.floor(0.016 * 22050))):
    # defaults correspond to 32 ms windows with a 16 ms hop at 22050 Hz;
    # pass matching values if `sr` differs
    # timestamp of each framewise prediction
    frame_times = librosa.frames_to_time(range(len(predictions)),
                                         sr=sr,
                                         hop_length=hop_length,
                                         n_fft=window_length)
    # merge consecutive positive frames into (start, end) speech segments
    predicted_segments = voice_segments(predictions, frame_times)

    # wrap predicted and reference segments as pyannote Annotations
    hypothesis = Annotation()
    for seg in predicted_segments:
        hypothesis[Segment(seg[0], seg[1])] = 1

    reference = Annotation()
    for seg in reference_segments:
        reference[Segment(seg[0], seg[1])] = 1

    precision = DetectionPrecision()(reference, hypothesis)
    error = DetectionErrorRate()(reference, hypothesis)
    recall = DetectionRecall()(reference, hypothesis)
    accuracy = DetectionAccuracy()(reference, hypothesis)

    metrics = {
        "precision": precision,
        "error": error,
        "recall": recall,
        "accuracy": accuracy
    }

    print(metrics)

    return metrics
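# A minimal usage sketch for vad_metrics with made-up values. It assumes the
# module-level voice_segments helper turns framewise binary predictions and
# their timestamps into (start, end) pairs; that helper is not defined in
# this file.
def _example_vad_metrics():
    # 100 frames: silence, then ~0.8 s of speech, then silence again
    predictions = np.concatenate([np.zeros(10), np.ones(50), np.zeros(40)])
    # toy ground truth: speech from 0.16 s to 0.96 s
    reference_segments = [(0.16, 0.96)]
    return vad_metrics(predictions, reference_segments)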
def detection(protocol, subset, hypotheses, collar=0.0, skip_overlap=False):
    """Evaluate detection hypotheses on a protocol subset and print a
    per-file report."""

    # forwarded to every metric below
    options = {
        'collar': collar,
        'skip_overlap': skip_overlap,
        'parallel': True
    }

    metrics = {
        'error': DetectionErrorRate(**options),
        'accuracy': DetectionAccuracy(**options),
        'precision': DetectionPrecision(**options),
        'recall': DetectionRecall(**options)
    }

    # accumulate per-file results into each metric (return value unused here)
    get_reports(protocol, subset, hypotheses, metrics)

    # build a per-file report from the error rate, then merge in the
    # accuracy / precision / recall columns
    report = metrics['error'].report(display=False)
    accuracy = metrics['accuracy'].report(display=False)
    precision = metrics['precision'].report(display=False)
    recall = metrics['recall'].report(display=False)

    report['accuracy', '%'] = accuracy[metrics['accuracy'].name, '%']
    report['precision', '%'] = precision[metrics['precision'].name, '%']
    report['recall', '%'] = recall[metrics['recall'].name, '%']

    report = reindex(report)

    # move the accuracy / precision / recall columns right after the first
    columns = list(report.columns)
    report = report[[columns[0]] + columns[-3:] + columns[1:-3]]

    summary = 'Detection (collar = {0:g} ms{1})'.format(
        1000 * collar, ', no overlap' if skip_overlap else '')

    headers = [summary] + \
              [report.columns[i][0] for i in range(4)] + \
              ['%' if c[1] == '%' else c[0] for c in report.columns[4:]]

    print(
        tabulate(report,
                 headers=headers,
                 tablefmt="simple",
                 floatfmt=".2f",
                 numalign="decimal",
                 stralign="left",
                 missingval="",
                 showindex="default",
                 disable_numparse=False))
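# A minimal sketch of the accumulation that detection() relies on: calling a
# pyannote.metrics metric on several (reference, hypothesis) pairs records
# one result per file URI, and report(display=False) collects them into a
# pandas DataFrame. The annotations below are toy values.
def _example_metric_report():
    metric = DetectionErrorRate(collar=0.0, skip_overlap=False)
    for uri, hyp_end in [('file1', 8), ('file2', 6)]:
        reference = Annotation(uri=uri)
        reference[Segment(0, 10)] = 'speech'
        hypothesis = Annotation(uri=uri)
        hypothesis[Segment(0, hyp_end)] = 'speech'
        metric(reference, hypothesis)
    return metric.report(display=False)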
def get_detection_metrics(reference, hypothesis, uem=None):
    """Compute the four standard detection metrics, keyed by metric name."""
    metric_dict = {}
    for metric_class in (DetectionErrorRate, DetectionAccuracy,
                         DetectionPrecision, DetectionRecall):
        metric = metric_class()
        metric_dict[metric.metric_name()] = metric(
            reference, hypothesis, uem=uem)

    return metric_dict
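# Toy usage of get_detection_metrics, with an explicit evaluation map (uem)
# restricting scoring to the first 12 seconds; all values are made up.
def _example_get_detection_metrics():
    reference = Annotation(uri='toy')
    reference[Segment(0, 10)] = 'speech'
    hypothesis = Annotation(uri='toy')
    hypothesis[Segment(2, 10)] = 'speech'
    uem = Timeline([Segment(0, 12)], uri='toy')
    return get_detection_metrics(reference, hypothesis, uem=uem)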
def test_accuracy(reference, hypothesis):
    # expected: 15 s classified correctly out of 20 s total
    detection_accuracy = DetectionAccuracy()
    accuracy = detection_accuracy(reference, hypothesis)
    npt.assert_almost_equal(accuracy, 0.75, decimal=3)
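# One possible fixture for test_accuracy, with made-up numbers: 10 s of true
# positive plus 5 s of true negative out of 20 s gives 15/20 = 0.75. This
# assumes pyannote.metrics falls back to the union of the reference and
# hypothesis extents when no uem is given (it warns when it does so).
def _example_test_accuracy():
    reference = Annotation(uri='toy')
    reference[Segment(0, 10)] = 'speech'    # detected by the hypothesis
    reference[Segment(15, 20)] = 'speech'   # missed by the hypothesis
    hypothesis = Annotation(uri='toy')
    hypothesis[Segment(0, 10)] = 'speech'
    test_accuracy(reference, hypothesis)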
def test(dataset, medium_template, config_yml, weights_h5, output_dir):

    # load configuration file
    with open(config_yml, 'r') as fp:
        config = yaml.load(fp, Loader=yaml.SafeLoader)

    # this is where the model architecture was saved
    architecture_yml = os.path.join(
        os.path.dirname(os.path.dirname(weights_h5)), 'architecture.yml')

    # -- DATASET --
    db, task, protocol, subset = dataset.split('.')
    database = get_database(db, medium_template=medium_template)
    protocol = database.get_protocol(task, protocol)

    if not hasattr(protocol, subset):
        raise NotImplementedError(
            'Protocol has no "{0}" subset.'.format(subset))

    file_generator = getattr(protocol, subset)()

    # -- FEATURE EXTRACTION --
    # input sequence duration
    duration = config['feature_extraction']['duration']
    # MFCCs
    feature_extractor = YaafeMFCC(**config['feature_extraction']['mfcc'])
    # normalization
    normalize = config['feature_extraction']['normalize']

    # -- TESTING --
    # overlap ratio between each window
    overlap = config['testing']['overlap']
    step = duration * (1. - overlap)

    # prediction smoothing: use onset/offset thresholds from the config
    onset = config['testing']['binarize']['onset']
    offset = config['testing']['binarize']['offset']
    binarizer = Binarize(onset=onset, offset=offset)

    sequence_labeling = SequenceLabeling.from_disk(architecture_yml,
                                                   weights_h5)

    aggregation = SequenceLabelingAggregation(sequence_labeling,
                                              feature_extractor,
                                              normalize=normalize,
                                              duration=duration,
                                              step=step)

    # 500 ms collar around reference boundaries
    collar = 0.500
    error_rate = DetectionErrorRate(collar=collar)
    accuracy = DetectionAccuracy(collar=collar)
    precision = DetectionPrecision(collar=collar)
    recall = DetectionRecall(collar=collar)

    LINE = '{uri} {e:.3f} {a:.3f} {p:.3f} {r:.3f} {f:.3f}\n'

    PATH = '{output_dir}/eval.{dataset}.{subset}.txt'
    path = PATH.format(output_dir=output_dir, dataset=dataset, subset=subset)

    with open(path, 'w') as fp:

        header = '# uri error accuracy precision recall f_measure\n'
        fp.write(header)
        fp.flush()

        for current_file in file_generator:

            uri = current_file['uri']
            wav = current_file['medium']['wav']
            annotated = current_file['annotated']
            annotation = current_file['annotation']

            predictions = aggregation.apply(wav)
            hypothesis = binarizer.apply(predictions, dimension=1)

            e = error_rate(annotation, hypothesis, uem=annotated)
            a = accuracy(annotation, hypothesis, uem=annotated)
            p = precision(annotation, hypothesis, uem=annotated)
            r = recall(annotation, hypothesis, uem=annotated)
            f = f_measure(p, r)

            line = LINE.format(uri=uri, e=e, a=a, p=p, r=r, f=f)
            fp.write(line)
            fp.flush()

            # dump the per-file hypothesis to JSON
            json_path = '{output_dir}/{uri}.json'.format(
                output_dir=output_dir, uri=uri)
            dump_to(hypothesis, json_path)

        # corpus-wide aggregates: abs(metric) returns the metric value
        # accumulated over every file processed so far
        uri = '{dataset}.{subset}'.format(dataset=dataset, subset=subset)
        e = abs(error_rate)
        a = abs(accuracy)
        p = abs(precision)
        r = abs(recall)
        f = f_measure(p, r)
        line = LINE.format(uri=uri, e=e, a=a, p=p, r=r, f=f)
        fp.write(line)
        fp.flush()
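# Hypothetical invocation of test(); the protocol name and every path below
# are made-up examples. The dataset string follows the
# '<database>.<task>.<protocol>.<subset>' convention parsed above, and
# medium_template maps each medium name to a path template.
#
# test(dataset='Etape.SpeakerDiarization.TV.development',
#      medium_template={'wav': '/path/to/{uri}.wav'},
#      config_yml='config.yml',
#      weights_h5='train/weights/0050.h5',
#      output_dir='results')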