def vad_metrics(predictions, reference_segments, sr=22050,
                window_length=None, hop_length=None):
    """Score frame-level VAD predictions against reference speech segments.

    Parameters
    ----------
    predictions : sequence
        Per-frame voice-activity predictions (one entry per analysis frame).
    reference_segments : iterable of (start, end)
        Ground-truth speech segments, in seconds.
    sr : int, optional
        Sample rate of the audio the frames were computed from.
    window_length, hop_length : int, optional
        STFT window / hop in samples. When omitted they default to a
        32 ms window and 16 ms hop derived from `sr`. (The old defaults
        hard-coded 22050 Hz regardless of `sr`; deriving them from `sr`
        is identical for sr=22050 and correct otherwise.)

    Returns
    -------
    dict
        Detection precision, error rate, recall, and accuracy.
    """
    if window_length is None:
        window_length = int(np.floor(0.032 * sr))
    if hop_length is None:
        hop_length = int(np.floor(0.016 * sr))

    # Timestamp of each prediction frame, in seconds.
    frame_times = librosa.frames_to_time(range(len(predictions)), sr=sr,
                                         hop_length=hop_length,
                                         n_fft=window_length)
    predicted_segments = voice_segments(predictions, frame_times)

    # Wrap both segment lists as pyannote Annotations for the metrics API.
    hypothesis = Annotation()
    for seg in predicted_segments:
        hypothesis[Segment(seg[0], seg[1])] = 1
    reference = Annotation()
    for seg in reference_segments:
        reference[Segment(seg[0], seg[1])] = 1

    metrics = {
        "precision": DetectionPrecision()(reference, hypothesis),
        "error": DetectionErrorRate()(reference, hypothesis),
        "recall": DetectionRecall()(reference, hypothesis),
        "accuracy": DetectionAccuracy()(reference, hypothesis),
    }
    print(metrics)
    return metrics
def detection(protocol, subset, hypotheses, collar=0.0, skip_overlap=False):
    """Evaluate detection hypotheses on a protocol subset and print a table.

    Parameters
    ----------
    protocol, subset, hypotheses
        Evaluation protocol, subset name, and per-file hypotheses, as
        expected by `get_reports`.
    collar : float, optional
        Forgiveness collar (seconds) around reference boundaries.
    skip_overlap : bool, optional
        If True, overlapping speech regions are excluded from scoring.
    """
    options = {
        'collar': collar,
        'skip_overlap': skip_overlap,
        'parallel': True,
    }
    metrics = {
        'error': DetectionErrorRate(**options),
        'accuracy': DetectionAccuracy(**options),
        'precision': DetectionPrecision(**options),
        'recall': DetectionRecall(**options),
    }

    # Runs every metric over the subset; results accumulate inside the
    # metric instances, so the return value is not needed here (the old
    # `reports = ...` binding was never used).
    get_reports(protocol, subset, hypotheses, metrics)

    report = metrics['error'].report(display=False)
    accuracy = metrics['accuracy'].report(display=False)
    precision = metrics['precision'].report(display=False)
    recall = metrics['recall'].report(display=False)

    # Graft the '%' columns of the other three metrics onto the error report.
    report['accuracy', '%'] = accuracy[metrics['accuracy'].name, '%']
    report['precision', '%'] = precision[metrics['precision'].name, '%']
    report['recall', '%'] = recall[metrics['recall'].name, '%']

    report = reindex(report)

    # Move the three grafted columns right after the first column.
    columns = list(report.columns)
    report = report[[columns[0]] + columns[-3:] + columns[1:-3]]

    summary = 'Detection (collar = {0:g} ms{1})'.format(
        1000 * collar, ', no overlap' if skip_overlap else '')
    headers = [summary] + \
        [report.columns[i][0] for i in range(4)] + \
        ['%' if c[1] == '%' else c[0] for c in report.columns[4:]]
    print(
        tabulate(report,
                 headers=headers, tablefmt="simple", floatfmt=".2f",
                 numalign="decimal", stralign="left", missingval="",
                 showindex="default", disable_numparse=False))
def get_detection_metrics(reference, hypothesis, uem=None):
    """Compute the four standard detection metrics for one file.

    Parameters
    ----------
    reference, hypothesis
        Reference and hypothesized annotations.
    uem : optional
        Evaluation map restricting the scored regions.

    Returns
    -------
    dict
        Mapping from each metric's name to its value.
    """
    results = {}
    # Same four metrics as before, instantiated fresh each call so no
    # state leaks between evaluations.
    for metric_cls in (DetectionErrorRate, DetectionAccuracy,
                       DetectionPrecision, DetectionRecall):
        metric = metric_cls()
        results[metric.metric_name()] = metric(reference, hypothesis, uem=uem)
    return results
def test_accuracy(reference, hypothesis):
    """Detection accuracy on the fixture should be 15 correct / 20 total."""
    metric = DetectionAccuracy()
    result = metric(reference, hypothesis)
    npt.assert_almost_equal(result, 0.75, decimal=3)
def test(dataset, medium_template, config_yml, weights_h5, output_dir):
    """Run a trained sequence-labeling model over a dataset subset and
    write per-file and corpus-level detection scores.

    Parameters
    ----------
    dataset : str
        '<database>.<task>.<protocol>.<subset>' identifier.
    medium_template : dict
        Template mapping media names to file paths.
    config_yml : str
        Path to the experiment configuration file.
    weights_h5 : str
        Path to the trained model weights; the architecture YAML is
        expected two directories above it.
    output_dir : str
        Directory receiving the eval text file and per-file JSON output.
    """

    # load configuration file (SafeLoader: the config is plain data and
    # the Loader-less yaml.load form is deprecated/unsafe)
    with open(config_yml, 'r') as fp:
        config = yaml.load(fp, Loader=yaml.SafeLoader)

    # this is where model architecture was saved
    architecture_yml = os.path.join(
        os.path.dirname(os.path.dirname(weights_h5)), 'architecture.yml')

    # -- DATASET --
    db, task, protocol, subset = dataset.split('.')
    database = get_database(db, medium_template=medium_template)
    protocol = database.get_protocol(task, protocol)

    if not hasattr(protocol, subset):
        raise NotImplementedError('')

    file_generator = getattr(protocol, subset)()

    # -- FEATURE EXTRACTION --
    # input sequence duration
    duration = config['feature_extraction']['duration']
    # MFCCs
    feature_extractor = YaafeMFCC(**config['feature_extraction']['mfcc'])
    # normalization
    normalize = config['feature_extraction']['normalize']

    # -- TESTING --
    # overlap ratio between each window
    overlap = config['testing']['overlap']
    step = duration * (1. - overlap)

    # prediction smoothing: use the thresholds from the configuration
    # (they were previously read but discarded in favor of hard-coded 0.5)
    onset = config['testing']['binarize']['onset']
    offset = config['testing']['binarize']['offset']
    binarizer = Binarize(onset=onset, offset=offset)

    sequence_labeling = SequenceLabeling.from_disk(architecture_yml,
                                                   weights_h5)

    aggregation = SequenceLabelingAggregation(sequence_labeling,
                                              feature_extractor,
                                              normalize=normalize,
                                              duration=duration,
                                              step=step)

    # detection metrics with a 500 ms forgiveness collar
    collar = 0.500
    error_rate = DetectionErrorRate(collar=collar)
    accuracy = DetectionAccuracy(collar=collar)
    precision = DetectionPrecision(collar=collar)
    recall = DetectionRecall(collar=collar)

    LINE = '{uri} {e:.3f} {a:.3f} {p:.3f} {r:.3f} {f:.3f}\n'

    PATH = '{output_dir}/eval.{dataset}.{subset}.txt'
    path = PATH.format(output_dir=output_dir, dataset=dataset, subset=subset)

    with open(path, 'w') as fp:

        header = '# uri error accuracy precision recall f_measure\n'
        fp.write(header)
        fp.flush()

        for current_file in file_generator:

            uri = current_file['uri']
            wav = current_file['medium']['wav']
            annotated = current_file['annotated']
            annotation = current_file['annotation']

            predictions = aggregation.apply(wav)
            hypothesis = binarizer.apply(predictions, dimension=1)

            e = error_rate(annotation, hypothesis, uem=annotated)
            a = accuracy(annotation, hypothesis, uem=annotated)
            p = precision(annotation, hypothesis, uem=annotated)
            r = recall(annotation, hypothesis, uem=annotated)
            f = f_measure(p, r)

            line = LINE.format(uri=uri, e=e, a=a, p=p, r=r, f=f)
            fp.write(line)
            fp.flush()

            PATH = '{output_dir}/{uri}.json'
            path = PATH.format(output_dir=output_dir, uri=uri)
            dump_to(hypothesis, path)

        # average on whole corpus (metric instances accumulate across files;
        # abs() yields the aggregate value)
        uri = '{dataset}.{subset}'.format(dataset=dataset, subset=subset)
        e = abs(error_rate)
        a = abs(accuracy)
        p = abs(precision)
        r = abs(recall)
        f = f_measure(p, r)
        line = LINE.format(uri=uri, e=e, a=a, p=p, r=r, f=f)
        fp.write(line)
        fp.flush()