def loss(self, current_file: dict, hypothesis: Annotation) -> float:
    """Compute (1 - coverage) at target purity

    If purity < target, return 1 + (1 - purity)

    Parameters
    ----------
    current_file : `dict`
        File as provided by a pyannote.database protocol.
    hypothesis : `pyannote.core.Annotation`
        Speech turns.

    Returns
    -------
    error : `float`
        1. - cluster coverage when purity reaches `self.purity`,
        1. + (1. - purity) otherwise.
    """

    metric = DiarizationPurityCoverageFMeasure()
    reference = current_file["annotation"]
    uem = get_annotated(current_file)

    # The call is made so that `compute_metrics` below has something to
    # report; the value it returns is not needed here.
    metric(reference, hypothesis, uem=uem)
    purity, coverage, _ = metric.compute_metrics()

    # A hypothesis sitting exactly on the target does satisfy it, as stated
    # in the docstring ("if purity < target ..."), hence `>=` and not `>`.
    if purity >= self.purity:
        return 1.0 - coverage

    # Target missed: the returned value only depends on how far purity is.
    # NOTE(review): assuming purity and coverage lie in [0, 1], this branch
    # never returns less than 1 and the branch above never more than 1.
    return 1.0 + (1.0 - purity)
def get_metric(
    self, parallel=False
) -> Union[DiarizationPurityCoverageFMeasure, SegmentationPurityCoverageFMeasure]:
    """Build a fresh purity/coverage f-measure metric.

    Parameters
    ----------
    parallel : `bool`, optional
        Forwarded as-is to the metric constructor. Defaults to False.

    Returns
    -------
    metric : `DiarizationPurityCoverageFMeasure` or `SegmentationPurityCoverageFMeasure`
        The diarization flavour when `self.diarization` is truthy, the
        segmentation flavour (built with `tolerance=0.5`) otherwise.

    Raises
    ------
    NotImplementedError
        When `self.fscore` is falsy: no other metric is provided here.
    """
    if self.fscore:
        if not self.diarization:
            return SegmentationPurityCoverageFMeasure(
                tolerance=0.5, parallel=parallel
            )
        return DiarizationPurityCoverageFMeasure(parallel=parallel)

    raise NotImplementedError()
def fun(threshold):
    """Score the clustering obtained by cutting at `threshold`.

    For every file of the `subset` of `_protocol` (both taken from the
    enclosing scope), flat clusters are derived from `Z[uri]` with
    `fcluster(..., criterion="distance")`, turned into an annotation whose
    segments come from `t[uri]`, and accumulated into a single
    `DiarizationPurityCoverageFMeasure(weighted=False)`.

    Parameters
    ----------
    threshold : `float`
        Passed to `fcluster` as its cut-off value.

    Returns
    -------
    cost : `float`
        1 - abs(metric), i.e. lower is better.
        NOTE(review): `abs(metric)` is presumably the f-measure aggregated
        over all files -- confirm against pyannote.metrics.
    """
    fscore = DiarizationPurityCoverageFMeasure(weighted=False)

    iter_files = getattr(_protocol, subset)
    for current_file in iter_files():
        uri = get_unique_identifier(current_file)
        uem = get_annotated(current_file)
        reference = current_file["annotation"]

        # presumably Z[uri] is the file's precomputed linkage -- TODO confirm
        labels = fcluster(Z[uri], threshold, criterion="distance")

        # one labelled segment per (start, end) pair of t[uri]
        hypothesis = Annotation(uri=uri)
        for span, label in zip(t[uri], labels):
            start_time, end_time = span
            hypothesis[Segment(start_time, end_time)] = label

        # called for its accumulation side effect; return value is unused
        fscore(reference, hypothesis, uem=uem)

    return 1.0 - abs(fscore)
import numpy as np import matplotlib.pyplot as plt # AMI protocol from pyannote.database import get_protocol protocol = get_protocol('Test.SpeakerDiarization.MixHeadset') from pyannote.database import get_annotated # precomputed scores from pyannote.audio.features import Precomputed precomputed = Precomputed('./precomputed/scd') from pyannote.metrics.diarization import DiarizationPurityCoverageFMeasure metric = DiarizationPurityCoverageFMeasure() from pyannote.metrics.segmentation import SegmentationPurityCoverageFMeasure metric = SegmentationPurityCoverageFMeasure() # peak detection min_duration = 1.0 from pyannote.audio.signal import Peak # alpha / min_duration are tunable parameters (and should be tuned for better performance) # we use log_scale = True because of the final log-softmax in the StackedRNN model alphas = np.linspace(0, 1, 20) purity_list = [] coverage_list = [] for alpha in alphas:
def validate_epoch(self, epoch, protocol_name, subset='development',
                   validation_data=None):
    """Search the peak threshold giving the best coverage at target purity

    Predictions are computed once per file with the model of the given
    epoch. A 10-step dichotomic search over `alpha` in [0, 1] then looks for
    the value that maximizes coverage while keeping purity at or above
    `self.purity`.

    Parameters
    ----------
    epoch :
        Passed to `self.load_model` to select the model to validate.
    protocol_name : `str`
        Passed to `pyannote.database.get_protocol`.
    subset : `str`, optional
        Name of the protocol method yielding the files to validate on.
        Defaults to 'development'.
    validation_data : optional
        Not used by this implementation.

    Returns
    -------
    metrics : `dict`
        Two entries:
        'speaker_change_detection/coverage@{target:.2f}purity' holding the
        best coverage found (0. if the target purity was never reached) and
        'speaker_change_detection/threshold' holding the matching alpha
        (0.5 if the target purity was never reached).
    """

    target_purity = self.purity

    # load model for current epoch
    model = self.load_model(epoch).to(self.device)
    model.eval()

    if isinstance(self.feature_extraction_, Precomputed):
        self.feature_extraction_.use_memmap = False

    duration = self.task_.duration
    step = .25 * duration
    sequence_labeling = SequenceLabeling(
        model, self.feature_extraction_, duration=duration,
        step=step, batch_size=self.batch_size,
        source='audio', device=self.device)

    protocol = get_protocol(protocol_name, progress=False,
                            preprocessors=self.preprocessors_)

    # extract predictions for all files and, in the same pass, gather what
    # the search below needs for each of them. None of it depends on alpha,
    # so there is no point in iterating the protocol again at every step.
    predictions = {}
    evaluated_files = []
    for current_file in getattr(protocol, subset)():
        uri = get_unique_identifier(current_file)
        predictions[uri] = sequence_labeling.apply(current_file)
        evaluated_files.append(
            (uri, current_file['annotation'], get_annotated(current_file)))

    # dichotomic search to find alpha that maximizes coverage
    # while having at least `target_purity`
    lower_alpha = 0.
    upper_alpha = 1.
    best_alpha = .5 * (lower_alpha + upper_alpha)
    best_coverage = 0.

    for _ in range(10):

        current_alpha = .5 * (lower_alpha + upper_alpha)
        peak = Peak(alpha=current_alpha, min_duration=0.0,
                    log_scale=model.logsoftmax)
        metric = DiarizationPurityCoverageFMeasure()

        # NOTE -- embarrassingly parallel
        # TODO -- parallelize this
        for uri, reference, uem in evaluated_files:
            hypothesis = peak.apply(predictions[uri], dimension=1)
            hypothesis = hypothesis.to_annotation()
            metric(reference, hypothesis, uem=uem)

        purity, coverage, _ = metric.compute_metrics()

        if purity < target_purity:
            # not pure enough: continue with the lower half of the interval
            upper_alpha = current_alpha
        else:
            # target reached: try larger alphas, and remember this one if it
            # gives the best coverage seen so far
            lower_alpha = current_alpha
            if coverage > best_coverage:
                best_coverage = coverage
                best_alpha = current_alpha

    task = 'speaker_change_detection'
    metric_name = f'{task}/coverage@{target_purity:.2f}purity'
    return {
        metric_name: {'minimize': False, 'value': best_coverage},
        f'{task}/threshold': {'minimize': 'NA', 'value': best_alpha}}