def test_false_rejection_rate_with_no_occurences_returns_zero(self):
    """ The false rejection rate is 0.0 when the reference label-list is empty. """
    empty_ref = annotations.LabelList(labels=[])
    hyp = annotations.LabelList(labels=[annotations.Label('four', 2.5, 3.0)])

    result = evaluator.KWSEvaluator().evaluate(empty_ref, hyp)

    assert result.false_rejection_rate() == 0.0
def test_evaluate_with_multiple_labels(self):
    """ Check the label pairs produced when the reference consists of several timed labels. """
    ref_labels = [
        annotations.Label('a b', start=0, end=3),
        annotations.Label('a d', start=3, end=5),
        annotations.Label('f a b', start=5, end=6),
    ]
    ref = evaluator.Outcome(label_lists={
        'a': annotations.LabelList(labels=ref_labels),
    })

    hyp_labels = [annotations.Label('a b d f i b')]
    hyp = evaluator.Outcome(label_lists={
        'a': annotations.LabelList(labels=hyp_labels),
    })

    result = evaluator.ASREvaluator().do_evaluate(ref, hyp)

    # One (reference value, hypothesis value) entry per expected pair. A
    # hypothesis value of None is passed to LabelPair as None instead of
    # being wrapped in a Label.
    expected_values = [
        ('a', 'a'),
        ('b', 'b'),
        ('a', None),
        ('d', 'd'),
        ('f', 'f'),
        ('a', 'i'),
        ('b', 'b'),
    ]
    expected_pairs = [
        alignment.LabelPair(
            annotations.Label(ref_value),
            annotations.Label(hyp_value) if hyp_value is not None else None,
        )
        for ref_value, hyp_value in expected_values
    ]

    assert len(result.utt_to_label_pairs) == 1
    assert result.utt_to_label_pairs['a'] == expected_pairs
def test_validate(self):
    """ LabelCoverageValidator fails and reports the uncovered segments of utt-4 and utt-6. """
    ds = resources.create_single_label_corpus()

    # (utterance idx, [(value, start, end), ...]) for the 'default' label-list.
    label_specs = [
        ('utt-4', [('a', 0.0, 1.44), ('a', 1.89, 10.0)]),
        ('utt-6', [('a', 1.33, 5.9), ('a', 5.9, 14.7)]),
    ]

    for utt_idx, specs in label_specs:
        labels = [
            annotations.Label(value, start=start, end=end)
            for value, start, end in specs
        ]
        label_list = annotations.LabelList(idx='default', labels=labels)
        ds.utterances[utt_idx].set_label_list(label_list)

    validator = validation.LabelCoverageValidator('default')
    result = validator.validate(ds)

    assert not result.passed
    assert set(result.uncovered_segments.keys()) == {'utt-4', 'utt-6'}
    assert result.uncovered_segments['utt-4'] == [(1.44, 1.89)]
    assert result.uncovered_segments['utt-6'] == [(0.0, 1.33), (14.7, 15.0)]
def test_evaluate(self):
    """ Check the label pairs produced for a single reference and a single hypothesis label. """

    def pair(ref_value, hyp_value):
        # Build the expected LabelPair; a None hypothesis stays None.
        hyp_label = None if hyp_value is None else annotations.Label(hyp_value)
        return alignment.LabelPair(annotations.Label(ref_value), hyp_label)

    ref_list = annotations.LabelList(labels=[annotations.Label('a b a d f a b')])
    ref = evaluator.Outcome(label_lists={'a': ref_list})

    hyp_list = annotations.LabelList(labels=[annotations.Label('a b d f i b')])
    hyp = evaluator.Outcome(label_lists={'a': hyp_list})

    result = evaluator.ASREvaluator().do_evaluate(ref, hyp)

    assert len(result.utt_to_label_pairs) == 1
    assert result.utt_to_label_pairs['a'] == [
        pair('a', 'a'),
        pair('b', 'b'),
        pair('a', None),
        pair('d', 'd'),
        pair('f', 'f'),
        pair('a', 'i'),
        pair('b', 'b'),
    ]
def test_split(self):
    """ Splitting at two cutting points yields three utterances that all keep both label-lists. """
    phones = annotations.LabelList('phones', labels=[
        annotations.Label('alpha', start=0.0, end=30.0),
    ])
    words = annotations.LabelList('words', labels=[
        annotations.Label('b', start=0.0, end=30.0),
    ])
    utt = tracks.Utterance('utt-1', 'track-x', start=0.0, end=40.0,
                           label_lists=[phones, words])

    res = utt.split([14.0, 29.5])

    # Expected (start, end) of every resulting utterance, in order.
    expected_bounds = [(0.0, 14.0), (14.0, 29.5), (29.5, 40.0)]

    assert len(res) == 3

    for index, (start, end) in enumerate(expected_bounds):
        part = res[index]
        assert part.start == start
        assert part.end == end
        assert 'phones' in part.label_lists.keys()
        assert 'words' in part.label_lists.keys()
def test_align_different_start(self):
    """ Align a reference and a hypothesis label that overlap but start and end at different times. """
    ref = annotations.LabelList(labels=[annotations.Label('b', 4, 9)])
    hyp = annotations.LabelList(labels=[annotations.Label('b', 2, 8)])

    result = alignment.InvariantSegmentAligner().align(ref, hyp)

    # Expected segments as (start, end, reference labels, hypothesis labels).
    expected_segments = [
        (2, 4, [], [annotations.Label('b', 2, 8)]),
        (4, 8, [annotations.Label('b', 4, 9)], [annotations.Label('b', 2, 8)]),
        (8, 9, [annotations.Label('b', 4, 9)], []),
    ]

    assert len(result) == 3

    for index, (start, end, ref_labels, hyp_labels) in enumerate(expected_segments):
        segment = result[index]
        assert segment.start == start
        assert segment.end == end
        assert segment.ref == ref_labels
        assert segment.hyp == hyp_labels
def test_create_event_list(self):
    """ Check the time-ordered start/end events created from a reference and a hypothesis label-list. """
    # Labels as (value, start, end).
    ref_specs = [
        ('a', 0.89, 13.73),
        ('a', 13.73, 17.49),
        ('b', 17.49, 22.75),
    ]
    hyp_specs = [
        ('b', 0.1, 1.656),
        ('a', 1.656, 1.976),
        ('b', 1.976, 3.896),
        ('a', 3.896, 3.957),
    ]

    # Index stored in the expected events: 0 for labels of the reference
    # list, 1 for labels of the hypothesis list.
    ref_index = 0
    hyp_index = 1

    def started(list_index, spec):
        # Expected 'S' event, placed at the start time of the label.
        return (spec[1], 'S', list_index, annotations.Label(*spec))

    def ended(list_index, spec):
        # Expected 'E' event, placed at the end time of the label.
        return (spec[2], 'E', list_index, annotations.Label(*spec))

    ll_ref = annotations.LabelList(labels=[annotations.Label(*spec) for spec in ref_specs])
    ll_hyp = annotations.LabelList(labels=[annotations.Label(*spec) for spec in hyp_specs])

    events = alignment.InvariantSegmentAligner.create_event_list(ll_ref, ll_hyp,
                                                                 time_threshold=0.01)

    expected_events = [
        (0.1, [started(hyp_index, hyp_specs[0])]),
        (0.89, [started(ref_index, ref_specs[0])]),
        (1.656, [ended(hyp_index, hyp_specs[0]), started(hyp_index, hyp_specs[1])]),
        (1.976, [ended(hyp_index, hyp_specs[1]), started(hyp_index, hyp_specs[2])]),
        (3.896, [ended(hyp_index, hyp_specs[2]), started(hyp_index, hyp_specs[3])]),
        (3.957, [ended(hyp_index, hyp_specs[3])]),
        (13.73, [ended(ref_index, ref_specs[0]), started(ref_index, ref_specs[1])]),
        (17.49, [ended(ref_index, ref_specs[1]), started(ref_index, ref_specs[2])]),
        (22.75, [ended(ref_index, ref_specs[2])]),
    ]

    for index, expected_event in enumerate(expected_events):
        assert events[index] == expected_event
def test_validate_returns_part_of_overlapping_label(self):
    """ LabelOverflowValidator fails and reports the overflowing label parts of utt-4 and utt-6. """
    ds = resources.create_single_label_corpus()

    # (utterance idx, [(value, start, end), ...]) for the 'default' label-list.
    label_specs = [
        ('utt-4', [('a', 0.0, 9.0), ('b', 9.0, 13.0)]),
        ('utt-6', [('a', -2.0, 5.9), ('b', 5.9, 14.7)]),
    ]

    for utt_idx, specs in label_specs:
        labels = [
            annotations.Label(value, start=start, end=end)
            for value, start, end in specs
        ]
        label_list = annotations.LabelList(idx='default', labels=labels)
        ds.utterances[utt_idx].set_label_list(label_list)

    validator = validation.LabelOverflowValidator('default')
    result = validator.validate(ds)

    assert not result.passed
    assert set(result.overflow_segments.keys()) == {'utt-4', 'utt-6'}
    assert result.overflow_segments['utt-4'] == [(10.0, 13.0, 'b')]
    assert result.overflow_segments['utt-6'] == [(-2.0, 0.0, 'a')]
def test_eq_ignores_label_list_relation(self):
    """ Two labels compare equal although they belong to different label-lists. """
    first = annotations.Label('some label A', 1.0, 2.0)
    second = annotations.Label('some label a', 1.0, 2.0)

    first_list = annotations.LabelList(idx='one', labels=[first])
    second_list = annotations.LabelList(idx='another', labels=[second])

    # Each label is linked to the list it was added to ...
    assert first.label_list == first_list
    assert second.label_list == second_list

    # ... and the labels are still considered equal.
    assert first == second
def test_align(self):
    """ Align three reference labels with three hypothesis labels and check all six segments. """
    ref = annotations.LabelList(labels=[
        annotations.Label('a', 0, 3),
        annotations.Label('b', 3, 6),
        annotations.Label('c', 7, 10),
    ])
    hyp = annotations.LabelList(labels=[
        annotations.Label('a', 0, 3),
        annotations.Label('b', 4, 8),
        annotations.Label('c', 8, 10),
    ])

    result = alignment.InvariantSegmentAligner().align(ref, hyp)

    # Expected segments as (start, end, reference labels, hypothesis labels).
    expected_segments = [
        (0, 3, [annotations.Label('a', 0, 3)], [annotations.Label('a', 0, 3)]),
        (3, 4, [annotations.Label('b', 3, 6)], []),
        (4, 6, [annotations.Label('b', 3, 6)], [annotations.Label('b', 4, 8)]),
        (6, 7, [], [annotations.Label('b', 4, 8)]),
        (7, 8, [annotations.Label('c', 7, 10)], [annotations.Label('b', 4, 8)]),
        (8, 10, [annotations.Label('c', 7, 10)], [annotations.Label('c', 8, 10)]),
    ]

    assert len(result) == 6

    for index, (start, end, ref_labels, hyp_labels) in enumerate(expected_segments):
        segment = result[index]
        assert segment.start == start
        assert segment.end == end
        assert segment.ref == ref_labels
        assert segment.hyp == hyp_labels
def corpus_with_more_labels():
    """
    Return the single-label corpus with an additional ``radio`` label-list on utt-1 to utt-8.

    utt-1 and utt-2 get the label ``alpha``, utt-3 to utt-8 the label ``beta``.
    """
    corpus = resources.create_single_label_corpus()

    radio_values = [
        ('utt-1', 'alpha'),
        ('utt-2', 'alpha'),
        ('utt-3', 'beta'),
        ('utt-4', 'beta'),
        ('utt-5', 'beta'),
        ('utt-6', 'beta'),
        ('utt-7', 'beta'),
        ('utt-8', 'beta'),
    ]

    for utt_idx, value in radio_values:
        radio_list = annotations.LabelList(idx='radio', labels=[annotations.Label(value)])
        corpus.utterances[utt_idx].set_label_list(radio_list)

    return corpus
def read_labels(path, corpus):
    """
    Read all label files in ``path`` and attach the label-lists to the utterances of ``corpus``.

    Every file named ``<LABEL_FILE_PREFIX>_<key>.txt`` produces label-lists with idx ``<key>``.
    Each record of such a file consists of utterance idx, start, end and label value,
    separated by spaces. An end of ``-1`` is converted to infinity. If the label value
    matches ``META_PATTERN``, group 1 is used as the value and group 2 is parsed as JSON meta data.

    Args:
        path (str): Directory that contains the label files.
        corpus: Corpus whose ``utterances`` receive the label-lists.
    """
    prefix = '{}_'.format(LABEL_FILE_PREFIX)
    extension = '.txt'
    file_pattern = os.path.join(path, '{}_*.txt'.format(LABEL_FILE_PREFIX))

    for label_file in glob.glob(file_pattern):
        file_name = os.path.basename(label_file)
        # The label-list idx is whatever sits between the prefix and the extension.
        key = file_name[len(prefix):-len(extension)]

        labels_by_utterance = collections.defaultdict(list)
        records = textfile.read_separated_lines_generator(label_file,
                                                          separator=' ',
                                                          max_columns=4)

        for record in records:
            value = record[3]
            start = float(record[1])
            end = float(record[2])

            if end == -1:
                end = float('inf')

            meta = None
            match = META_PATTERN.match(value)

            if match is not None:
                meta = json.loads(match.group(2))
                value = match.group(1)

            new_label = annotations.Label(value, start, end, meta=meta)
            labels_by_utterance[record[0]].append(new_label)

        for utterance_idx, utterance_labels in labels_by_utterance.items():
            label_list = annotations.LabelList(idx=key, labels=utterance_labels)
            corpus.utterances[utterance_idx].set_label_list(label_list)
def test_encode_label_ends_at_utterance_end(self):
    """ Encode an utterance whose last label has an end of infinity. """
    track = tracks.FileTrack('file1', resources.sample_wav_file('med_len.wav'))
    utt = tracks.Utterance('utt1', track, start=3, end=14)
    utt.set_label_list(annotations.LabelList(labels=[
        annotations.Label('speech', 0, 4),
        annotations.Label('music', 4, 9),
        annotations.Label('speech', 9, float('inf')),
    ]))

    frame_settings = units.FrameSettings(32000, 16000)
    enc = encoding.FrameHotEncoder(['music', 'speech', 'noise'], 'default',
                                   frame_settings=frame_settings, sr=16000)

    actual = enc.encode_utterance(utt)

    # Rows follow the encoder's label order: music, speech, noise.
    speech_only = [0, 1, 0]
    music_only = [1, 0, 0]
    music_and_speech = [1, 1, 0]

    expected_rows = (
        [speech_only] * 3
        + [music_and_speech]
        + [music_only] * 4
        + [music_and_speech]
        + [speech_only]
    )
    expected = np.array(expected_rows).astype(np.float32)

    assert np.array_equal(expected, actual)
def read_labels(path, corpus):
    """
    Read the label files referenced in ``LABEL_FILE`` and attach them to the utterances of ``corpus``.

    Each record of the reference file holds the utterance idx, the path of a label file
    (relative to ``path``) and optionally the idx of the label-list. The label files are
    read with ``audacity.read_label_file``. A negative end is converted to infinity.
    ``extract_meta_from_label_value`` is applied to every label-list before it is set
    on the utterance.

    Args:
        path (str): Directory that contains the label reference file.
        corpus: Corpus whose ``utterances`` receive the label-lists.
    """
    reference_path = os.path.join(path, LABEL_FILE)
    references = textfile.read_separated_lines(reference_path,
                                               separator=' ',
                                               max_columns=3)

    for record in references:
        utt_idx = record[0]
        label_path = os.path.join(path, record[1])
        # The third column (label-list idx) is optional.
        label_idx = record[2] if len(record) > 2 else None

        ll = annotations.LabelList(idx=label_idx)

        for entry in audacity.read_label_file(label_path):
            start, end, value = entry[0], entry[1], entry[2]
            ll.addl(value, start, float('inf') if end < 0 else end)

        ll.apply(extract_meta_from_label_value)
        corpus.utterances[utt_idx].set_label_list(ll)
def test_read_samples(self):
    """ Label.read_samples returns the same samples as loading the matching range of the file. """
    path = resources.sample_wav_file('wav_1.wav')
    track = tracks.FileTrack('wav', path)
    issuer = issuers.Issuer('toni')
    utt = tracks.Utterance('t', track, issuer=issuer, start=1.0, end=2.30)

    l1 = annotations.Label('a', 0.15, 0.448)
    l2 = annotations.Label('a', 0.5, 0.73)
    ll = annotations.LabelList(labels=[l1, l2])
    utt.set_label_list(ll)

    # Expected offsets are the utterance start (1.0) plus the label start;
    # the duration is the length of the label.
    expected, __ = librosa.core.load(path, sr=None, offset=1.15, duration=0.298)
    assert np.array_equal(l1.read_samples(), expected)

    expected, __ = librosa.core.load(path, sr=None, offset=1.5, duration=1.73 - 1.5)
    assert np.array_equal(l2.read_samples(), expected)
def test_read_label_list_de(self):
    """ Read the label file 'audacity_labels_de.txt' and compare it with the expected label-list. """
    test_dir = os.path.dirname(__file__)
    path = os.path.join(test_dir, 'audacity_labels_de.txt')

    ll = audacity.read_label_list(path)

    expected = annotations.LabelList(labels=[
        annotations.Label('music', 43352.824046, 43525.837661),
        annotations.Label('speech_male', 43512.446969, 43531.343483),
    ])

    assert ll == expected
def test_align_empty_hypothesis(self):
    """ Aligning against an empty hypothesis yields one segment with the reference label only. """
    ref_label_bounds = ('b', 4, 8)
    ref = annotations.LabelList(labels=[annotations.Label(*ref_label_bounds)])
    hyp = annotations.LabelList(labels=[])

    result = alignment.InvariantSegmentAligner().align(ref, hyp)

    assert len(result) == 1

    only_segment = result[0]
    assert only_segment.start == 4
    assert only_segment.end == 8
    assert only_segment.ref == [annotations.Label(*ref_label_bounds)]
    assert only_segment.hyp == []
def setup_method(self):
    """
    Create the label-lists, track, issuer and utterance used by the tests.

    ``ll_duplicate_idx`` and ``ll_3`` share the idx 'charlie'; both are passed to the utterance.
    """
    alpha_labels = [
        annotations.Label('a', 3.2, 4.5),
        annotations.Label('b', 5.1, 8.9),
        annotations.Label('c', 7.2, 10.5),
        annotations.Label('d', 10.5, 14),
        annotations.Label('d', 15, 18),
    ]
    self.ll_1 = annotations.LabelList(idx='alpha', labels=alpha_labels)

    bravo_labels = [
        annotations.Label('a', 1.0, 4.2),
        annotations.Label('e', 4.2, 7.9),
        annotations.Label('c', 7.2, 10.5),
        annotations.Label('f', 10.5, 14),
        annotations.Label('d', 15, 17.3),
    ]
    self.ll_2 = annotations.LabelList(idx='bravo', labels=bravo_labels)

    duplicate_labels = [
        annotations.Label('t', 1.0, 4.2),
        annotations.Label('h', 4.2, 7.9),
    ]
    self.ll_duplicate_idx = annotations.LabelList(idx='charlie', labels=duplicate_labels)

    charlie_labels = [
        annotations.Label('a', 1.0, 4.2),
        annotations.Label('g', 4.2, 7.9),
    ]
    self.ll_3 = annotations.LabelList(idx='charlie', labels=charlie_labels)

    self.track = tracks.FileTrack('wav', resources.sample_wav_file('wav_1.wav'))
    self.issuer = issuers.Issuer('toni')

    all_label_lists = [self.ll_1, self.ll_2, self.ll_duplicate_idx, self.ll_3]
    self.utt = tracks.Utterance('test', self.track, issuer=self.issuer,
                                start=1.25, end=1.30,
                                label_lists=all_label_lists)
def test_encode_utterance_with_single_label(self):
    """ The tokens 'a c b' are encoded as the indices [0, 2, 1] for the token list a, b, c. """
    label_list = annotations.LabelList(idx='go', labels=[annotations.Label('a c b')])
    utt = tracks.Utterance('utt-1', None, label_lists=label_list)
    tokens = ['a', 'b', 'c']

    encoded = encoding.TokenOrdinalEncoder('go', tokens).encode_utterance(utt)

    assert np.array_equal(encoded, [0, 2, 1])
def test_encode_utterance_with_non_existing_label_list_raises_error(self):
    """ Encoding raises a ValueError when the encoder targets a label-list the utterance lacks. """
    label_list = annotations.LabelList(idx='go',
                                       labels=[annotations.Label('a c b unknown')])
    utt = tracks.Utterance('utt-1', None, label_lists=label_list)
    tokens = ['a', 'b', 'c']

    # The utterance only has the label-list 'go'.
    encoder = encoding.TokenOrdinalEncoder('not_existing', tokens)

    with pytest.raises(ValueError):
        encoder.encode_utterance(utt)
def test_read_label_list_with_empty_value(self):
    """ A label file containing an entry without value is read with an empty string as value. """
    test_dir = os.path.dirname(__file__)
    path = os.path.join(test_dir, 'audacity_labels_empty_value.txt')

    ll = audacity.read_label_list(path)

    expected = annotations.LabelList(labels=[
        annotations.Label('music', 1, 4),
        annotations.Label('', 4, 7),
        annotations.Label('speech_male', 7, 9),
    ])

    assert ll == expected
def ll_ref():
    """ Return a label-list with six timed labels (values 'up', 'down' and 'right'). """
    # Labels as (value, start, end).
    timed_values = [
        ('up', 5.28, 5.99),
        ('down', 10.35, 11.12),
        ('right', 20.87, 22.01),
        ('up', 33.00, 33.4),
        ('up', 33.4, 33.8),
        ('down', 39.28, 40.0),
    ]

    labels = [
        annotations.Label(value, start=start, end=end)
        for value, start, end in timed_values
    ]

    return annotations.LabelList(labels=labels)
def ll_hyp():
    """ Return a label-list with six timed labels (values 'up', 'right', 'left' and 'down'). """
    # Labels as (value, start, end).
    timed_values = [
        ('up', 5.20, 5.88),
        ('right', 10.30, 11.08),
        ('up', 32.00, 32.5),
        ('up', 34.2, 34.8),
        ('left', 39.3, 39.9),
        ('down', 39.27, 40.01),
    ]

    labels = [
        annotations.Label(value, start=start, end=end)
        for value, start, end in timed_values
    ]

    return annotations.LabelList(labels=labels)
def test_align_with_empty_segments(self):
    """ Alignment also returns the segment between 3 and 4, where neither list has a label. """
    ref = annotations.LabelList(labels=[
        annotations.Label('a', 0, 3),
        annotations.Label('b', 4, 6),
    ])
    hyp = annotations.LabelList(labels=[
        annotations.Label('a', 0, 3),
        annotations.Label('c', 5, 8),
    ])

    result = alignment.InvariantSegmentAligner().align(ref, hyp)

    # Expected segments as (start, end, reference labels, hypothesis labels).
    expected_segments = [
        (0, 3, [annotations.Label('a', 0, 3)], [annotations.Label('a', 0, 3)]),
        (3, 4, [], []),
        (4, 5, [annotations.Label('b', 4, 6)], []),
        (5, 6, [annotations.Label('b', 4, 6)], [annotations.Label('c', 5, 8)]),
        (6, 8, [], [annotations.Label('c', 5, 8)]),
    ]

    assert len(result) == 5

    for index, (start, end, ref_labels, hyp_labels) in enumerate(expected_segments):
        segment = result[index]
        assert segment.start == start
        assert segment.end == end
        assert segment.ref == ref_labels
        assert segment.hyp == hyp_labels
def relabel(label_list, projections):
    """
    Create a new :py:class:`~audiomate.annotations.LabelList` by mapping the labels of
    ``label_list`` through user-defined projections.

    This allows to rename or remove labels, or to flatten overlapping labels to a single
    label per segment. A projection maps a combination of labels (key) to one new label
    (value). The key is a tuple with the sorted values of all labels that apply to a
    segment at the same time. A projection to the empty string removes the segment.
    The wildcard projection `('**',)` is used for every combination that has no
    projection of its own, so not every combination has to be listed.

    Args:
        label_list (audiomate.annotations.LabelList): The label list to relabel
        projections (dict): A dictionary that maps tuples of label combinations to string
                            labels.

    Returns:
        audiomate.annotations.LabelList: New label list with remapped labels

    Raises:
        UnmappedLabelsException: If a projection for one or more combinations of labels
                                 is not defined.

    Example:
        >>> projections = {
        ...     ('a',): 'a',
        ...     ('b',): 'b',
        ...     ('c',): 'c',
        ...     ('a', 'b',): 'a_b',
        ...     ('a', 'b', 'c',): 'a_b_c',
        ...     ('**',): 'b_c',
        ... }
        >>> label_list = annotations.LabelList(labels=[
        ...     annotations.Label('a', 3.2, 4.5),
        ...     annotations.Label('b', 4.0, 4.9),
        ...     annotations.Label('c', 4.2, 5.1)
        ... ])
        >>> ll = relabel(label_list, projections)
        >>> [l.value for l in ll]
        ['a', 'a_b', 'a_b_c', 'b_c', 'c']
    """
    unmapped_combinations = find_missing_projections(label_list, projections)

    if len(unmapped_combinations) > 0:
        message = 'Unmapped combinations: {}'.format(unmapped_combinations)
        raise UnmappedLabelsException(message)

    new_labels = []

    for labeled_segment in label_list.ranges():
        segment_start = labeled_segment[0]
        segment_end = labeled_segment[1]
        segment_labels = labeled_segment[2]

        combination = tuple(sorted(label.value for label in segment_labels))

        if combination in projections:
            label_mapping = projections[combination]
        else:
            label_mapping = projections[WILDCARD_COMBINATION]

        # An empty mapping drops the segment from the result.
        if label_mapping != '':
            new_labels.append(annotations.Label(label_mapping, segment_start, segment_end))

    return annotations.LabelList(idx=label_list.idx, labels=new_labels)
def test_all_tokens_with_custom_delimiter(self):
    """ all_tokens splits the label 'a, b, a, c' at ',' and returns the tokens a, b and c. """
    corpus = resources.create_dataset()
    label_list = annotations.LabelList(idx='test', labels=[
        annotations.Label('a, b, a, c'),
    ])
    corpus.utterances['utt-1'].set_label_list(label_list)

    tokens = corpus.all_tokens(delimiter=',', label_list_ids=['test'])

    assert tokens == {'a', 'b', 'c'}
def test_all_tokens_returns_only_from_selected_label_lists(self):
    """ Tokens of the added 'test' label-list are not returned when only LL_WORD_TRANSCRIPT is selected. """
    corpus = resources.create_dataset()
    extra_list = annotations.LabelList(idx='test', labels=[
        annotations.Label('what can he do'),
    ])
    corpus.utterances['utt-1'].set_label_list(extra_list)

    selected_lists = [audiomate.corpus.LL_WORD_TRANSCRIPT]
    tokens = corpus.all_tokens(label_list_ids=selected_lists)

    assert tokens == {'who', 'am', 'i', 'are', 'is', 'he', 'you', 'she', 'they'}
def utt_with_noise():
    """ Return the utterance 'utt-2' with the label-lists 'alpha' and 'bravo'. """
    alpha = annotations.LabelList(idx='alpha', labels=[
        annotations.Label('music', 0, 5),
        annotations.Label('speech', 5, 12),
        annotations.Label('noise', 13, 15),
    ])
    bravo = annotations.LabelList(idx='bravo', labels=[
        annotations.Label('music', 0, 1),
        annotations.Label('speech', 2, 6),
    ])

    utt = tracks.Utterance('utt-2', 'file-2')
    utt.set_label_list(alpha)
    utt.set_label_list(bravo)

    return utt
def test_split_with_overlap(self):
    """ Split an utterance at two points with an overlap of 2.0 and check bounds and labels of all parts. """
    ll_1 = annotations.LabelList('phones', labels=[
        annotations.Label('alpha', start=0.0, end=30.0),
        annotations.Label('bravo', start=20.0, end=42.0)
    ])
    ll_2 = annotations.LabelList('words', labels=[
        annotations.Label('b', start=8.0, end=30.0)
    ])
    utt = tracks.Utterance('utt-1', 'file-x', start=10.0, end=55.0,
                           label_lists=[ll_1, ll_2])

    # The cutting points are given relative to the utterance start
    # (track_relative=False).
    res = utt.split([12.0, 24.0], track_relative=False, overlap=2.0)

    assert len(res) == 3

    assert res[0].start == 10.0
    assert res[0].end == 24.0
    assert res[0].label_lists['phones'] == annotations.LabelList(
        idx='phones',
        labels=[annotations.Label('alpha', 0.0, 14.0)])
    assert res[0].label_lists['words'] == annotations.LabelList(
        idx='words',
        labels=[annotations.Label('b', 8.0, 14.0)])

    assert res[1].start == 20.0
    assert res[1].end == 36.0
    assert res[1].label_lists['phones'] == annotations.LabelList(
        idx='phones',
        labels=[
            annotations.Label('alpha', 0.0, 16.0),
            annotations.Label('bravo', 10.0, 16.0),
        ])
    assert res[1].label_lists['words'] == annotations.LabelList(
        idx='words',
        labels=[annotations.Label('b', 0.0, 16.0)])

    assert res[2].start == 32.0
    assert res[2].end == 55.0
    assert res[2].label_lists['phones'] == annotations.LabelList(
        idx='phones',
        labels=[
            annotations.Label('alpha', 0.0, 8.0),
            annotations.Label('bravo', 0.0, 20.0),
        ])
    assert res[2].label_lists['words'] == annotations.LabelList(
        idx='words',
        labels=[annotations.Label('b', 0.0, 8.0)])
def classification_ref_and_hyp_label_list():
    """
    Sample output of a classification system.

    Returns:
        tuple: The reference label-list and the hypothesis label-list.
    """

    def build_label_list(timed_values):
        # Create a label-list from (value, start, end) tuples.
        return annotations.LabelList(labels=[
            annotations.Label(value, start=start, end=end)
            for value, start, end in timed_values
        ])

    ll_ref = build_label_list([
        ('music', 0, 5),
        ('speech', 5, 11),
        ('mix', 11, 14),
        ('speech', 14, 19),
    ])
    ll_hyp = build_label_list([
        ('music', 0, 4),
        ('speech', 4, 6),
        ('mix', 8, 16),
        ('speech', 16, 21),
    ])

    return ll_ref, ll_hyp