Beispiel #1
0
    def test_relabel_flattens_full_overlap_into_combined_label(self):
        """
        Label 'b' lies completely within label 'a'.

        The relabeled result is expected to consist of the three
        segments 'a', 'a_b' and 'a' again.
        """
        source = assets.LabelList(labels=[
            assets.Label('a', 3.2, 4.9),
            assets.Label('b', 3.9, 4.5),
        ])
        mapping = {
            ('a', ): 'a',
            ('b', ): 'b',
            ('a', 'b'): 'a_b',
        }

        actual = relabeling.relabel(source, mapping)

        # (start, end, value) of every expected label, in order.
        expected = [
            (3.2, 3.9, 'a'),
            (3.9, 4.5, 'a_b'),
            (4.5, 4.9, 'a'),
        ]

        assert len(actual) == 3

        for position, (start, end, value) in enumerate(expected):
            assert actual[position].start == start
            assert actual[position].end == end
            assert actual[position].value == value
Beispiel #2
0
    def setUp(self):
        """
        Create two utterances with an 'alpha' and a 'bravo' label-list each.

        Both utterances get the same labels, except for the value of the
        last 'alpha' label ('music' for utt-1, 'noise' for utt-2).
        """
        first = assets.Utterance('utt-1', 'file-1')
        self.utt1 = first

        first_alpha = [
            assets.Label('music', 0, 5),
            assets.Label('speech', 5, 12),
            assets.Label('music', 13, 15),
        ]
        first.set_label_list(assets.LabelList(idx='alpha', labels=first_alpha))

        first_bravo = [
            assets.Label('music', 0, 1),
            assets.Label('speech', 2, 6),
        ]
        first.set_label_list(assets.LabelList(idx='bravo', labels=first_bravo))

        second = assets.Utterance('utt-2', 'file-2')
        self.utt2 = second

        second_alpha = [
            assets.Label('music', 0, 5),
            assets.Label('speech', 5, 12),
            assets.Label('noise', 13, 15),
        ]
        second.set_label_list(
            assets.LabelList(idx='alpha', labels=second_alpha))

        second_bravo = [
            assets.Label('music', 0, 1),
            assets.Label('speech', 2, 6),
        ]
        second.set_label_list(
            assets.LabelList(idx='bravo', labels=second_bravo))
Beispiel #3
0
    def test_relabel_proceeds_despite_unmapped_labels_in_presence_of_wildcard_rule(
            self):
        """
        Only ('a', ) is mapped explicitly, everything else hits ('**', ).

        The relabeling is expected to succeed and to yield 'catch_all'
        for every segment where 'a' overlaps with 'b' and/or 'c'.
        """
        mapping = {
            ('a', ): 'new_label_a',
            ('**', ): 'catch_all'
        }
        source = assets.LabelList(labels=[
            assets.Label('a', 3.2, 5.1),
            assets.Label('b', 4.2, 4.7),
            assets.Label('c', 4.3, 4.8)
        ])

        actual = relabeling.relabel(source, mapping)

        # (start, end, value) of every expected label, in order.
        expected = [
            (3.2, 4.2, 'new_label_a'),
            (4.2, 4.3, 'catch_all'),
            (4.3, 4.7, 'catch_all'),
            (4.7, 4.8, 'catch_all'),
            (4.8, 5.1, 'new_label_a'),
        ]

        assert len(actual) == 5

        for position, (start, end, value) in enumerate(expected):
            assert actual[position].start == start
            assert actual[position].end == end
            assert actual[position].value == value
Beispiel #4
0
    def test_read_samples(self):
        """
        Samples read via a label must equal the matching part of the file.

        The utterance starts at 1.0 in the file, so the expected offset in
        the file is 1.0 plus the label start and the expected duration is
        the label end minus the label start.
        """
        wav = assets.File('wav', resources.sample_wav_file('wav_1.wav'))
        speaker = assets.Issuer('toni')
        utt = assets.Utterance('test',
                               wav,
                               issuer=speaker,
                               start=1.0,
                               end=2.30)

        first = assets.Label('a', 0.15, 0.448)
        second = assets.Label('a', 0.5, 0.73)
        utt.set_label_list(assets.LabelList(labels=[first, second]))

        # (label, offset within the file, duration)
        cases = (
            (first, 1.15, 0.298),
            (second, 1.5, 0.23),
        )

        for label, offset, duration in cases:
            expected, __ = librosa.core.load(wav.path,
                                             sr=None,
                                             offset=offset,
                                             duration=duration)
            assert np.array_equal(label.read_samples(), expected)
Beispiel #5
0
    def test_all_projections_missing_if_no_projections_defined(self):
        """
        With an empty projection mapping, five combinations of the
        overlapping labels are expected to be reported as missing.
        """
        source = assets.LabelList(labels=[
            assets.Label('b', 3.2, 4.5),
            assets.Label('a', 4.0, 4.9),
            assets.Label('c', 4.2, 5.1)
        ])

        missing = relabeling.find_missing_projections(source, {})

        expected_combinations = [
            ('b', ),
            ('a', 'b'),
            ('a', 'b', 'c'),
            ('a', 'c'),
            ('c', ),
        ]

        assert len(missing) == 5

        for combination in expected_combinations:
            assert combination in missing
Beispiel #6
0
    def test_ranges_zero_to_end(self):
        """
        Label 'a' is given with start 0 and end -1, 'b' with 5.1 to 8.9.

        Three ranges are expected; 'a' has to be part of all of them and
        the last range has to end at -1.
        """
        ll = assets.LabelList(labels=[
            assets.Label('a', 0, -1),
            assets.Label('b', 5.1, 8.9),
        ])

        range_iter = ll.ranges()

        # (start, end, indices of the labels expected in the range)
        expected = (
            (0, 5.1, (0, )),
            (5.1, 8.9, (0, 1)),
            (8.9, -1, (0, )),
        )

        for start, end, label_indices in expected:
            current = next(range_iter)
            self.assertEqual(start, current[0])
            self.assertEqual(end, current[1])

            for label_index in label_indices:
                self.assertIn(ll[label_index], current[2])

        # No further ranges must be produced.
        with self.assertRaises(StopIteration):
            next(range_iter)
Beispiel #7
0
    def test_relabel_removes_overlapping_segment(self):
        """
        The combination ('a', 'b') is projected onto the empty string.

        The segment where both labels overlap is expected to be absent
        from the result, leaving the two 'a' segments around it.
        """
        source = assets.LabelList(labels=[
            assets.Label('a', 3.2, 5.1),
            assets.Label('b', 4.2, 4.7),
        ])
        mapping = {
            ('a', ): 'a',
            ('a', 'b'): '',
            ('b', ): 'b',
        }

        actual = relabeling.relabel(source, mapping)

        # (start, end, value) of every expected label, in order.
        expected = [
            (3.2, 4.2, 'a'),
            (4.7, 5.1, 'a'),
        ]

        assert len(actual) == 2

        for position, (start, end, value) in enumerate(expected):
            assert actual[position].start == start
            assert actual[position].end == end
            assert actual[position].value == value
Beispiel #8
0
    def test_no_missing_projections_if_projection_complete(self):
        """
        Every combination occurring in the label-list has a projection,
        hence nothing is expected to be reported as missing.
        """
        source = assets.LabelList(labels=[
            assets.Label('b', 3.2, 4.5),
            assets.Label('a', 4.0, 4.9),
            assets.Label('c', 4.2, 5.1)
        ])
        mapping = {
            ('b', ): 'foo',
            ('a', 'b'): 'foo',
            ('a', 'b', 'c'): 'foo',
            ('a', 'c'): 'foo',
            ('c', ): 'bar'
        }

        missing = relabeling.find_missing_projections(source, mapping)

        assert len(missing) == 0
Beispiel #9
0
    def test_eq_ignores_label_list_relation(self):
        """
        Two labels with the same start and end that belong to different
        label-lists are expected to compare equal.

        Note that the two values only differ in the case of the last
        character.
        """
        first = assets.Label('some label A', 1.0, 2.0)
        second = assets.Label('some label a', 1.0, 2.0)

        first_list = assets.LabelList(idx='one', labels=[first])
        second_list = assets.LabelList(idx='another', labels=[second])

        # Both labels are linked to their own list ...
        assert first.label_list == first_list
        assert second.label_list == second_list
        # ... and are expected to be equal nevertheless.
        assert first == second
Beispiel #10
0
    def test_extend(self):
        """
        Extending an empty label-list with three labels must result in a
        list of length 3, with every label referencing the list.
        """
        ll = assets.LabelList()

        texts = ('some text', 'more text', 'text again')
        added = [assets.Label(text) for text in texts]
        ll.extend(added)

        assert len(ll) == 3

        for label in added:
            assert label.label_list == ll
Beispiel #11
0
    def test_label_total_durations(self):
        """
        The total duration per label value must be the sum of the
        durations of all labels with that value.
        """
        ll = assets.LabelList(labels=[
            assets.Label('a', 3.2, 4.5),
            assets.Label('b', 5.1, 8.9),
            assets.Label('c', 7.2, 10.5),
            assets.Label('a', 10.5, 14),
            assets.Label('c', 13, 14)
        ])

        durations = ll.label_total_duration()

        # (label value, expected summed duration)
        expected = (
            ('a', 4.8),
            ('b', 3.8),
            ('c', 4.3),
        )

        for value, total in expected:
            assert durations[value] == pytest.approx(total)
Beispiel #12
0
    def test_label_count(self):
        """
        The count per label value must equal the number of labels in the
        list that have this value.
        """
        ll = assets.LabelList(labels=[
            assets.Label('a', 3.2, 4.5),
            assets.Label('b', 5.1, 8.9),
            assets.Label('c', 7.2, 10.5),
            assets.Label('a', 10.5, 14),
            assets.Label('c', 13, 14)
        ])

        counts = ll.label_count()

        # (label value, expected number of occurrences)
        expected = (
            ('a', 2),
            ('b', 1),
            ('c', 2),
        )

        for value, occurrences in expected:
            self.assertEqual(occurrences, counts[value])
Beispiel #13
0
    def test_ranges_include_labels(self):
        """
        With ``include_labels=['a']`` only the range of label 'a' is
        expected; the range of 'b' must not be yielded.
        """
        ll = assets.LabelList(labels=[
            assets.Label('a', 3.2, 4.5),
            assets.Label('b', 5.1, 8.9),
        ])

        range_iter = ll.ranges(include_labels=['a'])

        only_range = next(range_iter)
        self.assertEqual(3.2, only_range[0])
        self.assertEqual(4.5, only_range[1])
        self.assertIn(ll[0], only_range[2])

        # No further ranges must be produced.
        with self.assertRaises(StopIteration):
            next(range_iter)
Beispiel #14
0
def corpus_with_more_labels():
    """
    Corpus with an extra label-list.

    Every utterance from 'utt-1' to 'utt-8' of the single-label corpus
    gets an additional label-list 'radio' holding one label:
    'alpha' for the first two utterances, 'beta' for the remaining six.
    """
    corpus = resources.create_single_label_corpus()

    # Value of the 'radio' label for utt-1 ... utt-8, in that order.
    radio_values = ['alpha'] * 2 + ['beta'] * 6

    for number, value in enumerate(radio_values, start=1):
        utt_idx = 'utt-{}'.format(number)
        corpus.utterances[utt_idx].set_label_list(
            assets.LabelList(idx='radio', labels=[assets.Label(value)]))

    return corpus
def test_merge_consecutive_labels_with_same_values():
    """
    The two 'a' labels are separated by a gap of 0.008, which is below
    the threshold of 0.01, so they are expected to be merged into one.
    """
    ll = assets.LabelList(labels=[
        assets.Label('a', 0, 0.993),
        assets.Label('a', 1.001, 2.8),
        assets.Label('b', 2.8, 3.94)
    ])

    label_cleaning.merge_consecutive_labels_with_same_values(
        ll, threshold=0.01)

    expected = [
        assets.Label('a', 0, 2.8),
        assets.Label('b', 2.8, 3.94),
    ]

    assert len(ll) == 2

    for position, label in enumerate(expected):
        assert ll[position] == label
Beispiel #16
0
    def test_relabel_flattens_partial_overlap_into_combined_label(self):
        """
        Three labels that partially overlap each other.

        Every segment is expected to carry the projection of exactly the
        combination of labels present in it.
        """
        source = assets.LabelList(labels=[
            assets.Label('a', 3.2, 4.5),
            assets.Label('b', 4.0, 4.9),
            assets.Label('c', 4.2, 5.1)
        ])
        mapping = {
            ('a', ): 'a',
            ('b', ): 'b',
            ('c', ): 'c',
            ('a', 'b'): 'a_b',
            ('a', 'b', 'c'): 'a_b_c',
            ('b', 'c'): 'b_c',
        }

        actual = relabeling.relabel(source, mapping)

        # (start, end, value) of every expected label, in order.
        expected = [
            (3.2, 4.0, 'a'),
            (4.0, 4.2, 'a_b'),
            (4.2, 4.5, 'a_b_c'),
            (4.5, 4.9, 'b_c'),
            (4.9, 5.1, 'c'),
        ]

        assert len(actual) == 5

        for position, (start, end, value) in enumerate(expected):
            assert actual[position].start == start
            assert actual[position].end == end
            assert actual[position].value == value
Beispiel #17
0
    def test_relabel_throws_error_if_unmapped_labels_are_detected(self):
        """
        Only ('a', ) is mapped, the overlaps with 'b' and 'c' are not.

        An UnmappedLabelsException is expected whose message lists the
        three combinations without a projection.
        """
        source = assets.LabelList(labels=[
            assets.Label('a', 3.2, 5.1),
            assets.Label('b', 4.2, 4.7),
            assets.Label('c', 4.3, 4.8)
        ])
        mapping = {('a', ): 'foo'}

        with pytest.raises(relabeling.UnmappedLabelsException) as ex:
            relabeling.relabel(source, mapping)

        missing = [('a', 'b'), ('a', 'b', 'c'), ('a', 'c')]

        assert ex.value.message == 'Unmapped combinations: {}'.format(missing)
Beispiel #18
0
    def test_label_creation(self):
        """
        A label created from value, start and end must expose exactly
        these values and an empty ``meta`` container.
        """
        label = assets.Label('value', 6.2, 8.9)

        assert (label.value, label.start, label.end) == ('value', 6.2, 8.9)
        assert len(label.meta) == 0
Beispiel #19
0
 def setUp(self):
     """
     Provide ``self.test_label`` (start 0.5, end -1), contained in a
     label-list that is attached to an utterance (start 0.3, end -1)
     of the sample file 'wav_1.wav'.
     """
     self.test_label = assets.Label('a', start=0.5, end=-1)
     labels = assets.LabelList()
     labels.append(self.test_label)

     wav = assets.File('wav', resources.sample_wav_file('wav_1.wav'))
     owner = assets.Utterance('utt', wav, start=0.3, end=-1)
     owner.set_label_list(labels)
Beispiel #20
0
    def read_labels(path, corpus):
        """
        Read the label files found in ``path`` and attach the labels to
        the utterances of ``corpus``.

        All files matching ``<LABEL_FILE_PREFIX>_*.txt`` are processed.
        The part of the file name between the prefix and ``.txt`` is
        used as idx of the label-lists created from that file.

        Of every record, column 0 is used as utterance-idx, columns 1 and
        2 are converted to float and used as start and end, and column 3
        is the label value. If the value matches ``META_PATTERN``,
        group 1 replaces the value and group 2 is parsed as JSON and
        passed on as ``meta`` of the label.

        Args:
            path: Directory that is searched for label files.
            corpus: Corpus whose ``utterances`` receive the label-lists.
        """
        # NOTE(review): no ``self``/``cls`` parameter, so this is
        # presumably a static method; the decorator is not in view.
        name_prefix = '{}_'.format(LABEL_FILE_PREFIX)
        name_suffix = '.txt'
        file_pattern = os.path.join(path,
                                    '{}_*.txt'.format(LABEL_FILE_PREFIX))

        for label_path in glob.glob(file_pattern):
            base_name = os.path.basename(label_path)
            # Strip prefix and extension to get the label-list idx.
            list_idx = base_name[len(name_prefix):
                                 len(base_name) - len(name_suffix)]

            labels_per_utterance = collections.defaultdict(list)

            records = textfile.read_separated_lines_generator(
                label_path, separator=' ', max_columns=4)

            for record in records:
                value = record[3]
                start = float(record[1])
                end = float(record[2])
                match = META_PATTERN.match(value)

                if match is None:
                    meta = None
                else:
                    meta = json.loads(match.group(2))
                    value = match.group(1)

                labels_per_utterance[record[0]].append(
                    assets.Label(value, start, end, meta=meta))

            for utterance_idx, utt_labels in labels_per_utterance.items():
                label_list = assets.LabelList(idx=list_idx, labels=utt_labels)
                corpus.utterances[utterance_idx].set_label_list(label_list)
Beispiel #21
0
    def test_missing_projections_are_naturally_sorted(self):
        """
        Although 'b' starts before 'a', the missing combinations are
        expected in the order ('a', ), ('a', 'b'), ('b', ).
        """
        source = assets.LabelList(labels=[
            assets.Label('b', 1.0, 2.0),
            assets.Label('a', 1.5, 2.5),
        ])

        missing = relabeling.find_missing_projections(source, {})

        expected_order = [
            ('a', ),
            ('a', 'b'),
            ('b', ),
        ]

        assert len(missing) == 3

        for position, combination in enumerate(expected_order):
            assert missing[position] == combination
Beispiel #22
0
    def test_no_missing_projections_if_covered_by_catch_all_rule(self):
        """
        Besides ('b', ) only the ('**', ) rule is defined; no combination
        is expected to be reported as missing.
        """
        source = assets.LabelList(labels=[
            assets.Label('b', 3.2, 4.5),
            assets.Label('a', 4.0, 4.9),
            assets.Label('c', 4.2, 5.1)
        ])
        mapping = {
            ('b', ): 'new_label_b',
            ('**', ): 'new_label_all',
        }

        missing = relabeling.find_missing_projections(source, mapping)

        assert len(missing) == 0
Beispiel #23
0
    def test_encode_utterance_takes_lower_index_first(self):
        """
        Encode an utterance labeled 'music' (0 to 3) and 'speech' (3 to 5)
        with an encoder created for ['speech', 'music', 'noise'].

        The expected encoding is ``[1, 1, 0, 0]``.
        """
        file = assets.File('file-idx', resources.sample_wav_file('wav_1.wav'))
        utt = assets.Utterance('utt-idx', file, start=0, end=5)
        ll = assets.LabelList(
            labels=[assets.Label('music', 0, 3),
                    assets.Label('speech', 3, 5)])
        utt.set_label_list(ll)

        enc = label_encoding.FrameOrdinalEncoder(
            ['speech', 'music', 'noise'],
            frame_settings=units.FrameSettings(32000, 16000),
            sr=16000)

        actual = enc.encode(utt)
        # ``np.int`` was only an alias of the builtin ``int`` and has been
        # removed from NumPy (1.24); the builtin yields the same dtype.
        expected = np.array([1, 1, 0, 0]).astype(int)

        assert np.array_equal(expected, actual)
Beispiel #24
0
    def test_relabel_removes_unwanted_labels(self):
        """
        ('a', ) is projected onto the empty string, so only the label
        'b' is expected in the result.
        """
        source = assets.LabelList(labels=[
            assets.Label('a', 3.2, 4.4),
            assets.Label('b', 4.4, 5.1),
        ])
        mapping = {
            ('a', ): '',
            ('b', ): 'b',
        }

        actual = relabeling.relabel(source, mapping)

        assert len(actual) == 1

        remaining = (actual[0].start, actual[0].end, actual[0].value)
        assert remaining == (4.4, 5.1, 'b')
Beispiel #25
0
    def test_label_creation_with_info(self):
        """
        A label created with a ``meta`` dictionary must expose value,
        start and end as well as the given meta entry.
        """
        label = assets.Label('value', 6.2, 8.9, meta={'something': 2})

        assert (label.value, label.start, label.end) == ('value', 6.2, 8.9)
        assert len(label.meta) == 1
        assert label.meta['something'] == 2
Beispiel #26
0
    def test_append(self):
        """
        Appending a label to an empty label-list must result in a list
        of length 1, with the label referencing the list.
        """
        target = assets.LabelList()
        added = assets.Label('some text')

        target.append(added)

        assert len(target) == 1
        assert added.label_list == target
Beispiel #27
0
    def test_set_label_list(self):
        """
        Set a label-list 'delta' on ``self.utt``.

        Afterwards the utterance is expected to hold four label-lists
        ('alpha', 'bravo', 'charlie' being ``self.ll_1`` to ``self.ll_3``
        from the fixture, which is not part of this block) and every
        list has to reference the utterance.
        """
        delta = assets.LabelList(idx='delta',
                                 labels=[
                                     assets.Label('y', 0.0, 3.3),
                                     assets.Label('t', 3.8, 7.9),
                                 ])

        self.utt.set_label_list(delta)

        # (idx, label-list expected to be stored under that idx)
        expected = [
            ('alpha', self.ll_1),
            ('bravo', self.ll_2),
            ('charlie', self.ll_3),
            ('delta', delta),
        ]

        assert len(self.utt.label_lists) == 4

        for idx, label_list in expected:
            assert self.utt.label_lists[idx] == label_list

        for __, label_list in expected:
            assert label_list.utterance == self.utt
Beispiel #28
0
    def test_relabel_maps_a_onto_b(self):
        """
        A single label 'a' projected onto 'b' must keep start and end
        and only change its value.
        """
        source = assets.LabelList(labels=[assets.Label('a', 3.2, 4.5)])
        mapping = {('a', ): 'b'}

        actual = relabeling.relabel(source, mapping)

        assert len(actual) == 1

        relabeled = (actual[0].start, actual[0].end, actual[0].value)
        assert relabeled == (3.2, 4.5, 'b')
Beispiel #29
0
    def test_no_duplicate_missing_projections_reported(self):
        """
        The same overlap pattern of 'b' and 'a' occurs twice, yet every
        missing combination is expected to be reported only once.
        """
        source = assets.LabelList(labels=[
            assets.Label('b', 1.0, 2.0),
            assets.Label('a', 1.5, 2.5),
            assets.Label('b', 3.0, 4.0),
            assets.Label('a', 3.5, 4.5),
        ])

        missing = relabeling.find_missing_projections(source, {})

        expected_combinations = [
            ('b', ),
            ('a', 'b'),
            ('a', ),
        ]

        assert len(missing) == 3

        for combination in expected_combinations:
            assert combination in missing
Beispiel #30
0
    def test_ranges_with_empty(self):
        """
        With ``yield_ranges_without_labels=True`` the gap between 4.5 and
        5.1 is expected to be yielded as a range that contains no labels.
        """
        ll = assets.LabelList(labels=[
            assets.Label('a', 3.2, 4.5),
            assets.Label('b', 5.1, 8.9),
            assets.Label('c', 7.2, 10.5),
            assets.Label('d', 10.5, 14)
        ])

        range_iter = ll.ranges(yield_ranges_without_labels=True)

        # (start, end, indices of the labels expected in the range);
        # an empty index tuple marks a range expected to have no labels.
        expected = (
            (3.2, 4.5, (0, )),
            (4.5, 5.1, ()),
            (5.1, 7.2, (1, )),
            (7.2, 8.9, (1, 2)),
            (8.9, 10.5, (2, )),
            (10.5, 14, (3, )),
        )

        for start, end, label_indices in expected:
            current = next(range_iter)
            self.assertEqual(start, current[0])
            self.assertEqual(end, current[1])

            if not label_indices:
                self.assertEqual(0, len(current[2]))
                continue

            for label_index in label_indices:
                self.assertIn(ll[label_index], current[2])

        # No further ranges must be produced.
        with self.assertRaises(StopIteration):
            next(range_iter)