Example #1
    def test_match(self):
        utt_filter = subview.MatchingUtteranceIdxFilter(
            utterance_idxs={'a', 'b', 'd'})

        assert utt_filter.match(tracks.Utterance('a', 'x'), None)
        assert utt_filter.match(tracks.Utterance('b', 'x'), None)
        assert utt_filter.match(tracks.Utterance('d', 'x'), None)
        assert not utt_filter.match(tracks.Utterance('c', 'x'), None)
        assert not utt_filter.match(tracks.Utterance('e', 'x'), None)
Example #2
    def test_match_inverse(self):
        filter = subview.MatchingUtteranceIdxFilter(
            utterance_idxs={'a', 'b', 'd'}, inverse=True)

        assert not filter.match(tracks.Utterance('a', 'x'), None)
        assert not filter.match(tracks.Utterance('b', 'x'), None)
        assert not filter.match(tracks.Utterance('d', 'x'), None)
        assert filter.match(tracks.Utterance('c', 'x'), None)
        assert filter.match(tracks.Utterance('e', 'x'), None)
Example #3
    def test_read_samples(self):
        path = resources.sample_wav_file('wav_1.wav')
        track = tracks.FileTrack('wav', path)
        issuer = issuers.Issuer('toni')
        utt = tracks.Utterance('t', track, issuer=issuer, start=1.0, end=2.30)

        l1 = annotations.Label('a', 0.15, 0.448)
        l2 = annotations.Label('a', 0.5, 0.73)
        ll = annotations.LabelList(labels=[l1, l2])

        utt.set_label_list(ll)

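        # l1 spans 0.15-0.448 s within the utterance, which starts at 1.0 s in the
        # track, so its samples are read at offset 1.15 s for a duration of 0.298 s.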
        expected, __ = librosa.core.load(path,
                                         sr=None,
                                         offset=1.15,
                                         duration=0.298)
        assert np.array_equal(l1.read_samples(), expected)

        expected, __ = librosa.core.load(path,
                                         sr=None,
                                         offset=1.5,
                                         duration=1.73 - 1.5)

        print(expected.shape)
        print(l2.read_samples().shape)
        assert np.array_equal(l2.read_samples(), expected)
Example #4
def generate_utterances(track, issuer, n, n_ll_range, n_label_range, rand=None):
    if rand is None:
        rand = random.Random()

    items = []

    for i in range(n):
        utt_idx = '{}-utt-{}'.format(track.idx, i)
        start = rand.random() * 3
        end = 3 + rand.random() * 8

        utt = tracks.Utterance(
            utt_idx,
            track,
            issuer=issuer,
            start=start,
            end=end
        )
        n_ll = rand.randint(*n_ll_range)

        for ll in generate_label_lists(n_ll, n_label_range, rand=rand):
            utt.set_label_list(ll)

        items.append(utt)

    return items
Example #5
    def test_split_with_cutting_point_after_end_returns_one_utt(self):
        utt = tracks.Utterance('utt-1', None, start=4.0, end=20.0)
        res = utt.split([24.5])

        assert len(res) == 1
        assert res[0].start == 4.0
        assert res[0].end == 20.0
Example #6
    def test_does_utt_match_target_format_with_invalid_format_returns_false(self):
        file_path = resources.get_resource_path(('audio_formats', 'mp3_2_44_1k_16b.mp3'))
        track = tracks.FileTrack('t', file_path)
        utt = tracks.Utterance('u', track)

        c = conversion.WavAudioFileConverter()
        assert not c._does_utt_match_target_format(utt)
Example #7
    def test_does_utt_match_target_format_returns_true(self):
        file_path = resources.sample_wav_file('wav_1.wav')
        track = tracks.FileTrack('t', file_path)
        utt = tracks.Utterance('u', track)

        c = conversion.WavAudioFileConverter()
        assert c._does_utt_match_target_format(utt)
Example #8
    def test_split_utt_relative_with_labels(self):
        ll_1 = annotations.LabelList('phones', labels=[
            annotations.Label('alpha', start=0.0, end=30.0)
        ])
        ll_2 = annotations.LabelList('words', labels=[
            annotations.Label('b', start=8.0, end=30.0)
        ])
        utt = tracks.Utterance('utt-1', 'file-x', start=10.0, end=40.0, label_lists=[ll_1, ll_2])

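        # With track_relative=False the cut point 14.0 is measured from the
        # utterance start (10.0), i.e. at 24.0 in track time.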
        res = utt.split([14.0], track_relative=False)

        assert len(res) == 2

        assert res[0].start == 10.0
        assert res[0].end == 24.0
        assert res[0].label_lists['phones'] == annotations.LabelList(idx='phones', labels=[
            annotations.Label('alpha', 0.0, 14.0)
        ])
        assert res[0].label_lists['words'] == annotations.LabelList(idx='words', labels=[
            annotations.Label('b', 8.0, 14.0)
        ])

        assert res[1].start == 24.0
        assert res[1].end == 40.0
        assert res[1].label_lists['phones'] == annotations.LabelList(idx='phones', labels=[
            annotations.Label('alpha', 0.0, 16.0)
        ])
        assert res[1].label_lists['words'] == annotations.LabelList(idx='words', labels=[
            annotations.Label('b', 0.0, 16.0)
        ])
Example #9
    def test_split(self):
        ll_1 = annotations.LabelList('phones', labels=[
            annotations.Label('alpha', start=0.0, end=30.0)
        ])
        ll_2 = annotations.LabelList('words', labels=[
            annotations.Label('b', start=0.0, end=30.0)
        ])
        utt = tracks.Utterance('utt-1', 'track-x', start=0.0, end=40.0, label_lists=[ll_1, ll_2])

        res = utt.split([14.0, 29.5])

        assert len(res) == 3

        assert res[0].start == 0.0
        assert res[0].end == 14.0
        assert 'phones' in res[0].label_lists.keys()
        assert 'words' in res[0].label_lists.keys()

        assert res[1].start == 14.0
        assert res[1].end == 29.5
        assert 'phones' in res[1].label_lists.keys()
        assert 'words' in res[1].label_lists.keys()

        assert res[2].start == 29.5
        assert res[2].end == 40.0
        assert 'phones' in res[2].label_lists.keys()
        assert 'words' in res[2].label_lists.keys()
Example #10
    def test_encode_label_ends_at_utterance_end(self):
        track = tracks.FileTrack('file1',
                                 resources.sample_wav_file('med_len.wav'))
        utt = tracks.Utterance('utt1', track, start=3, end=14)
        ll = annotations.LabelList(labels=[
            annotations.Label('speech', 0, 4),
            annotations.Label('music', 4, 9),
            annotations.Label('speech', 9, float('inf')),
        ])
        utt.set_label_list(ll)

        enc = encoding.FrameHotEncoder(['music', 'speech', 'noise'],
                                       'default',
                                       frame_settings=units.FrameSettings(
                                           32000, 16000),
                                       sr=16000)

        actual = enc.encode_utterance(utt)
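        # 2 s frames with a 1 s hop at 16 kHz over the 11 s utterance give 10 frames;
        # columns are [music, speech, noise]. Frames straddling the label boundaries
        # at 4 s and 9 s are hot for both classes.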
        expected = np.array([
            [0, 1, 0],
            [0, 1, 0],
            [0, 1, 0],
            [1, 1, 0],
            [1, 0, 0],
            [1, 0, 0],
            [1, 0, 0],
            [1, 0, 0],
            [1, 1, 0],
            [0, 1, 0],
        ]).astype(np.float32)

        assert np.array_equal(expected, actual)
Example #11
    def test_import_utterance_no_track(self, corpus):
        importing_utterances = [
            tracks.Utterance('a', tracks.FileTrack('notexist', 'notexist'),
                             corpus.issuers['existing_issuer'], 0, 10)
        ]

        with pytest.raises(ValueError):
            corpus.import_utterances(importing_utterances)
Example #12
    def test_split_sets_track(self):
        file = tracks.FileTrack('file-1', '/some/path')
        utt = tracks.Utterance('utt-1', file, start=0.0, end=10.0)
        res = utt.split([5.2])

        assert len(res) == 2
        assert res[0].track == file
        assert res[1].track == file
Example #13
    def test_import_utterance_no_issuer(self, corpus):
        importing_utterances = [
            tracks.Utterance('a', corpus.tracks['existing_file'],
                             issuers.Issuer('notexist'), 0, 10)
        ]

        with pytest.raises(ValueError):
            corpus.import_utterances(importing_utterances)
Example #14
    def test_split_sets_issuer(self):
        issuer = issuers.Speaker('spk-1')
        utt = tracks.Utterance('utt-1', None, issuer=issuer, start=0.0, end=10.0)
        res = utt.split([5.2])

        assert len(res) == 2
        assert res[0].issuer == issuer
        assert res[1].issuer == issuer
Example #15
    def test_encode_utterance_with_single_label(self):
        ll = annotations.LabelList(idx='go',
                                   labels=[annotations.Label('a c b')])
        utt = tracks.Utterance('utt-1', None, label_lists=ll)

        encoder = encoding.TokenOrdinalEncoder('go', ['a', 'b', 'c'])
        encoded = encoder.encode_utterance(utt)

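        # Tokens are mapped to their index in ['a', 'b', 'c']: a -> 0, c -> 2, b -> 1.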
        assert np.array_equal(encoded, [0, 2, 1])
Example #16
    def test_split_when_utt_start_is_not_zero(self):
        utt = tracks.Utterance('utt-1', None, start=6.0, end=20.0)
        res = utt.split([3.0])

        assert len(res) == 2
        assert res[0].start == 6.0
        assert res[0].end == 9.0
        assert res[1].start == 9.0
        assert res[1].end == 20.0
Example #17
    def test_split_utt_relative(self):
        utt = tracks.Utterance('utt-1', None, start=6.0, end=20.0)
        res = utt.split([8.0], track_relative=False)

        assert len(res) == 2
        assert res[0].start == 6.0
        assert res[0].end == 14.0
        assert res[1].start == 14.0
        assert res[1].end == 20.0
Example #18
    def test_split_endless(self):
        utt = tracks.Utterance('utt-1', None, start=0.0)
        res = utt.split([24.5])

        assert len(res) == 2
        assert res[0].start == 0.0
        assert res[0].end == 24.5
        assert res[1].start == 24.5
        assert res[1].end == float('inf')
Example #19
    def test_encode_utterance_with_non_existing_label_list_raises_error(self):
        ll = annotations.LabelList(idx='go',
                                   labels=[annotations.Label('a c b unknown')])
        utt = tracks.Utterance('utt-1', None, label_lists=ll)

        encoder = encoding.TokenOrdinalEncoder('not_existing', ['a', 'b', 'c'])

        with pytest.raises(ValueError):
            encoder.encode_utterance(utt)
Example #20
    def new_utterance(self,
                      utterance_idx,
                      track_idx,
                      issuer_idx=None,
                      start=0,
                      end=float('inf')):
        """
        Add a new utterance to the corpus with the given data.

        Parameters:
            utterance_idx (str): The id to associate with the utterance.
                                 If None or already exists, one is generated.
            track_idx (str): The track id the utterance is in.
            issuer_idx (str): The issuer id to associate with the utterance.
            start (float): Start of the utterance within the track [seconds].
            end (float): End of the utterance within the track [seconds].
                         ``inf`` equals the end of the track.

        Returns:
            Utterance: The newly added utterance.
        """

        new_utt_idx = utterance_idx

        # Check if there is a track with the given idx
        if track_idx not in self._tracks.keys():
            raise ValueError(
                'Track with id {} does not exist!'.format(track_idx))

        # Check if issuer exists
        issuer = None

        if issuer_idx is not None:
            if issuer_idx not in self._issuers.keys():
                raise ValueError(
                    'Issuer with id {} does not exist!'.format(issuer_idx))
            else:
                issuer = self._issuers[issuer_idx]

        # Add index to idx if already existing
        if new_utt_idx in self._utterances.keys():
            new_utt_idx = naming.index_name_if_in_list(new_utt_idx,
                                                       self._utterances.keys())

        new_utt = tracks.Utterance(new_utt_idx,
                                   self.tracks[track_idx],
                                   issuer=issuer,
                                   start=start,
                                   end=end)

        self._utterances[new_utt_idx] = new_utt

        return new_utt
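
For illustration, here is a minimal usage sketch of new_utterance. It populates a corpus the same way as the corpus() fixture in Example #22; the identifiers 'track-1', 'spk-1' and 'utt-1' are made up for this sketch.

import audiomate
from audiomate import tracks, issuers

corpus = audiomate.Corpus()

# Register a track and an issuer directly, as the corpus() fixture in Example #22 does.
corpus.tracks['track-1'] = tracks.FileTrack('track-1', '/any/path.wav')
corpus.issuers['spk-1'] = issuers.Issuer('spk-1')

# The requested idx is free, so it is used unchanged.
utt_a = corpus.new_utterance('utt-1', 'track-1', issuer_idx='spk-1',
                             start=0.0, end=5.0)

# Re-using the idx takes the renaming branch and a suffix is appended,
# as Example #21 shows for 'existing_utt' -> 'existing_utt_1'.
utt_b = corpus.new_utterance('utt-1', 'track-1', issuer_idx='spk-1',
                             start=5.0, end=10.0)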
Example #21
    def test_import_utterances(self, corpus):
        importing_utterances = [
            tracks.Utterance('a', corpus.tracks['existing_file'],
                             corpus.issuers['existing_issuer'], 0, 10),
            tracks.Utterance('b', corpus.tracks['existing_file'],
                             corpus.issuers['existing_issuer'], 10, 20),
            tracks.Utterance('existing_utt', corpus.tracks['existing_file'],
                             corpus.issuers['existing_issuer'], 20, 30)
        ]

        mapping = corpus.import_utterances(importing_utterances)

        assert corpus.num_utterances == 4
        assert 'a' in corpus.utterances.keys()
        assert 'b' in corpus.utterances.keys()
        assert 'existing_utt_1' in corpus.utterances.keys()

        assert len(mapping) == 3
        assert mapping['a'].idx == 'a'
        assert mapping['b'].idx == 'b'
        assert mapping['existing_utt'].idx == 'existing_utt_1'
Example #22
def corpus():
    corpus = audiomate.Corpus()

    ex_file = tracks.FileTrack('existing_file', '../any/path.wav')
    ex_issuer = issuers.Issuer('existing_issuer')
    ex_utterance = tracks.Utterance('existing_utt', ex_file, issuer=ex_issuer)

    corpus.tracks['existing_file'] = ex_file
    corpus.issuers['existing_issuer'] = ex_issuer
    corpus.utterances['existing_utt'] = ex_utterance

    return corpus
Example #23
    def test_split_with_overlap(self):
        ll_1 = annotations.LabelList('phones',
                                     labels=[
                                         annotations.Label('alpha',
                                                           start=0.0,
                                                           end=30.0),
                                         annotations.Label('bravo',
                                                           start=20.0,
                                                           end=42.0)
                                     ])
        ll_2 = annotations.LabelList(
            'words', labels=[annotations.Label('b', start=8.0, end=30.0)])
        utt = tracks.Utterance('utt-1',
                               'file-x',
                               start=10.0,
                               end=55.0,
                               label_lists=[ll_1, ll_2])

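        # Cut points [12.0, 24.0] are utterance-relative (22.0 s and 34.0 s in the
        # track); overlap=2.0 widens each resulting segment by 2 s towards its
        # neighbours, hence the overlapping ranges checked below.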
        res = utt.split([12.0, 24.0], track_relative=False, overlap=2.0)

        assert len(res) == 3

        assert res[0].start == 10.0
        assert res[0].end == 24.0
        assert res[0].label_lists['phones'] == annotations.LabelList(
            idx='phones', labels=[annotations.Label('alpha', 0.0, 14.0)])
        assert res[0].label_lists['words'] == annotations.LabelList(
            idx='words', labels=[annotations.Label('b', 8.0, 14.0)])

        print(res[1].label_lists['phones'].labels)

        assert res[1].start == 20.0
        assert res[1].end == 36.0
        assert res[1].label_lists['phones'] == annotations.LabelList(
            idx='phones',
            labels=[
                annotations.Label('alpha', 0.0, 16.0),
                annotations.Label('bravo', 10.0, 16.0),
            ])
        assert res[1].label_lists['words'] == annotations.LabelList(
            idx='words', labels=[annotations.Label('b', 0.0, 16.0)])

        assert res[2].start == 32.0
        assert res[2].end == 55.0
        assert res[2].label_lists['phones'] == annotations.LabelList(
            idx='phones',
            labels=[
                annotations.Label('alpha', 0.0, 8.0),
                annotations.Label('bravo', 0.0, 20.0),
            ])
        assert res[2].label_lists['words'] == annotations.LabelList(
            idx='words', labels=[annotations.Label('b', 0.0, 8.0)])
Example #24
    def setup_method(self):
        self.ll_1 = annotations.LabelList(idx='alpha',
                                          labels=[
                                              annotations.Label('a', 3.2, 4.5),
                                              annotations.Label('b', 5.1, 8.9),
                                              annotations.Label(
                                                  'c', 7.2, 10.5),
                                              annotations.Label('d', 10.5, 14),
                                              annotations.Label('d', 15, 18)
                                          ])

        self.ll_2 = annotations.LabelList(idx='bravo',
                                          labels=[
                                              annotations.Label('a', 1.0, 4.2),
                                              annotations.Label('e', 4.2, 7.9),
                                              annotations.Label(
                                                  'c', 7.2, 10.5),
                                              annotations.Label('f', 10.5, 14),
                                              annotations.Label('d', 15, 17.3)
                                          ])

        self.ll_duplicate_idx = annotations.LabelList(
            idx='charlie',
            labels=[
                annotations.Label('t', 1.0, 4.2),
                annotations.Label('h', 4.2, 7.9)
            ])

        self.ll_3 = annotations.LabelList(idx='charlie',
                                          labels=[
                                              annotations.Label('a', 1.0, 4.2),
                                              annotations.Label('g', 4.2, 7.9)
                                          ])

        self.track = tracks.FileTrack('wav',
                                      resources.sample_wav_file('wav_1.wav'))
        self.issuer = issuers.Issuer('toni')
        self.utt = tracks.Utterance('test',
                                    self.track,
                                    issuer=self.issuer,
                                    start=1.25,
                                    end=1.30,
                                    label_lists=[
                                        self.ll_1, self.ll_2,
                                        self.ll_duplicate_idx, self.ll_3
                                    ])
Example #25
def test_utt_read_samples(benchmark):
    utts = []

    wav_path = resources.get_test_resource_path(('wav_files', 'med_len.wav'))
    track = tracks.FileTrack('idx', wav_path)
    utts.append(tracks.Utterance('uidx', track))
    utts.append(tracks.Utterance('uidx', track, start=2.8))
    utts.append(tracks.Utterance('uidx', track, end=10.2))
    utts.append(tracks.Utterance('uidx', track, start=2.4, end=9.8))

    mp3_path = resources.get_test_resource_path(
        ('audio_formats', 'mp3_2_44_1k_16b.mp3'))
    track = tracks.FileTrack('idx', mp3_path)
    utts.append(tracks.Utterance('uidx', track))
    utts.append(tracks.Utterance('uidx', track, start=2.8))
    utts.append(tracks.Utterance('uidx', track, end=4.9))
    utts.append(tracks.Utterance('uidx', track, start=0.4, end=4.8))

    benchmark(run, utts)
Example #26
    def test_encode_utterance_with_multiple_non_overlapping_labels(self):
        ll = annotations.LabelList(idx='go',
                                   labels=[
                                       annotations.Label('a c b',
                                                         start=0,
                                                         end=5),
                                       annotations.Label('c b b',
                                                         start=5,
                                                         end=9.4),
                                       annotations.Label('a a a',
                                                         start=9.5,
                                                         end=10.2)
                                   ])
        utt = tracks.Utterance('utt-1', None, label_lists=ll)

        encoder = encoding.TokenOrdinalEncoder('go', ['a', 'b', 'c'])
        encoded = encoder.encode_utterance(utt)

        assert np.array_equal(encoded, [0, 2, 1, 2, 1, 1, 0, 0, 0])
Example #27
def utt_with_noise():
    utt = tracks.Utterance('utt-2', 'file-2')

    utt.set_label_list(
        annotations.LabelList(idx='alpha',
                              labels=[
                                  annotations.Label('music', 0, 5),
                                  annotations.Label('speech', 5, 12),
                                  annotations.Label('noise', 13, 15)
                              ]))

    utt.set_label_list(
        annotations.LabelList(idx='bravo',
                              labels=[
                                  annotations.Label('music', 0, 1),
                                  annotations.Label('speech', 2, 6)
                              ]))

    return utt
Example #28
    def test_encode_utterance_with_overlapping_labels_raises_error(self):
        ll = annotations.LabelList(idx='go',
                                   labels=[
                                       annotations.Label('a c b',
                                                         start=0,
                                                         end=5),
                                       annotations.Label('c b b',
                                                         start=2,
                                                         end=9.4),
                                       annotations.Label('a a a',
                                                         start=9.5,
                                                         end=10.2)
                                   ])
        utt = tracks.Utterance('utt-1', None, label_lists=ll)

        encoder = encoding.TokenOrdinalEncoder('go', ['a', 'b', 'c'])

        with pytest.raises(ValueError):
            encoder.encode_utterance(utt)
Example #29
    def test_encode_utterance_takes_lower_index_first(self):
        file = tracks.FileTrack('file-idx',
                                resources.sample_wav_file('wav_1.wav'))
        utt = tracks.Utterance('utt-idx', file, start=0, end=5)
        ll = annotations.LabelList(labels=[
            annotations.Label('music', 0, 3),
            annotations.Label('speech', 3, 5)
        ])
        utt.set_label_list(ll)

        enc = encoding.FrameOrdinalEncoder(['speech', 'music', 'noise'],
                                           'default',
                                           frame_settings=units.FrameSettings(
                                               32000, 16000),
                                           sr=16000)

        actual = enc.encode_utterance(utt)
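        # Frame 2 (2-4 s) is covered equally by 'music' (2-3 s) and 'speech' (3-4 s);
        # the class with the lower index in the encoder ('speech' = 0) wins the tie.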
        expected = np.array([1, 1, 0, 0]).astype(int)

        assert np.array_equal(expected, actual)
Example #30
    def test_validate_utterance_returns_completly_outlying_label(self):
        utt = tracks.Utterance('utt-idx', None, start=10.0, end=17.9)
        ll = annotations.LabelList(idx='default',
                                   labels=[
                                       annotations.Label('a',
                                                         start=-4.0,
                                                         end=-2.0),
                                       annotations.Label('b',
                                                         start=19.0,
                                                         end=22.0),
                                   ])
        utt.set_label_list(ll)

        val = validation.LabelOverflowValidator('default')
        result = val.validate_utterance(utt)
        result = sorted(result, key=lambda x: x[0])

        assert len(result) == 2

        assert result[0] == (-4.0, -2.0, 'a')
        assert result[1] == (19.0, 22.0, 'b')