Esempio n. 1
0
    def test_import_tracks(self, corpus):
        """
        Imported tracks are added to the corpus and a mapping from the
        original track id to the imported track is returned. A track
        whose id is already taken ends up under an indexed id
        (``existing_file`` -> ``existing_file_1``).
        """
        importing_tracks = [
            tracks.FileTrack('a', '/some/path.wav'),
            tracks.FileTrack('b', '/some/other/path.wav'),
            tracks.FileTrack('existing_file', '/some/otherer/path.wav'),
        ]

        idx_mapping = corpus.import_tracks(importing_tracks)

        assert corpus.num_tracks == 4

        assert 'a' in corpus.tracks
        assert corpus.tracks['a'].path == '/some/path.wav'

        assert 'b' in corpus.tracks
        assert corpus.tracks['b'].path == '/some/other/path.wav'

        assert 'existing_file_1' in corpus.tracks
        assert corpus.tracks[
            'existing_file_1'].path == '/some/otherer/path.wav'

        assert len(idx_mapping) == 3
        # Compare the ids for equality. A substring check (``in``) would
        # also accept any id that merely contains the expected one.
        assert idx_mapping['a'].idx == 'a'
        assert idx_mapping['b'].idx == 'b'
        assert idx_mapping['existing_file'].idx == 'existing_file_1'
Esempio n. 2
0
    def test_exports_wavs_from_container_tracks(self, writer, tmpdir):
        """
        Saving a dataset whose audio was relocated to a single container
        writes one wav file per track into the ``audio`` folder of the
        output path.
        """
        path = tmpdir.strpath
        container_ds_path = os.path.join(path, 'container_ds')
        out_path = os.path.join(path, 'export')

        ds = resources.create_dataset()
        ds.relocate_audio_to_single_container(container_ds_path)

        writer.save(ds, out_path)

        audio_path = os.path.join(out_path, 'audio')

        for track_idx in ('wav-1', 'wav_2', 'wav_3', 'wav_4'):
            track_path = os.path.join(audio_path, '{}.wav'.format(track_idx))

            # Check for the file first, so a missing export is reported
            # as such and not as an error while reading the duration.
            assert os.path.isfile(track_path)

            track = tracks.FileTrack(None, track_path)
            assert track.duration == pytest.approx(2.5951875)

        # The sample content is compared for the first track only.
        exported = tracks.FileTrack(None, os.path.join(audio_path, 'wav-1.wav'))
        assert np.allclose(exported.read_samples(),
                           ds.tracks['wav-1'].read_samples(),
                           atol=1e-05)
Esempio n. 3
0
    def test_encode_label_ends_at_utterance_end(self):
        """
        A label with an open end (``inf``) is encoded up to the last
        frame of the utterance.
        """
        wav_path = resources.sample_wav_file('med_len.wav')
        file_track = tracks.FileTrack('file1', wav_path)
        utterance = tracks.Utterance('utt1', file_track, start=3, end=14)

        speech_head = annotations.Label('speech', 0, 4)
        music_mid = annotations.Label('music', 4, 9)
        speech_tail = annotations.Label('speech', 9, float('inf'))
        label_list = annotations.LabelList(
            labels=[speech_head, music_mid, speech_tail])
        utterance.set_label_list(label_list)

        frame_settings = units.FrameSettings(32000, 16000)
        encoder = encoding.FrameHotEncoder(['music', 'speech', 'noise'],
                                           'default',
                                           frame_settings=frame_settings,
                                           sr=16000)

        # One row per frame, columns in the order music, speech, noise.
        speech = [0, 1, 0]
        music = [1, 0, 0]
        both = [1, 1, 0]
        rows = [speech] * 3 + [both] + [music] * 4 + [both] + [speech]
        expected = np.array(rows).astype(np.float32)

        actual = encoder.encode_utterance(utterance)

        assert np.array_equal(expected, actual)
Esempio n. 4
0
    def relocate_audio_to_wav_files(self, target_path):
        """
        Copies every track to its own wav file in the given folder.
        Every track will be stored at ``target_path/track_id.wav``.

        Afterwards the corpus only references the newly written files:
        the track list is replaced and every utterance is pointed to the
        new track with the same id.

        Args:
            target_path (str): Folder to store the wav files in.
                               It is created if it does not exist yet.
        """

        # ``exist_ok`` avoids the check-then-create race of a separate
        # ``isdir`` test; an existing non-directory still raises.
        os.makedirs(target_path, exist_ok=True)

        new_tracks = {}

        # Write one wav file per existing track and create a file track
        # with the same id pointing to it.
        for track in self.tracks.values():
            track_path = os.path.join(target_path, '{}.wav'.format(track.idx))
            sr = track.sampling_rate
            samples = track.read_samples()

            audio.write_wav(track_path, samples, sr=sr)
            new_tracks[track.idx] = tracks.FileTrack(track.idx, track_path)

        # Update track list of corpus
        self._tracks = new_tracks

        # Update utterances to point to new tracks
        for utterance in self.utterances.values():
            utterance.track = self.tracks[utterance.track.idx]
Esempio n. 5
0
    def test_process_track_online(self, processor, tmpdir):
        """
        Online processing of a 174-sample track with frame-size 20,
        hop-size 10 and chunk-size 8 yields three chunks and the
        processor is called once per chunk.
        """
        sampling_rate = 16000
        wav_path = os.path.join(tmpdir.strpath, 'file.wav')
        samples = np.random.random(174)
        librosa.output.write_wav(wav_path, samples, sampling_rate)

        file_track = tracks.FileTrack('idx', wav_path)
        chunk_gen = processor.process_track_online(file_track,
                                                   frame_size=20,
                                                   hop_size=10,
                                                   chunk_size=8)
        chunks = list(chunk_gen)

        assert len(chunks) == 3

        first_frame = chunks[0][0]
        assert np.allclose(first_frame, samples[0:20], atol=0.0001)

        # The last frame is filled up with zeros to the full frame size.
        last_frame = chunks[2][-1]
        padded_tail = np.pad(samples[160:], (0, 6), mode='constant')
        assert np.allclose(last_frame, padded_tail, atol=0.0001)

        assert chunks[0].dtype == np.float32

        assert processor.called_with_sr == [16000, 16000, 16000]
        assert processor.called_with_offset == [0, 8, 16]
        assert processor.called_with_last == [False, False, True]
        assert processor.called_with_utterance == [None, None, None]
        assert processor.called_with_corpus == [None, None, None]
Esempio n. 6
0
    def test_does_utt_match_target_format_with_invalid_format_returns_false(self):
        """An utterance on the mp3 resource does not match the target format."""
        mp3_path = resources.get_resource_path(('audio_formats', 'mp3_2_44_1k_16b.mp3'))
        utterance = tracks.Utterance('u', tracks.FileTrack('t', mp3_path))

        converter = conversion.WavAudioFileConverter()
        matches = converter._does_utt_match_target_format(utterance)

        assert not matches
Esempio n. 7
0
    def test_does_utt_match_target_format_returns_true(self):
        """An utterance on the sample file ``wav_1.wav`` matches the target format."""
        wav_path = resources.sample_wav_file('wav_1.wav')
        utterance = tracks.Utterance('u', tracks.FileTrack('t', wav_path))

        converter = conversion.WavAudioFileConverter()
        matches = converter._does_utt_match_target_format(utterance)

        assert matches
Esempio n. 8
0
    def test_read_samples(self):
        """
        Reading samples via a label returns the samples of the track in
        the range of the label, where the label times are relative to
        the start of the utterance.
        """
        path = resources.sample_wav_file('wav_1.wav')
        track = tracks.FileTrack('wav', path)
        issuer = issuers.Issuer('toni')
        utt = tracks.Utterance('t', track, issuer=issuer, start=1.0, end=2.30)

        l1 = annotations.Label('a', 0.15, 0.448)
        l2 = annotations.Label('a', 0.5, 0.73)
        ll = annotations.LabelList(labels=[l1, l2])

        utt.set_label_list(ll)

        # Offsets in the file are the label start plus the utterance
        # start (1.0 s).
        expected, __ = librosa.core.load(path,
                                         sr=None,
                                         offset=1.15,
                                         duration=0.298)
        assert np.array_equal(l1.read_samples(), expected)

        expected, __ = librosa.core.load(path,
                                         sr=None,
                                         offset=1.5,
                                         duration=1.73 - 1.5)
        assert np.array_equal(l2.read_samples(), expected)
Esempio n. 9
0
    def test_compute_online(self):
        """
        The onset strength computed online in chunks equals the onset
        strength computed with librosa on the whole signal at once.
        """
        test_file_path = resources.sample_wav_file('wav_1.wav')
        y, sr = librosa.load(test_file_path, sr=None)

        # EXPECTED
        # NOTE(review): the signal is padded by one hop (1024 samples),
        # presumably to mirror the padding of the last frame in the
        # online pipeline - confirm.
        y_pad = np.pad(y, (0, 1024), mode='constant', constant_values=0)
        S = np.abs(
            librosa.stft(y_pad, center=False, n_fft=2048, hop_length=1024))**2
        S = librosa.feature.melspectrogram(S=S, n_mels=128, sr=sr)
        S = librosa.power_to_db(S)
        exp_onsets = librosa.onset.onset_strength(S=S, center=False).T
        exp_onsets = exp_onsets.reshape(exp_onsets.shape[0], 1)

        # ACTUAL
        test_file = tracks.FileTrack('idx', test_file_path)
        onset = pipeline.OnsetStrength()
        onset_gen = onset.process_track_online(test_file,
                                               2048,
                                               1024,
                                               chunk_size=5)

        chunks = list(onset_gen)
        onsets = np.vstack(chunks)

        # ``np.allclose`` broadcasts its arguments, so the shapes have to
        # be compared explicitly.
        assert onsets.shape == exp_onsets.shape
        assert np.allclose(onsets, exp_onsets)
Esempio n. 10
0
    def test_split_sets_track(self):
        """Both utterances resulting from a split reference the original track."""
        source_track = tracks.FileTrack('file-1', '/some/path')
        utterance = tracks.Utterance('utt-1', source_track, start=0.0, end=10.0)

        parts = utterance.split([5.2])

        assert len(parts) == 2

        for position in (0, 1):
            assert parts[position].track == source_track
Esempio n. 11
0
    def test_read_samples_fix_sampling_rate(self, name, audio_path):
        """
        Reading samples with a given sampling rate returns the same
        samples as librosa loading the file as mono at that rate.
        """
        target_sr = 16000
        file_path = os.path.join(audio_path, name)
        track = tracks.FileTrack('some_idx', file_path)

        reference, __ = librosa.core.load(file_path, sr=target_sr, mono=True)
        samples = track.read_samples(sr=target_sr)

        assert np.array_equal(samples, reference)
Esempio n. 12
0
    def test_import_utterance_no_track(self, corpus):
        """
        Importing an utterance that references the track ``notexist``
        raises a ``ValueError``.
        """
        unknown_track = tracks.FileTrack('notexist', 'notexist')
        issuer = corpus.issuers['existing_issuer']
        utterance = tracks.Utterance('a', unknown_track, issuer, 0, 10)

        with pytest.raises(ValueError):
            corpus.import_utterances([utterance])
Esempio n. 13
0
    def test_process_empty_track_raises_error(self, processor, tmpdir):
        """Processing a track without any samples raises a ``ValueError``."""
        no_samples = np.random.random(0)
        wav_path = os.path.join(tmpdir.strpath, 'file.wav')
        librosa.output.write_wav(wav_path, no_samples, 16000)

        empty_track = tracks.FileTrack('idx', wav_path)

        with pytest.raises(ValueError):
            processor.process_track(empty_track,
                                    frame_size=4096,
                                    hop_size=2048,
                                    sr=16000)
Esempio n. 14
0
def test_utt_read_samples(benchmark):
    """
    Benchmark ``run`` with utterances on a wav and on an mp3 track:
    per track one unbounded utterance, one with only a start, one with
    only an end and one with both.
    """
    sources = [
        (('wav_files', 'med_len.wav'),
         [{}, {'start': 2.8}, {'end': 10.2}, {'start': 2.4, 'end': 9.8}]),
        (('audio_formats', 'mp3_2_44_1k_16b.mp3'),
         [{}, {'start': 2.8}, {'end': 4.9}, {'start': 0.4, 'end': 4.8}]),
    ]

    utterances = []

    for resource_parts, bounds in sources:
        file_path = resources.get_test_resource_path(resource_parts)
        file_track = tracks.FileTrack('idx', file_path)

        for time_range in bounds:
            utterances.append(
                tracks.Utterance('uidx', file_track, **time_range))

    benchmark(run, utterances)
Esempio n. 15
0
    def _load(self, path):
        """
        Build the corpus from the articles found for ``path``.

        For every article one speaker per distinct reader name and one
        track per distinct audio file is imported. Every segment for
        which an audio file is found becomes an utterance holding the
        segment text as word-transcript, unless its id is listed in
        ``self.invalid_utterance_ids``.
        """
        loaded = audiomate.Corpus()

        speakers_by_reader = {}
        tracks_by_file = {}

        for article_path in sorted(self.get_articles(path)):
            audio_files = self.get_audio_file_info(article_path)
            reader_name, reader_gender = self.get_reader_info(article_path)
            segments = self.get_segments(article_path)

            if reader_name in speakers_by_reader:
                speaker = speakers_by_reader[reader_name]
            else:
                # Speaker ids are running numbers, zero-padded to 8 chars
                speaker_idx = '{:0>8}'.format(len(speakers_by_reader))
                speaker = issuers.Speaker(speaker_idx, gender=reader_gender)
                speakers_by_reader[reader_name] = speaker
                loaded.import_issuers(speaker)

            for start, end, text in segments:
                file_path = self.find_audio_file_for_segment(
                    start, end, audio_files)

                if file_path is None:
                    continue

                if file_path in tracks_by_file:
                    track = tracks_by_file[file_path]
                else:
                    # Track ids are running numbers, zero-padded to 10 chars
                    track_idx = '{:0>10}'.format(len(tracks_by_file))
                    track = tracks.FileTrack(track_idx, file_path)
                    tracks_by_file[file_path] = track
                    loaded.import_tracks(track)

                # Segment times are shifted by the value stored for the
                # audio file to get times relative to the track.
                track_offset = audio_files[file_path]
                utt_start = start - track_offset
                utt_end = end - track_offset

                utt_idx = '{}_{}_{}_{}'.format(speaker.idx, track.idx,
                                               int(start * 1000),
                                               int(end * 1000))

                if utt_idx in self.invalid_utterance_ids:
                    continue

                utterance = loaded.new_utterance(utt_idx,
                                                 track.idx,
                                                 issuer_idx=speaker.idx,
                                                 start=utt_start,
                                                 end=utt_end)

                transcript = annotations.LabelList.create_single(
                    text, audiomate.corpus.LL_WORD_TRANSCRIPT)
                utterance.set_label_list(transcript)

        return audiomate.Corpus.from_corpus(loaded)
Esempio n. 16
0
def test_write_wav(tmpdir):
    """
    Samples written with ``audio.write_wav`` end up in a file and can
    be read back within a tolerance of ``1e-4``.
    """
    sampling_rate = 16000
    original = np.random.random(50000)
    wav_path = os.path.join(tmpdir.strpath, 'audio.wav')

    audio.write_wav(wav_path, original, sr=sampling_rate)

    assert os.path.isfile(wav_path)

    restored = tracks.FileTrack('idx', wav_path).read_samples()
    assert np.allclose(original, restored, atol=1.e-04)
Esempio n. 17
0
def corpus():
    """
    Create a corpus containing one track (``existing_file``), one
    issuer (``existing_issuer``) and one utterance (``existing_utt``).
    """
    existing_track = tracks.FileTrack('existing_file', '../any/path.wav')
    existing_issuer = issuers.Issuer('existing_issuer')
    existing_utt = tracks.Utterance('existing_utt',
                                    existing_track,
                                    issuer=existing_issuer)

    ds = audiomate.Corpus()
    ds.tracks[existing_track.idx] = existing_track
    ds.issuers['existing_issuer'] = existing_issuer
    ds.utterances['existing_utt'] = existing_utt

    return ds
Esempio n. 18
0
    def test_read_samples_range(self, name, audio_path):
        """
        Reading samples with offset and duration returns the same
        samples as librosa loading that range of the file as mono.
        """
        time_range = {'offset': 1.0, 'duration': 1.7}
        file_path = os.path.join(audio_path, name)
        track = tracks.FileTrack('some_idx', file_path)

        reference, __ = librosa.core.load(file_path,
                                          sr=None,
                                          mono=True,
                                          **time_range)
        samples = track.read_samples(**time_range)

        assert np.array_equal(samples, reference)
Esempio n. 19
0
def generate_tracks(n, rand=None):
    """
    Create ``n`` file tracks with the ids ``track-0``, ``track-1``, ...
    pointing to the fake paths ``/fake/<track-id>.wav``.

    Args:
        n (int): Number of tracks to create.
        rand (random.Random): Not used for creating the tracks; only
                              kept so callers passing it keep working.

    Returns:
        list: The created tracks, ordered by index.
    """
    track_ids = ['track-{}'.format(i) for i in range(n)]

    return [
        tracks.FileTrack(track_idx, '/fake/{}.wav'.format(track_idx))
        for track_idx in track_ids
    ]
Esempio n. 20
0
    def test_process_track_with_downsampling(self, processor, tmpdir):
        """
        A track written with sampling rate 4 is processed with sampling
        rate 2 in a single call of the processor.
        """
        samples = np.random.random(22)
        wav_path = os.path.join(tmpdir.strpath, 'file.wav')
        librosa.output.write_wav(wav_path, samples, 4)

        track = tracks.FileTrack('idx', wav_path)
        result = processor.process_track(track,
                                         frame_size=4,
                                         hop_size=2,
                                         sr=2)

        assert result.shape == (5, 4)

        assert processor.called_with_sr == [2]
        assert processor.called_with_offset == [0]
        assert processor.called_with_last == [True]
        assert processor.called_with_utterance == [None]
        assert processor.called_with_corpus == [None]
Esempio n. 21
0
    def test_process_track_smaller_than_frame_size(self, processor, tmpdir):
        """
        A track with fewer samples than one frame results in a single
        frame that is filled up with zeros.
        """
        frame_size = 4096
        samples = np.random.random(22)
        wav_path = os.path.join(tmpdir.strpath, 'file.wav')
        librosa.output.write_wav(wav_path, samples, 16000)

        track = tracks.FileTrack('idx', wav_path)
        result = processor.process_track(track,
                                         frame_size=frame_size,
                                         hop_size=2048,
                                         sr=16000)

        assert result.shape == (1, 4096)

        zero_padded = np.pad(samples, (0, 4074), mode='constant')
        assert np.allclose(result[0], zero_padded, atol=0.0001)

        assert processor.called_with_sr == [16000]
        assert processor.called_with_offset == [0]
        assert processor.called_with_last == [True]
        assert processor.called_with_utterance == [None]
        assert processor.called_with_corpus == [None]
Esempio n. 22
0
    def setup_method(self):
        """
        Create the objects shared by the tests: four label lists (two
        of them with the same idx ``charlie``), a track on the sample
        file ``wav_1.wav``, an issuer and an utterance that is created
        with all four label lists.
        """

        def build_labels(*specs):
            # Every spec is a (value, start, end) tuple
            return [annotations.Label(*spec) for spec in specs]

        self.ll_1 = annotations.LabelList(
            idx='alpha',
            labels=build_labels(
                ('a', 3.2, 4.5),
                ('b', 5.1, 8.9),
                ('c', 7.2, 10.5),
                ('d', 10.5, 14),
                ('d', 15, 18),
            ))

        self.ll_2 = annotations.LabelList(
            idx='bravo',
            labels=build_labels(
                ('a', 1.0, 4.2),
                ('e', 4.2, 7.9),
                ('c', 7.2, 10.5),
                ('f', 10.5, 14),
                ('d', 15, 17.3),
            ))

        self.ll_duplicate_idx = annotations.LabelList(
            idx='charlie',
            labels=build_labels(
                ('t', 1.0, 4.2),
                ('h', 4.2, 7.9),
            ))

        self.ll_3 = annotations.LabelList(
            idx='charlie',
            labels=build_labels(
                ('a', 1.0, 4.2),
                ('g', 4.2, 7.9),
            ))

        wav_path = resources.sample_wav_file('wav_1.wav')
        self.track = tracks.FileTrack('wav', wav_path)
        self.issuer = issuers.Issuer('toni')

        all_label_lists = [
            self.ll_1,
            self.ll_2,
            self.ll_duplicate_idx,
            self.ll_3,
        ]
        self.utt = tracks.Utterance('test',
                                    self.track,
                                    issuer=self.issuer,
                                    start=1.25,
                                    end=1.30,
                                    label_lists=all_label_lists)
Esempio n. 23
0
    def test_read_frames(self, tmpdir):
        """
        Reading 10044 samples with frame-size 400 and hop-size 160
        yields 62 float32 frames; the last frame is zero-padded and is
        the only one flagged as last.
        """
        samples = np.random.random(10044)
        wav_path = os.path.join(tmpdir.strpath, 'file.wav')
        librosa.output.write_wav(wav_path, samples, 16000)

        track = tracks.FileTrack('some_idx', wav_path)
        items = list(track.read_frames(frame_size=400, hop_size=160))

        # Every item holds the frame at index 0 and the last-flag at index 1
        frames = np.array([item[0] for item in items])
        is_last = [item[1] for item in items]

        assert frames.shape == (62, 400)
        assert frames.dtype == np.float32
        assert np.allclose(frames[0], samples[:400], atol=0.0001)

        padded_tail = np.pad(samples[9760:], (0, 116), mode='constant')
        assert np.allclose(frames[61], padded_tail, atol=0.0001)

        assert is_last[:-1] == [False] * (len(items) - 1)
        assert is_last[-1]
Esempio n. 24
0
    def test_process_track(self, processor, tmpdir):
        """
        Processing a 22-sample track with frame-size 4 and hop-size 2
        yields 10 float32 frames from a single call of the processor.
        """
        samples = np.random.random(22)
        wav_path = os.path.join(tmpdir.strpath, 'file.wav')
        librosa.output.write_wav(wav_path, samples, 4)

        track = tracks.FileTrack('idx', wav_path)
        result = processor.process_track(track, frame_size=4, hop_size=2)

        assert result.shape == (10, 4)
        assert result.dtype == np.float32

        first_frame, last_frame = result[0], result[9]
        assert np.allclose(first_frame, samples[0:4], atol=0.0001)
        assert np.allclose(last_frame, samples[18:22], atol=0.0001)

        assert processor.called_with_sr == [4]
        assert processor.called_with_offset == [0]
        assert processor.called_with_last == [True]
        assert processor.called_with_utterance == [None]
        assert processor.called_with_corpus == [None]
Esempio n. 25
0
    def test_encode_utterance_takes_lower_index_first(self):
        """
        The ordinal encoding of an utterance holds one label index per
        frame, where the index is the position of the label in the list
        passed to the encoder (``speech`` = 0, ``music`` = 1).
        """
        track = tracks.FileTrack('file-idx',
                                 resources.sample_wav_file('wav_1.wav'))
        utt = tracks.Utterance('utt-idx', track, start=0, end=5)
        ll = annotations.LabelList(labels=[
            annotations.Label('music', 0, 3),
            annotations.Label('speech', 3, 5)
        ])
        utt.set_label_list(ll)

        enc = encoding.FrameOrdinalEncoder(['speech', 'music', 'noise'],
                                           'default',
                                           frame_settings=units.FrameSettings(
                                               32000, 16000),
                                           sr=16000)

        actual = enc.encode_utterance(utt)
        # ``np.int`` was only an alias of the builtin ``int`` and has been
        # removed in NumPy 1.24; the builtin gives the same dtype.
        expected = np.array([1, 1, 0, 0]).astype(int)

        assert np.array_equal(expected, actual)
Esempio n. 26
0
    def new_file(self, path, track_idx, copy_file=False):
        """
        Adds a new audio file to the corpus with the given data.

        Parameters:
            path (str): Path of the file to add.
            track_idx (str): The id to associate the file-track with.
                             If the id is already in use, an indexed
                             variant of it is used instead.
            copy_file (bool): If True the file is copied to the data set folder, otherwise the given
                              path is used directly.

        Returns:
            FileTrack: The newly added file.

        Raises:
            ValueError: If ``copy_file`` is True, but the corpus has no
                        path or the path is not an existing directory.
        """

        new_file_idx = track_idx
        new_file_path = os.path.abspath(path)

        # Add index to idx if already existing
        if new_file_idx in self._tracks:
            new_file_idx = naming.index_name_if_in_list(
                new_file_idx, self._tracks.keys())

        # Copy file to default file dir
        if copy_file:
            # Check for an unset path explicitly: ``os.path.isdir(None)``
            # raises a TypeError instead of returning False.
            if self.path is None or not os.path.isdir(self.path):
                raise ValueError(
                    'To copy file the dataset needs to have a path.')

            __, ext = os.path.splitext(path)

            new_file_folder = os.path.join(self.path, DEFAULT_FILE_SUBDIR)
            new_file_path = os.path.join(new_file_folder,
                                         '{}{}'.format(new_file_idx, ext))
            os.makedirs(new_file_folder, exist_ok=True)
            shutil.copy(path, new_file_path)

        # Create file obj
        new_file = tracks.FileTrack(new_file_idx, new_file_path)
        self._tracks[new_file_idx] = new_file

        return new_file
Esempio n. 27
0
    def test_compute_online(self):
        """
        The tempogram computed online in chunks equals the tempogram
        computed with librosa on the whole signal at once.
        """
        # Data: 41523 samples, 16 kHz
        # yields 40 frames with frame-size 2048 and hop-size 1024
        frame_size = 2048
        hop_size = 1024
        wav_path = resources.sample_wav_file('wav_1.wav')
        samples, sampling_rate = librosa.load(wav_path, sr=None)

        # EXPECTED
        padded = np.pad(samples, (0, 1024), mode='constant', constant_values=0)
        spectrum = librosa.stft(padded, center=False, n_fft=2048, hop_length=1024)
        power = np.abs(spectrum) ** 2
        mel = librosa.feature.melspectrogram(S=power, n_mels=128, sr=sampling_rate)
        mel_db = librosa.power_to_db(mel)
        onset_envelope = librosa.onset.onset_strength(S=mel_db, center=False)
        expected = librosa.feature.tempogram(onset_envelope=onset_envelope,
                                             sr=sampling_rate,
                                             win_length=4,
                                             center=True).T

        # ACTUAL
        track = tracks.FileTrack('idx', wav_path)
        step = pipeline.Tempogram(win_length=4)
        chunk_gen = step.process_track_online(track, frame_size, hop_size, chunk_size=5)
        actual = np.vstack(list(chunk_gen))

        assert np.allclose(actual, expected)
Esempio n. 28
0
def sample_utterance():
    """Create the utterance ``test`` on a track for the sample file ``wav_1.wav``."""
    wav_path = resources.sample_wav_file('wav_1.wav')
    return tracks.Utterance('test', tracks.FileTrack('test_file', wav_path))
Esempio n. 29
0
def test_read_samples(benchmark):
    """Benchmark ``run`` with a track for the test resource ``med_len.wav``."""
    resource_parts = ('wav_files', 'med_len.wav')
    file_track = tracks.FileTrack(
        'idx', resources.get_test_resource_path(resource_parts))

    benchmark(run, file_track)
Esempio n. 30
0
 def test_contains_track_returns_false(self, ds):
     """A track with the id ``wav-1`` but another path is not contained in the dataset."""
     other_track = tracks.FileTrack('wav-1', '/some/other/path/here')
     assert not ds.contains_track(other_track)