예제 #1
0
    def test_extract_files(self, tmpdir):
        """Extract the sample VoxForge archive and check the resulting files.

        ``tmpdir`` must expose a ``strpath`` attribute (presumably pytest's
        ``tmpdir`` fixture); the archive is extracted into that directory.
        """
        archive_path = resources.get_resource_path(
            ['sample_files', 'voxforge_sample.tgz'])
        target_dir = tmpdir.strpath
        extracted = voxforge.VoxforgeDownloader.extract_files(
            [archive_path], target_dir)

        speaker_dir = os.path.join(target_dir, 'Aaron-20080318-kdl')
        etc_dir = os.path.join(speaker_dir, 'etc')
        wav_dir = os.path.join(speaker_dir, 'wav')

        assert os.path.isfile(os.path.join(etc_dir, 'README'))

        # Recordings b0019.wav through b0028.wav must all be present.
        for number in range(19, 29):
            wav_name = 'b{:04d}.wav'.format(number)
            assert os.path.isfile(os.path.join(wav_dir, wav_name))

        # Exactly one extracted entry is reported, named after the archive.
        assert len(extracted) == 1
        assert os.path.join(target_dir, 'voxforge_sample') in extracted
예제 #2
0
파일: test_base.py 프로젝트: ynop/audiomate
def ds():
    """Return the sample dataset with the paths of four tracks replaced.

    The tracks ``wav-1`` and ``wav_4`` point to sample wav files, ``wav_2``
    to an mp3 file and ``wav_3`` to a flac file.
    """
    corpus = resources.create_dataset()

    # Resolve every path first, then assign them in the same order.
    track_paths = (
        ('wav-1', resources.sample_wav_file('wav_1.wav')),
        ('wav_2', resources.get_resource_path(
            ('audio_formats', 'mp3_2_44_1k_16b.mp3'))),
        ('wav_3', resources.get_resource_path(
            ('audio_formats', 'flac_1_16k_16b.flac'))),
        ('wav_4', resources.sample_wav_file('wav_4.wav')),
    )

    for track_idx, track_path in track_paths:
        corpus.tracks[track_idx].path = track_path

    return corpus
예제 #3
0
def sample_container():
    """Yield an opened ``AudioContainer`` for the sample container resource.

    The container is closed when the generator is resumed after the yield
    (presumably as fixture teardown; the decorator is not visible here).
    """
    container = containers.AudioContainer(
        resources.get_resource_path(['sample_files', 'audio_container']))
    container.open()
    yield container
    container.close()
예제 #4
0
    def test_download_two_subsets(self, tmpdir):
        """Download two LibriSpeech subsets against mocked HTTP responses.

        Every URL in ``librispeech.SUBSETS`` is mocked with the matching
        sample archive; afterwards the subset folders and the metadata files
        must exist in the target folder.
        """
        target_dir = tmpdir.strpath
        requested = ('dev-clean', 'test-clean')
        downloader = io.LibriSpeechDownloader(subsets=list(requested))

        with requests_mock.Mocker() as mocker:
            for subset_name, subset_url in librispeech.SUBSETS.items():
                archive_path = resources.get_resource_path([
                    'sample_archives',
                    'librispeech',
                    '{}.tar.gz'.format(subset_name),
                ])
                with open(archive_path, 'rb') as archive:
                    payload = archive.read()
                # Return any size (doesn't matter, only for prints)
                mocker.head(subset_url, headers={'Content-Length': '100'})
                mocker.get(subset_url, content=payload)

            downloader.download(target_dir)

        for subset in requested:
            assert os.path.isdir(os.path.join(target_dir, subset))

        metadata_files = (
            'BOOKS.TXT',
            'CHAPTERS.TXT',
            'LICENSE.TXT',
            'README.TXT',
            'SPEAKERS.TXT',
        )
        for file_name in metadata_files:
            assert os.path.isfile(os.path.join(target_dir, file_name))
예제 #5
0
def sample_audio_list_tar_bz():
    """Return the bytes of the sample ``sentences_with_audio.tar.bz2`` file."""
    archive_path = resources.get_resource_path([
        'sample_corpora',
        'tatoeba_download',
        'sentences_with_audio.tar.bz2',
    ])
    with open(archive_path, 'rb') as archive:
        return archive.read()
예제 #6
0
    def test_does_utt_match_target_format_with_invalid_format_returns_false(self):
        """An utterance backed by the sample mp3 file must not match."""
        mp3_path = resources.get_resource_path(
            ('audio_formats', 'mp3_2_44_1k_16b.mp3'))
        utterance = tracks.Utterance('u', tracks.FileTrack('t', mp3_path))

        converter = conversion.WavAudioFileConverter()
        matches = converter._does_utt_match_target_format(utterance)
        assert not matches
예제 #7
0
def tar_data():
    """Return the bytes of the sample ``cv_corpus_v1.tar.gz`` archive."""
    archive_path = resources.get_resource_path(
        ['sample_files', 'cv_corpus_v1.tar.gz'])

    with open(archive_path, 'rb') as archive:
        content = archive.read()
    return content
예제 #8
0
def sample_feature_container():
    """Yield an opened ``FeatureContainer`` for the sample feature resource.

    The container is closed when the generator is resumed after the yield
    (presumably as fixture teardown; the decorator is not visible here).
    """
    feat_container = containers.FeatureContainer(
        resources.get_resource_path(['sample_files', 'feat_container']))
    feat_container.open()
    yield feat_container
    feat_container.close()
예제 #9
0
    def test_load_word_semicolon_separated(self):
        """Load ``semicolon.txt`` with ``word_sep=';'`` and check its entries."""
        lexicon_path = resources.get_resource_path(
            ['separator', 'semicolon.txt'])

        lexicon = Lexicon.load(lexicon_path, word_sep=';', token_sep=' ')

        # Each word is expected to map to one entry: the list of its letters.
        for word in ('alpha', 'bravo', 'charlie'):
            assert lexicon.get(word) == [list(word)]
예제 #10
0
def zip_data():
    """Return the bytes of the sample ``zip_sample_with_subfolder.zip``."""
    archive_path = resources.get_resource_path(
        ['sample_files', 'zip_sample_with_subfolder.zip'])

    with open(archive_path, 'rb') as archive:
        content = archive.read()
    return content
예제 #11
0
def create_sample_dataset(temp_dir):
    """Build a small corpus with three tracks, three issuers, four utterances.

    Args:
        temp_dir: Converted with ``str()`` and passed to ``audiomate.Corpus``.

    Returns:
        The populated corpus. It contains the subviews ``train``
        (utt-1, utt-2, utt-3) and ``dev`` (utt-4), and every utterance
        carries a word-transcript label list with a single label.
    """
    corpus = audiomate.Corpus(str(temp_dir))

    wav_1_path = resources.sample_wav_file('wav_1.wav')
    wav_2_path = resources.sample_wav_file('wav_2.wav')
    flac_path = resources.get_resource_path(
        ['audio_formats', 'flac_1_16k_16b.flac'])

    track_1 = corpus.new_file(wav_1_path, track_idx='wav_1')
    track_2 = corpus.new_file(wav_2_path, track_idx='wav_2')
    track_3 = corpus.new_file(flac_path, track_idx='wav_3')

    male_speaker = Speaker('spk-1', gender=Gender.MALE)
    female_speaker = Speaker('spk-2', gender=Gender.FEMALE)
    plain_issuer = Issuer('spk-3')

    corpus.import_issuers([male_speaker, female_speaker, plain_issuer])

    # NOTE(review): the numeric comments below are kept from the original;
    # presumably the duration in seconds of the referenced track - confirm.
    utterances = [
        # 2.5951875
        corpus.new_utterance(
            'utt-1', track_1.idx, issuer_idx=male_speaker.idx),
        corpus.new_utterance(
            'utt-2', track_2.idx, issuer_idx=female_speaker.idx,
            start=0, end=1.5),
        corpus.new_utterance(
            'utt-3', track_2.idx, issuer_idx=female_speaker.idx,
            start=1.5, end=2.5),
        # 5.0416875
        corpus.new_utterance(
            'utt-4', track_3.idx, issuer_idx=plain_issuer.idx),
    ]
    transcripts = ['who am i', 'who are you', 'who is he', 'who are they']

    # Attach one word-transcript label list per utterance, in creation order.
    for utterance, text in zip(utterances, transcripts):
        word_labels = LabelList(audiomate.corpus.LL_WORD_TRANSCRIPT,
                                labels=[Label(text)])
        utterance.set_label_list(word_labels)

    train_view = subview.Subview(
        corpus,
        filter_criteria=[
            subview.MatchingUtteranceIdxFilter(
                utterance_idxs={'utt-1', 'utt-2', 'utt-3'}),
        ])
    dev_view = subview.Subview(
        corpus,
        filter_criteria=[
            subview.MatchingUtteranceIdxFilter(utterance_idxs={'utt-4'}),
        ])

    corpus.import_subview('train', train_view)
    corpus.import_subview('dev', dev_view)

    return corpus
예제 #12
0
    def test_write_wav_scp(self, writer, tmpdir):
        """Save the sample dataset and check the rows written to ``wav.scp``.

        Each row must hold a track idx followed by the absolute path of the
        matching file in the ``wav_files`` resource folder.
        """
        corpus = resources.create_dataset()
        out_dir = tmpdir.strpath
        writer.save(corpus, out_dir)

        scp_path = os.path.join(out_dir, 'wav.scp')
        rows = textfile.read_separated_lines(scp_path,
                                             separator=' ',
                                             max_columns=2)

        wav_dir = os.path.abspath(resources.get_resource_path(['wav_files']))

        expected_rows = (
            ('wav-1', 'wav_1.wav'),
            ('wav_2', 'wav_2.wav'),
            ('wav_3', 'wav_3.wav'),
            ('wav_4', 'wav_4.wav'),
        )
        # Index explicitly so a missing row fails instead of being skipped.
        for row_no, (track_idx, file_name) in enumerate(expected_rows):
            assert rows[row_no][0] == track_idx
            assert rows[row_no][1] == os.path.join(wav_dir, file_name)
예제 #13
0
def sample_tgz_content():
    """Return the bytes of the sample ``voxforge_sample.tgz`` archive."""
    archive_path = resources.get_resource_path(
        ['sample_files', 'voxforge_sample.tgz'])
    with open(archive_path, 'rb') as archive:
        content = archive.read()
    return content
예제 #14
0
def sample_response():
    """Return the text of the sample ``voxforge_response.html`` file."""
    html_path = resources.get_resource_path(
        ['sample_files', 'voxforge_response.html'])
    # Opened in text mode with the platform default encoding.
    with open(html_path, 'r') as html_file:
        text = html_file.read()
    return text
예제 #15
0
def sample_sentence_list_path():
    """Return the resource path of the sample Tatoeba ``sentences.csv``."""
    path_parts = ['sample_corpora', 'tatoeba_download', 'sentences.csv']
    return resources.get_resource_path(path_parts)
예제 #16
0
def sample_feature_container():
    """Yield an opened ``assets.FeatureContainer`` for the sample resource.

    The container is closed when the generator is resumed after the yield
    (presumably as fixture teardown; the decorator is not visible here).
    """
    feat_path = resources.get_resource_path(
        ['sample_files', 'feat_container'])
    feat_container = assets.FeatureContainer(feat_path)
    feat_container.open()
    yield feat_container
    feat_container.close()
예제 #17
0
def sample_tar_bz2_path():
    """Return the resource path of the sample ``sentences.tar.bz2`` file."""
    path_parts = ['sample_files', 'sentences.tar.bz2']
    return resources.get_resource_path(path_parts)
예제 #18
0
def sample_zip_path():
    """Return the resource path of the sample ``zip_sample.zip`` file."""
    path_parts = ['sample_files', 'zip_sample.zip']
    return resources.get_resource_path(path_parts)
예제 #19
0
def sample_audio_content():
    """Return the bytes of the sample ``wav_2.wav`` file."""
    wav_path = resources.get_resource_path(['wav_files', 'wav_2.wav'])
    with open(wav_path, 'rb') as wav_file:
        content = wav_file.read()
    return content
예제 #20
0
def sample_zip_data():
    """Return the bytes of the sample ``zip_sample.zip`` archive."""
    zip_path = resources.get_resource_path(['sample_files', 'zip_sample.zip'])
    with open(zip_path, 'rb') as zip_file:
        content = zip_file.read()
    return content