Ejemplos de TatoebaDownloader en Python, ejemplos de audiomate.corpus.io.TatoebaDownloader en Python

Ejemplo n.º 1

0

Mostrar archivo

    def test_download_with_filter_license(self, sample_audio_list_tar_bz,
                                          sample_sentence_list_tar_bz,
                                          sample_audio_content, tmpdir):
        """Check which audio files are written when a license filter is set.

        The downloader is restricted to ``'CC BY-NC-ND 3.0'``. All HTTP
        traffic is served by ``requests_mock``; after the download only
        ``ita/6921520.mp3`` (plus ``meta.txt``) is expected in the target
        directory.
        """
        # Every audio URL is mocked, including the ones expected to be skipped.
        mocked_audio_urls = (
            'https://audio.tatoeba.org/sentences/eng/141.mp3',
            'https://audio.tatoeba.org/sentences/fra/247.mp3',
            'https://audio.tatoeba.org/sentences/epo/1355.mp3',
            'https://audio.tatoeba.org/sentences/deu/6286.mp3',
            'https://audio.tatoeba.org/sentences/ita/6921520.mp3',
        )
        # (language folder, file name, expected to exist after download)
        expectations = (
            ('eng', '141.mp3', False),
            ('fra', '247.mp3', False),
            ('epo', '1355.mp3', False),
            ('deu', '6286.mp3', False),
            ('ita', '6921520.mp3', True),
        )

        downloader = io.TatoebaDownloader(include_licenses=['CC BY-NC-ND 3.0'])
        target_dir = tmpdir.strpath

        with requests_mock.Mocker() as mock:
            mock.get(tatoeba.AUDIO_LIST_URL, content=sample_audio_list_tar_bz)
            mock.get(tatoeba.SENTENCE_LIST_URL,
                     content=sample_sentence_list_tar_bz)
            for audio_url in mocked_audio_urls:
                mock.get(audio_url, content=sample_audio_content)

            downloader.download(target_dir)

            assert os.path.isfile(os.path.join(target_dir, 'meta.txt'))

            for lang, file_name, should_exist in expectations:
                audio_path = os.path.join(target_dir, 'audio', lang, file_name)
                assert os.path.isfile(audio_path) == should_exist

Ejemplo n.º 2

0

Mostrar archivo

Archivo: test_tatoeba.py Proyecto: toddrme2178/audiomate

    def test_load_audio_list_filter_license(self, sample_audio_list_path):
        """Loading the audio list with a license filter keeps matching rows only.

        With ``include_licenses=['CC BY-NC 4.0']`` the sample list is expected
        to yield exactly the three entries below.
        """
        # Entry id -> three-element record as returned by ``_load_audio_list``.
        expected = {
            '247': ['gretelen', 'CC BY-NC 4.0', None],
            '6286': ['Phoenix', 'CC BY-NC 4.0', None],
            '2952354': ['pencil', 'CC BY-NC 4.0', None],
        }

        loader = io.TatoebaDownloader(include_licenses=['CC BY-NC 4.0'])
        loaded = loader._load_audio_list(sample_audio_list_path)

        assert len(loaded) == len(expected)

        for entry_id, record in expected.items():
            assert loaded[entry_id] == record

Ejemplo n.º 3

0

Mostrar archivo

Archivo: test_tatoeba.py Proyecto: toddrme2178/audiomate

    def test_load_audio_list(self, sample_audio_list_path):
        """Loading the audio list with a default downloader yields five entries.

        The downloader is built without arguments; the sample list is expected
        to produce exactly the records listed below.
        """
        # Entry id -> three-element record as returned by ``_load_audio_list``.
        expected = {
            '247': ['gretelen', 'CC BY-NC 4.0', None],
            '1881': ['CK', 'CC BY-NC-ND 3.0', 'http://www.manythings.org/tatoeba'],
            '6286': ['Phoenix', 'CC BY-NC 4.0', None],
            '2952354': ['pencil', 'CC BY-NC 4.0', None],
            '6921520': ['CK', 'CC BY-NC-ND 3.0', 'http://www.manythings.org/tatoeba'],
        }

        loader = io.TatoebaDownloader()
        loaded = loader._load_audio_list(sample_audio_list_path)

        assert len(loaded) == len(expected)

        for entry_id, record in expected.items():
            assert loaded[entry_id] == record

Ejemplo n.º 4

0

Mostrar archivo

Archivo: test_tatoeba.py Proyecto: toddrme2178/audiomate

    def test_load_sentence_list_filter_languages(self, sample_sentence_list_path):
        """Loading sentences with a language filter keeps only those languages.

        With ``include_languages=['deu', 'eng']`` the sample list is expected
        to yield exactly the five entries below.
        """
        # Entry id -> [language code, sentence text].
        expected = {
            '141': ['eng', 'I want you to tell me why you did that.'],
            '511': ['deu', 'Wer will heiße Schokolade?'],
            '524': ['deu', 'Das ist zu teuer!'],
            '6286': ['deu', 'Ich denke, ich habe genug gehört.'],
            '299609': ['eng', 'He washes his car at least once a week.'],
        }

        loader = io.TatoebaDownloader(include_languages=['deu', 'eng'])
        loaded = loader._load_sentence_list(sample_sentence_list_path)

        assert len(loaded) == len(expected)

        for entry_id, record in expected.items():
            assert loaded[entry_id] == record

Ejemplo n.º 5

0

Mostrar archivo

Archivo: test_tatoeba.py Proyecto: toddrme2178/audiomate

    def test_load_audio_list_all(self, sample_audio_list_path):
        """Loading the audio list with ``include_empty_licence=True``.

        In this mode the sample list is expected to yield seven entries,
        including records whose license field is ``None``.
        """
        # Entry id -> three-element record as returned by ``_load_audio_list``.
        expected = {
            '141': ['BraveSentry', None, None],
            '247': ['gretelen', 'CC BY-NC 4.0', None],
            '1355': ['Nero', None, None],
            '1881': ['CK', 'CC BY-NC-ND 3.0', 'http://www.manythings.org/tatoeba'],
            '6286': ['Phoenix', 'CC BY-NC 4.0', None],
            '2952354': ['pencil', 'CC BY-NC 4.0', None],
            '6921520': ['CK', 'CC BY-NC-ND 3.0', 'http://www.manythings.org/tatoeba'],
        }

        loader = io.TatoebaDownloader(include_empty_licence=True)
        loaded = loader._load_audio_list(sample_audio_list_path)

        assert len(loaded) == len(expected)

        for entry_id, record in expected.items():
            assert loaded[entry_id] == record

Ejemplo n.º 6

0

Mostrar archivo

Archivo: test_tatoeba.py Proyecto: toddrme2178/audiomate

    def test_load_sentence_list(self, sample_sentence_list_path):
        """Loading the sentence list with a default downloader yields eight entries.

        The downloader is built without arguments; the sample list is expected
        to produce exactly the records listed below.
        """
        # Entry id -> [language code, sentence text].
        expected = {
            '141': ['eng', 'I want you to tell me why you did that.'],
            '247': ['fra', 'Comment ça, je suis trop vieille pour ce poste ?'],
            '511': ['deu', 'Wer will heiße Schokolade?'],
            '524': ['deu', 'Das ist zu teuer!'],
            '1355': ['epo', 'Mi panikis la homojn.'],
            '6286': ['deu', 'Ich denke, ich habe genug gehört.'],
            '299609': ['eng', 'He washes his car at least once a week.'],
            '6921520': ['ita', 'Ho una zia che abita a Osaka.'],
        }

        loader = io.TatoebaDownloader()
        loaded = loader._load_sentence_list(sample_sentence_list_path)

        assert len(loaded) == len(expected)

        for entry_id, record in expected.items():
            assert loaded[entry_id] == record

Ejemplo n.º 7

0

Mostrar archivo

Archivo: download_data.py Proyecto: DanBmh/deepspeech-german

def main():
    """Download the corpora selected on the command line.

    Command line:
        target_path     Directory under which each corpus gets its own
                        sub-directory.
        --tuda, --voxforge, --swc, --mailabs, --common_voice, --tatoeba,
        --zamia_speech  Boolean switches; each one enables the download of
                        the corresponding corpus.

    Corpora are processed in the order listed above. Nothing is downloaded
    when no switch is given.
    """
    # (flag name == sub-directory name, progress message, downloader factory).
    # Factories are lambdas so a downloader is only looked up and built when
    # its flag was actually passed.
    corpora = (
        ("tuda", "Downloading tuda ...",
         lambda: io.TudaDownloader()),
        ("voxforge", "Downloading voxforge ...",
         lambda: io.VoxforgeDownloader(lang="de")),
        ("swc", "Downloading swc ...",
         lambda: io.SWCDownloader(lang="de")),
        ("mailabs", "Downloading mailabs ...",
         lambda: io.MailabsDownloader(tags=["de_DE"])),
        ("common_voice", "Downloading common-voice ...",
         lambda: io.CommonVoiceDownloader(lang="de")),
        ("tatoeba", "Downloading tatoeba ...",
         lambda: io.TatoebaDownloader(include_languages=["deu"])),
        ("zamia_speech", "Downloading zamia-speech ...",
         lambda: io.ZamiaSpeechDownloader(lang="de")),
    )

    parser = argparse.ArgumentParser(description="Prepare data for training.")
    parser.add_argument("target_path", type=str)
    for flag_name, _message, _factory in corpora:
        parser.add_argument("--" + flag_name, action="store_true")
    args = parser.parse_args()

    for flag_name, message, build_downloader in corpora:
        if not getattr(args, flag_name):
            continue
        print(message)
        downloader = build_downloader()
        downloader.download(os.path.join(args.target_path, flag_name))

Ejemplo n.º 8

0

Mostrar archivo

Archivo: test_tatoeba.py Proyecto: ynop/audiomate

    def test_download_with_filter_lang(self, sample_audio_list_tar_bz,
                                       sample_sentence_list_tar_bz,
                                       sample_audio_content, tmpdir):
        """Check which audio files are written when a language filter is set.

        The downloader is restricted to ``['deu', 'eng']``. All HTTP traffic
        is served by ``requests_mock``; after the download only
        ``deu/6286.mp3`` (plus ``meta.txt``) is expected in the target
        directory.
        """
        # Every audio URL is mocked, including the ones expected to be skipped.
        mocked_audio_urls = (
            'https://audio.tatoeba.org/sentences/eng/141.mp3',
            'https://audio.tatoeba.org/sentences/fra/247.mp3',
            'https://audio.tatoeba.org/sentences/epo/1355.mp3',
            'https://audio.tatoeba.org/sentences/deu/6286.mp3',
            'https://audio.tatoeba.org/sentences/ita/6921520.mp3',
        )
        # (language folder, file name, expected to exist after download)
        expectations = (
            ('eng', '141.mp3', False),
            ('fra', '247.mp3', False),
            ('epo', '1355.mp3', False),
            ('deu', '6286.mp3', True),
            ('ita', '6921520.mp3', False),
        )

        downloader = io.TatoebaDownloader(include_languages=['deu', 'eng'])
        target_dir = tmpdir.strpath

        with requests_mock.Mocker() as mock:
            # The reported size is arbitrary; per the original note it is
            # only used for printing.
            mock.head(requests_mock.ANY, headers={'Content-Length': '100'})

            mock.get(tatoeba.AUDIO_LIST_URL, content=sample_audio_list_tar_bz)
            mock.get(tatoeba.SENTENCE_LIST_URL,
                     content=sample_sentence_list_tar_bz)
            for audio_url in mocked_audio_urls:
                mock.get(audio_url, content=sample_audio_content)

            downloader.download(target_dir)

            assert os.path.isfile(os.path.join(target_dir, 'meta.txt'))

            for lang, file_name, should_exist in expectations:
                audio_path = os.path.join(target_dir, 'audio', lang, file_name)
                assert os.path.isfile(audio_path) == should_exist