Beispiel #1
0
def prepare_dataset(url, md5sum, target_dir, annotation_path):
    """Download, unpack and create manifest file."""
    data_dir = os.path.join(target_dir, 'data_thchs30')
    if not os.path.exists(data_dir):
        filepath = download(url, md5sum, target_dir)
        unpack(filepath, target_dir)
        os.remove(filepath)
    else:
        print(
            "Skip downloading and unpacking. THCHS-30 data already exists in %s."
            % target_dir)
    create_annotation_text(data_dir, annotation_path)
def prepare_dataset(url, md5sum, target_dir, annotation_path):
    """Download, unpack and create manifest file."""
    data_dir = os.path.join(target_dir, 'ST-CMDS-20170001_1-OS')
    if not os.path.exists(data_dir):
        filepath = download(url, md5sum, target_dir)
        unpack(filepath, target_dir)
        os.remove(filepath)
    else:
        print(
            "Skip downloading and unpacking. Free ST-Chinese-Mandarin-Corpus data already exists in %s."
            % target_dir)
    create_annotation_text(data_dir, annotation_path)
Beispiel #3
0
def prepare_dataset(url, md5sum, target_dir, manifest_path):
    """Download, unpack and create summmary manifest file.
    """
    if not os.path.exists(os.path.join(target_dir, "LibriSpeech")):
        # download
        filepath = download(url, md5sum, target_dir)
        # unpack
        unpack(filepath, target_dir)
    else:
        print("Skip downloading and unpacking. Data already exists in %s." %
              target_dir)
    # create manifest json file
    create_manifest(target_dir, manifest_path)
Beispiel #4
0
def download_and_unpack(target_dir, url):
    wget_args = '-q -l 1 -N -nd -c -e robots=off -A tgz -r -np'
    tgz_dir = os.path.join(target_dir, 'tgz')
    exit_code = download_multi(url, tgz_dir, wget_args)
    if exit_code != 0:
        print('Download tgz audio files failed with exit code %d.' % exit_code)
    else:
        print('Download done, start unpacking ...')
        audio_dir = os.path.join(target_dir, 'audio')
        for root, dirs, files in os.walk(tgz_dir):
            for file in files:
                print(file)
                if file.endswith('.tgz'):
                    unpack(os.path.join(root, file), audio_dir)
def prepare_dataset(url, md5sum, target_dir, manifest_path):
    """Download, unpack and create manifest file."""
    data_dir = os.path.join(target_dir, 'data_aishell')
    if not os.path.exists(data_dir):
        filepath = download(url, md5sum, target_dir)
        unpack(filepath, target_dir)
        # unpack all audio tar files
        audio_dir = os.path.join(data_dir, 'wav')
        for subfolder, _, filelist in sorted(os.walk(audio_dir)):
            for ftar in filelist:
                unpack(os.path.join(subfolder, ftar), subfolder, True)
    else:
        print("Skip downloading and unpacking. Data already exists in %s." %
              target_dir)
    create_manifest(data_dir, manifest_path)