def prepare_dataset(url, md5sum, target_dir, annotation_path):
    """Fetch the THCHS-30 corpus and build its annotation file.

    Downloads and unpacks the archive under *target_dir* (skipping both
    steps when the unpacked data is already present), then writes the
    annotation text to *annotation_path*.
    """
    data_dir = os.path.join(target_dir, 'data_thchs30')
    if os.path.exists(data_dir):
        # Corpus already unpacked — nothing to fetch.
        print(
            "Skip downloading and unpacking. THCHS-30 data already exists in %s."
            % target_dir)
    else:
        archive_path = download(url, md5sum, target_dir)
        unpack(archive_path, target_dir)
        # The archive is no longer needed once extracted.
        os.remove(archive_path)
    create_annotation_text(data_dir, annotation_path)
def prepare_dataset(url, md5sum, target_dir, annotation_path):
    """Fetch the Free ST-Chinese-Mandarin-Corpus and build its annotation file.

    Downloads and unpacks the archive under *target_dir* (skipping both
    steps when the unpacked data is already present), then writes the
    annotation text to *annotation_path*.
    """
    data_dir = os.path.join(target_dir, 'ST-CMDS-20170001_1-OS')
    if os.path.exists(data_dir):
        # Corpus already unpacked — nothing to fetch.
        print(
            "Skip downloading and unpacking. Free ST-Chinese-Mandarin-Corpus data already exists in %s."
            % target_dir)
    else:
        archive_path = download(url, md5sum, target_dir)
        unpack(archive_path, target_dir)
        # The archive is no longer needed once extracted.
        os.remove(archive_path)
    create_annotation_text(data_dir, annotation_path)
def prepare_dataset(url, md5sum, target_dir, manifest_path):
    """Fetch a LibriSpeech split and build its summary manifest.

    Downloads and unpacks the archive under *target_dir* when the
    "LibriSpeech" directory is not yet present, then writes the manifest
    JSON to *manifest_path*. NOTE: unlike some sibling preparers, the
    downloaded archive is intentionally kept on disk.
    """
    if os.path.exists(os.path.join(target_dir, "LibriSpeech")):
        # Data already unpacked — nothing to fetch.
        print("Skip downloading and unpacking. Data already exists in %s." %
              target_dir)
    else:
        archive_path = download(url, md5sum, target_dir)
        unpack(archive_path, target_dir)
    # Build the manifest JSON file from the unpacked data.
    create_manifest(target_dir, manifest_path)
def prepare_dataset(url, md5sum, target_dir, manifest_path):
    """Fetch the AISHELL corpus and build its manifest file.

    Downloads and unpacks the top-level archive under *target_dir*, then
    unpacks every nested per-subfolder audio tarball found below
    'data_aishell/wav'. Both steps are skipped when the unpacked data is
    already present. Finally writes the manifest to *manifest_path*.
    """
    data_dir = os.path.join(target_dir, 'data_aishell')
    if os.path.exists(data_dir):
        # Corpus already unpacked — nothing to fetch.
        print("Skip downloading and unpacking. Data already exists in %s." %
              target_dir)
    else:
        archive_path = download(url, md5sum, target_dir)
        unpack(archive_path, target_dir)
        # The top-level archive contains nested audio tarballs; unpack each
        # one in place. The trailing True flag is forwarded to unpack() —
        # presumably "remove archive after extraction"; verify against the
        # unpack() helper.
        audio_dir = os.path.join(data_dir, 'wav')
        for subdir, _, tar_names in sorted(os.walk(audio_dir)):
            for tar_name in tar_names:
                unpack(os.path.join(subdir, tar_name), subdir, True)
    create_manifest(data_dir, manifest_path)