def preprocess_ljspeech(args):
    """Run LJSpeech-1.0 feature extraction rooted at args.base_dir.

    Output location handling is delegated to the builder (base_dir plus the
    args.output subfolder name are passed straight through).
    """
    corpus_dir = os.path.join(args.base_dir, 'LJSpeech-1.0')
    # out_dir = os.path.join(args.base_dir, args.output)
    # os.makedirs(out_dir, exist_ok=True)
    ljspeech.build_from_path(corpus_dir, args.base_dir, args.output,
                             args.num_workers, tqdm=tqdm)
def main():
    """Entry point: parse CLI arguments, create feature directories, and
    dispatch preprocessing for the selected dataset."""
    print('initializing preprocessing..')

    parser = argparse.ArgumentParser()
    parser.add_argument('--base_dir', default='')
    parser.add_argument('--hparams', default='',
                        help='Hyperparameter overrides as a comma-separated list of name=value pairs')
    parser.add_argument('--dataset', default='MultiSets')
    parser.add_argument('--output', default='training_data')
    parser.add_argument('--n_jobs', type=int, default=cpu_count())
    args = parser.parse_args()

    # Prepare directories — base_dir is used directly as the input root here.
    # in_dir = os.path.join(args.base_dir, args.dataset)
    # out_dir = os.path.join(args.base_dir, args.output)
    in_dir = args.base_dir
    out_dir = args.output
    mel_dir = os.path.join(out_dir, 'mels')
    wav_dir = os.path.join(out_dir, 'audio')
    lin_dir = os.path.join(out_dir, 'linear')
    for feature_dir in (mel_dir, wav_dir, lin_dir):
        os.makedirs(feature_dir, exist_ok=True)

    # Dispatch on dataset name.
    # NOTE(review): `hparams` is not defined in this function — presumably a
    # module-level import; confirm at file top.
    if args.dataset == 'LJSpeech-1.1':
        metadata = ljspeech.build_from_path(hparams, in_dir, mel_dir, lin_dir,
                                            wav_dir, args.n_jobs, tqdm=tqdm)
    elif args.dataset == 'DataBaker':
        use_prosody = False
        metadata = databaker.build_from_path(hparams, in_dir, use_prosody,
                                             mel_dir, lin_dir, wav_dir,
                                             args.n_jobs, tqdm=tqdm)
    elif args.dataset == 'MultiSets':
        metadata = multisets.build_from_path(hparams, in_dir, mel_dir, lin_dir,
                                             wav_dir, args.n_jobs, tqdm=tqdm)
    else:
        raise ValueError('Unsupported dataset provided: {} '.format(
            args.dataset))

    # Write metadata to 'train.txt' for training
    write_metadata(metadata, out_dir)
def preprocess_ljspeech(args):
    """Build LJSpeech-1.1 training features and write the metadata index."""
    dataset_dir = os.path.join(args.base_dir, 'LJSpeech-1.1')
    training_dir = os.path.join(args.base_dir, args.output)  # e.g. 'training'
    # exist_ok=True: no error if the output directory already exists.
    os.makedirs(training_dir, exist_ok=True)
    metadata = ljspeech.build_from_path(dataset_dir, training_dir,
                                        args.num_workers, tqdm=tqdm)
    write_metadata(metadata, training_dir)
def preprocess_ljspeech(args):
    """Preprocess LJSpeech-1.0 (under 'database/') and write train metadata."""
    source_dir = os.path.join(args.base_dir, 'database/LJSpeech-1.0')
    target_dir = os.path.join(args.base_dir, args.output)
    os.makedirs(target_dir, exist_ok=True)
    entries = ljspeech.build_from_path(source_dir, target_dir,
                                       args.num_workers, tqdm=tqdm)
    write_metadata(entries, target_dir)
def preprocess_ljspeech(args, hparams):
    """Preprocess LJSpeech-1.1 and split the metadata into train/val/test.

    Writes 'train.txt' plus optional validation and test index files under
    the output directory.

    Fix: the original sliced with negative indices, e.g.
    ``metadata[:-args.validation_size - args.test_size]``, which collapses to
    ``metadata[:-0] == []`` whenever both sizes are 0 (empty training set),
    and ``metadata[-v - 0:-0] == []`` when only test_size is 0 (empty
    validation set). Positive boundary indices behave correctly for all
    non-negative sizes.
    """
    in_dir = os.path.join(args.base_dir, 'LJSpeech-1.1')
    out_dir = os.path.join(args.base_dir, args.output)
    os.makedirs(out_dir, exist_ok=True)
    metadata = ljspeech.build_from_path(in_dir, out_dir, hparams,
                                        args.num_workers, tqdm=tqdm)

    # Partition: [0:train_end] train, [train_end:val_end] validation,
    # [val_end:] test. Boundaries are non-negative so zero-sized splits
    # leave the others intact.
    val_end = len(metadata) - args.test_size
    train_end = val_end - args.validation_size

    write_metadata(metadata[:train_end], out_dir)
    if args.validation_size > 0:
        write_validation(metadata[train_end:val_end], out_dir)
    if args.test_size > 0:
        # Same writer, different target file for the held-out test split.
        write_validation(metadata[val_end:], out_dir, filename='test.txt')
def test_build_from_path():
    """Smoke-test build_from_path against a fixed local data directory.

    NOTE(review): hard-coded Windows paths — only runnable on that machine.
    """
    data_dir = r"D:\git\tacotron\data"
    spec_dir = r"D:\git\tacotron\data\specs"
    build_from_path(in_dir=data_dir, out_dir=spec_dir)
def preprocess_blizzard(args):
    """Preprocess the Blizzard2012 corpus and write its training metadata."""
    in_dir = os.path.join(args.base_dir, 'Blizzard2012')
    out_dir = os.path.join(args.base_dir, args.output)
    os.makedirs(out_dir, exist_ok=True)
    metadata = blizzard.build_from_path(in_dir, out_dir, args.num_workers,
                                        tqdm=tqdm)
    write_metadata(metadata, out_dir)


def preprocess_ljspeech(args):
    """Preprocess LJSpeech-1.0 rooted at the current working directory."""
    in_dir = os.path.join(os.getcwd(), 'LJSpeech-1.0')
    out_dir = os.path.join(os.getcwd(), args.output)
    # Equivalent to os.makedirs(out_dir, exist_ok=True).
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    metadata = ljspeech.build_from_path(in_dir, out_dir, args.num_workers,
                                        tqdm=tqdm)
    write_metadata(metadata, out_dir)


def write_metadata(metadata, out_dir):
    """Write one pipe-joined row per utterance to train.txt and print stats.

    Each row m is expected to carry the frame count at m[2] and the input
    text at m[3] (as used by the stats below).

    Fix: the file was opened in binary mode ('wb') while str objects were
    written, which raises TypeError on Python 3. Open in text mode with an
    explicit UTF-8 encoding instead.
    """
    with open(os.path.join(out_dir, 'train.txt'), 'w', encoding='utf-8') as f:
        for m in metadata:
            f.write('|'.join([str(x) for x in m]) + '\n')
    frames = sum([m[2] for m in metadata])
    hours = frames * hparams.frame_shift_ms / (3600 * 1000)
    print('Wrote %d utterances, %d frames (%.2f hours)' % (len(metadata),
                                                           frames, hours))
    print('Max input length: %d' % max(len(m[3]) for m in metadata))
    print('Max output length: %d' % max(m[2] for m in metadata))


# NOTE(review): the original text ended here with a dangling duplicate
# 'def main():' header; it is dropped because the very next chunk declares
# main() in full.
def main():
    """Entry point: parse CLI args, create feature directories, and run the
    preprocessing pipeline for the selected dataset (or collection).

    Fixes over the original:
      * every progress print ended with the two literal characters '/n'
        instead of a newline escape '\\n';
      * in the 'chunchun' branch the CN/EN progress labels were swapped
        relative to the directories and builders actually used (the english
        subset printed "CN" and vice versa).
    """
    print('initializing preprocessing..')
    parser = argparse.ArgumentParser()
    parser.add_argument('--base_dir', default='')
    parser.add_argument('--hparams', default='',
                        help='Hyperparameter overrides as a comma-separated list of name=value pairs')
    parser.add_argument('--dataset', default='chunchun')
    parser.add_argument('--output', default='training_data')
    parser.add_argument('--n_jobs', type=int, default=cpu_count())
    args = parser.parse_args()
    modified_hp = hparams.parse(args.hparams)

    # Prepare directories
    in_dir = os.path.join(args.base_dir, args.dataset)
    out_dir = os.path.join(args.base_dir, args.output)
    mel_dir = os.path.join(out_dir, 'mels')
    wav_dir = os.path.join(out_dir, 'audio')
    lin_dir = os.path.join(out_dir, 'linear')
    os.makedirs(mel_dir, exist_ok=True)
    os.makedirs(wav_dir, exist_ok=True)
    os.makedirs(lin_dir, exist_ok=True)

    # Process dataset. In the multi-corpus branches the 2nd positional arg
    # appears to be a per-corpus speaker id and the 3rd a language id
    # (0 for enus corpora, 1 for zhcmn corpora) — confirm against builders.
    metadata = []
    if args.dataset == 'chunchun':
        use_prosody = True
        in_dir = os.path.join(args.base_dir,
                              'chunchun/english/chunchun_english_lj')
        print('processing chunchun EN...\n')  # English subset
        metadata_1 = chunchun_EN.build_from_path(modified_hp, 0, 0, in_dir,
                                                 mel_dir, lin_dir, wav_dir,
                                                 args.n_jobs, tqdm=tqdm)
        in_dir = os.path.join(args.base_dir,
                              'chunchun/chinese/chunchun_8k_all_v4')
        print('processing chunchun CN...\n')  # Chinese subset
        metadata_2 = chunchun_CN.build_from_path_CN(modified_hp, 0, 1, in_dir,
                                                    use_prosody, mel_dir,
                                                    lin_dir, wav_dir,
                                                    args.n_jobs, tqdm=tqdm)
        metadata = metadata_1 + metadata_2
    elif args.dataset == 'all':
        use_prosody = False
        in_dir = os.path.join(args.base_dir, 'LJSpeech-1.1')
        print('processing LJSpeech-1.1...\n')
        metadata_1 = ljspeech.build_from_path(modified_hp, 0, 0, in_dir,
                                              mel_dir, lin_dir, wav_dir,
                                              args.n_jobs, tqdm=tqdm)
        in_dir = os.path.join(args.base_dir, 'DataBaker')
        print('processing DataBaker...\n')
        metadata_2 = databaker.build_from_path_CN(modified_hp, 1, 1, in_dir,
                                                  use_prosody, mel_dir,
                                                  lin_dir, wav_dir,
                                                  args.n_jobs, tqdm=tqdm)
        in_dir = os.path.join(args.base_dir, 'TTS.HUawei.zhcmn.F.Deng')
        print('processing TTS.HUawei.zhcmn.F.Deng...\n')
        metadata_3 = Huawei.build_from_path_CN(modified_hp, 2, 1, in_dir,
                                               use_prosody, mel_dir, lin_dir,
                                               wav_dir, args.n_jobs, tqdm=tqdm)
        in_dir = os.path.join(args.base_dir, 'TTS.Huawei.enus.F.XuYue')
        print('processing TTS.Huawei.enus.F.XuYue...\n')
        metadata_4 = Huawei.build_from_path_EN(modified_hp, 3, 0, in_dir,
                                               mel_dir, lin_dir, wav_dir,
                                               args.n_jobs, tqdm=tqdm)
        in_dir = os.path.join(args.base_dir,
                              'TTS.THCoSS.zhcmn.F.M/TH-CoSS/data/03FR00')
        print('processing TTS.THCoSS.zhcmn.F.M 03FR00...\n')
        metadata_5 = thcoss.build_from_path(modified_hp, 4, 1, in_dir, 'a',
                                            mel_dir, lin_dir, wav_dir,
                                            args.n_jobs, tqdm=tqdm)
        in_dir = os.path.join(args.base_dir,
                              'TTS.THCoSS.zhcmn.F.M/TH-CoSS/data/03MR00')
        print('processing TTS.THCoSS.zhcmn.F.M 03MR00...\n')
        metadata_6 = thcoss.build_from_path(modified_hp, 5, 1, in_dir, 'b',
                                            mel_dir, lin_dir, wav_dir,
                                            args.n_jobs, tqdm=tqdm)
        in_dir = os.path.join(args.base_dir,
                              'TTS.Pachira.zhcmn.enus.F.DB1/zh-cmn')
        print('processing TTS.Pachira.zhcmn.enus.F.DB1/zh-cmn...\n')
        metadata_7 = thcoss.build_from_path_simple(modified_hp, 6, 1, in_dir,
                                                   mel_dir, lin_dir, wav_dir,
                                                   args.n_jobs, tqdm=tqdm)
        in_dir = os.path.join(args.base_dir, 'TTS.DataBaker.enus.M.DB1')
        print('processing TTS.DataBaker.enus.M.DB1...\n')
        metadata_8 = databaker.build_from_path_EN(modified_hp, 7, 0, in_dir,
                                                  'x', mel_dir, lin_dir,
                                                  wav_dir, args.n_jobs,
                                                  tqdm=tqdm)
        in_dir = os.path.join(args.base_dir, 'TTS.DataBaker.enus.F.DB1')
        print('processing TTS.DataBaker.enus.F.DB1...\n')
        metadata_9 = databaker.build_from_path_EN(modified_hp, 8, 0, in_dir,
                                                  'y', mel_dir, lin_dir,
                                                  wav_dir, args.n_jobs,
                                                  tqdm=tqdm)
        in_dir = os.path.join(args.base_dir, 'TTS.DataBaker.enus.F.DB2')
        print('processing TTS.DataBaker.enus.F.DB2...\n')
        metadata_10 = databaker.build_from_path_EN(modified_hp, 9, 0, in_dir,
                                                   'z', mel_dir, lin_dir,
                                                   wav_dir, args.n_jobs,
                                                   tqdm=tqdm)
        metadata = (metadata_1 + metadata_2 + metadata_3 + metadata_4 +
                    metadata_5 + metadata_6 + metadata_7 + metadata_8 +
                    metadata_9 + metadata_10)
    # NOTE(review): the single-dataset branches below omit the speaker/lang
    # id arguments used in the branches above — verify the builders accept
    # both call shapes.
    elif args.dataset == 'LJSpeech-1.1':
        metadata = ljspeech.build_from_path(modified_hp, in_dir, mel_dir,
                                            lin_dir, wav_dir, args.n_jobs,
                                            tqdm=tqdm)
    elif args.dataset == 'DataBaker':
        use_prosody = False
        metadata = databaker.build_from_path_CN(modified_hp, in_dir,
                                                use_prosody, mel_dir, lin_dir,
                                                wav_dir, args.n_jobs,
                                                tqdm=tqdm)
    elif args.dataset == 'THCoSS':
        use_prosody = True
        metadata = thcoss.build_from_path(modified_hp, in_dir, use_prosody,
                                          mel_dir, lin_dir, wav_dir,
                                          args.n_jobs, tqdm=tqdm)
    else:
        raise ValueError('Unsupported dataset provided: {} '.format(
            args.dataset))

    # Write metadata to 'train.txt' for training
    write_metadata(metadata, out_dir)
def main():
    """Entry point: parse CLI args, build feature directories, preprocess the
    chosen dataset, and write the training metadata index."""
    print('Initializing preprocessing..')

    parser = argparse.ArgumentParser()
    parser.add_argument('--indir', required=True)
    parser.add_argument('--dataset', default='MultiSets')
    parser.add_argument('--outdir', default='training_data')
    parser.add_argument('--config_file', default='./datasets/config16k.json')
    parser.add_argument('--n_jobs', type=int, default=cpu_count())
    args = parser.parse_args()

    # Prepare directories
    in_dir = args.indir
    out_dir = args.outdir
    mel_dir = os.path.join(out_dir, 'mels')
    wav_dir = os.path.join(out_dir, 'audio')
    lin_dir = os.path.join(out_dir, 'linear')
    for feature_dir in (mel_dir, wav_dir, lin_dir):
        os.makedirs(feature_dir, exist_ok=True)

    # Dispatch on dataset name; each builder takes the same tail of
    # directory/config/job arguments.
    if args.dataset == 'LJSpeech':
        metadata = ljspeech.build_from_path(in_dir, mel_dir, lin_dir, wav_dir,
                                            args.config_file, args.n_jobs,
                                            tqdm=tqdm)
    elif args.dataset == 'DataBaker':
        use_prosody = True
        metadata = databaker.build_from_path(in_dir, use_prosody, mel_dir,
                                             lin_dir, wav_dir,
                                             args.config_file, args.n_jobs,
                                             tqdm=tqdm)
    elif args.dataset == 'MultiSets':
        metadata = multisets.build_from_path(in_dir, mel_dir, lin_dir,
                                             wav_dir, args.config_file,
                                             args.n_jobs, tqdm=tqdm)
    elif args.dataset == 'AIShell-3':
        use_prosody = False
        metadata = aishell3.build_from_path(in_dir, use_prosody, mel_dir,
                                            lin_dir, wav_dir,
                                            args.config_file, args.n_jobs,
                                            tqdm=tqdm)
    else:
        raise ValueError('Unsupported dataset provided: {} '.format(
            args.dataset))

    # Write metadata to 'train.txt' for training, tagged with the corpus
    # sample rate read from the JSON config.
    with open(args.config_file, 'r') as f:
        sr = json.load(f)["sr"]
    write_metadata(metadata, out_dir, sr)