def prepare_run(args):
    """Set up the run's log directory and assemble the hyper-parameter set.

    Parses CLI hyper-parameter overrides, creates the log directory, loads the
    train/val image lists, and derives the speaker-specific parameters
    (fps, window length T, mel step size, max decoder iterations).

    Returns:
        (log_dir, hp): the created log directory path and the populated
        hyper-parameter object.
    """
    hp = hparams.parse(args.hparams)
    os.environ["TF_CPP_MIN_LOG_LEVEL"] = str(args.tf_log_level)

    log_dir = os.path.join(args.models_dir, "logs-{}".format(args.name))
    os.makedirs(log_dir, exist_ok=True)

    train_images = get_image_list('train', args.data_root)
    val_images = get_image_list('val', args.data_root)
    hp.add_hparam('all_images', train_images)
    hp.add_hparam('all_test_images', val_images)

    # Speaker-specific parameters derived from the CLI arguments.
    hp.add_hparam('fps', int(args.fps))
    hp.add_hparam('T', int(args.window_size * args.fps))
    hp.add_hparam('mel_step_size', int(args.window_size * 80))
    assert hp.mel_step_size % hp.outputs_per_step == 0, \
        'Mel step size should be a multiple of outputs per step, change either of them to meet this condition'
    hp.add_hparam('max_iters', hp.mel_step_size // hp.outputs_per_step)

    # Presumably one image corresponds to one video frame, so the dataset
    # duration is frames / (fps * 3600) hours — TODO confirm against loader.
    print('Training on {} hours'.format(len(train_images) / (3600. * hp.fps)))
    print('Validating on {} hours'.format(len(val_images) / (3600. * hp.fps)))
    return log_dir, hp
def prepare_run(args):
    """Create the run's log directory and initialise terminal logging.

    Returns:
        (log_dir, hp): the created log directory path and the hyper-parameter
        object built from the CLI overrides.
    """
    hp = hparams.parse(args.hparams)
    os.environ["TF_CPP_MIN_LOG_LEVEL"] = str(args.tf_log_level)

    name = args.name
    log_dir = os.path.join(args.models_dir, "logs-{}".format(name))
    os.makedirs(log_dir, exist_ok=True)

    # Mirror stdout into a per-run log file (and optionally notify Slack).
    infolog.init(os.path.join(log_dir, "Terminal_train_log"), name, args.slack_url)
    return log_dir, hp
def main():
    """CLI entry point: parse arguments and dispatch dataset preprocessing.

    Encodes audio as mel spectrograms (and keeps the audio for vocoder
    training) under ``out_dir``, defaulting to
    ``<datasets_root>/SV2TTS/synthesizer/``.
    """
    parser = argparse.ArgumentParser(
        description=
        "Preprocesses audio files from datasets, encodes them as mel spectrograms "
        "and writes them to the disk. Audio files are also saved, to be used by the "
        "vocoder for training.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("datasets_root", type=Path, help=\
        "Path to the directory containing your LibriSpeech/TTS datasets.")
    parser.add_argument("-o", "--out_dir", type=Path, default=argparse.SUPPRESS, help=\
        "Path to the output directory that will contain the mel spectrograms, the audios and the "
        "embeds. Defaults to <datasets_root>/SV2TTS/synthesizer/")
    parser.add_argument("-n", "--n_processes", type=int, default=8, help=\
        "Number of processes in parallel.")
    # BUGFIX: help previously said "overwrite", the opposite of what the flag
    # does — when set, already-processed files are skipped, not overwritten.
    parser.add_argument("-s", "--skip_existing", action="store_true", help=\
        "Whether to skip existing files with the same name. Useful if the preprocessing was "
        "interrupted.")
    parser.add_argument("--hparams", type=str, default="", help=\
        "Hyperparameter overrides as a comma-separated list of name-value pairs")
    parser.add_argument("-d", "--datasets", type=str, default="librispeech_other", help=\
        "Comma-separated list of dataset names to preprocess.")
    args = parser.parse_args()
    args.datasets = args.datasets.split(",")

    # Process the arguments. With default=argparse.SUPPRESS the attribute is
    # absent (not None) when -o was omitted, hence the hasattr check.
    if not hasattr(args, "out_dir"):
        args.out_dir = args.datasets_root.joinpath("SV2TTS", "synthesizer")

    # Create directories
    assert args.datasets_root.exists()
    args.out_dir.mkdir(exist_ok=True, parents=True)

    # Preprocess each requested dataset via its dedicated function.
    print_args(args, parser)
    args.hparams = hparams.parse(args.hparams)
    preprocess_func = {
        "custom": preprocess_custom,
        "librispeech_other": preprocess_librispeech,
    }
    args = vars(args)
    for dataset in args.pop("datasets"):
        print("Preprocessing %s" % dataset)
        preprocess_func[dataset](**args)
def main():
    """CLI entry point for preprocessing the wave_zlm dataset.

    Parses CLI options and hyper-parameter overrides, validates
    ``--merge_books``, and hands off to ``run_preprocess``.
    """
    print('initializing preprocessing..')
    parser = argparse.ArgumentParser()
    parser.add_argument('--base_dir', default='')
    parser.add_argument('--hparams', default='',
                        help='Hyperparameter overrides as a comma-separated list of name=value pairs')
    parser.add_argument('--dataset', default='wave_zlm')
    parser.add_argument('--language', default='en_US')
    parser.add_argument('--voice', default='female')
    parser.add_argument('--reader', default='mary_ann')
    parser.add_argument('--merge_books', default='False')
    parser.add_argument('--book', default='northandsouth')
    parser.add_argument('--output', default='./SV2TTS/training_data_zlm')
    parser.add_argument('--n_jobs', type=int, default=cpu_count())
    args = parser.parse_args()
    modified_hp = hparams.parse(args.hparams)
    # BUGFIX: was `assert args.merge_books in ('False', 'True')`; asserts are
    # stripped under `python -O`, so validate explicitly and report through
    # argparse for a proper usage message and exit code.
    if args.merge_books not in ('False', 'True'):
        parser.error("--merge_books must be 'False' or 'True', got {!r}".format(args.merge_books))
    run_preprocess(args, modified_hp)
def main():
    """Parse CLI options and run GTA spectrogram synthesis on the chosen GPU."""

    class _Formatter(argparse.ArgumentDefaultsHelpFormatter,
                     argparse.RawDescriptionHelpFormatter):
        """Show argument defaults while preserving the description's layout."""

    parser = argparse.ArgumentParser(
        description="Creates ground-truth aligned (GTA) spectrograms from the vocoder.",
        formatter_class=_Formatter)
    parser.add_argument("datasets_root", type=str, help=(
        "Path to the directory containing your SV2TTS directory. If you specify both --in_dir and "
        "--out_dir, this argument won't be used."))
    parser.add_argument("--model_dir", type=str,
                        default="synthesizer/saved_models/logs-pretrained/",
                        help="Path to the pretrained model directory.")
    parser.add_argument("-i", "--in_dir", type=str, default=argparse.SUPPRESS, help=(
        "Path to the synthesizer directory that contains the mel spectrograms, the wavs and the "
        "embeds. Defaults to <datasets_root>/SV2TTS/synthesizer/."))
    parser.add_argument("-o", "--out_dir", type=str, default=argparse.SUPPRESS, help=(
        "Path to the output vocoder directory that will contain the ground truth aligned mel "
        "spectrograms. Defaults to <datasets_root>/SV2TTS/vocoder/."))
    parser.add_argument("--hparams", default="",
                        help="Hyperparameter overrides as a comma-separated list of name=value "
                             "pairs")
    parser.add_argument("-gpuid", "--gpu_id", type=str, default='0',
                        help="Select the GPU to run the code")
    args = parser.parse_args()
    print_args(args, parser)

    # Restrict visible devices before any CUDA work happens downstream.
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_id
    hp = hparams.parse(args.hparams)

    # argparse.SUPPRESS leaves the attribute unset when the flag is omitted;
    # fall back to the conventional locations under the datasets root.
    args.in_dir = getattr(args, "in_dir",
                          os.path.join(args.datasets_root, "SV2TTS", "synthesizer"))
    args.out_dir = getattr(args, "out_dir",
                           os.path.join(args.datasets_root, "SV2TTS", "vocoder"))
    run_synthesis(args.in_dir, args.out_dir, args.model_dir, hp)
# NOTE(review): incomplete fragment — the enclosing `def main():` header and the
# opening `argparse.ArgumentParser(` call were cut off by the chunk boundary, so
# this span cannot be rewritten as a self-contained block. It appears to be a
# KSponSpeech variant of the preprocessing entry point (note the commented-out
# `preprocess_librispeech` call at the end) — TODO confirm against the full file.
# Left byte-identical.
"and writes them to the disk. Audio files are also saved, to be used by the " "vocoder for training.", formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument("datasets_root", type=Path, help=\ "Path to the directory containing your LibriSpeech/TTS datasets.") parser.add_argument("-o", "--out_dir", type=Path, default=argparse.SUPPRESS, help=\ "Path to the output directory that will contain the mel spectrograms, the audios and the " "embeds. Defaults to <datasets_root>/SV2TTS/synthesizer/") parser.add_argument("-n", "--n_processes", type=int, default=None, help=\ "Number of processes in parallel.") parser.add_argument("-s", "--skip_existing", action="store_true", help=\ "Whether to overwrite existing files with the same name. Useful if the preprocessing was " "interrupted.") parser.add_argument("--hparams", type=str, default="", help=\ "Hyperparameter overrides as a comma-separated list of name-value pairs") args = parser.parse_args() # Process the arguments if not hasattr(args, "out_dir"): args.out_dir = args.datasets_root.joinpath("SV2TTS", "synthesizer") # Create directories assert args.datasets_root.exists() args.out_dir.mkdir(exist_ok=True, parents=True) # Preprocess the dataset print_args(args, parser) args.hparams = hparams.parse(args.hparams) # preprocess_librispeech(**vars(args)) preprocess_KSponSpeech(**vars(args))
# NOTE(review): incomplete fragment — the `def main():` header, the `MyFormatter`
# class definition, and the start of the `argparse.ArgumentParser(` call are
# missing from this chunk (compare the complete GTA-synthesis variant elsewhere
# in this file, which this text otherwise matches minus the --gpu_id option).
# Cannot be rewritten as a self-contained block; left byte-identical.
description= "Creates ground-truth aligned (GTA) spectrograms from the vocoder.", formatter_class=MyFormatter) parser.add_argument("datasets_root", type=str, help=\ "Path to the directory containing your SV2TTS directory. If you specify both --in_dir and " "--out_dir, this argument won't be used.") parser.add_argument("--model_dir", type=str, default="synthesizer/saved_models/logs-pretrained/", help=\ "Path to the pretrained model directory.") parser.add_argument("-i", "--in_dir", type=str, default=argparse.SUPPRESS, help= \ "Path to the synthesizer directory that contains the mel spectrograms, the wavs and the " "embeds. Defaults to <datasets_root>/SV2TTS/synthesizer/.") parser.add_argument("-o", "--out_dir", type=str, default=argparse.SUPPRESS, help= \ "Path to the output vocoder directory that will contain the ground truth aligned mel " "spectrograms. Defaults to <datasets_root>/SV2TTS/vocoder/.") parser.add_argument( "--hparams", default="", help="Hyperparameter overrides as a comma-separated list of name=value " "pairs") args = parser.parse_args() print_args(args, parser) modified_hp = hparams.parse(args.hparams) if not hasattr(args, "in_dir"): args.in_dir = os.path.join(args.datasets_root, "SV2TTS", "synthesizer") if not hasattr(args, "out_dir"): args.out_dir = os.path.join(args.datasets_root, "SV2TTS", "vocoder") run_synthesis(args.in_dir, args.out_dir, args.model_dir, modified_hp)