コード例 #1
0
ファイル: train.py プロジェクト: voxlogic/Lip2Wav
def prepare_run(args):
    """Create the run's log directory and build its hyperparameter set.

    Parses the ``args.hparams`` overrides, exports ``TF_CPP_MIN_LOG_LEVEL``,
    creates ``<models_dir>/logs-<name>``, loads the 'train' and 'val' image
    lists from ``args.data_root`` and registers them, together with the
    values derived from ``args.fps`` and ``args.window_size``, as extra
    hyperparameters.

    Args:
        args: Parsed command-line namespace. Reads ``hparams``,
            ``tf_log_level``, ``name``, ``models_dir``, ``data_root``,
            ``fps`` and ``window_size``.

    Returns:
        A ``(log_dir, modified_hp)`` tuple: the path of the (now existing)
        log directory and the augmented hyperparameter object.

    Raises:
        ValueError: If ``mel_step_size`` is not a multiple of
            ``outputs_per_step``.
    """
    modified_hp = hparams.parse(args.hparams)
    os.environ["TF_CPP_MIN_LOG_LEVEL"] = str(args.tf_log_level)
    run_name = args.name
    log_dir = os.path.join(args.models_dir, "logs-{}".format(run_name))
    os.makedirs(log_dir, exist_ok=True)
    all_images = get_image_list('train', args.data_root)
    all_test_images = get_image_list('val', args.data_root)

    modified_hp.add_hparam('all_images', all_images)
    modified_hp.add_hparam('all_test_images', all_test_images)

    # Speaker-specific parameters derived from the command line.
    modified_hp.add_hparam('fps', int(args.fps))
    modified_hp.add_hparam('T', int(args.window_size * args.fps))
    # NOTE(review): 80 is presumably the number of mel frames per second of
    # audio -- confirm against the audio hparams (sample rate / hop size).
    modified_hp.add_hparam('mel_step_size', int(args.window_size * 80))

    # Explicit check instead of `assert`: asserts are stripped under
    # `python -O`, which would let the floor division below silently drop
    # the remainder.
    if modified_hp.mel_step_size % modified_hp.outputs_per_step != 0:
        raise ValueError(
            'Mel step size should be a multiple of outputs per step, '
            'change either of them to meet this condition')

    modified_hp.add_hparam(
        'max_iters', modified_hp.mel_step_size // modified_hp.outputs_per_step)

    # The duration estimate treats every list entry as one frame at `fps`.
    print('Training on {} hours'.format(
        len(all_images) / (3600. * modified_hp.fps)))
    print('Validating on {} hours'.format(
        len(all_test_images) / (3600. * modified_hp.fps)))

    return log_dir, modified_hp
コード例 #2
0
def prepare_run(args):
    """Set up logging for a training run and parse hyperparameter overrides.

    Parses ``args.hparams``, exports ``TF_CPP_MIN_LOG_LEVEL`` from
    ``args.tf_log_level``, creates ``<models_dir>/logs-<name>`` and
    initialises ``infolog`` with a "Terminal_train_log" file inside it.

    Returns:
        A ``(log_dir, modified_hp)`` tuple.
    """
    hp = hparams.parse(args.hparams)
    os.environ["TF_CPP_MIN_LOG_LEVEL"] = str(args.tf_log_level)

    name = args.name
    run_dir = os.path.join(args.models_dir, "logs-{}".format(name))
    os.makedirs(run_dir, exist_ok=True)

    terminal_log = os.path.join(run_dir, "Terminal_train_log")
    infolog.init(terminal_log, name, args.slack_url)

    return run_dir, hp
コード例 #3
0
def main():
    """Command-line entry point: preprocess one or more synthesizer datasets.

    Parses the arguments, resolves the default output directory, creates it,
    parses the hyperparameter overrides and then calls the preprocessing
    function registered for each requested dataset with the remaining
    arguments as keyword arguments.

    Exits with an argparse usage error (status 2) when a requested dataset
    name is not registered or when ``datasets_root`` does not exist.
    """
    # Dispatch table: dataset name -> preprocessing function.
    preprocess_func = {
        "custom": preprocess_custom,
        "librispeech_other": preprocess_librispeech,
    }

    parser = argparse.ArgumentParser(
        description=(
            "Preprocesses audio files from datasets, encodes them as mel spectrograms "
            "and writes them to  the disk. Audio files are also saved, to be used by the "
            "vocoder for training."),
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        "datasets_root", type=Path,
        help="Path to the directory containing your LibriSpeech/TTS datasets.")
    # SUPPRESS keeps the attribute absent when -o is not given, so the
    # default can be derived from datasets_root after parsing.
    parser.add_argument(
        "-o", "--out_dir", type=Path, default=argparse.SUPPRESS,
        help="Path to the output directory that will contain the mel spectrograms, the audios and the "
             "embeds. Defaults to <datasets_root>/SV2TTS/synthesizer/")
    parser.add_argument(
        "-n", "--n_processes", type=int, default=8,
        help="Number of processes in parallel.")
    # NOTE(review): the help text says "overwrite" while the flag is named
    # "skip_existing" -- confirm the intended wording against the
    # preprocessing functions.
    parser.add_argument(
        "-s", "--skip_existing", action="store_true",
        help="Whether to overwrite existing files with the same name. Useful if the preprocessing was "
             "interrupted.")
    parser.add_argument(
        "--hparams", type=str, default="",
        help="Hyperparameter overrides as a comma-separated list of name-value pairs")
    parser.add_argument(
        "-d", "--datasets", type=str, default="librispeech_other",
        help="Comma-separated list of datasets to preprocess. Known datasets: "
             + ", ".join(preprocess_func))
    args = parser.parse_args()
    args.datasets = args.datasets.split(",")

    # Reject unknown dataset names before any work is done, instead of
    # failing with a KeyError part-way through the loop below.
    unknown = [name for name in args.datasets if name not in preprocess_func]
    if unknown:
        parser.error("unknown dataset(s): %s (choose from: %s)"
                     % (", ".join(unknown), ", ".join(preprocess_func)))

    # Process the arguments
    if not hasattr(args, "out_dir"):
        args.out_dir = args.datasets_root.joinpath("SV2TTS", "synthesizer")

    # Create directories. An explicit check is used rather than `assert`,
    # which is stripped under `python -O`; mkdir(parents=True) would then
    # silently create the whole tree under a non-existent root.
    if not args.datasets_root.exists():
        parser.error("datasets_root does not exist: %s" % args.datasets_root)
    args.out_dir.mkdir(exist_ok=True, parents=True)

    # Preprocess the dataset
    print_args(args, parser)
    args.hparams = hparams.parse(args.hparams)

    # Every remaining argument is forwarded to the preprocessing function.
    kwargs = vars(args)
    datasets = kwargs.pop("datasets")

    for dataset in datasets:
        print("Preprocessing %s" % dataset)
        preprocess_func[dataset](**kwargs)
コード例 #4
0
def main():
	"""Command-line entry point: parse the options and run preprocessing.

	Parses the arguments, turns the ``--hparams`` overrides into a
	hyperparameter object and hands both to ``run_preprocess``.

	``--merge_books`` must be the literal string 'False' or 'True'; any other
	value is rejected by argparse with a usage error (exit status 2).
	"""
	print('initializing preprocessing..')
	parser = argparse.ArgumentParser()
	parser.add_argument('--base_dir', default='')
	parser.add_argument('--hparams', default='',
		help='Hyperparameter overrides as a comma-separated list of name=value pairs')
	parser.add_argument('--dataset', default='wave_zlm')
	parser.add_argument('--language', default='en_US')
	parser.add_argument('--voice', default='female')
	parser.add_argument('--reader', default='mary_ann')
	# Validated by argparse rather than by an `assert`, which would be
	# stripped under `python -O` and let arbitrary strings through.
	parser.add_argument('--merge_books', default='False', choices=('False', 'True'))
	parser.add_argument('--book', default='northandsouth')
	parser.add_argument('--output', default='./SV2TTS/training_data_zlm')
	parser.add_argument('--n_jobs', type=int, default=cpu_count())
	args = parser.parse_args()

	modified_hp = hparams.parse(args.hparams)

	run_preprocess(args, modified_hp)
コード例 #5
0
def main():
    """Command-line entry point: create GTA spectrograms for the vocoder.

    Parses the arguments, prints them, exports ``CUDA_VISIBLE_DEVICES`` from
    ``--gpu_id``, parses the hyperparameter overrides, fills in the default
    input/output directories under ``<datasets_root>/SV2TTS`` when they were
    not given, and calls ``run_synthesis``.
    """
    class MyFormatter(argparse.ArgumentDefaultsHelpFormatter,
                      argparse.RawDescriptionHelpFormatter):
        """Help formatter combining both argparse formatter behaviours."""

    parser = argparse.ArgumentParser(
        description="Creates ground-truth aligned (GTA) spectrograms from the vocoder.",
        formatter_class=MyFormatter,
    )
    parser.add_argument(
        "datasets_root",
        type=str,
        help=("Path to the directory containing your SV2TTS directory. If you specify both --in_dir and "
              "--out_dir, this argument won't be used."),
    )
    parser.add_argument(
        "--model_dir",
        type=str,
        default="synthesizer/saved_models/logs-pretrained/",
        help="Path to the pretrained model directory.",
    )
    # in_dir / out_dir use SUPPRESS so that they are simply absent from the
    # namespace when not given; their defaults depend on datasets_root.
    parser.add_argument(
        "-i", "--in_dir",
        type=str,
        default=argparse.SUPPRESS,
        help=("Path to the synthesizer directory that contains the mel spectrograms, the wavs and the "
              "embeds. Defaults to  <datasets_root>/SV2TTS/synthesizer/."),
    )
    parser.add_argument(
        "-o", "--out_dir",
        type=str,
        default=argparse.SUPPRESS,
        help=("Path to the output vocoder directory that will contain the ground truth aligned mel "
              "spectrograms. Defaults to <datasets_root>/SV2TTS/vocoder/."),
    )
    parser.add_argument(
        "--hparams",
        default="",
        help=("Hyperparameter overrides as a comma-separated list of name=value "
              "pairs"),
    )
    parser.add_argument(
        "-gpuid", "--gpu_id",
        type=str,
        default='0',
        help="Select the GPU to run the code",
    )

    options = parser.parse_args()
    print_args(options, parser)

    os.environ['CUDA_VISIBLE_DEVICES'] = options.gpu_id
    hp = hparams.parse(options.hparams)

    # Fill in whichever directory was not supplied on the command line.
    for attr, subdir in (("in_dir", "synthesizer"), ("out_dir", "vocoder")):
        if not hasattr(options, attr):
            setattr(options, attr,
                    os.path.join(options.datasets_root, "SV2TTS", subdir))

    run_synthesis(options.in_dir, options.out_dir, options.model_dir, hp)
コード例 #6
0
        "and writes them to  the disk. Audio files are also saved, to be used by the "
        "vocoder for training.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("datasets_root", type=Path, help=\
        "Path to the directory containing your LibriSpeech/TTS datasets.")
    parser.add_argument("-o", "--out_dir", type=Path, default=argparse.SUPPRESS, help=\
        "Path to the output directory that will contain the mel spectrograms, the audios and the "
        "embeds. Defaults to <datasets_root>/SV2TTS/synthesizer/")
    parser.add_argument("-n", "--n_processes", type=int, default=None, help=\
        "Number of processes in parallel.")
    parser.add_argument("-s", "--skip_existing", action="store_true", help=\
        "Whether to overwrite existing files with the same name. Useful if the preprocessing was "
        "interrupted.")
    parser.add_argument("--hparams", type=str, default="", help=\
        "Hyperparameter overrides as a comma-separated list of name-value pairs")
    args = parser.parse_args()

    # Process the arguments
    if not hasattr(args, "out_dir"):
        args.out_dir = args.datasets_root.joinpath("SV2TTS", "synthesizer")

    # Create directories
    assert args.datasets_root.exists()
    args.out_dir.mkdir(exist_ok=True, parents=True)

    # Preprocess the dataset
    print_args(args, parser)
    args.hparams = hparams.parse(args.hparams)
    # preprocess_librispeech(**vars(args))
    preprocess_KSponSpeech(**vars(args))
コード例 #7
0
        description=
        "Creates ground-truth aligned (GTA) spectrograms from the vocoder.",
        formatter_class=MyFormatter)
    parser.add_argument("datasets_root", type=str, help=\
        "Path to the directory containing your SV2TTS directory. If you specify both --in_dir and "
        "--out_dir, this argument won't be used.")
    parser.add_argument("--model_dir", type=str,
                        default="synthesizer/saved_models/logs-pretrained/", help=\
        "Path to the pretrained model directory.")
    parser.add_argument("-i", "--in_dir", type=str, default=argparse.SUPPRESS, help= \
        "Path to the synthesizer directory that contains the mel spectrograms, the wavs and the "
        "embeds. Defaults to  <datasets_root>/SV2TTS/synthesizer/.")
    parser.add_argument("-o", "--out_dir", type=str, default=argparse.SUPPRESS, help= \
        "Path to the output vocoder directory that will contain the ground truth aligned mel "
        "spectrograms. Defaults to <datasets_root>/SV2TTS/vocoder/.")
    parser.add_argument(
        "--hparams",
        default="",
        help="Hyperparameter overrides as a comma-separated list of name=value "
        "pairs")
    args = parser.parse_args()
    print_args(args, parser)
    modified_hp = hparams.parse(args.hparams)

    if not hasattr(args, "in_dir"):
        args.in_dir = os.path.join(args.datasets_root, "SV2TTS", "synthesizer")
    if not hasattr(args, "out_dir"):
        args.out_dir = os.path.join(args.datasets_root, "SV2TTS", "vocoder")

    run_synthesis(args.in_dir, args.out_dir, args.model_dir, modified_hp)