Example #1
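# create output directories for targets and predictions (no-op if they already exist)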
target_dir.mkdir(exist_ok=True)
train_target_dir.mkdir(exist_ok=True)
val_target_dir.mkdir(exist_ok=True)
train_predictions_dir.mkdir(exist_ok=True)
val_predictions_dir.mkdir(exist_ok=True)
config_manager.dump_config()
script_batch_size = 5 * config['batch_size']
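# count prediction batches already stored on disk as .npy files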
val_has_files = len([
    batch_file for batch_file in val_predictions_dir.iterdir()
    if batch_file.suffix == '.npy'
])
train_has_files = len([
    batch_file for batch_file in train_predictions_dir.iterdir()
    if batch_file.suffix == '.npy'
])
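# load the trained model and decide whether predictions need to be (re)computed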
model = config_manager.load_model()
running_predictions = args.recompute_pred or (val_has_files == 0) or (train_has_files == 0)
if running_predictions:
    if args.store_predictions:
        print(
            '\nWARNING: storing predictions can take a lot of disk space (~40GB)\n'
        )
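    # metafiles listing the training and validation samples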
    train_meta = config_manager.train_datadir / 'train_metafile.txt'
    test_meta = config_manager.train_datadir / 'test_metafile.txt'
    train_samples, _ = load_files(
        metafile=str(train_meta),
        meldir=str(meldir),
        num_samples=config['n_samples'])  # (phonemes, mel)
    val_samples, _ = load_files(
        metafile=str(test_meta),
        meldir=str(meldir),
        num_samples=config['n_samples'])  # (phonemes, text, mel)

Example #2

# reset run directories as requested via the CLI flags
config_manager.create_remove_dirs(clear_dir=args.clear_dir,
                                  clear_logs=args.clear_logs,
                                  clear_weights=args.clear_weights)
config_manager.dump_config()
config_manager.print_config()

train_samples, _ = load_files(metafile=str(config_manager.train_datadir / 'train_metafile.txt'),
                              meldir=str(config_manager.train_datadir / 'mels'),
                              num_samples=config['n_samples'])  # (phonemes, mel)
val_samples, _ = load_files(metafile=str(config_manager.train_datadir / 'test_metafile.txt'),
                            meldir=str(config_manager.train_datadir / 'mels'),
                            num_samples=config['n_samples'])  # (phonemes, text, mel)

# get model, prepare data for model, create datasets
pretrained = True
if pretrained:
    model = config_manager.load_model(str(config_manager.weights_dir / 'ckpt-90'))
else:
    model = config_manager.get_model()

config_manager.compile_model(model)
data_prep = DataPrepper(config=config,
                        tokenizer=model.text_pipeline.tokenizer)

# preprocess the validation samples once so they can be reused for evaluation
test_list = [data_prep(s) for s in val_samples]
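# wrap the samples into batched datasets for training and validation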
train_dataset = Dataset(samples=train_samples,
                        preprocessor=data_prep,
                        batch_size=config['batch_size'],
                        mel_channels=config['mel_channels'],
                        shuffle=True)
val_dataset = Dataset(samples=val_samples,
                      preprocessor=data_prep,
                      batch_size=config['batch_size'],
                      mel_channels=config['mel_channels'],
                      shuffle=False)  # completion assumed to mirror the train dataset, without shuffling