# create output directories for targets and stored predictions
target_dir.mkdir(exist_ok=True)
train_target_dir.mkdir(exist_ok=True)
val_target_dir.mkdir(exist_ok=True)
train_predictions_dir.mkdir(exist_ok=True)
val_predictions_dir.mkdir(exist_ok=True)
config_manager.dump_config()
script_batch_size = 5 * config['batch_size']

# check whether predictions are already stored on disk for each split
val_has_files = len([batch_file for batch_file in val_predictions_dir.iterdir()
                     if batch_file.suffix == '.npy'])
train_has_files = len([batch_file for batch_file in train_predictions_dir.iterdir()
                       if batch_file.suffix == '.npy'])
model = config_manager.load_model()

# recompute predictions if forced via flag, or if either split has none stored
running_predictions = args.recompute_pred or (val_has_files == 0) or (train_has_files == 0)

if running_predictions:
    if args.store_predictions:
        print('\nWARNING: storing predictions can take a lot of disk space (~40GB)\n')
    train_meta = config_manager.train_datadir / 'train_metafile.txt'
    test_meta = config_manager.train_datadir / 'test_metafile.txt'
    train_samples, _ = load_files(metafile=str(train_meta),
                                  meldir=str(meldir),
                                  num_samples=config['n_samples'])  # (phonemes, mel)
    val_samples, _ = load_files(metafile=str(test_meta),
                                meldir=str(meldir),
                                num_samples=config['n_samples'])  # (phonemes, mel)
                                  clear_logs=args.clear_logs,
                                  clear_weights=args.clear_weights)
config_manager.dump_config()
config_manager.print_config()

# load training and validation samples
train_samples, _ = load_files(metafile=str(config_manager.train_datadir / 'train_metafile.txt'),
                              meldir=str(config_manager.train_datadir / 'mels'),
                              num_samples=config['n_samples'])  # (phonemes, mel)
val_samples, _ = load_files(metafile=str(config_manager.train_datadir / 'test_metafile.txt'),
                            meldir=str(config_manager.train_datadir / 'mels'),
                            num_samples=config['n_samples'])  # (phonemes, text, mel)

# get model, prepare data for model, create datasets
pretrained = True
if pretrained:
    model = config_manager.load_model(str(config_manager.weights_dir / 'ckpt-90'))
else:
    model = config_manager.get_model()
config_manager.compile_model(model)

data_prep = DataPrepper(config=config,
                        tokenizer=model.text_pipeline.tokenizer)
test_list = [data_prep(s) for s in val_samples]
train_dataset = Dataset(samples=train_samples,
                        preprocessor=data_prep,
                        batch_size=config['batch_size'],
                        mel_channels=config['mel_channels'],
                        shuffle=True)
val_dataset = Dataset(samples=val_samples,
                      preprocessor=data_prep,