train_has_files = len([ batch_file for batch_file in train_predictions_dir.iterdir() if batch_file.suffix == '.npy' ]) model = config_manager.load_model() running_predictions = args.recompute_pred or (val_has_files == 0) or (train_has_files == 0) if running_predictions: if args.store_predictions: print( '\nWARNING: storing predictions can take a lot of disk space (~40GB)\n' ) train_meta = config_manager.train_datadir / 'train_metafile.txt' test_meta = config_manager.train_datadir / 'test_metafile.txt' train_samples, _ = load_files( metafile=str(train_meta), meldir=str(meldir), num_samples=config['n_samples']) # (phonemes, mel) val_samples, _ = load_files( metafile=str(test_meta), meldir=str(meldir), num_samples=config['n_samples']) # (phonemes, text, mel) # get model, prepare data for model, create datasets data_prep = DataPrepper(config=config, tokenizer=model.text_pipeline.tokenizer) script_batch_size = 5 * config['batch_size'] # faster parallel computation train_dataset = Dataset(samples=train_samples, preprocessor=data_prep, batch_size=script_batch_size, shuffle=False,
action='store_true', help="deletes weights under this config's folder.") parser.add_argument('--session_name', dest='session_name', default=None) args = parser.parse_args() config_manager = ConfigManager(config_path=args.config, model_kind='autoregressive', session_name=args.session_name) config = config_manager.config config_manager.create_remove_dirs(clear_dir=args.clear_dir, clear_logs=args.clear_logs, clear_weights=args.clear_weights) config_manager.dump_config() config_manager.print_config() train_samples, _ = load_files( metafile=str(config_manager.train_datadir / 'train_metafile.txt'), meldir=str(config_manager.train_datadir / 'mels'), num_samples=config['n_samples']) # (phonemes, mel) val_samples, _ = load_files( metafile=str(config_manager.train_datadir / 'test_metafile.txt'), meldir=str(config_manager.train_datadir / 'mels'), num_samples=config['n_samples']) # (phonemes, text, mel) # get model, prepare data for model, create datasets model = config_manager.get_model() config_manager.compile_model(model) data_prep = DataPrepper(config=config, tokenizer=model.text_pipeline.tokenizer) test_list = [data_prep(s) for s in val_samples] train_dataset = Dataset(samples=train_samples, preprocessor=data_prep, batch_size=config['batch_size'],