def main():
    """Run model training from the command line.

    Trainer arguments are parsed from the command line; arguments the trainer
    parser does not recognise are kept as config overrides. The config comes
    from one of three places, checked in this order:

    1. ``--config_path``: load that file.
    2. ``--continue_path``: load ``config.json`` from that experiment folder.
    3. neither: build the config from the console arguments alone.

    Training and evaluation samples are then loaded (from pre-computed
    features when ``config.feature_path`` is set, otherwise from raw wav
    files), the model is created and the trainer is run.
    """
    # init trainer args
    train_args = TrainingArgs()
    parser = train_args.init_argparse(arg_prefix="")

    # override trainer args from command-line args
    args, config_overrides = parser.parse_known_args()
    train_args.parse_args(args)

    # Decide where the config comes from. This is a single three-way decision:
    # the console-args fallback must be reachable when neither path is given,
    # otherwise `config` would be unbound further down.
    if args.config_path:
        # init from a file
        config_file = args.config_path
    elif args.continue_path:
        # continue from a prev experiment
        config_file = os.path.join(args.continue_path, "config.json")
    else:
        config_file = None

    if config_file is not None:
        config = load_config(config_file)
        if config_overrides:
            config.parse_known_args(config_overrides, relaxed_parser=True)
    else:
        # init from console args
        from TTS.config.shared_configs import BaseTrainingConfig  # pylint: disable=import-outside-toplevel

        config_base = BaseTrainingConfig()
        config_base.parse_known_args(config_overrides)
        config = register_config(config_base.model)()

    # load training samples
    if "feature_path" in config and config.feature_path:
        # load pre-computed features
        print(f" > Loading features from: {config.feature_path}")
        eval_samples, train_samples = load_wav_feat_data(
            config.data_path, config.feature_path, config.eval_split_size
        )
    else:
        # load data raw wav files
        eval_samples, train_samples = load_wav_data(config.data_path, config.eval_split_size)

    # setup audio processor
    ap = AudioProcessor(**config.audio)

    # init the model from config
    model = setup_model(config)

    # init the trainer and 🚀
    trainer = Trainer(
        train_args,
        config,
        config.output_path,
        model=model,
        train_samples=train_samples,
        eval_samples=eval_samples,
        training_assets={"audio_processor": ap},
        # the command line was already parsed above; don't let Trainer do it again
        parse_command_line_args=False,
    )
    trainer.fit()
def main():
    """Launch a training run from the terminal.

    Everything the trainer needs is produced by ``init_training`` from
    ``sys.argv``. The original docstring notes that this entry point is also
    required to run DDP training through ``distribute.py``.
    """
    (
        args,
        config,
        output_path,
        _,  # fourth value returned by init_training is not used here
        c_logger,
        dashboard_logger,
    ) = init_training(sys.argv)

    trainer = Trainer(
        args,
        config,
        output_path,
        c_logger,
        dashboard_logger,
        cudnn_benchmark=False,
    )
    trainer.fit()
def main():
    """Run training from ``sys.argv`` and clean up the experiment folder on failure.

    On ``KeyboardInterrupt`` the experiment folder is passed to
    ``remove_experiment_folder`` and the process exits with status 0. On any
    other exception the same cleanup is attempted, the traceback is printed
    and the process exits with status 1.
    """
    # Bound before the ``try`` so the handlers below can tell whether
    # ``init_training`` got far enough to hand back an output folder. Without
    # this, a failure inside ``init_training`` raises ``UnboundLocalError`` in
    # the handler and hides the real error.
    output_path = None
    try:
        args, config, output_path, _, c_logger, dashboard_logger = init_training(sys.argv)
        trainer = Trainer(args, config, output_path, c_logger, dashboard_logger)
        trainer.fit()
    except KeyboardInterrupt:
        if output_path is not None:
            remove_experiment_folder(output_path)
        try:
            sys.exit(0)
        except SystemExit:
            # hard exit without raising SystemExit any further
            os._exit(0)  # pylint: disable=protected-access
    except Exception:  # pylint: disable=broad-except
        if output_path is not None:
            remove_experiment_folder(output_path)
        traceback.print_exc()
        sys.exit(1)
optimizer="Adam", lr_scheduler=None, lr=3e-5, ) # init audio processor ap = AudioProcessor(**config.audio.to_dict()) # load training samples train_samples, eval_samples = load_tts_samples(dataset_config, eval_split=True) # init speaker manager for multi-speaker training # it mainly handles speaker-id to speaker-name for the model and the data-loader speaker_manager = SpeakerManager() speaker_manager.set_speaker_ids_from_data(train_samples + eval_samples) # init model model = Tacotron2(config, speaker_manager) # init the trainer and 🚀 trainer = Trainer( TrainingArgs(), config, output_path, model=model, train_samples=train_samples, eval_samples=eval_samples, training_assets={"audio_processor": ap}, ) trainer.fit()
def main():
    """Run `tts` model training from the command line.

    Trainer arguments are parsed from the command line; arguments the trainer
    parser does not recognise are kept as config overrides. The config comes
    from one of three places, checked in this order:

    1. ``--config_path``: load that file.
    2. ``--continue_path``: load ``config.json`` from that experiment folder.
    3. neither: build the config from the console arguments alone.

    After the samples and the audio processor are set up, a speaker manager
    and a language manager are created when the config asks for them, and
    their speaker / language counts are written back into the config before
    the model is built and the trainer is run.
    """
    # init trainer args
    train_args = TrainingArgs()
    parser = train_args.init_argparse(arg_prefix="")

    # override trainer args from command-line args
    args, config_overrides = parser.parse_known_args()
    train_args.parse_args(args)

    # Decide where the config comes from. This is a single three-way decision:
    # the console-args fallback must be reachable when neither path is given,
    # otherwise `config` would be unbound further down.
    if args.config_path:
        # init from a file
        config_file = args.config_path
    elif args.continue_path:
        # continue from a prev experiment
        config_file = os.path.join(args.continue_path, "config.json")
    else:
        config_file = None

    if config_file is not None:
        config = load_config(config_file)
        if config_overrides:
            config.parse_known_args(config_overrides, relaxed_parser=True)
    else:
        # init from console args
        from TTS.config.shared_configs import BaseTrainingConfig  # pylint: disable=import-outside-toplevel

        config_base = BaseTrainingConfig()
        config_base.parse_known_args(config_overrides)
        config = register_config(config_base.model)()

    # load training samples
    train_samples, eval_samples = load_tts_samples(config.datasets, eval_split=True)

    # setup audio processor
    ap = AudioProcessor(**config.audio)

    # init speaker manager
    if check_config_and_model_args(config, "use_speaker_embedding", True):
        speaker_manager = SpeakerManager(data_items=train_samples + eval_samples)
        # the speaker count lives on `model_args` when the config has one
        if hasattr(config, "model_args"):
            config.model_args.num_speakers = speaker_manager.num_speakers
        else:
            config.num_speakers = speaker_manager.num_speakers
    elif check_config_and_model_args(config, "use_d_vector_file", True):
        if check_config_and_model_args(config, "use_speaker_encoder_as_loss", True):
            # the speaker encoder is handed to the manager together with the d-vectors
            speaker_manager = SpeakerManager(
                d_vectors_file_path=config.model_args.d_vector_file,
                encoder_model_path=config.model_args.speaker_encoder_model_path,
                encoder_config_path=config.model_args.speaker_encoder_config_path,
                use_cuda=torch.cuda.is_available(),
            )
        else:
            speaker_manager = SpeakerManager(
                d_vectors_file_path=get_from_config_or_model_args(config, "d_vector_file")
            )
        # NOTE(review): applied to both d-vector branches, reconstructed from
        # the flattened source; confirm against the intended indentation.
        config.num_speakers = speaker_manager.num_speakers
        if hasattr(config, "model_args"):
            config.model_args.num_speakers = speaker_manager.num_speakers
    else:
        speaker_manager = None

    # init language manager
    if check_config_and_model_args(config, "use_language_embedding", True):
        language_manager = LanguageManager(config=config)
        if hasattr(config, "model_args"):
            config.model_args.num_languages = language_manager.num_languages
        else:
            config.num_languages = language_manager.num_languages
    else:
        language_manager = None

    # init the model from config
    model = setup_model(config, speaker_manager, language_manager)

    # init the trainer and 🚀
    trainer = Trainer(
        train_args,
        config,
        config.output_path,
        model=model,
        train_samples=train_samples,
        eval_samples=eval_samples,
        training_assets={"audio_processor": ap},
        # the command line was already parsed above; don't let Trainer do it again
        parse_command_line_args=False,
    )
    trainer.fit()