# If characters are not defined in the config, default characters are passed to the config tokenizer, config = TTSTokenizer.init_from_config(config) # LOAD DATA SAMPLES # Each sample is a list of ```[text, audio_file_path, speaker_name]``` # You can define your custom sample loader returning the list of samples. # Or define your custom formatter and pass it to the `load_tts_samples`. # Check `TTS.tts.datasets.load_tts_samples` for more details. train_samples, eval_samples = load_tts_samples( dataset_config, eval_split=True, eval_split_max_size=config.eval_split_max_size, eval_split_size=config.eval_split_size, ) # INITIALIZE THE MODEL # Models take a config object and a speaker manager as input # Config defines the details of the model like the number of layers, the size of the embedding, etc. # Speaker manager is used by multi-speaker models. model = GlowTTS(config, ap, tokenizer, speaker_manager=None) # INITIALIZE THE TRAINER # Trainer provides a generic API to train all the 🐸TTS models with all its perks like mixed-precision training, # distributed training, etc. trainer = Trainer( TrainerArgs(), config, output_path, model=model, train_samples=train_samples, eval_samples=eval_samples ) # AND... 3,2,1... 🚀 trainer.fit()
# If characters are not defined in the config, default characters are passed to the config tokenizer, config = TTSTokenizer.init_from_config(config) # LOAD DATA SAMPLES # Each sample is a list of ```[text, audio_file_path, speaker_name]``` # You can define your custom sample loader returning the list of samples. # Or define your custom formatter and pass it to the `load_tts_samples`. # Check `TTS.tts.datasets.load_tts_samples` for more details. train_samples, eval_samples = load_tts_samples( dataset_config, eval_split=True, eval_split_max_size=config.eval_split_max_size, eval_split_size=config.eval_split_size, ) # init model model = ForwardTTS(config, ap, tokenizer) # INITIALIZE THE TRAINER # Trainer provides a generic API to train all the 🐸TTS models with all its perks like mixed-precision training, # distributed training, etc. trainer = Trainer(TrainerArgs(), config, output_path, model=model, train_samples=train_samples, eval_samples=eval_samples) # AND... 3,2,1... 🚀 trainer.fit()
) # download dataset if not already present if not os.path.exists(config.data_path): print("Downloading dataset") download_path = os.path.abspath( os.path.join(os.path.abspath(config.data_path), "../../")) download_thorsten_de(download_path) # init audio processor ap = AudioProcessor(**config.audio.to_dict()) # load training samples eval_samples, train_samples = load_wav_data(config.data_path, config.eval_split_size) # init model model = Wavegrad(config) # init the trainer and 🚀 trainer = Trainer( TrainerArgs(), config, output_path, model=model, train_samples=train_samples, eval_samples=eval_samples, training_assets={"audio_processor": ap}, ) trainer.fit()