def load(self, t_checkpoint_path, v_checkpoint_path, t_config_path=None, v_config_path=None, model_name='tacotron'):
    """Load the text-to-mel model and the MelGAN vocoder.

    The backend is chosen from the checkpoint file name: a path ending in
    ``.pt`` is restored into a ``Tacotron2`` instance (``model_name`` becomes
    ``'nvidia'``); every other path goes through the config-driven model
    setup (``model_name`` becomes ``'coqui'``).

    Args:
        t_checkpoint_path: path to the text-to-mel checkpoint.
        v_checkpoint_path: path to the weights loaded into the vocoder's
            ``mel2wav`` module.
        t_config_path: model config; only read on the ``'coqui'`` branch.
        v_config_path: vocoder config used to build ``self.vocoder_ap``.
        model_name: not read by this method; ``self.model_name`` is derived
            from the checkpoint suffix instead.
    """
    is_pt_checkpoint = t_checkpoint_path.endswith('.pt')
    self.model_name = 'nvidia' if is_pt_checkpoint else 'coqui'
    print('Constructing model: %s' % self.model_name)

    if is_pt_checkpoint:
        # Default hyper-parameters, then weights from the checkpoint's
        # 'state_dict' entry; the model is left in eval mode.
        self.model = Tacotron2(create_hparams())
        checkpoint = torch.load(t_checkpoint_path, map_location='cpu')
        self.model.load_state_dict(checkpoint['state_dict'])
        self.model.eval()
    else:
        # Any other suffix (e.g. '.pth.tar') is handled as a config-driven model.
        self.tts_config = load_config(t_config_path)
        self.tts_model = setup_tts_model(config=self.tts_config)
        self.tts_model.load_checkpoint(self.tts_config, t_checkpoint_path, eval=True)
        self.ap = AudioProcessor(verbose=False, **self.tts_config.audio)

    # NOTE(review): the vocoder section is assumed to run for both backends;
    # the flattened source does not show its nesting -- confirm.
    vocoder_config = load_config(v_config_path)
    self.vocoder_ap = AudioProcessor(verbose=False, **vocoder_config.audio)

    # MelGAN (neurips release) from torch.hub turns mels into audio; its
    # generator weights come from the local checkpoint.
    self.vocoder = torch.hub.load('descriptinc/melgan-neurips', 'load_melgan')
    generator_state = torch.load(v_checkpoint_path, map_location='cpu')
    self.vocoder.mel2wav.load_state_dict(generator_state)
def _load_tts(self, tts_checkpoint: str, tts_config_path: str, use_cuda: bool) -> None:
    """Build the TTS model from its config and restore its weights.

    The config is loaded and checked, the model is created from it, the
    checkpoint is loaded with ``eval=True`` and the model is optionally moved
    to CUDA. When an encoder checkpoint is set and the model has a
    ``speaker_manager`` attribute, that manager's encoder is initialised last.

    Args:
        tts_checkpoint (str): path to the model checkpoint.
        tts_config_path (str): path to the model config file.
        use_cuda (bool): enable/disable CUDA use.

    Raises:
        ValueError: if the config enables phonemes but defines no phonemizer.
    """
    config = load_config(tts_config_path)
    self.tts_config = config
    if config["use_phonemes"] and config["phonemizer"] is None:
        raise ValueError("Phonemizer is not defined in the TTS config.")

    self.tts_model = setup_tts_model(config=config)

    # No encoder checkpoint given explicitly: fall back to the TTS config.
    if not self.encoder_checkpoint:
        self._set_speaker_encoder_paths_from_tts_config()

    self.tts_model.load_checkpoint(self.tts_config, tts_checkpoint, eval=True)
    if use_cuda:
        self.tts_model.cuda()

    # Read the attribute again here: the fallback call above can change it
    # (presumably it fills it in from the config -- verify).
    if not self.encoder_checkpoint:
        return
    if hasattr(self.tts_model, "speaker_manager"):
        self.tts_model.speaker_manager.init_encoder(
            self.encoder_checkpoint,
            self.encoder_config,
            use_cuda,
        )
def _load_tts(self, tts_checkpoint: str, tts_config_path: str, use_cuda: bool) -> None:
    """Build the TTS model together with its speaker/language managers.

    Order of operations: load the config, create the audio processor, create
    the speaker and language managers, pass the speaker manager through the
    speaker-encoder initialisation, build the model, load the checkpoint with
    ``eval=True`` and optionally move the model to CUDA.

    Args:
        tts_checkpoint (str): path to the model checkpoint.
        tts_config_path (str): path to the model config file.
        use_cuda (bool): enable/disable CUDA use.
    """
    config = load_config(tts_config_path)
    self.tts_config = config
    self.use_phonemes = config.use_phonemes
    self.ap = AudioProcessor(verbose=False, **config.audio)

    speaker_manager = self._init_speaker_manager()
    language_manager = self._init_language_manager()

    # No encoder checkpoint given explicitly: fall back to the TTS config.
    if not self.encoder_checkpoint:
        self._set_speaker_encoder_paths_from_tts_config()
    speaker_manager = self._init_speaker_encoder(speaker_manager)

    # The language manager is only passed on when one was created.
    model_kwargs = {"config": self.tts_config, "speaker_manager": speaker_manager}
    if language_manager is not None:
        model_kwargs["language_manager"] = language_manager
    self.tts_model = setup_tts_model(**model_kwargs)

    self.tts_model.load_checkpoint(self.tts_config, tts_checkpoint, eval=True)
    if use_cuda:
        self.tts_model.cuda()
def _load_tts(self, tts_checkpoint: str, tts_config_path: str, use_cuda: bool) -> None:
    """Build the TTS model and its audio processor from a config file.

    Stores the loaded config, the ``use_phonemes`` flag, the audio processor
    and the model on ``self``; the checkpoint is loaded with ``eval=True``.

    Args:
        tts_checkpoint (str): path to the model checkpoint.
        tts_config_path (str): path to the model config file.
        use_cuda (bool): enable/disable CUDA use.
    """
    config = load_config(tts_config_path)
    self.tts_config = config
    self.use_phonemes = config.use_phonemes
    self.ap = AudioProcessor(verbose=False, **config.audio)

    model = setup_tts_model(config=config)
    self.tts_model = model
    model.load_checkpoint(config, tts_checkpoint, eval=True)
    if use_cuda:
        model.cuda()

    # Runs last, once config and model are both in place.
    self._set_tts_speaker_file()
def _load_tts(self, tts_checkpoint: str, tts_config_path: str, use_cuda: bool) -> None:
    """Build the TTS model, repairing stale file paths in its config first.

    If the config's ``audio.stats_path`` or ``model_args.speakers_file`` is
    set but does not point at an existing file, it is rewritten to a file of
    the same name in the checkpoint's directory. After that the audio
    processor and model are created and the checkpoint is loaded with
    ``eval=True``.

    Args:
        tts_checkpoint (str): path to the model checkpoint.
        tts_config_path (str): path to the model config file.
        use_cuda (bool): enable/disable CUDA use.
    """

    def _beside_checkpoint(path: str) -> str:
        # Same file name, relocated to the directory holding the checkpoint.
        return os.path.join(os.path.dirname(tts_checkpoint), os.path.basename(path))

    config = load_config(tts_config_path)
    self.tts_config = config

    # Patch stats_path
    stats_path = config["audio"].get("stats_path", "")
    if stats_path and not os.path.isfile(stats_path):
        config["audio"]["stats_path"] = _beside_checkpoint(stats_path)

    # Patch speakers file
    speakers_file = config.get("model_args", {}).get("speakers_file", "")
    if speakers_file and not os.path.isfile(speakers_file):
        config["model_args"]["speakers_file"] = _beside_checkpoint(speakers_file)

    self.use_phonemes = config.use_phonemes
    self.ap = AudioProcessor(verbose=False, **config.audio)

    self.tts_model = setup_tts_model(config=config)
    self.tts_model.load_checkpoint(config, tts_checkpoint, eval=True)
    if use_cuda:
        self.tts_model.cuda()

    # Runs last, once config and model are both in place.
    self._set_tts_speaker_file()