def __init__(self, cfg: DictConfig, trainer: 'Trainer' = None):
    if isinstance(cfg, dict):
        cfg = OmegaConf.create(cfg)
    super().__init__(cfg=cfg, trainer=trainer)

    schema = OmegaConf.structured(FastSpeech2Config)
    # ModelPT ensures that cfg is a DictConfig, but do this second check in case ModelPT changes
    if isinstance(cfg, dict):
        cfg = OmegaConf.create(cfg)
    elif not isinstance(cfg, DictConfig):
        raise ValueError(f"cfg was type: {type(cfg)}. Expected either a dict or a DictConfig")
    # Ensure the passed cfg is compliant with the schema
    OmegaConf.merge(cfg, schema)

    # Variance predictor flags and duration loss weighting
    self.pitch = cfg.add_pitch_predictor
    self.energy = cfg.add_energy_predictor
    self.duration_coeff = cfg.duration_coeff

    # Core modules: mel-spectrogram preprocessor, encoder, mel decoder, and variance adaptor
    self.audio_to_melspec_preprocessor = instantiate(self._cfg.preprocessor)
    self.encoder = instantiate(self._cfg.encoder)
    self.mel_decoder = instantiate(self._cfg.decoder)
    self.variance_adapter = instantiate(self._cfg.variance_adaptor)

    # Losses: L2 on mel spectrograms, MSE for pitch/energy, and a dedicated duration loss
    self.loss = L2MelLoss()
    self.mseloss = torch.nn.MSELoss()
    self.durationloss = DurationLoss()

    self.log_train_images = False

    # Parser and mappings are used for inference only.
    self.parser = parsers.make_parser(name='en')
    with open(cfg.mappings_filepath, 'r') as f:
        mappings = json.load(f)
        self.word2phones = mappings['word2phones']
        self.phone2idx = mappings['phone2idx']
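
# Illustrative sketch (an assumption, not part of the model code): the rough shape of
# the config this constructor expects, inferred from the keys it reads above. The
# `_target_` paths below are hypothetical placeholders for whichever classes Hydra's
# `instantiate` should build; substitute the real module paths from your own config.
from omegaconf import OmegaConf

example_cfg = OmegaConf.create(
    {
        "add_pitch_predictor": True,
        "add_energy_predictor": True,
        "duration_coeff": 1.0,
        "mappings_filepath": "mappings.json",
        "preprocessor": {"_target_": "my_project.modules.MelSpectrogramPreprocessor"},
        "encoder": {"_target_": "my_project.modules.FastSpeech2Encoder"},
        "decoder": {"_target_": "my_project.modules.MelDecoder"},
        "variance_adaptor": {"_target_": "my_project.modules.VarianceAdaptor"},
    }
)
# A cfg like this (plus a mappings.json on disk) would then be passed to the
# constructor above, either directly or via the experiment's YAML config.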
def __init__(self, cfg: DictConfig, trainer: 'Trainer' = None):
    if isinstance(cfg, dict):
        cfg = OmegaConf.create(cfg)
    super().__init__(cfg=cfg, trainer=trainer)

    # Core modules: preprocessor, encoder, variance adaptor, and the waveform generator
    # with its multi-period and multi-scale discriminators
    self.audio_to_melspec_precessor = instantiate(cfg.preprocessor)
    self.encoder = instantiate(cfg.encoder)
    self.variance_adapter = instantiate(cfg.variance_adaptor)
    self.generator = instantiate(cfg.generator)
    self.multiperioddisc = MultiPeriodDiscriminator()
    self.multiscaledisc = MultiScaleDiscriminator()

    # Differentiable mel-spectrogram function used for the mel losses
    self.melspec_fn = instantiate(cfg.preprocessor, highfreq=None, use_grads=True)

    # Losses: L1 mel loss, duration loss, GAN feature-matching/discriminator/generator
    # losses, and MSE for pitch and energy
    self.mel_val_loss = L1MelLoss()
    self.durationloss = DurationLoss()
    self.feat_matching_loss = FeatureMatchingLoss()
    self.disc_loss = DiscriminatorLoss()
    self.gen_loss = GeneratorLoss()
    self.mseloss = torch.nn.MSELoss()

    # Variance predictor flags, loss weights, and audio/splicing parameters
    self.energy = cfg.add_energy_predictor
    self.pitch = cfg.add_pitch_predictor
    self.mel_loss_coeff = cfg.mel_loss_coeff
    self.pitch_loss_coeff = cfg.pitch_loss_coeff
    self.energy_loss_coeff = cfg.energy_loss_coeff
    self.splice_length = cfg.splice_length

    self.use_energy_pred = False
    self.use_pitch_pred = False
    self.log_train_images = False
    self.logged_real_samples = False
    self._tb_logger = None
    self.sample_rate = cfg.sample_rate
    self.hop_size = cfg.hop_size

    # Parser and mappings are used for inference only.
    self.parser = parsers.make_parser(name='en')
    if 'mappings_filepath' in cfg:
        mappings_filepath = cfg.get('mappings_filepath')
    else:
        logging.error(
            "ERROR: You must specify a mappings.json file in the config file under model.mappings_filepath."
        )
        raise ValueError("Missing 'mappings_filepath' in the model config.")
    mappings_filepath = self.register_artifact('mappings_filepath', mappings_filepath)
    with open(mappings_filepath, 'r') as f:
        mappings = json.load(f)
        self.word2phones = mappings['word2phones']
        self.phone2idx = mappings['phone2idx']
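
# Illustrative sketch (an assumption; the actual inference path is not shown here):
# how the word2phones and phone2idx mappings loaded above are typically used at
# inference time, turning words into phoneme strings and then into integer indices.
# The toy mappings below are made up and only mirror the structure mappings.json
# must provide ('word2phones' and 'phone2idx').
toy_mappings = {
    "word2phones": {"hello": ["HH", "AH0", "L", "OW1"], "world": ["W", "ER1", "L", "D"]},
    "phone2idx": {"HH": 0, "AH0": 1, "L": 2, "OW1": 3, "W": 4, "ER1": 5, "D": 6},
}

word2phones = toy_mappings["word2phones"]
phone2idx = toy_mappings["phone2idx"]

# Map each word to its phonemes, then each phoneme to its index.
tokens = [phone2idx[p] for word in "hello world".split() for p in word2phones[word]]
print(tokens)  # [0, 1, 2, 3, 4, 5, 2, 6]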