# Example 1
    def from_pretrained(cls, config, checkpoint_path):
        """Create a ConditionalWaveNet initialized from a saved checkpoint.

        Parameters
        ----------
        config : yacs.config.CfgNode
            Model configuration.
        checkpoint_path : Path or str
            Path of the pretrained model checkpoint, without extension name.

        Returns
        -------
        ConditionalWaveNet
            The model restored from the pretrained checkpoint.
        """
        # Instantiate the model with hyperparameters drawn from the config;
        # the mel-channel count comes from the data section, the rest from
        # the model section.
        model_cfg = config.model
        model = cls(
            upsample_factors=model_cfg.upsample_factors,
            n_stack=model_cfg.n_stack,
            n_loop=model_cfg.n_loop,
            residual_channels=model_cfg.residual_channels,
            output_dim=model_cfg.output_dim,
            n_mels=config.data.n_mels,
            filter_size=model_cfg.filter_size,
            loss_type=model_cfg.loss_type,
            log_scale_min=model_cfg.log_scale_min)
        # Print a parameter summary, then load the pretrained weights.
        layer_tools.summary(model)
        checkpoint.load_parameters(model, checkpoint_path=checkpoint_path)
        return model
# Example 2
    with dg.guard(place):
        # Build the conditioning encoder: upsamples input features by the
        # configured factors.
        model_config = config["model"]
        upsampling_factors = model_config["upsampling_factors"]
        encoder = UpsampleNet(upsampling_factors)

        # WaveNet decoder hyperparameters, all read from the model config.
        n_loop = model_config["n_loop"]
        n_layer = model_config["n_layer"]
        residual_channels = model_config["residual_channels"]
        output_dim = model_config["output_dim"]
        loss_type = model_config["loss_type"]
        log_scale_min = model_config["log_scale_min"]
        # NOTE(review): n_mels and filter_size are not defined in this
        # fragment — presumably bound earlier in the enclosing function;
        # verify against the full source.
        decoder = WaveNet(n_loop, n_layer, residual_channels, output_dim,
                          n_mels, filter_size, loss_type, log_scale_min)

        model = ConditionalWavenet(encoder, decoder)
        summary(model)

        # load model parameters: prefer an explicitly given checkpoint path,
        # otherwise load from the checkpoint directory under args.output
        # (optionally pinned to args.iteration).
        checkpoint_dir = os.path.join(args.output, "checkpoints")
        if args.checkpoint:
            iteration = io.load_parameters(model,
                                           checkpoint_path=args.checkpoint)
        else:
            iteration = io.load_parameters(model,
                                           checkpoint_dir=checkpoint_dir,
                                           iteration=args.iteration)
        # A freshly-initialized model (iteration 0) cannot be used here.
        assert iteration > 0, "A trained model is needed."

        # WARNING: don't forget to remove weight norm to re-compute each wrapped layer's weight
        # removing weight norm also speeds up computation
        # NOTE(review): loop body is cut off in this fragment.
        for layer in model.sublayers():
# Example 3
        # NOTE(review): this fragment starts mid-function; model_config and
        # most of the make_model() arguments below are bound earlier,
        # outside this view — verify against the full source.
        # Attention / decoding options read from the model config.
        use_memory_mask = model_config["use_memory_mask"]
        query_position_rate = model_config["query_position_rate"]
        key_position_rate = model_config["key_position_rate"]
        window_backward = model_config["window_backward"]
        window_ahead = model_config["window_ahead"]
        key_projection = model_config["key_projection"]
        value_projection = model_config["value_projection"]
        # Assemble the model (presumably Deep Voice 3, given the "dv3" name)
        # from the collected hyperparameters, then print a summary.
        dv3 = make_model(
            n_speakers, speaker_dim, speaker_embed_std, embed_dim, padding_idx,
            embedding_std, max_positions, n_vocab, freeze_embedding,
            filter_size, encoder_channels, n_mels, decoder_channels, r,
            trainable_positional_encodings, use_memory_mask,
            query_position_rate, key_position_rate, window_backward,
            window_ahead, key_projection, value_projection, downsample_factor,
            linear_dim, use_decoder_states, converter_channels, dropout)
        summary(dv3)

        # =========================loss=========================
        loss_config = config["loss"]
        masked_weight = loss_config["masked_loss_weight"]
        priority_freq = loss_config["priority_freq"]  # Hz
        # Convert the priority frequency (Hz) into a linear-spectrogram bin
        # index: the linear_dim bins span [0, sample_rate / 2] Hz.
        priority_bin = int(priority_freq / (0.5 * sample_rate) * linear_dim)
        priority_freq_weight = loss_config["priority_freq_weight"]
        binary_divergence_weight = loss_config["binary_divergence_weight"]
        guided_attention_sigma = loss_config["guided_attention_sigma"]
        # Build the TTS training loss; the call continues past this view.
        criterion = TTSLoss(
            masked_weight=masked_weight,
            priority_bin=priority_bin,
            priority_weight=priority_freq_weight,
            binary_divergence_weight=binary_divergence_weight,
            guided_attention_sigma=guided_attention_sigma,