Exemple #1
0
def _get_model(n_speakers=1, speaker_embed_dim=None):
    """Construct a model via ``build_deepvoice3``.

    Args:
        n_speakers: Passed through as the ``n_speakers`` keyword.
        speaker_embed_dim: Passed through as the ``speaker_embed_dim``
            keyword.

    Returns:
        The object returned by ``build_deepvoice3``.
    """
    # Everything except the two parameters is read from names defined
    # outside this function (n_vocab, num_mels, num_freq, ...).
    settings = dict(
        n_vocab=n_vocab,
        embed_dim=256,
        mel_dim=num_mels,
        linear_dim=num_freq,
        r=outputs_per_step,
        padding_idx=padding_idx,
        n_speakers=n_speakers,
        speaker_embed_dim=speaker_embed_dim,
    )
    return build_deepvoice3(**settings)
Exemple #2
0
def build_model():
    """Construct a model via ``build_deepvoice3`` from ``hparams``.

    The vocabulary size comes from ``_frontend.n_vocab``; every other
    setting is read from ``hparams``.

    Returns:
        The object returned by ``build_deepvoice3``.
    """
    settings = {
        "n_vocab": _frontend.n_vocab,
        "embed_dim": hparams.text_embed_dim,
        "mel_dim": hparams.num_mels,
        # Linear output size is derived from the FFT size.
        "linear_dim": hparams.fft_size // 2 + 1,
        "r": hparams.outputs_per_step,
        "padding_idx": hparams.padding_idx,
        "dropout": hparams.dropout,
        "kernel_size": hparams.kernel_size,
        "encoder_channels": hparams.encoder_channels,
        "decoder_channels": hparams.decoder_channels,
        "converter_channels": hparams.converter_channels,
        "use_memory_mask": hparams.use_memory_mask,
        "trainable_positional_encodings":
            hparams.trainable_positional_encodings,
    }
    return build_deepvoice3(**settings)
def _get_model(n_speakers=1, speaker_embed_dim=None):
    """Construct a model via ``build_deepvoice3`` with fixed layer sizes.

    Args:
        n_speakers: Passed through as the ``n_speakers`` keyword.
        speaker_embed_dim: Passed through as the ``speaker_embed_dim``
            keyword.

    Returns:
        The object returned by ``build_deepvoice3``.
    """
    # Values read from names defined outside this function, plus the two
    # caller-supplied speaker options.
    settings = dict(
        n_vocab=n_vocab,
        embed_dim=256,
        mel_dim=num_mels,
        linear_dim=num_freq,
        r=outputs_per_step,
        padding_idx=padding_idx,
        n_speakers=n_speakers,
        speaker_embed_dim=speaker_embed_dim,
    )
    # Hard-coded hyper-parameters. The dropout expression is kept as
    # written so the exact float value passed on is unchanged.
    settings.update(
        dropout=1 - 0.95,
        kernel_size=5,
        encoder_channels=128,
        decoder_channels=256,
        converter_channels=256,
    )
    return build_deepvoice3(**settings)
    max_decoder_steps = int(args["--max-decoder-steps"])
    file_name_suffix = args["--file-name-suffix"]

    # Override hyper parameters
    hparams.parse(args["--hparams"])
    assert hparams.name == "deepvoice3"

    _frontend = getattr(frontend, hparams.frontend)

    # Model
    model = build_deepvoice3(n_vocab=_frontend.n_vocab,
                             embed_dim=256,
                             mel_dim=hparams.num_mels,
                             linear_dim=hparams.num_freq,
                             r=hparams.outputs_per_step,
                             padding_idx=hparams.padding_idx,
                             dropout=hparams.dropout,
                             kernel_size=hparams.kernel_size,
                             encoder_channels=hparams.encoder_channels,
                             decoder_channels=hparams.decoder_channels,
                             converter_channels=hparams.converter_channels,
                             )

    checkpoint = torch.load(checkpoint_path)
    model.load_state_dict(checkpoint["state_dict"])
    model.decoder.max_decoder_steps = max_decoder_steps
    model.make_generation_fast_()

    os.makedirs(dst_dir, exist_ok=True)

    with open(text_list_file_path, "rb") as f:
        lines = f.readlines()