def test_taco():
    B, T_out, D_out = 2, 400, 80
    r = 5
    T_encoder = T_out // r

    texts = ["Thank you very much.", "Hello"]
    seqs = [
        np.array(text_to_sequence(t, ["english_cleaners"]), dtype=np.int64)
        for t in texts
    ]
    input_lengths = np.array([len(s) for s in seqs])
    max_len = np.max(input_lengths)
    seqs = np.array([_pad(s, max_len) for s in seqs])

    x = torch.LongTensor(seqs)
    y = torch.rand(B, T_out, D_out)
    x = Variable(x)
    y = Variable(y)

    model = Tacotron(n_vocab=len(symbols), r=r)

    print("Encoder input shape: ", x.size())
    print("Decoder input shape: ", y.size())
    a, b, c = model(x, y, input_lengths=input_lengths)
    print("Mel shape:", a.size())
    print("Linear shape:", b.size())
    print("Attention shape:", c.size())

    assert c.size() == (B, T_encoder, max_len)

    # Test greedy decoding
    a, b, c = model(x, input_lengths=input_lengths)
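
# The _pad helper used above is not shown in this example; a minimal sketch,
# assuming simple right-padding of a 1-D token sequence with zeros:
def _pad(seq, max_len):
    """Right-pad a 1-D integer sequence with zeros up to max_len."""
    return np.pad(seq, (0, max_len - len(seq)),
                  mode="constant", constant_values=0)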

Example #2

    # Dataset and Dataloader setup
    dataset = PyTorchDataset(X, Mel, Y)
    data_loader = data_utils.DataLoader(dataset,
                                        batch_size=hparams.batch_size,
                                        num_workers=hparams.num_workers,
                                        shuffle=True,
                                        collate_fn=collate_fn,
                                        pin_memory=hparams.pin_memory)
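
    # The collate_fn referenced above is not shown in this example; a
    # hypothetical sketch, assuming each dataset item is a (text, mel, linear)
    # triple and batches are zero-padded to the longest item (padding targets
    # to a multiple of r, which the decoder may require, is omitted here):
    def _collate_fn_sketch(batch):
        input_lengths = np.array([len(item[0]) for item in batch])
        max_input_len = int(input_lengths.max())
        max_target_len = max(len(item[1]) for item in batch)

        def _pad_1d(x, length):
            return np.pad(x, (0, length - len(x)), mode="constant")

        def _pad_2d(x, length):
            return np.pad(x, ((0, length - len(x)), (0, 0)), mode="constant")

        x_batch = torch.LongTensor(
            np.stack([_pad_1d(item[0], max_input_len) for item in batch]))
        mel_batch = torch.FloatTensor(
            np.stack([_pad_2d(item[1], max_target_len) for item in batch]))
        y_batch = torch.FloatTensor(
            np.stack([_pad_2d(item[2], max_target_len) for item in batch]))
        return x_batch, input_lengths, mel_batch, y_batch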

    # Model
    model = Tacotron(
        n_vocab=len(symbols),
        embedding_dim=256,
        mel_dim=hparams.num_mels,
        linear_dim=hparams.num_freq,
        r=hparams.outputs_per_step,
        padding_idx=hparams.padding_idx,
        use_memory_mask=hparams.use_memory_mask,
    )
    optimizer = optim.Adam(model.parameters(),
                           lr=hparams.initial_learning_rate,
                           betas=(hparams.adam_beta1, hparams.adam_beta2),
                           weight_decay=hparams.weight_decay)
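
    # The loading block below expects a checkpoint dict with "state_dict" and
    # "optimizer" keys; such a file could be produced by a helper like this
    # (hypothetical, shown for illustration only and never called here):
    def _save_checkpoint_sketch(path, model, optimizer):
        torch.save({
            "state_dict": model.state_dict(),
            "optimizer": optimizer.state_dict(),
        }, path)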

    # Load checkpoint
    if checkpoint_path:
        print("Load checkpoint from: {}".format(checkpoint_path))
        checkpoint = torch.load(checkpoint_path)
        model.load_state_dict(checkpoint["state_dict"])
        optimizer.load_state_dict(checkpoint["optimizer"])
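
    # Sketch of a single optimization step, assuming the loader yields
    # (x, input_lengths, mel, y) batches whose targets are already padded to
    # the decoder's output length, and an L1 criterion on both outputs; the
    # example's actual training loop and loss are not shown here.
    import torch.nn.functional as F
    for x_batch, in_lengths, mel_batch, y_batch in data_loader:
        mel_out, linear_out, _ = model(x_batch, mel_batch,
                                       input_lengths=in_lengths)
        loss = F.l1_loss(mel_out, mel_batch) + F.l1_loss(linear_out, y_batch)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        break  # one illustrative step only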

Example #3

    # Discriminator input dimension: drop the masked mgc coefficients and,
    # optionally, append the linguistic condition dimension.
    D -= hp.mask_nth_mgc_for_adv_loss
    if hp.discriminator_linguistic_condition:
        D = D + X_data_min.shape[-1]
    hp.discriminator_params["in_dim"] = D

    dataset = PyTorchDataset(X, Mel, Y)
    data_loader = data_utils.DataLoader(
        dataset, batch_size=hparams.batch_size,
        num_workers=hparams.num_workers, shuffle=True,
        collate_fn=collate_fn, pin_memory=hparams.pin_memory)

    # Models
    model_g = Tacotron(n_vocab=len(symbols),
                       embedding_dim=256,
                       mel_dim=hparams.num_mels,
                       linear_dim=hparams.num_freq,
                       r=hparams.outputs_per_step,
                       padding_idx=hparams.padding_idx,
                       use_memory_mask=hparams.use_memory_mask,
                       )
    model_d = getattr(gantts_models, hp.discriminator)(**hp.discriminator_params)
    print("Generator:", model_g)
    print("Discriminator:", model_d)

    if use_cuda:
        model_g, model_d = model_g.cuda(), model_d.cuda()
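
    # Sketch of one adversarial update using the two optimizers constructed
    # below (assumptions: the discriminator scores mel frames with a sigmoid
    # output, and the generator combines L1 reconstruction with a BCE
    # adversarial term; this example's actual losses are not shown).
    def _adversarial_step_sketch(x, input_lengths, mel, y):
        import torch.nn.functional as F

        mel_out, linear_out, _ = model_g(x, mel, input_lengths=input_lengths)

        # Discriminator update: real mel vs. detached generated mel
        d_real, d_fake = model_d(mel), model_d(mel_out.detach())
        loss_d = (F.binary_cross_entropy(d_real, torch.ones_like(d_real)) +
                  F.binary_cross_entropy(d_fake, torch.zeros_like(d_fake)))
        optimizer_d.zero_grad()
        loss_d.backward()
        optimizer_d.step()

        # Generator update: reconstruction plus adversarial term
        d_fake = model_d(mel_out)
        loss_g = (F.l1_loss(mel_out, mel) + F.l1_loss(linear_out, y) +
                  F.binary_cross_entropy(d_fake, torch.ones_like(d_fake)))
        optimizer_g.zero_grad()
        loss_g.backward()
        optimizer_g.step()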

    # Optimizers
    optimizer_g = optim.Adam(model_g.parameters(),
                             lr=hparams.initial_learning_rate,
                             betas=(hparams.adam_beta1, hparams.adam_beta2),
                             weight_decay=hparams.weight_decay)
    optimizer_d = getattr(optim, hp.optimizer_d)(model_d.parameters(),