Example #1
def test_functional_mlpg():
    static_dim = 2
    T = 5

    for windows in _get_windows_set():
        torch.manual_seed(1234)
        means = torch.rand(T, static_dim * len(windows))
        variances = torch.ones(static_dim * len(windows))

        y = G.mlpg(means.numpy(), variances.numpy(), windows)
        y = Variable(torch.from_numpy(y), requires_grad=False)

        means = Variable(means, requires_grad=True)

        # mlpg
        y_hat = AF.mlpg(means, variances, windows)
        assert np.allclose(y.data.numpy(), y_hat.data.numpy())

        # Test backward pass
        nn.MSELoss()(y_hat, y).backward()

        # unit_variance_mlpg
        R = torch.from_numpy(G.unit_variance_mlpg_matrix(windows, T))
        y_hat = AF.unit_variance_mlpg(R, means)
        assert np.allclose(y.data.numpy(), y_hat.data.numpy())

        nn.MSELoss()(y_hat, y).backward()

        # Test 3D tensor inputs
        y_hat = AF.unit_variance_mlpg(R, means.view(1, -1, means.size(-1)))
        assert np.allclose(
            y.data.numpy(), y_hat.data.view(-1, static_dim).numpy())

        nn.MSELoss()(y_hat.view(-1, static_dim), y).backward()
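
Note: the snippets on this page are extracted from a larger test/training code base and omit their imports. A minimal preamble consistent with the names used in Example #1, plus a reconstruction of the _get_windows_set helper based on the window tuples spelled out in Examples #2 and #7, might look like the sketch below; unit_variance_mlpg_matrix, multi_stream_mlpg, get_static_features, MaskedMSELoss, and the model classes used later are project-local helpers that are assumed to exist and are not shown here.

import numpy as np
import torch
import torch.nn as nn
from torch.autograd import Variable, gradcheck

from nnmnkwii import paramgen as G      # numpy-side MLPG utilities
from nnmnkwii import autograd as AF     # PyTorch autograd wrappers
from nnmnkwii.autograd import MLPG, UnitVarianceMLPG


def _get_windows_set():
    # Window sets: static only, static + delta, static + delta + delta-delta,
    # matching the tuples spelled out in Examples #2 and #7.
    return [
        [(0, 0, np.array([1.0]))],
        [(0, 0, np.array([1.0])),
         (1, 1, np.array([-0.5, 0.0, 0.5]))],
        [(0, 0, np.array([1.0])),
         (1, 1, np.array([-0.5, 0.0, 0.5])),
         (1, 1, np.array([1.0, -2.0, 1.0]))],
    ]
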
Example #2
def test_multi_stream_mlpg():
    windows = [
        (0, 0, np.array([1.0])),
        (1, 1, np.array([-0.5, 0.0, 0.5])),
        (1, 1, np.array([1.0, -2.0, 1.0])),
    ]
    in_dim = 187
    T = 100
    R = unit_variance_mlpg_matrix(windows, T)
    R = torch.from_numpy(R)

    batch_size = 32
    x = Variable(torch.rand(batch_size, T, in_dim))

    stream_sizes = [180, 3, 1, 3]
    has_dynamic_features = [True, True, False, True]
    y = multi_stream_mlpg(x, R, stream_sizes, has_dynamic_features)
    assert y.size() == (batch_size, T, 60 + 1 + 1 + 1)

    mgc = y[:, :, : 60]
    lf0 = y[:, :, 60]
    vuv = y[:, :, 61]
    bap = y[:, :, 62]

    assert (unit_variance_mlpg(R, x[:, :, : 180]) == mgc).data.all()
    assert (unit_variance_mlpg(R, x[:, :, 180: 180 + 3]).squeeze(-1) == lf0).data.all()
    assert (x[:, :, 183] == vuv).data.all()
    assert (unit_variance_mlpg(R, x[:, :, 184: 184 + 3]).squeeze(-1) == bap).data.all()

    static_features = get_static_features(
        x, len(windows), stream_sizes, has_dynamic_features)
    assert static_features.size() == y.size()
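
For reference, the asserted output width (60 + 1 + 1 + 1) follows directly from the stream configuration; a small sketch of the arithmetic (not part of the test):

# Streams with dynamic features are collapsed to their static part by MLPG
# (stream size divided by the number of windows); streams without dynamic
# features are passed through unchanged.
num_windows = 3                      # static + delta + delta-delta
stream_sizes = [180, 3, 1, 3]        # mgc, lf0, vuv, bap (with deltas where applicable)
has_dynamic_features = [True, True, False, True]
out_sizes = [size // num_windows if dynamic else size
             for size, dynamic in zip(stream_sizes, has_dynamic_features)]
print(out_sizes, sum(out_sizes))     # [60, 1, 1, 1] 63
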
Example #3
def test_multi_stream_mlpg():
    windows = [
        (0, 0, np.array([1.0])),
        (1, 1, np.array([-0.5, 0.0, 0.5])),
        (1, 1, np.array([1.0, -2.0, 1.0])),
    ]
    in_dim = 187
    T = 100
    R = unit_variance_mlpg_matrix(windows, T)
    R = torch.from_numpy(R)

    batch_size = 32
    x = Variable(torch.rand(batch_size, T, in_dim))

    stream_sizes = [180, 3, 1, 3]
    has_dynamic_features = [True, True, False, True]
    y = multi_stream_mlpg(x, R, stream_sizes, has_dynamic_features)
    assert y.size() == (batch_size, T, 60 + 1 + 1 + 1)

    mgc = y[:, :, :60]
    lf0 = y[:, :, 60]
    vuv = y[:, :, 61]
    bap = y[:, :, 62]

    assert (unit_variance_mlpg(R, x[:, :, :180]) == mgc).data.all()
    assert (unit_variance_mlpg(R, x[:, :, 180:180 + 3]).squeeze(-1) == lf0).data.all()
    assert (x[:, :, 183] == vuv).data.all()
    assert (unit_variance_mlpg(R, x[:, :, 184:184 + 3]).squeeze(-1) == bap).data.all()

    static_features = get_static_features(x, len(windows), stream_sizes,
                                          has_dynamic_features)
    assert static_features.size() == y.size()
Example #4
def test_vc_from_path(model, path, data_mean, data_std, diffvc=True):
    model.eval()

    fs, x = wavfile.read(path)
    hop_length = int(fs * (hp.frame_period * 0.001))  # frame shift in samples
    x = x.astype(np.float64)
    f0, timeaxis = pyworld.dio(x, fs, frame_period=hp.frame_period)
    f0 = pyworld.stonemask(x, f0, timeaxis, fs)
    spectrogram = pyworld.cheaptrick(x, f0, timeaxis, fs)
    aperiodicity = pyworld.d4c(x, f0, timeaxis, fs)
    alpha = pysptk.util.mcepalpha(fs)
    mc = pysptk.sp2mc(spectrogram, order=hp.order, alpha=alpha)
    c0, mc = mc[:, 0], mc[:, 1:]
    static_dim = mc.shape[-1]
    mc = P.modspec_smoothing(mc, fs / hop_length, cutoff=50)
    mc = P.delta_features(mc, hp.windows).astype(np.float32)

    T = mc.shape[0]

    inputs = mc[:, :static_dim].copy()

    # Normalization
    mc_scaled = P.scale(mc, data_mean, data_std)

    # Apply model
    mc_scaled = Variable(torch.from_numpy(mc_scaled))
    R = unit_variance_mlpg_matrix(hp.windows, T)
    R = torch.from_numpy(R)
    y_hat, y_hat_static = model(mc_scaled, R)
    mc_static_pred = y_hat_static.data.cpu().numpy().reshape(-1, static_dim)

    # Denormalize
    mc_static_pred = P.inv_scale(mc_static_pred, data_mean[:static_dim],
                                 data_std[:static_dim])

    outputs = mc_static_pred.copy()

    if diffvc:
        mc_static_pred = mc_static_pred - mc[:, :static_dim]

    mc = np.hstack((c0[:, None], mc_static_pred))
    if diffvc:
        mc[:, 0] = 0  # remove power coefficients
        engine = Synthesizer(MLSADF(order=hp.order, alpha=alpha),
                             hopsize=hop_length)
        b = pysptk.mc2b(mc.astype(np.float64), alpha=alpha)
        waveform = engine.synthesis(x, b)
    else:
        fftlen = pyworld.get_cheaptrick_fft_size(fs)
        spectrogram = pysptk.mc2sp(mc.astype(np.float64),
                                   alpha=alpha,
                                   fftlen=fftlen)
        waveform = pyworld.synthesize(f0, spectrogram, aperiodicity, fs,
                                      hp.frame_period)

    return waveform, inputs, outputs
Example #5
def test_unit_variance_mlpg():
    static_dim = 2
    T = 10

    for windows in _get_windows_set():
        means = np.random.rand(T, static_dim * len(windows))
        variances = np.ones(static_dim * len(windows))
        y = G.mlpg(means, variances, windows)

        R = G.unit_variance_mlpg_matrix(windows, T)
        y_hat = R.dot(G.reshape_means(means, static_dim))
        assert np.allclose(y_hat, y)
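
The identity exercised here is a plain matrix product over reshaped means; a standalone shape check, assuming the nnmnkwii imports sketched after Example #1:

import numpy as np
from nnmnkwii import paramgen as G

windows = [
    (0, 0, np.array([1.0])),
    (1, 1, np.array([-0.5, 0.0, 0.5])),
]
T, static_dim = 10, 2
means = np.random.rand(T, static_dim * len(windows))

R = G.unit_variance_mlpg_matrix(windows, T)     # (T, len(windows) * T)
reshaped = G.reshape_means(means, static_dim)   # (len(windows) * T, static_dim)
y = R.dot(reshaped)                             # (T, static_dim), same as G.mlpg with unit variances
print(R.shape, reshaped.shape, y.shape)
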
Example #6
def test_unit_variance_mlpg_gradcheck():
    static_dim = 2
    T = 10

    for windows in _get_windows_set():
        torch.manual_seed(1234)
        # Means, input for MLPG
        means = Variable(torch.rand(T, static_dim * len(windows)),
                         requires_grad=True)

        # Input for UnitVarianceMLPG
        reshaped_means = G.reshape_means(
            means.data.clone().numpy(), static_dim)
        reshaped_means = Variable(torch.from_numpy(reshaped_means),
                                  requires_grad=True)

        # Compute MLPG matrix
        R = G.unit_variance_mlpg_matrix(windows, T).astype(np.float32)
        R = torch.from_numpy(R)

        # UnitVarianceMLPG accepts either means or reshaped_means as input
        y1 = UnitVarianceMLPG(R)(means)
        y2 = UnitVarianceMLPG(R)(reshaped_means)

        # Unit variances
        variances = torch.ones(static_dim * len(windows)
                               ).expand(T, static_dim * len(windows))
        y_hat = MLPG(variances, windows)(means)

        # Make sure UnitVarianceMLPG and MLPG give the same result
        # when unit variances are used
        for y in [y1, y2]:
            assert np.allclose(y.data.numpy(), y_hat.data.numpy())

        # Grad check
        inputs = (reshaped_means,)
        assert gradcheck(UnitVarianceMLPG(R),
                         inputs, eps=1e-3, atol=1e-3)

        inputs = (means,)
        assert gradcheck(UnitVarianceMLPG(R),
                         inputs, eps=1e-3, atol=1e-3)
Example #7
def test_model():
    windows = [
        (0, 0, np.array([1.0])),
        (1, 1, np.array([-0.5, 0.0, 0.5])),
    ]

    model = In2OutHighwayNet()
    print(model)
    assert model.include_parameter_generation()

    in_dim = 118
    static_dim = in_dim // 2
    T = 100
    x = Variable(torch.rand(1, T, in_dim))
    R = unit_variance_mlpg_matrix(windows, T)
    R = torch.from_numpy(R)
    _, y = model(x, R)

    print(y.size())
    assert y.size(-1) == static_dim

    # Mini batch
    batch_size = 32
    x = Variable(torch.rand(batch_size, T, in_dim))
    _, y_hat = model(x, R)
    y = Variable(torch.rand(batch_size, T, static_dim), requires_grad=False)

    lengths = [np.random.randint(50, T - 1)
               for _ in range(batch_size - 1)] + [T]
    lengths = Variable(torch.LongTensor(lengths), requires_grad=False)
    print(x.size(), y.size(), lengths.size())
    MaskedMSELoss()(y_hat, y, lengths).backward()
    print(y.size())
    assert y.size(-1) == static_dim
    assert y.size(0) == batch_size

    # cuda
    if torch.cuda.is_available():
        model = model.cuda()
        x = x.cuda()
        R = R.cuda()
        _, y_hat = model(x, R)
Example #8
def test_model():
    windows = [
        (0, 0, np.array([1.0])),
        (1, 1, np.array([-0.5, 0.0, 0.5])),
    ]

    model = In2OutHighwayNet()
    print(model)
    assert model.include_parameter_generation()

    in_dim = 118
    static_dim = in_dim // 2
    T = 100
    x = Variable(torch.rand(1, T, in_dim))
    R = unit_variance_mlpg_matrix(windows, T)
    R = torch.from_numpy(R)
    _, y = model(x, R)

    print(y.size())
    assert y.size(-1) == static_dim

    # Mini batch
    batch_size = 32
    x = Variable(torch.rand(batch_size, T, in_dim))
    _, y_hat = model(x, R)
    y = Variable(torch.rand(batch_size, T, static_dim), requires_grad=False)

    lengths = [np.random.randint(50, T - 1) for _ in range(batch_size - 1)] + [T]
    lengths = Variable(torch.LongTensor(lengths), requires_grad=False)
    print(x.size(), y.size(), lengths.size())
    MaskedMSELoss()(y_hat, y, lengths).backward()
    print(y.size())
    assert y.size(-1) == static_dim
    assert y.size(0) == batch_size

    # cuda
    if torch.cuda.is_available():
        model = model.cuda()
        x = x.cuda()
        R = R.cuda()
        _, y_hat = model(x, R)
Example #9
def test_minibatch_unit_variance_mlpg_gradcheck():
    static_dim = 2
    T = 5

    for windows in _get_windows_set():
        batch_size = 5
        torch.manual_seed(1234)

        # Prepare inputs
        means = torch.rand(T, static_dim * len(windows))
        means_expanded = means.expand(
            batch_size, means.shape[0], means.shape[1])
        reshaped_means = torch.from_numpy(
            G.reshape_means(means.numpy(), static_dim))
        reshaped_means_expanded = reshaped_means.expand(
            batch_size, reshaped_means.shape[0], reshaped_means.shape[1])

        # Target
        y = G.mlpg(means.numpy(), np.ones(static_dim * len(windows)), windows)
        y = Variable(torch.from_numpy(y), requires_grad=False)
        y_expanded = y.expand(batch_size, y.size(0), y.size(1))

        # Pack into variables
        means = Variable(means, requires_grad=True)
        means_expanded = Variable(means_expanded, requires_grad=True)
        reshaped_means = Variable(reshaped_means, requires_grad=True)
        reshaped_means_expanded = Variable(
            reshaped_means_expanded, requires_grad=True)

        # Case 1: 2d with reshaped means
        R = torch.from_numpy(G.unit_variance_mlpg_matrix(windows, T))
        y_hat1 = AF.unit_variance_mlpg(R, reshaped_means)

        # Case 2: 3d with reshaped means
        y_hat2 = AF.unit_variance_mlpg(R, reshaped_means_expanded)
        for i in range(batch_size):
            assert np.allclose(y_hat1.data.numpy(), y_hat2[i].data.numpy())

        nn.MSELoss()(y_hat1, y).backward()
        nn.MSELoss()(y_hat2, y_expanded).backward()

        # Check grad consistency
        for i in range(batch_size):
            grad1 = reshaped_means.grad.data.numpy()
            grad2 = reshaped_means_expanded.grad[i].data.numpy()
            assert np.allclose(grad1, grad2)

        # Case 3: 2d with non-reshaped input
        y_hat3 = AF.unit_variance_mlpg(R, means)

        # Case 4: 3d with non-reshaped input
        y_hat4 = AF.unit_variance_mlpg(R, means_expanded)

        for i in range(batch_size):
            assert np.allclose(y_hat1.data.numpy(), y_hat3.data.numpy())
            assert np.allclose(y_hat3.data.numpy(), y_hat4[i].data.numpy())

        nn.MSELoss()(y_hat3, y).backward()
        nn.MSELoss()(y_hat4, y_expanded).backward()

        # Check grad consistency
        for i in range(batch_size):
            grad1 = means.grad.data.numpy()
            grad2 = means_expanded.grad[i].data.numpy()
            assert np.allclose(grad1, grad2)
Example #10
def train_loop(models,
               optimizers,
               dataset_loaders,
               w_d=0.0,
               mse_w=0.0,
               mge_w=1.0,
               update_d=True,
               update_g=True,
               reference_discriminator=None):
    model_g, model_d = models
    optimizer_g, optimizer_d = optimizers
    if use_cuda:
        model_g, model_d = model_g.cuda(), model_d.cuda()
        if reference_discriminator is not None:
            reference_discriminator = reference_discriminator.cuda()
            reference_discriminator.eval()
    model_g.train()
    model_d.train()

    Y_data_mean = dataset_loaders["train"].dataset.Y_data_mean
    Y_data_std = dataset_loaders["train"].dataset.Y_data_std
    Y_data_mean = torch.from_numpy(Y_data_mean)
    Y_data_std = torch.from_numpy(Y_data_std)
    if use_cuda:
        Y_data_mean = Y_data_mean.cuda()
        Y_data_std = Y_data_std.cuda()

    E_loss_mge = 1
    E_loss_adv = 1
    is_acoustic = hp.name == "acoustic"
    global global_epoch
    for global_epoch in tqdm(range(global_epoch + 1, hp.nepoch + 1)):
        # LR schedule
        if hp.lr_decay_schedule and update_g:
            optimizer_g = exp_lr_scheduler(optimizer_g,
                                           global_epoch - 1,
                                           hp.nepoch,
                                           init_lr=hp.optimizer_g_params["lr"],
                                           lr_decay_epoch=hp.lr_decay_epoch)
        if hp.lr_decay_schedule and update_d:
            optimizer_d = exp_lr_scheduler(optimizer_d,
                                           global_epoch - 1,
                                           hp.nepoch,
                                           init_lr=hp.optimizer_d_params["lr"],
                                           lr_decay_epoch=hp.lr_decay_epoch)

        for phase in ["train", "test"]:
            running_loss = {
                "generator": 0.0,
                "mse": 0.0,
                "mge": 0.0,
                "loss_real_d": 0.0,
                "loss_fake_d": 0.0,
                "loss_adv": 0.0,
                "discriminator": 0.0
            }
            running_metrics = {}
            real_correct_count, fake_correct_count = 0, 0
            regard_fake_as_natural = 0
            N = len(dataset_loaders[phase])
            total_num_frames = 0
            for x, y, lengths in dataset_loaders[phase]:
                # Sort by lengths. This is needed for PyTorch's PackedSequence
                sorted_lengths, indices = torch.sort(lengths.view(-1),
                                                     dim=0,
                                                     descending=True)
                sorted_lengths = sorted_lengths.long()
                max_len = sorted_lengths[0]

                # Get sorted batch
                x, y = x[indices], y[indices]

                # MLPG paramgen matrix
                # TODO: create this only if it's needed
                R = unit_variance_mlpg_matrix(hp.windows, max_len)
                R = torch.from_numpy(R)

                if use_cuda:
                    x, y, R = x.cuda(), y.cuda(), R.cuda()
                    sorted_lengths = sorted_lengths.cuda()

                # Pack into variables
                x, y = Variable(x), Variable(y)
                sorted_lengths = Variable(sorted_lengths)

                # Static features
                y_static = get_static_features(y, len(hp.windows),
                                               hp.stream_sizes,
                                               hp.has_dynamic_features)

                # Num frames in batch
                total_num_frames += sorted_lengths.float().sum().data[0]

                # Mask
                mask = sequence_mask(sorted_lengths).unsqueeze(-1)

                # Reset optimizer states
                optimizer_g.zero_grad()
                optimizer_d.zero_grad()

                # Apply model (generator)
                y_hat, y_hat_static = apply_generator(x, R, sorted_lengths)

                # Compute spoofing rate
                if reference_discriminator is not None:
                    if hp.adversarial_streams is not None:
                        y_hat_static_ref = get_selected_static_stream(
                            y_hat_static)
                    else:
                        y_hat_static_ref = y_hat_static
                    target = reference_discriminator(y_hat_static_ref,
                                                     lengths=sorted_lengths)
                    # Count samples classified as natural even though the
                    # inputs are actually generated.
                    regard_fake_as_natural += ((target > 0.5).float() *
                                               mask).sum().data[0]

                ### Update discriminator ###
                # Natural: 1, Generated: 0
                if update_d:
                    loss_d, loss_fake_d, loss_real_d, _real_correct_count,\
                        _fake_correct_count = update_discriminator(
                            model_d, optimizer_d, y_static, y_hat_static,
                            sorted_lengths, mask, phase)
                    running_loss["discriminator"] += loss_d
                    running_loss["loss_fake_d"] += loss_fake_d
                    running_loss["loss_real_d"] += loss_real_d
                    real_correct_count += _real_correct_count
                    fake_correct_count += _fake_correct_count

                ### Update generator ###
                if update_g:
                    adv_w = w_d * float(
                        np.clip(E_loss_mge / E_loss_adv, 0, 1e+3))
                    # Update the generator `step` times for adversarial training
                    # TODO: make this configurable
                    step = 2 if update_d and phase == "train" else 1
                    while True:
                        loss_mse, loss_mge, loss_adv, loss_g = update_generator(
                            model_g,
                            model_d,
                            optimizer_g,
                            y,
                            y_hat,
                            y_static,
                            y_hat_static,
                            adv_w,
                            sorted_lengths,
                            mask,
                            phase,
                            mse_w=mse_w,
                            mge_w=mge_w)
                        step -= 1
                        if step <= 0:
                            break
                        # Update outputs
                        y_hat, y_hat_static = apply_generator(
                            x, R, sorted_lengths)

                    running_loss["mse"] += loss_mse
                    running_loss["mge"] += loss_mge
                    running_loss["loss_adv"] += loss_adv
                    running_loss["generator"] += loss_g

                    # Distortions
                    distortions = compute_distortions(y_static.data,
                                                      y_hat_static.data,
                                                      Y_data_mean, Y_data_std,
                                                      sorted_lengths.data)
                    for k, v in distortions.items():
                        try:
                            running_metrics[k] += float(v)
                        except KeyError:
                            running_metrics[k] = float(v)

            # Update expectation
            # NOTE: E_loss_mge is not exactly the same as E[L_mge(y, y_hat)]
            # in their papers, since we add an MSE term to the loss.
            # It will be the same if mse_w = 0 and mge_w = 1.
            if update_d and update_g and phase == "train":
                E_loss_mge = (mse_w * running_loss["mse"] +
                              mge_w * running_loss["mge"]) / N
                E_loss_adv = running_loss["loss_adv"] / N
                log_value("E(mge)", E_loss_mge, global_epoch)
                log_value("E(adv)", E_loss_adv, global_epoch)
                log_value("MGE/ADV loss weight", E_loss_mge / E_loss_adv,
                          global_epoch)

            # Log loss
            for ty, enabled in [("mse", update_g), ("mge", update_g),
                                ("discriminator", update_d),
                                ("loss_real_d", update_d),
                                ("loss_fake_d", update_d),
                                ("loss_adv", update_g and update_d),
                                ("generator", update_g)]:
                if enabled:
                    ave_loss = running_loss[ty] / N
                    log_value("{} {} loss".format(phase, ty), ave_loss,
                              global_epoch)

            # Log eval metrics
            for k, v in running_metrics.items():
                log_value("{} {} metric".format(phase, k), v / N, global_epoch)

            # Log discriminator classification accuracy
            if update_d:
                log_value("Real {} acc".format(phase),
                          real_correct_count / total_num_frames, global_epoch)
                log_value("Fake {} acc".format(phase),
                          fake_correct_count / total_num_frames, global_epoch)

            # Log spoofing rate for generated features by reference model
            if reference_discriminator is not None:
                log_value("{} spoofing rate".format(phase),
                          regard_fake_as_natural / total_num_frames,
                          global_epoch)

        # Save checkpoints
        if global_epoch % checkpoint_interval == 0:
            for model, optimizer, enabled, name in [
                (model_g, optimizer_g, update_g, "Generator"),
                (model_d, optimizer_d, update_d, "Discriminator")
            ]:
                if enabled:
                    save_checkpoint(model, optimizer, global_epoch,
                                    checkpoint_dir, name)

    return 0
Example #11
def benchmark_mlpg(static_dim=59, T=100, batch_size=10, use_cuda=True):
    if use_cuda and not torch.cuda.is_available():
        return

    windows = _get_windows_set()[-1]
    np.random.seed(1234)
    torch.manual_seed(1234)
    means = np.random.rand(T, static_dim * len(windows)).astype(np.float32)
    variances = np.ones(static_dim * len(windows))
    reshaped_means = G.reshape_means(means, static_dim)

    # Pseudo target
    y = G.mlpg(means, variances, windows).astype(np.float32)

    # Pack into variables
    means = Variable(torch.from_numpy(means), requires_grad=True)
    reshaped_means = Variable(torch.from_numpy(reshaped_means),
                              requires_grad=True)
    y = Variable(torch.from_numpy(y), requires_grad=False)
    criterion = nn.MSELoss()

    # Case 1: MLPG
    since = time.time()
    for _ in range(batch_size):
        y_hat = AF.mlpg(means, torch.from_numpy(variances), windows)
        L = criterion(y_hat, y)
        assert np.allclose(y_hat.data.numpy(), y.data.numpy())
        L.backward()  # slow!
    elapsed_mlpg = time.time() - since

    # Case 2: UnitVarianceMLPG
    since = time.time()
    if use_cuda:
        y = y.cuda()
    R = G.unit_variance_mlpg_matrix(windows, T)
    R = torch.from_numpy(R)
    # Assuming minibatches are zero-padded, we only need to create the MLPG
    # matrix per minibatch, not per utterance.
    if use_cuda:
        R = R.cuda()
    for _ in range(batch_size):
        if use_cuda:
            means = means.cpu()
            means = means.cuda()

        y_hat = AF.unit_variance_mlpg(R, means)
        L = criterion(y_hat, y)
        assert np.allclose(y_hat.cpu().data.numpy(),
                           y.cpu().data.numpy(),
                           atol=1e-5)
        L.backward()
    elapsed_unit_variance_mlpg = time.time() - since

    ratio = elapsed_mlpg / elapsed_unit_variance_mlpg

    print(
        "MLPG vs UnitVarianceMLPG (static_dim, T, batch_size, use_cuda) = ({}):"
        .format((static_dim, T, batch_size, use_cuda)))
    if ratio > 1:
        s = "faster"
        sys.stdout.write(OKGREEN)
    else:
        s = "slower"
        sys.stdout.write(FAIL)
    print(
        "UnitVarianceMLPG, {:4f} times {}. Elapsed times {:4f} / {:4f}".format(
            ratio, s, elapsed_mlpg, elapsed_unit_variance_mlpg))

    print(ENDC)
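
A possible invocation of the benchmark above, comparing CPU and (when available) GPU timings:

for use_cuda in [False, True]:
    benchmark_mlpg(static_dim=59, T=100, batch_size=10, use_cuda=use_cuda)
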
Example #12
def test_vc_from_path(model, x, fs, data_mean, data_std, diffvc=True):
    model.eval()

    hop_length = int(fs * (hp.frame_period * 0.001))
    x = x.astype(np.float64)
    f0, timeaxis = pyworld.dio(x, fs, frame_period=hp.frame_period)
    f0 = pyworld.stonemask(x, f0, timeaxis, fs)
    spectrogram = pyworld.cheaptrick(x, f0, timeaxis, fs)
    aperiodicity = pyworld.d4c(x, f0, timeaxis, fs)
    alpha = pysptk.util.mcepalpha(fs)
    mc = pysptk.sp2mc(spectrogram, order=hp.order, alpha=alpha)
    c0, mc = mc[:, 0], mc[:, 1:]
    static_dim = mc.shape[-1]
    mc = P.modspec_smoothing(mc, fs / hop_length, cutoff=50)
    mc = P.delta_features(mc, hp.windows).astype(np.float32)

    T = mc.shape[0]

    inputs = mc[:, :static_dim].copy()

    # Normalization
    mc_scaled = P.scale(mc, data_mean, data_std)

    mc_scaled = Variable(torch.from_numpy(mc_scaled))
    lengths = [len(mc_scaled)]

    # Add batch axis
    mc_scaled = mc_scaled.view(1, -1, mc_scaled.size(-1))

    # For MLPG
    R = unit_variance_mlpg_matrix(hp.windows, T)
    R = torch.from_numpy(R)

    # Apply model
    if model.include_parameter_generation():
        # Case: the model includes parameter generation itself
        # Multistream features cannot be used in this case
        y_hat, y_hat_static = model(mc_scaled, R, lengths=lengths)
    else:
        # Case: generic models (can be sequence model)
        assert hp.has_dynamic_features is not None
        y_hat = model(mc_scaled, lengths=lengths)
        y_hat_static = multi_stream_mlpg(
            y_hat, R, hp.stream_sizes, hp.has_dynamic_features)

    mc_static_pred = y_hat_static.data.cpu().numpy().reshape(-1, static_dim)

    # Denormalize
    mc_static_pred = P.inv_scale(
        mc_static_pred, data_mean[:static_dim], data_std[:static_dim])

    outputs = mc_static_pred.copy()

    if diffvc:
        mc_static_pred = mc_static_pred - mc[:, :static_dim]

    mc = np.hstack((c0[:, None], mc_static_pred))
    if diffvc:
        mc[:, 0] = 0  # remove power coefficients
        engine = Synthesizer(MLSADF(order=hp.order, alpha=alpha),
                             hopsize=hop_length)
        b = pysptk.mc2b(mc.astype(np.float64), alpha=alpha)
        waveform = engine.synthesis(x, b)
    else:
        fftlen = pyworld.get_cheaptrick_fft_size(fs)
        spectrogram = pysptk.mc2sp(
            mc.astype(np.float64), alpha=alpha, fftlen=fftlen)
        waveform = pyworld.synthesize(
            f0, spectrogram, aperiodicity, fs, hp.frame_period)

    return waveform, inputs, outputs
Example #13
def test_vc_from_path(model, x, fs, data_mean, data_std, diffvc=True):
    model.eval()

    hop_length = int(fs * (hp.frame_period * 0.001))
    x = x.astype(np.float64)
    f0, timeaxis = pyworld.dio(x, fs, frame_period=hp.frame_period)
    f0 = pyworld.stonemask(x, f0, timeaxis, fs)
    spectrogram = pyworld.cheaptrick(x, f0, timeaxis, fs)
    aperiodicity = pyworld.d4c(x, f0, timeaxis, fs)
    alpha = pysptk.util.mcepalpha(fs)
    mc = pysptk.sp2mc(spectrogram, order=hp.order, alpha=alpha)
    c0, mc = mc[:, 0], mc[:, 1:]
    static_dim = mc.shape[-1]
    mc = P.modspec_smoothing(mc, fs / hop_length, cutoff=50)
    mc = P.delta_features(mc, hp.windows).astype(np.float32)

    T = mc.shape[0]

    inputs = mc[:, :static_dim].copy()

    # Normalization
    mc_scaled = P.scale(mc, data_mean, data_std)

    mc_scaled = Variable(torch.from_numpy(mc_scaled))
    lengths = [len(mc_scaled)]

    # Add batch axis
    mc_scaled = mc_scaled.view(1, -1, mc_scaled.size(-1))

    # For MLPG
    R = unit_variance_mlpg_matrix(hp.windows, T)
    R = torch.from_numpy(R)

    # Apply model
    if model.include_parameter_generation():
        # Case: the model includes parameter generation itself
        # Multistream features cannot be used in this case
        y_hat, y_hat_static = model(mc_scaled, R, lengths=lengths)
    else:
        # Case: generic models (can be sequence model)
        assert hp.has_dynamic_features is not None
        y_hat = model(mc_scaled, lengths=lengths)
        y_hat_static = multi_stream_mlpg(y_hat, R, hp.stream_sizes,
                                         hp.has_dynamic_features)

    mc_static_pred = y_hat_static.data.cpu().numpy().reshape(-1, static_dim)

    # Denormalize
    mc_static_pred = P.inv_scale(mc_static_pred, data_mean[:static_dim],
                                 data_std[:static_dim])

    outputs = mc_static_pred.copy()

    if diffvc:
        mc_static_pred = mc_static_pred - mc[:, :static_dim]

    mc = np.hstack((c0[:, None], mc_static_pred))
    if diffvc:
        mc[:, 0] = 0  # remove power coefficients
        engine = Synthesizer(MLSADF(order=hp.order, alpha=alpha),
                             hopsize=hop_length)
        b = pysptk.mc2b(mc.astype(np.float64), alpha=alpha)
        waveform = engine.synthesis(x, b)
    else:
        fftlen = pyworld.get_cheaptrick_fft_size(fs)
        spectrogram = pysptk.mc2sp(mc.astype(np.float64),
                                   alpha=alpha,
                                   fftlen=fftlen)
        waveform = pyworld.synthesize(f0, spectrogram, aperiodicity, fs,
                                      hp.frame_period)

    return waveform, inputs, outputs
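
A hypothetical driver for the conversion routine above; the wav path, the trained model, and the scaler statistics (data_mean, data_std) are assumptions, not part of the original code:

from scipy.io import wavfile

fs, x = wavfile.read("source_speaker.wav")   # hypothetical input utterance
waveform, inputs, outputs = test_vc_from_path(
    model, x, fs, data_mean, data_std, diffvc=True)
# The synthesis backends return float64 samples on the input's scale;
# cast back to 16-bit for writing.
wavfile.write("converted.wav", fs, waveform.astype(np.int16))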