Esempio n. 1
0
def test_get_static_stream_sizes():
    """Check ``get_static_stream_sizes`` on a four-stream layout.

    Three of the four streams are flagged as having dynamic features and
    three windows are used; the expected static sizes are ``[60, 1, 1, 1]``.
    """
    sizes = [180, 3, 1, 3]
    dynamic_flags = [True, True, False, True]
    n_windows = 3
    expected = [60, 1, 1, 1]

    result = get_static_stream_sizes(sizes, dynamic_flags, n_windows)
    # Echo the computed sizes so they show up in the captured test output.
    print(result)
    assert np.all(result == expected)
Esempio n. 2
0
def test_get_static_stream_sizes():
    """Exercise ``get_static_stream_sizes`` with a mixed static/dynamic setup.

    Inputs: stream sizes ``[180, 3, 1, 3]``, where only the third stream has
    no dynamic features, and a window count of 3. The result must compare
    equal, element by element, to ``[60, 1, 1, 1]``.
    """
    window_count = 3
    per_stream_sizes = [180, 3, 1, 3]
    per_stream_dynamic = [True, True, False, True]

    computed = get_static_stream_sizes(
        per_stream_sizes, per_stream_dynamic, window_count)
    print(computed)

    assert np.all(computed == [60, 1, 1, 1])
Esempio n. 3
0
def get_selected_static_stream(y_hat_static):
    """Pick the adversarial streams out of predicted static features.

    The static size of every stream is derived from the active
    hyper-parameters (``hp``), then ``select_streams`` keeps only the
    streams listed in ``hp.adversarial_streams``.

    Args:
        y_hat_static: Predicted static features; indexed along three axes
            below, so at least rank 3 is assumed.

    Returns:
        The selected streams. When ``hp.mask_0th_mgc_for_adv_loss`` is
        truthy, index 0 of the last axis is dropped.
    """
    sizes = get_static_stream_sizes(
        hp.stream_sizes, hp.has_dynamic_features, len(hp.windows))
    selected = select_streams(
        y_hat_static, sizes, streams=hp.adversarial_streams)

    if not hp.mask_0th_mgc_for_adv_loss:
        return selected

    # The 0-th mgc with adversarial training affects speech quality,
    # so it is excluded here.
    # ref: saito17asja_gan.pdf
    # Masking is only expected for the TTS acoustic hyper-parameter set.
    assert hp == hparams.tts_acoustic
    return selected[:, :, 1:]
Esempio n. 4
0
def split_streams(y_static, Y_data_mean, Y_data_std):
    """Split static features into mgc / lf0 / vuv / bap and de-normalise.

    The static stream sizes come from the active hyper-parameters (``hp``)
    and determine the offsets along the last axis of ``y_static``.

    Args:
        y_static: Static-domain features, indexed along three axes.
        Y_data_mean: Forwarded unchanged to ``inv_scale``.
        Y_data_std: Forwarded unchanged to ``inv_scale``.

    Returns:
        Whatever ``inv_scale`` returns for the four split streams.
    """
    # static domain
    mgc_dim, lf0_dim, vuv_dim, _bap_dim = get_static_stream_sizes(
        hp.stream_sizes, hp.has_dynamic_features, len(hp.windows))

    # Streams are laid out back to back: mgc, lf0, vuv, bap.
    lf0_begin = mgc_dim
    vuv_begin = lf0_begin + lf0_dim
    bap_begin = vuv_begin + vuv_dim

    mgc = y_static[:, :, :lf0_begin]
    lf0 = y_static[:, :, lf0_begin:vuv_begin]
    # vuv is picked with a single index (not a slice), exactly as before.
    vuv = y_static[:, :, vuv_begin]
    bap = y_static[:, :, bap_begin:]

    return inv_scale(mgc, lf0, vuv, bap, Y_data_mean, Y_data_std)
Esempio n. 5
0
        X_data_min, X_data_max = P.minmax(X[phase])
        Y_data_mean, Y_data_var = P.meanvar(Y[phase])
        Y_data_std = np.sqrt(Y_data_var)

        np.save(join(data_dir, "X_{}_data_min".format(ty)), X_data_min)
        np.save(join(data_dir, "X_{}_data_max".format(ty)), X_data_max)
        np.save(join(data_dir, "Y_{}_data_mean".format(ty)), Y_data_mean)
        np.save(join(data_dir, "Y_{}_data_var".format(ty)), Y_data_var)

        if hp.generator_params["in_dim"] is None:
            hp.generator_params["in_dim"] = X_data_min.shape[-1]
        if hp.generator_params["out_dim"] is None:
            hp.generator_params["out_dim"] = Y_data_mean.shape[-1]
        if hp.discriminator_params["in_dim"] is None:
            sizes = get_static_stream_sizes(hp.stream_sizes,
                                            hp.has_dynamic_features,
                                            len(hp.windows))
            hp.discriminator_params["in_dim"] = int(np.sum(sizes))
        dataset_loaders = get_tts_data_loaders(X, Y, X_data_min, X_data_max,
                                               Y_data_mean, Y_data_std)

    # Models
    model_g = getattr(gantts.models, hp.generator)(**hp.generator_params)
    model_d = getattr(gantts.models,
                      hp.discriminator)(**hp.discriminator_params)
    print("Generator:", model_g)
    print("Discriminator:", model_d)

    # Reference discriminator model to compute spoofing rate
    if checkpoint_path_r is not None:
        reference_discriminator = getattr(