def test_get_static_stream_sizes():
    """Sanity-check ``get_static_stream_sizes`` on a four-stream layout.

    With three windows, the streams flagged as having dynamic features are
    expected to come back as 180 -> 60 and 3 -> 1, while the stream flagged
    ``False`` is expected to keep its size of 1.
    """
    expected = [60, 1, 1, 1]
    result = get_static_stream_sizes(
        [180, 3, 1, 3],             # per-stream sizes
        [True, True, False, True],  # which streams carry dynamic features
        3,                          # number of windows
    )
    print(result)
    assert np.all(result == expected)
def get_selected_static_stream(y_hat_static):
    """Select the adversarial streams from a static-feature prediction.

    The static size of every stream is derived from the hyper-parameters
    (``hp.stream_sizes``, ``hp.has_dynamic_features``, ``hp.windows``) and
    the streams listed in ``hp.adversarial_streams`` are picked out of
    ``y_hat_static`` via ``select_streams``.

    When ``hp.mask_0th_mgc_for_adv_loss`` is set, index 0 along the last
    axis of the selection is dropped before returning.
    """
    sizes = get_static_stream_sizes(
        hp.stream_sizes, hp.has_dynamic_features, len(hp.windows))
    selected = select_streams(
        y_hat_static, sizes, streams=hp.adversarial_streams)

    if not hp.mask_0th_mgc_for_adv_loss:
        return selected

    # Including the 0-th mgc in adversarial training affects speech quality,
    # so it is masked out here.
    # ref: saito17asja_gan.pdf
    assert hp == hparams.tts_acoustic
    return selected[:, :, 1:]
def split_streams(y_static, Y_data_mean, Y_data_std):
    """Split static features into mgc/lf0/vuv/bap and pass them to ``inv_scale``.

    The last axis of ``y_static`` is partitioned using the static stream
    sizes derived from the hyper-parameters. The four pieces, together with
    ``Y_data_mean`` and ``Y_data_std``, are forwarded to ``inv_scale`` and
    its result is returned unchanged.
    """
    # Static-domain size of each stream, in mgc, lf0, vuv, bap order.
    mgc_dim, lf0_dim, vuv_dim, bap_dim = get_static_stream_sizes(
        hp.stream_sizes, hp.has_dynamic_features, len(hp.windows))

    # Start offsets along the last axis; mgc implicitly begins at 0.
    lf0_begin = mgc_dim
    vuv_begin = lf0_begin + lf0_dim
    bap_begin = vuv_begin + vuv_dim

    mgc = y_static[:, :, :lf0_begin]
    lf0 = y_static[:, :, lf0_begin:vuv_begin]
    # Integer index (not a slice): the last axis is dropped for vuv.
    vuv = y_static[:, :, vuv_begin]
    bap = y_static[:, :, bap_begin:]

    return inv_scale(mgc, lf0, vuv, bap, Y_data_mean, Y_data_std)
X_data_min, X_data_max = P.minmax(X[phase]) Y_data_mean, Y_data_var = P.meanvar(Y[phase]) Y_data_std = np.sqrt(Y_data_var) np.save(join(data_dir, "X_{}_data_min".format(ty)), X_data_min) np.save(join(data_dir, "X_{}_data_max".format(ty)), X_data_max) np.save(join(data_dir, "Y_{}_data_mean".format(ty)), Y_data_mean) np.save(join(data_dir, "Y_{}_data_var".format(ty)), Y_data_var) if hp.generator_params["in_dim"] is None: hp.generator_params["in_dim"] = X_data_min.shape[-1] if hp.generator_params["out_dim"] is None: hp.generator_params["out_dim"] = Y_data_mean.shape[-1] if hp.discriminator_params["in_dim"] is None: sizes = get_static_stream_sizes(hp.stream_sizes, hp.has_dynamic_features, len(hp.windows)) hp.discriminator_params["in_dim"] = int(np.sum(sizes)) dataset_loaders = get_tts_data_loaders(X, Y, X_data_min, X_data_max, Y_data_mean, Y_data_std) # Models model_g = getattr(gantts.models, hp.generator)(**hp.generator_params) model_d = getattr(gantts.models, hp.discriminator)(**hp.discriminator_params) print("Generator:", model_g) print("Discriminator:", model_d) # Reference discriminator model to compute spoofing rate if checkpoint_path_r is not None: reference_discriminator = getattr(