コード例 #1
0
def test_real_metrics():
    """Smoke-test the distortion metrics on the bundled example features.

    Loads the example acoustic-model data, slices out the mgc / lf0 / vuv /
    bap parts, builds "target" features by adding a constant 0.01 offset to
    the continuous parts (the V/UV flags are copied unchanged), and checks
    that every distortion is strictly positive while the V/UV error is
    exactly zero.
    """
    _, source = example_file_data_sources_for_acoustic_model()
    X = FileSourceDataset(source)
    # Per-utterance lengths must be collected before the dataset is turned
    # into a single array by ``asarray()``.
    lengths = [len(x) for x in X]
    X = X.asarray()

    # NOTE(review): ``mgc_dim // 3`` presumably keeps only the static part of
    # static + delta + delta-delta features -- confirm against the source.
    mgc = X[:, :, :source.mgc_dim // 3]
    lf0 = X[:, :, source.lf0_start_idx]
    # Use the builtin ``int``: the ``np.int`` alias was deprecated in
    # NumPy 1.20 and removed in NumPy 1.24 (AttributeError on access).
    vuv = (X[:, :, source.vuv_start_idx] > 0).astype(int)
    bap = X[:, :, source.bap_start_idx]

    # Targets differ from the inputs by a constant offset, so every
    # distortion below is expected to be non-zero.
    mgc_tgt = mgc + 0.01
    lf0_tgt = lf0 + 0.01
    vuv_tgt = vuv.copy()
    bap_tgt = bap + 0.01

    mcd = metrics.melcd(mgc, mgc_tgt, lengths)
    bap_mcd = metrics.melcd(bap, bap_tgt, lengths)
    lf0_mse = metrics.lf0_mean_squared_error(lf0, vuv, lf0_tgt, vuv_tgt,
                                             lengths)
    vuv_err = metrics.vuv_error(vuv, vuv_tgt)
    assert mcd > 0
    assert bap_mcd > 0
    assert lf0_mse > 0
    # Identical V/UV flags on both sides -> no V/UV errors at all.
    assert vuv_err == 0.0
コード例 #2
0
def compute_distortions(y_static,
                        y_hat_static,
                        Y_data_mean,
                        Y_data_std,
                        lengths=None):
    """Compute objective distortion measures for the configured model type.

    The kind of model is read from the module-level ``hp.name``:

    * ``"acoustic"``: both inputs are split into mgc / lf0 / vuv / bap with
      ``split_streams`` and the result holds ``"mcd"``, ``"bap_mcd"``,
      ``"f0_rmse"`` and ``"vuv_err"``.
    * ``"duration"``: both inputs are passed through ``P.inv_scale`` and the
      result holds ``"dur_rmse"``.
    * ``"vc"``: both inputs are passed through ``P.inv_scale`` using only the
      first ``hp.order`` entries of the statistics, and the result holds
      ``"mcd"``.

    Args:
        y_static: reference static features.
        y_hat_static: predicted static features.
        Y_data_mean: mean statistics handed to ``split_streams`` /
            ``P.inv_scale``.
        Y_data_std: standard-deviation statistics handed to
            ``split_streams`` / ``P.inv_scale``.
        lengths: optional sequence lengths forwarded to the metric functions.

    Returns:
        dict: metric name -> value, with the keys listed above.

    Raises:
        AssertionError: if ``hp.name`` is not one of the supported names.
    """
    if hp.name == "acoustic":
        mgc, lf0, vuv, bap = split_streams(y_static, Y_data_mean, Y_data_std)
        mgc_hat, lf0_hat, vuv_hat, bap_hat = split_streams(
            y_hat_static, Y_data_mean, Y_data_std)
        try:
            f0_mse = metrics.lf0_mean_squared_error(lf0,
                                                    vuv,
                                                    lf0_hat,
                                                    vuv_hat,
                                                    lengths=lengths,
                                                    linear_domain=True)
        except ZeroDivisionError:
            # Best effort: report NaN instead of failing the whole evaluation.
            f0_mse = np.nan

        distortions = {
            # Index 0 of the last axis is excluded from the MCD
            # (presumably the 0th/energy coefficient -- confirm).
            "mcd": metrics.melcd(mgc[:, :, 1:],
                                 mgc_hat[:, :, 1:],
                                 lengths=lengths),
            "bap_mcd": metrics.melcd(bap, bap_hat, lengths=lengths) / 10.0,
            "f0_rmse": np.sqrt(f0_mse),
            "vuv_err": metrics.vuv_error(vuv, vuv_hat, lengths=lengths),
        }
    elif hp.name == "duration":
        y_static_invscale = P.inv_scale(y_static, Y_data_mean, Y_data_std)
        y_hat_static_invscale = P.inv_scale(y_hat_static, Y_data_mean,
                                            Y_data_std)
        distortions = {
            "dur_rmse":
            math.sqrt(
                metrics.mean_squared_error(y_static_invscale,
                                           y_hat_static_invscale,
                                           lengths=lengths))
        }
    elif hp.name == "vc":
        static_dim = hp.order
        y_static_invscale = P.inv_scale(y_static, Y_data_mean[:static_dim],
                                        Y_data_std[:static_dim])
        y_hat_static_invscale = P.inv_scale(y_hat_static,
                                            Y_data_mean[:static_dim],
                                            Y_data_std[:static_dim])
        distortions = {
            "mcd":
            metrics.melcd(y_static_invscale,
                          y_hat_static_invscale,
                          lengths=lengths)
        }
    else:
        # Raised explicitly rather than via ``assert False``: assert
        # statements are removed under ``python -O``, which would let
        # execution reach the ``return`` below with ``distortions`` unbound.
        raise AssertionError(
            "unsupported hp.name: {!r}".format(hp.name))

    return distortions
コード例 #3
0
def test_f0_mse():
    """Check ``metrics.lf0_mean_squared_error`` on single and batched input.

    First, identical reference/estimate sequences must yield an error of
    exactly zero.  Then, for random batches given as NumPy arrays and as
    torch tensors, the result obtained with explicit full-length ``lengths``
    must match (to 5 decimals) the result obtained without ``lengths``, and
    the error must be strictly positive, in both settings of
    ``linear_domain``.
    """
    np.random.seed(1234)
    n_frames = 100
    batch_size = 32
    lf0_mse = metrics.lf0_mean_squared_error

    reference = np.random.rand(n_frames, 1)
    estimate = reference.copy()

    # One flag vector is zero on its first two frames, the other on its
    # last two frames.
    vuv_head_zero = np.hstack((np.zeros(2), np.ones(n_frames - 2)))
    vuv_tail_zero = np.hstack((np.ones(n_frames - 2), np.zeros(2)))

    # The same call is issued twice; both results must be exactly zero.
    assert lf0_mse(reference, vuv_head_zero, estimate, vuv_tail_zero) == 0
    assert lf0_mse(reference, vuv_head_zero, estimate, vuv_tail_zero) == 0

    # Batched inputs: NumPy first, then torch (both V/UV batches are tiled
    # from the same flag vector).
    np_ref = np.random.rand(batch_size, n_frames, 1)
    np_est = np.random.rand(batch_size, n_frames, 1)
    np_ref_vuv = np.tile(vuv_head_zero, (batch_size, 1))
    np_est_vuv = np.tile(vuv_head_zero, (batch_size, 1))
    th_ref = torch.rand(batch_size, n_frames, 1)
    th_est = torch.rand(batch_size, n_frames, 1)
    th_ref_vuv = torch.from_numpy(np_ref_vuv).clone()
    th_est_vuv = torch.from_numpy(np_est_vuv).clone()

    cases = [
        (np_ref, np_ref_vuv, np_est, np_est_vuv),
        (th_ref, th_ref_vuv, th_est, th_est_vuv),
    ]
    for linear_domain in [True, False]:
        for ref, ref_vuv, est, est_vuv in cases:
            # Every sequence in the batch is declared to be full length.
            lengths = [ref.shape[1]] * len(ref)
            with_lengths = lf0_mse(ref,
                                   ref_vuv,
                                   est,
                                   est_vuv,
                                   lengths,
                                   linear_domain=linear_domain)
            without_lengths = lf0_mse(ref,
                                      ref_vuv,
                                      est,
                                      est_vuv,
                                      linear_domain=linear_domain)
            np.testing.assert_almost_equal(with_lengths,
                                           without_lengths,
                                           decimal=5)
            assert lf0_mse(
                ref, ref_vuv, est, est_vuv, linear_domain=linear_domain) > 0
コード例 #4
0
ファイル: train_util.py プロジェクト: r9y9/nnsvs
def compute_distortions(pred_out_feats, out_feats, lengths, out_scaler,
                        model_config):
    """Measure how far predicted acoustic features are from the ground truth.

    Both feature sets are denormalized with ``out_scaler``, reduced to their
    static streams, and compared stream by stream.  The first four streams
    are taken to be mgc, lf0, vuv and bap, in that order.

    Args:
        pred_out_feats (torch.Tensor): predicted acoustic features
        out_feats (torch.Tensor): ground-truth acoustic features
        lengths (torch.Tensor): lengths of the sequences
        out_scaler (nn.Module): scaler used to denormalize the features
        model_config (dict): model configuration; ``num_windows``,
            ``stream_sizes`` and ``has_dynamic_features`` are read from it

    Returns:
        dict: ``ObjEval_MGC_MCD``, ``ObjEval_BAP_MCD`` and
        ``ObjEval_VUV_ERR``, plus ``ObjEval_F0_RMSE`` unless the F0 error
        computation raised ``ZeroDivisionError``.
    """

    def _static_streams(feats):
        # Same stream configuration for ground truth and prediction.
        return get_static_features(
            feats,
            model_config.num_windows,
            model_config.stream_sizes,
            model_config.has_dynamic_features,
        )

    # Undo the normalization before measuring anything.
    out_feats = out_scaler.inverse_transform(out_feats)
    pred_out_feats = out_scaler.inverse_transform(pred_out_feats)

    out_streams = _static_streams(out_feats)
    pred_out_streams = _static_streams(pred_out_feats)

    assert len(out_streams) >= 4
    mgc, lf0, vuv, bap = (out_streams[idx] for idx in range(4))
    pred_mgc, pred_lf0, pred_vuv, pred_bap = (pred_out_streams[idx]
                                              for idx in range(4))

    # Threshold the V/UV values at 0.5 to get hard 0/1 flags.
    vuv = (vuv > 0.5).float()
    pred_vuv = (pred_vuv > 0.5).float()

    dist = {}
    # Index 0 of the last axis is left out of the mgc distortion.
    dist["ObjEval_MGC_MCD"] = metrics.melcd(mgc[:, :, 1:],
                                            pred_mgc[:, :, 1:],
                                            lengths=lengths)
    dist["ObjEval_BAP_MCD"] = metrics.melcd(
        bap, pred_bap, lengths=lengths) / 10.0
    dist["ObjEval_VUV_ERR"] = metrics.vuv_error(vuv,
                                                pred_vuv,
                                                lengths=lengths)

    try:
        f0_mse = metrics.lf0_mean_squared_error(lf0,
                                                vuv,
                                                pred_lf0,
                                                pred_vuv,
                                                lengths=lengths,
                                                linear_domain=True)
        dist["ObjEval_F0_RMSE"] = np.sqrt(f0_mse)
    except ZeroDivisionError:
        # Best effort: the F0 entry is simply omitted in this case
        # (presumably no frame is voiced on both sides -- confirm).
        pass

    return dist