Exemple #1
0
    def convert(self, feature, mlpg=True, diff=False):
        windows = delta.DELTA_WINDOWS
        if not mlpg:
            windows = windows[0:1]
        paramgen = MLPG(self.gmm, windows=windows, diff=diff)

        return paramgen.transform(feature)
Exemple #2
0
    def transform(self, XY):
        X, Y = XY
        assert X.ndim == 3 and Y.ndim == 3

        longer_features = X if X.shape[1] > Y.shape[1] else Y

        Xc = X.copy()  # this will be updated iteratively
        X_aligned = np.zeros_like(longer_features)
        Y_aligned = np.zeros_like(longer_features)
        refined_paths = np.empty(len(X), dtype=np.object)

        for idx in range(self.n_iter):
            for idx, (x, y) in enumerate(zip(Xc, Y)):
                x, y = trim_zeros_frames(x), trim_zeros_frames(y)
                dist, path = fastdtw(x, y, radius=self.radius, dist=self.dist)
                dist /= (len(x) + len(y))
                pathx = list(map(lambda l: l[0], path))
                pathy = list(map(lambda l: l[1], path))

                refined_paths[idx] = pathx
                x, y = x[pathx], y[pathy]

                max_len = max(len(x), len(y))
                if max_len > X_aligned.shape[1] or max_len > Y_aligned.shape[1]:
                    pad_size = max(max_len - X_aligned.shape[1],
                                   max_len > Y_aligned.shape[1])
                    X_aligned = np.pad(X_aligned, [(0, 0), (0, pad_size),
                                                   (0, 0)],
                                       mode="constant",
                                       constant_values=0)
                    Y_aligned = np.pad(Y_aligned, [(0, 0), (0, pad_size),
                                                   (0, 0)],
                                       mode="constant",
                                       constant_values=0)

                X_aligned[idx][:len(x)] = x
                Y_aligned[idx][:len(y)] = y
                if self.verbose > 0:
                    print("{}, distance: {}".format(idx, dist))

            # Fit
            gmm = GaussianMixture(n_components=self.n_components_gmm,
                                  covariance_type="full",
                                  max_iter=self.max_iter_gmm)
            XY = np.concatenate((X_aligned, Y_aligned),
                                axis=-1).reshape(-1, X.shape[-1] * 2)
            gmm.fit(XY)
            windows = [(0, 0, np.array([1.0]))]  # no delta
            paramgen = MLPG(gmm, windows=windows)
            for idx in range(len(Xc)):
                x = trim_zeros_frames(Xc[idx])
                Xc[idx][:len(x)] = paramgen.transform(x)

        # Finally we can get aligned X
        for idx in range(len(X_aligned)):
            x = X[idx][refined_paths[idx]]
            X_aligned[idx][:len(x)] = x

        return X_aligned, Y_aligned
Exemple #3
0
def test_one_utt(src_path, tgt_path, disable_mlpg=False, diffvc=True):
    # GMM-based parameter generation is provided by the library in `baseline` module
    if disable_mlpg:
        # Force disable MLPG
        paramgen = MLPG(gmm, windows=[(0, 0, np.array([1.0]))], diff=diffvc)
    else:
        paramgen = MLPG(gmm, windows=windows, diff=diffvc)

    fs, x = wavfile.read(src_path)
    x = x.astype(np.float64)
    f0, timeaxis = pyworld.dio(x, fs, frame_period=frame_period)
    f0 = pyworld.stonemask(x, f0, timeaxis, fs)
    spectrogram = pyworld.cheaptrick(x, f0, timeaxis, fs)
    aperiodicity = pyworld.d4c(x, f0, timeaxis, fs)

    pdb.set_trace()

    mc = pysptk.sp2mc(spectrogram, order=order, alpha=alpha)
    c0, mc = mc[:, 0], mc[:, 1:]
    if use_delta:
        mc = delta_features(mc, windows)
    mc = paramgen.transform(mc)
    if disable_mlpg and mc.shape[-1] != static_dim:
        mc = mc[:, :static_dim]
    assert mc.shape[-1] == static_dim
    mc = np.hstack((c0[:, None], mc))
    if diffvc:
        mc[:, 0] = 0  # remove power coefficients
        engine = Synthesizer(MLSADF(order=order, alpha=alpha),
                             hopsize=hop_length)
        b = pysptk.mc2b(mc.astype(np.float64), alpha=alpha)
        waveform = engine.synthesis(x, b)
    else:
        spectrogram = pysptk.mc2sp(mc.astype(np.float64),
                                   alpha=alpha,
                                   fftlen=fftlen)
        waveform = pyworld.synthesize(f0, spectrogram, aperiodicity, fs,
                                      frame_period)

    return waveform
Exemple #4
0
def test_gmmmap_swap():
    from nnmnkwii.baseline.gmm import MLPG

    static_dim = 2
    T = 10
    windows = _get_windows_set()[-1]

    np.random.seed(1234)
    src_mc = np.random.rand(T, static_dim * len(windows))
    tgt_mc = np.random.rand(T, static_dim * len(windows))

    # pseudo parallel data
    XY = np.concatenate((src_mc, tgt_mc), axis=-1)
    gmm = GaussianMixture(n_components=4)
    gmm.fit(XY)

    paramgen = MLPG(gmm, windows=windows, swap=False)
    swap_paramgen = MLPG(gmm, windows=windows, swap=True)

    mc_converted1 = paramgen.transform(src_mc)
    mc_converted2 = swap_paramgen.transform(tgt_mc)

    src_mc = src_mc[:, :static_dim]
    tgt_mc = tgt_mc[:, :static_dim]

    assert norm(tgt_mc - mc_converted1) < norm(src_mc - mc_converted1)
    assert norm(tgt_mc - mc_converted2) > norm(src_mc - mc_converted2)
Exemple #5
0
def test_diffvc():
    from nnmnkwii.baseline.gmm import MLPG

    # MLPG is performed dimention by dimention, so static_dim 1 is enough, 2 just for in
    # case.
    static_dim = 2
    T = 10

    for windows in _get_windows_set():
        np.random.seed(1234)
        src_mc = np.random.rand(T, static_dim * len(windows))
        tgt_mc = np.random.rand(T, static_dim * len(windows))

        # pseudo parallel data
        XY = np.concatenate((src_mc, tgt_mc), axis=-1)
        gmm = GaussianMixture(n_components=4)
        gmm.fit(XY)

        paramgen = MLPG(gmm, windows=windows, diff=False)
        diff_paramgen = MLPG(gmm, windows=windows, diff=True)

        mc_converted1 = paramgen.transform(src_mc)
        mc_converted2 = diff_paramgen.transform(src_mc)

        assert mc_converted1.shape == (T, static_dim)
        assert mc_converted2.shape == (T, static_dim)

        src_mc = src_mc[:, :static_dim]
        tgt_mc = tgt_mc[:, :static_dim]
        assert norm(tgt_mc - mc_converted1) < norm(src_mc - mc_converted1)
Exemple #6
0
def test_one_utt(path_src, path_tgt, disable_mlpg=False, diffvc=True):
    if disable_mlpg:
        paramgen = MLPG(gmm, windows=[(0, 0, np.array([1.0]))], diff=diffvc)
    else:
        paramgen = MLPG(gmm, windows=windows, diff=diffvc)

    x, fs_ = sf.read(path_src)
    x = x.astype(np.float64)
    f0, time_axis = pyworld.dio(x, fs_, frame_period=frame_period)
    f0 = pyworld.stonemask(x, f0, time_axis, fs_)
    spectrogram = pyworld.cheaptrick(x, f0, time_axis, fs_)
    aperiodicity = pyworld.d4c(x, f0, time_axis, fs_)

    mc = pysptk.sp2mc(spectrogram, order=order, alpha=alpha)
    c0, mc = mc[:, 0], mc[:, 1:]
    if use_delta:
        mc = delta_features(mc, windows)
    mc = paramgen.transform(mc)
    if disable_mlpg and mc.shape[-1] != static_dim:
        mc = mc[:, :static_dim]
    assert mc.shape[-1] == static_dim
    mc = np.hstack((c0[:, None], mc))
    if diffvc:
        mc[:, 0] = 0
        engine = Synthesizer(MLSADF(order=order, alpha=alpha),
                             hopsize=hop_length)
        b = pysptk.mc2b(mc.astype(np.float64), alpha=alpha)
        waveform = engine.synthesis(x, b)
    else:
        spectrogram = pysptk.mc2sp(mc.astype(np.float64),
                                   alpha=alpha,
                                   fftlen=fftlen)
        waveform = pyworld.synthesize(f0, spectrogram, aperiodicity, fs_,
                                      frame_period)

    return waveform
Exemple #7
0
def parameter_generation_MLPG(gmm, added_delta=True, order=2):
	"""
		Parameters generation for transforming MFCCS
	"""

	windows = [
        (0, 0, np.array([1.0])),
    ]

	if added_delta:
		if order == 1:
			windows = [
	        (0, 0, np.array([1.0])),
	        (1, 1, np.array([-0.5, 0.0, 0.5])),
	    ]

		else:
			windows = [
	        (0, 0, np.array([1.0])),
	        (1, 1, np.array([-0.5, 0.0, 0.5])),
	        (1, 1, np.array([1.0, -2.0, 1.0])),
	    ]

	return MLPG(gmm, windows=windows)