def test_gmmmap_swap():
    """Check that ``swap=True`` reverses the conversion direction of MLPG.

    A 4-component GMM is fitted on random joint (source, target) features
    built with the last window set returned by ``_get_windows_set()``.  The
    regular generator converts the source features and its output must be
    closer to the target statics than to the source statics; the swapped
    generator converts the target features and the relation must be inverted.
    """
    from nnmnkwii.baseline.gmm import MLPG

    n_static = 2
    n_frames = 10
    win_set = _get_windows_set()[-1]
    feature_dim = n_static * len(win_set)

    # Fixed seed so the random features are reproducible.
    np.random.seed(1234)
    source = np.random.rand(n_frames, feature_dim)
    target = np.random.rand(n_frames, feature_dim)

    # pseudo parallel data
    joint = np.concatenate((source, target), axis=-1)
    mixture = GaussianMixture(n_components=4)
    mixture.fit(joint)

    forward_gen = MLPG(mixture, windows=win_set, swap=False)
    backward_gen = MLPG(mixture, windows=win_set, swap=True)
    from_source = forward_gen.transform(source)
    from_target = backward_gen.transform(target)

    # Only the leading static coefficients are compared with the outputs.
    source_static = source[:, :n_static]
    target_static = target[:, :n_static]

    assert norm(target_static - from_source) < norm(source_static - from_source)
    assert norm(target_static - from_target) > norm(source_static - from_target)
def test_diffvc():
    """Run MLPG with and without ``diff=True`` for every window set.

    For each window set from ``_get_windows_set()`` a 4-component GMM is
    fitted on random joint features.  Both generators must return arrays of
    shape ``(T, static_dim)``, and the non-differential output must be closer
    to the target statics than to the source statics.
    """
    from nnmnkwii.baseline.gmm import MLPG

    # MLPG is performed dimension by dimension, so a static dimension of 1
    # would be enough; 2 is used just in case.
    n_static = 2
    n_frames = 10
    expected_shape = (n_frames, n_static)

    for win_set in _get_windows_set():
        feature_dim = n_static * len(win_set)

        # Re-seed for every window set so each case is reproducible.
        np.random.seed(1234)
        source = np.random.rand(n_frames, feature_dim)
        target = np.random.rand(n_frames, feature_dim)

        # pseudo parallel data
        joint = np.concatenate((source, target), axis=-1)
        mixture = GaussianMixture(n_components=4)
        mixture.fit(joint)

        plain_gen = MLPG(mixture, windows=win_set, diff=False)
        diff_gen = MLPG(mixture, windows=win_set, diff=True)
        plain_out = plain_gen.transform(source)
        diff_out = diff_gen.transform(source)

        assert plain_out.shape == expected_shape
        assert diff_out.shape == expected_shape

        source_static = source[:, :n_static]
        target_static = target[:, :n_static]
        assert norm(target_static - plain_out) < norm(source_static - plain_out)
def convert(self, feature, mlpg=True, diff=False):
    """Convert ``feature`` with an MLPG generator built on ``self.gmm``.

    Args:
        feature: input passed unchanged to ``MLPG.transform``.
        mlpg: when false, only the first entry of ``delta.DELTA_WINDOWS`` is
            used instead of the full window list.
        diff: forwarded to ``MLPG`` as its ``diff`` argument.

    Returns:
        Whatever ``MLPG.transform`` returns for ``feature``.
    """
    all_windows = delta.DELTA_WINDOWS
    # Slicing (rather than indexing) keeps the container type of the list.
    selected = all_windows if mlpg else all_windows[0:1]
    generator = MLPG(self.gmm, windows=selected, diff=diff)
    return generator.transform(feature)
def transform(self, XY):
    """Align X to Y with DTW, iteratively refining X through a GMM mapping.

    Each of the ``self.n_iter`` rounds (1) aligns every pair of utterances
    with ``fastdtw``, (2) fits a full-covariance GMM on the aligned joint
    features, and (3) replaces the working copy of X with its GMM-converted
    version so the next round aligns against features closer to Y.  The
    warping paths of the last round are finally applied to the *original* X.

    Args:
        XY: pair ``(X, Y)`` of 3-D arrays.  Axis 1 is treated as the frame
            axis (it is the one that gets padded) and the last axis as the
            feature axis.  Frames are stripped with ``trim_zeros_frames``
            before alignment, so padding is presumably all-zero frames.

    Returns:
        tuple: ``(X_aligned, Y_aligned)``, zero-padded along axis 1 to a
        common length.
    """
    X, Y = XY
    assert X.ndim == 3 and Y.ndim == 3

    longer_features = X if X.shape[1] > Y.shape[1] else Y

    Xc = X.copy()  # this will be updated iteratively
    X_aligned = np.zeros_like(longer_features)
    Y_aligned = np.zeros_like(longer_features)
    # One warping path (list of frame indices into x) per utterance.  A plain
    # list avoids the ``np.object`` alias, which was removed in NumPy 1.24.
    refined_paths = [None] * len(X)

    # No-delta window: the conversion works on static features only.
    windows = [(0, 0, np.array([1.0]))]

    for _ in range(self.n_iter):
        for idx, (x, y) in enumerate(zip(Xc, Y)):
            x, y = trim_zeros_frames(x), trim_zeros_frames(y)
            dist, path = fastdtw(x, y, radius=self.radius, dist=self.dist)
            dist /= (len(x) + len(y))
            pathx = [step[0] for step in path]
            pathy = [step[1] for step in path]

            refined_paths[idx] = pathx
            x, y = x[pathx], y[pathy]
            max_len = max(len(x), len(y))
            if max_len > X_aligned.shape[1] or max_len > Y_aligned.shape[1]:
                # Grow both buffers by the same amount so they keep a
                # common frame length.
                pad_size = max(max_len - X_aligned.shape[1],
                               max_len - Y_aligned.shape[1])
                pad_width = [(0, 0), (0, pad_size), (0, 0)]
                X_aligned = np.pad(X_aligned, pad_width,
                                   mode="constant", constant_values=0)
                Y_aligned = np.pad(Y_aligned, pad_width,
                                   mode="constant", constant_values=0)

            # Clear the rows first: a path shorter than the one from the
            # previous round would otherwise leave stale frames in the tail.
            X_aligned[idx] = 0
            Y_aligned[idx] = 0
            X_aligned[idx][:len(x)] = x
            Y_aligned[idx][:len(y)] = y
            if self.verbose > 0:
                print("{}, distance: {}".format(idx, dist))

        # Fit
        gmm = GaussianMixture(n_components=self.n_components_gmm,
                              covariance_type="full",
                              max_iter=self.max_iter_gmm)
        joint = np.concatenate((X_aligned, Y_aligned), axis=-1)
        joint = joint.reshape(-1, X.shape[-1] * 2)
        gmm.fit(joint)

        paramgen = MLPG(gmm, windows=windows)
        for row in Xc:
            # ``row`` is a view into Xc, so the assignment updates Xc itself.
            x = trim_zeros_frames(row)
            row[:len(x)] = paramgen.transform(x)

    # Finally we can get aligned X
    for idx in range(len(X_aligned)):
        x = X[idx][refined_paths[idx]]
        X_aligned[idx][:len(x)] = x

    return X_aligned, Y_aligned
def test_one_utt(src_path, tgt_path, disable_mlpg=False, diffvc=True):
    """Convert one source utterance with the trained GMM and resynthesize it.

    Relies on module-level configuration defined outside this function
    (``gmm``, ``windows``, ``frame_period``, ``order``, ``alpha``,
    ``use_delta``, ``static_dim``, ``hop_length``, ``fftlen``).

    Args:
        src_path: path of the source wav file, read with ``wavfile.read``.
        tgt_path: unused here; kept for interface compatibility.
        disable_mlpg: when true, MLPG is built with a single no-delta window.
        diffvc: when true, the converted mel-cepstrum is applied to the input
            waveform through an MLSA filter; otherwise the waveform is
            synthesized with ``pyworld.synthesize``.

    Returns:
        The synthesized waveform.
    """
    # GMM-based parameter generation is provided by the library in `baseline` module
    if disable_mlpg:
        # Force disable MLPG
        paramgen = MLPG(gmm, windows=[(0, 0, np.array([1.0]))], diff=diffvc)
    else:
        paramgen = MLPG(gmm, windows=windows, diff=diffvc)

    fs, x = wavfile.read(src_path)
    x = x.astype(np.float64)
    f0, timeaxis = pyworld.dio(x, fs, frame_period=frame_period)
    f0 = pyworld.stonemask(x, f0, timeaxis, fs)
    spectrogram = pyworld.cheaptrick(x, f0, timeaxis, fs)
    aperiodicity = pyworld.d4c(x, f0, timeaxis, fs)

    mc = pysptk.sp2mc(spectrogram, order=order, alpha=alpha)
    # Column 0 is split off and re-attached after conversion.
    c0, mc = mc[:, 0], mc[:, 1:]
    if use_delta:
        mc = delta_features(mc, windows)
    mc = paramgen.transform(mc)
    if disable_mlpg and mc.shape[-1] != static_dim:
        # Without MLPG the output may still carry extra columns; keep the
        # leading static part only.
        mc = mc[:, :static_dim]
    assert mc.shape[-1] == static_dim
    mc = np.hstack((c0[:, None], mc))

    if diffvc:
        mc[:, 0] = 0  # remove power coefficients
        engine = Synthesizer(MLSADF(order=order, alpha=alpha), hopsize=hop_length)
        b = pysptk.mc2b(mc.astype(np.float64), alpha=alpha)
        waveform = engine.synthesis(x, b)
    else:
        spectrogram = pysptk.mc2sp(mc.astype(np.float64), alpha=alpha, fftlen=fftlen)
        waveform = pyworld.synthesize(f0, spectrogram, aperiodicity, fs, frame_period)

    return waveform
def test_one_utt(path_src, path_tgt, disable_mlpg=False, diffvc=True):
    """Convert one source utterance with the trained GMM and resynthesize it.

    Uses module-level settings defined outside this function (``gmm``,
    ``windows``, ``frame_period``, ``order``, ``alpha``, ``use_delta``,
    ``static_dim``, ``hop_length``, ``fftlen``).

    Args:
        path_src: path of the source audio file, read with ``sf.read``.
        path_tgt: not used by this function.
        disable_mlpg: when true, MLPG is built with a single no-delta window.
        diffvc: when true, filter the input waveform with an MLSA filter
            driven by the converted mel-cepstrum; otherwise synthesize with
            ``pyworld.synthesize``.

    Returns:
        The synthesized waveform.
    """
    # A single no-delta window is used when MLPG is disabled.
    gen_windows = [(0, 0, np.array([1.0]))] if disable_mlpg else windows
    paramgen = MLPG(gmm, windows=gen_windows, diff=diffvc)

    signal, rate = sf.read(path_src)
    signal = signal.astype(np.float64)

    # Analysis: F0, spectral envelope and aperiodicity.
    f0, times = pyworld.dio(signal, rate, frame_period=frame_period)
    f0 = pyworld.stonemask(signal, f0, times, rate)
    envelope = pyworld.cheaptrick(signal, f0, times, rate)
    aperiodicity = pyworld.d4c(signal, f0, times, rate)

    cepstrum = pysptk.sp2mc(envelope, order=order, alpha=alpha)
    # Column 0 is set aside and re-attached after conversion.
    power = cepstrum[:, 0]
    features = cepstrum[:, 1:]
    if use_delta:
        features = delta_features(features, windows)

    converted = paramgen.transform(features)
    if disable_mlpg and converted.shape[-1] != static_dim:
        converted = converted[:, :static_dim]
    assert converted.shape[-1] == static_dim
    converted = np.hstack((power[:, None], converted))

    if diffvc:
        # Zero the first coefficient before building the filter.
        converted[:, 0] = 0
        engine = Synthesizer(MLSADF(order=order, alpha=alpha), hopsize=hop_length)
        coeffs = pysptk.mc2b(converted.astype(np.float64), alpha=alpha)
        return engine.synthesis(signal, coeffs)

    envelope = pysptk.mc2sp(converted.astype(np.float64), alpha=alpha, fftlen=fftlen)
    return pyworld.synthesize(f0, envelope, aperiodicity, rate, frame_period)