def add_delta_features(mfcc, order=2):
    """Append 1st- or 2nd-order delta features to MFCCs.

    Builds the list of delta windows and hands it, together with
    ``delta_features``, to ``apply_each2d_trim``.

    Args:
        mfcc: Feature array forwarded unchanged to ``apply_each2d_trim``.
        order: ``1`` selects the static + delta windows only.  Any other
            value (including the default ``2``) additionally selects the
            delta-delta window.

    Returns:
        Whatever ``apply_each2d_trim(delta_features, mfcc, windows)`` returns.
    """
    # Static (identity) window followed by the first-order difference window.
    windows = [
        (0, 0, np.array([1.0])),
        (1, 1, np.array([-0.5, 0.0, 0.5])),
    ]
    # Every order other than 1 is treated as second order.
    if order != 1:
        windows.append((1, 1, np.array([1.0, -2.0, 1.0])))
    return apply_each2d_trim(delta_features, mfcc, windows)
def test_function_utils():
    """Check ``apply_each2d_padded`` and ``apply_each2d_trim`` on a small batch.

    For both helpers the test asserts that, for every utterance, the first
    ``length`` frames of the output equal the input frames plus one and that
    all remaining frames of the output are zero.
    """
    def add_one(x):
        return x + 1

    lengths = [60, 100]
    # The time axis must cover the longest utterance.  If T were smaller than
    # the entries of ``lengths``, every ``[length:]`` slice below would be
    # empty and the zero-padding assertions would pass without checking
    # anything.
    T, D = max(lengths), 24
    np.random.seed(1234)
    X = np.random.rand(2, T, D)

    # Padded case: valid lengths are passed explicitly.
    Y = apply_each2d_padded(add_one, X, lengths)
    for i, length in enumerate(lengths):
        assert np.allclose(X[i][:length] + 1, Y[i][:length])
        assert np.all(Y[i][length:] == 0)

    # Trim case: zero out the frames past each length so that the helper has
    # trailing zero frames to work with, then call it without lengths.
    for i, length in enumerate(lengths):
        X[i][length:] = 0
    Y = apply_each2d_trim(add_one, X)
    for i, length in enumerate(lengths):
        assert np.allclose(X[i][:length] + 1, Y[i][:length])
        assert np.all(Y[i][length:] == 0)
# # idx = 22 # any # plot_parallel(X[idx],Y[idx]) # Alignment X_aligned, Y_aligned = DTWAligner(verbose=0, dist=melcd).transform((X, Y)) # plot_parallel(X_aligned[idx],Y_aligned[idx]) # Drop 1st (power) dimension X_aligned, Y_aligned = X_aligned[:, :, 1:], Y_aligned[:, :, 1:] # Append delta features static_dim = X_aligned.shape[-1] if use_delta: X_aligned = apply_each2d_trim(delta_features, X_aligned, windows) Y_aligned = apply_each2d_trim(delta_features, Y_aligned, windows) # plot_parallel(X_aligned[idx],Y_aligned[idx]) # Finally, we get joint feature matrix XY = np.concatenate((X_aligned, Y_aligned), axis=-1).reshape(-1, X_aligned.shape[-1] * 2) print(XY.shape) XY = remove_zeros_frames(XY) print(XY.shape) # Model gmm = GaussianMixture( n_components=64, covariance_type="full", max_iter=100, verbose=1)
speakers=["slt"], max_files=max_files) # Build dataset as 3D tensor (NxTxD) X = FileSourceDataset(source).asarray(padded_length=1200) Y = FileSourceDataset(target).asarray(padded_length=1200) # Alignment X, Y = DTWAligner(verbose=0, dist=melcd).transform((X, Y)) # Drop 1st dimention X, Y = X[:, :, 1:], Y[:, :, 1:] static_dim = X.shape[-1] X = apply_each2d_trim(delta_features, X, windows) Y = apply_each2d_trim(delta_features, Y, windows) # Joint features XY = np.concatenate((X, Y), axis=-1).reshape(-1, X.shape[-1] * 2) XY = remove_zeros_frames(XY) print(XY.shape) gmm = GaussianMixture(n_components=2, covariance_type="full", max_iter=100, verbose=1) gmm.fit(XY) # Parameter generation paramgen = MLPG(gmm, windows=windows, diff=True)
def make_expected_dataset(data_root, use_delta):
    """Build the joint clb/slt feature matrix used as the expected dataset.

    Mel-cepstrum features are collected for the CMU ARCTIC speakers ``clb``
    and ``slt``, padded to 1200 frames, aligned with DTW, stripped of their
    first (power) dimension, optionally extended with delta features, and
    finally concatenated along the feature axis.

    Args:
        data_root: Forwarded as ``data_root`` to the file data source.
        use_delta: When truthy, ``delta_features`` is applied to both aligned
            arrays with the ``DELTA_WINDOWS`` windows.

    Returns:
        The result of ``remove_zeros_frames`` on the joint features reshaped
        to two dimensions.
    """
    from pathlib import Path

    from nnmnkwii.datasets import PaddedFileSourceDataset
    from nnmnkwii.datasets.cmu_arctic import CMUArcticWavFileDataSource
    from nnmnkwii.metrics import melcd
    from nnmnkwii.preprocessing import (delta_features, remove_zeros_frames,
                                        trim_zeros_frames)
    from nnmnkwii.preprocessing.alignment import DTWAligner
    from nnmnkwii.util import apply_each2d_trim

    max_files = 100  # number of utterances to be used.
    test_size = 0.03
    windows = DELTA_WINDOWS

    class MyFileDataSource(CMUArcticWavFileDataSource):
        """CMU ARCTIC source that keeps only the training split of its files."""

        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            self.test_paths = None

        def collect_files(self):
            collected = super().collect_files()
            train_paths, held_out_paths = train_test_split(
                [Path(item) for item in collected],
                test_size=test_size,
                random_state=1234,
            )
            # Remember the held-out paths so they can be used for testing.
            self.test_paths = held_out_paths
            return train_paths

        def collect_features(self, path):
            analyzed = kwiiyatta.analyze_wav(path)
            n_frames = len(trim_zeros_frames(analyzed.spectrum_envelope))
            # Open question from the original author: might the trimmed
            # frames be shifted toward the front?
            return analyzed.mel_cepstrum.data[:n_frames]

    # Create both sources first, then materialise the padded arrays.
    sources = [
        MyFileDataSource(data_root=data_root,
                         speakers=[speaker],
                         max_files=max_files)
        for speaker in ("clb", "slt")
    ]
    X, Y = (PaddedFileSourceDataset(src, 1200).asarray() for src in sources)

    # Alignment
    X_aligned, Y_aligned = DTWAligner(verbose=0, dist=melcd).transform((X, Y))

    # Drop 1st (power) dimension
    X_aligned = X_aligned[:, :, 1:]
    Y_aligned = Y_aligned[:, :, 1:]

    if use_delta:
        X_aligned = apply_each2d_trim(delta_features, X_aligned, windows)
        Y_aligned = apply_each2d_trim(delta_features, Y_aligned, windows)

    # Joint feature matrix: source and target features side by side.
    joint_dim = X_aligned.shape[-1] * 2
    XY = np.concatenate((X_aligned, Y_aligned), axis=-1)
    return remove_zeros_frames(XY.reshape(-1, joint_dim))