Beispiel #1
0
def add_delta_features(mfcc, order=2):
    """
    Append 1st- or 2nd-order delta features to MFCCs.

    Args:
        mfcc: Feature array handed unchanged to ``apply_each2d_trim``
            (presumably a padded N x T x D batch -- confirm with caller).
        order: ``1`` selects the static + first-order windows only; any
            other value (default ``2``) also adds the second-order window.

    Returns:
        Whatever ``apply_each2d_trim(delta_features, mfcc, windows)`` returns.
    """
    # The static (identity) window and the first-order difference window
    # are used regardless of the requested order.
    windows = [
        (0, 0, np.array([1.0])),
        (1, 1, np.array([-0.5, 0.0, 0.5])),
    ]

    # Every order other than 1 is treated as "2nd order".
    if order != 1:
        windows.append((1, 1, np.array([1.0, -2.0, 1.0])))

    return apply_each2d_trim(delta_features, mfcc, windows)
Beispiel #2
0
def test_function_utils():
    """Check ``apply_each2d_padded`` and ``apply_each2d_trim`` on a batch.

    The per-sample function adds 1 to every element, so the valid region of
    each output must equal ``X + 1`` and the padded region must stay zero.
    """
    def dummy_func2d(x):
        return x + 1

    # T must be at least as large as the longest entry in ``lengths``;
    # otherwise every ``[length:]`` slice below is empty and the padding
    # assertions pass vacuously (``np.all`` of an empty array is True).
    T, D = 100, 24
    np.random.seed(1234)
    X = np.random.rand(2, T, D)
    lengths = [60, 100]
    assert max(lengths) <= T

    # Padded case: frames past each length must be left as zeros.
    Y = apply_each2d_padded(dummy_func2d, X, lengths)
    for i, length in enumerate(lengths):
        assert np.allclose(X[i][:length] + 1, Y[i][:length])
        assert np.all(Y[i][length:] == 0)

    # Trim case: zero out the padding frames in the input first, so that
    # the valid length has to be recovered from the data itself.
    for i, length in enumerate(lengths):
        X[i][length:] = 0
    Y = apply_each2d_trim(dummy_func2d, X)
    for i, length in enumerate(lengths):
        assert np.allclose(X[i][:length] + 1, Y[i][:length])
        assert np.all(Y[i][length:] == 0)
Beispiel #3
0
#
# idx = 22 # any
# plot_parallel(X[idx],Y[idx])

# Alignment: run DTWAligner over the (X, Y) pair, using melcd as the
# distance function.
# NOTE(review): X and Y are defined outside this fragment; presumably padded
# source/target feature batches -- confirm.
X_aligned, Y_aligned = DTWAligner(verbose=0, dist=melcd).transform((X, Y))

# Optional debug plot (disabled):
# plot_parallel(X_aligned[idx],Y_aligned[idx])

# Drop 1st (power) dimension: index 0 along the last axis is discarded.
X_aligned, Y_aligned = X_aligned[:, :, 1:], Y_aligned[:, :, 1:]

# Append delta features
# static_dim records the last-axis size before any delta features are added;
# it is not used again in this fragment.
static_dim = X_aligned.shape[-1]
if use_delta:
    # Both sides go through the same delta_features / windows configuration.
    X_aligned = apply_each2d_trim(delta_features, X_aligned, windows)
    Y_aligned = apply_each2d_trim(delta_features, Y_aligned, windows)

# Optional debug plot (disabled):
# plot_parallel(X_aligned[idx],Y_aligned[idx])

# Finally, we get joint feature matrix: concatenate source and target along
# the last axis, then flatten to 2-D with one row per frame.
XY = np.concatenate((X_aligned, Y_aligned), axis=-1).reshape(-1, X_aligned.shape[-1] * 2)
print(XY.shape)

# Presumably drops the all-zero (padding) rows -- confirm against
# remove_zeros_frames; the shape is printed before and after for comparison.
XY = remove_zeros_frames(XY)
print(XY.shape)

# Model: the mixture is only constructed here; no fit call is visible in
# this fragment.
gmm = GaussianMixture(
    n_components=64, covariance_type="full", max_iter=100, verbose=1)
Beispiel #4
0
                                     speakers=["slt"],
                                     max_files=max_files)

# Build dataset as 3D tensor (NxTxD)
# NOTE(review): `source` / `target` are defined above this fragment;
# padded_length=1200 presumably pads every utterance to 1200 frames -- confirm.
X = FileSourceDataset(source).asarray(padded_length=1200)
Y = FileSourceDataset(target).asarray(padded_length=1200)

# Alignment: run DTWAligner over the (X, Y) pair with melcd as the distance.
X, Y = DTWAligner(verbose=0, dist=melcd).transform((X, Y))

# Drop 1st dimension: index 0 along the last axis is discarded.
X, Y = X[:, :, 1:], Y[:, :, 1:]

# Last-axis size before delta features are appended; not used again in this
# fragment.
static_dim = X.shape[-1]

# Apply delta_features with the shared `windows` to both sides.
X = apply_each2d_trim(delta_features, X, windows)
Y = apply_each2d_trim(delta_features, Y, windows)

# Joint features: concatenate source and target along the last axis and
# flatten to 2-D with one row per frame.
XY = np.concatenate((X, Y), axis=-1).reshape(-1, X.shape[-1] * 2)
# Presumably drops the all-zero (padding) rows -- confirm against
# remove_zeros_frames.
XY = remove_zeros_frames(XY)
print(XY.shape)
# Two-component, full-covariance mixture, capped at 100 iterations.
gmm = GaussianMixture(n_components=2,
                      covariance_type="full",
                      max_iter=100,
                      verbose=1)

# Fit the mixture on the joint source/target rows.
gmm.fit(XY)

# Parameter generation
# NOTE(review): diff=True presumably selects differential conversion --
# confirm against the MLPG documentation.
paramgen = MLPG(gmm, windows=windows, diff=True)
Beispiel #5
0
def make_expected_dataset(data_root, use_delta):
    """Build the joint clb/slt feature matrix used as the expected dataset.

    Mel-cepstrum features are collected for the "clb" and "slt" speakers,
    the two batches are aligned with ``DTWAligner``, the first coefficient is
    dropped, delta features are optionally applied, and the two sides are
    concatenated frame by frame.

    Args:
        data_root: Forwarded as ``data_root`` to the CMU ARCTIC data source.
        use_delta: When truthy, ``delta_features`` is applied to both sides
            using ``DELTA_WINDOWS``.

    Returns:
        The result of ``remove_zeros_frames`` on the 2-D joint matrix.
    """
    from pathlib import Path
    from nnmnkwii.datasets import PaddedFileSourceDataset
    from nnmnkwii.datasets.cmu_arctic import CMUArcticWavFileDataSource
    from nnmnkwii.metrics import melcd
    from nnmnkwii.preprocessing import (delta_features, remove_zeros_frames,
                                        trim_zeros_frames)
    from nnmnkwii.preprocessing.alignment import DTWAligner
    from nnmnkwii.util import apply_each2d_trim

    max_files = 100  # number of utterances to be used.
    test_size = 0.03

    windows = DELTA_WINDOWS

    class MyFileDataSource(CMUArcticWavFileDataSource):
        """CMU ARCTIC source that holds out a test split of the file list."""

        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            self.test_paths = None

        def collect_files(self):
            """Return the training paths; remember the held-out ones."""
            all_paths = list(map(Path, super().collect_files()))
            train_paths, held_out_paths = train_test_split(
                all_paths, test_size=test_size, random_state=1234)

            # keep paths for later testing
            self.test_paths = held_out_paths

            return train_paths

        def collect_features(self, path):
            """Return the mel-cepstrum frames for one wav file."""
            analyzed = kwiiyatta.analyze_wav(path)
            n_frames = len(trim_zeros_frames(analyzed.spectrum_envelope))
            # NOTE(review): the original author asked whether the trimmed
            # frames end up shifted toward the front, i.e. whether keeping
            # the first n_frames matches what trim_zeros_frames removed.
            return analyzed.mel_cepstrum.data[:n_frames]

    # Both sources are constructed before any features are loaded.
    sources = [
        MyFileDataSource(data_root=data_root,
                         speakers=[speaker],
                         max_files=max_files)
        for speaker in ("clb", "slt")
    ]
    X, Y = (PaddedFileSourceDataset(src, 1200).asarray() for src in sources)

    # Alignment
    aligner = DTWAligner(verbose=0, dist=melcd)
    X_aligned, Y_aligned = aligner.transform((X, Y))

    # Drop 1st (power) dimension
    X_aligned = X_aligned[:, :, 1:]
    Y_aligned = Y_aligned[:, :, 1:]

    if use_delta:
        X_aligned, Y_aligned = (
            apply_each2d_trim(delta_features, feats, windows)
            for feats in (X_aligned, Y_aligned)
        )

    # One row per frame: source features followed by target features.
    joint_dim = X_aligned.shape[-1] * 2
    XY = np.concatenate((X_aligned, Y_aligned), axis=-1).reshape(-1, joint_dim)

    return remove_zeros_frames(XY)