Ejemplo n.º 1
0
def test_trim_remove_zeros_frames():
    """Check that zero-frame stripping keeps the second axis length intact.

    The example audio is analysed with ``pyworld`` and the matrices returned
    by ``pyworld.cheaptrick`` and ``pyworld.d4c`` are passed through
    ``trim_zeros_frames`` and then ``remove_zeros_frames``; in every case
    ``shape[1]`` of the result must equal ``shape[1]`` of the input.
    """
    sample_rate, waveform = wavfile.read(example_audio_file())
    hop = 5

    # pyworld is fed float64 samples here
    waveform = waveform.astype(np.float64)
    f0, time_axis = pyworld.dio(waveform, sample_rate, frame_period=hop)
    features = (
        pyworld.cheaptrick(waveform, f0, time_axis, sample_rate),
        pyworld.d4c(waveform, f0, time_axis, sample_rate),
    )

    # Same order as before: trim on both matrices first, then remove on both
    for strip_frames in (trim_zeros_frames, remove_zeros_frames):
        for matrix in features:
            stripped = strip_frames(matrix)
            assert stripped.shape[1] == matrix.shape[1]
Ejemplo n.º 2
0
def make_dataset_to_array(dataset, keys=None):
    """Stack the non-zero frames of the selected dataset entries row-wise.

    Each selected entry is horizontally stacked with ``np.hstack`` when it is
    a tuple, passed through ``remove_zeros_frames``, and the results are
    written one after another along the first axis of a single new array.

    Args:
        dataset: Mapping from key to an array, or to a tuple of arrays.
        keys: Iterable of keys to use, in output order. Defaults to all keys
            of ``dataset`` in sorted order.

    Returns:
        A newly allocated array holding every kept frame, using the dtype of
        the first entry, or ``None`` when ``keys`` yields no entries.

    Raises:
        ValueError: If an entry's trailing dimensions differ from those of
            the first entry.
    """
    if keys is None:
        keys = sorted(dataset.keys())

    chunks = []
    for key in keys:
        d = dataset[key]
        if isinstance(d, tuple):
            d = np.hstack(d)
        d = remove_zeros_frames(d)
        # Growing the result with in-place ``ndarray.resize`` would silently
        # reinterpret the rows already stored when widths differ, so fail
        # loudly instead.
        if chunks and d.shape[1:] != chunks[0].shape[1:]:
            raise ValueError(
                "entry {!r} has frame shape {} but expected {}".format(
                    key, d.shape[1:], chunks[0].shape[1:]))
        chunks.append(d)

    if not chunks:
        return None

    # Allocate the output once rather than reallocating for every entry.
    first = chunks[0]
    total_frames = sum(len(chunk) for chunk in chunks)
    data = np.empty((total_frames,) + first.shape[1:], dtype=first.dtype)

    offset = 0
    for chunk in chunks:
        end = offset + len(chunk)
        data[offset:end] = chunk
        offset = end
    return data
Ejemplo n.º 3
0
# Discard the first (power) coefficient along the last axis
X_aligned = X_aligned[:, :, 1:]
Y_aligned = Y_aligned[:, :, 1:]

# Record the static dimension, then optionally append delta features
static_dim = X_aligned.shape[-1]
if use_delta:
    X_aligned, Y_aligned = (
        apply_each2d_trim(delta_features, aligned, windows)
        for aligned in (X_aligned, Y_aligned)
    )

# plot_parallel(X_aligned[idx],Y_aligned[idx])

# Joint feature matrix: concatenate X and Y along the last axis, then
# collapse all leading axes into rows
XY = np.concatenate((X_aligned, Y_aligned), axis=-1)
XY = XY.reshape(-1, 2 * X_aligned.shape[-1])
print(XY.shape)

# Show the shape again once remove_zeros_frames has been applied
XY = remove_zeros_frames(XY)
print(XY.shape)

# Model: full-covariance Gaussian mixture fitted on the joint features
gmm = GaussianMixture(
    n_components=64,
    covariance_type="full",
    max_iter=100,
    verbose=1,
)
gmm.fit(XY)


# Visualize model
# Means
# for k in range(3):
#     plot(gmm.means_[k], linewidth=1.5, label="Mean of mixture {}".format(k+1))
# legend(prop={"size": 16})
Ejemplo n.º 4
0
def make_expected_dataset(data_root, use_delta):
    """Build the joint clb/slt feature matrix used as the expected dataset.

    Mel-cepstrum features are collected for speakers "clb" and "slt" from the
    training split of a CMU ARCTIC data source, padded, aligned with
    ``DTWAligner``, stripped of their first coefficient, optionally extended
    with ``delta_features``, and concatenated along the last axis.

    Args:
        data_root: Forwarded as ``data_root`` to the data source.
        use_delta: When truthy, ``delta_features`` is applied to both aligned
            arrays using ``DELTA_WINDOWS``.

    Returns:
        The result of ``remove_zeros_frames`` on the 2-D joint matrix.
    """
    from pathlib import Path
    from nnmnkwii.datasets import PaddedFileSourceDataset
    from nnmnkwii.datasets.cmu_arctic import CMUArcticWavFileDataSource
    from nnmnkwii.metrics import melcd
    from nnmnkwii.preprocessing import (delta_features, remove_zeros_frames,
                                        trim_zeros_frames)
    from nnmnkwii.preprocessing.alignment import DTWAligner
    from nnmnkwii.util import apply_each2d_trim

    utterance_limit = 100  # number of utterances to be used.
    holdout_ratio = 0.03
    padded_length = 1200

    delta_windows = DELTA_WINDOWS

    class MyFileDataSource(CMUArcticWavFileDataSource):
        """Data source that keeps a held-out split and yields mel-cepstra."""

        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            # Populated by collect_files()
            self.test_paths = None

        def collect_files(self):
            """Return the training paths; remember the test paths."""
            every_path = list(map(Path, super().collect_files()))
            train_paths, held_out = train_test_split(
                every_path, test_size=holdout_ratio, random_state=1234)

            # keep paths for later testing
            self.test_paths = held_out
            return train_paths

        def collect_features(self, path):
            """Return the mel-cepstrum cut to the trimmed envelope length."""
            analyzed = kwiiyatta.analyze_wav(path)
            n_frames = len(trim_zeros_frames(analyzed.spectrum_envelope))
            # NOTE(review): open question from the original author - might
            # the frames being trimmed be shifted toward the front?
            return analyzed.mel_cepstrum.data[:n_frames]

    # Both sources are created before any dataset is materialised
    sources = [
        MyFileDataSource(data_root=data_root,
                         speakers=[speaker],
                         max_files=utterance_limit)
        for speaker in ("clb", "slt")
    ]
    X, Y = (
        PaddedFileSourceDataset(source, padded_length).asarray()
        for source in sources
    )

    # Alignment
    aligner = DTWAligner(verbose=0, dist=melcd)
    X_aligned, Y_aligned = aligner.transform((X, Y))

    # Drop 1st (power) dimension
    X_aligned = X_aligned[:, :, 1:]
    Y_aligned = Y_aligned[:, :, 1:]

    if use_delta:
        X_aligned, Y_aligned = (
            apply_each2d_trim(delta_features, aligned, delta_windows)
            for aligned in (X_aligned, Y_aligned)
        )

    # Concatenate along the last axis and collapse leading axes into rows
    joint = np.concatenate((X_aligned, Y_aligned), axis=-1)
    XY = joint.reshape(-1, 2 * X_aligned.shape[-1])

    return remove_zeros_frames(XY)