Exemple #1
0
def test_dtw_aligner():
    """Check that the DTW aligners reduce the distance between paired features.

    An example utterance and a time-stretched copy of it are converted to
    feature matrices with ``_get_mcep``, padded to a common frame count and
    given a leading utterance axis. Each aligner (``DTWAligner``,
    ``IterativeDTWAligner`` and ``DTWAligner`` with the ``melcd`` distance)
    must then return equally shaped arrays whose difference has a smaller
    norm than the difference of the unaligned inputs.
    """
    from nnmnkwii.preprocessing.alignment import DTWAligner, IterativeDTWAligner

    x, fs = librosa.load(example_audio_file(), sr=None)
    assert fs == 16000
    # ``rate`` is passed by keyword: newer librosa releases accept it only as
    # a keyword argument, while older ones accept it either way.
    x_fast = librosa.effects.time_stretch(x, rate=2.0)

    X = _get_mcep(x, fs)
    Y = _get_mcep(x_fast, fs)

    D = X.shape[-1]

    # Create padded pair so both sequences share the same number of frames
    X, Y = adjust_frame_lengths(X, Y, divisible_by=2)

    # Add utterance axis: (frames, D) -> (1, frames, D)
    X = X.reshape(1, -1, D)
    Y = Y.reshape(1, -1, D)

    # Distance between the unaligned pair; every aligner has to beat it.
    baseline = np.linalg.norm(X - Y)

    X_aligned, Y_aligned = DTWAligner().transform((X, Y))
    assert X_aligned.shape == Y_aligned.shape
    assert np.linalg.norm(X_aligned - Y_aligned) < baseline

    X_aligned, Y_aligned = IterativeDTWAligner(n_iter=2,
                                               max_iter_gmm=10,
                                               n_components_gmm=2).transform(
                                                   (X, Y))
    assert X_aligned.shape == Y_aligned.shape
    assert np.linalg.norm(X_aligned - Y_aligned) < baseline

    # Custom dist function
    from nnmnkwii.metrics import melcd
    X_aligned, Y_aligned = DTWAligner(dist=melcd).transform((X, Y))
    assert X_aligned.shape == Y_aligned.shape
    assert np.linalg.norm(X_aligned - Y_aligned) < baseline
Exemple #2
0
def test_adjust_frame_lengths():
    """Exercise ``adjust_frame_lengths`` on a 10-frame and an 11-frame input.

    For both 1-D and 2-D inputs the two outputs must share one shape, and the
    resulting frame count must match the expectation for each combination of
    ``pad`` and ``divisible_by``. The padded tail is additionally checked for
    the default fill value (0) and for a caller-supplied constant (1).
    """
    short_len, long_len, feat_dim = 10, 11, 5

    # (keyword arguments, expected common frame count), in execution order
    length_cases = (
        ({"pad": True}, 11),
        ({"pad": False}, 10),
        ({"pad": True, "divisible_by": 2}, 12),
        ({"pad": False, "divisible_by": 2}, 10),
        ({"pad": False, "divisible_by": 3}, 9),
        ({"pad": True, "divisible_by": 3}, 12),
    )

    # One 1-D pair and one 2-D pair
    input_pairs = [
        (np.random.rand(short_len), np.random.rand(long_len)),
        (np.random.rand(short_len, feat_dim),
         np.random.rand(long_len, feat_dim)),
    ]
    for first, second in input_pairs:
        for kwargs, expected_len in length_cases:
            first_adj, second_adj = adjust_frame_lengths(
                first, second, **kwargs)
            assert first_adj.shape == second_adj.shape
            assert first_adj.shape[0] == expected_len

    first = np.random.rand(short_len)
    second = np.random.rand(long_len)

    # Without extra arguments the padded tail must be zero
    first_adj, second_adj = adjust_frame_lengths(
        first, second, pad=True, divisible_by=3)
    assert first_adj[-1] == 0
    assert second_adj[-1] == 0

    # Extra keyword arguments (expected to reach np.pad) must change the
    # value used for padding
    pad_options = {"mode": "constant", "constant_values": 1}
    first_adj, second_adj = adjust_frame_lengths(
        first, second, pad=True, divisible_by=3, **pad_options)
    assert first_adj[-1] == 1
    assert second_adj[-1] == 1
Exemple #3
0
        print("Destination dir for {}: {}".format(speaker, d))
        if not exists(d):
            os.makedirs(d)

    # Convert to arrays
    print("Convert datasets to arrays")
    X, Y = X_dataset.asarray(verbose=1), Y_dataset.asarray(verbose=1)

    # Alignment
    print("Perform alignment")
    X, Y = DTWAligner().transform((X, Y))

    print("Save features to disk")
    for idx, (x, y) in tqdm(enumerate(zip(X, Y))):
        # paths
        src_name = splitext(basename(X_dataset.collected_files[idx][0]))[0]
        tgt_name = splitext(basename(Y_dataset.collected_files[idx][0]))[0]
        src_path = join(dst_dir, "X", src_name)
        tgt_path = join(dst_dir, "Y", tgt_name)

        # Trim and adjust frames
        x = P.trim_zeros_frames(x)
        y = P.trim_zeros_frames(y)
        x, y = P.adjust_frame_lengths(x, y, pad=True, divisible_by=2)

        # Save
        np.save(src_path, x)
        np.save(tgt_path, y)

    sys.exit(0)
        print("Destination dir for {}: {}".format(speaker, d))
        if not exists(d):
            os.makedirs(d)

    # Convert to arrays
    print("Convert datasets to arrays")
    X, Y = X_dataset.asarray(verbose=1), Y_dataset.asarray(verbose=1)

    # Alignment
    print("Perform alignment")
    X, Y = DTWAligner().transform((X, Y))

    print("Save features to disk")
    for idx, (x, y) in tqdm(enumerate(zip(X, Y))):
        # paths
        src_name = splitext(basename(X_dataset.collected_files[idx][0]))[0]
        tgt_name = splitext(basename(Y_dataset.collected_files[idx][0]))[0]
        src_path = join(dst_dir, "X", src_name)
        tgt_path = join(dst_dir, "Y", tgt_name)

        # Trim and adjust frames
        x = P.trim_zeros_frames(x)
        y = P.trim_zeros_frames(y)
        x, y = P.adjust_frame_lengths(x, y, pad=True, divisible_by=2)

        # Save
        np.save(src_path, x)
        np.save(tgt_path, y)

    sys.exit(0)