예제 #1
0
def test_dtw_aligner():
    """Check that the DTW aligners reduce the distance between two utterances.

    The example audio and a time-stretched copy of it (rate 2.0) are
    converted to features with ``_get_mcep``, brought to a common frame count,
    and fed to ``DTWAligner`` (default and custom distance) and
    ``IterativeDTWAligner``. Each aligned pair must have matching shapes and
    a smaller norm of the difference than the unaligned pair.
    """
    x, fs = librosa.load(example_audio_file(), sr=None)
    assert fs == 16000
    # ``rate`` is keyword-only in librosa >= 0.10; passing it by keyword
    # also works on older releases, so this form is portable.
    x_fast = librosa.effects.time_stretch(x, rate=2.0)

    X = _get_mcep(x, fs)
    Y = _get_mcep(x_fast, fs)

    # Feature dimension (last axis), needed for the reshape below.
    D = X.shape[-1]

    # Create padded pair
    X, Y = adjast_frame_lengths(X, Y, divisible_by=2)

    # Add utterance axis
    X = X.reshape(1, -1, D)
    Y = Y.reshape(1, -1, D)

    # Distance between the unaligned pair; every aligner must beat it.
    baseline = np.linalg.norm(X - Y)

    X_aligned, Y_aligned = DTWAligner().transform((X, Y))
    assert X_aligned.shape == Y_aligned.shape
    assert np.linalg.norm(X_aligned - Y_aligned) < baseline

    X_aligned, Y_aligned = IterativeDTWAligner(
        n_iter=2, max_iter_gmm=10, n_components_gmm=2).transform((X, Y))
    assert X_aligned.shape == Y_aligned.shape
    assert np.linalg.norm(X_aligned - Y_aligned) < baseline

    # Custom dist function
    from nnmnkwii.metrics import melcd
    X_aligned, Y_aligned = DTWAligner(dist=melcd).transform((X, Y))
    # Same shape check as the other aligner cases, for consistency.
    assert X_aligned.shape == Y_aligned.shape
    assert np.linalg.norm(X_aligned - Y_aligned) < baseline
예제 #2
0
def test_adjast_frame_lengths():
    """Check the common frame count produced by ``adjast_frame_lengths``.

    Two random feature matrices with 10 and 11 frames are adjusted under
    several ``pad`` / ``divisible_by`` settings. For every setting the two
    outputs must share a shape and have the expected number of frames.
    """
    n_dims = 5
    short_len = 10
    long_len = 11

    x = np.random.rand(short_len, n_dims)
    y = np.random.rand(long_len, n_dims)

    # (keyword arguments, expected number of frames after adjustment)
    cases = (
        ({"pad": True}, 11),
        ({"pad": False}, 10),
        ({"pad": True, "divisible_by": 2}, 12),
        ({"pad": False, "divisible_by": 2}, 10),
        # Divisible
        ({"pad": False, "divisible_by": 3}, 9),
        ({"pad": True, "divisible_by": 3}, 12),
    )

    for options, expected_frames in cases:
        x_hat, y_hat = adjast_frame_lengths(x, y, **options)
        assert x_hat.shape == y_hat.shape
        assert x_hat.shape[0] == expected_frames
예제 #3
0
        print("Destination dir for {}: {}".format(speaker, d))
        if not exists(d):
            os.makedirs(d)

    # Convert to arrays
    print("Convert datasets to arrays")
    X, Y = X_dataset.asarray(verbose=1), Y_dataset.asarray(verbose=1)

    # Alignment
    print("Perform alignment")
    X, Y = DTWAligner().transform((X, Y))

    print("Save features to disk")
    for idx, (x, y) in tqdm(enumerate(zip(X, Y))):
        # paths
        src_name = splitext(basename(X_dataset.collected_files[idx][0]))[0]
        tgt_name = splitext(basename(Y_dataset.collected_files[idx][0]))[0]
        src_path = join(dst_dir, "X", src_name)
        tgt_path = join(dst_dir, "Y", tgt_name)

        # Trim and adjust frames
        x = P.trim_zeros_frames(x)
        y = P.trim_zeros_frames(y)
        x, y = P.adjast_frame_lengths(x, y, pad=True, divisible_by=2)

        # Save
        np.save(src_path, x)
        np.save(tgt_path, y)

    sys.exit(0)