Example #1
0
def test_sliding_window_for_wav_2_letter_sized_input():
    """Verify the feature-matrix dimensions for a 47712-sample all-zero clip.

    The preprocessor is built with a model input size of 296 and a stride
    of 160; the extracted features must contain 296 rows of 39 values each.
    """
    sample_rate = 16000
    window_ms = 32
    # Kept as a float product (not cast to int), exactly as it is handed
    # to MFCCParams.
    window_samples = sample_rate * window_ms * 0.001
    mfcc_count = 13
    fbank_bins = 128
    mel_lo_freq = 0
    mel_hi_freq = 8000
    htk_mode = False
    fft_size = 512

    model_input_size = 296
    hop = 160

    silent_clip = np.zeros(47712, dtype=int)

    params = preprocess.MFCCParams(
        sample_rate, fbank_bins, mel_lo_freq, mel_hi_freq,
        mfcc_count, window_samples, htk_mode, fft_size,
    )
    extractor = preprocess.Preprocessor(
        preprocess.MFCC(params), model_input_size, hop
    )

    features = extractor.extract_features(silent_clip)

    assert len(features[0]) == 39
    assert len(features) == 296
Example #2
0
def test_mfcc_compute():
    """Compare the 13 values from ``mfcc_compute`` against reference numbers.

    The input is ``test_wav`` divided by 2**15 (presumably to bring 16-bit
    samples into the [-1, 1) range -- confirm against the fixture).
    """
    reference = (
        -834.9656973095651,
        21.026915475076322,
        18.628541708201688,
        7.341153529494758,
        18.907974386153214,
        -5.360387487466194,
        6.523572638527085,
        -11.270643644983316,
        8.375177203773777,
        12.06721844362991,
        8.30815892468875,
        -13.499911910889917,
        -18.176121251436165,
    )

    sample_rate = 16000
    window_ms = 32
    window_samples = sample_rate * window_ms * 0.001
    mfcc_count = 13
    fbank_bins = 128
    mel_lo_freq = 0
    mel_hi_freq = 8000
    htk_mode = False
    fft_size = 512

    scaled_wav = np.array(test_wav) / (2**15)

    params = preprocess.MFCCParams(
        sample_rate, fbank_bins, mel_lo_freq, mel_hi_freq,
        mfcc_count, window_samples, htk_mode, fft_size,
    )
    coefficients = preprocess.MFCC(params).mfcc_compute(scaled_wav)

    # Index explicitly (rather than zipping) so that a result shorter than
    # the reference still fails with an IndexError.
    for position, wanted in enumerate(reference):
        assert np.isclose(coefficients[position], wanted)
Example #3
0
def test_create_mel_filter_bank():
    """Check the size and selected rows of the generated mel filter bank.

    The bank must contain 128 entries, and rows 0-5, 50 and 100 must match
    the reference weights below.

    The rows are compared numerically instead of through ``str(array)``:
    the textual form of a NumPy array depends on the active print options
    and on NumPy's float formatting, so a string comparison can fail even
    when the values are correct.
    """
    # Local import keeps this test self-contained.
    import numpy as np

    samp_freq = 16000
    frame_len_ms = 32
    frame_len_samples = samp_freq * frame_len_ms * 0.001
    num_mfcc_feats = 13
    num_fbank_bins = 128
    mel_lo_freq = 0
    mel_hi_freq = 8000
    use_htk = False
    n_FFT = 512

    mfcc_params = preprocess.MFCCParams(samp_freq, num_fbank_bins, mel_lo_freq,
                                        mel_hi_freq, num_mfcc_feats,
                                        frame_len_samples, use_htk, n_FFT)

    mfcc_inst = preprocess.MFCC(mfcc_params)

    mel_filter_bank = mfcc_inst.create_mel_filter_bank()

    assert len(mel_filter_bank) == 128

    # (row index, reference weights given to 8 decimal places)
    expected_rows = (
        (0, [0.02837754]),
        (1, [0.01438901, 0.01398853]),
        (2, [0.02877802]),
        (3, [0.04236608]),
        (4, [0.00040047, 0.02797707]),
        (5, [0.01478948, 0.01358806]),
        (50, [0.03298853]),
        (100, [0.00260166, 0.00588759, 0.00914814,
               0.00798015, 0.00476919, 0.00158245]),
    )

    for row_index, expected in expected_rows:
        actual = np.asarray(mel_filter_bank[row_index])
        # The shape check stops broadcasting from hiding a row that has the
        # wrong number of weights.
        assert actual.shape == (len(expected),)
        # The references carry 8 decimals, hence the absolute tolerance.
        assert np.allclose(actual, expected, rtol=0, atol=1e-8)
Example #4
0
def test_inverse_mel_scale_function_with_htk_false():
    """Check ``inv_mel_scale(16, False)`` against its reference value."""
    sample_rate = 16000
    window_ms = 32
    window_samples = sample_rate * window_ms * 0.001
    mfcc_count = 13
    fbank_bins = 128
    mel_lo_freq = 0
    mel_hi_freq = 8000
    htk_mode = False
    fft_size = 512

    params = preprocess.MFCCParams(
        sample_rate, fbank_bins, mel_lo_freq, mel_hi_freq,
        mfcc_count, window_samples, htk_mode, fft_size,
    )
    extractor = preprocess.MFCC(params)

    # Second positional argument is the HTK switch, disabled here.
    converted = extractor.inv_mel_scale(16, False)

    assert np.isclose(converted, 1071.170287494467)
Example #5
0
def test_mel_scale_function_with_htk_true():
    """Check ``mel_scale(16, True)`` against its reference value.

    NOTE(review): the MFCC instance is configured with HTK disabled while
    the call passes ``True`` explicitly -- presumably the argument is what
    selects the formula; confirm against ``preprocess.MFCC``.
    """
    sample_rate = 16000
    window_ms = 32
    window_samples = sample_rate * window_ms * 0.001
    mfcc_count = 13
    fbank_bins = 128
    mel_lo_freq = 0
    mel_hi_freq = 8000
    htk_mode = False
    fft_size = 512

    params = preprocess.MFCCParams(
        sample_rate, fbank_bins, mel_lo_freq, mel_hi_freq,
        mfcc_count, window_samples, htk_mode, fft_size,
    )
    extractor = preprocess.MFCC(params)

    mel_value = extractor.mel_scale(16, True)

    assert np.isclose(mel_value, 25.470010570730597)
Example #6
0
def test_sliding_window_for_small_num_samples():
    """Check selected first-row feature values for a repeated test clip.

    ``test_wav`` is divided by 2**15, tiled nine times and passed through a
    preprocessor built with a model input size of 9 and a stride of 160.
    Columns 0-11 and column 38 of the first output row are compared with
    reference values.
    """
    # (column in the first row, reference value)
    first_row_checks = (
        (0, -3.4660944830426454),
        (1, 0.3587718932127629),
        (2, 0.3480551325669172),
        (3, 0.2976191917228921),
        (4, 0.3493037340849936),
        (5, 0.2408643285767937),
        (6, 0.2939659585037282),
        (7, 0.2144552669573928),
        (8, 0.302239565899944),
        (9, 0.3187368787077345),
        (10, 0.3019401051295793),
        (11, 0.20449412797602678),
        (38, -0.18751440767749533),
    )

    sample_rate = 16000
    window_ms = 32
    window_samples = sample_rate * window_ms * 0.001
    mfcc_count = 13
    fbank_bins = 128
    mel_lo_freq = 0
    mel_hi_freq = 8000
    htk_mode = False
    fft_size = 512

    model_input_size = 9
    hop = 160

    scaled_wav = np.array(test_wav) / (2**15)
    repeated_wav = np.tile(scaled_wav, 9)

    params = preprocess.MFCCParams(
        sample_rate, fbank_bins, mel_lo_freq, mel_hi_freq,
        mfcc_count, window_samples, htk_mode, fft_size,
    )
    extractor = preprocess.Preprocessor(
        preprocess.MFCC(params), model_input_size, hop
    )

    features = extractor.extract_features(repeated_wav)

    for column, wanted in first_row_checks:
        assert np.isclose(features[0][column], wanted)