Ejemplo n.º 1
0
def test_htk_compat(audio):
    """HTK compatibility moves the first coefficient to the last column.

    With or without energy, the first column of the Kaldi-style output
    must equal the last column of the HTK-compatible output.
    """
    for energy in (True, False):
        kaldi_style = PlpProcessor(
            use_energy=energy, htk_compat=False, dither=0).process(audio)
        htk_style = PlpProcessor(
            use_energy=energy, htk_compat=True, dither=0).process(audio)
        assert kaldi_style.data[:, 0] == pytest.approx(htk_style.data[:, -1])
Ejemplo n.º 2
0
def get_plp_dd(wav_fn, norm):
    """Return the PLPs with deltas and delta-deltas for an audio file.

    :param wav_fn: path to a .wav file to load
    :param norm: normalization to apply; ``"cmvn"`` applies per-feature
        mean/variance normalization, any other value leaves features raw
    :returns: a (nframes, ndims) float64 numpy array of PLP features
        with first and second order deltas appended
    """
    audio = Audio.load(wav_fn)
    processor = PlpProcessor(
        sample_rate=audio.sample_rate, window_type="hamming",
        frame_length=0.025, frame_shift=0.01,
        low_freq=0, vtln_low=60, vtln_high=7200,
        high_freq=audio.sample_rate / 2)

    # static PLPs (no VTLN warping), then order-2 deltas
    plp_static = processor.process(audio, vtln_warp=1.0)
    plp_deltas = DeltaPostProcessor(order=2).process(plp_static)

    # NOTE(review): relies on the private _to_dict() API; the public
    # `.data` attribute is presumably equivalent — confirm before changing.
    features = np.float64(plp_deltas._to_dict()["data"])
    if norm == "cmvn":
        features = (features - np.mean(features, axis=0)) / np.std(features, axis=0)

    return features
Ejemplo n.º 3
0
def test_output(audio):
    """Output shape follows frame parameters; bad inputs raise ValueError."""
    # (nframes, num_ceps) depends on frame shift and length
    expected = [
        ({'frame_shift': 0.01}, (140, 13)),
        ({'frame_shift': 0.02}, (70, 13)),
        ({'frame_shift': 0.02, 'frame_length': 0.05}, (69, 13))]
    for kwargs, shape in expected:
        assert PlpProcessor(**kwargs).process(audio).shape == shape

    # sample rate mismatch
    with pytest.raises(ValueError):
        PlpProcessor(sample_rate=8000).process(audio)

    # only mono signals are accepted
    with pytest.raises(ValueError):
        stereo = Audio(np.random.random((1000, 2)), sample_rate=16000)
        PlpProcessor(sample_rate=stereo.sample_rate).process(stereo)
Ejemplo n.º 4
0
def test_params():
    """The processor exposes 24 parameters via get_params/set_params."""
    assert len(PlpProcessor().get_params()) == 24

    # bug fix: 'htk_compat' was listed twice in the literal — Python dict
    # literals silently keep only the last occurrence, so the duplicate
    # was dead code; keep a single entry.
    params = {
        'num_bins': 0,
        'use_energy': True,
        'energy_floor': 10.0,
        'raw_energy': False,
        'htk_compat': True}
    p = PlpProcessor(**params)
    out_params = p.get_params()
    assert len(out_params) == 24

    # set_params on a fresh processor must yield the same parameters
    assert PlpProcessor().set_params(**params).get_params() == out_params
Ejemplo n.º 5
0
def test_raw(audio, raw_energy):
    """MFCC, PLP and energy processors agree on the energy coefficient."""
    options = {'raw_energy': raw_energy, 'dither': 0}

    # first column of each feature matrix is the (raw or windowed) energy
    first_column = {
        name: cls(**options).process(audio).data[:, 0]
        for name, cls in [
            ('mfcc', MfccProcessor),
            ('plp', PlpProcessor),
            ('energy', EnergyProcessor)]}

    assert np.allclose(first_column['mfcc'], first_column['energy'])
    assert np.allclose(first_column['plp'], first_column['energy'])
Ejemplo n.º 6
0
def test_num_ceps(audio, num_ceps):
    """num_ceps must be in a valid range and drives the output dimension."""
    if num_ceps >= 23:
        # too many cepstral coefficients for the default LPC order
        with pytest.raises(ValueError) as err:
            PlpProcessor(num_ceps=num_ceps)
        # fix: match against the exception message explicitly; str(err) on
        # the ExceptionInfo is deprecated in recent pytest versions.
        assert 'We must have num_ceps <= lpc_order+1' in str(err.value)
    else:
        proc = PlpProcessor(num_ceps=num_ceps)
        if 0 < proc.num_ceps:
            feat = proc.process(audio)
            assert proc.num_ceps == num_ceps == proc.ndims
            assert feat.shape == (140, num_ceps)

            # shape is unchanged when energy is disabled
            proc.use_energy = False
            feat = proc.process(audio)
            assert feat.shape == (140, num_ceps)
        else:
            # num_ceps <= 0 is only detected at process time
            with pytest.raises(RuntimeError):
                proc.process(audio)
def get_features(sound_file, chosen_processor):
    """Compute the feature coefficients of a sound file.

    :param sound_file: path to a sound file in .wav format
    :param chosen_processor: name of the features to compute, one of
        'filterbank', 'plp', 'rastaplp' or 'bottleneck'
    :returns: a pandas.DataFrame of feature coefficients, one row per
        frame of 25ms every 10ms
    """
    audio = Audio.load(sound_file)
    processors = {
        'filterbank': FilterbankProcessor(sample_rate=audio.sample_rate),
        'plp': PlpProcessor(sample_rate=audio.sample_rate),
        'rastaplp': RastaPlpProcessor(sample_rate=audio.sample_rate),
        'bottleneck': BottleneckProcessor(weights='BabelMulti')}

    # bug fix: the processors dict was built but never used — the original
    # called chosen_processor.process() directly, ignoring the documented
    # contract that chosen_processor is one of the names above.
    features = processors[chosen_processor].process(audio)
    return pd.DataFrame(features)
Ejemplo n.º 8
0
def main():
    """Plot a wav file's waveform alongside every available feature type."""
    # command line: a single positional wav file argument
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('wav', help='wav file to compute features on')
    audio = Audio.load(parser.parse_args().wav)

    # one processor per feature type, all at the signal's sample rate
    rate = audio.sample_rate
    processors = {
        'spectrogram': SpectrogramProcessor(sample_rate=rate),
        'filterbank': FilterbankProcessor(sample_rate=rate),
        'mfcc': MfccProcessor(sample_rate=rate),
        'plp': PlpProcessor(sample_rate=rate),
        'rastaplp': RastaPlpProcessor(sample_rate=rate),
        'bottleneck': BottleneckProcessor(weights='BabelMulti')}

    # compute every feature type on the loaded signal
    features = {name: proc.process(audio) for name, proc in processors.items()}

    # one row for the raw waveform plus one per feature type
    fig, axes = plt.subplots(
        nrows=len(processors) + 1,
        gridspec_kw={'top': 0.95, 'bottom': 0.05, 'hspace': 0},
        subplot_kw={'xticks': [], 'yticks': []})

    # in-plot label style shared by all subplots
    label_style = {'boxstyle': 'round', 'alpha': 0.5, 'color': 'white'}

    # top row: the audio signal against time in seconds
    time = np.arange(0.0, audio.nsamples) / audio.sample_rate
    axes[0].plot(time, audio.astype(np.float32).data)
    axes[0].set_xlim(0.0, audio.duration)
    axes[0].text(
        0.02, 0.8, 'audio',
        bbox=dict(label_style), transform=axes[0].transAxes)

    # remaining rows: each feature matrix as an image (features x frames)
    for row, (name, feats) in enumerate(features.items(), start=1):
        axes[row].imshow(feats.data.T, aspect='auto')
        axes[row].text(
            0.02, 0.8, name,
            bbox=dict(label_style), transform=axes[row].transAxes)

    plt.show()