예제 #1
0
def test_dim(dim):
    """Check which dimensions are accepted by the CMVN processor.

    ``dim`` is provided from outside this function (presumably by a
    pytest parametrization, not visible here). The values 1 and 3 must
    build a processor exposing that dimension, any other value must be
    rejected with a ValueError.
    """
    if dim in (1, 3):
        p = CmvnPostProcessor(dim)
        assert p.dim == dim
    else:
        with pytest.raises(ValueError) as err:
            CmvnPostProcessor(dim)
        # match against the message of the raised exception itself, not
        # against the string form of the ExceptionInfo wrapper
        assert 'dimension must be a strictly positive integer' in str(
            err.value)
예제 #2
0
def test_pre_stats(mfcc):
    """Check the shape validation of statistics given at construction.

    NOTE(review): the expected messages hard-code 14 columns, so they
    assume the ``mfcc`` fixture has 13 dimensions -- confirm against the
    fixture definition.
    """
    ndims = mfcc.ndims

    # a scalar is not a valid statistics matrix
    with pytest.raises(ValueError) as excinfo:
        CmvnPostProcessor(ndims, stats=1)
    assert 'shape (2, 14), but is shaped as ()' in str(excinfo.value)

    # one column is missing
    too_narrow = np.random.random((2, ndims))
    with pytest.raises(ValueError) as excinfo:
        CmvnPostProcessor(ndims, stats=too_narrow)
    assert 'shape (2, 14), but is shaped as (2, 13)' in str(excinfo.value)

    # correctly shaped statistics are stored by the processor
    expected = np.random.random((2, ndims + 1))
    processor = CmvnPostProcessor(ndims, stats=expected.copy())
    assert expected == pytest.approx(processor.stats)
예제 #3
0
def test_cmvn(mfcc, norm_vars):
    """Check accumulation, normalization and reverse normalization.

    ``mfcc`` and ``norm_vars`` are provided from outside this function
    (presumably a fixture and a parametrization, not visible here).
    """
    # used to make sure the input features are never modified in place
    backup = mfcc.data.copy()

    proc = CmvnPostProcessor(mfcc.ndims)
    assert proc.dim == mfcc.ndims

    # cannot process without accumulation
    with pytest.raises(ValueError) as err:
        proc.process(mfcc)
    assert 'insufficient accumulation of stats' in str(err.value)

    # accumulate
    proc.accumulate(mfcc)
    assert proc.count == mfcc.nframes

    # cmvn
    cmvn1 = proc.process(mfcc, norm_vars=norm_vars)
    assert np.array_equal(backup, mfcc.data)
    assert cmvn1.shape == mfcc.shape
    assert cmvn1.dtype == mfcc.dtype
    assert np.array_equal(cmvn1.times, mfcc.times)
    assert cmvn1.data.mean() == pytest.approx(0, abs=1e-6)
    if norm_vars is True:
        assert cmvn1.data.var(axis=0) == pytest.approx(np.ones(cmvn1.ndims))
    else:
        assert cmvn1.data.var(axis=0) == pytest.approx(mfcc.data.var(axis=0))
    assert mfcc.ndims == proc.dim == proc.ndims == cmvn1.ndims

    # reverse cmvn: must give back the original features
    cmvn2 = proc.process(cmvn1, norm_vars=norm_vars, reverse=True)
    assert cmvn2.shape == mfcc.shape
    # check the reversed features (the dtype of cmvn1 is verified above)
    assert cmvn2.dtype == mfcc.dtype
    assert np.array_equal(cmvn2.times, mfcc.times)
    assert cmvn2.data == pytest.approx(mfcc.data, abs=1e-5)

    # accumulate a second time: the same features double the statistics
    stats = proc.stats.copy()
    proc.accumulate(mfcc)
    assert proc.stats == pytest.approx(stats * 2)

    assert np.array_equal(backup, mfcc.data)
    assert 'cmvn' not in mfcc.properties
    assert 'cmvn' in cmvn2.properties
    # the statistics have one more column than the features
    assert cmvn2.properties['cmvn']['stats'].shape == (2, mfcc.ndims + 1)
예제 #4
0
def test_params():
    """Check the parameters exposed by the processor.

    The parameters can be read with ``get_params`` but ``dim`` and
    ``stats`` cannot be changed with ``set_params``.
    """
    processor = CmvnPostProcessor(dim=1, stats=None)

    assert processor.get_params()['dim'] == 1

    # without user-provided statistics they are zeros of shape (2, dim + 1)
    assert processor.get_params()['stats'].shape == (2, 2)
    assert processor.get_params()['stats'].dtype == np.float64
    assert processor.get_params()['stats'].sum() == 0.0

    # both attributes are read-only once the processor is built
    read_only = (
        ('dim', 'cannot set attribute dim for CmvnPostProcessor'),
        ('stats', 'cannot set attribute stats for CmvnPostProcessor'),
    )
    for name, message in read_only:
        with pytest.raises(ValueError) as excinfo:
            processor.set_params(**{name: None})
        assert message in str(excinfo.value)
예제 #5
0
def test_bad_weights(mfcc):
    """Check that malformed weights are rejected during accumulation."""
    processor = CmvnPostProcessor(dim=mfcc.ndims)

    # weights with two dimensions
    matrix = np.asarray([[1, 2], [3, 4]])
    with pytest.raises(ValueError) as excinfo:
        processor.accumulate(mfcc, weights=matrix)
    assert 'weights must have a single dimension' in str(excinfo.value)

    # empty weights: their number does not match the number of frames
    empty = np.asarray([])
    with pytest.raises(ValueError) as excinfo:
        processor.accumulate(mfcc, weights=empty)
    expected = 'there is 0 weights but {} feature frames'.format(
        mfcc.nframes)
    assert expected in str(excinfo.value)
예제 #6
0
def test_weights(mfcc):
    """Check that the accumulated count follows the frame weights."""
    nframes = mfcc.nframes

    def accumulated_count(frame_weights):
        # accumulate the features on a fresh processor, return its count
        processor = CmvnPostProcessor(dim=mfcc.ndims)
        processor.accumulate(mfcc, weights=frame_weights)
        return processor.count

    # uniform weights: null, unit and half
    assert accumulated_count(np.zeros(nframes)) == 0
    assert accumulated_count(np.ones(nframes)) == nframes
    assert accumulated_count(np.ones(nframes) * 0.5) == nframes * 0.5

    # only the two first frames have a (small) weight
    sparse = np.zeros(nframes)
    sparse[:2] = 0.1
    assert accumulated_count(sparse) == pytest.approx(0.2)
예제 #7
0
def test_skip_dims(mfcc):
    """Check the effect of the ``skip_dims`` option of ``process``."""
    processor = CmvnPostProcessor(mfcc.ndims)
    processor.accumulate(mfcc)

    # skipping nothing: None and an empty list are equivalent
    full = processor.process(mfcc, skip_dims=None)
    assert full == processor.process(mfcc, skip_dims=[])

    # skipped dimensions keep the input values, the others are normalized
    partial = processor.process(mfcc, skip_dims=[0, 1, 2])
    assert partial.data[:, :3] == pytest.approx(mfcc.data[:, :3])
    assert partial.data[:, 3:] == pytest.approx(full.data[:, 3:])

    # the order of the skipped dimensions does not matter
    shuffled = processor.process(mfcc, skip_dims=[1, 2, 0])
    assert shuffled == partial

    # skipping every dimension gives back the input features, once the
    # properties that differ between the two objects are removed
    # NOTE(review): this deletes an entry of the mfcc fixture properties,
    # confirm the fixture is not shared with other tests
    untouched = processor.process(mfcc, skip_dims=list(range(mfcc.ndims)))
    del untouched.properties['cmvn']
    del untouched.properties['pipeline']
    del mfcc.properties['pipeline']
    assert untouched == mfcc

    # negative or too large dimensions are rejected
    invalid = ([-1], [-1, 2, 3], [100], [100, -1, 5])
    for dims in invalid:
        with pytest.raises(ValueError):
            processor.process(mfcc, skip_dims=dims)
예제 #8
0
    def get_features(self, y, sample_rate):
        """Feature extraction

        The waveform is scaled by its peak amplitude and converted to
        MFCCs. Depending on the configuration read from ``self``, the
        MFCCs are then extended with their derivatives (``self.D`` and
        ``self.DD``), normalized with CMVN (``self.with_cmvn``) and
        concatenated with pitch features (``self.with_pitch``).

        Parameters
        ----------
        y : (n_samples, 1) numpy array
            Waveform
        sample_rate : int
            Sample rate

        Returns
        -------
        data : (n_frames, n_dimensions) numpy array
            Features
        """
        # scale the audio signal between -1 and 1 before
        # creating audio object w/ shennong: Do this because
        # when pyannote uses "data augmentation", it normalizes
        # the signal, but when loading the data without data
        # augmentation it doesn't normalize it.
        peak = np.max(np.abs(y))
        # an all-zero (silent) signal has a null peak: divide by 1 in that
        # case so the samples stay at zero instead of becoming NaN
        y = y / (peak if peak > 0 else 1)

        # create audio object for shennong
        audio = Audio(data=y, sample_rate=sample_rate)

        # MFCC parameters
        processor = MfccProcessor(sample_rate=sample_rate)
        processor.dither = self.dither
        processor.preemph_coeff = self.preemph_coeff
        processor.remove_dc_offset = self.remove_dc_offset
        processor.window_type = self.window_type
        processor.blackman_coeff = self.blackman_coeff
        processor.vtln_low = self.vtln_low
        processor.vtln_high = self.vtln_high
        processor.energy_floor = self.energy_floor
        processor.raw_energy = self.raw_energy
        processor.cepstral_lifter = self.cepstral_lifter
        processor.htk_compat = self.htk_compat

        processor.low_freq = self.mfccLowFreq
        processor.high_freq = self.mfccHighFreq  # defines it as (nyquist - 100)
        processor.use_energy = self.e
        processor.num_ceps = self.coefs
        processor.snip_edges = False  # end with correct number of frames

        # MFCC extraction
        mfcc = processor.process(audio)

        # compute deltas: second order when self.DD is set, first order
        # otherwise
        if self.D:
            derivative_proc = DeltaPostProcessor(order=2 if self.DD else 1)
            mfcc = derivative_proc.process(mfcc)

        # Compute CMVN
        if self.with_cmvn:
            # size the processor from the features actually normalized, so
            # it always matches them (pitch, if any, is added afterwards)
            postproc = CmvnPostProcessor(mfcc.ndims, stats=None)

            # accumulate stats, then normalize with them
            postproc.accumulate(mfcc)
            mfcc = postproc.process(mfcc)

        # Compute Pitch
        if self.with_pitch:
            # extract pitch and append it to the other features
            pitch = self.get_pitch(audio, self.pitchFmin, self.pitchFmax)
            return self.concatenate_with_pitch(mfcc.data, pitch.data)

        return mfcc.data