Example #1
0
def test_energy(audio, mfcc):
    """VAD from MFCC or from energy is the same, except for properties"""
    energy = EnergyProcessor().process(audio)
    from_energy = VadPostProcessor().process(energy)
    from_mfcc = VadPostProcessor().process(mfcc)

    # rebuild both features from data and times only, so the properties
    # are left out of the comparison
    stripped_energy = Features(from_energy.data, from_energy.times)
    stripped_mfcc = Features(from_mfcc.data, from_mfcc.times)
    assert stripped_energy == stripped_mfcc
Example #2
0
def test_tofrom_dict(mfcc):
    """Features survive a round trip through their dict representation"""
    as_dict = mfcc._to_dict()
    rebuilt = Features._from_dict(as_dict)
    assert rebuilt == mfcc

    # a dict without the 'times' entry must be rejected
    incomplete = {
        'data': as_dict['data'],
        'properties': as_dict['properties']}
    with pytest.raises(ValueError) as err:
        Features._from_dict(incomplete)
    # inspect the raised exception itself, not the ExceptionInfo wrapper
    assert 'missing keys: times' in str(err.value)
Example #3
0
def test_1d_times_sorted():
    """Features built on the second column of 2d times are valid"""
    nframes, ndims = 10, 5
    data = np.random.random((nframes, ndims))

    times = MfccProcessor().times(nframes)
    assert times.shape == (nframes, 2)

    # validation is skipped at construction and requested explicitly
    feats = Features(data, times[:, 1], validate=False)
    assert feats.is_valid()
Example #4
0
def test_collection_isclose():
    """Checks FeaturesCollection.is_close on shifted data and other keys"""
    def random_features():
        return Features(np.random.random((10, 2)), np.ones((10, )))

    first = random_features()
    second = random_features()
    shifted = Features(second.data + 1, second.times)

    reference = FeaturesCollection(f1=first, f2=second)
    shifted_data = FeaturesCollection(f1=first, f2=shifted)
    other_keys = FeaturesCollection(f1=first, f3=second)

    assert reference.is_close(reference)
    # data shifted by 1 is close only with an absolute tolerance of 1
    assert not reference.is_close(shifted_data)
    assert reference.is_close(shifted_data, atol=1)
    # same features stored under different keys are not close
    assert not reference.is_close(other_keys)
Example #5
0
def mfcc_utf8(mfcc):
    """Returns a collection whose key and properties hold non-ASCII text

    The collection has a single item built on the data and times of
    `mfcc`, with a 'comments' entry added to its properties.

    """
    # work on a copy of the properties: adding the comment in place would
    # also alter the `mfcc` features given as input
    props = dict(mfcc.properties)
    props['comments'] = '使用人口について正確な統計はないが、日本国'

    feats = FeaturesCollection()
    feats['æðÐ'] = Features(mfcc.data, mfcc.times, props)
    return feats
Example #6
0
    def process(self, features):
        """Computes voice activity detection (VAD) on the input `features`

        Parameters
        ----------
        features : :class:`~shennong.features.features.Features`, shape = [n,m]
            The speech features on which to look for voiced
            frames. The first coefficient must be a log-energy (or
            equivalent). Works well with
            :class:`~shennong.features.processor.mfcc.MfccProcessor` and
            :class:`~shennong.features.processor.plp.PlpProcessor`.

        Returns
        -------
        vad : :class:`~shennong.features.features.Features`, shape = [n,1]
            The output vad features are of dtype uint8 and contain 1
            for voiced frames or 0 for unvoiced frames.

        """
        # run the Kaldi energy-based VAD on the features data
        kaldi_features = kaldi.matrix.SubMatrix(features.data)
        kaldi_vad = kaldi.ivector.compute_vad_energy(
            self._options, kaldi_features)
        decisions = kaldi.matrix.SubVector(kaldi_vad).numpy()

        # store the per-frame decisions as a single uint8 column
        column = np.atleast_2d(decisions.astype(np.uint8)).T
        return Features(
            column,
            features.times,
            properties=self.get_properties(features))
Example #7
0
    def _process(self, cls, signal, vtln_warp):
        """Inner process method common to all Kaldi Mel processors

        Checks that `signal` is mono and sampled at the processor's
        sample rate, computes the features with an instance of `cls`
        built on the current options, and returns them as `Features`.

        Raises
        ------
        ValueError
            If `signal` has more than one channel, or if its sample
            rate differs from the processor's one.

        """
        # only mono signals are accepted
        nchannels = signal.nchannels
        if nchannels != 1:
            raise ValueError(
                'signal must have one dimension, but it has {}'
                .format(nchannels))

        # processor and signal must agree on the sample rate
        expected_rate, signal_rate = self.sample_rate, signal.sample_rate
        if expected_rate != signal_rate:
            raise ValueError(
                'processor and signal mismatch in sample rates: {} != {}'
                .format(expected_rate, signal_rate))

        # the options are assigned by copy, not by reference: forward
        # them again so that a change made by the user after
        # construction ('p = Processor(); p.dither = 0') reaches Kaldi
        self._options.frame_opts = self._frame_options
        self._options.mel_opts = self._mel_options

        # the samples are given to Kaldi as 16 bits integers
        samples = signal.astype(np.int16).data
        computer = cls(self._options)
        kaldi_features = computer.compute(
            kaldi.matrix.SubVector(samples), vtln_warp)
        data = kaldi.matrix.SubMatrix(kaldi_features).numpy()

        nframes = data.shape[0]
        return Features(
            data, self.times(nframes), properties=self.get_properties())
Example #8
0
def test_save_invalid(tmpdir, mfcc):
    """Saving a collection holding invalid features must fail"""
    filename = str(tmpdir.join('foo.json'))
    serializer = serializers.get_serializer(
        FeaturesCollection, filename, None)

    # scalar times make the features invalid, validation being skipped
    # at construction
    invalid = Features(data=mfcc.data, times=0, validate=False)
    collection = FeaturesCollection(mfcc=invalid)

    with pytest.raises(ValueError) as err:
        serializer.save(collection)
    assert 'features are not valid' in str(err.value)
Example #9
0
def test_apply_baddim(features_collection):
    """apply_cmvn rejects features with inconsistent dimensions"""
    copies = {}
    for name, features in features_collection.items():
        copies[name] = features.copy()
    feats = FeaturesCollection(copies)

    # add features made of 2 frames of a single dimension
    odd_data = np.random.random((2, 1))
    feats['new'] = Features(odd_data, np.asarray([0, 1]))

    with pytest.raises(ValueError) as err:
        apply_cmvn(feats)
    assert 'must have consistent dimensions' in str(err.value)
Example #10
0
def features_collection():
    """Returns a collection of 3 random features

    All the features have 10 dimensions, each one has a random number
    of frames drawn in [5, 15).

    """
    ndims = 10
    collection = FeaturesCollection()
    for name in ('0', '1', '2'):
        nframes = np.random.randint(5, 15)
        data = np.random.random((nframes, ndims))
        collection[name] = Features(data, np.arange(0, nframes))
    return collection
Example #11
0
def test_partition():
    """Partitions a collection and checks the resulting sub-collections"""
    f1 = Features(np.random.random((10, 2)), np.ones((10, )))
    f2 = Features(np.random.random((5, 2)), np.ones((5, )))
    f3 = Features(np.random.random((5, 2)), np.ones((5, )))
    fc = FeaturesCollection(f1=f1, f2=f2, f3=f3)

    # an index which does not mention every item must be rejected
    with pytest.raises(ValueError) as err:
        fc.partition({'f1': 'p1', 'f2': 'p1'})
    # inspect the raised exception itself, not the ExceptionInfo wrapper
    assert ('following items are not defined in the partition index: f3'
            in str(err.value))

    fp = fc.partition({'f1': 'p1', 'f2': 'p1', 'f3': 'p2'})
    assert sorted(fp.keys()) == ['p1', 'p2']
    assert sorted(fp['p1'].keys()) == ['f1', 'f2']
    assert sorted(fp['p2'].keys()) == ['f3']

    # the original collection and each of its parts are valid (the loop
    # variable has its own name so as not to shadow `fc`)
    assert fc.is_valid()
    for part in fp.values():
        assert part.is_valid()
Example #12
0
    def process(self, signal, vtln_warp=1.0):
        """Compute spectrogram with the specified options

        Do an optional feature-level vocal tract length normalization
        (VTLN) when `vtln_warp` != 1.0.

        Parameters
        ----------
        signal : Audio, shape = [nsamples, 1]
            The input audio signal to compute the features on, must be
            mono
        vtln_warp : float, optional
            The VTLN warping factor to be applied when computing
            features. Defaults to 1.0, meaning no warping is to be
            done.

        Returns
        -------
        features : `Features`, shape = [nframes, `ndims`]
            The computed features, output will have as many rows as there
            are frames (depends on the specified options `frame_shift`
            and `frame_length`).

        Raises
        ------
        ValueError
            If the input `signal` has more than one channel (i.e. is
            not mono). If `sample_rate` != `signal.sample_rate`.

        """
        # only mono signals are accepted
        nchannels = signal.nchannels
        if nchannels != 1:
            raise ValueError(
                'signal must have one dimension, but it has {}'
                .format(nchannels))

        # processor and signal must agree on the sample rate
        expected_rate, signal_rate = self.sample_rate, signal.sample_rate
        if expected_rate != signal_rate:
            raise ValueError(
                'processor and signal mismatch in sample rates: {} != {}'
                .format(expected_rate, signal_rate))

        # the options are assigned by copy, not by reference: forward
        # them again so that a change made by the user after
        # construction ('p = Processor(); p.dither = 0') reaches Kaldi
        self._options.frame_opts = self._frame_options

        # the samples are given to Kaldi as 16 bits integers
        samples = signal.astype(np.int16).data
        computer = kaldi.feat.spectrogram.Spectrogram(self._options)
        kaldi_features = computer.compute(
            kaldi.matrix.SubVector(samples), vtln_warp)
        data = kaldi.matrix.SubMatrix(kaldi_features).numpy()

        nframes = data.shape[0]
        return Features(
            data, self.times(nframes), properties=self.get_properties())
Example #13
0
def test_post_pitch(raw_pitch):
    """Post-processes a raw pitch and rejects inputs without 2 columns"""
    post_processor = PitchPostProcessor()
    params_before = post_processor.get_params()

    processed = post_processor.process(raw_pitch)
    assert processed.shape[1] == 3
    assert raw_pitch.shape[0] == processed.shape[0]
    assert np.array_equal(raw_pitch.times, processed.times)
    # processing must leave the parameters untouched
    assert params_before == post_processor.get_params()

    # anything but 2 columns as input is an error
    bad_inputs = (
        (1, 'data shape must be (_, 2), but it is (_, 1)'),
        (3, 'data shape must be (_, 2), but it is (_, 3)'))
    for ncols, message in bad_inputs:
        bad_pitch = Features(
            np.random.random((raw_pitch.nframes, ncols)), raw_pitch.times)
        with pytest.raises(ValueError) as err:
            post_processor.process(bad_pitch)
        assert message in str(err.value)
Example #14
0
def test_concatenate_tolerance(capsys):
    """Concatenates features whose number of frames differ by 2

    The concatenation must fail when the tolerance is lower than 2,
    and succeed with a warning otherwise, the result having the
    smallest number of frames.

    """
    get_logger(level='info')
    f1 = Features(np.random.random((12, 2)), np.ones((12, )))
    f2 = Features(np.random.random((10, 2)), np.ones((10, )))

    # the error messages are read from the raised exception itself, not
    # from the ExceptionInfo wrapper
    with pytest.raises(ValueError) as err:
        f1.concatenate(f2, tolerance=0)
    assert 'features have a different number of frames' in str(err.value)

    with pytest.raises(ValueError) as err:
        f1.concatenate(f2, tolerance=1)
    assert ('features differs number of frames, and greater than '
            in str(err.value))

    # within tolerance, whatever the order of the operands
    f3 = f1.concatenate(f2, tolerance=2)
    assert f3.shape == (10, 4)
    assert 'WARNING' in capsys.readouterr().err

    f3 = f2.concatenate(f1, tolerance=2)
    assert f3.shape == (10, 4)
    assert 'WARNING' in capsys.readouterr().err
Example #15
0
def test_concatenate(mfcc):
    """Concatenates features with themselves, then with shifted times"""
    doubled = mfcc.concatenate(mfcc)
    assert doubled.nframes == mfcc.nframes
    assert doubled.ndims == mfcc.ndims * 2
    # the properties differ as a whole but the 'mfcc' entry is preserved
    assert doubled.properties != mfcc.properties
    assert doubled.properties['mfcc'] == mfcc.properties['mfcc']

    # features with different times cannot be concatenated
    shifted = Features(mfcc.data, mfcc.times + 1)
    with pytest.raises(ValueError) as err:
        mfcc.concatenate(shifted)
    # inspect the raised exception itself, not the ExceptionInfo wrapper
    assert 'times are not equal' in str(err.value)
Example #16
0
def test_init_bad():
    """Checks the errors raised when building Features on bad arguments

    The error messages are read from the raised exception itself
    (`err.value`), not from the ExceptionInfo wrapper.

    """
    with pytest.raises(ValueError) as err:
        Features(0, 0, properties=0)
    assert 'data must be a numpy array' in str(err.value)

    with pytest.raises(ValueError) as err:
        Features(np.asarray([0]), 0, properties=0)
    assert 'times must be a numpy array' in str(err.value)

    with pytest.raises(ValueError) as err:
        Features(np.asarray([0]), np.asarray([0]), properties=0)
    assert 'properties must be a dictionnary' in str(err.value)

    with pytest.raises(ValueError) as err:
        Features(np.asarray([0]), np.asarray([0]), properties={0: 0})
    assert 'data dimension must be 2' in str(err.value)

    with pytest.raises(ValueError) as err:
        Features(np.asarray([[0], [0]]), np.random.random((2, 2, 2)))
    assert 'times dimension must be 1 or 2' in str(err.value)

    # the data is prepared outside of the `raises` block so that only
    # the Features construction is expected to fail
    data = np.random.random((12, 2))
    data[2, 1] = np.nan
    with pytest.raises(ValueError) as err:
        Features(data, np.ones((12, )))
    assert 'data contains non-finite numbers' in str(err.value)
Example #17
0
    def process(self, alignment):
        """Computes a framed one-hot encoding of the `alignment`

        The alignment is sampled at the frames sample rate and cut
        into frames. Each frame is assigned a single token: the one
        with the biggest cumulated window weight over the frame.

        Parameters
        ----------
        alignment
            The alignment to encode. It must be accepted by
            `_token2index` and provide an `at_sample_rate` method.

        Returns
        -------
        features : Features, shape = [nframes, ntokens]
            Boolean features with a single True value per row, at the
            index of the winner token. The frames boundaries divided
            by the sample rate are used as times, and the token to
            index mapping is stored in the properties.

        """
        # build a bijection token <-> onehot index
        token2index = self._token2index(alignment)

        # sample the alignment at the requested sample rate
        sampled = alignment.at_sample_rate(self.frame.sample_rate)

        # get the frames as pairs (istart:istop)
        nframes = self.frame.nframes(sampled.shape[0])
        frame_boundaries = self.frame.boundaries(nframes)

        # allocate the features data. Use the builtin bool: the
        # `np.bool` alias is deprecated since NumPy 1.20 and does not
        # exist in the releases 1.24 to 1.26
        data = np.zeros(
            (frame_boundaries.shape[0], len(token2index)), dtype=bool)

        # allocate the window function
        window = shennong.features.window.window(
            self.frame.samples_per_frame,
            type=self.window_type,
            blackman_coeff=self.blackman_coeff)

        for i, (onset, offset) in enumerate(frame_boundaries):
            framed = sampled[onset:offset]
            if np.all(framed[0] == framed[1:]):
                # the frame is made of a single token, no need to
                # compute a window function
                winner = framed[0]
            else:
                # several tokens in the frame, cumulate the window
                # weights of each one
                weights = collections.defaultdict(int)
                for j, w in enumerate(window):
                    weights[framed[j]] += w

                # the winner token has the biggest weight (on ties the
                # token seen first wins, as with a stable sort)
                winner = max(
                    weights.items(), key=operator.itemgetter(1))[0]

            data[i, token2index[winner]] = True

        try:
            properties = self.get_properties()
        except ValueError:  # tokens not defined
            # temporarily expose the tokens found in the alignment, and
            # reset them even if the properties cannot be retrieved
            self.tokens = token2index.keys()
            try:
                properties = self.get_properties()
            finally:
                self.tokens = None
        properties[self.name].update({'token2index': token2index})

        return Features(data,
                        frame_boundaries / self.frame.sample_rate,
                        properties=properties)
Example #18
0
def test_times_1d(serializer, tmpdir):
    """A collection with 1d times is preserved by a save/load cycle"""
    # the Kaldi serializer is given a file name with the '.ark' extension
    is_kaldi = serializer is serializers.KaldiSerializer
    tmpfile = str(tmpdir.join('feats.ark' if is_kaldi else 'feats'))

    # keep the second column of the 2d times only
    times = MfccProcessor().times(10)[:, 1]
    assert times.shape == (10, )

    features = Features(np.random.random((10, 5)), times)
    saved = FeaturesCollection(mfcc=features)

    serializer(saved.__class__, tmpfile).save(saved)
    loaded = serializer(saved.__class__, tmpfile).load()
    assert saved == loaded
Example #19
0
def test_equal(mfcc):
    """Exercises Features equality and is_close on each attribute"""
    props = mfcc.properties

    # the object itself
    assert mfcc == mfcc
    assert mfcc.is_close(mfcc)

    # a copy holds the same data
    same = mfcc.copy()
    assert mfcc == same
    assert mfcc.is_close(same)

    # another shape is neither equal nor close
    wider = mfcc.concatenate(mfcc)
    assert not mfcc == wider
    assert not mfcc.is_close(wider)

    # another dtype is not equal but still close
    as_float64 = mfcc.copy(dtype=np.float64)
    assert not mfcc == as_float64
    assert mfcc.is_close(as_float64)

    # other properties are neither equal nor close
    other_props = Features(mfcc.data, mfcc.times, properties={'foo': 0})
    assert not mfcc == other_props
    assert not mfcc.is_close(other_props)

    # shifted times are neither equal nor close
    shifted_times = Features(mfcc.data, mfcc.times + 1, properties=props)
    assert not mfcc == shifted_times
    assert not mfcc.is_close(shifted_times)

    # shifted data is neither equal nor close by default...
    shifted_data = Features(mfcc.data + 1, mfcc.times, properties=props)
    assert not mfcc == shifted_data
    assert not mfcc.is_close(shifted_data)

    # ... but it is close once the tolerance covers the shift
    shifted_data = Features(mfcc.data + 1, mfcc.times, properties=props)
    assert not mfcc == shifted_data
    assert mfcc.is_close(shifted_data, atol=1)

    # the tolerance does not apply to the times
    shifted_times = Features(mfcc.data, mfcc.times + 1, properties=props)
    assert not mfcc == shifted_times
    assert not mfcc.is_close(shifted_times, atol=1)
Example #20
0
    def process(self, raw_pitch):
        """Post process a raw pitch data as specified by the options

        Parameters
        ----------
        raw_pitch : Features, shape = [n, 2]
            The pitch as extracted by the `PitchProcessor.process`
            method

        Returns
        -------
        pitch : Features, shape = [n, 1 2 3 or 4]
            The post-processed pitch usable as speech features. The
            output columns are 'pov_feature', 'normalized_log_pitch',
            'delta_pitch' and 'raw_log_pitch', in that order, if their
            respective options are set to True.

        Raises
        ------
        ValueError
            If `raw_pitch` has not exactly two columns. If all the
            following options are False: 'add_pov_feature',
            'add_normalized_log_pitch', 'add_delta_pitch' and
            'add_raw_log_pitch' (at least one of them must be True).

        """
        # at least one output column must be requested
        requested = (
            self.add_pov_feature
            or self.add_normalized_log_pitch
            or self.add_delta_pitch
            or self.add_raw_log_pitch)
        if not requested:
            raise ValueError(
                'at least one of the following options must be True: '
                'add_pov_feature, add_normalized_log_pitch, '
                'add_delta_pitch, add_raw_log_pitch')

        # the raw pitch must be made of exactly two columns
        ncols = raw_pitch.shape[1]
        if ncols != 2:
            raise ValueError(
                'data shape must be (_, 2), but it is (_, {})'
                .format(ncols))

        kaldi_pitch = kaldi.feat.pitch.process_pitch(
            self._options, kaldi.matrix.SubMatrix(raw_pitch.data))
        data = kaldi.matrix.SubMatrix(kaldi_pitch).numpy()

        # the times of the raw pitch are reused as is
        return Features(
            data, raw_pitch.times, properties=self.get_properties(raw_pitch))
Example #21
0
    def process(self, signal):
        """Extracts the (NCCF, pitch) from a given speech `signal`

        Parameters
        ----------
        signal : Audio
            The speech signal on which to estimate the pitch. The
            signal's sample rate must match the sample rate specified
            in the `PitchProcessor` options.

        Returns
        -------
        raw_pitch_features : Features, shape = [nframes, 2]
            The output array has as many rows as there are frames
            (depends on the specified options `frame_shift` and
            `frame_length`), and two columns corresponding to (NCCF,
            pitch).

        Raises
        ------
        ValueError
            If the input `signal` has more than one channel (i.e. is
            not mono). If `sample_rate` != `signal.sample_rate`.

        """
        # only mono signals are accepted
        nchannels = signal.nchannels
        if nchannels != 1:
            raise ValueError(
                'audio signal must have one channel, but it has {}'
                .format(nchannels))

        # processor and signal must agree on the sample rate
        expected_rate, signal_rate = self.sample_rate, signal.sample_rate
        if expected_rate != signal_rate:
            raise ValueError(
                'processor and signal mismatch in sample rates: {} != {}'
                .format(expected_rate, signal_rate))

        # the samples are given to Kaldi as 16 bits integers
        samples = signal.astype(np.int16).data
        kaldi_pitch = kaldi.feat.pitch.compute_kaldi_pitch(
            self._options, kaldi.matrix.SubVector(samples))
        data = kaldi.matrix.SubMatrix(kaldi_pitch).numpy()

        nframes = data.shape[0]
        return Features(
            data, self.times(nframes), properties=self.get_properties())
Example #22
0
def test_copy(mfcc):
    """Features.copy allocates new arrays, explicit construction does not"""
    # by copy we get equal features backed by distinct objects
    copied = mfcc.copy()
    assert copied == mfcc
    assert copied is not mfcc
    assert copied.data is not mfcc.data
    assert copied.times is not mfcc.times
    assert copied.properties is not mfcc.properties

    # by explicit construction the data, times and properties are shared
    shared = Features(
        mfcc.data, mfcc.times, properties=mfcc.properties, validate=False)
    assert shared == mfcc
    assert shared is not mfcc
    assert shared.data is mfcc.data
    assert shared.times is mfcc.times
    assert shared.properties is mfcc.properties
Example #23
0
    def process(self, signal):
        """Computes features on the input `signal` with `_rastaplp`

        Raises
        ------
        ValueError
            If the input `signal` has more than one channel (i.e. is
            not mono). If `sample_rate` != `signal.sample_rate`.

        """
        # only mono signals are accepted
        nchannels = signal.nchannels
        if nchannels != 1:
            raise ValueError(
                'signal must have one dimension, but it has {}'
                .format(nchannels))

        # processor and signal must agree on the sample rate
        expected_rate, signal_rate = self.sample_rate, signal.sample_rate
        if expected_rate != signal_rate:
            raise ValueError(
                'processor and signal mismatch in sample rates: {} != {}'
                .format(expected_rate, signal_rate))

        # the features are extracted from 16 bits integers samples
        extracted = self._rastaplp(signal.astype(np.int16))

        # the extracted array is transposed, so that its second axis
        # gives the number of frames, and stored as float32
        transposed = extracted.T
        return Features(
            transposed.astype(np.float32),
            self.times(transposed.shape[0]),
            properties=self.get_properties())
Example #24
0
    def process(self, features):
        """Compute deltas on `features` with the specified options

        Parameters
        ----------
        features : Features, shape = [nframes, ncols]
            The input features on which to compute the deltas

        Returns
        -------
        deltas : Features, shape = [nframes, ncols * (`order` + 1)]
            The computed deltas with as many orders as specified. The
            output features are the concatenation of the input
            `features` and its time derivative at each order.

        """
        kaldi_input = kaldi.matrix.SubMatrix(features.data)
        kaldi_deltas = kaldi.feat.functions.compute_deltas(
            self._options, kaldi_input)
        data = kaldi.matrix.SubMatrix(kaldi_deltas).numpy()

        # the times of the input features are reused as is
        properties = self.get_properties(features)
        return Features(data, features.times, properties)
Example #25
0
    def process(self, alignment):
        """Computes the one-hot encoding of the `alignment` tokens

        Parameters
        ----------
        alignment
            The alignment to encode. It must be accepted by
            `_token2index` and provide `tokens` and `times`.

        Returns
        -------
        features : Features, shape = [ntokens_in_alignment, ntokens]
            Boolean features with one row per token of the alignment
            and a single True value per row, at the index of that
            token. The alignment times are used as times, and the
            token to index mapping is stored in the properties.

        """
        # build a bijection token <-> onehot index
        token2index = self._token2index(alignment)

        # initialize the data matrix with zeros, TODO should data be a
        # scipy.sparse matrix? Use the builtin bool: the `np.bool`
        # alias is deprecated since NumPy 1.20 and does not exist in
        # the releases 1.24 to 1.26
        data = np.zeros(
            (alignment.tokens.shape[0], len(token2index)), dtype=bool)

        # fill the data with onehot encoding of tokens
        for i, token in enumerate(alignment.tokens):
            data[i, token2index[token]] = True

        try:
            properties = self.get_properties()
        except ValueError:  # tokens not defined
            # temporarily expose the tokens found in the alignment, and
            # reset them even if the properties cannot be retrieved
            self.tokens = token2index.keys()
            try:
                properties = self.get_properties()
            finally:
                self.tokens = None
        properties[self.name].update({'token2index': token2index})

        return Features(data, alignment.times, properties=properties)
Example #26
0
    def process(self, signal):
        """Computes energy on the input `signal`

        The signal is cut into frames according to the frame options,
        and the compressed sum of the squared samples is computed on
        each frame. When `raw_energy` is True, the frames are
        extracted with a preemphasis coefficient of 0 and a
        rectangular window, the user parameters being restored
        afterwards.

        Parameters
        ----------
        signal : Audio
            The input audio signal, must be mono

        Returns
        -------
        energy : :class:`~shennong.features.features.Features`
            The computed - and compressed - energy, one row per frame
            and a single column

        Raises
        ------
        ValueError
            If the input `signal` has more than one channel (i.e. is
            not mono). If `sample_rate` != `signal.sample_rate`.

        """
        # ensure the signal is correct
        if signal.nchannels != 1:
            raise ValueError(
                'signal must have one dimension, but it has {}'.format(
                    signal.nchannels))

        if self.sample_rate != signal.sample_rate:
            raise ValueError('processor and signal mismatch in sample rates: '
                             '{} != {}'.format(self.sample_rate,
                                               signal.sample_rate))

        # backup the parameters before overriding them for raw energy
        saved_params = self.get_params() if self.raw_energy else None

        try:
            if saved_params is not None:
                self.preemph_coeff = 0
                self.window_type = 'rectangular'

            # number of frames in the framed signal
            nframes = kaldi.feat.window.num_frames(signal.nsamples,
                                                   self._frame_options,
                                                   flush=True)

            # a kaldi view of the numpy signal
            samples = kaldi.matrix.SubVector(signal.data)

            # windowing function to compute frames
            window = kaldi.feat.window.FeatureWindowFunction.from_options(
                self._frame_options)

            # compression function to compress energy
            compression = self._compression_fun[self._compression]

            # pre-allocate the resulting energy
            energy = np.zeros((nframes, 1))

            # pre-allocate a buffer for the frames, extract the frames
            # and compute the energy on them
            out_frame = kaldi.matrix.Vector(
                self._frame_options.window_size())
            for frame in range(nframes):
                kaldi.feat.window.extract_window(0, samples, frame,
                                                 self._frame_options, window,
                                                 out_frame)

                # square the signal, force float64 to avoid overflow
                square = np.square(out_frame.numpy(), dtype=np.float64)

                # avoid doing log on 0 (should be avoided already by
                # dithering, but who knows...)
                energy[frame] = compression(
                    max(square.sum(),
                        np.finfo(np.float64).tiny))
        finally:
            # restore the user parameters, even if the computation
            # failed, so the processor is never left in raw energy
            # configuration
            if saved_params is not None:
                self.set_params(**saved_params)

        return Features(energy, self.times(nframes), self.get_properties())
Example #27
0
    def process(self, signal):
        """Computes bottleneck features on an audio `signal`

        Use a pre-trained neural network to extract bottleneck
        features. Features have a frame shift of 10 ms and frame
        length of 25 ms.

        Parameters
        ----------
        signal : Audio, shape = [nsamples, 1]
            The input audio signal to compute the features on, must be
            mono. The signal is up/down-sampled at 8 kHz during
            processing.

        Returns
        -------
        features : Features, shape = [nframes, 80]
            The computed bottleneck features will have as many rows as
            there are frames (depends on the `signal` duration, expect
            about 100 frames per second), each frame with 80
            dimensions.

        Raises
        ------
        RuntimeError
            If no speech is detected on the `signal` during the voice
            activity detection preprocessing step.

        """
        # force resampling to 8 kHz and 16 bits integers. The dtypes are
        # compared by value: two equal dtypes are not guaranteed to be
        # the same object, so an identity test could trigger a useless
        # resampling
        need_resample = (signal.sample_rate != 8000
                         or signal.dtype != np.dtype(np.int16))

        if need_resample:
            self._log.debug('resampling audio from %dHz@%db to %dHz@%db',
                            signal.sample_rate, signal.dtype.itemsize * 8,
                            8000, 16)
            signal = signal.resample(8000).astype(np.int16)

        signal = signal.data

        # define parameters to extract mel filterbanks. Those
        # parameters cannot be tuned because the networks are trained
        # with them... frame_noverlap is the number of samples to
        # overlap in each frame, so the frame_shift is 200 - 120 = 80
        frame_length = 200
        frame_noverlap = 120
        frame_shift = frame_length - frame_noverlap

        # voice activity detection TODO implement user-provided VAD
        # (vad input format could be an instance of Alignment, or
        # simply an array of bool).
        vad = _compute_vad(signal,
                           self._log,
                           win_length=frame_length,
                           win_overlap=frame_noverlap)

        # ensure we have some voiced frames in the signal
        voiced_frames = sum(vad)
        if not voiced_frames:
            raise RuntimeError(
                'no voice detected in signal, failed to extract features')
        self._log.debug('%d frames of speech detected (on %d total frames)',
                        voiced_frames, len(vad))

        # from audio signal to mel filterbank
        signal = _add_dither(signal, self.dither)
        window = np.hamming(frame_length)
        fbank_mx = _mel_fbank_mx(window.size,
                                 8000,
                                 numchans=24,
                                 lofreq=64.0,
                                 hifreq=3800.0)
        fea = _fbank_htk(signal, window, frame_noverlap, fbank_mx)

        # center the mel features from voiced frames mean
        fea -= np.mean(fea[vad], axis=0)

        # add a global context to the mel features, by repeating the
        # first and last frames
        left_ctx = right_ctx = 15
        fea = np.r_[np.repeat(fea[[0]], left_ctx, axis=0), fea,
                    np.repeat(fea[[-1]], right_ctx, axis=0)]

        # compute the network output from mel features, the network
        # weights being retrieved once
        weights = self._get_weights()
        left_ctx_bn1 = right_ctx_bn1 = weights['context']
        nn_input = _preprocess_nn_input(fea, left_ctx_bn1, right_ctx_bn1)
        nn_output = np.vstack(
            _create_nn_extract_st_BN(nn_input, weights, 2)[0])

        # compute the timestamps for each output frame, as (start,
        # stop) pairs in seconds
        frame_starts = np.arange(nn_output.shape[0]) * frame_shift
        times = (1.0 / 8000) * np.vstack(
            (frame_starts, frame_starts + frame_length)).T

        # return the final bottleneck features
        return Features(nn_output, times, self.get_properties())
Example #28
0
def test_2d_times_unsorted():
    """Random 2d times are rejected because they are not sorted"""
    data = np.random.random((10, 3))
    unsorted_times = np.random.random((10, 2))
    with pytest.raises(ValueError) as err:
        Features(data, unsorted_times)
    # inspect the raised exception itself, not the ExceptionInfo wrapper
    assert 'times is not sorted in increasing order' in str(err.value)
Example #29
0
def test_collection(mfcc):
    """Checks the validity of empty, valid and invalid collections"""
    assert FeaturesCollection._value_type is Features

    # an empty collection is valid, as is one holding valid features
    empty = FeaturesCollection()
    assert empty.is_valid()
    assert FeaturesCollection(mfcc=mfcc).is_valid()

    # 1d data and scalar times, built without validation, are invalid
    broken = Features(np.asarray([0]), 0, validate=False)
    assert not FeaturesCollection(mfcc=broken).is_valid()
Example #30
0
def test_2d_times_badshape():
    """2d times with 3 columns are rejected, exactly 2 being expected"""
    data = np.random.random((10, 3))
    bad_times = np.random.random((10, 3))
    with pytest.raises(ValueError) as err:
        Features(data, bad_times)
    # inspect the raised exception itself, not the ExceptionInfo wrapper
    assert 'times shape[1] must be 2, it is 3' in str(err.value)