Beispiel #1
0
    def comp_feat(self, sig, rate):
        '''
        compute MFCC based features for one signal

        The signal is first passed through snip() together with the
        configured window length and window step, the result is handed to
        base.mfcc. Depending on the configuration the energy is appended as
        an extra column and dynamic features are computed.

        Args:
            sig: the audio signal as a 1-D numpy array
            rate: the sampling rate

        Returns:
            the features as a [seq_length x feature_dim] numpy array

        Raises:
            Exception: if conf['dynamic'] is not 'delta', 'ddelta' or
                'nodelta'
        '''

        #snip the edges, window parameters are stored as strings in the conf
        window_length = float(self.conf['winlen'])
        window_step = float(self.conf['winstep'])
        snipped = snip(sig, rate, window_length, window_step)

        mfccs, energy = base.mfcc(snipped, rate, self.conf)

        #the energy becomes the last feature column when requested
        if self.conf['include_energy'] == 'True':
            mfccs = np.concatenate((mfccs, energy[:, np.newaxis]), axis=1)

        #apply the requested dynamic features, reject anything unknown
        dynamic = self.conf['dynamic']
        if dynamic == 'nodelta':
            return mfccs
        if dynamic == 'delta':
            return base.delta(mfccs)
        if dynamic == 'ddelta':
            return base.ddelta(mfccs)

        raise Exception('unknown dynamic type')
Beispiel #2
0
    def comp_feat(self, sig, rate):
        '''
        compute log filterbank based features for one signal

        The features are computed with base.logfbank. Depending on the
        configuration the energy is appended as an extra column, dynamic
        features are computed and the result is mean and variance
        normalized over the sequence axis.

        Args:
            sig: the audio signal as a 1-D numpy array
            rate: the sampling rate

        Returns:
            the features as a [seq_length x feature_dim] numpy array

        Raises:
            Exception: if conf['dynamic'] is not 'delta', 'ddelta' or
                'nodelta'
        '''

        feat, energy = base.logfbank(sig, rate, self.conf)

        #the energy becomes the last feature column when requested
        if self.conf['include_energy'] == 'True':
            feat = np.append(feat, energy[:, np.newaxis], 1)

        if self.conf['dynamic'] == 'delta':
            feat = base.delta(feat)
        elif self.conf['dynamic'] == 'ddelta':
            feat = base.ddelta(feat)
        elif self.conf['dynamic'] != 'nodelta':
            raise Exception('unknown dynamic type')

        #mean and variance normalize the features
        if self.conf['mvn'] == 'True':
            #a feature column can be constant (zero standard deviation),
            #the small offset keeps the division finite so such a column
            #is normalized to zeros instead of NaN
            feat = (feat - feat.mean(0)) / (feat.std(0) + 1e-20)

        return feat
Beispiel #3
0
    def comp_feat(self, sig, rate):
        '''
        compute MFCC features extended with a pitch feature

        The MFCCs are computed with base.mfcc. The pitch is obtained by
        writing the signal to a temporary wav file and running the external
        pitch extractor 'reaper' on it, so make sure 'reaper' is in your
        $PATH. Frames that reaper marks with a pitch of -1 (voiceless) are
        filled in by linear interpolation between the neighbouring voiced
        frames. Depending on the configuration the energy is appended,
        dynamic features are computed and the result is mean and variance
        normalized.

        Args:
            sig: the audio signal as a 1-D numpy array
            rate: the sampling rate

        Returns:
            the features as a [seq_length x feature_dim] numpy array

        Raises:
            OSError: if the 'reaper' executable cannot be started
            subprocess.CalledProcessError: if 'reaper' exits with a non-zero
                status
            Exception: if conf['dynamic'] is not 'delta', 'ddelta' or
                'nodelta'
        '''

        #only needed for the call to the external pitch extractor
        import shutil
        import subprocess
        import tempfile

        feat, energy = base.mfcc(sig, rate, self.conf)

        # write the wav to temporary location and invoke external pitch
        # extractor. A fresh directory is used for every call so a failed
        # run can never pick up the output of a previous one.
        tempdir = tempfile.mkdtemp()
        try:
            wavfile = os.path.join(tempdir, 'mix.wav')
            pitchfile = os.path.join(tempdir, 'mix.txt')
            wav.write(wavfile, rate, np.int16(sig))
            # argument list instead of a shell string: no quoting problems
            # and a failing extractor raises instead of going unnoticed
            subprocess.check_call(
                ['reaper', '-i', wavfile, '-f', pitchfile, '-a', '-e', '0.01'])
            # the first 7 lines are skipped as header, the pitch is read
            # from the third column; ndmin=2 keeps the column indexing valid
            # when there is only a single data row
            pitch = np.loadtxt(pitchfile, skiprows=7, ndmin=2)[:, 2]
        finally:
            shutil.rmtree(tempdir, ignore_errors=True)

        # repeat the last pitch value if there are fewer pitch frames than
        # feature frames
        pitch = np.pad(pitch, (0, max(0, feat.shape[0] - pitch.shape[0])),
                       'edge')

        # linear interpolation in voiceless regions
        voiceless = np.where(pitch == -1)[0]
        # without this guard a fully voiced utterance yields one empty
        # segment and indexing it fails
        if voiceless.size > 0:
            # a gap larger than one between successive voiceless indices
            # marks the start of a new voiceless segment
            jump = np.where((voiceless[1:] - voiceless[:-1]) > 1)[0]
            segments = np.split(voiceless, jump + 1)
            for seg in segments:
                # indices of the voiced frames surrounding the segment
                idx1 = seg[0] - 1
                idx2 = seg[-1] + 1
                val1 = -1
                val2 = -1
                if idx1 >= 0:
                    val1 = pitch[idx1]
                if idx2 < pitch.size:
                    val2 = pitch[idx2]
                if val1 == -1:  # segment starts at utterance start
                    val1 = val2
                if val2 == -1:  # segment ends at utterance end
                    val2 = val1
                if val1 == -1:  # segment is the whole utterance => make up a value
                    val1 = 150
                    val2 = 150
                #interpolate
                pitch[seg] = (val2 - val1) * (np.array(seg) -
                                              idx1) / float(idx2 - idx1) + val1

        # the pitch becomes an extra feature column, surplus pitch frames
        # are dropped
        feat = np.append(feat, pitch[:feat.shape[0], np.newaxis], 1)

        if self.conf['include_energy'] == 'True':
            feat = np.append(feat, energy[:, np.newaxis], 1)

        if self.conf['dynamic'] == 'delta':
            feat = base.delta(feat)
        elif self.conf['dynamic'] == 'ddelta':
            feat = base.ddelta(feat)
        elif self.conf['dynamic'] != 'nodelta':
            raise Exception('unknown dynamic type')

        #mean and variance normalize the features
        if self.conf['mvn'] == 'True':
            feat = (feat - feat.mean(0)) / (
                feat.std(0) + 1e-20
            )  # features could be constant, e.g. voiceless speech

        return feat