Example #1
    def _extractFeatures_(self):
        print('Extracting features ...')
        mfcc_feat = []
        filter_feat = []
        spectrum = []

        for i in range(len(self.rate)):
            mfcc_feat.append(
                mfcc(self.data[i], self.rate[i], self.winlen, self.winstep,
                     self.numcep, self.nfilt, self.nfft, self.lowfreq,
                     self.highfreq))
            filterFeat, energy, spectrum0 = fbank(self.data[i], self.rate[i],
                                                  self.winlen, self.winstep,
                                                  self.nfilt, self.nfft,
                                                  self.lowfreq, self.highfreq)
            filterFeat = 10 * np.log10(filterFeat)
            #filterFeat -= (np.mean(filterFeat, axis=0) + 1e-8)
            #filterFeat /= np.std(filterFeat, axis=0)
            spectrum0 = 10 * np.log10(spectrum0)

            #spectrum0 -= (np.mean(spectrum0, axis=0) + 1e-8)
            #spectrum0 /= np.std(spectrum0, axis=0)

            filter_feat.append(filterFeat)
            spectrum.append(spectrum0)
        self.features = {
            'mfcc': mfcc_feat,
            'filter': filter_feat,
            'spectrum': spectrum
        }
        print('Feature extraction finished ...')
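The commented-out lines above sketch a per-band normalization of the log filterbank features and log spectrum. A minimal standalone version of that step, vectorized over frames (the helper name and the epsilon on the standard deviation are additions for this sketch, not part of the original):

import numpy as np

def normalize_log_features(feat):
    # feat: [num_frames x num_bands] array of log filterbank (or log spectrum)
    # values; subtract the per-band mean and divide by the per-band standard
    # deviation across frames, as in the commented-out lines above.
    # (helper name and epsilon guard are additions for this sketch)
    feat = feat - (np.mean(feat, axis=0) + 1e-8)
    return feat / (np.std(feat, axis=0) + 1e-8)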
Example #2
    def comp_feat(self, sig, rate):
        '''
        compute the features

        Args:
            sig: the audio signal as a 1-D numpy array
            rate: the sampling rate

        Returns:
            the features as a [seq_length x feature_dim] numpy array
        '''

        #snip the edges
        sig = snip(sig, rate, float(self.conf['winlen']),
                   float(self.conf['winstep']))

        feat, energy = base.mfcc(sig, rate, self.conf)

        if self.conf['include_energy'] == 'True':
            feat = np.append(feat, energy[:, np.newaxis], 1)

        if self.conf['dynamic'] == 'delta':
            feat = base.delta(feat)
        elif self.conf['dynamic'] == 'ddelta':
            feat = base.ddelta(feat)
        elif self.conf['dynamic'] != 'nodelta':
            raise Exception('unknown dynamic type')

        return feat
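comp_feat reads its settings from self.conf, which is assembled elsewhere in the project. A hypothetical configuration covering only the keys this method touches directly; the values shown are placeholders, stored as strings because the code compares against 'True' and converts with float():

conf = {
    'winlen': '0.025',          # window length in seconds (placeholder value)
    'winstep': '0.01',          # window shift in seconds (placeholder value)
    'include_energy': 'True',   # append the frame energy as an extra feature
    'dynamic': 'delta',         # one of 'delta', 'ddelta', 'nodelta'
}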
Example #3
File: mfcc.py  Project: qmeeus/assist
    def comp_feat(self, sig, rate):
        '''
        compute the features

        Args:
            sig: the audio signal as a 1-D numpy array
            rate: the sampling rate

        Returns:
            the features as a [seq_length x feature_dim] numpy array
        '''

        feat, energy = base.mfcc(sig, rate, self.conf)

        if self.conf['include_energy'] == 'True':
            feat = np.append(feat, energy[:, np.newaxis], 1)

        if self.conf['dynamic'] == 'delta':
            feat = base.delta(feat)
        elif self.conf['dynamic'] == 'ddelta':
            feat = base.ddelta(feat)
        elif self.conf['dynamic'] != 'nodelta':
            raise Exception('unknown dynamic type')

        #mean and variance normalize the features
        if self.conf['mvn'] == 'True':
            feat = (feat - feat.mean(0)) / feat.std(0)

        return feat
Example #4
def extractFeatures(input_signal):
    """extract features from the cleaned signal.
    
    :param cleaned signal
    :return: features list"""
    
    # compute mfcc list
    if len(input_signal) == 0:
        print("cleaned signal is empty")
        return input_signal
    
    mfcc_list = np.array(
        mfcc(input_signal, samplerate=prm.params["sample_rate"].get(),
             winlen=0.032, winstep=0.016, numcep=30, nfilt=55, nfft=2048,
             lowfreq=0, highfreq=6000, preemph=0.95, ceplifter=22,
             appendEnergy=True))
    extractor = LPCExtractor(prm.params["sample_rate"].get(), 32, 16, 30, 0.95)
    lpcc = extractor.extract(input_signal)
    
    pitch = extract_pitch(input_signal)
    
    # Cepstral Mean Normalization @TODO: WHY IS THIS NOT HELPING??
    if 0:
        mean_mfcc = np.mean(mfcc_list.T, 1)
        std_mfcc = np.std(mfcc_list.T, 1)
        for i in range(len(mfcc_list)):
            for j in range(len(mfcc_list[i])):
                mfcc_list[i][j] = (mfcc_list[i][j]-mean_mfcc[j])/std_mfcc[j]
 
#     print np.shape(mfcc_list[i-1]), np.shape(mean_mfcc), np.shape(std_mfcc)
    N = 2
    delta_list = delta(mfcc_list, N)
    ddelta_list = delta(delta_list, N)
    
    # do not keep first coeff (energy)
    features_list = list()
    for k in range(len(mfcc_list)):
#         features_list += [np.hstack((mfcc_list[k][0:], lpcc[k][0:]))]
        features_list += [mfcc_list[k][0:]]
#         features_list += [lpcc[k][0:]]
#         features_list += [np.hstack((mfcc_list[k][0:], delta_list[k][0:], ddelta_list[k][0:]))]
        
#     print np.shape(mfcc_list), np.shape(features_list)
    
    # don't return NaN
    # @TODO WHY DOES THIS HAPPEN?
    for row in features_list:
        for cell in row:
            if cell != cell:  # NaN is the only value not equal to itself
                print("Cell is nan (see feature extraction):", str(cell))
                return []
    
    full_features_list = []
    full_features_list = list(np.ravel(features_list))
    full_features_list.extend([pitch]*30)         # do we need to append this multiple times to ensure that the forest selects it?
    
    return full_features_list
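The disabled element-wise normalization loop above can be written with numpy broadcasting. A minimal sketch, where the function name and the epsilon guard are additions and not part of the original code:

import numpy as np

def cepstral_mean_variance_normalize(mfcc_frames):
    # mfcc_frames: [num_frames x num_ceps] array; subtract the per-coefficient
    # mean and divide by the per-coefficient standard deviation over all frames.
    # (name and epsilon are additions for this sketch)
    mean = np.mean(mfcc_frames, axis=0)
    std = np.std(mfcc_frames, axis=0)
    return (mfcc_frames - mean) / (std + 1e-8)

The NaN check at the end of the function can likewise be vectorized as np.isnan(np.asarray(features_list)).any().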
Example #5
def mfcc_features(sig):
    features = base.mfcc(sig,
                         samplerate=44100,
                         winlen=0.02,
                         winstep=0.01,
                         numcep=13,
                         nfilt=40)

    # Mean Normalization for feature vectors.
    mean_vector = np.mean(features, axis=0)
    normalized = features - mean_vector
    return normalized
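A hypothetical call site for mfcc_features; the file name is a placeholder, and since the sample rate is hard-coded to 44100 inside the function, the recording should actually be 44.1 kHz:

import scipy.io.wavfile as wav

rate, sig = wav.read("speech_44k1.wav")  # placeholder file name
feat = mfcc_features(sig)
print(feat.shape)  # (num_frames, 13)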
Example #6
liste_fe_echant = []
liste_classe = []
liste_dsp = []
matrice = []

for NomFichier in liste_nom_fichier_apprentissage:
    (Fe, Echantillons) = scipy.io.wavfile.read("Signaux/" + NomFichier)
    liste_fe_echant.append((Fe, Echantillons))
    NumerClasse = Prefixe.index(NomFichier[0:2])
    liste_classe.append(NumerClasse)
    dsp = np.abs(np.fft.fft(Echantillons))
    liste_dsp.append(dsp)
    VecteurCoefficients = base.mfcc(Echantillons,
                                    samplerate=Fe,
                                    winlen=(len(Echantillons) / Fe),
                                    winstep=(len(Echantillons) / Fe),
                                    nfft=1024)
    matrice.append(VecteurCoefficients[0])

# Display of the time-domain and frequency-domain representations (3 random files)

nb1 = random.randint(0, len(liste_fe_echant) - 1)  # randint's upper bound is inclusive
nb2 = random.randint(0, len(liste_fe_echant) - 1)
nb3 = random.randint(0, len(liste_fe_echant) - 1)
plt.subplot(321)
plt.plot(liste_fe_echant[nb1][1], "r")
plt.title("Représentation temporelle de " + Prefixe[liste_classe[nb1]])
plt.subplot(322)
plt.plot(liste_dsp[nb1], "r")
plt.title("Représentation frequentielle de " + Prefixe[liste_classe[nb1]])
Example #7
    def comp_feat(self, sig, rate):
        '''
        compute the features

        Args:
            sig: the audio signal as a 1-D numpy array
            rate: the sampling rate

        Returns:
            the features as a [seq_length x feature_dim] numpy array
        '''

        feat, energy = base.mfcc(sig, rate, self.conf)

        # write the wav to temporary location and invoke external pitch extractor.
        # make sure 'reaper' is in your $PATH
        tempdir = os.path.join('/tmp', str(os.getpid()))
        if not os.path.isdir(tempdir):
            os.makedirs(tempdir)
        name = 'mix'
        wav.write(os.path.join(tempdir, name + '.wav'), rate, np.int16(sig))
        os.system('reaper -i ' + os.path.join(tempdir, name + '.wav') +
                  ' -f ' + os.path.join(tempdir, name + '.txt') + ' -a -e 0.01')
        pitch = np.loadtxt(os.path.join(tempdir, name + '.txt'), skiprows=7)[:, 2]
        pitch = np.pad(pitch, (0, max(0, feat.shape[0] - pitch.shape[0])),
                       'edge')
        # linear interpolation in voiceless regions
        voiceless = np.where(pitch == -1)[0]
        jump = np.where((voiceless[1:] - voiceless[:-1]) > 1)[0]
        segments = np.split(voiceless, jump + 1)
        for seg in segments:
            if seg.size == 0:  # no voiceless frames in this utterance
                continue
            idx1 = seg[0] - 1
            idx2 = seg[-1] + 1
            val1 = -1
            val2 = -1
            if idx1 >= 0:
                val1 = pitch[idx1]
            if idx2 < pitch.size:
                val2 = pitch[idx2]
            if val1 == -1:  #segment starts at utterance start
                val1 = val2
            if val2 == -1:  # segment ends at utterance end
                val2 = val1
            if val1 == -1:  #segment is the whole utterance => make up a value
                val1 = 150
                val2 = 150
            #interpolate
            pitch[seg] = (val2 - val1) * (np.array(seg) - idx1) / float(idx2 - idx1) + val1

        feat = np.append(feat, pitch[:feat.shape[0], np.newaxis], 1)

        if self.conf['include_energy'] == 'True':
            feat = np.append(feat, energy[:, np.newaxis], 1)

        if self.conf['dynamic'] == 'delta':
            feat = base.delta(feat)
        elif self.conf['dynamic'] == 'ddelta':
            feat = base.ddelta(feat)
        elif self.conf['dynamic'] != 'nodelta':
            raise Exception('unknown dynamic type')

        #mean and variance normalize the features
        if self.conf['mvn'] == 'True':
            feat = (feat - feat.mean(0)) / (
                feat.std(0) + 1e-20
            )  # features could be constant, e.g. voiceless speech

        return feat
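The interpolation loop above is easiest to follow on a toy pitch track. The values below are made up, and the sketch skips the utterance-boundary cases the full method handles (segments that touch the start or end of the utterance, or cover it entirely):

import numpy as np

pitch = np.array([120., -1., -1., -1., 160., 160., -1., 150.])  # -1 marks voiceless frames
voiceless = np.where(pitch == -1)[0]
jump = np.where((voiceless[1:] - voiceless[:-1]) > 1)[0]
for seg in np.split(voiceless, jump + 1):
    idx1, idx2 = seg[0] - 1, seg[-1] + 1      # surrounding voiced frames
    val1, val2 = pitch[idx1], pitch[idx2]
    pitch[seg] = (val2 - val1) * (seg - idx1) / float(idx2 - idx1) + val1
print(pitch)  # [120. 130. 140. 150. 160. 160. 155. 150.]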
Example #8
File: example.py  Project: mabaochang/delta

#!/usr/bin/env python
''' Example for sigproc.py '''

# pylint: skip-file

import scipy.io.wavfile as wav

from base import mfcc
from base import delta
from base import logfbank

if __name__ == '__main__':
    (rate, sig) = wav.read("english.wav")
    mfcc_feat = mfcc(sig, rate)
    d_mfcc_feat = delta(mfcc_feat, 2)
    fbank_feat = logfbank(sig, rate)

    print(fbank_feat[1:3, :])
Example #9
File: MFCC.py  Project: nolanlad/SAX
for f in batch1_fns:
    # Get set of signals from 1 experiment with the highest value per channel
    v1, v2, ev = read_ae_file2(f)
    sig = []
    for i in range(len(v1)):
        sig.append(max_sig(v1[i], v2[i]))
    sig = np.array(sig)

    # jank code that converts raw signal to vector of mfcc
    holder = []
    for i in range(len(sig)):
        holder.append(
            base.mfcc(sig[i],
                      samplerate=rate,
                      winlen=window,
                      winstep=ratio,
                      lowfreq=300000,
                      highfreq=1800000))

    X = []
    for i in range(len(sig)):
        X.append(holder[i][0])
    '''
    CLUSTER STATISTICS ROUTINE
    '''
    silh = np.array([])  # holder arrays
    db_score = np.array([])

    # Cluster and get stat
    for i in range(min_cluster, max_cluster + 1):
        kmeans = KMeans(n_clusters=i, n_init=100, tol=1e-6).fit(X)
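The snippet is cut off here; the silh and db_score holders suggest a silhouette and a Davies-Bouldin score are collected for each cluster count. A hypothetical version of that routine as a self-contained function, using the standard scikit-learn metrics (a guess at the intent, not the project's actual code):

import numpy as np
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score, davies_bouldin_score

def cluster_statistics(X, min_cluster, max_cluster):
    # Hypothetical continuation: fit KMeans for each candidate cluster count
    # and record two cluster-quality scores per fit: silhouette (higher is
    # better) and Davies-Bouldin (lower is better).
    silh = np.array([])
    db_score = np.array([])
    for i in range(min_cluster, max_cluster + 1):
        labels = KMeans(n_clusters=i, n_init=100, tol=1e-6).fit_predict(X)
        silh = np.append(silh, silhouette_score(X, labels))
        db_score = np.append(db_score, davies_bouldin_score(X, labels))
    return silh, db_score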