コード例 #1
0
def getCepsMatrixFromData(t,y):
    """
    Compute the MFCC coefficients for the data received as parameters.

    t: time given as an array of consecutive values
    y: signal given as an array of values

    The signal goes through sigproc.preemp, then through
    RecordModule.detectSingleWord; the last of the eight values returned by
    the latter is handed to MelFeatures.calcMelMatrixFeatures and that
    result is returned.
    """
    emphasised = sigproc.preemp(y)

    # Eight values come back from the detector; only the final one is used.
    (_frames, _power, _zeros, _detected, _itl, _itu, _word_frames,
     word_samples) = RecordModule.detectSingleWord(t, emphasised)

    return mealfeat.MelFeatures().calcMelMatrixFeatures(word_samples)
コード例 #2
0
    def calcMelMatrixFeatures(self, data):
        """Compute and return the matrix of MFCC coefficients for ``data``.

        Steps, in the order the code performs them: ``sigproc.preemp`` with
        ``self.a``, ``self.stft``, squared filter-bank weights applied to the
        squared spectrum, ``np.log``, ``self.dct`` keeping
        ``self.numcepBasic`` coefficients, ``self.cmn`` and two passes of
        ``self.deltas``.  The coefficients and both delta sets are stacked
        row-wise, and every row is then reduced to ``self.numcepsBands``
        values along the window axis.

        Side effect: ``self.fs`` is overwritten with 44100.0.

        Args:
            data: input signal samples, passed straight to
                ``sigproc.preemp``.

        Returns:
            A list of ``self.numallceps`` rows, each a list of
            ``self.numcepsBands`` values.  Each value is the element of
            largest magnitude (sign kept, ties resolved towards the minimum)
            within the corresponding band of consecutive windows.  A band
            that contains no windows keeps the value 0.
        """
        x = sigproc.preemp(data, self.a)

        # NOTE(review): the sampling rate is hard-coded and replaces whatever
        # self.fs held before the call.
        self.fs = 44100.0

        outTuple = self.stft(x, self.t1, self.t2, self.fs)
        X, nfft, numWindows = outTuple[0], outTuple[1], outTuple[2]

        wts = self.filtbank(self.numFilts, self.minfrq, self.maxfrq,
                            self.width, nfft)

        # Squared filter-bank weights applied to the squared spectrum.
        P = np.dot(pow(wts.transpose(), 2), pow(X, 2))
        C = self.dct(np.log(P), self.numcepBasic)

        C_cmn = self.cmn(C)
        d1 = self.deltas(C_cmn, self.del_w)
        d2 = self.deltas(d1, self.dbl_del_w)

        # Stack the base coefficients, first deltas and second deltas.
        nBasic = self.numcepBasic
        C_out = np.zeros((3 * nBasic, numWindows))
        C_out[0:nBasic, :] = C_cmn
        C_out[nBasic:2 * nBasic] = d1
        C_out[2 * nBasic:3 * nBasic] = d2

        # Width, in windows, of each band (never below 1).  Windows left over
        # after the last band are not used.
        sizeBand = max(1, C_out.shape[1] // self.numcepsBands)

        C_out2 = [[0] * self.numcepsBands for _ in range(self.numallceps)]

        for kk in range(self.numallceps):
            row = C_out[kk]
            for ll in range(self.numcepsBands):
                band = row[ll * sizeBand:(ll + 1) * sizeBand]
                if len(band) == 0:
                    # Fewer windows than bands: keep the initial 0 rather
                    # than calling max() on an empty slice (ValueError).
                    continue
                amplMax = max(band)
                amplMin = min(band)
                # Keep whichever extreme has the larger magnitude.
                C_out2[kk][ll] = amplMax if amplMax > abs(amplMin) else amplMin

        return C_out2
コード例 #3
0
    ylabel('|Y(freq)|')


def readWav(filename, Fs):
    """Read a sound file and return its time axis and samples.

    Args:
        filename: path of the sound file, passed to ``read``.
        Fs: sampling rate used to build the time axis.  The rate returned
            by ``read`` is not consulted.

    Returns:
        A tuple ``(t, y)``.  ``y`` holds the samples (the first channel when
        the data has more than one dimension) and ``t`` is ``len(y)`` evenly
        spaced values from 0 to ``len(y) / Fs``.
    """
    _rate, data = read(filename)

    if numpy.ndim(data) > 1:
        # Assumes `read` is scipy.io.wavfile.read, which lays multi-channel
        # data out as (samples, channels); keep the first channel.  Indexing
        # row 0 instead would yield a single frame, not a signal.
        y = data[:, 0]
    else:
        y = numpy.atleast_1d(data)

    t = linspace(0, len(y) / Fs, len(y))
    return t, y


if __name__ == '__main__':
    # Script entry point: load one recording, run single-word detection on
    # it and show two stacked plots.
    Fs = 44100.0
    # sampling rate handed to readWav and plotSpectrum
    filename = "learn_set//wlacz//6.wav"
    t, y = readWav(filename, Fs)
    # The signal is passed through sigproc.preemp (second argument 0.97)
    # before detection.
    y = sigproc.preemp(y, 0.97)
    # Of the eight returned values only word_y is used below.
    fr, wordspower, wordszeros, wordsdetect, ITL, ITU, word_fr, word_y = RecordModule.detectSingleWord(
        t, y)

    # Upper panel: the signal after sigproc.preemp, plotted against t.
    pylab.subplot(211)
    pylab.title(filename)
    pylab.plot(t, y)
    # Lower panel: word_y handed to plotSpectrum.
    pylab.subplot(212)
    print(word_y.shape)
    plotSpectrum(word_y, Fs)
    pylab.show()
コード例 #4
0
    

    
if __name__ == '__main__':
    # Demo: capture speech, run single-word detection and plot the results.
    # NOTE(review): this excerpt looks truncated - a 6-row subplot grid is
    # opened and arrITL is filled, but neither is finished in the visible part.
    print("please speak a word into the microphone")
#     filename = "learn_set//wlacz//"+str( 12 )+".wav"   
    # In the visible part, filename is only used as the first plot's title.
    filename = 'test.wav' 
#     record_to_file(filename)
#     print("done - result written to ", filename)
#     t,y = PlotModule.readWav(filename, RATE)
    
    t,y = getSpeechFromMic()
    print("done")
    
#     t,y = getSpeechFromMic()
    # The signal is passed through sigproc.preemp (second argument 0.97)
    # before detection.
    y = sigproc.preemp(y,0.97)
    fr, wordspower, wordszeros, wordsdetect, ITL ,ITU,  word_fr, word_y = detectSingleWord(t,y)
     
 
    # Row 1 of 6: the signal after sigproc.preemp, in blue.
    pylab.subplot(611)
    pylab.title(filename) 
    pylab.plot(t, y, 'b')

    # Row 2 of 6: wordspower against fr, in red.  The title is Polish for
    # "command power signal".
    pylab.subplot(612)
    pylab.title('sygnal mocy komendy ') 
    pylab.plot(fr, (wordspower), 'r')
 
     
    # Build an array (typecode 'f') as long as wordspower and set every
    # element to ITL.
    arrITL = array('f', range(len(wordspower)) )
    for i in range(len(wordspower)):
        arrITL[i] = ITL
コード例 #5
0
  def calcMelMatrixFeatures(self, data):
      """Compute and return the matrix of MFCC coefficients for ``data``.

      Steps, in the order the code performs them: ``sigproc.preemp`` with
      ``self.a``, ``self.stft``, squared filter-bank weights applied to the
      squared spectrum, ``np.log``, ``self.dct`` keeping
      ``self.numcepBasic`` coefficients, ``self.cmn`` and two passes of
      ``self.deltas``.  The coefficients and both delta sets are stacked
      row-wise, and every row is then reduced to ``self.numcepsBands``
      values along the window axis.

      Side effect: ``self.fs`` is overwritten with 44100.0.

      Args:
          data: input signal samples, passed straight to
              ``sigproc.preemp``.

      Returns:
          A list of ``self.numallceps`` rows, each a list of
          ``self.numcepsBands`` values.  Each value is the element of
          largest magnitude (sign kept, ties resolved towards the minimum)
          within the corresponding band of consecutive windows.  A band
          that contains no windows keeps the value 0.
      """
      x = sigproc.preemp(data, self.a)

      # NOTE(review): the sampling rate is hard-coded and replaces whatever
      # self.fs held before the call.
      self.fs = 44100.0

      outTuple = self.stft(x, self.t1, self.t2, self.fs)
      X, nfft, numWindows = outTuple[0], outTuple[1], outTuple[2]

      wts = self.filtbank(self.numFilts, self.minfrq, self.maxfrq,
                          self.width, nfft)

      # Squared filter-bank weights applied to the squared spectrum.
      P = np.dot(pow(wts.transpose(), 2), pow(X, 2))
      C = self.dct(np.log(P), self.numcepBasic)

      C_cmn = self.cmn(C)
      d1 = self.deltas(C_cmn, self.del_w)
      d2 = self.deltas(d1, self.dbl_del_w)

      # Stack the base coefficients, first deltas and second deltas.
      nBasic = self.numcepBasic
      C_out = np.zeros((3 * nBasic, numWindows))
      C_out[0:nBasic, :] = C_cmn
      C_out[nBasic:2 * nBasic] = d1
      C_out[2 * nBasic:3 * nBasic] = d2

      # Width, in windows, of each band (never below 1).  Windows left over
      # after the last band are not used.
      sizeBand = max(1, C_out.shape[1] // self.numcepsBands)

      C_out2 = [[0] * self.numcepsBands for _ in range(self.numallceps)]

      for kk in range(self.numallceps):
          row = C_out[kk]
          for ll in range(self.numcepsBands):
              band = row[ll * sizeBand:(ll + 1) * sizeBand]
              if len(band) == 0:
                  # Fewer windows than bands: keep the initial 0 rather
                  # than calling max() on an empty slice (ValueError).
                  continue
              amplMax = max(band)
              amplMin = min(band)
              # Keep whichever extreme has the larger magnitude.
              C_out2[kk][ll] = amplMax if amplMax > abs(amplMin) else amplMin

      return C_out2