def getCepsMatrixFromData(t, y):
    '''
    Compute the MFCC coefficient matrix for the data passed as parameters.

    t: time, given as an array of consecutive values
    y: signal, given as an array of values

    Returns the result of MelFeatures.calcMelMatrixFeatures for the word
    samples reported by RecordModule.detectSingleWord.
    '''
    emphasized = sigproc.preemp(y)

    # detectSingleWord yields eight values; only the last one (the samples
    # of the detected word) is needed to build the feature matrix.
    (_fr, _words_power, _words_zeros, _words_detect,
     _itl, _itu, _word_fr, word_samples) = RecordModule.detectSingleWord(t, emphasized)

    extractor = mealfeat.MelFeatures()
    return extractor.calcMelMatrixFeatures(word_samples)
def calcMelMatrixFeatures(self, data):
    '''
    Compute and return the matrix of MFCC coefficients.

    data: input signal samples; pre-emphasis with coefficient self.a is
          applied before the analysis.

    Returns a list of self.numallceps rows, each holding
    self.numcepsBands values. The frame axis is cut into consecutive
    bands of equal length and every entry is the value of largest
    magnitude (sign preserved; the minimum wins a tie) that the
    coefficient takes inside its band. Frames left over after the last
    full band are ignored. A band that contains no frames at all (fewer
    frames than bands) is reported as 0.

    Side effect: self.fs is set to 44100.0.
    '''
    x = sigproc.preemp(data, self.a)
    self.fs = 44100.0

    stft_out = self.stft(x, self.t1, self.t2, self.fs)
    spectrum, nfft, num_windows = stft_out[0], stft_out[1], stft_out[2]

    weights = self.filtbank(self.numFilts, self.minfrq, self.maxfrq, self.width, nfft)
    band_energy = np.dot(pow(weights.transpose(), 2), pow(spectrum, 2))
    log_energy = np.log(band_energy)

    ceps = self.cmn(self.dct(log_energy, self.numcepBasic))
    delta = self.deltas(ceps, self.del_w)
    delta_delta = self.deltas(delta, self.dbl_del_w)

    # Stack static, delta and delta-delta coefficients row-wise.
    n_basic = self.numcepBasic
    stacked = np.zeros((3 * n_basic, num_windows))
    stacked[0:n_basic, :] = ceps
    stacked[n_basic:2 * n_basic] = delta
    stacked[2 * n_basic:3 * n_basic] = delta_delta

    # Number of frames per band, never less than one.
    band_size = max(1, stacked.shape[1] // self.numcepsBands)

    # NOTE(review): indexing below assumes self.numallceps <= 3 * self.numcepBasic - confirm.
    features = [[0] * self.numcepsBands for _ in range(self.numallceps)]
    for row in range(self.numallceps):
        for band in range(self.numcepsBands):
            segment = stacked[row][band * band_size:(band + 1) * band_size]
            if segment.size == 0:
                # Fewer frames than bands: max()/min() would raise on an
                # empty slice, so the entry keeps its initial 0.
                continue
            peak = max(segment)
            trough = min(segment)
            features[row][band] = peak if peak > abs(trough) else trough
    return features
ylabel('|Y(freq)|')  # NOTE(review): last statement of a plotting routine that starts before this chunk


def readWav(filename, Fs):
    '''
    Read a sound file and return its time axis and samples.

    filename: path passed to read()
    Fs: sampling rate used to build the time axis; the rate reported by
        read() is not used

    Returns (t, y): y is a one-dimensional array of samples and t holds
    len(y) evenly spaced time values from 0 to len(y) / Fs.
    '''
    _, data = read(filename)
    samples = numpy.asarray(data)
    if samples.ndim > 1:
        # Assumes read() is scipy.io.wavfile.read, which lays multi-channel
        # data out as (frames, channels) - TODO confirm. Keep the first
        # channel; indexing row 0 would return a single frame only.
        y = samples[:, 0]
    else:
        y = numpy.atleast_1d(samples)
    timp = len(y) / Fs
    t = linspace(0, timp, len(y))
    return t, y


if __name__ == '__main__':
    Fs = 44100.0  # sampling rate
    filename = "learn_set//wlacz//6.wav"
    t, y = readWav(filename, Fs)
    y = sigproc.preemp(y, 0.97)
    fr, wordspower, wordszeros, wordsdetect, ITL, ITU, word_fr, word_y = RecordModule.detectSingleWord(
        t, y)

    # Upper panel: the pre-emphasised recording.
    pylab.subplot(211)
    pylab.title(filename)
    pylab.plot(t, y)

    # Lower panel: spectrum of the detected word.
    pylab.subplot(212)
    print(word_y.shape)
    plotSpectrum(word_y, Fs)
    pylab.show()
if __name__ == '__main__':
    print("please speak a word into the microphone")
    # The name is only used as the plot title below; the signal itself comes
    # from the microphone rather than from a recorded file.
    filename = 'test.wav'
    t, y = getSpeechFromMic()
    print("done")

    y = sigproc.preemp(y, 0.97)
    fr, wordspower, wordszeros, wordsdetect, ITL, ITU, word_fr, word_y = detectSingleWord(t, y)

    # Panel 1: the pre-emphasised signal.
    pylab.subplot(611)
    pylab.title(filename)
    pylab.plot(t, y, 'b')

    # Panel 2: the power signal returned by the word detector.
    pylab.subplot(612)
    pylab.title('sygnal mocy komendy ')
    pylab.plot(fr, wordspower, 'r')

    # Constant series holding ITL, one entry per power sample.
    arrITL = array('f', range(len(wordspower)))
    for i in range(len(wordspower)):
        arrITL[i] = ITL
def calcMelMatrixFeatures(self, data):
    '''
    Compute and return the matrix of MFCC coefficients.

    data: input signal samples; pre-emphasis with coefficient self.a is
          applied before the analysis.

    Returns a list of self.numallceps rows, each holding
    self.numcepsBands values. The frame axis is cut into consecutive
    bands of equal length and every entry is the value of largest
    magnitude (sign preserved; the minimum wins a tie) that the
    coefficient takes inside its band. Frames left over after the last
    full band are ignored. A band that contains no frames at all (fewer
    frames than bands) is reported as 0.

    Side effect: self.fs is set to 44100.0.
    '''
    x = sigproc.preemp(data, self.a)
    self.fs = 44100.0

    stft_out = self.stft(x, self.t1, self.t2, self.fs)
    spectrum, nfft, num_windows = stft_out[0], stft_out[1], stft_out[2]

    weights = self.filtbank(self.numFilts, self.minfrq, self.maxfrq, self.width, nfft)
    band_energy = np.dot(pow(weights.transpose(), 2), pow(spectrum, 2))
    log_energy = np.log(band_energy)

    ceps = self.cmn(self.dct(log_energy, self.numcepBasic))
    delta = self.deltas(ceps, self.del_w)
    delta_delta = self.deltas(delta, self.dbl_del_w)

    # Stack static, delta and delta-delta coefficients row-wise.
    n_basic = self.numcepBasic
    stacked = np.zeros((3 * n_basic, num_windows))
    stacked[0:n_basic, :] = ceps
    stacked[n_basic:2 * n_basic] = delta
    stacked[2 * n_basic:3 * n_basic] = delta_delta

    # Number of frames per band, never less than one.
    band_size = max(1, stacked.shape[1] // self.numcepsBands)

    # NOTE(review): indexing below assumes self.numallceps <= 3 * self.numcepBasic - confirm.
    features = [[0] * self.numcepsBands for _ in range(self.numallceps)]
    for row in range(self.numallceps):
        for band in range(self.numcepsBands):
            segment = stacked[row][band * band_size:(band + 1) * band_size]
            if segment.size == 0:
                # Fewer frames than bands: max()/min() would raise on an
                # empty slice, so the entry keeps its initial 0.
                continue
            peak = max(segment)
            trough = min(segment)
            features[row][band] = peak if peak > abs(trough) else trough
    return features