def getCepsVect(y):
    """Collapse the cepstral output of ``mfcc(y)`` into one value per coefficient.

    Entry ``j`` of the result is ``RecordModule.arithmeticMean`` applied to
    ``ceps.T[j]``, for ``j`` in ``range(COEFS)``; entry 0 is then overwritten
    with zero.

    Args:
        y: signal handed unchanged to ``mfcc``.

    Returns:
        A NumPy array of length ``COEFS``.
    """
    cepstrum, _mspec, _spec = mfcc(y)
    # assumes cepstrum is 2-D so that .T[j] is the j-th coefficient across
    # all frames -- TODO confirm against mfcc()
    per_coefficient = cepstrum.T

    averaged = np.zeros(COEFS)
    for coef_idx in range(COEFS):
        averaged[coef_idx] = RecordModule.arithmeticMean(per_coefficient[coef_idx])

    # The first coefficient is deliberately discarded.
    averaged[0] = 0
    return averaged
def getCepsMatrixFromData(t, y):
    '''
    Compute the MFCC feature matrix from the data received as parameters.

    The signal is passed through ``sigproc.preemp``, a single word is
    extracted from it with ``RecordModule.detectSingleWord`` and that word is
    handed to ``MelFeatures.calcMelMatrixFeatures``.

    t: time, given as an array of consecutive values
    y: signal, given as an array of values

    Returns whatever ``calcMelMatrixFeatures`` returns for the detected word.
    '''
    emphasised = sigproc.preemp(y)

    # detectSingleWord yields eight values; only the last one (the samples of
    # the detected word) is used here.
    (_fr, _wordspower, _wordszeros, _wordsdetect,
     _itl, _itu, _word_fr, word_samples) = RecordModule.detectSingleWord(t, emphasised)

    extractor = mealfeat.MelFeatures()
    return extractor.calcMelMatrixFeatures(word_samples)
def goRecognition():
    '''
    Record a command from the microphone and classify it.

    Audio is captured with ``RecordModule.getSpeechFromMic``, converted to a
    feature matrix by ``getCepsMatrixFromData`` and classified by
    ``getClasificationDecision``. When ``MODE == 1`` the ``name`` of the
    resulting class is forwarded to ``SendCommandToSerialPort``.
    '''
    print("please speak a word into the microphone")
    time_axis, samples = RecordModule.getSpeechFromMic()
    print("done")
    print("dl probki :", len(samples))

    # For offline testing a stored recording can be used instead, e.g.
    # PlotModule.readWav("learn_set//wylacz//9.wav", 44100.0)
    features = getCepsMatrixFromData(time_axis, samples)
    decision = getClasificationDecision(features)

    if MODE == 1:
        SendCommandToSerialPort(decision.name)
    # NOTE(review): the source arrived with its line breaks collapsed, so it
    # is not recoverable whether this print belonged inside the `if` above --
    # confirm against the original file.
    print("done")
ylabel('|Y(freq)|')
# NOTE(review): the line above is the tail of a definition that starts outside
# this view; it is kept verbatim.


def readWav(filename, Fs):
    '''
    Read a sound file and return a time axis together with its samples.

    filename: path of the WAV file, passed to ``read``
    Fs: sampling rate in Hz used to build the time axis; the rate reported by
        ``read`` is not used

    Returns ``(t, y)`` where ``y`` holds the samples (first channel only when
    the file has several channels) and ``t`` holds ``len(y)`` evenly spaced
    values from 0 to ``len(y) / Fs``.
    '''
    _rate, data = read(filename)

    samples = numpy.atleast_2d(data)
    if numpy.ndim(data) > 1:
        # assumes `read` is scipy.io.wavfile.read, which returns multi-channel
        # audio as (n_samples, n_channels). Indexing row 0 there would yield a
        # single frame (one sample per channel) rather than a signal, so take
        # the first channel instead.
        y = samples[:, 0]
    else:
        # Mono data: atleast_2d produced shape (1, n_samples); row 0 is the
        # whole signal.
        y = samples[0]

    duration = len(y) / Fs
    t = linspace(0, duration, len(y))
    return t, y


if __name__ == '__main__':
    Fs = 44100.0  # sampling rate
    filename = "learn_set//wlacz//6.wav"

    t, y = readWav(filename, Fs)
    y = sigproc.preemp(y, 0.97)
    # Only word_y (the last returned value) is used below.
    fr, wordspower, wordszeros, wordsdetect, ITL, ITU, word_fr, word_y = \
        RecordModule.detectSingleWord(t, y)

    # First subplot: the filtered signal over time.
    pylab.subplot(211)
    pylab.title(filename)
    pylab.plot(t, y)

    # Second subplot: spectrum of the word_y segment.
    pylab.subplot(212)
    print(word_y.shape)
    plotSpectrum(word_y, Fs)
    pylab.show()
return vect_of_mccf if __name__ == '__main__': # ++++++++++++++++++++++++++++++++++++++++++++ for i in range(11): print("please speak a word into the microphone") filename = "learn_set//wlacz//"+str(i+1)+".wav" # RecordModule.record_to_file(filename) # print("done - result written to ", filename) # filename = 'learn_set//wlacz//3.wav' # ++++++++++++++++++++++++++++++++++++++++++++ t, extract = PlotModule.readWav(filename, FS) extract = RecordModule.preemp(extract) fr, wordspower, wordszeros, wordsdetect, ITL ,ITU, word_fr, word_y = RecordModule.detectSingleWord(t,extract) ceps, mspec, spec = mfcc(word_y) show_MFCC_spectrum(ceps) # show_MFCC(ceps) vect_of_mccf = np.zeros(len(ceps.T)) for i in range(len(ceps.T)): vect_of_mccf[i] = max(ceps.T[i]) # sum(data.T[i]) #
# Compute the spectrum magnitude spec = np.abs(fft(framed, nfft, axis=-1)) # Filter the spectrum through the triangle filterbank mspec = np.log10(np.dot(spec, fbank.T)) # Use the DCT to 'compress' the coefficients (spectrum -> cepstrum domain) ceps = dct(mspec, type=2, norm='ortho', axis=-1)[:, :nceps] return ceps, mspec, spec
# NOTE(review): the line above is the tail of a definition that starts outside
# this view; it is kept verbatim.


def preemp(input, p):
    """Pre-emphasis filter.

    Runs ``input`` through ``lfilter`` with numerator ``[1, -p]`` and
    denominator ``1``, i.e. ``out[n] = input[n] - p * input[n - 1]``.

    Args:
        input: sequence of samples to filter.
        p: pre-emphasis coefficient.

    Returns:
        The filtered samples as returned by ``lfilter``.
    """
    numerator = [1., -p]
    denominator = 1
    return lfilter(numerator, denominator, input)


if __name__ == '__main__':
    # NOTE(review): the source arrived with its line breaks collapsed. The
    # loop nesting below, and whether pylab.title(...) was live or commented
    # out, are reconstructed -- confirm against the original file.
    for i in range(10):
        filename = "learn_set//wlacz//" + str(i + 1) + ".wav"
        RATE = 44100.0
        t, y = PlotModule.readWav(filename, RATE)

        ceps, mspec, spec = mfcc(y)
        print(ceps.shape)

        # One averaged value per row of ceps.T.
        n_coefs = len(ceps.T)
        vect_of_mccf = np.zeros(n_coefs)
        for j in range(n_coefs):
            vect_of_mccf[j] = RecordModule.arithmeticMean(ceps.T[j])

        pylab.title("ceps : ")
        pylab.plot(range(len(vect_of_mccf)), vect_of_mccf, 'g')
    pylab.show()
mspec = np.log10(np.dot(spec, fbank.T)) # Use the DCT to 'compress' the coefficients (spectrum -> cepstrum domain) ceps = dct(mspec, type=2, norm='ortho', axis=-1)[:, :nceps] return ceps, mspec, spec
# NOTE(review): the line above is the tail of a definition that starts outside
# this view; it is kept verbatim.


def preemp(input, p):
    """Pre-emphasis filter.

    Runs ``input`` through ``lfilter`` with numerator ``[1, -p]`` and
    denominator ``1``, i.e. ``out[n] = input[n] - p * input[n - 1]``.

    Args:
        input: sequence of samples to filter.
        p: pre-emphasis coefficient.

    Returns:
        The filtered samples as returned by ``lfilter``.
    """
    numerator = [1., -p]
    denominator = 1
    return lfilter(numerator, denominator, input)


if __name__ == '__main__':
    # NOTE(review): the source arrived with its line breaks collapsed. The
    # loop nesting below, and whether pylab.title(...) was live or commented
    # out, are reconstructed -- confirm against the original file.
    for i in range(10):
        filename = "learn_set//wlacz//" + str(i + 1) + ".wav"
        RATE = 44100.0
        t, y = PlotModule.readWav(filename, RATE)

        ceps, mspec, spec = mfcc(y)
        print(ceps.shape)

        # One averaged value per row of ceps.T.
        n_coefs = len(ceps.T)
        vect_of_mccf = np.zeros(n_coefs)
        for j in range(n_coefs):
            vect_of_mccf[j] = RecordModule.arithmeticMean(ceps.T[j])

        pylab.title("ceps : ")
        pylab.plot(range(len(vect_of_mccf)), vect_of_mccf, 'g')
    pylab.show()
plot(frq,abs(Y),'r') # plotting the spectrum xlabel('Freq (Hz)') ylabel('|Y(freq)|')
# NOTE(review): the line above is the tail of a definition that starts outside
# this view; it is kept verbatim.


def readWav(filename, Fs):
    '''
    Read a sound file and return a time axis together with its samples.

    filename: path of the WAV file, passed to ``read``
    Fs: sampling rate in Hz used to build the time axis; the rate reported by
        ``read`` is not used

    Returns ``(t, y)`` where ``y`` holds the samples (first channel only when
    the file has several channels) and ``t`` holds ``len(y)`` evenly spaced
    values from 0 to ``len(y) / Fs``.
    '''
    _rate, data = read(filename)

    samples = numpy.atleast_2d(data)
    if numpy.ndim(data) > 1:
        # assumes `read` is scipy.io.wavfile.read, which returns multi-channel
        # audio as (n_samples, n_channels). Indexing row 0 there would yield a
        # single frame (one sample per channel) rather than a signal, so take
        # the first channel instead.
        y = samples[:, 0]
    else:
        # Mono data: atleast_2d produced shape (1, n_samples); row 0 is the
        # whole signal.
        y = samples[0]

    duration = len(y) / Fs
    t = linspace(0, duration, len(y))
    return t, y


if __name__ == '__main__':
    Fs = 44100.0  # sampling rate
    filename = "learn_set//wlacz//6.wav"

    t, y = readWav(filename, Fs)
    y = sigproc.preemp(y, 0.97)
    # Only word_y (the last returned value) is used below.
    fr, wordspower, wordszeros, wordsdetect, ITL, ITU, word_fr, word_y = \
        RecordModule.detectSingleWord(t, y)

    # First subplot: the filtered signal over time.
    pylab.subplot(211)
    pylab.title(filename)
    pylab.plot(t, y)

    # Second subplot: spectrum of the word_y segment.
    pylab.subplot(212)
    print(word_y.shape)
    plotSpectrum(word_y, Fs)
    pylab.show()