def training(nfiltbank, orderLPC):
    """Build MFCC and LPC codebooks for the two speakers in ``./train``.

    For ``speaker1.wav`` and ``speaker2.wav`` the sample rate is taken from
    ``read``, the samples from ``wav_to_floats`` (first 48000 only), the clip
    is handed to ``sd.play``, and ``lbg`` reduces the ``mfcc`` and ``lpc``
    features to 16 centroids each.  Afterwards a scatter figure is drawn that
    compares feature index 6 against index 4 for the first 68 MFCC columns of
    both speakers, together with the centroids of a codebook trained on just
    those columns.

    Args:
        nfiltbank: forwarded to ``mfcc``; also the feature dimension of the
            MFCC codebook array.
        orderLPC: forwarded to ``lpc``; also the feature dimension of the
            LPC codebook array.

    Returns:
        ``(codebooks_mfcc, codebooks_lpc)`` with shapes
        ``(2, nfiltbank, 16)`` and ``(2, orderLPC, 16)``.
    """
    speaker_count = 2
    centroid_count = 16
    train_dir = os.getcwd() + '/train'

    codebooks_mfcc = np.empty((speaker_count, nfiltbank, centroid_count))
    codebooks_lpc = np.empty((speaker_count, orderLPC, centroid_count))

    for idx in range(speaker_count):
        wav_path = train_dir + '/speaker' + str(idx + 1) + '.wav'
        print('Now speaker ', str(idx + 1), 'features are being trained')

        # Only the sample rate from read() is used; the samples themselves
        # are reloaded through wav_to_floats and trimmed to 48000 values.
        fs, _ = read(wav_path)
        signal = wav_to_floats(wav_path)[:48000]
        sd.play(signal, fs)

        mfcc_features = mfcc(signal, fs, nfiltbank)
        lpc_features = lpc(signal, fs, orderLPC)
        codebooks_mfcc[idx, :, :] = lbg(mfcc_features, centroid_count)
        codebooks_lpc[idx, :, :] = lbg(lpc_features, centroid_count)

    # Second pass, for the figure only: raw samples from read(), first 68
    # MFCC columns, and a separate codebook trained on that slice.
    plot_codebooks = np.empty((2, nfiltbank, centroid_count))
    plot_features = np.empty((2, nfiltbank, 68))
    for idx in range(2):
        wav_path = train_dir + '/speaker' + str(idx + 1) + '.wav'
        fs, samples = read(wav_path)
        samples = samples[:48000]
        plot_features[idx, :, :] = mfcc(samples, fs, nfiltbank)[:, 0:68]
        plot_codebooks[idx, :, :] = lbg(plot_features[idx, :, :],
                                        centroid_count)

    plt.figure(speaker_count + 1)
    sample_handles = []
    centroid_handles = []
    # Red = first speaker, blue = second; circles = frames, plus = centroids.
    for idx, colour in enumerate(('r', 'b')):
        sample_handles.append(
            plt.scatter(plot_features[idx, 6, :], plot_features[idx, 4, :],
                        s=100, color=colour, marker='o'))
        centroid_handles.append(
            plt.scatter(plot_codebooks[idx, 6, :], plot_codebooks[idx, 4, :],
                        s=100, color=colour, marker='+'))
    plt.grid()
    plt.legend(tuple(sample_handles + centroid_handles),
               ('Child', 'Parent', 'Child centroids', 'Parent centroids'),
               scatterpoints=1, loc='upper left')
    plt.show()

    return (codebooks_mfcc, codebooks_lpc)
def training(nfiltbank, orderLPC):
    """Train one MFCC and one LPC codebook per ``.wav`` file found in
    ``./train_all_speakers``.

    Speaker index ``i`` is the position of the file among the ``.wav``
    entries in the order ``os.listdir`` returns them.  For every file the
    samples are loaded with ``read``, MFCC features come from ``mfcc_p``
    (transposed, with row 0 overwritten by zeros), LPC features come from
    ``lpc``, and ``lbg`` reduces each feature set to 32 centroids.

    Args:
        nfiltbank: feature dimension of the MFCC codebook array.  It is not
            passed to ``mfcc_p``.
            NOTE(review): ``lbg`` must therefore return ``nfiltbank`` rows
            for the MFCC features -- confirm against ``mfcc_p``.
        orderLPC: forwarded to ``lpc``; also the feature dimension of the
            LPC codebook array.

    Returns:
        ``(codebooks_mfcc, codebooks_lpc)`` with shapes
        ``(nSpeaker, nfiltbank, 32)`` and ``(nSpeaker, orderLPC, 32)``.
    """
    directory = os.getcwd() + '/train_all_speakers'
    # Keep only *.wav entries: any other file in the directory would
    # otherwise be handed to read() and counted as a speaker.
    wave_files = [f for f in os.listdir(directory)
                  if f.lower().endswith('.wav')]

    nSpeaker = len(wave_files)
    nCentroid = 32  # original is 16
    codebooks_mfcc = np.empty((nSpeaker, nfiltbank, nCentroid))
    codebooks_lpc = np.empty((nSpeaker, orderLPC, nCentroid))

    for i, wave_file in enumerate(wave_files):
        # Single-argument print(...) emits the same text on Python 2 and 3.
        print('Speaker [' + str(i) + '] File:' + wave_file
              + ' Training features...')
        (fs, s) = read(os.path.join(directory, wave_file))

        mel_coeff = mfcc_p(s, fs).transpose()
        # Zero the first row after the transpose so it does not contribute
        # to the codebook.
        mel_coeff[0, :] = 0
        lpc_coeff = lpc(s, fs, orderLPC)

        codebooks_mfcc[i, :, :] = lbg(mel_coeff, nCentroid)
        codebooks_lpc[i, :, :] = lbg(lpc_coeff, nCentroid)

    print('Training finished\n')
    return (codebooks_mfcc, codebooks_lpc)
def training(nfiltbank, orderLPC):
    """Train and plot MFCC/LPC codebooks for ``s1.wav`` .. ``s8.wav`` in
    ``./train``.

    Each file is loaded with ``read``; ``mfcc`` and ``lpc`` features are
    reduced to 16 centroids by ``lbg``.  Every speaker gets a figure with
    the MFCC centroids stemmed in subplot 211 and the LPC centroids in
    subplot 212.  After those figures are shown, one more figure scatters
    feature index 6 against index 4 for the first 68 MFCC columns of
    ``s2.wav`` and ``s3.wav`` (labelled Sp1/Sp2) with their centroids.

    Args:
        nfiltbank: forwarded to ``mfcc``; feature dimension of the MFCC
            codebooks.
        orderLPC: forwarded to ``lpc``; feature dimension of the LPC
            codebooks.

    Returns:
        ``(codebooks_mfcc, codebooks_lpc)`` with shapes
        ``(8, nfiltbank, 16)`` and ``(8, orderLPC, 16)``.
    """
    speaker_count = 8
    centroid_count = 16
    train_dir = os.getcwd() + '/train'

    codebooks_mfcc = np.empty((speaker_count, nfiltbank, centroid_count))
    codebooks_lpc = np.empty((speaker_count, orderLPC, centroid_count))

    for spk in range(speaker_count):
        label = str(spk + 1)
        print('Now speaker ', label, 'features are being trained')
        fs, signal = read(train_dir + '/s' + label + '.wav')

        mfcc_features = mfcc(signal, fs, nfiltbank)
        lpc_features = lpc(signal, fs, orderLPC)
        codebooks_mfcc[spk, :, :] = lbg(mfcc_features, centroid_count)
        codebooks_lpc[spk, :, :] = lbg(lpc_features, centroid_count)

        plt.figure(spk)
        plt.title('Codebook for speaker ' + label + ' with '
                  + str(centroid_count) + ' centroids')
        for centroid in range(centroid_count):
            # Upper panel: MFCC centroid.
            plt.subplot(211)
            plt.stem(codebooks_mfcc[spk, :, centroid])
            plt.ylabel('MFCC')
            # Lower panel: LPC centroid, red markers on a black baseline.
            plt.subplot(212)
            marker, _, base = plt.stem(codebooks_lpc[spk, :, centroid])
            plt.setp(marker, 'markerfacecolor', 'r')
            plt.setp(base, 'color', 'k')
            plt.ylabel('LPC')
            plt.axis(ymin=-1, ymax=1)
            plt.xlabel('Number of features')

    plt.show()
    print('Training complete')

    # Comparison figure: two MFCC feature rows (indices 6 and 4) on a 2D
    # plane.  Remove everything from here to the final plt.show() to skip it.
    pair_codebooks = np.empty((2, nfiltbank, centroid_count))
    pair_features = np.empty((2, nfiltbank, 68))
    for k in range(2):
        fs, signal = read(train_dir + '/s' + str(k + 2) + '.wav')
        pair_features[k, :, :] = mfcc(signal, fs, nfiltbank)[:, 0:68]
        pair_codebooks[k, :, :] = lbg(pair_features[k, :, :], centroid_count)

    plt.figure(speaker_count + 1)
    sample_handles, centroid_handles = [], []
    for k, colour in enumerate(('r', 'b')):
        sample_handles.append(
            plt.scatter(pair_features[k, 6, :], pair_features[k, 4, :],
                        s=100, color=colour, marker='o'))
        centroid_handles.append(
            plt.scatter(pair_codebooks[k, 6, :], pair_codebooks[k, 4, :],
                        s=100, color=colour, marker='+'))
    plt.grid()
    plt.legend(tuple(sample_handles + centroid_handles),
               ('Sp1', 'Sp2', 'Sp1 centroids', 'Sp2 centroids'),
               scatterpoints=1, loc='upper left')
    plt.show()

    return (codebooks_mfcc, codebooks_lpc)
def training(nfiltbank, orderLPC):
    """Train MFCC and LPC codebooks for the eight files ``./train/s<k>.wav``.

    A single progress message is printed up front (the per-speaker message
    is disabled in this variant).  For each speaker, ``read`` loads the
    file, ``mfcc``/``lpc`` extract features and ``lbg`` reduces them to 16
    centroids, which are stemmed into a two-panel figure (MFCC on top, LPC
    below).  Finally a scatter figure compares feature index 6 with index 4
    over the first 68 MFCC columns of ``s2.wav`` and ``s3.wav``.

    Args:
        nfiltbank: forwarded to ``mfcc``; feature dimension of the MFCC
            codebooks.
        orderLPC: forwarded to ``lpc``; feature dimension of the LPC
            codebooks.

    Returns:
        ``(codebooks_mfcc, codebooks_lpc)`` with shapes
        ``(8, nfiltbank, 16)`` and ``(8, orderLPC, 16)``.
    """
    n_speakers, n_centroids = 8, 16
    base_dir = os.getcwd() + '/train'
    codebooks_mfcc = np.empty((n_speakers, nfiltbank, n_centroids))
    codebooks_lpc = np.empty((n_speakers, orderLPC, n_centroids))

    print('Speakers features are being trained.Please wait....')
    for index in range(n_speakers):
        number = str(index + 1)
        rate, samples = read(base_dir + '/s' + number + '.wav')

        mel = mfcc(samples, rate, nfiltbank)
        lin_pred = lpc(samples, rate, orderLPC)
        codebooks_mfcc[index, :, :] = lbg(mel, n_centroids)
        codebooks_lpc[index, :, :] = lbg(lin_pred, n_centroids)

        plt.figure(index)
        plt.title('Codebook for speaker ' + number + ' with '
                  + str(n_centroids) + ' centroids')
        for col in range(n_centroids):
            plt.subplot(211)
            plt.stem(codebooks_mfcc[index, :, col])
            plt.ylabel('MFCC')

            plt.subplot(212)
            lpc_stem = plt.stem(codebooks_lpc[index, :, col])
            markerline, stemlines, baseline = lpc_stem
            plt.setp(markerline, 'markerfacecolor', 'r')
            plt.setp(baseline, 'color', 'k')
            plt.ylabel('LPC')
            plt.axis(ymin=-1, ymax=1)
            plt.xlabel('Number of features')

    plt.show()
    print('Training complete')

    # Optional visualisation: MFCC feature rows 6 and 4 of two speakers on
    # a 2D plane, with the centroids of a codebook trained on the same data.
    demo_codebooks = np.empty((2, nfiltbank, n_centroids))
    demo_mel = np.empty((2, nfiltbank, 68))
    for index in range(2):
        rate, samples = read(base_dir + '/s' + str(index + 2) + '.wav')
        demo_mel[index, :, :] = mfcc(samples, rate, nfiltbank)[:, 0:68]
        demo_codebooks[index, :, :] = lbg(demo_mel[index, :, :], n_centroids)

    plt.figure(n_speakers + 1)
    first_pts = plt.scatter(demo_mel[0, 6, :], demo_mel[0, 4, :],
                            s=100, color='r', marker='o')
    first_ctr = plt.scatter(demo_codebooks[0, 6, :], demo_codebooks[0, 4, :],
                            s=100, color='r', marker='+')
    second_pts = plt.scatter(demo_mel[1, 6, :], demo_mel[1, 4, :],
                             s=100, color='b', marker='o')
    second_ctr = plt.scatter(demo_codebooks[1, 6, :], demo_codebooks[1, 4, :],
                             s=100, color='b', marker='+')
    plt.grid()
    plt.legend((first_pts, second_pts, first_ctr, second_ctr),
               ('Sp1', 'Sp2', 'Sp1 centroids', 'Sp2 centroids'),
               scatterpoints=1, loc='upper left')
    plt.show()

    return (codebooks_mfcc, codebooks_lpc)
def training(nfiltbank, orderLPC):
    """Train and plot MFCC/LPC codebooks for ``./train/s1.wav`` .. ``s4.wav``.

    Each file is loaded with ``read``; ``MFCC_Coeff`` and ``lpc`` features
    are reduced to 16 centroids by ``lbg`` and stemmed into a two-panel
    figure per voice (MFCC on top, LPC below).  A final figure scatters
    feature index 6 against index 4 over the first 68 MFCC columns of
    ``s2.wav`` and ``s3.wav`` together with their centroids.

    Progress messages use single-argument ``print(...)`` calls so the
    function parses on Python 3 and prints the same text on Python 2.

    Args:
        nfiltbank: forwarded to ``MFCC_Coeff``; feature dimension of the
            MFCC codebooks.
        orderLPC: forwarded to ``lpc``; feature dimension of the LPC
            codebooks.

    Returns:
        ``(cbMfcc, cbLpc)`` with shapes ``(4, nfiltbank, 16)`` and
        ``(4, orderLPC, 16)``.
    """
    trainingSet = 4
    nCentroid = 16
    cbMfcc = np.empty((trainingSet, nfiltbank, nCentroid))
    cbLpc = np.empty((trainingSet, orderLPC, nCentroid))
    directory = os.getcwd() + '/train'

    for i in range(trainingSet):
        fname = '/s' + str(i + 1) + '.wav'
        # The doubled space reproduces what the old statement form
        # `print 'Voice ', str(i+1), 'is trained'` wrote.
        print('Voice  ' + str(i + 1) + ' is trained')
        (fs, s) = read(directory + fname)

        mfcc_features = MFCC_Coeff(s, fs, nfiltbank)
        lpc_coeff = lpc(s, fs, orderLPC)
        cbMfcc[i, :, :] = lbg(mfcc_features, nCentroid)
        cbLpc[i, :, :] = lbg(lpc_coeff, nCentroid)

        plt.figure(i)
        plt.title('Codebook for speaker ' + str(i + 1) + ' with '
                  + str(nCentroid) + ' centroids')
        for j in range(nCentroid):
            plt.subplot(211)
            plt.stem(cbMfcc[i, :, j])
            plt.ylabel('MFCC')
            plt.subplot(212)
            markerline, stemlines, baseline = plt.stem(cbLpc[i, :, j])
            plt.setp(markerline, 'markerfacecolor', 'r')
            plt.setp(baseline, 'color', 'k')
            plt.ylabel('LPC')
            plt.axis(ymin=-1, ymax=1)
            plt.xlabel('Number of features')

    plt.show()
    print('Training has been performed ')

    # Comparison figure for two voices (files s2.wav and s3.wav).
    codebooks = np.empty((2, nfiltbank, nCentroid))
    pair_mfcc = np.empty((2, nfiltbank, 68))
    for i in range(2):
        fname = '/s' + str(i + 2) + '.wav'
        (fs, s) = read(directory + fname)
        pair_mfcc[i, :, :] = MFCC_Coeff(s, fs, nfiltbank)[:, 0:68]
        codebooks[i, :, :] = lbg(pair_mfcc[i, :, :], nCentroid)

    plt.figure(trainingSet + 1)
    s1 = plt.scatter(pair_mfcc[0, 6, :], pair_mfcc[0, 4, :],
                     s=100, color='r', marker='o')
    c1 = plt.scatter(codebooks[0, 6, :], codebooks[0, 4, :],
                     s=100, color='r', marker='+')
    s2 = plt.scatter(pair_mfcc[1, 6, :], pair_mfcc[1, 4, :],
                     s=100, color='b', marker='o')
    c2 = plt.scatter(codebooks[1, 6, :], codebooks[1, 4, :],
                     s=100, color='b', marker='+')
    plt.grid()
    plt.legend((s1, s2, c1, c2),
               ('Sp1', 'Sp2', 'Sp1 centroids', 'Sp2 centroids'),
               scatterpoints=1, loc='upper left')
    plt.show()

    return (cbMfcc, cbLpc)
def training(filtbankN):
    """Train an MFCC codebook for each of ``Speaker1.wav`` .. ``Speaker12.wav``.

    The files are opened relative to the current working directory.  For
    every speaker the ``MFCC`` output is printed (shape only), transposed,
    and reduced to 16 centroids by ``lbg``.

    Args:
        filtbankN: feature dimension of the codebook array.
            NOTE(review): ``MFCC`` is called with a literal 13, so this
            presumably has to be 13 -- confirm against ``MFCC``/``lbg``.

    Returns:
        Array of shape ``(12, filtbankN, 16)``, one codebook per speaker.
    """
    centroid_count = 16
    speaker_count = 12

    # np.empty((layers, rows, columns)): one codebook per speaker.  The
    # original note describes this as 12 arrays of 13 rows by 16 columns.
    codebooks_org = np.empty((speaker_count, filtbankN, centroid_count))

    for speaker in range(1, speaker_count + 1):
        wav_name = 'Speaker' + str(speaker) + '.wav'
        # fs = sample rate, sig = sample array
        fs, sig = read(wav_name)

        # Build the mel features as an array.
        # Original note lists winlen=0.025, winstep=0.01, numcep=13,
        # nfilt=26, nfft=512 -- presumably the defaults of the MFCC helper;
        # the positional arguments below follow that order (TODO confirm).
        # Background from the original author: a DCT is applied because the
        # filter banks overlap, so their energies are correlated and the DCT
        # decorrelates them.  Only about 12 of the 26 DCT coefficients are
        # kept, since higher ones capture fast changes in filter-bank
        # energy, which hurts recognition performance.
        melcoeffs = MFCC(sig, fs, 0.025, 0.01, 13, 40)

        print("---after mfcc---")
        print(np.shape(melcoeffs))
        print()

        # Transpose for convenience before quantisation, then store the
        # resulting codebook for this speaker.
        transposed = np.transpose(melcoeffs)
        codebooks_org[speaker - 1, :, :] = lbg(transposed, centroid_count)

    print('\nTraining of model is complete!')
    return codebooks_org
def training(nfiltbank):
    """Train and plot MFCC codebooks for ``./train/s1.wav`` .. ``s4.wav``.

    Each file is loaded with ``read``; its ``mfcc`` features are reduced to
    4 centroids by ``lbg``.  The centroids of every speaker are stemmed into
    subplot 211 of that speaker's figure, and all figures are shown once
    training has finished.

    Args:
        nfiltbank: forwarded to ``mfcc``; feature dimension of the
            codebooks.

    Returns:
        Array of shape ``(4, nfiltbank, 4)``, one codebook per speaker.
    """
    speaker_count = 4
    centroid_count = 4
    train_dir = os.getcwd() + '/train'
    codebooks_mfcc = np.empty((speaker_count, nfiltbank, centroid_count))

    for spk in range(speaker_count):
        label = str(spk + 1)
        print('Now speaker ', label, 'features are being trained')
        fs, signal = read(train_dir + '/s' + label + '.wav')
        # To inspect the input waveform, plot the first 1024 samples of
        # `signal` here (amplitude over time); disabled by default.

        features = mfcc(signal, fs, nfiltbank)
        codebooks_mfcc[spk, :, :] = lbg(features, centroid_count)

        plt.figure(spk)
        for centroid in range(centroid_count):
            plt.subplot(211)
            plt.stem(codebooks_mfcc[spk, :, centroid])
            plt.xlabel('Number of features')
            plt.ylabel('MFCC')
            plt.title('Codebook for speaker ' + label)

    plt.show()
    print('Training completed')
    return codebooks_mfcc
codebooks_mfcc = np.empty((nSpeaker, nfiltbank, nCentroid)) # Wskazanie katalogu gdzie przechowywane są pliki do trenowania. directory = os.getcwd() + "/train" # Pętla wczytania pliku, trenowania i pokazywania wykresu. for i in range(nSpeaker): fname = "/s" + str(i + 1) + ".wav" print("Trenowany mówca nr: ", str(i + 1)) # Wczytanie pliku. (fs, s) = read(directory + fname) # Zastosowanie metody trenującej. mel_coeff = mfcc(s, fs, nfiltbank) codebooks_mfcc[i, :, :] = lbg(mel_coeff, nCentroid) ####Joanna changed # Tworzenei wykresu. plt.figure(i + 1) plt.title("Codebook dla mówcy " + str(i + 1) + " z " + str(nCentroid) + " centroidami") plt.xlabel("Liczba cech") ax1 = plt # dla kilku subplot(2, 1, 1) plt.ylabel("MFCC") for j in range(nCentroid): ax1.stem( codebooks_mfcc[i, :, j], use_line_collection=True, linefmt="C2-.", markerfmt="C8o",
def training(filtbankN, dir_train='D:/_python/SpeakerRecognition/3/Data/train'):
    """Train and plot MFCC codebooks for ``Speaker1.wav`` .. ``Speaker10.wav``.

    Each file under ``dir_train`` is loaded with ``read``; the transposed
    ``MFCC`` output is reduced to 16 centroids by ``lbg`` and stemmed into
    one figure per speaker.  A final figure scatters feature index 4
    against index 5 over the first 68 MFCC columns of the first two
    speakers, together with the centroids trained on that slice.

    Fixes relative to the previous version:
      * the speaker-1 centroid scatter paired speaker-0 x-values with
        speaker-1 y-values; both coordinates now come from speaker 0;
      * a stray ``plt.plot(211)`` that plotted the constant 211 (and so
        stretched the y-axis of every codebook figure) is removed;
      * the training directory is a keyword parameter instead of a
        hard-coded local, with the old path as its default.

    Args:
        filtbankN: feature dimension of the codebook arrays.
            NOTE(review): ``MFCC`` is called without it, so it must match
            the number of coefficients ``MFCC`` returns -- confirm.
        dir_train: directory holding the ``Speaker<k>.wav`` files.

    Returns:
        Array of shape ``(10, filtbankN, 16)``, one codebook per speaker.
    """
    Centroids = 16
    SpeakerN = 10
    codebooks_org = np.empty((SpeakerN, filtbankN, Centroids))

    for i in range(SpeakerN):
        wav_name = '/Speaker' + str(i + 1) + '.wav'
        (fs, sig) = read(dir_train + wav_name)
        print('Training the features of speaker ', str(i + 1))

        # Transposed before quantisation.
        # NOTE(review): presumably to put features on rows -- confirm.
        melcoeffs = np.transpose(MFCC(sig, fs))
        codebooks_org[i, :, :] = lbg(melcoeffs, Centroids)

        plt.figure(i)
        plt.title('Speaker ' + str(i + 1) + ' codeword with '
                  + str(Centroids) + ' centroids')
        for j in range(Centroids):
            plt.stem(codebooks_org[i, :, j])

    plt.show()
    print('Training of model is complete!')

    # Plot the actual codebooks of the first two speakers.
    codebooks = np.empty((2, filtbankN, Centroids))
    melcoeffs = np.empty((2, filtbankN, 68))
    for i in range(2):
        wav_name = '/Speaker' + str(i + 1) + '.wav'
        (fs, sig) = read(dir_train + wav_name)
        features = np.transpose(MFCC(sig, fs))
        melcoeffs[i, :, :] = features[:, 0:68]
        codebooks[i, :, :] = lbg(melcoeffs[i, :, :], Centroids)

    plt.figure(SpeakerN + 1)
    # Both coordinates of each series come from the same speaker.
    c1 = plt.scatter(codebooks[0, 4, :], codebooks[0, 5, :],
                     s=100, color='g', marker='+')
    s1 = plt.scatter(melcoeffs[0, 4, :], melcoeffs[0, 5, :],
                     s=100, color='g', marker='o')
    c2 = plt.scatter(codebooks[1, 4, :], codebooks[1, 5, :],
                     s=100, color='b', marker='+')
    s2 = plt.scatter(melcoeffs[1, 4, :], melcoeffs[1, 5, :],
                     s=100, color='b', marker='o')
    plt.grid()
    plt.legend((s1, s2, c1, c2),
               ('Speak1', 'Speak2', 'Speak1 centroids', 'Speak2 centroids'),
               scatterpoints=1, loc='lower right')
    plt.show()

    return (codebooks_org)