def training(nfiltbank, orderLPC):
    """Train MFCC and LPC codebooks for every speaker .wav file found in
    the train_all_speakers directory.

    Parameters
    ----------
    nfiltbank : number of mel filter banks used for the MFCC features
    orderLPC : order of the LPC analysis

    Returns
    -------
    (codebooks_mfcc, codebooks_lpc) : LBG codebooks, one slice per speaker,
    indexed in the order os.listdir returned the files.
    """
    # Get directory and list of *.wav files
    directory = os.getcwd() + '/train_all_speakers'
    # os.listdir already returns a fresh list; the original
    # [f for f in os.listdir(directory)] was a redundant copy.
    wave_files = os.listdir(directory)
    nSpeaker = len(wave_files)
    nCentroid = 32  # original is 16
    codebooks_mfcc = np.empty((nSpeaker, nfiltbank, nCentroid))
    codebooks_lpc = np.empty((nSpeaker, orderLPC, nCentroid))
    for i, wave_file in enumerate(wave_files):
        fname = '/' + wave_file
        # Fixed: the original Python 2 print statement is a syntax error
        # under Python 3, which the rest of this file targets.
        print('Speaker [' + str(i) + '] File:' + wave_file + ' Training features...')
        (fs, s) = read(directory + fname)
        mel_coeff = mfcc_p(s, fs)
        mel_coeff = mel_coeff.transpose()
        # Zero the 0th coefficient: it reflects overall energy, not timbre
        mel_coeff[0, :] = np.zeros(mel_coeff.shape[1])
        lpc_coeff = lpc(s, fs, orderLPC)
        codebooks_mfcc[i, :, :] = lbg(mel_coeff, nCentroid)
        codebooks_lpc[i, :, :] = lbg(lpc_coeff, nCentroid)
    print('Training finished\n')
    return (codebooks_mfcc, codebooks_lpc)
def training(nfiltbank, orderLPC):
    """Train MFCC and LPC codebooks for the two speakers in the ./train
    folder, playing each clip back during training, then plot both
    speakers' MFCC features against their LBG centroids.

    Returns a pair (codebooks_mfcc, codebooks_lpc).
    """
    nSpeaker = 2
    nCentroid = 16
    codebooks_mfcc = np.empty((nSpeaker, nfiltbank, nCentroid))
    codebooks_lpc = np.empty((nSpeaker, orderLPC, nCentroid))
    directory = os.getcwd() + '/train'
    fname = str()
    for spk in range(nSpeaker):
        fname = '/speaker' + str(spk + 1) + '.wav'
        print('Now speaker ', str(spk + 1), 'features are being trained')
        fs, samples = read(directory + fname)        # fs taken from the wav header
        samples = wav_to_floats(directory + fname)   # float samples replace the raw read
        samples = samples[:48000]                    # first 48000 samples only
        sd.play(samples, fs)                         # audible feedback while training
        codebooks_mfcc[spk, :, :] = lbg(mfcc(samples, fs, nfiltbank), nCentroid)
        codebooks_lpc[spk, :, :] = lbg(lpc(samples, fs, orderLPC), nCentroid)
    # Scatter the 7th vs 5th MFCC dimension of both speakers together with
    # their recomputed centroids (features truncated to 68 frames).
    codebooks = np.empty((2, nfiltbank, nCentroid))
    mel_coeff = np.empty((2, nfiltbank, 68))
    for spk in range(2):
        fname = '/speaker' + str(spk + 1) + '.wav'
        fs, samples = read(directory + fname)
        samples = samples[:48000]
        mel_coeff[spk, :, :] = mfcc(samples, fs, nfiltbank)[:, 0:68]
        codebooks[spk, :, :] = lbg(mel_coeff[spk, :, :], nCentroid)
    plt.figure(nSpeaker + 1)
    s1 = plt.scatter(mel_coeff[0, 6, :], mel_coeff[0, 4, :], s=100, color='r', marker='o')
    c1 = plt.scatter(codebooks[0, 6, :], codebooks[0, 4, :], s=100, color='r', marker='+')
    s2 = plt.scatter(mel_coeff[1, 6, :], mel_coeff[1, 4, :], s=100, color='b', marker='o')
    c2 = plt.scatter(codebooks[1, 6, :], codebooks[1, 4, :], s=100, color='b', marker='+')
    plt.grid()
    plt.legend((s1, s2, c1, c2),
               ('Child', 'Parent', 'Child centroids', 'Parent centroids'),
               scatterpoints=1, loc='upper left')
    plt.show()
    return (codebooks_mfcc, codebooks_lpc)
def training(nfiltbank, orderLPC):
    """Train the speaker recognition model using .wav files from the
    static/speakers folder. Feature extraction is done in two ways: MFCC and LPC.

    Parameters
    ----------
    nfiltbank: number of filter banks for mfcc (default 13)
    orderLPC: number or order for LPC (default 15)

    Return
    ------
    Returns a pair (codebooks_mfcc, codebooks_lpc) which are the encoded
    clusters for trained speakers, ordered by file modification time."""
    directory = os.getcwd() + '/static/speakers'
    # Collect regular files, oldest first, so codebook index i matches the
    # i-th enrolled speaker. Filtering on full paths replaces the original
    # os.chdir round-trip, which mutated the process-global CWD.
    full_paths = [os.path.join(directory, f) for f in os.listdir(directory)]
    full_paths = [p for p in full_paths if os.path.isfile(p)]
    full_paths.sort(key=os.path.getmtime)
    # os.path.basename replaces the hand-rolled separator scanner, which
    # returned the whole path when the separator was the final character
    # (file[-0:] == file) and None when no separator was present.
    wave_files = [os.path.basename(p) for p in full_paths]

    nSpeaker = len(wave_files)
    nCentroid = 32  # original is 16
    codebooks_mfcc = np.empty((nSpeaker, nfiltbank, nCentroid))
    codebooks_lpc = np.empty((nSpeaker, orderLPC, nCentroid))
    for i, wave_file in enumerate(wave_files):
        fname = '/' + wave_file
        (fs, s) = read(directory + fname)
        mel_coeff = mfcc(s, fs)
        mel_coeff = mel_coeff.transpose()
        # Zero the 0th coefficient: it carries overall energy, not timbre
        mel_coeff[0, :] = np.zeros(mel_coeff.shape[1])
        lpc_coeff = lpc(s, fs, orderLPC)
        codebooks_mfcc[i, :, :] = lbg(mel_coeff, nCentroid)
        codebooks_lpc[i, :, :] = lbg(lpc_coeff, nCentroid)
    print('Training finished\n')
    return (codebooks_mfcc, codebooks_lpc)
class test():
    """Legacy test driver kept for reference.

    NOTE(review): the loop below executes at class-definition time and
    references names (nSpeaker, directory, nfiltbank, orderLPC,
    codebooks_mfcc, codebooks_lpc, read, mfcc, lpc) that must already exist
    at module level — confirm before importing this module.
    """

    def minDistance(features, codebooks):
        # Index of the codebook whose centroids lie closest to the features
        # (mean nearest-centroid Euclidean distance).
        speaker = 0
        distmin = np.inf
        for k in range(np.shape(codebooks)[0]):
            D = EUDistance(features, codebooks[k, :, :])
            dist = np.sum(np.min(D, axis=1)) / (np.shape(D)[0])
            if dist < distmin:
                distmin = dist
                speaker = k
        return speaker

    for i in range(nSpeaker):
        fname = '/s' + str(i + 1) + '.wav'
        # Fixed: the Python 2 print statements were syntax errors in Python 3
        print('Now speaker ', str(i + 1), 'features are being tested')
        (fs, s) = read(directory + fname)
        mel_coefs = mfcc(s, fs, nfiltbank)
        lpc_coefs = lpc(s, fs, orderLPC)
        sp_mfcc = minDistance(mel_coefs, codebooks_mfcc)
        sp_lpc = minDistance(lpc_coefs, codebooks_lpc)
        # Map codebook index ranges to enrolled names
        if (sp_mfcc + 1) <= 5:
            b = 'vesa'
        elif ((sp_mfcc + 1) > 5) and ((sp_mfcc + 1) <= 10):
            b = 'belva'
        elif ((sp_mfcc + 1) > 10) and ((sp_mfcc + 1) <= 15):
            b = 'ary'
        else:
            b = 'unknown'  # fixed: b was unbound (NameError) for indices > 15
        print('Speaker ', (i + 1), ' in test matches with speaker ', b,
              (sp_mfcc + 1), ' in train for training with MFCC')
        print('Speaker ', (i + 1), ' in test matches with speaker ', b,
              ' in train for training with LPC')
def training(nfiltbank, orderLPC, nSpeaker=8, nCentroid=16):
    """Train MFCC and LPC codebooks for the speakers s1..sN in ./train,
    plotting each speaker's codebook and a 2D MFCC/centroid comparison.

    Parameters
    ----------
    nfiltbank: number of mel filter banks for MFCC
    orderLPC: order of the LPC analysis
    nSpeaker: number of training speakers (was hard-coded to 8; now a
        backward-compatible parameter)
    nCentroid: number of LBG centroids per codebook (was hard-coded to 16)

    Returns
    -------
    (codebooks_mfcc, codebooks_lpc) with one slice per speaker.
    """
    codebooks_mfcc = np.empty((nSpeaker, nfiltbank, nCentroid))
    codebooks_lpc = np.empty((nSpeaker, orderLPC, nCentroid))
    directory = os.getcwd() + '/train'
    for i in range(nSpeaker):
        fname = '/s' + str(i + 1) + '.wav'
        print('Now speaker ', str(i + 1), 'features are being trained')
        (fs, s) = read(directory + fname)
        mel_coeff = mfcc(s, fs, nfiltbank)
        lpc_coeff = lpc(s, fs, orderLPC)
        codebooks_mfcc[i, :, :] = lbg(mel_coeff, nCentroid)
        codebooks_lpc[i, :, :] = lbg(lpc_coeff, nCentroid)
        # Stem plot of every centroid of this speaker: MFCC on top, LPC below
        plt.figure(i)
        plt.title('Codebook for speaker ' + str(i + 1) + ' with ' + str(nCentroid) + ' centroids')
        for j in range(nCentroid):
            plt.subplot(211)
            plt.stem(codebooks_mfcc[i, :, j])
            plt.ylabel('MFCC')
            plt.subplot(212)
            markerline, stemlines, baseline = plt.stem(codebooks_lpc[i, :, j])
            plt.setp(markerline, 'markerfacecolor', 'r')
            plt.setp(baseline, 'color', 'k')
            plt.ylabel('LPC')
            plt.axis(ymin=-1, ymax=1)
            plt.xlabel('Number of features')
        plt.show()
    print('Training complete')

    # Plotting 5th and 7th dimension MFCC features of speakers s2 and s3 on
    # a 2D plane together with their centroids (features truncated to 68
    # frames). Comment this section out if you don't want to see the codebook.
    codebooks = np.empty((2, nfiltbank, nCentroid))
    mel_coeff = np.empty((2, nfiltbank, 68))
    for i in range(2):
        fname = '/s' + str(i + 2) + '.wav'
        (fs, s) = read(directory + fname)
        mel_coeff[i, :, :] = mfcc(s, fs, nfiltbank)[:, 0:68]
        codebooks[i, :, :] = lbg(mel_coeff[i, :, :], nCentroid)
    plt.figure(nSpeaker + 1)
    s1 = plt.scatter(mel_coeff[0, 6, :], mel_coeff[0, 4, :], s=100, color='r', marker='o')
    c1 = plt.scatter(codebooks[0, 6, :], codebooks[0, 4, :], s=100, color='r', marker='+')
    s2 = plt.scatter(mel_coeff[1, 6, :], mel_coeff[1, 4, :], s=100, color='b', marker='o')
    c2 = plt.scatter(codebooks[1, 6, :], codebooks[1, 4, :], s=100, color='b', marker='+')
    plt.grid()
    plt.legend((s1, s2, c1, c2), ('Sp1', 'Sp2', 'Sp1 centroids', 'Sp2 centroids'),
               scatterpoints=1, loc='upper left')
    plt.show()
    return (codebooks_mfcc, codebooks_lpc)
def training(nfiltbank, orderLPC):
    """Build MFCC and LPC codebooks for the eight training speakers in
    ./train, show each speaker's codebook as stem plots, and finish with a
    2D scatter of two MFCC dimensions versus their LBG centroids.

    Returns a pair (codebooks_mfcc, codebooks_lpc).
    """
    nSpeaker = 8
    nCentroid = 16
    codebooks_mfcc = np.empty((nSpeaker, nfiltbank, nCentroid))
    codebooks_lpc = np.empty((nSpeaker, orderLPC, nCentroid))
    directory = os.getcwd() + '/train'
    print('Speakers features are being trained.Please wait....')
    for spk in range(nSpeaker):
        wav_name = '/s' + str(spk + 1) + '.wav'
        fs, signal = read(directory + wav_name)
        codebooks_mfcc[spk, :, :] = lbg(mfcc(signal, fs, nfiltbank), nCentroid)
        codebooks_lpc[spk, :, :] = lbg(lpc(signal, fs, orderLPC), nCentroid)
        # One figure per speaker: MFCC centroids on top, LPC centroids below
        plt.figure(spk)
        plt.title('Codebook for speaker ' + str(spk + 1) + ' with ' + str(nCentroid) + ' centroids')
        for cent in range(nCentroid):
            plt.subplot(211)
            plt.stem(codebooks_mfcc[spk, :, cent])
            plt.ylabel('MFCC')
            plt.subplot(212)
            markerline, stemlines, baseline = plt.stem(codebooks_lpc[spk, :, cent])
            plt.setp(markerline, 'markerfacecolor', 'r')
            plt.setp(baseline, 'color', 'k')
            plt.ylabel('LPC')
            plt.axis(ymin=-1, ymax=1)
            plt.xlabel('Number of features')
        plt.show()
    print('Training complete')

    # Scatter of the 7th vs 5th MFCC dimension for speakers s2 and s3
    # together with their centroids (features truncated to 68 frames).
    codebooks = np.empty((2, nfiltbank, nCentroid))
    mel_coeff = np.empty((2, nfiltbank, 68))
    for spk in range(2):
        fs, signal = read(directory + '/s' + str(spk + 2) + '.wav')
        mel_coeff[spk, :, :] = mfcc(signal, fs, nfiltbank)[:, 0:68]
        codebooks[spk, :, :] = lbg(mel_coeff[spk, :, :], nCentroid)
    plt.figure(nSpeaker + 1)
    s1 = plt.scatter(mel_coeff[0, 6, :], mel_coeff[0, 4, :], s=100, color='r', marker='o')
    c1 = plt.scatter(codebooks[0, 6, :], codebooks[0, 4, :], s=100, color='r', marker='+')
    s2 = plt.scatter(mel_coeff[1, 6, :], mel_coeff[1, 4, :], s=100, color='b', marker='o')
    c2 = plt.scatter(codebooks[1, 6, :], codebooks[1, 4, :], s=100, color='b', marker='+')
    plt.grid()
    plt.legend((s1, s2, c1, c2), ('Sp1', 'Sp2', 'Sp1 centroids', 'Sp2 centroids'),
               scatterpoints=1, loc='upper left')
    plt.show()
    return (codebooks_mfcc, codebooks_lpc)
def recognize_speaker(codebooks_mfcc, codebooks_lpc, wav, orderLPC=15,
                      names_file="Speaker_Names.txt"):
    """Predicts speaker for a .wav file using trained clusters.

    Parameters
    ----------
    codebooks_mfcc: trained clusters for MFCC
    codebooks_lpc: trained clusters for LPC
    wav: path of the .wav file to classify
    orderLPC: LPC analysis order (default 15, the value previously
        hard-coded; must match the order used in training)
    names_file: text file with one speaker name per line, in codebook order
        (default the previously hard-coded "Speaker_Names.txt")

    Returns
    -------
    A pair of strings for the speaker prediction of both methods MFCC and LPC
    """

    def minDistance(features, codebooks):
        # Index of the codebook whose centroids lie closest to the features
        # (mean nearest-centroid Euclidean distance).
        speaker = 0
        distmin = np.inf
        for k in range(np.shape(codebooks)[0]):
            D = EUDistance(features, codebooks[k, :, :])
            dist = np.sum(np.min(D, axis=1)) / (np.shape(D)[0])
            if dist < distmin:
                distmin = dist
                speaker = k
        return speaker

    # Read .wav file
    (fs, s) = read(wav)

    # Passing test file to MFCC
    mel_coefs = mfcc(s, fs)
    mel_coefs = mel_coefs.transpose()
    # 0th coefficient does not carry significant information
    mel_coefs[0, :] = np.zeros(mel_coefs.shape[1])

    # Passing test file to LPC
    lpc_coefs = lpc(s, fs, orderLPC)

    sp_mfcc = minDistance(mel_coefs, codebooks_mfcc)
    sp_lpc = minDistance(lpc_coefs, codebooks_lpc)

    # Map the winning codebook indices to the names stored in the text file,
    # stripping trailing newlines and whitespace.
    with open(names_file) as f:
        speaker_names = [x.strip() for x in f.readlines()]
    return ("You are " + speaker_names[sp_mfcc] + '\n',
            "You are " + speaker_names[sp_lpc] + '\n')
def test1234():
    """Classify the most recently written .wav in D:/FTP/speaker against the
    trained codebooks and print the matching training speaker.

    NOTE(review): relies on module-level names (nfiltbank, orderLPC,
    codebooks_mfcc, codebooks_lpc, minDistance, namaorang) — confirm they
    are defined before this is called.
    """
    candidates = glob.glob('D:/FTP/speaker/*.wav')
    stamp = max(candidates, key=os.path.getmtime)  # newest capture wins
    print(stamp)
    fs, signal = read(stamp)
    sp_mfcc = minDistance(mfcc(signal, fs, nfiltbank), codebooks_mfcc)
    sp_lpc = minDistance(lpc(signal, fs, orderLPC), codebooks_lpc)
    b = namaorang(sp_mfcc)
    print('Speaker ', (stamp), ' in test matches with speaker ', b,
          (sp_mfcc + 1), ' in train for training with MFCC')
for k in range(np.shape(codebooks)[0]): D = EUDistance(features, codebooks[k, :, :]) dist = np.sum(np.min(D, axis=1)) / (np.shape(D)[0]) if dist < distmin: distmin = dist speaker = k return speaker for i in range(nSpeaker): fname = '/s' + str(i + 1) + '.wav' print('Now speaker ', str(i + 1), 'features are being tested') (fs, s) = read(directory + fname) mel_coefs = mfcc(s, fs, nfiltbank) lpc_coefs = lpc(s, fs, orderLPC) sp_mfcc = minDistance(mel_coefs, codebooks_mfcc) sp_lpc = minDistance(lpc_coefs, codebooks_lpc) print('Speaker ', (i + 1), ' in test matches with speaker ', (sp_mfcc + 1), ' in train for training with MFCC') print('Speaker ', (i + 1), ' in test matches with speaker ', (sp_lpc + 1), ' in train for training with LPC') if i == sp_mfcc: nCorrect_MFCC += 1 if i == sp_lpc: nCorrect_LPC += 1 percentageCorrect_MFCC = (nCorrect_MFCC / nSpeaker) * 100 print('Accuracy of result for training with MFCC is ', percentageCorrect_MFCC,