def get_ceps_true_speaker(speaker_samples_ceps='combined'):
    nfft = 512
    t_frame = 20 * 10**(-3)  # Duration in seconds of each frame

    if speaker_samples_ceps == 'combined':
        [fs, TS_audiofiles] = get_np_audiofiles(UBM=False, TS=True,
                                                normalize=True)
        nwin = int(t_frame * fs)  # nwin is the number of samples per frame.
                                  # For t_frame=20ms and fs=16kHz, nwin=320
        ceps, mspec, spec = mfcc(TS_audiofiles, nwin, nfft, fs, nceps)
        return ceps
    else:
        files_in_folder = os.listdir(os.getcwd())  # Files in current directory
        # Sorted so that the three elocutions are always processed in the
        # same order, regardless of how os.listdir() returns them
        true_speaker_files = sorted([x for x in files_in_folder
                                     if ('.wav' in x) and ('true_speaker' in x)])

        ceps_list = []
        for true_speaker_file in true_speaker_files[:3]:
            [fs, audio] = read_audiofile(true_speaker_file, normalize=True)
            nwin = int(t_frame * fs)
            ceps, mspec, spec = mfcc(audio, nwin, nfft, fs, nceps)
            ceps_list.append(ceps)
        [ceps_01, ceps_02, ceps_03] = ceps_list
        return ceps_01, ceps_02, ceps_03
def get_ceps_UBM(str_gender='all', exclude_speaker=None):
    # -------------------
    # Getting audio files in Numpy array format
    # -------------------
    [fs, np_audio_male, np_audio_female, np_audio_all] = get_np_audiofiles(
        UBM=True, exclude_speaker=exclude_speaker)

    # ----------------------------------------------------
    # Computing MFCC
    # ----------------------------------------------------
    t_frame = 20 * 10**(-3)  # Duration in seconds of each frame
    nwin = int(t_frame * fs)  # nwin is the number of samples per frame.
                              # For t_frame=20ms and fs=16kHz, nwin=320
    nfft = 512

    # elif is required here: with two independent ifs, str_gender == 'male'
    # also falls through to the final else and the male cepstra get
    # overwritten with those of all speakers
    if str_gender == 'male':
        ceps, mspec, spec = mfcc(np_audio_male, nwin, nfft, fs, nceps)
    elif str_gender == 'female':
        ceps, mspec, spec = mfcc(np_audio_female, nwin, nfft, fs, nceps)
    else:
        ceps, mspec, spec = mfcc(np_audio_all, nwin, nfft, fs, nceps)
    return ceps
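# Usage sketch (not part of the original module): fit the UBM and the
# true-speaker model from the cepstra returned by the two functions above.
# Assumes the module-level GMM import (old sklearn.mixture.GMM API) that
# evaluate_all_vs_true() below also relies on; n_components=10 and
# covariance_type='full' mirror the values hard-coded there.
def demo_train_models():
    ceps_UBM = get_ceps_UBM(str_gender='all')
    ceps_true = get_ceps_true_speaker(speaker_samples_ceps='combined')
    # In the old sklearn API, fit() returns the fitted estimator
    UBM_all = GMM(n_components=10, covariance_type='full').fit(ceps_UBM)
    model_true = GMM(n_components=10, covariance_type='full').fit(ceps_true)
    return UBM_all, model_true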
def audio2ceps(filename, flag_normalize=True):
    '''
    Reads an audio file and creates a text file with its MFCC.

    Workaround: on the Raspberry Pi, np.savetxt() does not accept the
    keyword 'header' (that keyword was only added in NumPy 1.7, which is
    newer than the NumPy shipped with Raspbian), so the header is skipped
    on that platform.
    '''
    file = filename.split('.')[0]  # Separates file name from extension
    # --------------------------------------------
    # File reading into numpy array
    [fs, np_audio] = read_audiofile(filename, normalize=flag_normalize)
    # --------------------------------------------
    # MFCC calculation
    t_frame = 20 * 10**(-3)  # Duration in seconds of each frame
    nwin = int(t_frame * fs)  # nwin is the number of samples per frame.
                              # For t_frame=20ms and fs=16kHz, nwin=320
    nfft = 512
    ceps, mspec, spec = mfcc(np_audio, nwin, nfft, fs, nceps)
    [nframes, ncolumns] = ceps.shape
    # --------------------------------------------
    # Text file creation
    str_header = ('MFCC from file ' + filename + '.\nInfo:\n\tSample rate: '
                  + str(RATE) + '\n\tNumber of MFCC per frame: ' + str(nceps)
                  + '\n\tNumber of frames (samples): ' + str(nframes)
                  + '\n\n')
    if sys.platform == 'linux2':  # Any Linux under Python 2 (Raspbian included)
        np.savetxt(file + '_ceps.txt', ceps)
    else:
        np.savetxt(file + '_ceps.txt', ceps, header=str_header)
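# Usage sketch (not part of the original module): dump the MFCC of one
# enrollment recording to a text file and read it back. The file name
# '01_true_speaker.wav' is taken from the true-speaker lists used below.
def demo_audio2ceps():
    audio2ceps('01_true_speaker.wav', flag_normalize=True)
    # np.loadtxt skips the '#'-prefixed header lines written by np.savetxt
    ceps = np.loadtxt('01_true_speaker_ceps.txt')
    print 'ceps shape (frames x coefficients): ', ceps.shape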
def get_ceps_threshold_files():
    files_in_folder = os.listdir(os.getcwd())  # Files in current directory
    # Sorted so the three threshold recordings are processed in a fixed order
    threshold_files = sorted([x for x in files_in_folder
                              if ('threshold_audio.wav' in x)])
    t_frame = 20 * 10**(-3)  # Duration in seconds of each frame
    nfft = 512

    ceps_list = []
    for threshold_file in threshold_files[:3]:
        [fs, np_threshold] = read_audiofile(threshold_file, normalize=True)
        nwin = int(t_frame * fs)  # Number of samples per frame.
                                  # For t_frame=20ms and fs=16kHz, nwin=320
        ceps, mspec, spec = mfcc(np_threshold, nwin, nfft, fs, nceps)
        ceps_list.append(ceps)
    [ceps_01, ceps_02, ceps_03] = ceps_list
    return ceps_01, ceps_02, ceps_03
def get_ceps_test_speaker():
    files_in_folder = os.listdir(os.getcwd())  # Files in current directory
    test_file = [x for x in files_in_folder if ('test_speaker.wav' in x)]
    [fs, np_test] = read_audiofile(test_file[0], normalize=True)
    t_frame = 20 * 10**(-3)  # Duration in seconds of each frame
    nwin = int(t_frame * fs)  # nwin is the number of samples per frame.
                              # For t_frame=20ms and fs=16kHz, nwin=320
    nfft = 512
    ceps, mspec, spec = mfcc(np_test, nwin, nfft, fs, nceps)
    return ceps
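# Hypothetical helper (not in the original module): every function above
# recomputes the same frame parameters, so they could be factored out. At
# fs = 16 kHz a 20 ms window gives nwin = 0.020 * 16000 = 320 samples; the
# FFT length is fixed at 512 points throughout this file.
def frame_params(fs, t_frame=20 * 10**(-3), nfft=512):
    nwin = int(t_frame * fs)  # Number of samples per frame
    return nwin, nfft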
def evaluate_all_vs_single_true(speaker_samples='combined'):
    # ----------------------------------------
    # Model of True Speaker (trained once; it does not depend on the test file)
    if speaker_samples == 'combined':
        model_true = get_GMM_true_speaker(speaker_samples=speaker_samples)
    else:  # In this case speaker_samples == 'separated'
        [model_true_01, model_true_02, model_true_03] = \
            get_GMM_true_speaker(speaker_samples=speaker_samples)

    # ----------------------------------------
    # Setting up text file
    # Name of the enclosing folder, which tells whether the test is
    # TD (text dependent) or TI (text independent)
    text_dependency = os.getcwd().split('/')[-1]
    files_in_folder = os.listdir(os.getcwd())  # Files in current directory
    text_output_files = [x for x in files_in_folder
                         if ('Verification_Test_04_true' in x)
                         and (speaker_samples in x)]
    number = len(text_output_files) + 1
    f = open('Verification_Test_04_true_' + speaker_samples + '_0'
             + str(number) + '.txt', 'w')
    f.write('# ' + text_dependency + ' ASV\n'
            '# Header: a single ASV test is carried out for each test '
            'speaker, with the true speaker fixed, but having three '
            'enrollment utterances.\n'
            '# GENDER\tINDEX\tAGE\tSCORE\tDECISION\n')

    n_accepted = 0
    n_false_acceptance = 0
    n_rejected = 0
    n_false_rejection = 0

    # ----------------------------------------------------
    # Training UBM for all UBM-speakers except the True one
    # ----------------------------------------------------
    UBM_all = get_UBM_all(cov_type='full')

    # ----------------------------------------------------
    # Getting threshold
    # ----------------------------------------------------
    threshold = get_score_threshold(speaker_samples_threshold=speaker_samples)

    audiofiles_male = [x for x in files_in_folder
                       if ('.wav' in x) and ('M' in x) and ('UBM' not in x)
                       and ('_test' not in x) and ('_true' not in x)]
    audiofiles_female = [x for x in files_in_folder
                         if ('.wav' in x) and ('F' in x) and ('UBM' not in x)
                         and ('_test' not in x) and ('_true' not in x)]
    audiofiles_S = [x for x in files_in_folder
                    if ('.wav' in x) and ('S' in x) and ('UBM' not in x)
                    and ('_test' not in x) and ('_true' not in x)]

    t_frame = 20 * 10**(-3)  # Duration in seconds of each frame
    nfft = 512

    # ------------------------------------------------------------------------
    # Running through the lists of utterances from non-True speakers
    for gender_file_list in [audiofiles_male, audiofiles_female]:
        for current_test_speaker in gender_file_list:
            print '\nTEST 04'
            print 'For file ' + current_test_speaker + ':'
            # "aux" is just a disposable variable
            [gender, index, aux] = current_test_speaker.split('_')
            [age, aux] = aux.split('.')

            # ----------------------------------------------------
            # Computing the MFCC of the test speaker
            # ----------------------------------------------------
            [fs, audio_test] = read_audiofile(current_test_speaker,
                                              normalize=True)
            nwin = int(t_frame * fs)  # Samples per frame. For t_frame=20ms
                                      # and fs=16kHz, nwin=320
            ceps_test, mspec, spec = mfcc(audio_test, nwin, nfft, fs, nceps)

            # ----------------------------------------------------
            # Scoring
            # ----------------------------------------------------
            score_UBM = UBM_all.score(ceps_test)
            if speaker_samples == 'combined':
                score_true = model_true.score(ceps_test)
            else:  # In this case speaker_samples == 'separated'
                score01 = model_true_01.score(ceps_test)
                score02 = model_true_02.score(ceps_test)
                score03 = model_true_03.score(ceps_test)
                # Frame-wise maximum over the three elocution models
                # (axis=0, so the result is still one score per frame)
                score_true = np.max(np.array([score01, score02, score03]),
                                    axis=0)
            score = np.sum(score_true - score_UBM)
            print '\nScore (difference between summed log-probabilities,' \
                  ' True vs. UBM_all): ', score

            # ----------------------------------
            # DECISION
            # ----------------------------------
            # Any acceptance here is a false acceptance: these utterances
            # do not come from the true speaker.
            if score >= threshold:
                decision = 'ACCEPT'
                n_accepted += 1
                n_false_acceptance += 1
            else:
                decision = 'Reject'
                n_rejected += 1
            f.write(gender + '\t' + index + '\t' + age + '\t' + str(score)
                    + '\t' + decision + '\n')

    # ------------------------------------------------------------------------
    # Running through the utterances of the true speaker (prefix = S)
    for current_test_speaker in audiofiles_S:
        print '\nTEST 04'
        print 'For file ' + current_test_speaker + ':'
        # "aux" is just a disposable variable
        [gender, index, aux] = current_test_speaker.split('_')
        [age, aux] = aux.split('.')

        # ----------------------------------------------------
        # Computing the MFCC of the test speaker
        # ----------------------------------------------------
        [fs, audio_test] = read_audiofile(current_test_speaker, normalize=True)
        nwin = int(t_frame * fs)
        ceps_test, mspec, spec = mfcc(audio_test, nwin, nfft, fs, nceps)

        # ----------------------------------------------------
        # Scoring
        # ----------------------------------------------------
        score_UBM = UBM_all.score(ceps_test)
        if speaker_samples == 'combined':
            score_true = model_true.score(ceps_test)
        else:  # In this case speaker_samples == 'separated'
            score01 = model_true_01.score(ceps_test)
            score02 = model_true_02.score(ceps_test)
            score03 = model_true_03.score(ceps_test)
            score_true = np.max(np.array([score01, score02, score03]),
                                axis=0)
        score = np.sum(score_true - score_UBM)
        print '\nScore (difference between summed log-probabilities,' \
              ' True vs. UBM_all): ', score

        # ----------------------------------
        # DECISION
        # ----------------------------------
        # Any rejection here is a false rejection: these utterances
        # do come from the true speaker.
        if score >= threshold:
            decision = 'ACCEPT'
            n_accepted += 1
        else:
            decision = 'Reject'
            n_rejected += 1
            n_false_rejection += 1
        f.write(gender + '\t' + index + '\t' + age + '\t' + str(score)
                + '\t' + decision + '\n')

    f.write('# Number of accepted: ' + str(n_accepted)
            + '\n# Number of rejected: ' + str(n_rejected)
            + '\n# Number of false-acceptance: ' + str(n_false_acceptance)
            + '\n# Number of false-rejection: ' + str(n_false_rejection)
            + '\n# Threshold: ' + str(threshold))
    f.close()
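# Sketch (not in the original module) of the error rates implied by the
# counters that evaluate_all_vs_single_true() writes to its summary: every
# acceptance of a non-True utterance is a false acceptance, and every
# rejection of a true-speaker ('S') utterance is a false rejection.
def false_rates(n_false_acceptance, n_impostor_trials,
                n_false_rejection, n_true_trials):
    FAR = float(n_false_acceptance) / n_impostor_trials  # false-acceptance rate
    FRR = float(n_false_rejection) / n_true_trials       # false-rejection rate
    return FAR, FRR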
def evaluate_all_vs_true():
    '''
    TESTING WITH ALL SPEAKERS AGAINST A SINGLE TRUE SPEAKER: every utterance
    takes a turn as the True Speaker, and every male/female utterance is
    scored against it.
    '''
    files_in_folder = os.listdir(os.getcwd())  # Files in current directory
    audiofiles_male = [x for x in files_in_folder
                       if ('.wav' in x) and ('M' in x) and ('UBM' not in x)
                       and ('_true' not in x) and ('_test' not in x)]
    audiofiles_female = [x for x in files_in_folder
                         if ('.wav' in x) and ('F' in x) and ('UBM' not in x)
                         and ('_true' not in x) and ('_test' not in x)]
    print 'len(audiofiles_male): ', len(audiofiles_male)
    print 'len(audiofiles_female): ', len(audiofiles_female)

    # Concatenation of both lists plus the three true-speaker recordings
    true_speakers_list = audiofiles_male + audiofiles_female + [
        '01_true_speaker.wav', '02_true_speaker.wav', '03_true_speaker.wav']

    t_frame = 20 * 10**(-3)  # Duration in seconds of each frame
    nfft = 512

    for true_speaker in true_speakers_list:
        # ----------------------------------------------------
        # Training UBM for all training speakers (male & female)
        # except the True one
        # ----------------------------------------------------
        UBM_all = get_UBM_all(cov_type='full', exclude_speaker=true_speaker)

        # ----------------------------------------------------
        # Training true speaker (once per true_speaker; the model does not
        # depend on the test file, so there is no need to retrain it in
        # the inner loops)
        # ----------------------------------------------------
        fs, audio_true = spwave.read(true_speaker)
        nwin = int(t_frame * fs)  # Samples per frame. For t_frame=20ms and
                                  # fs=16kHz, nwin=320
        ceps_true, mspec, spec = mfcc(audio_true, nwin, nfft, fs, nceps)
        ngaussians = 10
        gmm = GMM(n_components=ngaussians, covariance_type='full')
        model_true = gmm.fit(ceps_true)
        print 'model_true converged? ', model_true.converged_

        # ----------------------------------------------------
        # Setting up .txt files and lists
        # ----------------------------------------------------
        f = open('Verification_Test_02_' + true_speaker.split('.')[0]
                 + '.txt', 'w')
        f.write('# Header: a single ASV test is carried out for each speaker '
                'S, with the speaker ' + true_speaker.split('.')[0]
                + ' being the True Speaker in all tests.\n'
                '# GENDER\tINDEX\tAGE\tSCORE\n')

        for gender_file_list in [audiofiles_male, audiofiles_female]:
            for current_test_speaker in gender_file_list:
                print '\nTEST 02 (all against one)'
                print 'For file ' + current_test_speaker + ':'
                # "aux" is just a disposable variable
                [gender, index, aux] = current_test_speaker.split('_')
                [age, aux] = aux.split('.')

                # ----------------------------------------------------
                # Computing the MFCC of the test speaker
                # ----------------------------------------------------
                fs, audio_test = spwave.read(current_test_speaker)
                nwin = int(t_frame * fs)
                ceps_test, mspec, spec = mfcc(audio_test, nwin, nfft, fs,
                                              nceps)

                # ----------------------------------------------------
                # Scoring: summed log-likelihood ratio (True vs. UBM)
                # ----------------------------------------------------
                log_prob_true = model_true.score(ceps_test)
                log_prob_UBM = UBM_all.score(ceps_test)
                print np.sum(log_prob_true)
                print np.sum(log_prob_UBM)
                score = np.sum(log_prob_true) - np.sum(log_prob_UBM)
                print 'Difference between summed log-probabilities' \
                      ' (True vs. UBM_all): ', score
                f.write(gender + '\t' + index + '\t' + age + '\t'
                        + str(score) + '\n')
        f.close()
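# Sketch (not in the original module) of the scoring rule shared by the
# evaluation functions: with the old sklearn GMM API, score() returns one
# log-likelihood per frame, so the verification score is the summed
# log-likelihood ratio  score = sum_t [log p(x_t|true) - log p(x_t|UBM)].
def llr_score(model_true, model_ubm, ceps_test):
    return np.sum(model_true.score(ceps_test) - model_ubm.score(ceps_test))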
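# Hypothetical entry point (not in the original file): run both evaluations
# from the folder that holds the .wav recordings.
if __name__ == '__main__':
    evaluate_all_vs_true()
    evaluate_all_vs_single_true(speaker_samples='combined')
    evaluate_all_vs_single_true(speaker_samples='separated')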