def do_mfccs():
    """Extract frame-level cepstral features for the label-selected wavs.

    For every folder in the module-level ``list_sets`` the wav files under
    ``audio_dir + folder_name`` are listed, narrowed down through
    ``ah.load_specific`` with the recipe's ``labels.csv``, and then passed to
    ``extract_mfccs.compute_flevel_feats`` once per delta order (0, 1, 2).

    Reads the module-level names ``list_sets``, ``recipe``, ``audio_dir`` and
    ``out_dir``; returns nothing.
    """
    print("=======MFCC extraction phase========")
    cepstral_type = "mfcc"  # choose between "mfcc" or "plp"
    delta_orders = (0, 1, 2)

    for folder_name in list_sets:
        print("\nReading dir:", folder_name)

        # Keep only the wavs whose ids are listed in the labels file.
        labels_csv = '../../data/{}/labels/labels.csv'.format(recipe)
        all_wavs = util.traverse_dir(audio_dir + folder_name, file_type='.wav')
        all_wavs.sort()
        selected_wavs = ah.load_specific(
            source_file=labels_csv,
            list_original_audios=all_wavs,
        )
        selected_wavs.sort()  # best 33 wavs selected manually

        for n_deltas in delta_orders:
            print("\n Extracting with {} deltas".format(n_deltas))
            extract_mfccs.compute_flevel_feats(
                selected_wavs,
                out_dir,
                cepstral_type=cepstral_type,
                num_feats=23,
                recipe=recipe,
                folder_name=folder_name,
                num_deltas=n_deltas,
                obs='',
            )
def do_resample():
    """Resample the wavs of the two test sets via ``resample_wav.make_resample``.

    Input and output roots are both ``work_dir + 'audio/'``. The helper is
    asked for a 16000 Hz target sample rate and the ``'PCM_16'`` subtype.
    Relies on the module-level ``work_dir``; returns nothing.
    """
    source_root = work_dir + 'audio/'
    target_root = work_dir + 'audio/'

    for folder_name in ('readtext_test', 'monologue_test'):
        print("Reading dir:", folder_name)
        wav_paths = util.traverse_dir(source_root + folder_name, '.wav')
        resample_wav.make_resample(
            wav_paths,
            target_root,
            target_sample_rate=16000,
            subtype='PCM_16',
        )
def save_labels(list_sets, audio_dir, out_dir):
    """Write one ``labels_<task>.txt`` file per task folder.

    Args:
        list_sets: names of the folders (tasks) that contain the audio files.
        audio_dir: directory prefix that the task name is appended to.
        out_dir: directory prefix the label files are written under.

    Each wav found under ``audio_dir + task`` is passed to ``make_labels``;
    the first two returned values are joined with a single space, the
    resulting lines are sorted, and they are saved with ``np.savetxt``.
    """
    for task in list_sets:
        wav_paths = util.traverse_dir(audio_dir + task, '.wav')

        # make_labels returns a 3-tuple; the third element is not used here.
        parsed = (make_labels(wav_path) for wav_path in wav_paths)
        label_lines = sorted(
            wav_id + ' ' + label for wav_id, label, _task_name in parsed
        )

        target_file = out_dir + "labels_{}.txt".format(task)
        np.savetxt(target_file, label_lines, delimiter=',', fmt='%s')
        print("labels saved to:", out_dir)
def do_ivecs():
    """Run ``extract_ivecs.compute_ivecs`` on the MFCC files of every set.

    For each folder in the module-level ``list_sets`` the ``.mfcc`` files
    under ``work_dir/data/<recipe>/<folder>`` are listed and handed to the
    extractor together with the UBM feature file and the list of Gaussian
    counts ``[2, 4, 8, 16, 32, 64]``.

    Relies on the module-level ``work_dir``, ``recipe`` and ``list_sets``.
    """
    feats_root = work_dir + 'data/{}/'.format(recipe)
    ivecs_out_dir = work_dir + 'data/'
    # Naming format is: "featureType_recipeName_numberOfDeltas.mfcc"
    # NOTE(review): this path has no file stem before ".mfcc" -- confirm it
    # points at the intended UBM feature file.
    ubm_feats_file = work_dir + 'data/pcgita/ubm/.mfcc'
    gaussian_counts = [2, 4, 8, 16, 32, 64]

    for folder_name in list_sets:
        set_dir = feats_root + folder_name
        print("\nReading dir:", set_dir)
        mfcc_files = util.traverse_dir(set_dir, '.mfcc')
        extract_ivecs.compute_ivecs(
            list_n_gauss=gaussian_counts,
            list_mfcc_files=mfcc_files,
            out_dir=ivecs_out_dir,
            file_ubm_feats=ubm_feats_file,
            recipe=recipe,
            folder_name=folder_name,
        )
def do_fishers():
    """Run ``extract_fishers.compute_fishers`` on the MFCC files of every set.

    For each folder in the module-level ``list_sets`` the ``.mfcc`` files
    under ``work_dir/data/<recipe>/<folder>`` are listed and passed on with
    the cluster counts ``[2, 4, 8, 16, 32]`` and the UBM feature file.

    Relies on the module-level ``work_dir``, ``recipe`` and ``list_sets``.
    """
    feats_root = work_dir + 'data/{}/'.format(recipe)
    fishers_out_dir = work_dir + 'data/'
    # A single path string is passed as ``list_files_ubm``.
    ubm_feats_file = (
        work_dir
        + 'data/pcgita/DDK_analysis/mfccs_pcgita_20_DDK_analysis_2del.mfcc'
    )
    cluster_counts = [2, 4, 8, 16, 32]

    for folder_name in list_sets:
        set_dir = feats_root + folder_name
        print("Reading dir:", set_dir)
        mfcc_files = util.traverse_dir(set_dir, '.mfcc')
        extract_fishers.compute_fishers(
            list_n_clusters=cluster_counts,
            list_mfcc_files=mfcc_files,
            out_dir=fishers_out_dir,
            list_files_ubm=ubm_feats_file,
            recipe=recipe,
            folder_name=folder_name,
        )
def do_ivecs_pretrained_mdls():
    """Extract i-vectors for the 'read_text' set with every pretrained UBM.

    The pretrained models are looked up under ``work_dir/data/<recipe>/UBMs/``
    by their ``.mdl`` extension. For each model the ``.mfcc`` files of the
    set are listed and passed to ``extract_ivecs.compute_ivecs_pretr_ubms``.

    Relies on the module-level ``work_dir`` and ``recipe``; returns nothing.
    """
    mfccs_dir = work_dir + 'data/{}/'.format(recipe)
    out_dir = work_dir + 'data/'
    ubm_dir = work_dir + 'data/' + recipe + '/UBMs/'  # where the ubms live

    # Fixed: the filter used to be '.mdll', which contradicted the intended
    # '.mdl' model extension used by the sibling fisher-vector routine.
    list_ubm_files = util.traverse_dir(ubm_dir, '.mdl')

    list_sets = ['read_text']
    for folder_name in list_sets:  # sets where the features live
        print("\nReading dir:", mfccs_dir + folder_name)
        for ubm in list_ubm_files:  # pretrained ubms
            # The number embedded in the model file name is only reported
            # here; it is not forwarded to the extractor.
            n_ubm = util.extract_numbers_from_str(ubm)
            print("\ni-vecs for {} GMMs".format(n_ubm))

            # MFCCs to extract the i-vectors from.
            list_mfcc_files = util.traverse_dir(mfccs_dir + folder_name, '.mfcc')
            extract_ivecs.compute_ivecs_pretr_ubms(
                list_mfcc_files,
                out_dir,
                file_ubm=ubm,
                recipe=recipe,
                folder_name=folder_name,
            )
def do_mfccs():
    """Extract 13-coefficient MFCCs for every set of the 'cold' recipe.

    Wavs are read from ``/opt/project/audio/<folder>`` for each folder in the
    module-level ``list_sets`` and passed to
    ``extract_mfccs.compute_flevel_feats`` with ``/opt/project/data/`` as the
    output directory.
    """
    print("=======MFCC extraction phase========")
    recipe = 'cold'
    audio_root = '/opt/project/audio/'
    feats_out_dir = '/opt/project/data/'

    for folder_name in list_sets:
        print("\nReading dir:", folder_name)
        wav_paths = util.traverse_dir(audio_root + folder_name, '.wav')
        extract_mfccs.compute_flevel_feats(
            wav_paths,
            feats_out_dir,
            num_mfccs=13,
            recipe=recipe,
            folder_name=folder_name,
        )
def sel_spec_wavs():
    """Return the base names of the label-selected 'dementia_new8k' wavs.

    All ``.wav`` files under the recipe's audio directory are listed and
    sorted, narrowed down with ``ah.load_specific`` using the recipe's
    ``labels.csv``, sorted again, and reduced to their file names.

    Returns:
        list of str: file names (no directory part) of the selected wavs.
    """
    recipe = 'dementia_new8k'
    # for ubuntu (native bob kaldi)
    work_dir = '/media/jose/hk-data/PycharmProjects/the_speech/'
    audio_dir = work_dir + 'audio/dementia_new8k'
    labels_csv = work_dir + 'data/{}/labels/labels.csv'.format(recipe)

    all_wavs = util.traverse_dir(audio_dir, file_type='.wav')
    all_wavs.sort()

    selected_wavs = ah.load_specific(
        source_file=labels_csv,
        list_original_audios=all_wavs,
    )
    selected_wavs.sort()

    return [os.path.basename(wav_path) for wav_path in selected_wavs]
def do_ivecs():
    """Extract 256-dimensional i-vectors for every set of the 'cold' recipe.

    For each folder in the module-level ``list_sets`` the ``.mfcc`` files
    under ``/opt/project/data/cold/<folder>`` are listed and passed to
    ``extract_ivecs.compute_ivecs`` together with the training-set MFCC file
    that serves as UBM features.
    """
    print("=======i-vector extraction phase========")
    recipe = 'cold'
    feats_root = '/opt/project/data/{}/'.format(recipe)
    ivecs_out_dir = '/opt/project/data/'
    # Format is: "featureType_recipeName_numberOfDeltas.mfcc"
    ubm_feats_file = '/opt/project/data/cold/train/mfccs_cold_13_train_2del.mfcc'

    for folder_name in list_sets:
        set_dir = feats_root + folder_name
        print("\nReading dir:", set_dir)
        mfcc_files = util.traverse_dir(set_dir, '.mfcc')
        extract_ivecs.compute_ivecs(
            mfcc_files,
            ivecs_out_dir,
            info_num_feats_got=13,
            ivec_dims=256,
            file_ubm_feats=ubm_feats_file,
            recipe=recipe,
            folder_name=folder_name,
        )
def do_fishers():
    """Extract Fisher vectors for every set of the 'cold' recipe.

    For each folder in the module-level ``list_sets`` the ``.mfcc`` files
    under ``<work_dir>/data/cold/<folder>`` are listed and passed to
    ``extract_fishers.compute_fishers`` with the module-level
    ``list_n_clusters`` and the training-set MFCC file as UBM features.
    """
    print("=======fisher-vector extraction phase========")
    project_root = 'C:/Users/Win10/PycharmProjects/the_speech'
    recipe = 'cold'
    feats_root = project_root + '/data/{}/'.format(recipe)
    fishers_out_dir = project_root + '/data/'
    # Format is: "featureType_recipeName_numberOfDeltas.mfcc"
    ubm_feats_file = project_root + '/data/cold/train/mfccs_cold_13_train_2del.mfcc'

    for folder_name in list_sets:
        set_dir = feats_root + folder_name
        print("\nReading dir:", set_dir)
        mfcc_files = util.traverse_dir(set_dir, '.mfcc')
        extract_fishers.compute_fishers(
            list_n_clusters,
            mfcc_files,
            fishers_out_dir,
            info_num_feats=13,
            file_ubm_feats=ubm_feats_file,
            recipe=recipe,
            folder_name=folder_name,
        )
def do_frame_level():
    """Extract frame-level cepstral features for every wav of every set.

    For each folder in the module-level ``list_sets`` the sorted wav list
    under ``audio_dir + folder_name`` is passed to
    ``extract_mfccs.compute_flevel_feats`` once per delta order (0, 1, 2).

    Relies on the module-level ``list_sets``, ``audio_dir``, ``out_dir`` and
    ``recipe``.
    """
    print("=======Frame-level extraction phase========")
    cepstral_type = "mfcc"  # choose between "mfcc" or "plp"
    delta_orders = (0, 1, 2)

    for folder_name in list_sets:
        print("\nReading dir:", folder_name)
        wav_paths = util.traverse_dir(audio_dir + folder_name, '.wav')
        wav_paths.sort()

        for n_deltas in delta_orders:
            extract_mfccs.compute_flevel_feats(
                wav_paths,
                out_dir,
                cepstral_type=cepstral_type,
                num_feats=23,
                recipe=recipe,
                folder_name=folder_name,
                num_deltas=n_deltas,
                obs='',
            )
def do_fishers_pretrained_ubm():
    """Extract Fisher vectors for the erlangen sets with every pretrained UBM.

    The diagonal UBMs are looked up under ``work_dir/data/<recipe>/UBMs/``.
    For each set and each UBM, the MFCC files matching ``*2del.mfcc`` are
    listed and passed to ``extract_fishers.compute_fishers_pretr_ubm``.

    Relies on the module-level ``work_dir`` and ``recipe``; returns nothing.
    """
    feats_root = work_dir + 'data/{}/'.format(recipe)
    fishers_out_dir = work_dir + 'data/'  # where the computed features will live
    ubm_root = work_dir + 'data/' + recipe + '/UBMs/'  # where the diagonal ubms live

    # Files with .mdl or .dubm as format (the latter is more reliable).
    ubm_files = util.traverse_dir(ubm_root, '.mdl')
    n_deltas = 2  # number of deltas of the mfccs

    for folder_name in ('monologue_erlangen', 'readtext_erlangen'):
        set_dir = feats_root + folder_name
        print("\nReading dir:", set_dir)

        for ubm_file in ubm_files:
            # MFCCs to extract the fisher vectors from.
            mfcc_pattern = '*{}del.mfcc'.format(n_deltas)
            mfcc_files = util.traverse_dir_2(set_dir, mfcc_pattern)
            extract_fishers.compute_fishers_pretr_ubm(
                list_mfcc_files=mfcc_files,
                out_dir=fishers_out_dir,
                file_ubm=ubm_file,
                recipe=recipe,
                folder_name=folder_name,
            )
def do_mfccs():
    """Save the labels and extract frame-level features for the erlangen sets.

    The label files for all sets are written once through ``save_labels``.
    Then, for each set, the sorted wav list under ``work_dir/audio/<set>`` is
    passed to ``extract_mfccs.compute_flevel_feats`` once per delta order
    (1 and 2).

    Relies on the module-level ``work_dir`` and ``recipe``; returns nothing.
    """
    print("=======Frame-level extraction phase========")
    audio_dir = work_dir + 'audio/'
    out_dir = work_dir + 'data/'
    list_sets = ['monologue_erlangen', 'readtext_erlangen']
    cepstral_type = "mfcc"  # choose between "mfcc" or "plp"

    # Save the labels: list of sets/tasks (NAME of the folders containing the
    # audios), dir to the audios, output dir. save_labels already walks every
    # set, so it is called once here instead of once per folder, which only
    # rewrote the same label files repeatedly.
    save_labels(list_sets, audio_dir, out_dir + recipe + '/')

    for folder_name in list_sets:
        print("Reading dir:", folder_name)
        list_of_wavs = util.traverse_dir(audio_dir + folder_name, '.wav')
        list_of_wavs.sort()

        for deltas in [1, 2]:
            extract_mfccs.compute_flevel_feats(
                list_of_wavs,
                out_dir,
                cepstral_type=cepstral_type,
                num_feats=20,
                recipe=recipe,
                folder_name=folder_name,
                num_deltas=deltas,
                obs='',
            )