def do_fishers_pretrained_ubm():
    """Extract Fisher vectors using already-trained diagonal UBM files.

    For each cluster count in the module-level ``list_n_clusters`` and each
    delta order in ``(0, 1, 2)``, this looks up the matching ``.dubm`` files
    under the UBM directory and the frame-level feature files of every set,
    then passes both lists to ``extract_fishers.compute_fishers_pretr_ubm_2``.

    Reads the module-level names ``work_dir``, ``recipe``,
    ``ubm_folder_name`` and ``list_n_clusters``. The list of sets is local
    to this function.

    NOTE(review): another function with this same name appears elsewhere in
    the file; if both are live code, the later definition replaces this one.
    """
    feature_dir = work_dir + 'data/{}/'.format(recipe)
    out_dir = work_dir + 'data/'  # root folder handed to the extractor for its output
    ubm_dir = work_dir + 'data/UBMs/' + ubm_folder_name + '/ivec_models/'  # searched for .dubm files
    set_names = ['dementia_new8k']

    for n_gauss in list_n_clusters:
        for n_deltas in (0, 1, 2):
            # Descriptor of the frame-level features:
            # [n_features/dimension, deltas, cepstral type ('mfcc' or 'plp')].
            feats_info = [20, n_deltas, 'mfcc']
            n_feats, _, feat_type = feats_info

            for set_name in set_names:
                set_dir = feature_dir + set_name
                print("\nReading dir:", set_dir)

                # UBM files whose name encodes this cluster count and feature config.
                ubm_pattern = '*_{}g_{}{}-{}del_{}.dubm'.format(
                    n_gauss, n_feats, feat_type, n_deltas, ubm_folder_name)
                ubm_files = util.traverse_dir_2(ubm_dir, ubm_pattern)

                # Frame-level feature files of the current set.
                feats_pattern = '*{}_{}_{}del.{}'.format(
                    n_feats, set_name, n_deltas, feat_type)
                feature_files = util.traverse_dir_2(set_dir, feats_pattern)

                extract_fishers.compute_fishers_pretr_ubm_2(
                    list_mfcc_files=feature_files,
                    out_dir=out_dir,
                    feats_info=feats_info,
                    list_files_ubm=ubm_files,
                    recipe=recipe,
                    folder_name=set_name,
                )
def do_ivecs():
    """Drive i-vector extraction for every set in ``list_sets``.

    The UBM training input is a single feature file located under
    ``data/UBMs/<ubm_folder_name>``; it is passed as a one-element list to
    ``extract_ivecs.compute_ivecs`` together with the feature files found
    in each set folder.

    Reads the module-level names ``work_dir``, ``recipe``,
    ``ubm_folder_name``, ``list_sets``, ``list_n_clusters`` and ``out_dir``.

    NOTE(review): another function with this same name appears elsewhere in
    the file; if both are live code, the later definition replaces this one.
    """
    print("=======i-vector extraction phase========")
    mfccs_dir = work_dir + 'data/{}/'.format(recipe)
    ubm_dir = work_dir + 'data/UBMs/{}'.format(ubm_folder_name)

    for n_deltas in (2,):
        # Descriptor of the MFCCs: [n_features, deltas, feature type].
        feats_info = [20, n_deltas, 'mfcc']
        n_feats, _, feat_type = feats_info

        # Kept as a list so that a UBM trained on several sets (e.g. train
        # and dev) can be expressed by adding more files.
        ubm_train_file = ubm_dir + '/mfcc_{}_{}_{}del.mfcc'.format(
            n_feats, ubm_folder_name, n_deltas)
        ubm_train_files = [ubm_train_file]

        for set_name in list_sets:
            set_dir = mfccs_dir + set_name
            print("\nReading dir:", set_dir)

            feats_pattern = '*{}_{}_{}del.{}'.format(
                n_feats, set_name, n_deltas, feat_type)
            mfcc_files = util.traverse_dir_2(set_dir, feats_pattern)

            extract_ivecs.compute_ivecs(
                list_n_gauss=list_n_clusters,
                list_mfcc_files=mfcc_files,
                out_dir=out_dir,
                list_files_ubm=ubm_train_files,
                recipe=recipe,
                mfcc_info=feats_info,
                folder_name=set_name,
            )
def do_fishers():
    """Drive Fisher-vector extraction for delta orders 0, 1 and 2.

    For each delta order, a single fixed feature file under
    ``data/demencia94ABC/wav16k_split_long`` is passed as the UBM input and
    the feature files of every set in ``list_sets`` are handed to
    ``extract_fishers.compute_fishers``.

    Reads the module-level names ``work_dir``, ``recipe``, ``list_sets``,
    ``list_n_clusters`` and ``out_dir``.

    NOTE(review): another function with this same name appears elsewhere in
    the file; if both are live code, the later definition replaces this one.
    """
    print("=======fisher-vector extraction phase========")
    feature_dir = work_dir + '/data/{}/'.format(recipe)

    for n_deltas in (0, 1, 2):
        # Descriptor of the frame-level features:
        # [n_features/dimension, deltas, cepstral type ('mfcc' or 'plp')].
        feats_info = [20, n_deltas, 'mfcc']
        n_feats, _, feat_type = feats_info
        obs = ''  # currently unused in this function

        # File name format: "featureType_recipeName_nMFCCs_nDeltas.mfcc".
        ubm_train_file = work_dir + 'data/demencia94ABC/wav16k_split_long/mfcc_demencia94ABC_20_wav16k_split_long_{}del.mfcc'.format(n_deltas)
        ubm_train_files = [ubm_train_file]

        for set_name in list_sets:
            feats_pattern = '*{}_{}_{}del.{}'.format(
                n_feats, set_name, n_deltas, feat_type)
            mfcc_files = util.traverse_dir_2(feature_dir + set_name, feats_pattern)
            print(mfcc_files)

            extract_fishers.compute_fishers(
                list_n_clusters,
                mfcc_files,
                out_dir,
                feats_info=feats_info,
                list_files_ubm=ubm_train_files,
                recipe=recipe,
                folder_name=set_name,
            )
def do_dimension_reduction():
    """Fit a PCA per delta order and save a PCA-reduced copy of each feature file.

    For each delta order in ``(0, 1, 2)``:

    1. ``pca_trainer`` is called on the training feature file under
       ``data/mask/train`` with ``n_components=0.97``.
    2. For every set in ``list_sets``, each matching feature file is passed
       through ``pca_transformer`` and the result is stored with
       ``util.save_pickle``.

    Each output is written into the set folder and named after its input
    file, with ``<obs>_pca`` inserted before the extension (e.g.
    ``x_40_train_0del.mfcc`` -> ``x_40_train_0del_hires_pca.mfcc``).
    Previously the output path was built from the glob-style template
    itself: it lacked a path separator, contained a literal ``*``, and was
    the same for every input file, so each result overwrote the previous
    one.

    Reads the module-level names ``work_dir``, ``recipe``, ``list_sets``,
    ``util``, ``pca_trainer`` and ``pca_transformer``.
    """
    import os  # local import: only used to derive the output file names

    print("=======dimension reduction phase========")
    feature_dir = work_dir + '/data/{}/'.format(recipe)

    for delta in [0, 1, 2]:
        # Descriptor of the frame-level features:
        # [n_features/dimension, deltas, cepstral type ('mfcc' or 'plp')].
        feats_info = [40, delta, 'mfcc']
        # Suffix describing the feature configuration, e.g. '_hires' when the
        # MFCCs were extracted with the 'hires' parameters.
        obs = '_hires'

        list_files_ubm = [
            work_dir + '/data/mask/train/{}_mask_{}_train_{}del{}.{}'.format(
                feats_info[2], feats_info[0], delta, obs, feats_info[2])
        ]
        # Train the PCA on the training set only.
        pca = pca_trainer(list_files_ubm[0], n_components=0.97)

        for folder_name in list_sets:
            set_dir = feature_dir + folder_name
            print("\nReading dir:", set_dir)
            list_mfcc_file = util.traverse_dir_2(
                set_dir,
                '*{}_{}_{}del.{}'.format(feats_info[0], folder_name,
                                         feats_info[1], feats_info[2]))

            for item in list_mfcc_file:  # transform each dataset
                reduced_data = pca_transformer(pca, item)

                # One output per input: reuse the input's base name so that
                # results from different files do not collide.
                # Assumes `item` is a file path (str or path-like) -- TODO confirm
                # against util.traverse_dir_2.
                stem, ext = os.path.splitext(os.path.basename(item))
                out_path = os.path.join(set_dir, stem + obs + '_pca' + ext)
                util.save_pickle(out_path, reduced_data)
def do_fishers():
    """Drive Fisher-vector extraction using the recipe's own training features.

    Only delta order 0 is processed. The UBM input is the training feature
    file of the current ``recipe`` (under ``data/<recipe>/train``); the
    feature files of every set in ``list_sets`` are handed to
    ``extract_fishers.compute_fishers``.

    Reads the module-level names ``work_dir``, ``recipe``, ``list_sets``,
    ``list_n_clusters`` and ``out_dir``.

    NOTE(review): another function with this same name appears elsewhere in
    the file; if both are live code, the later definition wins.
    """
    print("=======fisher-vector extraction phase========")
    feature_dir = work_dir + '/data/{}/'.format(recipe)

    for n_deltas in (0,):
        # Descriptor of the frame-level features:
        # [n_features/dimension, deltas, cepstral type ('mfcc' or 'plp')].
        feats_info = [23, n_deltas, 'mfcc']
        n_feats, _, feat_type = feats_info
        # Feature-config suffix such as '_hires'; currently unused in this function.
        obs = ''

        # File name format: "featureType_recipeName_nMFCCs_nDeltas.mfcc".
        ubm_train_file = work_dir + '/data/{}/train/{}_{}_{}_train_{}del.{}'.format(
            recipe, feat_type, recipe, n_feats, n_deltas, feat_type)
        ubm_train_files = [ubm_train_file]

        for set_name in list_sets:
            set_dir = feature_dir + set_name
            print("\nReading dir:", set_dir)

            feats_pattern = '*{}_{}_{}del.{}'.format(
                n_feats, set_name, n_deltas, feat_type)
            mfcc_files = util.traverse_dir_2(set_dir, feats_pattern)
            print(mfcc_files)

            extract_fishers.compute_fishers(
                list_n_clusters,
                mfcc_files,
                out_dir,
                feats_info=feats_info,
                list_files_ubm=ubm_train_files,
                recipe=recipe,
                folder_name=set_name,
            )
def do_ivecs():
    """Drive i-vector extraction with a fixed UBM training file.

    Only delta order 2 is processed. The UBM input is a single feature file
    under ``data/demencia94ABC/wav16k_split_long``; the feature files of
    every set in ``list_sets`` are handed to ``extract_ivecs.compute_ivecs``.

    Reads the module-level names ``work_dir``, ``recipe``, ``list_sets``,
    ``list_n_clusters`` and ``out_dir``.

    NOTE(review): another function with this same name appears elsewhere in
    the file; if both are live code, the later definition wins.
    """
    print("=======i-vector extraction phase========")
    mfccs_dir = work_dir + 'data/{}/'.format(recipe)

    for n_deltas in (2,):
        # Descriptor of the MFCCs: [n_features, deltas, feature type].
        feats_info = [20, n_deltas, 'mfcc']
        n_feats, _, feat_type = feats_info

        ubm_train_file = work_dir + 'data/demencia94ABC/wav16k_split_long/mfcc_demencia94ABC_20_wav16k_split_long_{}del.mfcc'.format(n_deltas)
        ubm_train_files = [ubm_train_file]

        for set_name in list_sets:
            set_dir = mfccs_dir + set_name
            print("\nReading dir:", set_dir)

            feats_pattern = '*{}_{}_{}del.{}'.format(
                n_feats, set_name, n_deltas, feat_type)
            mfcc_files = util.traverse_dir_2(set_dir, feats_pattern)

            extract_ivecs.compute_ivecs(
                list_n_gauss=list_n_clusters,
                list_mfcc_files=mfcc_files,
                out_dir=out_dir,
                list_files_ubm=ubm_train_files,
                recipe=recipe,
                mfcc_info=feats_info,
                folder_name=set_name,
            )
def do_fishers_pretrained_ubm():
    """Extract Fisher vectors once per pretrained UBM file and per set.

    Every ``.mdl`` file returned by ``util.traverse_dir`` for the recipe's
    ``UBMs`` folder is combined with each of the two Erlangen sets: the
    2-delta MFCC files of the set are looked up and passed, together with
    the UBM file, to ``extract_fishers.compute_fishers_pretr_ubm``.

    Reads the module-level names ``work_dir`` and ``recipe``. The list of
    sets and the output root are local to this function.

    NOTE(review): another function with this same name appears elsewhere in
    the file; if both are live code, the later definition wins.
    """
    mfccs_dir = work_dir + 'data/{}/'.format(recipe)
    out_dir = work_dir + 'data/'  # root folder handed to the extractor for its output
    ubm_dir = work_dir + 'data/' + recipe + '/UBMs/'  # folder searched for pretrained UBMs

    # The original author notes that '.dubm' is a more reliable format than
    # '.mdl'; this function searches for '.mdl'.
    ubm_files = util.traverse_dir(ubm_dir, '.mdl')

    mfcc_n_deltas = 2  # number of deltas of the MFCCs to read
    mfcc_pattern = '*{}del.mfcc'.format(mfcc_n_deltas)
    set_names = ['monologue_erlangen', 'readtext_erlangen']

    for set_name in set_names:
        set_dir = mfccs_dir + set_name
        print("\nReading dir:", set_dir)

        for ubm_file in ubm_files:
            # The MFCC list is looked up again for every UBM, as in the
            # original implementation.
            mfcc_files = util.traverse_dir_2(set_dir, mfcc_pattern)
            extract_fishers.compute_fishers_pretr_ubm(
                list_mfcc_files=mfcc_files,
                out_dir=out_dir,
                file_ubm=ubm_file,
                recipe=recipe,
                folder_name=set_name,
            )
# --- i-vector extraction with pretrained UBM files (.fubm) ---
# Module-level script section: for every cluster count and every set, look up
# the matching .fubm files and the frame-level feature files, then call
# extract_ivecs.extract_ivecs on them.
feature_dir = work_dir + 'data/{}/'.format(recipe)
out_dir = work_dir + 'data/'  # root folder handed to the extractor for its output
ubm_dir = work_dir + 'data/UBMs/' + ubm_folder_name + '/ivec_models/'  # folder searched for UBM files

for g in list_n_clusters:
    # Feature descriptors: [n_features/dimension, deltas, feature type].
    # One describes the features named in the UBM file, the other the
    # frame-level features that are read from each set.
    feats_info_ubm = [20, 0, 'mfcc']
    feats_info_flevel = [20, 0, 'fbank']

    for folder_name in list_sets:
        # UBM files for this cluster count; the '.fubm' extension is searched
        # here (the original author notes these are the full UBMs).
        list_ubm_files = util.traverse_dir_2(
            ubm_dir,
            '*_{}g_{}{}-{}del_{}.fubm'.format(
                g,
                feats_info_ubm[0],
                feats_info_ubm[2],
                feats_info_ubm[1],
                ubm_folder_name,
            ),
        )

        # Frame-level feature files of the current set.
        list_mfcc_files = util.traverse_dir_2(
            feature_dir + folder_name + '/',
            '*{}_{}_{}del.{}'.format(
                feats_info_flevel[0],
                folder_name,
                feats_info_flevel[1],
                feats_info_flevel[2],
            ),
        )

        extract_ivecs.extract_ivecs(
            list_mfcc_files=list_mfcc_files,
            g=g,
            list_fubms=list_ubm_files,
            mfcc_info=feats_info_ubm,
            recipe=recipe,
            out_dir=out_dir,
            folder_name=folder_name,
        )

# Hard-coded path to one specific UBM file; not referenced by the statements above.
fubmfile = '/media/jose/hk-data/PycharmProjects/the_speech/data/UBMs/wav16k_split_long/ivec_models/fubm_mdl_256g_20mfcc-0del_wav16k_split_long.fubm'