def load_embeddings():
    """Load the pre-computed image embeddings and image-filename lists
    for the train and validation splits of the image-captioning data.

    Returns a dict keyed by 'img_embeds_train', 'img_filenames_train',
    'img_embeds_val' and 'img_filenames_val'.
    """
    base = DATA_DIR + 'image_captioning/'
    pickles = {
        'img_embeds_train': 'train_img_embeds.pickle',
        'img_filenames_train': 'train_img_fns.pickle',
        'img_embeds_val': 'val_img_embeds.pickle',
        'img_filenames_val': 'val_img_fns.pickle',
    }
    return {key: read_pickle(base + fname) for key, fname in pickles.items()}
def load_data(_x, _y, load_mode):
    """Load a feature matrix and its diagnosis labels.

    Parameters
    ----------
    _x : str
        Path to the feature file (plain-text matrix or pickle, per *load_mode*).
    _y : str
        Path to a headerless CSV with two columns: patient id and diagnosis.
    load_mode : str
        'txt' to read features with ``np.loadtxt``, 'pickle' to read them
        with ``util.read_pickle``.

    Returns
    -------
    tuple
        ``(x, y)`` where *x* is the feature container and *y* is a DataFrame
        with columns ``patient_id``, ``diagnosis`` (categorical) and the
        integer category code ``diag_code``.

    Raises
    ------
    ValueError
        If *load_mode* is neither 'txt' nor 'pickle'.  (The original code
        silently returned ``x = None`` in that case, deferring the failure
        to the caller.)
    """
    if load_mode == 'txt':
        x = np.loadtxt(_x)
    elif load_mode == 'pickle':
        x = util.read_pickle(_x)
    else:
        raise ValueError("load_mode must be 'txt' or 'pickle', got %r" % (load_mode,))
    y = pd.read_csv(_y, header=None)
    y.columns = ['patient_id', 'diagnosis']
    y.diagnosis = pd.Categorical(y.diagnosis)
    # Integer code per diagnosis category (alphabetical category order).
    y['diag_code'] = y.diagnosis.cat.codes
    return x, y
def concatenate_list_of_deltas2(original, deltas1, deltas2):
    """Concatenate each base feature array with its matching first- and
    second-order delta arrays, casting each result to float32.

    The three input sequences are walked in lockstep; the output is a list
    with one concatenated float32 array per input item.
    """
    return [
        np.float32(np.concatenate((base, d1, d2)))
        for base, d1, d2 in zip(original, deltas1, deltas2)
    ]


if __name__ == '__main__':
    # --- Input fbank pickles ---
    file_fbanks = 'C:/Users/Win10/PycharmProjects/the_speech/data/fbanks/fbanks_dem_40'
    file_fbanks_bea = 'C:/Users/Win10/PycharmProjects/the_speech/data/fbanks/fbanks_ubm_dem_40'
    list_fbanks = util.read_pickle(file_fbanks)
    fbanks_bea = np.vstack(util.read_pickle(file_fbanks_bea))

    # --- Output paths for the PCA-reduced features ---
    file_pca_fbanks = 'C:/Users/Win10/PycharmProjects/the_speech/data/fbanks/fbanks_dem_40_PCA'
    file_pca_bea = 'C:/Users/Win10/PycharmProjects/the_speech/data/fbanks/fbanks_ubm_dem_40_PCA_SP'

    # Min-max scale both feature sets, then choose the component count
    # from the background (UBM) data.
    bea_scaled, list_dem_scaled = scale_min_max(fbanks_bea, list_fbanks)
    c = best_components(bea_scaled)

    # Reduce dimensionality with PCA.
    fbanks_reduced, bea_reduced = run_pca(bea_scaled, list_dem_scaled, c)

    # Delta computation (currently disabled):
    # list_fbanks_deltas = compute_deltas(list_fbanks, 1)
    # bea_deltas = python_speech_features.base.delta(feat=fbanks_bea, N=1)
def main():
    """Train UBM / i-vector extractor models over several Gaussian counts
    and extract i-vectors for the Alzheimer MFCC set, saving each matrix
    of i-vectors to a plain-text file.

    FIX: the original source had the final print's string literal split
    across two physical lines ("i-vectors saved / to:"), a syntax error;
    it is reconstructed here as a single statement.  Two unused locals
    (``set_``, ``set_models``) were also removed.
    """
    work_dir = '/opt/project'
    obs1 = 'aug_2del'
    obs = '2del_aug-ubm'
    obs_ivec = '2del_aug-ubm-augv1-mf'
    num_mfccs = '20'
    ivecs_dim = 256

    # ---Input Files---
    # MFCCs for i-vector extraction and for UBM training.
    file_mfccs_ivec = '/opt/project/data/hc/alzheimer/mfccs_dem_{}_{}'.format(
        num_mfccs, obs1)
    file_mfccs_ubm = '/opt/project/data/hc/alzheimer/mfccs_ubm_dem_{}_{}'.format(
        num_mfccs, obs)

    # Load MFCCs for UBM, stacked into one matrix.
    mfccs_wav_ubm = np.vstack(util.read_pickle(file_mfccs_ubm))
    # Load per-utterance MFCCs for i-vector extraction.
    list_mfccs_ivecs = util.read_pickle(file_mfccs_ivec)
    # group per type (original, noised, stretched, pitched) corresponding to each spk.
    # and join (concatenate) 3 wavs per speaker
    # list_mfccs_joint = util.join_speakers_wavs(util.group_per_audio_type(list_mfccs_ivecs))

    num_gauss = [2, 4, 8, 16, 32, 64, 128]
    for g in num_gauss:
        # ---OUTPUT FILES---
        # i-vecs
        ivector_2D_file = work_dir + '/data/ivecs/alzheimer/ivecs-' + str(
            g) + '-{}mf-{}--{}i'.format(num_mfccs, obs_ivec, ivecs_dim)
        # models for i-vecs
        file_diag_ubm_model = work_dir + '/data/models/ivecs_alz/dubm_mdl_{}g_dem_{}'.format(
            g, obs_ivec)
        file_full_ubm_model = work_dir + '/data/models/ivecs_alz/fubm_mdl_{}g_dem_{}'.format(
            g, obs_ivec)
        file_ivec_extractor_model = work_dir + '/data/models/ivecs_alz/ivec_mdl_{}g_dem_{}'.format(
            g, obs_ivec)

        # Train diagonal UBM, full UBM and the i-vector extractor.
        model_dubm, model_fubm, model_ivector = train_models(
            mfccs_wav_ubm, list_mfccs_ivecs, file_diag_ubm_model,
            file_full_ubm_model, file_ivec_extractor_model, g, ivecs_dim)

        # Extract one i-vector per utterance.
        print("Extracting i-vecs...")
        ivectors_list = []
        # Number of Gaussians to pre-select grows with the model size.
        n_gselect = int(np.log2(g))
        print(n_gselect)
        for i2 in list_mfccs_ivecs:
            ivector_array = bob.kaldi.ivector_extract(i2, model_fubm, model_ivector,
                                                      num_gselect=n_gselect)
            ivectors_list.append(ivector_array)
        a_ivectors = np.vstack(ivectors_list)
        # a_ivectors_3d = np.expand_dims(a_ivectors, axis=1)
        print("i-vectors shape:", a_ivectors.shape)

        # Save i-vectors to a txt file
        np.savetxt(ivector_2D_file, a_ivectors)
        print("i-vectors saved to:", ivector_2D_file)