scriptname = 'mesa_kpca2'

# List of datasets to test.
# Full set of available dataset labels, kept for reference:
#   'diabetes', 'sex', 'cac_binomial', 'cac_extremes', 'family_hx_diabetes',
#   'parent_cvd_65_hx', 'family_hx_cvd', 'bp_treatment', 'diabetes_treatment',
#   'lipids_treatment', 'mi_stroke_hx', 'plaque'
dataset_list = ['diabetes', 'sex', 'cac_binomial']

for dataset in dataset_list:

    print('\n##### Now running dataset %s #####' % dataset)

    # Output directory for this script / run date / dataset combination.
    # `nowdate` is defined elsewhere in the file.
    filepath = '../../figs/out/%s/%s/%s/' % (scriptname, nowdate, dataset)

    # exist_ok=True creates the directory tree only when it is missing and
    # avoids the check-then-create race of os.path.exists() + os.makedirs().
    os.makedirs(filepath, exist_ok=True)

    # Headerless CSV; the first column is used as the row index.
    X = pd.read_csv(
        '../../data/mesa/MESA_CPMG_MBINV2_ManuallyBinnedData_BatchCorrected_LogTransformed_1stcol_%s.csv' % dataset,
        sep=',',
        header=None,
        index_col=0)

    # NOTE(review): presumably filters and imputes missing values using a
    # 0.1 threshold -- confirm against p2f.filt_imp.
    X_imp = p2f.filt_imp(X, 0.1)
    X_imp_df = pd.DataFrame.from_records(X_imp)

    # NOTE(review): presumably splits the target column (y) from the feature
    # columns (X) -- confirm against p2f.tsplit. X is rebound here from the
    # raw frame to the split-off features.
    X, y = p2f.tsplit(X_imp_df)

    X_scaled = scale(X)
# Collect optimal tier1 gammas
opt_t1_gammas = []

# Use the first input dataset to generate the toy datasets.
# Entries of inp_dataset_list are indexed as [0] for the label that is
# printed and [1] for the suffix used in the CSV file name.
inp_df = pd.read_csv(
    '../../data/mesa/MESA_CPMG_MBINV2_ManuallyBinnedData_BatchCorrected_LogTransformed_1stcol_%s.csv' % inp_dataset_list[0][1],
    sep=',',
    header=None,
    index_col=0)

print(
    '\nUsing %s dataset to generate simulated datasets for the purpose of tuning algorithms and hyperperameters.'
    % inp_dataset_list[0][0])

# NOTE(review): presumably filters and imputes missing values using a 0.1
# threshold -- confirm against p2f.filt_imp.
X_imp = p2f.filt_imp(inp_df, 0.1)

# NOTE(review): presumably splits the target column (y) from the feature
# columns (X) -- confirm against p2f.tsplit.
X, y = p2f.tsplit(X_imp)

# toybox_gen returns an iterable of (label, data) pairs plus a second value
# stored as toy_y.
toy_dataset_list, toy_y = p2f.toybox_gen(X)

for toy_label, toy_X in toy_dataset_list:

    print('\n##### Now running dataset %s through tier 1 #####' % toy_label)

    # Output directories for this script / run date / toy dataset.
    # `scriptname` and `nowdate` are defined elsewhere in the file.
    filepath = '../../figs/out/%s/%s/%s/' % (scriptname, nowdate, toy_label)
    plotpath = '%splotting/' % filepath

    # plotpath lives inside filepath, so a single makedirs call creates both.
    # exist_ok=True makes re-runs safe and also creates the plotting
    # subdirectory when filepath already exists without it, which the
    # previous exists()-guarded version skipped.
    os.makedirs(plotpath, exist_ok=True)

    toy_X_scaled = scale(toy_X)