#Create directory if directory does not exist filepath = '../../figs/out/%s/%s/%s/' % (scriptname, nowdate, dataset) if not os.path.exists(filepath): os.makedirs(filepath) # Import dataset and target if dataset == 'MESA': X = pd.read_csv( '../../data/mesa/MESA_Clinical_data_full_COMBI-BIO_non-verbose.csv', sep=',', header=0, index_col=1) X = p2.filt_imp(X, 0.1) else: X = pd.read_csv('../../data/simulated/mvnsim/mvnsim' + dataset + '.csv', sep=',', header=0, index_col=0) y = np.load('../../data/simulated/mvnsim/target' + dataset + '.npy') #print(y) #print(y.shape) #print(X.shape) ''' p2.distribution_boxplot(X, y,
# Script identifier; used as the first component of the figure output path.
scriptname = 'mesa_kpca2'

# List of datasets to test.
# A longer list is kept here for reference (currently disabled):
#   'diabetes', 'sex', 'cac_binomial', 'cac_extremes', 'family_hx_diabetes',
#   'parent_cvd_65_hx', 'family_hx_cvd', 'bp_treatment', 'diabetes_treatment',
#   'lipids_treatment', 'mi_stroke_hx', 'plaque'
dataset_list = ['diabetes', 'sex', 'cac_binomial']

for dataset in dataset_list:
    print('\n##### Now running dataset %s #####' % dataset)

    # Create the per-dataset output directory if it does not exist.
    # exist_ok=True replaces the os.path.exists() pre-check, which could race
    # with another process creating the same directory between check and create.
    filepath = '../../figs/out/%s/%s/%s/' % (scriptname, nowdate, dataset)
    os.makedirs(filepath, exist_ok=True)

    # Load the CSV for this dataset: no header row, first column is the index.
    X = pd.read_csv(
        '../../data/mesa/MESA_CPMG_MBINV2_ManuallyBinnedData_BatchCorrected_LogTransformed_1stcol_%s.csv'
        % dataset,
        sep=',',
        header=None,
        index_col=0)

    # NOTE(review): filt_imp presumably filters and imputes missing values with
    # 0.1 as a threshold -- confirm against p2funcs.
    X_imp = p2f.filt_imp(X, 0.1)
    X_imp_df = pd.DataFrame.from_records(X_imp)

    # NOTE(review): tsplit presumably separates the target from the features --
    # confirm against p2funcs.
    X, y = p2f.tsplit(X_imp_df)

    X_scaled = scale(X)
#Create directory if directory does not exist filepath = '../../figs/out/%s/%s/%s/' % (scriptname, nowdate, dataset) if not os.path.exists(filepath): os.makedirs(filepath) # Import dataset and target if dataset == 'MESA': X = pd.read_csv( '../../data/mesa/MESA_Clinical_data_full_COMBI-BIO_non-verbose.csv', sep=',', header=0, index_col=1) X = filt_imp(X, 0.1) else: X = pd.read_csv('../../data/simulated/mvnsim/mvnsim' + dataset + '.csv', sep=',', header=0, index_col=0) y = np.load('../../data/simulated/mvnsim/target' + dataset + '.npy') #print(y) #print(y.shape) #print(X.shape) distribution_boxplot( X,
# Collect optimal tier1 gammas
opt_t1_gammas = []

# Use the first input dataset to generate the toy datasets.
# Each inp_dataset_list entry is indexed as [0] for the display label and
# [1] for the suffix used in the CSV file name.
inp_df = pd.read_csv(
    '../../data/mesa/MESA_CPMG_MBINV2_ManuallyBinnedData_BatchCorrected_LogTransformed_1stcol_%s.csv'
    % inp_dataset_list[0][1],
    sep=',',
    header=None,
    index_col=0)
print(
    '\nUsing %s dataset to generate simulated datasets for the purpose of tuning algorithms and hyperperameters.'
    % inp_dataset_list[0][0])

# NOTE(review): filt_imp / tsplit / toybox_gen semantics live in p2funcs;
# presumably filter+impute, split off the target, and build labelled toy
# datasets respectively -- confirm there.
X_imp = p2f.filt_imp(inp_df, 0.1)
X, y = p2f.tsplit(X_imp)
toy_dataset_list, toy_y = p2f.toybox_gen(X)

for toy_label, toy_X in toy_dataset_list:
    print('\n##### Now running dataset %s through tier 1 #####' % toy_label)

    # Create the output directory and its 'plotting/' subdirectory.
    # plotpath is nested inside filepath, so one makedirs call creates both.
    # exist_ok=True makes reruns safe and still creates plotpath when filepath
    # already exists (the old exists() guard skipped it in that case).
    filepath = '../../figs/out/%s/%s/%s/' % (scriptname, nowdate, toy_label)
    plotpath = '%splotting/' % filepath
    os.makedirs(plotpath, exist_ok=True)
import pandas as pd import numpy as np from numpy.random import multivariate_normal import p2funcs as p2f inp_df = pd.read_csv( '../../data/mesa/MESA_CPMG_MBINV2_ManuallyBinnedData_BatchCorrected_LogTransformed_Data.csv', sep=',') imp_df = p2f.filt_imp(inp_df, 0.1) ''' def toybox_gen(inp_df): #Define size of component using input data cols, rows = inp_df.shape comp_size = [cols, int(round(rows/2))] cov1 = np.array([[[0.3, 0.2], [0.2, 0.2]], [[0.6, 0.4], [0.4, 0.4]], [[1.2, 0.8], [0.8, 0.8]], [[2.4, 1.6], [1.6, 1.6]], [[6, 4], [4, 4]],[[9, 6], [6, 6]]]) cov2 = np.array([[12, 8],[8, 8]]) mean1 = np.array([[20, 15], [20, 15], [20, 15], [20, 15], [20, 15], [20.5, 15.5]]) mean2 = [20, 15] #Set up target array target = np.zeros(100, dtype=int) target[0:50] = '1' dataset_list = [] # counter for labelling dataset counter = 1