mean2 = [20, 15] #Set up target array target = np.zeros(100, dtype=int) target[0:50] = '1' dataset_list = [] # counter for labelling dataset counter = 1 # Second component consistent. Generate first to save time. d2_x, d2_y = multivariate_normal(mean2, cov2, comp_size).T for i in range(len(cov1)): d1_x, d1_y = multivariate_normal(mean1[i], cov1[i], comp_size).T #Put components together mvn_sim = np.vstack((d1_x, d2_x)) mvn_sim_df = pd.DataFrame.from_records(mvn_sim) dataset_list.append(('ds00%d' % counter, mvn_sim_df)) counter += 1 return(dataset_list, target) ''' ds_list, y = p2f.toybox_gen(imp_df) print('Length of y: %s' % len(y)) print('SHAPE of X: ') print(ds_list[0][1].shape)
# Collector for tier 1 gamma values; starts empty here (presumably filled
# further down the script -- not visible in this excerpt, TODO confirm).
opt_t1_gammas = []

# Using first input dataset to generate toy datasets
inp_df = pd.read_csv(
    '../../data/mesa/MESA_CPMG_MBINV2_ManuallyBinnedData_BatchCorrected_LogTransformed_1stcol_%s.csv'
    % inp_dataset_list[0][1],
    sep=',',
    header=None,
    index_col=0)
print(
    '\nUsing %s dataset to generate simulated datasets for the purpose of tuning algorithms and hyperperameters.'
    % inp_dataset_list[0][0])

# Filter/impute the raw frame, split it into X and y, then derive the
# labelled toy datasets from X. The 0.1 argument to filt_imp is passed
# through unchanged (its meaning is defined in p2f).
X_imp = p2f.filt_imp(inp_df, 0.1)
X, y = p2f.tsplit(X_imp)
toy_dataset_list, toy_y = p2f.toybox_gen(X)

for toy_label, toy_X in toy_dataset_list:
    print('\n##### Now running dataset %s through tier 1 #####' % toy_label)

    # Output locations for this toy dataset: <script>/<date>/<label>/ and a
    # 'plotting/' subdirectory beneath it.
    filepath = '../../figs/out/%s/%s/%s/' % (scriptname, nowdate, toy_label)
    plotpath = '%splotting/' % filepath

    # plotpath is nested under filepath, so a single makedirs call creates
    # both. exist_ok=True makes re-runs on the same date safe and also
    # creates 'plotting/' when only the parent directory already exists,
    # which the previous os.path.exists(filepath) guard did not handle.
    os.makedirs(plotpath, exist_ok=True)

    toy_X_scaled = scale(toy_X)
now = datetime.datetime.now() nowdate = now.strftime("%Y-%m-%d") nowtime = now.strftime("%H-%M") # Name of script to trace where images came from scriptname = 'tbgenplot' X = pd.read_csv('../../data/mesa/MESA_CPMG_MBINV2_ManuallyBinnedData_BatchCorrected_LogTransformed_1stcol_diabetes.csv', sep=',', header=None, index_col=0) X_imp = p2f.filt_imp(X, 0.1) X, y = p2f.tsplit(X_imp) X_scaled = scale(X) dataset_list, y = p2f.toybox_gen(X_scaled) for ds_label, dataset in dataset_list: print(dataset.shape) ''' for ds_label, dataset in dataset_list: print('\n##### Now running dataset %s #####' %dataset) #Create directory if directory does not exist filepath = '../../figs/out/%s/%s/%s/' % (scriptname, nowdate, dataset) if not os.path.exists(filepath): os.makedirs(filepath)