Esempio n. 1
0
    mean2 = [20, 15]
    
    #Set up target array
    target = np.zeros(100, dtype=int)
    target[0:50] = '1'
    
    dataset_list = []
    
    # counter for labelling dataset
    counter = 1
    # Second component consistent. Generate first to save time.
    d2_x, d2_y = multivariate_normal(mean2, cov2, comp_size).T
    
    for i in range(len(cov1)):
        
        d1_x, d1_y = multivariate_normal(mean1[i], cov1[i], comp_size).T
        
        #Put components together
        mvn_sim = np.vstack((d1_x, d2_x))
        mvn_sim_df = pd.DataFrame.from_records(mvn_sim)
        dataset_list.append(('ds00%d' % counter, mvn_sim_df))
        counter += 1
        
    return(dataset_list, target)
'''
# Generate the toy datasets together with their label vector.
ds_list, y = p2f.toybox_gen(imp_df)

# Quick sanity report: number of labels returned.
print('Length of y: %s' % (len(y),))

# ...and the shape of the first dataset. Element 1 of each entry is the
# object carrying `.shape` (presumably a (label, DataFrame) pair -- confirm
# against p2f.toybox_gen).
print('SHAPE of X: ')
first_entry = ds_list[0]
print(first_entry[1].shape)
Esempio n. 2
0
# Accumulator list; presumably filled with the tuned tier-1 gamma values
# further down the script -- confirm against the code that follows.
opt_t1_gammas = []

# Using first input dataset to generate toy datasets.
# Index 0 of the entry is used as the display name, index 1 as the suffix
# that selects the CSV file.
seed_entry = inp_dataset_list[0]
seed_csv_path = (
    '../../data/mesa/MESA_CPMG_MBINV2_ManuallyBinnedData_BatchCorrected_LogTransformed_1stcol_%s.csv'
    % seed_entry[1])
inp_df = pd.read_csv(seed_csv_path, sep=',', header=None, index_col=0)

print(
    '\nUsing %s dataset to generate simulated datasets for the purpose of tuning algorithms and hyperperameters.'
    % seed_entry[0])

# Project helpers from p2f: filter/impute with a 0.1 threshold, split into
# X and y, then build the toy datasets from X. (Exact semantics live in
# p2f and are not visible here.)
X_imp = p2f.filt_imp(inp_df, 0.1)
X, y = p2f.tsplit(X_imp)
toy_dataset_list, toy_y = p2f.toybox_gen(X)

# Process each toy dataset in turn: announce it, make sure its output
# directories exist, then scale its data.
for toy_label, toy_X in toy_dataset_list:

    print('\n##### Now running dataset %s through tier 1 #####' % toy_label)

    # Per-dataset output directory, plus a 'plotting/' subdirectory inside it.
    filepath = '../../figs/out/%s/%s/%s/' % (scriptname, nowdate, toy_label)
    plotpath = '%splotting/' % filepath

    # plotpath is nested under filepath, so one recursive makedirs call
    # creates both. exist_ok=True makes this safe on re-runs and also
    # creates a missing 'plotting/' directory when filepath already exists
    # (the previous exists()-guard skipped it in that case).
    os.makedirs(plotpath, exist_ok=True)

    # `scale` is imported elsewhere in the file (presumably
    # sklearn.preprocessing.scale -- confirm).
    toy_X_scaled = scale(toy_X)
Esempio n. 3
0
# Take a single timestamp so the date and time labels derive from the same
# instant.
now = datetime.datetime.now()
nowdate, nowtime = (now.strftime(fmt) for fmt in ("%Y-%m-%d", "%H-%M"))

# Name of script to trace where images came from
scriptname = 'tbgenplot'

# Load the diabetes CSV: no header row, first column used as the index.
X = pd.read_csv(
    '../../data/mesa/MESA_CPMG_MBINV2_ManuallyBinnedData_BatchCorrected_LogTransformed_1stcol_diabetes.csv',
    sep=',',
    header=None,
    index_col=0)

# Project helpers from p2f: filter/impute with a 0.1 threshold, then split
# into X and y. Note that X is rebound here to the split result.
X_imp = p2f.filt_imp(X, 0.1)
X, y = p2f.tsplit(X_imp)

# `scale` is imported elsewhere in the file (presumably
# sklearn.preprocessing.scale -- confirm).
X_scaled = scale(X)

# NOTE: this rebinds y, replacing the value returned by tsplit above.
dataset_list, y = p2f.toybox_gen(X_scaled)

# Report the shape of every generated dataset.
for ds_label, dataset in dataset_list:
    print(dataset.shape)

'''
for ds_label, dataset in dataset_list:
    
    print('\n##### Now running dataset %s #####' %dataset)
    #Create directory if directory does not exist
    filepath = '../../figs/out/%s/%s/%s/' % (scriptname, nowdate, dataset)
    
    if not os.path.exists(filepath):
        os.makedirs(filepath)