def def_export_index(name_of_dataset, df_, target_value, number_of_experiment):
    """Export shuffled KFold train/test index pairs for a dataset as a pickle."""
    list_export = []
    X_split = df_.loc[:, ~df_.columns.isin([target_value])]
    y_split = df_.loc[:, df_.columns.isin([target_value])]
    path_output = tfm_data.def_check_create_path('EXPERIMENTS_kfold', '')
    kfold = KFold(n_splits=number_of_experiment, shuffle=True)
    for train, test in kfold.split(df_):
        # list_export.append([tuple(train), tuple(test)])
        list_export.append([train, test])
    pd.Series(list_export).to_pickle(os.path.join(path_output, str(name_of_dataset) + '.pkl'))
    print(os.path.join(path_output, str(name_of_dataset) + '.pkl'))
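# Hypothetical usage sketch (not from the original scripts; the dataset name
# 'paper' and the 20 splits are illustrative values only):
# def_export_index('paper', df_paper, 'cell_type', 20)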
def def_export_index(name_of_dataset, df_, target_value, list_left_out, number_of_experiment):
    """Export leave-n-cell-types-out train/test index pairs, one pickle per n."""
    X_split = df_.loc[:, ~df_.columns.isin([target_value])]
    y_split = df_.loc[:, df_.columns.isin([target_value])]
    path_output = tfm_data.def_check_create_path('EXPERIMENTS', '')
    for n_cells_out in list_left_out:
        print('n_cells_out,', n_cells_out)
        list_export = def_get_n_psplits(X_split, y_split, y_split[target_value],
                                        n_cells_out, number_of_experiment)
        path_pickle = os.path.join(path_output,
                                   str(name_of_dataset) + '_cell_out_' + str(n_cells_out) + '.pkl')
        pd.Series(list_export).to_pickle(path_pickle)
        print(path_pickle)
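# def_get_n_psplits is defined elsewhere in the repo. A plausible sketch of a
# leave-n-cell-types-out splitter is given below; the random choice of held-out
# cell types and the [train_index, test_index] return format are assumptions,
# not the repo's verified implementation.
import numpy as np

def def_get_n_psplits_sketch(X, y, groups, n_cells_out, number_of_experiment):
    list_splits = []
    unique_groups = np.unique(groups)
    for _ in range(number_of_experiment):
        # hold out n_cells_out whole cell types as the test set
        held_out = np.random.choice(unique_groups, size=n_cells_out, replace=False)
        test_mask = np.isin(groups, held_out)
        list_splits.append([np.where(~test_mask)[0], np.where(test_mask)[0]])
    return list_splits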
config.gpu_options.per_process_gpu_memory_fraction = 0.45
config.gpu_options.allow_growth = True
session = InteractiveSession(config=config)

time_start = dt.datetime.now().time().strftime('%H:%M:%S')  # = time.time()

# DEFAULT VALUES for PAPER DESIGN
epochs_default = 100
batch_size_default = 10

# TARGET VARIABLE NAME
target_ = 'cell_type'

TYPE_OF_SCALING = False

# THE LOCATION of THE RESULT of SCORE and MODEL
path_hyperband = tfm_data.def_check_create_path('kt_result', 'DELETE_hyperband')
path_output_result = tfm_data.def_check_create_path('kt_result', 'design_' + str(TYPE_OF_SCALING))
path_model = tfm_data.def_check_create_path('kt_result', 'models_' + str(TYPE_OF_SCALING))

# IMPORT EXPERIMENT INDEX
path_experiments = os.path.join(os.path.dirname(os.getcwd()), 'data/EXPERIMENTS/')
list_all_model = sorted(os.listdir(path_experiments))
list_experiments = [
    string for string in list_all_model
    if re.match(re.compile('metabolic_signaling_cell_out_'), string)
]

# LOADING REQUIRED DATASETS
time_start = dt.datetime.now().time().strftime('%H:%M:%S')  # = time.time()

# DEFAULT VALUES for PAPER DESIGN
epochs_default = 100
batch_size_default = 10

# TARGET VARIABLE NAME
target_ = 'cell_type'

TYPE_OF_SCALING = [False, True]
for i_row_scaling in TYPE_OF_SCALING:
    TYPE_OF_EXPERIMENT = 'no_co_' + str(i_row_scaling)

    # THE LOCATION of THE RESULT of SCORE and MODEL
    path_model = tfm_data.def_check_create_path('NN_result', 'models_' + TYPE_OF_EXPERIMENT)

    # LOADING REQUIRED DATASETS
    df_weight_signaling, df_weight_metabolic_signaling = tfm_data.def_load_weight_pathways()
    df_paper, df_signaling, df_metabolic_signaling = tfm_data.def_load_dataset(
        ['cell_type'] + list(df_weight_signaling.index.values),
        ['cell_type'] + list(df_weight_metabolic_signaling.index.values),
        row_scaling=i_row_scaling)
    df_weight_ppi_tf_signaling, df_weight_ppi_tf_metabolic_signaling = tfm_data.def_load_weight_ppi_tf(
        list(df_weight_signaling.index.values),
        list(df_weight_metabolic_signaling.index.values))
    df_weight_both = pd.concat(
        [df_weight_ppi_tf_metabolic_signaling, df_weight_metabolic_signaling],
        axis=1)
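# The weight DataFrames above are typically consumed as 0/1 connectivity masks
# for biologically informed layers (rows = genes, columns = pathway/TF nodes).
# A minimal Keras sketch of that idea, assuming df_weight_both is such a binary
# matrix; the mask-multiplication layer is an illustration, not necessarily the
# repo's exact implementation.
import tensorflow as tf

mask_both = tf.constant(df_weight_both.values, dtype=tf.float32)  # genes x nodes

class MaskedDense(tf.keras.layers.Layer):
    """Dense layer whose kernel is elementwise-masked by a prior-knowledge matrix."""

    def __init__(self, mask, **kwargs):
        super().__init__(**kwargs)
        self.mask = mask

    def build(self, input_shape):
        self.kernel = self.add_weight('kernel', shape=self.mask.shape)
        self.bias = self.add_weight('bias', shape=(self.mask.shape[1],),
                                    initializer='zeros')

    def call(self, inputs):
        # zero out gene-to-node connections that are absent from the mask
        return tf.matmul(inputs, self.kernel * self.mask) + self.bias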
time_start = dt.datetime.now().time().strftime('%H:%M:%S')  # = time.time()

# DEFAULT VALUES for PAPER DESIGN
epochs_default = 100
batch_size_default = 10
dense_layer = 100

# TARGET VARIABLE NAME
target_ = 'cell_type'

TYPE_OF_SCALING = [False, True]
for i_row_scaling in TYPE_OF_SCALING:
    # THE LOCATION of THE RESULT of SCORE and MODEL
    path_output = tfm_data.def_check_create_path('NN_result_2', 'design_' + str(i_row_scaling))
    path_model = tfm_data.def_check_create_path('NN_result_2', 'models_' + str(i_row_scaling))

    # IMPORT EXPERIMENT INDEX
    path_experiments = os.path.join(os.path.dirname(os.getcwd()), 'data/EXPERIMENTS/')
    list_all_model = sorted(os.listdir(path_experiments))
    list_experiments = [string for string in list_all_model
                        if re.match(re.compile('default_cell_out'), string)]

    # LOADING REQUIRED DATASETS
    df_weight_signaling, df_weight_metabolic_signaling = tfm_data.def_load_weight_pathways()
    df_paper, df_signaling, df_metabolic_signaling = tfm_data.def_load_dataset(
        ['cell_type'] + list(df_weight_signaling.index.values),
        ['cell_type'] + list(df_weight_metabolic_signaling.index.values),
        row_scaling=i_row_scaling)
    del df_signaling
    del df_metabolic_signaling
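# def_check_create_path is provided by the repo's tfm_data module; a plausible
# sketch is below. The '../data/<main>/<sub>/' layout and the trailing separator
# are assumptions inferred from how the returned path is used above, not the
# verified implementation.
import os

def def_check_create_path_sketch(main_folder, sub_folder):
    path = os.path.join(os.path.dirname(os.getcwd()), 'data', main_folder, sub_folder, '')
    os.makedirs(path, exist_ok=True)  # create the folder tree if it does not exist
    return path  # trailing separator keeps 'path + filename' concatenations valid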
time_start = dt.datetime.now().time().strftime('%H:%M:%S')  # = time.time()

# DEFAULT VALUES for PAPER DESIGN
epochs_default = 100
batch_size_default = 10
dense_layer = 100

# TARGET VARIABLE NAME
target_ = 'cell_type'

TYPE_OF_SCALING = [False, True]
for i_scaling in TYPE_OF_SCALING:
    # THE LOCATION of THE RESULT of SCORE and MODEL
    # path_hyperband_ = tfm_data.def_check_create_path('kt_result', '')
    path_hyperband_ = tfm_data.def_check_create_path('kt_result', 'delete')
    path_output_result = tfm_data.def_check_create_path('kt_result', 'design_no_co_' + str(i_scaling))
    path_model = tfm_data.def_check_create_path('kt_result', 'models_no_co_' + str(i_scaling))

    # LOADING REQUIRED DATASETS
    df_weight_signaling, df_weight_metabolic_signaling = tfm_data.def_load_weight_pathways()
    df_paper, df_signaling, df_metabolic_signaling = tfm_data.def_load_dataset(
        ['cell_type'] + list(df_weight_signaling.index.values),
        ['cell_type'] + list(df_weight_metabolic_signaling.index.values),
        row_scaling=i_scaling, retrieval=False)
    df_weight_dense = pd.DataFrame(df_paper.columns[1:]).set_index('Sample')
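# The 'kt_result' paths above feed a Keras Tuner Hyperband search. A minimal
# sketch of how such a tuner could be wired up, assuming a build_model(hp)
# hypermodel function defined elsewhere in the repo; the objective and project
# name are illustrative, not taken from the original script.
import keras_tuner as kt

tuner = kt.Hyperband(build_model,
                     objective='val_accuracy',
                     max_epochs=epochs_default,
                     directory=path_hyperband_,
                     project_name='cell_type_search')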
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from tensorflow import keras
import tensorflow.keras.backend as K
import warnings
warnings.filterwarnings('ignore')

time_start = dt.datetime.now().time().strftime('%H:%M:%S')  # = time.time()

# Clustering is designed with 20 experiments: the test size is 20%
# (i.e., a 5-fold KFold split), applied 4 times to reach the 20 experiments.

# THE LOCATION of THE RESULT
path_output = tfm_data.def_check_create_path(main_folder='clustering_result', sub_folder='')

# Loading required data
df_weight_pathway_signaling, df_weight_pathway_metabolic_signaling = tfm_data.def_load_weight_pathways()
df_paper, df_paper_signaling, df_paper_metabolic_signaling = tfm_data.def_load_dataset(
    ['cell_type'] + list(df_weight_pathway_signaling.index.values),
    ['cell_type'] + list(df_weight_pathway_metabolic_signaling.index.values),
    row_scaling=False, retrieval=False)

print('Normalization paper data')
df_scaler_ss = tfm_data.def_dataframe_normalize(df_paper, StandardScaler(), 'cell_type')

print('Normalization signaling data')
df_scaler_ss_signaling = tfm_data.def_dataframe_normalize(