def load_data(config, base_dir):
    """Load the training features, the training target and the test features.

    Args:
        config: Mapping providing the ``'features'``, ``'target_name'`` and
            ``'cloud'`` entries.
        base_dir: Forwarded unchanged as ``base_dir`` to ``load_datasets``
            and ``load_target``.

    Returns:
        The tuple ``(X_train_all, y_train_all, X_test)``: the two values
        returned by ``load_datasets`` with the value returned by
        ``load_target`` placed between them.

    Raises:
        KeyError: If ``config`` is a dict lacking one of the three entries.
    """
    # Resolve every setting before any loading starts, so a missing
    # entry fails fast and no loader runs with partial configuration.
    feature_names = config['features']
    target = config['target_name']
    # Both loaders take the same location arguments; build them once.
    location = {'base_dir': base_dir, 'cloud': config['cloud']}

    train_features, test_features = load_datasets(feature_names, **location)
    train_target = load_target(target, **location)

    return train_features, train_target, test_features
def subtype_select(subtype):
    """Return mean-centred features and one-vs-rest labels for ``subtype``.

    The features come from ``data_loader.load_dataset()`` and have the
    result of their own ``.mean()`` subtracted. The labels come from
    ``data_loader.load_target()``; ``subtype`` is re-coded to 1 and the two
    remaining entries of ``['HER2+', 'HR+', 'Triple Neg']`` are re-coded to 0.

    Args:
        subtype: One of ``'HER2+'``, ``'HR+'`` or ``'Triple Neg'``.

    Returns:
        A ``(X, y)`` pair of centred features and re-coded labels.

    Raises:
        ValueError: If ``subtype`` is not one of the three known subtypes
            (raised by ``list.remove`` after the data has been loaded).
    """
    features = data_loader.load_dataset()
    labels = data_loader.load_target()

    feature_means = features.mean()
    centred = features.subtract(feature_means)

    # Whatever is left after dropping the requested subtype is the
    # "rest" class of the one-vs-rest encoding.
    rest = ['HER2+', 'HR+', 'Triple Neg']
    rest.remove(subtype)

    # Positive class first, then collapse every other subtype to 0.
    encoded = labels.replace(subtype, 1).replace(rest, 0)
    return centred, encoded
def select_subtype(subtype):
    """Build a binary "subtype vs. the others" data set.

    Loads the features and labels through ``data_loader``, subtracts the
    features' ``.mean()`` from them (mean centering), and re-codes the
    labels: ``subtype`` -> 1, the other two known subtypes -> 0.

    Args:
        subtype: The subtype to treat as the positive class; must be
            ``'HER2+'``, ``'HR+'`` or ``'Triple Neg'``.

    Returns:
        ``(X, y)``: the centred features and the 0/1 labels.

    Raises:
        ValueError: From ``list.remove`` when ``subtype`` is not a known
            subtype; this happens only after both loads have completed.
    """
    all_subtypes = ('HER2+', 'HR+', 'Triple Neg')

    X = data_loader.load_dataset()
    y = data_loader.load_target()

    # Mean centering: shift the features by their own mean.
    X = X.subtract(X.mean())

    # The negative class is every known subtype except the requested one.
    negatives = list(all_subtypes)
    negatives.remove(subtype)

    # Re-code in two passes: the requested subtype becomes 1 ...
    positives_marked = y.replace(subtype, 1)
    # ... and all remaining subtypes become 0.
    y = positives_marked.replace(negatives, 0)

    return X, y
models = [] for i in range(100): print("iteration #", i) training_set, test_set, training_labels, test_labels = train_test_split( dataset, target, test_size=0.33, random_state=i) # create a dictionary to store the train and test results # in (as used in below functions) # using 2/3 original dataset - applying in the inner loop records, rfecv = train(training_set, training_labels, i) curr_results.append(records) models.append(rfecv) # using 1/3 original dataset result_record.append(test(rfecv, test_set, test_labels)) # store run summary of each iteration # optimisation_results.append(curr_results) # print run summary per iteration # print(curr_results) # print("---------------------------------------------------") return curr_results, result_record, models if __name__ == "__main__": dataset = data_loader.load_dataset() target = data_loader.load_target() optimisation_results = optimiseParametersAndFeatures(dataset, target) data_loader.save_optimisation_results(optimisation_results)