def evaluate_joined_models_seq(base_out_folder, base_in_folder, models_to_load,
                               cur_fold, epochs, s):
    """Train a joined model on an internal 70/30 split of one fold's training
    projections and return its weighted AUROC on the held-out 30%.

    Args:
        base_out_folder: Output root; internal learning curves are written
            under "<base_out_folder>lcs/".
        base_in_folder: Input root containing one sub-folder per base model.
        models_to_load: Names of the base models whose projections are merged.
        cur_fold: Cross-validation fold index used to locate the fold files.
        epochs: Number of training epochs for the joined model.
        s: Random seed passed to set_seeds.

    Returns:
        Weighted-average ROC AUC score on the internal test split.
    """
    # set random seeds
    set_seeds(s)
    # load the projection training entrez ids for this fold
    # (the fold's testing ids are not used by this internal evaluation)
    training_entrez, testing_entrez = read_entrez_indexes(
        base_in_folder + models_to_load[0] + "/folds/fold_" + str(cur_fold))
    # merge the per-model training projections into one dataset
    training_projections = [
        base_in_folder + model_name + "/projections/fold_train_" +
        str(cur_fold) + "_projection.csv" for model_name in models_to_load
    ]
    training_datasets = merge_projections(training_projections,
                                          training_entrez)
    # shuffle the rows; the custom class_indexes attribute does not survive
    # sample()/reset_index(), so preserve and restore it explicitly
    class_indexes = training_datasets.class_indexes
    training_datasets = training_datasets.sample(frac=1).reset_index(drop=True)
    training_datasets.class_indexes = class_indexes
    # internal 70/30 train/test split; .copy() ensures we set the
    # class_indexes attribute on real frames rather than on slice views
    # (attribute assignment on a view triggers pandas copy/view warnings
    # and may not stick)
    split_point = int(training_datasets.shape[0] * 0.7)
    internal_training_datasets = training_datasets.iloc[:split_point].copy()
    internal_training_datasets.class_indexes = training_datasets.class_indexes
    internal_testing_datasets = training_datasets.iloc[split_point:].copy()
    internal_testing_datasets.class_indexes = training_datasets.class_indexes
    # train the joined model and score it on the internal test split
    model = train_model.JoinedModel(epochs)
    model.train(internal_training_datasets, internal_testing_datasets,
                lc_file=base_out_folder + "lcs/internal_lc_fold_" +
                str(cur_fold))
    ids, predictions, classes = model.evaluate(internal_testing_datasets)
    auroc = metrics.roc_auc_score(classes, predictions, average="weighted")
    return auroc
def run_joined_models_tree(base_out_folder, base_in_folder, models_to_load,
                           cur_fold, s, epochs=None):
    """Train a boosted-tree stacking model on per-model fold predictions, or
    (when cur_fold == -1) fit a joined model on the whole-dataset projections
    and save the resulting projection.

    Args:
        base_out_folder: Output root for predictions/, projections/ and lcs/.
        base_in_folder: Input root containing one sub-folder per base model.
        models_to_load: Names of the base models to stack.
        cur_fold: Fold index, or -1 for the whole-dataset run.
        s: Random seed passed to set_seeds.
        epochs: Epochs for the whole-dataset JoinedModel run; only used when
            cur_fold == -1. (Bug fix: the original referenced an undefined
            name `epochs` in that branch and raised NameError.)
    """
    create_dir_structure(base_out_folder)
    # set random seeds
    set_seeds(s)
    if cur_fold != -1:
        # checks if result file already exists. if so, do not run.
        f_to_check = (base_out_folder + "/predictions/predictions_fold_" +
                      str(cur_fold))
        if file_exists(f_to_check):
            print("not running")
            print("file exists", f_to_check)
            return
        # load the testing and training entrez ids for this fold
        training_entrez, testing_entrez = read_entrez_indexes(
            base_in_folder + models_to_load[0] + "/folds/fold_" +
            str(cur_fold))
        # merge each base model's fold predictions into stacking features
        testing_predictions = [
            base_in_folder + model_name + "/predictions/predictions_fold_" +
            str(cur_fold) for model_name in models_to_load
        ]
        testing_dataset = merge_predictions(testing_predictions,
                                            models_to_load)
        training_predictions = [
            base_in_folder + model_name +
            "/predictions_train/predictions_fold_" + str(cur_fold)
            for model_name in models_to_load
        ]
        training_dataset = merge_predictions(training_predictions,
                                             models_to_load)
        # attach the class labels, dropping rows with no label information
        classes = pandas.read_csv(load_dataset.DatasetLoader.base_path +
                                  "class_labels.csv", sep=",", header=0,
                                  na_values=["?"])
        classes.dropna(subset=["class_Brain.Alzheimer"], inplace=True)
        class_indexes = classes.columns[1:]
        classes.set_index("entrezId", inplace=True)
        training_dataset = training_dataset.join(classes)
        training_dataset.class_indexes = class_indexes
        testing_dataset = testing_dataset.join(classes)
        testing_dataset.class_indexes = class_indexes
        # train the boosted tree and persist its fold predictions
        model = train_model.BoostedTreeModel()
        model.train(training_dataset, testing_dataset)
        ids, predictions, classes = model.evaluate(testing_dataset)
        CrossValidation.write_results(ids, predictions, classes, cur_fold,
                                      base_out_folder + "/predictions/")
    else:
        # whole-dataset run: skip when the projection was already saved
        if file_exists(base_out_folder + "/projections/_projection.csv"):
            print("not running")
            return
        training_projections = [
            base_in_folder + model_name +
            "/projections/fold_train_-1_projection.csv"
            for model_name in models_to_load
        ]
        training_datasets = merge_projections(training_projections, None)
        # NOTE(review): this branch trains a JoinedModel, not a
        # BoostedTreeModel — confirm that is intentional for the "tree" entry
        # point
        model = train_model.JoinedModel(epochs)
        model.train(training_datasets, training_datasets,
                    lc_file=base_out_folder + "lcs/lc_fold_" + str(cur_fold))
        ids, projection, classes = model.get_projection(training_datasets)
        cross_validation.save_projections(projection,
                                          base_out_folder + "/projections/",
                                          ids)
def run_joined_models(base_out_folder, base_in_folder, models_to_load,
                      cur_fold, epochs, s):
    """Train the joined model on merged base-model projections.

    For cur_fold >= 0 the model is trained on the fold's training
    projections, evaluated on the fold's testing projections, and the
    predictions are written out. For cur_fold == -1 the model is fit on the
    whole dataset's projections; the overall projection, one projection per
    class, and the training-set predictions are all saved. Either branch is
    skipped when its result file already exists.

    Args:
        base_out_folder: Output root (predictions/, projections/, lcs/).
        base_in_folder: Input root with one sub-folder per base model.
        models_to_load: Names of base models whose projections are merged.
        cur_fold: Fold index, or -1 for the whole-dataset run.
        epochs: Training epochs for the joined model.
        s: Random seed.
    """
    create_dir_structure(base_out_folder)
    # set random seeds
    set_seeds(s)
    if cur_fold != -1:
        # per-fold run: do nothing when predictions already exist on disk
        result_file = (base_out_folder + "/predictions/predictions_fold_" +
                       str(cur_fold))
        if file_exists(result_file):
            print("not running")
            print("file exists", result_file)
            return
        train_ids, test_ids = read_entrez_indexes(
            base_in_folder + models_to_load[0] + "/folds/fold_" +
            str(cur_fold))
        # merge each base model's train/test projections for this fold
        train_set = merge_projections([
            base_in_folder + name + "/projections/fold_train_" +
            str(cur_fold) + "_projection.csv" for name in models_to_load
        ], train_ids)
        test_set = merge_projections([
            base_in_folder + name + "/projections/fold_test_" +
            str(cur_fold) + "_projection.csv" for name in models_to_load
        ], test_ids)
        # train, evaluate and persist the fold predictions
        model = train_model.JoinedModel(epochs)
        model.train(train_set, test_set,
                    lc_file=base_out_folder + "lcs/lc_fold_" + str(cur_fold))
        ids, predictions, classes = model.evaluate(test_set)
        CrossValidation.write_results(ids, predictions, classes, cur_fold,
                                      base_out_folder + "/predictions/")
        return
    # whole-dataset run (cur_fold == -1): skip when already saved
    if file_exists(base_out_folder + "/projections/_projection.csv"):
        print("not running")
        return
    full_set = merge_projections([
        base_in_folder + name + "/projections/fold_train_-1_projection.csv"
        for name in models_to_load
    ], None)
    model = train_model.JoinedModel(epochs)
    model.train(full_set, full_set,
                lc_file=base_out_folder + "lcs/lc_fold_" + str(cur_fold))
    # save the overall projection, then one projection per class
    ids, projection, classes = model.get_projection(full_set)
    cross_validation.save_projections(projection,
                                      base_out_folder + "/projections/", ids)
    for class_name in classes.columns:
        ids, projection, classes = model.get_projection(full_set, class_name)
        cross_validation.save_projections(
            projection, base_out_folder + "/projections/class_fold_train_" +
            class_name + "_-1", ids)
    # also persist the model's predictions on the full training data
    ids, predictions, classes = model.evaluate(full_set)
    CrossValidation.write_results(ids, predictions, classes, cur_fold,
                                  base_out_folder + "/predictions/")