Example #1
0
def evaluate_joined_models_seq(base_out_folder, base_in_folder, models_to_load,
                               cur_fold, epochs, s):
    """Train a joined model on an internal 70/30 split of one fold's training
    projections and return its weighted AUROC on the internal test portion.

    Reads per-model projection CSVs for ``cur_fold`` from ``base_in_folder``,
    merges them, shuffles, splits 70/30, trains ``train_model.JoinedModel`` for
    ``epochs`` epochs (learning-curve file written under ``base_out_folder``),
    and scores the held-out 30% with ``metrics.roc_auc_score``.
    """
    # Make the shuffle/training reproducible.
    set_seeds(s)

    # Entrez ids for this fold; the fold file of the first model is taken as
    # canonical for all models.
    fold_file = (base_in_folder + models_to_load[0] + "/folds/fold_" +
                 str(cur_fold))
    training_entrez, testing_entrez = read_entrez_indexes(fold_file)

    # One projection CSV per model, all merged into a single dataset.
    projection_files = []
    for model_name in models_to_load:
        projection_files.append(base_in_folder + model_name +
                                "/projections/fold_train_" + str(cur_fold) +
                                "_projection.csv")
    merged = merge_projections(projection_files, training_entrez)

    # Shuffle rows; pandas operations drop ad-hoc attributes, so the
    # class_indexes attribute is stashed and restored around the shuffle.
    saved_class_indexes = merged.class_indexes
    merged = merged.sample(frac=1).reset_index(drop=True)
    merged.class_indexes = saved_class_indexes

    # Internal 70/30 train/test split of the fold's training data.
    split_at = int(merged.shape[0] * 0.7)
    internal_train = merged.iloc[:split_at]
    internal_train.class_indexes = merged.class_indexes
    internal_test = merged.iloc[split_at:]
    internal_test.class_indexes = merged.class_indexes

    model = train_model.JoinedModel(epochs)
    model.train(internal_train,
                internal_test,
                lc_file=base_out_folder + "lcs/internal_lc_fold_" +
                str(cur_fold))

    ids, predictions, classes = model.evaluate(internal_test)

    return metrics.roc_auc_score(classes, predictions, average="weighted")
Example #2
0
def run_joined_models_tree(base_out_folder, base_in_folder, models_to_load,
                           cur_fold, s, epochs=None):
    """Train/evaluate a boosted-tree stacking model over per-model predictions.

    For a real fold (``cur_fold != -1``) the per-model prediction files are
    merged, joined with the class-label table, used to train
    ``train_model.BoostedTreeModel``, and the fold predictions are written out.
    For ``cur_fold == -1`` the full-dataset projections are merged and a
    ``train_model.JoinedModel`` is trained to produce and save a projection.

    Args:
        base_out_folder: output root (predictions/, projections/, lcs/).
        base_in_folder: input root containing one sub-folder per model.
        models_to_load: list of model sub-folder names to stack.
        cur_fold: fold index, or -1 for the whole-dataset run.
        s: random seed.
        epochs: epoch count for the JoinedModel used in the ``cur_fold == -1``
            branch. BUG FIX: the original body referenced a free name
            ``epochs`` that was not a parameter, so that branch always raised
            ``NameError``; it is now an optional keyword argument (keeping the
            original positional interface intact) and is validated explicitly.

    Returns:
        None. Results are written to disk; the function returns early if the
        expected output file already exists.
    """
    create_dir_structure(base_out_folder)

    # set random seeds
    set_seeds(s)

    # checks if result file already exists. if so, do not run the algorithm.
    if cur_fold != -1:
        f_to_check = base_out_folder + "/predictions/predictions_fold_" + str(
            cur_fold)
        if file_exists(f_to_check):
            print("not running")
            print("file exists", f_to_check)
            return

        # load the testing and training entrez ids for this fold
        training_entrez, testing_entrez = read_entrez_indexes(
            base_in_folder + models_to_load[0] + "/folds/fold_" +
            str(cur_fold))

        testing_predictions = [
            base_in_folder + model_name + "/predictions/predictions_fold_" +
            str(cur_fold) for model_name in models_to_load
        ]
        testing_dataset = merge_predictions(testing_predictions,
                                            models_to_load)

        training_predictions = [
            base_in_folder + model_name +
            "/predictions_train/predictions_fold_" + str(cur_fold)
            for model_name in models_to_load
        ]

        training_dataset = merge_predictions(training_predictions,
                                             models_to_load)

        # Class labels; rows with a missing Alzheimer label are dropped,
        # then the table is indexed by entrez id for the joins below.
        classes = pandas.read_csv(load_dataset.DatasetLoader.base_path +
                                  "class_labels.csv",
                                  sep=",",
                                  header=0,
                                  na_values=["?"])
        classes.dropna(subset=["class_Brain.Alzheimer"], inplace=True)
        class_indexes = classes.columns[1:]
        classes.set_index("entrezId", inplace=True)

        training_dataset = training_dataset.join(classes)
        training_dataset.class_indexes = class_indexes

        testing_dataset = testing_dataset.join(classes)
        testing_dataset.class_indexes = class_indexes

        model = train_model.BoostedTreeModel()
        model.train(training_dataset, testing_dataset)
        ids, predictions, classes = model.evaluate(testing_dataset)

        CrossValidation.write_results(ids, predictions, classes, cur_fold,
                                      base_out_folder + "/predictions/")
    else:

        if file_exists(base_out_folder + "/projections/_projection.csv"):
            print("not running")
            return

        training_projections = [
            base_in_folder + model_name +
            "/projections/fold_train_-1_projection.csv"
            for model_name in models_to_load
        ]
        training_datasets = merge_projections(training_projections, None)

        # NOTE(review): this "tree" variant trains a JoinedModel here, unlike
        # the BoostedTreeModel above — possibly a copy-paste from
        # run_joined_models; confirm intent before changing.
        if epochs is None:
            raise ValueError(
                "epochs must be provided when cur_fold == -1 "
                "(JoinedModel training requires an epoch count)")
        model = train_model.JoinedModel(epochs)
        model.train(training_datasets,
                    training_datasets,
                    lc_file=base_out_folder + "lcs/lc_fold_" + str(cur_fold))
        ids, projection, classes = model.get_projection(training_datasets)
        # NOTE(review): lowercase `cross_validation` here vs `CrossValidation`
        # above — presumably module vs class; verify both names resolve.
        cross_validation.save_projections(projection,
                                          base_out_folder + "/projections/",
                                          ids)
Example #3
0
def run_joined_models(base_out_folder, base_in_folder, models_to_load,
                      cur_fold, epochs, s):
    """Train and evaluate a joined model over merged per-model projections.

    For a real fold (``cur_fold != -1``): merges each model's train/test
    projection CSVs for that fold, trains ``train_model.JoinedModel`` for
    ``epochs`` epochs, and writes the fold predictions. For ``cur_fold == -1``:
    trains on the whole-dataset projections, saves the overall projection,
    one per-class projection, and the predictions.

    Skips work (with a message) when the expected output file already exists.
    Returns None; all results are written under ``base_out_folder``.
    """

    create_dir_structure(base_out_folder)

    # set random seeds
    set_seeds(s)

    # load the projection testing and training entrez ids for this fold. If the results alread exist, do nothing.
    if cur_fold != -1:
        f_to_check = base_out_folder + "/predictions/predictions_fold_" + str(
            cur_fold)
        if file_exists(f_to_check):
            print("not running")
            print("file exists", f_to_check)
            return

        # The fold file of the first model is taken as canonical for all
        # models being joined.
        training_entrez, testing_entrez = read_entrez_indexes(
            base_in_folder + models_to_load[0] + "/folds/fold_" +
            str(cur_fold))

        # gets the projection
        training_projections = [
            base_in_folder + model_name + "/projections/fold_train_" +
            str(cur_fold) + "_projection.csv" for model_name in models_to_load
        ]
        training_datasets = merge_projections(training_projections,
                                              training_entrez)

        testing_projections = [
            base_in_folder + model_name + "/projections/fold_test_" +
            str(cur_fold) + "_projection.csv" for model_name in models_to_load
        ]
        testing_datasets = merge_projections(testing_projections,
                                             testing_entrez)

        # Trains the model and gets the predictions.
        model = train_model.JoinedModel(epochs)
        model.train(training_datasets,
                    testing_datasets,
                    lc_file=base_out_folder + "lcs/lc_fold_" + str(cur_fold))
        ids, predictions, classes = model.evaluate(testing_datasets)

        # Writes the results.
        CrossValidation.write_results(ids, predictions, classes, cur_fold,
                                      base_out_folder + "/predictions/")

    # Runs the training algorithm to the whole dataset
    else:
        if file_exists(base_out_folder + "/projections/_projection.csv"):
            print("not running")
            return

        training_projections = [
            base_in_folder + model_name +
            "/projections/fold_train_-1_projection.csv"
            for model_name in models_to_load
        ]
        # None entrez filter: merge over all available ids for the full run.
        training_datasets = merge_projections(training_projections, None)
        model = train_model.JoinedModel(epochs)
        # NOTE(review): the model is trained and "tested" on the same data
        # here — presumably intentional for the final full-dataset fit.
        model.train(training_datasets,
                    training_datasets,
                    lc_file=base_out_folder + "lcs/lc_fold_" + str(cur_fold))
        ids, projection, classes = model.get_projection(training_datasets)
        cross_validation.save_projections(projection,
                                          base_out_folder + "/projections/",
                                          ids)

        # One additional projection per class label; assumes `classes` is a
        # DataFrame-like object exposing `.columns` — confirm against
        # JoinedModel.get_projection.
        for class_name in classes.columns:
            ids, projection, classes = model.get_projection(
                training_datasets, class_name)
            cross_validation.save_projections(
                projection, base_out_folder +
                "/projections/class_fold_train_" + class_name + "_-1", ids)

        # Finally, predictions of the full-dataset model on its own training
        # data (cur_fold is -1 here, so the file is predictions_fold_-1).
        ids, predictions, classes = model.evaluate(training_datasets)
        CrossValidation.write_results(ids, predictions, classes, cur_fold,
                                      base_out_folder + "/predictions/")