Example #1
0
def load_data(config, base_dir):
    """Load the training features, training target and test features.

    Reads the ``'features'``, ``'target_name'`` and ``'cloud'`` entries of
    ``config`` and forwards them, together with ``base_dir``, to
    ``load_datasets`` and ``load_target``.

    Args:
        config: mapping providing the three keys listed above.
        base_dir: passed through unchanged as ``base_dir=`` to both loaders.

    Returns:
        A ``(X_train_all, y_train_all, X_test)`` tuple: the two items returned
        by ``load_datasets`` with the result of ``load_target`` in between.

    Raises:
        KeyError: if ``config`` lacks one of the required keys.
    """
    # Read every setting first so a missing key fails before anything loads.
    feature_names, target, cloud_setting = (
        config['features'],
        config['target_name'],
        config['cloud'],
    )

    # Both loaders receive the same location arguments.
    location = {'base_dir': base_dir, 'cloud': cloud_setting}

    train_features, test_features = load_datasets(feature_names, **location)
    train_target = load_target(target, **location)

    return train_features, train_target, test_features
Example #2
0
def subtype_select(subtype):
    """
    Return features (X) and binary labels (y) for one subtype versus the rest.

    X is the result of ``data_loader.load_dataset()`` with its mean
    subtracted. y is the result of ``data_loader.load_target()`` with
    ``subtype`` re-coded to 1 and the other known subtypes re-coded to 0.

    Args:
        subtype: one of 'HER2+', 'HR+' or 'Triple Neg'.

    Returns:
        The tuple ``(X, y)``.

    Raises:
        ValueError: if ``subtype`` is not one of the known subtypes. This is
            checked before any data is loaded.
    """
    known_subtypes = ('HER2+', 'HR+', 'Triple Neg')

    # Fail fast with a readable message rather than letting list.remove()
    # raise an opaque ValueError after the data has already been loaded.
    if subtype not in known_subtypes:
        raise ValueError(
            "unknown subtype {!r}; expected one of {}".format(
                subtype, list(known_subtypes)))

    X = data_loader.load_dataset()
    y = data_loader.load_target()

    # Mean-centre the features (presumably per column of a pandas DataFrame;
    # confirm against data_loader).
    X = X.subtract(X.mean())

    # Every known subtype except the requested one forms the negative class.
    other_subtypes = [name for name in known_subtypes if name != subtype]

    # Re-code the labels: requested subtype -> 1, all other subtypes -> 0.
    y = y.replace(subtype, 1)
    y = y.replace(other_subtypes, 0)

    return X, y
Example #3
0
def select_subtype(subtype):
    """
    Return features (X) and binary labels (y) for one subtype versus the rest.

    X is the result of ``data_loader.load_dataset()`` with its mean
    subtracted. y is the result of ``data_loader.load_target()`` with
    ``subtype`` re-coded to 1 and the other known subtypes re-coded to 0.

    Args:
        subtype: one of 'HER2+', 'HR+' or 'Triple Neg'.

    Returns:
        The tuple ``(X, y)``.

    Raises:
        ValueError: if ``subtype`` is not one of the known subtypes. This is
            checked before any data is loaded.
    """
    # all subtypes
    all_subtypes = ('HER2+', 'HR+', 'Triple Neg')

    # reject unknown subtypes up front, with a readable message, instead of
    # letting list.remove() fail after the data has already been loaded
    if subtype not in all_subtypes:
        raise ValueError(
            "unknown subtype {!r}; expected one of {}".format(
                subtype, list(all_subtypes)))

    X = data_loader.load_dataset()
    y = data_loader.load_target()

    # apply mean centering for each region (presumably one column per
    # region; confirm against data_loader)
    X = X.subtract(X.mean())

    # the remaining subtypes, i.e. the whole set minus the current subtype
    remaining = [name for name in all_subtypes if name != subtype]

    # re-coding the subtypes to 0 and 1
    y = y.replace(subtype, 1)
    y = y.replace(remaining, 0)

    return X, y
Example #4
0
    models = []
    for i in range(100):
        print("iteration #", i)

        training_set, test_set, training_labels, test_labels = train_test_split(
            dataset, target, test_size=0.33, random_state=i)

        # create a dictionary to store the train and test results
        # in (as used in below functions)
        # using 2/3 original dataset - applying in the inner loop
        records, rfecv = train(training_set, training_labels, i)
        curr_results.append(records)
        models.append(rfecv)
        # using 1/3 original dataset
        result_record.append(test(rfecv, test_set, test_labels))
        # store run summary of each iteration
        # optimisation_results.append(curr_results)
        # print run summary per iteration
        # print(curr_results)
        # print("---------------------------------------------------")

    return curr_results, result_record, models


if __name__ == "__main__":
    # Script entry point: load the features and target, run the
    # parameter/feature optimisation on them, then persist its result.
    dataset, target = data_loader.load_dataset(), data_loader.load_target()

    optimisation_results = optimiseParametersAndFeatures(
        dataset,
        target,
    )

    data_loader.save_optimisation_results(optimisation_results)