예제 #1
0
                    lc_file=base_out_folder + "lcs/lc_fold_" + str(cur_fold))
        ids, projection, classes = model.get_projection(training_datasets)
        cross_validation.save_projections(projection,
                                          base_out_folder + "/projections/",
                                          ids)


"""
    Main method.
    Check the run.sh script for the list of parameters.
"""
if __name__ == "__main__":
    params_sys = eval(sys.argv[1])

    if params_sys["base_model"] == "module":
        dl = load_dataset.DatasetLoader()
        params = {"epochs": params_sys["epochs"]}
        dataset = dl.load_dataset(
            params_sys["feature_type"],
            binary_features=params_sys["binary_feature"] == "True")
        run_base_models(params_sys["base_folder"], params, params_sys["seed"],
                        params_sys["fold"], dataset)

    if params_sys["base_model"] == "joined_seq":

        create_dir_structure(params_sys["base_folder"])
        selected_modules = []
        available_modules = list(params_sys["models_to_load"])
        print("Available models:", str(available_modules))

        has_improved = True
예제 #2
0
    def add_loss_op(self, pred):
        """Return the cross-entropy loss between *pred* and the labels self.y_."""
        return utils.cross_entropy(pred, self.y_)

    def add_training_op(self, loss):
        """Return the optimizer op for *loss*.

        Delegates to utils.adam_opt with the configured learning rate, the
        number of training batches, and a 0.98 factor — presumably a
        learning-rate decay rate; confirm against utils.adam_opt.
        """
        batches_per_epoch = self.loader.num_trainbatches()
        return utils.adam_opt(loss, self.config.lr, batches_per_epoch, 0.98)

# Chromosome number comes from the first CLI argument; default to 21.
chromosome = int(argv[1]) if len(argv) > 1 else 21

config = Config()
# Build the data loader for the chosen chromosome. Flag semantics
# (load_coverage, load_entire) are defined by DatasetLoader — see that class.
loader = load_dataset.DatasetLoader(chromosome=chromosome,
                                    windowSize=config.window,
                                    testBatchSize=config.test_batch_size,
                                    seed=1,
                                    test_frac=0.025,
                                    pos_frac=0.5,
                                    load_coverage=False,
                                    load_entire=True)

'''for i in range(100000):
  loader.load_chromosome_window_batch(10, 100)

print(loader.load_chromosome_window_batch(10, 5))
print(loader.chrom_index)
exit()'''

conv_net = SimpleConv(config, loader)

# TF1-style interactive session; variables must be initialized before use.
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
예제 #3
0
        train_op = utils.adam_opt(loss, self.config.lr,
                                  self.loader.num_trainbatches(), 0.98)
        return train_op


# Window size comes from the first CLI argument; default to 50.
window = 50 if len(argv) < 2 else int(argv[1])

print("window {}".format(window))
config = Config(window)
# Loader fixed to chromosome 21 for this experiment; flag semantics are
# defined by DatasetLoader — see that class.
loader = load_dataset.DatasetLoader(chromosome=21, windowSize=config.window,
                                    batchSize=config.batch_size,
                                    testBatchSize=config.test_batch_size,
                                    seed=1, test_frac=0.025, pos_frac=0.5,
                                    load_coverage=False)

conv_net = SimpleConv(config, loader, plotTrain=True)

# TF1-style interactive session; variables must be initialized before use.
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
예제 #4
0
def run_joined_models_tree(base_out_folder, base_in_folder, models_to_load,
                           cur_fold, s):
    """Train and evaluate a boosted-tree model on the merged outputs of
    previously trained base models.

    base_out_folder -- folder receiving this run's predictions/projections
    base_in_folder  -- folder holding each base model's saved outputs
    models_to_load  -- names of the base models whose outputs are merged
    cur_fold        -- cross-validation fold index; -1 selects the
                       projection-only branch below
    s               -- random seed passed to set_seeds
    """

    create_dir_structure(base_out_folder)

    # set random seeds
    set_seeds(s)

    # checks if result file already exists. if so, do not run the algorithm.
    if cur_fold != -1:
        f_to_check = base_out_folder + "/predictions/predictions_fold_" + str(
            cur_fold)
        if file_exists(f_to_check):
            print("not running")
            print("file exists", f_to_check)
            return

        # load the testing and training entrez ids for this fold
        training_entrez, testing_entrez = read_entrez_indexes(
            base_in_folder + models_to_load[0] + "/folds/fold_" +
            str(cur_fold))

        # one saved prediction file per base model for the current fold
        testing_predictions = [
            base_in_folder + model_name + "/predictions/predictions_fold_" +
            str(cur_fold) for model_name in models_to_load
        ]
        testing_dataset = merge_predictions(testing_predictions,
                                            models_to_load)

        training_predictions = [
            base_in_folder + model_name +
            "/predictions_train/predictions_fold_" + str(cur_fold)
            for model_name in models_to_load
        ]

        training_dataset = merge_predictions(training_predictions,
                                             models_to_load)
        # loader instantiated only for its base_path to locate the labels CSV
        al = load_dataset.DatasetLoader()  ####
        classes = pandas.read_csv(al.base_path + "class_labels.csv",
                                  sep=",",
                                  header=0,
                                  na_values=["?"])
        # drop rows missing a value for this reference label column
        classes.dropna(subset=["class_Brain.Alzheimer"], inplace=True)
        # sanitize column names: every non-alphanumeric character becomes "_"
        classes.columns = [
            "".join(c if c.isalnum() else "_" for c in str(x))
            for x in classes.columns
        ]  ########################################################
        class_indexes = classes.columns[1:]
        classes.set_index("entrezId", inplace=True)

        # attach the class labels to the merged prediction features
        training_dataset = training_dataset.join(classes)
        training_dataset.class_indexes = class_indexes

        testing_dataset = testing_dataset.join(classes)
        testing_dataset.class_indexes = class_indexes

        model = train_model.BoostedTreeModel()
        model.train(training_dataset, testing_dataset)
        ids, predictions, classes = model.evaluate(testing_dataset)

        # NOTE(review): results are written via `CrossValidation` (class)
        # here but `cross_validation` (module) in the else-branch below --
        # verify both names resolve at module level.
        CrossValidation.write_results(ids, predictions, classes, cur_fold,
                                      base_out_folder + "/predictions/")
    else:

        if file_exists(base_out_folder + "/projections/_projection.csv"):
            print("not running")
            return

        training_projections = [
            base_in_folder + model_name +
            "/projections/fold_train_-1_projection.csv"
            for model_name in models_to_load
        ]
        training_datasets = merge_projections(training_projections, None)
        # NOTE(review): `epochs` is not defined in this function or any
        # visible scope -- this branch raises NameError unless a
        # module-level `epochs` exists. Using JoinedModel here while the
        # fold branch uses BoostedTreeModel also looks copy-pasted from a
        # sibling function; confirm intent before relying on this branch.
        model = train_model.JoinedModel(epochs)
        model.train(training_datasets,
                    training_datasets,
                    lc_file=base_out_folder + "lcs/lc_fold_" + str(cur_fold))
        ids, projection, classes = model.get_projection(training_datasets)
        cross_validation.save_projections(projection,
                                          base_out_folder + "/projections/",
                                          ids)