# NOTE(review): this chunk begins mid-call — the enclosing function's `def`
# and earlier statements are outside this view. The fragment below is the
# tail of a model.train(...) call followed by projection saving.
                lc_file=base_out_folder + "lcs/lc_fold_" + str(cur_fold))
    ids, projection, classes = model.get_projection(training_datasets)
    cross_validation.save_projections(projection,
                                      base_out_folder + "/projections/", ids)


"""
Main method. Check the run.sh script for the list of parameters.
"""
if __name__ == "__main__":
    # WARNING(review): eval() on a command-line argument executes arbitrary
    # code. Only acceptable if run.sh is fully trusted; consider
    # ast.literal_eval or json.loads instead — confirm the argument format.
    params_sys = eval(sys.argv[1])
    if params_sys["base_model"] == "module":
        # Train the per-fold base model on a single feature type.
        dl = load_dataset.DatasetLoader()
        params = {"epochs": params_sys["epochs"]}
        dataset = dl.load_dataset(
            params_sys["feature_type"],
            # "binary_feature" arrives as the string "True"/"False", hence
            # the string comparison rather than a bool.
            binary_features=params_sys["binary_feature"] == "True")
        run_base_models(params_sys["base_folder"], params, params_sys["seed"],
                        params_sys["fold"], dataset)
    if params_sys["base_model"] == "joined_seq":
        # Sequential (greedy) combination of the available base models.
        create_dir_structure(params_sys["base_folder"])
        selected_modules = []
        available_modules = list(params_sys["models_to_load"])
        print("Available models:", str(available_modules))
        has_improved = True
        # NOTE(review): the selection loop continues past the end of this
        # chunk — remainder not visible here.
    # NOTE(review): the two methods below belong to a class whose header is
    # outside this view (SimpleConv, judging by the script code further down).
    def add_loss_op(self, pred):
        """Return the cross-entropy loss between `pred` and the target
        placeholder `self.y_` (delegates to the project's `utils` module)."""
        loss = utils.cross_entropy(pred, self.y_)
        return loss

    def add_training_op(self, loss):
        """Return an Adam training op for `loss`.

        Decay schedule is driven by the number of training batches with a
        0.98 decay factor; learning rate comes from the config.
        """
        train_op = utils.adam_opt(loss, self.config.lr,
                                  self.loader.num_trainbatches(), 0.98)
        return train_op


# --- script entry: pick the chromosome to train on (default 21) ---
if len(argv) > 1:
    chromosome = int(argv[1])
else:
    chromosome = 21
config = Config()
# Load windows for one chromosome; entire chromosome kept in memory
# (load_entire=True), coverage channel disabled.
loader = load_dataset.DatasetLoader(chromosome=chromosome, windowSize=config.window,
                                    #custom_load_dataset.DatasetLoader(chromosome=chromosome, windowSize=config.window,
                                    testBatchSize=config.test_batch_size,
                                    seed=1, test_frac=0.025, pos_frac=0.5,
                                    load_coverage=False, load_entire=True)
# Commented-out smoke test of the window-batch iterator:
'''for i in range(100000):
    loader.load_chromosome_window_batch(10, 100)
print(loader.load_chromosome_window_batch(10, 5))
print(loader.chrom_index)
exit()'''
#print(loader.test_data)
conv_net = SimpleConv(config, loader)
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
        # NOTE(review): this chunk begins inside add_training_op — the `def`
        # line is outside this view. Adam optimizer with 0.98 decay, driven
        # by the number of training batches.
        train_op = utils.adam_opt(loss, self.config.lr,
                                  self.loader.num_trainbatches(), 0.98)
        return train_op


# --- script entry: window size from argv (default 50) ---
if len(argv) < 2:
    window = 50
else:
    window = int(argv[1])
print("window {}".format(window))
config = Config(window)
# Chromosome 21 only; 2.5% held out for test, positives balanced to 50%,
# coverage channel disabled.
loader = load_dataset.DatasetLoader(chromosome=21, windowSize=config.window,
                                    batchSize=config.batch_size,
                                    testBatchSize=config.test_batch_size,
                                    seed=1, test_frac=0.025, pos_frac=0.5,
                                    load_coverage=False)
#loader = load_full_dataset_sample_one_pos.DatasetLoader(windowSize=config.window, batchSize=config.batch_size, testBatchSize=config.test_batch_size, seed=1, test_frac=0.05, load_coverage=False)
conv_net = SimpleConv(config, loader, plotTrain=True)
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
# Commented-out post-training analyses:
#losses, val_accuracies = conv_net.fit(sess, save=True)
#conv_net.predictAll(sess, save=True)
#all_results = conv_net.hard_examples(sess)
#hard_positives = [x for x in all_results if x[1]]
def run_joined_models_tree(base_out_folder, base_in_folder, models_to_load,
                           cur_fold, s):
    """Train a boosted-tree meta-model on the stacked per-fold predictions
    of the base models listed in `models_to_load`.

    For cur_fold >= 0, merges each base model's train/test prediction files
    for that fold, joins them with the class labels, trains a
    BoostedTreeModel, and writes the fold's predictions. For cur_fold == -1,
    instead trains a JoinedModel on the merged full-data projections and
    saves the resulting projection. Both paths skip work if the output file
    already exists. `s` seeds the RNGs for reproducibility.
    """
    create_dir_structure(base_out_folder)
    # set random seeds
    set_seeds(s)
    # checks if result file already exists. if so, do not run the algorithm.
    if cur_fold != -1:
        f_to_check = base_out_folder + "/predictions/predictions_fold_" + str(
            cur_fold)
        if file_exists(f_to_check):
            print("not running")
            print("file exists", f_to_check)
            return
        # load the testing and training entrez ids for this fold
        # (fold files are read from the first base model's directory).
        training_entrez, testing_entrez = read_entrez_indexes(
            base_in_folder + models_to_load[0] + "/folds/fold_" + str(cur_fold))
        testing_predictions = [
            base_in_folder + model_name + "/predictions/predictions_fold_" +
            str(cur_fold) for model_name in models_to_load
        ]
        testing_dataset = merge_predictions(testing_predictions,
                                            models_to_load)
        training_predictions = [
            base_in_folder + model_name +
            "/predictions_train/predictions_fold_" + str(cur_fold)
            for model_name in models_to_load
        ]
        training_dataset = merge_predictions(training_predictions,
                                             models_to_load)
        al = load_dataset.DatasetLoader()
        ####
        # Class labels; "?" is the missing-value marker in the CSV.
        classes = pandas.read_csv(al.base_path + "class_labels.csv", sep=",",
                                  header=0, na_values=["?"])
        # Drop rows without an Alzheimer label.
        classes.dropna(subset=["class_Brain.Alzheimer"], inplace=True)
        # Sanitize column names: every non-alphanumeric char becomes "_".
        classes.columns = [
            "".join(c if c.isalnum() else "_" for c in str(x))
            for x in classes.columns
        ]
        ########################################################
        # First column is the entrez id; the rest are class columns.
        class_indexes = classes.columns[1:]
        classes.set_index("entrezId", inplace=True)
        training_dataset = training_dataset.join(classes)
        training_dataset.class_indexes = class_indexes
        testing_dataset = testing_dataset.join(classes)
        testing_dataset.class_indexes = class_indexes
        model = train_model.BoostedTreeModel()
        model.train(training_dataset, testing_dataset)
        # `classes` is rebound here to the evaluated class labels.
        ids, predictions, classes = model.evaluate(testing_dataset)
        CrossValidation.write_results(ids, predictions, classes, cur_fold,
                                      base_out_folder + "/predictions/")
    else:
        if file_exists(base_out_folder + "/projections/_projection.csv"):
            print("not running")
            return
        training_projections = [
            base_in_folder + model_name +
            "/projections/fold_train_-1_projection.csv"
            for model_name in models_to_load
        ]
        training_datasets = merge_projections(training_projections, None)
        # NOTE(review): `epochs` is not defined in this function's scope or
        # parameters — presumably a module-level global; verify.
        model = train_model.JoinedModel(epochs)
        # NOTE(review): unlike "/projections/" below, "lcs/..." has no
        # leading "/" — confirm base_out_folder ends with a slash.
        model.train(training_datasets, training_datasets,
                    lc_file=base_out_folder + "lcs/lc_fold_" + str(cur_fold))
        ids, projection, classes = model.get_projection(training_datasets)
        cross_validation.save_projections(projection,
                                          base_out_folder + "/projections/",
                                          ids)