class Test_Grid_Search(unittest.TestCase):
    """Unit tests for the Grid_Search hyperparameter search.

    Uses a small two-class DNA data set shipped next to this test file and a
    tiny parameter grid so the search stays fast.
    """

    def setUp(self):
        folder = dirname(__file__)
        files = [folder + "/data/dna_pos.fasta",
                 folder + "/data/dna_neg.fasta"]
        self.data = Data(files, "ACGT")
        self.params = {'conv_num': [1], 'kernel_num': [2, 4], 'epochs': [1, 2]}
        self.searcher = Grid_Search(self.params)

    def test_grid_search_init(self):
        """The searcher must enumerate the full cartesian product of the grid."""
        # assertEqual/assertIn (instead of assertTrue(a == b) / assertTrue(x in y))
        # report the mismatching values on failure.
        self.assertEqual(self.searcher.params, self.params)
        self.assertEqual(len(self.searcher.candidates), 4)
        # 1 conv_num x 2 kernel_num x 2 epochs = 4 candidate combinations.
        for kernel_num in [2, 4]:
            for epochs in [1, 2]:
                self.assertIn(
                    {'conv_num': 1, 'kernel_num': kernel_num, 'epochs': epochs},
                    self.searcher.candidates)

    def test_grid_search_train(self):
        """train() must return a Model and a 9-line summary table string."""
        # filter tensorflow deprecation warnings to not clutter the unittest output
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            model, table = self.searcher.train(self.data, verbose=False)
        self.assertIsInstance(model, Model)
        self.assertIsInstance(table, str)
        lines = table.split('\n')
        self.assertEqual(len(lines), 9)
        # First three lines are '#'-prefixed grid descriptions, in any order.
        headers = ["# conv_num: [1]", "# kernel_num: [2, 4]", "# epochs: [1, 2]"]
        for line in lines[:3]:
            self.assertIn(line, headers)
        # Fourth line is the column header row.
        for word in lines[3].split():
            self.assertIn(word, ["conv_num", "kernel_num", "epochs", "roc-auc"])
        # One result row per candidate: three parameter values plus the score.
        for line in lines[4:8]:
            self.assertEqual(len(line.split()), 4)
        # The table ends with a trailing newline, i.e. an empty last element.
        self.assertEqual(lines[8], '')
# Split the data, run a small grid search (reporting precision-recall AUC),
# and evaluate the best model on the held-out test set.
# NOTE(review): `data` and `output_folder` are expected to be defined earlier
# in the script — confirm against the surrounding file.
print(data.get_summary())
data.train_val_test_split(portion_train=0.6, portion_val=0.2, seed=3)
print(data.get_summary())

### Model training
params = {"conv_num": [2, 3],
          "kernel_num": [100],
          "kernel_len": [8],
          "dropout_input": [0.1, 0.4]}
searcher = Grid_Search(params)
start = time()
model, summary = searcher.train(data, pr_auc=True, verbose=False)
stop = time()
print("time in minutes: {}".format((stop - start) / 60))
print(summary)

### Performance evaluation
predictions = model.predict(data, "test")
labels = data.get_labels("test")
# Removed the bare `predictions` / `labels` expression statements — they were
# no-op notebook-style echoes with no effect in a script.
utils.plot_roc(labels, predictions, output_folder + "roc.png")
utils.plot_prec_recall(labels, predictions, output_folder + "prec.png")
print(utils.get_performance_report(labels, predictions))
# Tutorial-style run: load a three-class data set using both a sequence
# alphabet ("ACGU") and a structure alphabet ("HIMS"), grid-search a small
# CNN, and evaluate the best model on the test split.
output_folder = "explore_cgi/data/interim/pysster_tutorial_test_10_3_18/"
# exist_ok=True replaces the race-prone isdir()-then-makedirs() check.
os.makedirs(output_folder, exist_ok=True)

# NOTE(review): these absolute input paths are machine-specific — parameterize
# or make them relative if this script is reused elsewhere.
data = Data(["/home/ennisb/pysster/tutorials/data/alu.fa.gz",
             "/home/ennisb/pysster/tutorials/data/rep.fa.gz",
             "/home/ennisb/pysster/tutorials/data/nonrep.fa.gz"],
            ("ACGU", "HIMS"))
print(data.get_summary())
data.train_val_test_split(portion_train=0.6, portion_val=0.2, seed=3)
print(data.get_summary())

### Model training
params = {"conv_num": [2, 3],
          "kernel_num": [20],
          "kernel_len": [20],
          "dropout_input": [0.1, 0.4]}
searcher = Grid_Search(params)
start = time()
model, summary = searcher.train(data, verbose=False)
stop = time()
print("time in minutes: {}".format((stop - start) / 60))
print(summary)

### Performance evaluation
predictions = model.predict(data, "test")
labels = data.get_labels("test")
# Removed the bare `predictions` / `labels` expression statements — they were
# no-op notebook-style echoes with no effect in a script.
utils.plot_roc(labels, predictions, output_folder + "roc.png")
utils.plot_prec_recall(labels, predictions, output_folder + "prec.png")
print(utils.get_performance_report(labels, predictions))