Example #1
0
class Test_Grid_Search(unittest.TestCase):
    """Unit tests for ``Grid_Search``: candidate expansion and ``train`` output."""

    def setUp(self):
        """Load the two DNA FASTA fixtures and create a 1 x 2 x 2 parameter grid."""
        # Local import: only `dirname` is known to be imported at file level.
        from os.path import join

        # join() keeps the path relative when dirname(__file__) is '' (relative
        # __file__); plain `folder + "/data/..."` would point at the filesystem
        # root ("/data/...") in that case.
        folder = join(dirname(__file__), "data")
        files = [join(folder, "dna_pos.fasta"), join(folder, "dna_neg.fasta")]
        self.data = Data(files, "ACGT")
        self.params = {'conv_num': [1], 'kernel_num': [2, 4], 'epochs': [1, 2]}
        self.searcher = Grid_Search(self.params)

    def test_grid_search_init(self):
        """The searcher stores the grid and lists all four parameter combinations."""
        self.assertEqual(self.searcher.params, self.params)
        self.assertEqual(len(self.searcher.candidates), 4)
        # conv_num has a single value, so the grid is kernel_num x epochs.
        for kernel_num in (2, 4):
            for epochs in (1, 2):
                expected = {
                    'conv_num': 1,
                    'kernel_num': kernel_num,
                    'epochs': epochs
                }
                self.assertIn(expected, self.searcher.candidates)

    def test_grid_search_train(self):
        """``train`` returns a ``Model`` and a text table with the expected layout."""
        # filter tensorflow deprecation warnings to not clutter the unittest output
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            model, table = self.searcher.train(self.data, verbose=False)
        self.assertIsInstance(model, Model)
        self.assertIsInstance(table, str)

        lines = table.split('\n')
        self.assertEqual(len(lines), 9)

        # Lines 0-2 echo the grid, one "# name: values" entry each; the order
        # of the entries is deliberately not pinned.
        grid_headers = [
            "# conv_num: [1]", "# kernel_num: [2, 4]", "# epochs: [1, 2]"
        ]
        for line in lines[:3]:
            self.assertIn(line, grid_headers)

        # Line 3 holds the column titles.
        column_titles = ["conv_num", "kernel_num", "epochs", "roc-auc"]
        for word in lines[3].split():
            self.assertIn(word, column_titles)

        # Lines 4-7: one row per candidate, four whitespace-separated fields.
        for row in lines[4:8]:
            self.assertEqual(len(row.split()), 4)

        # The table ends with a newline, so the last split piece is empty.
        self.assertEqual(lines[8], '')
Example #2
0
# NOTE(review): `data` and `output_folder` are used in this section but are not
# assigned anywhere above it in the visible file -- confirm they are defined
# before this code runs.
print(data.get_summary())

# Split with 60% train / 20% validation and a fixed seed, then show the
# summary again.
data.train_val_test_split(portion_train=0.6, portion_val=0.2, seed=3)
print(data.get_summary())

### Model training
# Grid: 2 values for conv_num x 2 values for dropout_input, other
# parameters fixed to a single value.
params = {
    "conv_num": [2, 3], "kernel_num": [100],
    "kernel_len": [8], "dropout_input": [0.1, 0.4],
}
searcher = Grid_Search(params)

# Wall-clock timing of the whole grid search.
start = time()
model, summary = searcher.train(data, pr_auc=True, verbose=False)
stop = time()
elapsed_minutes = (stop - start) / 60
print("time in minutes: {}".format(elapsed_minutes))

print(summary)

## Performance evaluation
predictions = model.predict(data, "test")
predictions  # bare expression: no effect when run as a script

labels = data.get_labels("test")
labels  # bare expression: no effect when run as a script

# Plot files are written by plain concatenation onto output_folder.
roc_path = output_folder + "roc.png"
prec_path = output_folder + "prec.png"
utils.plot_roc(labels, predictions, roc_path)
utils.plot_prec_recall(labels, predictions, prec_path)

report = utils.get_performance_report(labels, predictions)
print(report)
# Folder that receives the ROC and precision-recall plots (note the trailing
# slash: file names are appended by plain concatenation below).
output_folder = "explore_cgi/data/interim/pysster_tutorial_test_10_3_18/"
# exist_ok=True creates the folder only when it is missing and avoids the
# check-then-create race of a separate isdir() test.
os.makedirs(output_folder, exist_ok=True)


# Three input files, each passed as one entry of the file list, together with
# a pair of alphabets.
data = Data(
    [
        "/home/ennisb/pysster/tutorials/data/alu.fa.gz",
        "/home/ennisb/pysster/tutorials/data/rep.fa.gz",
        "/home/ennisb/pysster/tutorials/data/nonrep.fa.gz",
    ],
    ("ACGU", "HIMS"),
)
print(data.get_summary())

# Split with 60% train / 20% validation and a fixed seed, then show the
# summary again.
data.train_val_test_split(portion_train=0.6, portion_val=0.2, seed=3)
print(data.get_summary())

### Model training
# Grid: 2 values for conv_num x 2 values for dropout_input, other
# parameters fixed to a single value.
params = {
    "conv_num": [2, 3],
    "kernel_num": [20],
    "kernel_len": [20],
    "dropout_input": [0.1, 0.4],
}
searcher = Grid_Search(params)
start = time()
model, summary = searcher.train(data, verbose=False)
stop = time()
print("time in minutes: {}".format((stop - start) / 60))

print(summary)

### Performance evaluation
# The bare `predictions` / `labels` expression statements were dropped: they
# only display a value in a notebook/REPL and do nothing in a script.
predictions = model.predict(data, "test")
labels = data.get_labels("test")

utils.plot_roc(labels, predictions, output_folder + "roc.png")
utils.plot_prec_recall(labels, predictions, output_folder + "prec.png")
print(utils.get_performance_report(labels, predictions))