def setUp(self):
    """Create the fixtures shared by the tests.

    Stores three attributes on the test case:

    * ``data`` -- a ``Data`` object built from the two FASTA files that
      live in the ``data`` folder next to this module, together with the
      string ``"ACGT"`` (presumably the sequence alphabet -- confirm
      against ``Data``).
    * ``params`` -- the hyper-parameter grid handed to the searcher.
    * ``searcher`` -- a ``Grid_Search`` constructed from that grid.
    """
    here = dirname(__file__)
    # Both input files are addressed relative to this module's directory.
    fasta_paths = [
        here + relative
        for relative in ("/data/dna_pos.fasta", "/data/dna_neg.fasta")
    ]
    self.data = Data(fasta_paths, "ACGT")
    self.params = {
        'conv_num': [1],
        'kernel_num': [2, 4],
        'epochs': [1, 2],
    }
    self.searcher = Grid_Search(self.params)
Exemple #2
0
class Test_Grid_Search(unittest.TestCase):
    """Tests for ``Grid_Search``: candidate expansion and training output.

    The assertions use the specialised ``unittest`` helpers
    (``assertEqual``, ``assertIn``, ``assertIsInstance``) so that a failing
    check reports the offending values instead of "False is not true".
    """

    def setUp(self):
        """Build the data set, the parameter grid and the searcher.

        The two FASTA files are looked up in the ``data`` folder next to
        this module.
        """
        folder = dirname(__file__)
        files = [
            folder + "/data/dna_pos.fasta", folder + "/data/dna_neg.fasta"
        ]
        self.data = Data(files, "ACGT")
        self.params = {'conv_num': [1], 'kernel_num': [2, 4], 'epochs': [1, 2]}
        self.searcher = Grid_Search(self.params)

    def test_grid_search_init(self):
        """The searcher keeps the grid and exposes four candidates.

        Every combination of ``kernel_num`` in (2, 4) and ``epochs`` in
        (1, 2) with ``conv_num`` 1 must be present in ``candidates``.
        """
        self.assertEqual(self.searcher.params, self.params)
        self.assertEqual(len(self.searcher.candidates), 4)
        for kernel_num in (2, 4):
            for epochs in (1, 2):
                expected = {
                    'conv_num': 1,
                    'kernel_num': kernel_num,
                    'epochs': epochs
                }
                self.assertIn(expected, self.searcher.candidates)

    def test_grid_search_train(self):
        """``train`` returns a ``Model`` and a nine-line summary string.

        Expected layout of the summary when split on newlines: three
        ``# name: values`` header lines (order not fixed), one line of
        column names, four rows with four whitespace-separated fields
        each, and a trailing empty string (the text ends with a newline).
        """
        # filter tensorflow deprecation warnings to not clutter the unittest output
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            model, table = self.searcher.train(self.data, verbose=False)
        self.assertIsInstance(model, Model)
        self.assertIsInstance(table, str)

        lines = table.split('\n')
        self.assertEqual(len(lines), 9)

        # The header order is not checked, only membership of each line.
        expected_headers = [
            "# conv_num: [1]", "# kernel_num: [2, 4]", "# epochs: [1, 2]"
        ]
        for header in lines[:3]:
            self.assertIn(header, expected_headers)

        expected_columns = ["conv_num", "kernel_num", "epochs", "roc-auc"]
        for word in lines[3].split():
            self.assertIn(word, expected_columns)

        for row in lines[4:8]:
            self.assertEqual(len(row.split()), 4)
        self.assertEqual(lines[8], '')
Exemple #3
0
# Attach the extra feature held in `indel_len_feat` to the data set,
# flagged as non-categorical.
data.load_additional_data(indel_len_feat, is_categorical=False)

print(data.get_summary())

# Split with 60% training and 20% validation portions and a fixed seed;
# presumably the remaining 20% becomes the test set -- confirm against
# `train_val_test_split`. The summary is printed again after the split.
data.train_val_test_split(portion_train=0.6, portion_val=0.2, seed=3)
print(data.get_summary())

### Model training
# Hyper-parameter grid: two values each for `conv_num` and `dropout_input`,
# one value each for `kernel_num` and `kernel_len`.
params = {
    "conv_num": [2, 3],
    "kernel_num": [100],
    "kernel_len": [8],
    "dropout_input": [0.1, 0.4]
}
searcher = Grid_Search(params)
# Time the whole search; `train` returns the model and a summary that is
# printed below.
# NOTE(review): `pr_auc=True` presumably selects PR-AUC as the metric --
# confirm against `Grid_Search.train`.
start = time()
model, summary = searcher.train(data, pr_auc=True, verbose=False)
stop = time()
# Assumes `time()` returns seconds, hence the division by 60.
print("time in minutes: {}".format((stop - start) / 60))

print(summary)

## Performance evaluation
predictions = model.predict(data, "test")
# Bare expression: no effect when run as a script (presumably left over
# from an interactive session where it displayed the value).
predictions

labels = data.get_labels("test")
# Bare expression, same remark as for `predictions` above.
labels

# Write the ROC plot to "roc.png"; the path is built by plain string
# concatenation, so `output_folder` must already end with a path separator.
utils.plot_roc(labels, predictions, output_folder + "roc.png")