def measure_rbp(entry):
    """Train, evaluate and save a pysster model for one data set.

    ``entry`` is accessed by position:

    * ``entry[0]`` and ``entry[1]`` -- input files used for training,
    * ``entry[2]`` and ``entry[3]`` -- input files used for testing,
    * ``entry[4]`` -- name of the run; it is printed together with the
      elapsed time and ``entry[4] + "_pysster/"`` is the output folder.

    For each of the four input files a structure-annotated file
    ``<input>.struct`` is written via ``utils.predict_structures``. A model
    is trained on the first two, used to predict the last two, and the ROC
    plot, precision/recall plot, kernel visualizations and the pickled model
    are written to the output folder.

    The printed time covers structure prediction, training and prediction
    on the test data; plotting and saving are not included.

    Returns:
        None. All results are written to disk or printed.
    """
    import os
    from time import time

    from pysster import utils

    output_folder = entry[4] + "_pysster/"
    # exist_ok=True replaces the former isdir()/makedirs() pair, which could
    # raise FileExistsError if the folder appeared between the two calls.
    os.makedirs(output_folder, exist_ok=True)

    start = time()

    # predict secondary structures for all four input files
    struct_files = []
    for index in range(4):
        struct_file = entry[index] + ".struct"
        utils.predict_structures(entry[index], struct_file, annotate=True)
        struct_files.append(struct_file)

    # NOTE(review): these imports were deliberately placed after the
    # structure prediction in the original code; the order is kept.
    from pysster.Data import Data
    from pysster.Model import Model

    alphabets = ("ACGU", "HIMS")

    # load data
    data = Data(struct_files[:2], alphabets)
    # we need to have at least one test sequence, even though we have a
    # separate test object
    data.train_val_test_split(0.8, 0.1999)

    # training
    params = {"kernel_len": 8}
    model = Model(params, data)
    model.train(data)

    # load and predict test data
    data_test = Data(struct_files[2:], alphabets)
    predictions = model.predict(data_test, "all")

    stop = time()
    print("{}, time in seconds: {}".format(entry[4], stop - start))

    # performance evaluation
    labels = data_test.get_labels("all")
    utils.plot_roc(labels, predictions, output_folder + "roc.pdf")
    utils.plot_prec_recall(labels, predictions, output_folder + "prec.pdf")

    # get motifs
    activations = model.get_max_activations(data_test, "all")
    _ = model.visualize_all_kernels(activations, data_test, output_folder)

    # save model to drive
    utils.save_model(model, "{}model.pkl".format(output_folder))
class Test_Model(unittest.TestCase):
    """Tests for ``Model``: construction, training, prediction and plots."""

    # File name templates of the plots checked for every kernel.
    _KERNEL_PLOTS = (
        "motif_kernel_{}.png",
        "position_kernel_{}.png",
        "activations_kernel_{}.png",
    )

    def setUp(self):
        """Load the RNA test data and build three small models.

        ``m1`` and ``m3`` are created with the same seed, ``m2`` with a
        different one, so tests can compare equally and differently seeded
        models.
        """
        folder = dirname(__file__)
        file_name = folder + "/data/rna.fasta"
        self.data = Data(file_name, ("ACGU", "()."))
        self.params = {
            "conv_num": 1,
            "kernel_num": 3,
            "kernel_len": 5,
            "neuron_num": 2,
            "epochs": 3,
        }
        self.m1 = Model(self.params, self.data, seed=2)
        self.m2 = Model(self.params, self.data, seed=13)
        self.m3 = Model(self.params, self.data, seed=2)

    @staticmethod
    def _layer_weights(model, index):
        """Return the first weight array of layer ``index`` of ``model``."""
        return model.model.layers[index].get_weights()[0]

    def _check_seed_dependence(self, **allclose_kwargs):
        """Check that m1/m3 agree and m2/m3 differ in layers 2 and 6.

        Keyword arguments are forwarded to ``np.allclose``.
        """
        # NOTE(review): layers 2 and 6 are presumably the convolutional and
        # a dense layer -- confirm against the Model architecture.
        for index in (2, 6):
            weights_m1 = self._layer_weights(self.m1, index)
            weights_m2 = self._layer_weights(self.m2, index)
            weights_m3 = self._layer_weights(self.m3, index)
            self.assertTrue(
                np.allclose(weights_m1, weights_m3, **allclose_kwargs))
            self.assertFalse(
                np.allclose(weights_m2, weights_m3, **allclose_kwargs))

    def _check_and_remove_kernel_plots(self, folder, kernel):
        """Assert that all plots of ``kernel`` exist, then delete them.

        All existence checks run before anything is removed, so a failure
        leaves every file in place for inspection.
        """
        paths = [folder + name.format(kernel) for name in self._KERNEL_PLOTS]
        for path in paths:
            self.assertTrue(isfile(path), path)
        for path in paths:
            remove(path)

    def test_model_init(self):
        """Parameters are stored and initial weights depend on the seed."""
        for key in ("conv_num", "kernel_num", "kernel_len", "neuron_num"):
            self.assertEqual(self.m1.params[key], self.params[key])
        # "activation" is not part of the supplied params.
        self.assertEqual(self.m1.params["activation"], "sigmoid")
        self.assertEqual(self._layer_weights(self.m1, 2).shape, (5, 12, 3))
        self._check_seed_dependence()

    def test_model_train_predict(self):
        """Training and prediction give the expected shapes and ranges."""
        for obj in [self.m1, self.m2, self.m3]:
            obj.train(self.data, verbose=False)
            for group, expected_shape in (("test", (3, 3)), ("all", (20, 3))):
                predictions = obj.predict(self.data, group)
                self.assertEqual(predictions.shape, expected_shape)
                self.assertTrue((predictions > 0.49).all())
                self.assertTrue((predictions < 0.51).all())
        # All three models must be trained before their weights are compared.
        self._check_seed_dependence(atol=0.001)

    def test_model_get_max_activations(self):
        """The activations dict has the expected shapes and group."""
        acts = self.m1.get_max_activations(self.data, 'test')
        self.assertEqual(acts['activations'].shape, (3, 3))
        self.assertEqual(acts['labels'].shape, (3, 3))
        self.assertEqual(acts['group'], 'test')

    def test_model_visualize_kernel(self):
        """Single-kernel and all-kernel visualizations write their files."""
        acts = self.m1.get_max_activations(self.data, 'all')
        folder = gettempdir() + '/'
        kernel_num = self.params['kernel_num']

        # individual kernels
        for kernel in range(kernel_num):
            motif, score = self.m1.visualize_kernel(
                acts, self.data, kernel, folder)
            # The plots are removed here so that the checks in the second
            # half only succeed if visualize_all_kernels recreates them.
            self._check_and_remove_kernel_plots(folder, kernel)
            self.assertIsInstance(motif, tuple)
            self.assertIsInstance(motif[0], Motif)
            self.assertTrue(np.isclose(score, 0) or score > 0)

        # all kernels
        motifs = self.m1.visualize_all_kernels(acts, self.data, folder)
        self.assertEqual(len(motifs), 3)
        for x in range(3):
            self.assertIsInstance(motifs[x], tuple)
            self.assertIsInstance(motifs[x][0], Motif)
        for kernel in range(kernel_num):
            self._check_and_remove_kernel_plots(folder, kernel)
        summary = folder + "summary.html"
        self.assertTrue(isfile(summary), summary)
        remove(summary)

    def test_model_plot_clustering(self):
        """plot_clustering runs and leaves no output file for this data."""
        acts = self.m1.get_max_activations(self.data, 'test')
        output_file = gettempdir() + "/clust.png"
        self.m1.plot_clustering(acts, output_file)
        # NOTE(review): the test expects that no file is written; presumably
        # clustering is skipped for this data set -- confirm against
        # Model.plot_clustering.
        self.assertFalse(isfile(output_file))

    def test_model_optimized_inputs(self):
        """Optimized-input plots have the expected image sizes."""
        layer_name = self.m1.model.layers[2].name
        all_nodes_file = gettempdir() + "/test.png"
        one_node_file = gettempdir() + "/test2.png"

        self.m1.visualize_optimized_inputs(
            self.data, layer_name, all_nodes_file)
        self.m1.visualize_optimized_inputs(
            self.data, layer_name, one_node_file, nodes=[0])

        with Image.open(all_nodes_file) as img:
            self.assertEqual(img.size, (1998, 1128))
        with Image.open(one_node_file) as img:
            self.assertEqual(img.size, (1998, 376))

        remove(all_nodes_file)
        remove(one_node_file)