class RemoveModelsTests(unittest.TestCase):
    """Test cases for removing models from a non-empty phenoscoring db."""

    def setUp(self):
        """Delete any existing db, build a new one, and run an IMPC update."""
        complete = CompleteTestConfig()
        self.dbfile = complete.db
        # make sure no db file is left over before building
        remove_db(self.dbfile)
        self.pipeline = Phenoscoring(complete)
        self.pipeline.build()
        Phenoscoring(IMPCTestConfig()).update()
        # table handles shared by the tests below
        self.desctab = ModelDescriptionTable(self.dbfile)
        self.phenstab = ModelPhenotypeTable(self.dbfile)
        self.scoretab = ModelScoreTable(self.dbfile)

    def tearDown(self):
        """Delete the test db once a test has finished."""
        remove_db(self.dbfile)

    def test_clear_models(self):
        """can remove all models at once."""
        tables = (self.desctab, self.phenstab, self.scoretab)
        # every table must hold something before clearing
        for table in tables:
            self.assertGreater(table.count_rows(), 0)
        # wipe all models in one go
        Phenoscoring(IMPCTestConfig()).clearmodels()
        for table in tables:
            self.assertEqual(table.count_rows(), 0)

    def test_remove_models(self):
        """can remove a partial set of data"""
        tables = (self.desctab, self.phenstab, self.scoretab)
        # row counts before the removal
        counts_before = [table.count_rows() for table in tables]
        # removal is driven by a small descriptions file only
        removal_config = IMPCTestConfig()
        removal_config.model_descriptions = "prep-IMPC-descriptions-update.tsv"
        removal_config.model_phenotypes = None
        Phenoscoring(removal_config).remove()
        # each table should now hold fewer rows than before
        messages = (
            "number of models should decrease",
            "number of phenotypes should decrease",
            "number of score entries should decrease",
        )
        for table, previous, msg in zip(tables, counts_before, messages):
            self.assertLess(table.count_rows(), previous, msg)
def setUp(self):
    """Delete any existing db, build a new one, and run an IMPC update."""
    complete = CompleteTestConfig()
    self.dbfile = complete.db
    # make sure no db file is left over before building
    remove_db(self.dbfile)
    self.pipeline = Phenoscoring(complete)
    self.pipeline.build()
    Phenoscoring(IMPCTestConfig()).update()
    # table handles shared by the tests
    self.desctab = ModelDescriptionTable(self.dbfile)
    self.phenstab = ModelPhenotypeTable(self.dbfile)
    self.scoretab = ModelScoreTable(self.dbfile)
def test_compute_scores(self):
    """Run the single prepared compute packet and count the stored scores."""
    references = ["DISEASE:1", "DISEASE:3"]
    models = ["MGI_MA:001_hom", "MGI_MA:001_het"]
    packets = prep_compute_packets(
        self.config, references=references, models=models
    )
    self.assertEqual(len(packets), 1, "one packet only")
    only_packet = packets[0]
    only_packet.run()
    # presumably one score row per (model, reference) pair -- confirm
    self.assertEqual(ModelScoreTable(self.dbfile).count_rows(), 4)
def test_build_both(self):
    """can update database sequentially with MGI and IMPC"""
    # run the MGI update first, then the IMPC update
    for config_class in (MGITestConfig, IMPCTestConfig):
        Phenoscoring(config_class()).update()

    num_descriptions = ModelDescriptionTable(self.dbfile).count_rows()
    self.assertEqual(num_descriptions, 14, "8 IMPC, 6 MGI")

    num_phenotypes = ModelPhenotypeTable(self.dbfile).count_rows()
    self.assertEqual(num_phenotypes, 23, "9 MGI, 14 IMPC")

    num_scores = ModelScoreTable(self.dbfile).count_rows()
    self.assertGreater(num_scores, 0, "score table is non-empty")
def get_highscore_pairs(dbpath, threshold):
    """Collect (model, reference) pairs whose scores exceed a threshold.

    Rows are read from the model score table at ``dbpath``. A row is kept
    only when both its "general" and its "specific" value are strictly
    greater than ``threshold``.

    Returns a list of ``(model, reference)`` tuples in iteration order.
    """
    score_rows = DBGenerator(ModelScoreTable(dbpath)).next()
    return [
        (row["model"], row["reference"])
        for row in score_rows
        if row["general"] > threshold and row["specific"] > threshold
    ]
def setUpClass(cls):
    """Build a db from the MGI test config and seed the score table by hand."""
    mgi_config = MGITestConfig()
    mgi_config.scale_oo_scores = False
    cls.dbfile = mgi_config.db
    # NOTE(review): check_file presumably resolves the obo path relative
    # to the db location -- confirm against its definition
    mgi_config.obo = check_file(mgi_config.obo, mgi_config.db)
    cls.pipeline = Phenoscoring(mgi_config)
    cls.pipeline.build()

    # hand-written score rows; the two numbers are presumably the
    # general and specific scores, in that order -- confirm
    seed_rows = [
        ("model:1", "DISEASE:1", "stamp", 0.95, 0.98),
        ("model:2", "DISEASE:1", "stamp", 0.94, 0.96),
        ("model:3", "DISEASE:1", "stamp", 0.24, 0.96),
        ("model:4", "DISEASE:2", "stamp", 0.92, 0.95),
        ("model:5", "DISEASE:2", "stamp", 0.86, 0.85),
        ("model:6", "DISEASE:3", "stamp", 0.96, 0.95),
    ]
    score_table = ModelScoreTable(mgi_config.db)
    for seed in seed_rows:
        score_table.add(*seed)
    score_table.save()
def test_update_skip_compute(self):
    """run update but skip score calculation"""
    # record how many models and scores exist before the update
    descriptions = ModelDescriptionTable(self.dbfile)
    scores = ModelScoreTable(self.dbfile)
    models_before = descriptions.count_rows()
    scores_before = scores.count_rows()

    # update with score computation switched off
    skip_config = MGITestConfig()
    skip_config.skip_compute = True
    Phenoscoring(skip_config).update()

    models_after = descriptions.count_rows()
    self.assertGreater(models_after, models_before,
                       "number of models should increase")
    scores_after = scores.count_rows()
    self.assertEqual(scores_after, scores_before,
                     "number of scores should remain")
def test_recompute(self):
    """recompute drops scores and recreates them."""
    rows_before = list(DBGenerator(ModelScoreTable(self.dbfile)).next())
    self.assertGreater(len(rows_before), 0,
                       "db should be set up with some scores")

    # a recompute is expected to drop the scores and create them again
    self.pipeline.recompute()

    rows_after = list(DBGenerator(ModelScoreTable(self.dbfile)).next())
    self.assertEqual(len(rows_before), len(rows_after),
                     "recomputing should give same result structure")
def test_compute_gives_stamps(self):
    """running a compute packet stores score rows that carry a timestamp."""
    modelnames = ["MGI_MA:001_hom", "MGI_MA:001_het"]
    refnames = ["DISEASE:1", "DISEASE:3"]
    packets = prep_compute_packets(self.config,
                                   references=refnames,
                                   models=modelnames)
    packets[0].run()

    # collect the timestamp field of every stored score row
    generator = DBGenerator(ModelScoreTable(self.dbfile))
    stamps = [row["timestamp"] for row in generator.next()]

    # fail with a readable message instead of an IndexError when the
    # packet produced fewer rows than the checks below inspect
    self.assertGreaterEqual(len(stamps), 2,
                            "expected at least two score rows")
    self.assertIsNotNone(stamps[0])
    self.assertIsNotNone(stamps[1])
def setUp(self):
    """Empty the model score table before each test."""
    score_table = ModelScoreTable(self.dbfile)
    score_table.empty()