def build(self):
    """Create a phenoscoring db: set up empty tables, then populate them."""
    db_path, config = self.setup()
    if db_path is None:
        # setup decided the db already exists and the build can be skipped
        return
    # resolve all prerequisite input files up-front
    ontology_path = check_file(config.obo, db_path, "obo")
    reference_path = check_file(config.reference_phenotypes, db_path,
                                "reference_phenotypes")
    frequency_path = check_file(config.phenotype_frequencies, db_path,
                                "phenotype_frequencies")
    self.logger.msg1("Loading ontology")
    ontology = MinimalObo(ontology_path, True)
    self.logger.msg1("Preparing phenotype frequencies")
    fill_phenotype_frequency_table(db_path, frequency_path)
    # fill database with reference data
    self.logger.msg1("Preparing references")
    fill_concise_reference_table(db_path, reference_path)
    fill_complete_reference_table(db_path, ontology, config)
    self._end()
def test_detects_missing(self):
    """raise when neither the primary nor the fallback file exists."""
    primary = join("tests", "not-a-dir", "small.obo")
    fallback = join("tests", "also-not-a-dir", "small.obo")
    with self.assertRaises(Exception):
        check_file(primary, fallback)
def setUpClass(cls):
    """Build a fresh db and load model and reference definitions into it."""
    conf = MGITestConfig()
    conf.action = "build"
    cls.dbfile = conf.db
    conf.obo = check_file(conf.obo, conf.db)
    # construct a brand-new database
    cls.pipeline = Phenoscoring(conf)
    cls.pipeline.build()
    # insert model definitions without triggering score computation
    descriptions = check_file(conf.model_descriptions, conf.db)
    phenotypes = check_file(conf.model_phenotypes, conf.db)
    cls.pipeline._update(descriptions, phenotypes)
def setUpClass(cls):
    """Start from a clean db and populate it with model definitions."""
    conf = IMPCTestConfig()
    cls.config = conf
    db = conf.db
    cls.dbfile = db
    # make sure no stale database interferes with the build
    remove_db(db)
    conf.scale_oo_scores = False
    conf.obo = check_file(conf.obo, db)
    cls.desc_file = check_file(conf.model_descriptions, db)
    cls.phen_file = check_file(conf.model_phenotypes, db)
    # build the db, then register model definitions (scores not computed)
    cls.pipeline = Phenoscoring(conf)
    cls.pipeline.build()
    cls.pipeline.update()
def setUpClass(cls):
    """Build a complete test db and prepare shared ontology/refset fixtures."""
    conf = CompleteTestConfig()
    conf.null_prior = 0.2
    cls.dbfile = conf.db
    cls.pipeline = Phenoscoring(conf)
    cls.pipeline.build()
    ontology_file = check_file(conf.obo, conf.db, "obo")
    cls.obo = MinimalObo(ontology_file, True)
    # dummy default-value maps spanning all ontology terms
    term_ids = cls.obo.ids()
    cls.obodefaults = {term: 0.2 for term in term_ids}
    cls.obozeros = {term: 0 for term in term_ids}
    cls.ref_priors = get_ref_priors(conf.db)
    cls.rs, cls.rs2 = get_refsets(conf.db, ref_priors=cls.ref_priors)
    cls.rs.learn_obo(cls.obo)
    cls.rs2.learn_obo(cls.obo)
    # representations used by tests of individual configurations
    cls.y3model = Representation(name="Y3").set("Y:003", 0.8)
    cls.refA = Representation(name="refA").set("Y:002", 1)
    cls.refA.defaults(cls.obozeros)
    cls.refB = Representation(name="refB").set("Y:002", 1)
    cls.refB.defaults(cls.obozeros)
def test_uses_fallback(self):
    """a missing primary file resolves to the fallback path."""
    primary = join("tests", "not-a-dir", "small.obo")
    fallback = join("tests", "testdata", "small.obo")
    result = check_file(primary, fallback, required=None)
    self.assertEqual(result, fallback)
def export_representations(self):
    """Write matrix representations for models and references to disk."""
    db_path, config = self._start()
    self.logger.msg1("Loading ontology")
    ontology_path = check_file(config.obo, db_path, "obo")
    self.obo = MinimalObo(ontology_path, True)
    # references first, then models
    self._export_reference_representations()
    self._export_model_representations(config)
    self._end()
def update(self):
    """Insert model descriptions and phenotypes into the database."""
    db_path, config = self._start()
    self.config.obo = check_file(config.obo, db_path, "obo")
    descriptions = check_file(config.model_descriptions, db_path,
                              "model_descriptions", allow_none=True)
    phenotypes = check_file(config.model_phenotypes, db_path,
                            "model_phenotypes", allow_none=True)
    summary = self._update(descriptions, phenotypes)
    # compute scores only when every model id was valid and computing
    # has not been explicitly disabled
    if not summary["incorrect_ids"] and not config.skip_compute:
        self._compute(models=summary["new_phenotypes"])
    self._end()
def remove(self):
    """Delete selected model descriptions and phenotypes from the database."""
    db_path, config = self._start()
    descriptions = check_file(config.model_descriptions, db_path,
                              "model_descriptions", allow_none=False)
    self.logger.msg1("Reading model ids")
    model_ids = values_in_column(descriptions, "id")
    self.logger.msg1("Deleting models: " + str(len(model_ids)))
    delete_models(db_path, model_ids)
    self._end()
def explain(self):
    """Perform a verbose calculation of inference scores.

    Preparation mirrors compute(): once the relevant data is loaded
    from the db, the calculations are performed and recorded manually.
    """
    self.logger.verbose = False
    db_path, config = self._start()
    if config.explain not in ("general", "specific"):
        return "--explain must be 'general' or 'specific'"
    config.obo = check_file(config.obo, db_path, "obo")
    # users may request several model/reference pairs at once
    model_ids = config.model.split(",")
    reference_ids = config.reference.split(",")
    n_pairs = len(model_ids)
    if len(reference_ids) != n_pairs:
        raise Exception("incompatible number of models and references")
    # a single packet carries db, refset, and model information
    packet = prep_compute_packets(self.config,
                                  references=reference_ids,
                                  models=model_ids,
                                  partition_size=n_pairs)[0]
    packet.prep()
    if config.explain == "specific":
        refset = packet.specific_refset
    else:
        refset = packet.general_refset
    refset.learn_obo(MinimalObo(config.obo))
    results = []
    for model_id, reference_id in zip(model_ids, reference_ids):
        chain = refset.inference_chain(packet.models[model_id],
                                       reference_id, verbose=True,
                                       fp_penalty=config.fp_penalty)
        results.append(chain.to_json(nodata=config.explain_nodata))
    return "[" + ",".join(results) + "]"
def setUpClass(cls):
    """Create a db and seed the model-score table by hand."""
    conf = MGITestConfig()
    conf.scale_oo_scores = False
    cls.dbfile = conf.db
    conf.obo = check_file(conf.obo, conf.db)
    cls.pipeline = Phenoscoring(conf)
    cls.pipeline.build()
    # insert score rows directly, bypassing the scoring pipeline
    scores = ModelScoreTable(conf.db)
    rows = [("model:1", "DISEASE:1", 0.95, 0.98),
            ("model:2", "DISEASE:1", 0.94, 0.96),
            ("model:3", "DISEASE:1", 0.24, 0.96),
            ("model:4", "DISEASE:2", 0.92, 0.95),
            ("model:5", "DISEASE:2", 0.86, 0.85),
            ("model:6", "DISEASE:3", 0.96, 0.95)]
    for model_id, disease_id, general, specific in rows:
        scores.add(model_id, disease_id, "stamp", general, specific)
    scores.save()
help="mgi model descriptions") parser.add_argument("--mgi_phen", action="store", required=True, help="mgi model phenotypes") parser.add_argument("--output", action="store", required=True, help="prefix for output files") # ################################################################## # Execute the program if module is used as an executable if __name__ == "__main__": config = parser.parse_args() config.impc_desc = check_file(config.impc_desc, required="impc_desc") config.impc_phen = check_file(config.impc_phen, required="impc_phen") config.mgi_desc = check_file(config.mgi_desc, required="mgi_desc") config.mgi_phen = check_file(config.mgi_phen, required="mgi_phen") timestamp = now_timestamp() # load impc and mgi models into memory impc_models = get_file_models(config.impc_desc, timestamp) impc_phenotypes = get_file_phenotypes(config.impc_phen, timestamp) mgi_models = get_file_models(config.mgi_desc, timestamp) mgi_phenotypes = get_file_phenotypes(config.mgi_phen, timestamp) # get all allele_ids from impc and mgi def model_alleles(models): """scan a set of models and get a set of allele_id""" result = set()
# ################################################################## # Execute the program if module is used as an executable if __name__ == "__main__": config = parser.parse_args() tprfpr = (config.tpr, config.fpr) fe = filter_entities fe_cat = filter_entities_cat threshold = config.threshold if config.action == "MGI": # action to parse mouse phenotype models from MGI check_file(config.input, required="input") check_file(config.obo) obo = MinimalObo(config.obo) models = prep_MGI(config.input, tprfpr, obo) # write out all models and subsets genotype_models = fe_cat(models, set(["genotype"])) marker_models = fe_cat(models, set(["marker"])) write_models(genotype_models, config.output + "-genotype-universal") write_models(marker_models, config.output + "-marker-universal") # compute and write priors based on certain types of models categories = set(config.priors.split(",")) priors, num_models = get_priors_from_models(models, categories, obo, dark=config.dark_count) print("Number of models used to inform prior: " + str(num_models))
def test_detects_required(self):
    """an absent required file triggers an exception."""
    with self.assertRaises(Exception):
        check_file(None, None, required=True)
def test_allow_none(self):
    """with allow_none set, a missing file yields None instead of an error."""
    outcome = check_file(None, None, required="aaa", allow_none=True)
    self.assertEqual(outcome, None)
def test_check(self):
    """an existing primary file is returned unchanged."""
    primary = join("tests", "testdata", "small.obo")
    self.assertEqual(check_file(primary, None, required=None), primary)