def get_model_representations(dbpath, obo, log=None, log_prefix="", model_names=None):
    """Build a Representation for every model from the model phenotype table.

    :param dbpath: path to db
    :param obo: ontology object; used here through its canonical(), has(),
        valid() and replaced_by() methods
    :param log: optional callable that receives one message per skipped row
    :param log_prefix: text placed in front of every log message
    :param model_names: iterable of model names to include
        (if None, fetched with get_model_names)
    :return: dict mapping model names to Representation objects
    """
    if model_names is None:
        model_names = get_model_names(dbpath)
    wanted = set(model_names)

    # every requested model gets a representation, even if it has no rows
    result = {name: Representation(name=name) for name in model_names}

    priors = get_phenotype_priors(dbpath)
    rows = DBGenerator(ModelPhenotypeTable(dbpath))
    for row in rows.next():
        model = row["id"]
        phenotype = obo.canonical(row["phenotype"])
        # rows for models that were not requested are ignored
        if model not in wanted:
            continue
        # a known-but-invalid term is swapped for its replacement
        obsolete = obo.has(phenotype) and not obo.valid(phenotype)
        if obsolete:
            phenotype = obo.replaced_by(phenotype)
        # nothing usable is left for this row; report it and move on
        if phenotype is None:
            if log is not None:
                detail = "Skipping phenotype " + row["phenotype"]
                detail = detail + " in model " + model
                log(log_prefix + " - " + detail)
            continue
        result[model] = add_data_to_model(
            result[model],
            phenotype,
            row["value"],
            row["TPR"],
            row["FPR"],
            priors,
        )
    return result
def get_complete_null(dbpath):
    """Assemble the complete representation of the "null" reference.

    :param dbpath: path to db
    :return: Representation named "null", filled with the phenotype/value
        pairs stored under id "null" in the complete phenotype table
    """
    null_rep = Representation(name="null")
    table = ReferenceCompletePhenotypeTable(dbpath)
    # restrict the scan to rows that belong to the null reference
    null_rows = DBGenerator(table, where={"id": "null"})
    for row in null_rows.next():
        null_rep.set(row["phenotype"], row["value"])
    return null_rep
def get_phenotype_priors(dbpath):
    """Read the phenotype frequency table into a dict of priors.

    :param dbpath: path to db
    :return: dict mapping each phenotype to its frequency as a float
    """
    frequency_rows = DBGenerator(PhenotypeFrequencyTable(dbpath))
    return {
        row["phenotype"]: float(row["frequency"])
        for row in frequency_rows.next()
    }
def get_highscore_pairs(dbpath, threshold):
    """Collect (model, reference) pairs whose scores exceed a threshold.

    :param dbpath: path to db
    :param threshold: value that both the "general" and the "specific"
        score must strictly exceed
    :return: list of (model, reference) tuples
    """
    score_rows = DBGenerator(ModelScoreTable(dbpath))
    return [
        (row["model"], row["reference"])
        for row in score_rows.next()
        if row["general"] > threshold and row["specific"] > threshold
    ]
def descriptions_contain(self, key, query):
    """Scan the model descriptions table for a query string.

    :param key: name of the field to inspect in every row
    :param query: substring to look for in str(row[key])
    :return: True if at least one row contains the query, False otherwise
    """
    description_rows = DBGenerator(ModelDescriptionTable(self.dbfile))
    # the table is read to the end, also after the first match
    matches = [query in str(row[key]) for row in description_rows.next()]
    return any(matches)
def make_ref_priors(dbpath, prior=0.01):
    """Assign a uniform prior to every reference found in the db.

    :param dbpath: path to db
    :param prior: prior probability given to each reference
    :return: dict mapping reference ids to priors, plus an entry "null"
    """
    # reference names come from the concise phenotype table
    concise_rows = DBGenerator(ReferenceConcisePhenotypeTable(dbpath))
    priors = {row["id"]: prior for row in concise_rows.next()}
    # "null" receives whatever probability is left, but never less than prior
    remainder = 1 - sum(priors.values())
    priors["null"] = max(prior, remainder)
    return priors
def get_concise_refdict(dbpath):
    """Load the concise reference phenotypes into Representation objects.

    :param dbpath: path to db
    :return: dict mapping reference ids to Representations; an entry
        "null" is always present, even if the table has no such rows
    """
    references = {"null": Representation(name="null")}
    concise_rows = DBGenerator(ReferenceConcisePhenotypeTable(dbpath))
    for row in concise_rows.next():
        ref_id = row["id"]
        representation = references.get(ref_id)
        if representation is None:
            # first row for this reference; start an empty representation
            representation = Representation(name=ref_id)
            references[ref_id] = representation
        representation.set(row["phenotype"], row["value"])
    return references
def get_reference_neighbors(dbpath, k):
    """Create mappings from references to their nearest neighbors.

    :param dbpath: path to db
    :param k: number of neighbor slots to keep for each reference
    :return: dict mapping each reference id to a list of length k; the
        neighbor with rank r is stored at index r-1, and slots without a
        matching row hold an empty string
    """
    result = dict()
    neighbor_rows = DBGenerator(ReferenceNeighborsTable(dbpath))
    for row in neighbor_rows.next():
        rowid = row["id"]
        if rowid not in result:
            result[rowid] = [""] * k
        rank = int(row["rank"])
        # Ranks map to index rank-1. The lower bound matters: a rank of 0
        # or below would otherwise become a negative index and silently
        # overwrite a slot counted from the end of the list.
        if 1 <= rank <= k:
            result[rowid][rank - 1] = row["neighbor"]
    return result
def get_db_models(dbpath):
    """Fetch descriptions of all models currently in the database.

    :param dbpath: string, path to database file
    :return: dict mapping model ids to the objects produced by make_model
        from each description row

    The returned entities carry descriptions only, no phenotypes.
    """
    # one row of the description table per model
    description_rows = DBGenerator(ModelDescriptionTable(dbpath))
    return {row["id"]: make_model(row) for row in description_rows.next()}
def get_ref_priors(dbpath, references=None):
    """Read prior probabilities of references from the db.

    :param dbpath: path to db
    :param references: container with reference names to include
        (or None to get the entire table)
    :return: dictionary mapping references to prior probabilities (floats)
    """
    prior_rows = DBGenerator(ReferencePriorsTable(dbpath))
    return {
        row["id"]: float(row["value"])
        for row in prior_rows.next()
        if references is None or row["id"] in references
    }
def test_complete_references_number_phens(self):
    """The null reference has an entry for every phenotype with a prior."""
    priors = get_phenotype_priors(self.dbfile)
    null_phenotypes = set()
    disease_phenotypes = set()
    complete_rows = DBGenerator(ReferenceCompletePhenotypeTable(self.dbfile))
    for row in complete_rows.next():
        ref_id = row["id"]
        if ref_id == "null":
            null_phenotypes.add(row["phenotype"])
        elif ref_id == "DISEASE:1":
            disease_phenotypes.add(row["phenotype"])
    n_priors = len(priors)
    # the null reference must cover all phenotypes
    self.assertEqual(len(null_phenotypes), n_priors)
    # a disease is allowed to leave some phenotypes out
    self.assertLessEqual(len(disease_phenotypes), n_priors)
def test_compute_gives_stamps(self):
    """Running a compute packet leaves timestamps in the score table."""
    models = ["MGI_MA:001_hom", "MGI_MA:001_het"]
    references = ["DISEASE:1", "DISEASE:3"]
    packets = prep_compute_packets(
        self.config, references=references, models=models
    )
    packets[0].run()
    score_rows = DBGenerator(ModelScoreTable(self.dbfile))
    stamps = [row["timestamp"] for row in score_rows.next()]
    # the first two score rows must both carry a timestamp
    for position in (0, 1):
        self.assertFalse(stamps[position] is None)
def get_refsets(dbpath, ref_priors=None, phenotype_priors=None):
    """Create ReferenceSet objects with general and specific phenotypes.

    :param dbpath: path to phenoscoring db
    :param ref_priors: dictionary with priors for references
        (if None, fetched from db)
    :param phenotype_priors: dictionary with priors for all features
        (if None, fetched from db)
    :return: two ReferenceSet objects (general, specific)
    """
    if phenotype_priors is None:
        phenotype_priors = get_phenotype_priors(dbpath)
    if ref_priors is None:
        ref_priors = get_ref_priors(dbpath)

    # every reference starts out as a renamed copy of the complete null
    null_rep = get_complete_null(dbpath)
    phenotypes = null_rep.keys()
    general_reps, specific_reps = {}, {}
    for ref_name in ref_priors.keys():
        general_reps[ref_name] = null_rep.copy(name=ref_name)
        specific_reps[ref_name] = null_rep.copy(name=ref_name)

    # overwrite the null values with the values recorded for each reference
    table = ReferenceCompletePhenotypeTable(dbpath)
    if len(ref_priors) == 1:
        # with a single reference, only its rows are requested from the db
        only_ref = next(iter(ref_priors.keys()))
        complete_rows = DBGenerator(table, where={"id": only_ref})
    else:
        complete_rows = DBGenerator(table)
    for row in complete_rows.next():
        ref_name = row["id"]
        phenotype = row["phenotype"]
        if ref_name not in ref_priors:
            continue
        general_reps[ref_name].set(phenotype, row["value"])
        specific_reps[ref_name].set(phenotype, row["specific_value"])

    # move the finished representations into the two reference sets
    general = ReferenceSet(ref_priors, phenotypes, phenotype_priors)
    specific = ReferenceSet(ref_priors, phenotypes, phenotype_priors)
    for ref_name in general_reps.keys():
        general.add(general_reps[ref_name])
        specific.add(specific_reps[ref_name])
    return general, specific
def export(self, out=None):
    """Export one database table line-by-line as tab-separated text.

    The table is selected by comparing self.config.table with the name
    attribute of each entry in self.tables. When no entry matches, nothing
    is written.

    :param out: object with a write() method that receives the output;
        None (the default) means the current sys.stdout. The stream is
        looked up at call time so that a redirected sys.stdout is honored.
    :return: None
    """
    import sys

    if out is None:
        out = sys.stdout

    # when several entries share the requested name, the last one is used
    tablemodel = None
    for candidate in self.tables:
        if candidate.name == self.config.table:
            tablemodel = candidate
    if tablemodel is None:
        return

    # output the header
    tableinstance = tablemodel(self.dbpath)
    fieldnames = list(tableinstance.fieldnames())
    out.write("\t".join(fieldnames) + "\n")

    # output the table contents, one line per row, in header order
    generator = DBGenerator(tablemodel(self.dbpath))
    for row in generator.next():
        values = [str(row[field]) for field in fieldnames]
        out.write("\t".join(values) + "\n")
def test_recompute(self):
    """Recomputing drops the scores and recreates the same number of rows."""
    before = list(DBGenerator(ModelScoreTable(self.dbfile)).next())
    self.assertGreater(len(before), 0, "db should be set up with some scores")

    # recomputing should drop the scores and recreate them
    self.pipeline.recompute()

    after = list(DBGenerator(ModelScoreTable(self.dbfile)).next())
    self.assertEqual(len(before), len(after),
                     "recomputing should give same result structure")