예제 #1
0
def get_model_representations(dbpath,
                              obo,
                              log=None,
                              log_prefix="",
                              model_names=None):
    """Build a Representation for each model from the model phenotype table.

    :param dbpath: path to the database
    :param obo: ontology object; used to canonicalize phenotype ids and
        to look up replacements for ids that it has but deems not valid
    :param log: optional callable that accepts one message string
    :param log_prefix: string placed in front of every log message
    :param model_names: iterable of model ids to include
        (if None, fetched with get_model_names)
    :return: dict mapping model ids to Representation objects
    """

    if model_names is None:
        model_names = get_model_names(dbpath)
    wanted = set(model_names)
    representations = {name: Representation(name=name)
                       for name in model_names}
    priors = get_phenotype_priors(dbpath)
    for row in DBGenerator(ModelPhenotypeTable(dbpath)).next():
        model_id = row["id"]
        phenotype = obo.canonical(row["phenotype"])
        # rows for models outside the requested set are ignored
        if model_id not in wanted:
            continue
        is_outdated = obo.has(phenotype) and not obo.valid(phenotype)
        if is_outdated:
            phenotype = obo.replaced_by(phenotype)
        # no usable phenotype id: report (if possible) and move on
        if phenotype is None:
            if log is not None:
                detail = "Skipping phenotype " + row["phenotype"]
                detail += " in model " + model_id
                log(log_prefix + " - " + detail)
            continue
        representations[model_id] = add_data_to_model(
            representations[model_id], phenotype, row["value"],
            row["TPR"], row["FPR"], priors)
    return representations
예제 #2
0
def get_complete_null(dbpath):
    """Load the complete phenotype representation of the "null" reference.

    :param dbpath: path to the database
    :return: Representation named "null", filled with the phenotype
        values stored under id "null" in the complete phenotype table
    """

    null_rep = Representation(name="null")
    table = ReferenceCompletePhenotypeTable(dbpath)
    # restrict the scan to rows that belong to the null reference
    rows = DBGenerator(table, where={"id": "null"})
    for row in rows.next():
        null_rep.set(row["phenotype"], row["value"])
    return null_rep
예제 #3
0
def get_phenotype_priors(dbpath):
    """Read the phenotype frequency table into a dict.

    :param dbpath: path to the database
    :return: dict mapping phenotype ids to frequencies (as floats)
    """

    rows = DBGenerator(PhenotypeFrequencyTable(dbpath)).next()
    return {row["phenotype"]: float(row["frequency"]) for row in rows}
예제 #4
0
def get_highscore_pairs(dbpath, threshold):
    """Collect (model, reference) pairs whose scores exceed a threshold.

    :param dbpath: path to the database
    :param threshold: value that both the "general" and the "specific"
        score of a row must strictly exceed
    :return: list of (model, reference) tuples
    """

    rows = DBGenerator(ModelScoreTable(dbpath)).next()
    return [(row["model"], row["reference"])
            for row in rows
            if row["general"] > threshold and row["specific"] > threshold]
예제 #5
0
    def descriptions_contain(self, key, query):
        """Scan the model descriptions table for a query string.

        :param key: field name to inspect in each row
        :param query: string to search for
        :return: True if str(row[key]) contains query in at least one row
        """

        rows = DBGenerator(ModelDescriptionTable(self.dbfile)).next()
        # sum (rather than any) visits every row, i.e. a full table scan
        num_hits = sum(query in str(row[key]) for row in rows)
        return num_hits > 0
예제 #6
0
def make_ref_priors(dbpath, prior=0.01):
    """Assign one flat prior probability to every reference.

    :param dbpath: path to the database
    :param prior: value assigned to each reference id found in the
        concise reference phenotype table
    :return: dict mapping reference ids to the prior; the "null" entry
        receives the remaining probability mass, but never less than prior
    """

    rows = DBGenerator(ReferenceConcisePhenotypeTable(dbpath)).next()
    priors = {row["id"]: prior for row in rows}
    remainder = 1 - sum(priors.values())
    priors["null"] = max(prior, remainder)
    return priors
예제 #7
0
def get_concise_refdict(dbpath):
    """Load the concise reference phenotypes into a dict.

    :param dbpath: path to the database
    :return: dict mapping reference ids to Representation objects;
        an entry for "null" is always present
    """

    references = {"null": Representation(name="null")}
    table = ReferenceConcisePhenotypeTable(dbpath)
    for row in DBGenerator(table).next():
        ref_id = row["id"]
        rep = references.get(ref_id)
        if rep is None:
            # first row seen for this reference
            rep = Representation(name=ref_id)
            references[ref_id] = rep
        rep.set(row["phenotype"], row["value"])
    return references
예제 #8
0
def get_reference_neighbors(dbpath, k):
    """Create a mapping from each reference to its k nearest neighbors.

    :param dbpath: path to the database
    :param k: integer, number of neighbors to keep per reference
    :return: dict mapping reference ids to lists of length k; position i
        holds the neighbor with rank i+1, or "" if the table holds no
        neighbor of that rank. Rows with a rank outside 1..k are ignored.
    """

    result = dict()
    refgenerator = DBGenerator(ReferenceNeighborsTable(dbpath))
    for row in refgenerator.next():
        rowid = row["id"]
        if rowid not in result:
            result[rowid] = [""]*k
        rowrank = int(row["rank"])
        # ranks are 1-based; without the lower bound a rank of 0 (or a
        # negative rank) would index from the end of the list and
        # silently overwrite another neighbor's slot
        if 1 <= rowrank <= k:
            result[rowid][rowrank-1] = row["neighbor"]
    return result
예제 #9
0
def get_db_models(dbpath):
    """Fetch descriptions of all models currently in the database.

    :param dbpath: string, path to database file
    :return: dict mapping model ids to the objects produced by make_model
        from each description row (descriptions only, no phenotypes)
    """

    # scan the model descriptions table, one entry per row id
    models = {}
    for row in DBGenerator(ModelDescriptionTable(dbpath)).next():
        model = make_model(row)
        models[row["id"]] = model
    return models
예제 #10
0
def get_ref_priors(dbpath, references=None):
    """Read prior probabilities for references from the database.

    :param dbpath: path to db
    :param references: set with reference names to include
        (or None to get the entire table)
    :return: dictionary mapping references to prior probabilities
    """

    rows = DBGenerator(ReferencePriorsTable(dbpath)).next()
    # no filter requested: take every row of the table
    if references is None:
        return {row["id"]: float(row["value"]) for row in rows}
    return {row["id"]: float(row["value"])
            for row in rows
            if row["id"] in references}
예제 #11
0
 def test_complete_references_number_phens(self):
     """complete table holds as many null phenotypes as there are priors"""

     phen_priors = get_phenotype_priors(self.dbfile)
     # phenotypes seen for each of the two references of interest
     seen = {"null": set(), "DISEASE:1": set()}
     table = ReferenceCompletePhenotypeTable(self.dbfile)
     for row in DBGenerator(table).next():
         bucket = seen.get(row["id"])
         if bucket is not None:
             bucket.add(row["phenotype"])
     num_priors = len(phen_priors)
     # null should have all phens
     self.assertEqual(len(seen["null"]), num_priors)
     # disease phenotypes can omit some
     self.assertLessEqual(len(seen["DISEASE:1"]), num_priors)
예제 #12
0
    def test_compute_gives_stamps(self):
        """running a compute packet leaves timestamps in the score table."""

        modelnames = ["MGI_MA:001_hom", "MGI_MA:001_het"]
        refnames = ["DISEASE:1", "DISEASE:3"]
        packets = prep_compute_packets(self.config,
                                       references=refnames,
                                       models=modelnames)
        packets[0].run()

        generator = DBGenerator(ModelScoreTable(self.dbfile))
        stamps = [row["timestamp"] for row in generator.next()]
        # report a readable failure instead of an IndexError further down
        self.assertGreaterEqual(len(stamps), 2,
                                "expected at least two score rows")
        # assertIsNotNone reports the offending value on failure
        self.assertIsNotNone(stamps[0])
        self.assertIsNotNone(stamps[1])
예제 #13
0
def get_refsets(dbpath, ref_priors=None, phenotype_priors=None):
    """Build two ReferenceSet objects: general and specific phenotypes.

    :param dbpath: path to phenoscoring db
    :param ref_priors: dictionary with priors for references
        (if None, fetched from db)
    :param phenotype_priors: dictionary with priors for all features
        (if None, fetched from db)
    :return: two ReferenceSet objects (general, specific)
    """

    if phenotype_priors is None:
        phenotype_priors = get_phenotype_priors(dbpath)
    if ref_priors is None:
        ref_priors = get_ref_priors(dbpath)

    # every reference starts out as a renamed copy of the complete null
    null_rep = get_complete_null(dbpath)
    phenotypes = null_rep.keys()
    general_reps, specific_reps = {}, {}
    for refname in ref_priors:
        general_reps[refname] = null_rep.copy(name=refname)
        specific_reps[refname] = null_rep.copy(name=refname)

    # with a single reference, query only its rows; otherwise scan all
    table = ReferenceCompletePhenotypeTable(dbpath)
    if len(ref_priors) == 1:
        only_ref = next(iter(ref_priors))
        rows = DBGenerator(table, where={"id": only_ref})
    else:
        rows = DBGenerator(table)

    # overwrite the null values with the values stored for each reference
    for row in rows.next():
        refname, phen = row["id"], row["phenotype"]
        if refname not in ref_priors:
            continue
        general_reps[refname].set(phen, row["value"])
        specific_reps[refname].set(phen, row["specific_value"])

    # transfer representations into ReferenceSets
    general = ReferenceSet(ref_priors, phenotypes, phenotype_priors)
    specific = ReferenceSet(ref_priors, phenotypes, phenotype_priors)
    for refname, general_rep in general_reps.items():
        general.add(general_rep)
        specific.add(specific_reps[refname])

    return general, specific
예제 #14
0
    def export(self, out=sys.stdout):
        """Write one database table to a stream as tab-separated text.

        The table is chosen by comparing self.config.table with the name
        of each entry in self.tables; if nothing matches, nothing is
        written.

        :param out: writable text stream (defaults to sys.stdout)
        """

        matches = [t for t in self.tables if t.name == self.config.table]
        if not matches:
            return
        # when several entries share the name, the last one is used
        tablemodel = matches[-1]

        # header line with the field names
        fieldnames = list(tablemodel(self.dbpath).fieldnames())
        out.write("\t".join(fieldnames) + "\n")
        # one line per table row, fields in header order
        for row in DBGenerator(tablemodel(self.dbpath)).next():
            cells = (str(row[name]) for name in fieldnames)
            out.write("\t".join(cells) + "\n")
예제 #15
0
    def test_recompute(self):
        """recompute drops scores and recreates equally many rows."""

        before = list(DBGenerator(ModelScoreTable(self.dbfile)).next())
        self.assertGreater(len(before), 0,
                           "db should be set up with some scores")

        # recomputing should drop the scores and recreate them
        self.pipeline.recompute()

        after = list(DBGenerator(ModelScoreTable(self.dbfile)).next())
        self.assertEqual(len(before), len(after),
                         "recomputing should give same result structure")