Beispiel #1
0
def get_priors_from_models(models, categories, obo, dark=1):
    """Compute cohort-wide phenotype frequencies
    
    Arguments:
        models        dictionary of Entity objects
        categories    set, determines what models to use in the calculation
        obo           object of class Obo
        dark          integer, dark count for phenotype normalization
        
    Returns:
        dict mapping phenotypes (from obo) to values [0,1]
        integer, number of models used to inform the prior
    """

    # get a subset of the models that satisfy the criteria
    all = [obj for _, obj in models.items()]
    hits = filter_entities_cat(all, categories)

    # transfer phenotypes into representations
    obodefaults = dict.fromkeys(obo.ids(), 0)
    freqcounts = dict.fromkeys(list(obo.ids()), dark)
    for entity in hits:
        # prepare concise representations
        rep = Representation(name=entity.id)
        for datum in entity.data:
            rep.set(datum.phenotype, datum.value)
        # convert to complete representation
        rep.impute(obo, obodefaults)
        # count phenotypes
        for phenotype in obo.ids():
            freqcounts[phenotype] += rep.data[phenotype]

    # convert counts into frequencies
    result = counts_p(freqcounts, len(hits), dark)
    return result, len(hits)
Beispiel #2
0
    def test_impute_fromseeds_highfirst(self):
        """imputing values from manually-specified seeds."""

        rr = Representation(dict())
        # specify data for two children, DOID:4 is higher in tree, so should gain
        rr.set("DOID:0014667", 0.4)
        rr.set("DOID:0080015", 0.3)

        rr.impute(self.obo,
                  self.obodef,
                  seeds=["DOID:0014667", "DOID:0080015"])

        self.assertAlmostEqual(rr.get("DOID:0014667"),
                               0.4,
                               msg="should remain")
        self.assertAlmostEqual(rr.get("DOID:0080015"),
                               0.3,
                               msg="should remain")
        self.assertAlmostEqual(
            rr.get("DOID:4"),
            1 - ((1 - 0.4) * (1 - 0.3) * (1 - 0.2)),
            msg="ancestor gains from two children (and prior)")
        self.assertAlmostEqual(rr.get("DOID:655"),
                               0.2,
                               msg="remain; new DOID:4")
Beispiel #3
0
    def test_impute_fromseeds_lowfirst(self):
        """imputing values from manually-specified seeds."""

        rr = Representation(dict())
        ## specify an inconsistent set of values, DOID:4 is higher in tree, so cannot
        ## have a lower value than DOID:0014667
        rr.set("DOID:0014667", 0.4).set("DOID:4", 0.1)
        rr.impute(self.obo, self.obodef, seeds=["DOID:4", "DOID:0014667"])
        self.assertEqual(rr.get("DOID:0080015"), 0.1, "child of DOID:4")
        self.assertEqual(rr.get("DOID:655"), 0.1, "child of DOID:4")
Beispiel #4
0
    def test_impute_down_ordering(self):
        """updating values in representation via negative evidence."""

        r1 = Representation(dict())
        r1.set("DOID:3650", 0.01).set("DOID:0014667", 0.05)
        r2 = Representation(dict())
        r2.set("DOID:3650", 0.01).set("DOID:0014667", 0.05)
        # imputation down should not depend on order of the seeds
        r1.impute(self.obo, self.obodef, seeds=["DOID:3650", "DOID:0014667"])
        r2.impute(self.obo, self.obodef, seeds=["DOID:0014667", "DOID:3650"])

        self.assertEqual(r1.data, r2.data, "all values the same")
Beispiel #5
0
    def test_get_reference(self):
        """extract one reference from a representation set."""

        r1 = Representation(name="refA").set("DOID:0014667", 0.4)
        r1.impute(obo, obodefaults)
        r2 = Representation(name="refB").set("DOID:0080015", 0.6)
        r2.impute(obo, obodefaults)

        rs = ReferenceSet(dict(refA=0.5, refB=0.5), ids=obo.ids())
        rs.add(r1).add(r2)

        r3 = rs.get_representation("refA")
        self.assertTrue(r3.equal(r1))
Beispiel #6
0
    def test_sum_with_impute(self):
        """sum of values associated with the representation."""

        rr = Representation(dict())
        rr.set("DOID:0014667", 1)
        sum1 = rr.sum()
        rr.impute(self.obo, self.obodef)
        sum2 = rr.sum()

        self.assertEqual(sum1, 1, "value of one phenotype")
        self.assertGreater(sum2,
                           2,
                           msg="value for one phenotype+ancestors+defaults")
Beispiel #7
0
    def test_impute_up_avoid_doubles(self):
        """updating values in representation via positive evidence in DAG"""

        rr = Representation(dict())
        # DOID:11044 in test ontology has two paths to root (DOID:4)
        # one is direct (a shortcut)
        # another path is through 0080015
        rr.set("DOID:11044", 0.4)
        rr.impute(self.obo, self.obodef)

        self.assertGreater(rr.get("DOID:0080015"), 0.2,
                           "ancestor should increase")
        self.assertAlmostEqual(
            rr.get("DOID:0080015"),
            rr.get("DOID:4"),
            msg="4 should get bumped once, despite two paths from 11044")
Beispiel #8
0
    def test_add_incrementally(self):
        """transferring values into a representation set."""

        r1 = Representation(name="refA").set("DOID:0014667", 0.4)
        r1.impute(obo, obodefaults)

        r2 = Representation(name="refB").set("DOID:0080015", 0.6)
        r2.impute(obo, obodefaults)

        rs = ReferenceSet(dict(refA=0.5, refB=0.5), ids=obo.ids())
        rs.add(r1).add(r2)

        self.assertEqual(rs.get("DOID:0014667", "refA"), 0.4,
                         "refset should contain inserted data")
        self.assertEqual(rs.get("DOID:0080015", "refB"), 0.6,
                         "refset should contain inserted data")
        self.assertEqual(rs.get("DOID:4", "refB"), 0.6,
                         "refset should contain imputed data")
Beispiel #9
0
    def test_impute_down(self):
        """updating values in representation via negative evidence."""

        rr = Representation(dict())
        rr.set("unrelated", 0.8)
        rr.set("DOID:0014667", 0.05)
        rr.impute(self.obo, self.obodef)

        self.assertAlmostEqual(rr.get("unrelated"), 0.8,
                               "out-of-ontology terms remain")
        self.assertAlmostEqual(rr.get("DOID:0014667"), 0.05,
                               "set value should remain")
        self.assertAlmostEqual(rr.get("DOID:4"), 0.2,
                               "ancestors should remain")
        self.assertAlmostEqual(rr.get("DOID:0080015"), 0.2,
                               "unrelated terms get default")
        self.assertAlmostEqual(rr.get("DOID:655"), 0.05,
                               "children are unaffected")
Beispiel #10
0
    def test_impute_up_always_increases(self):
        """updating values in representation via positive evidence."""

        rr = Representation(dict())
        rr.set("DOID:3650", 0.25)
        defaults = self.obodef.copy()
        defaults["DOID:0014667"] = 0.5
        defaults["DOID:4"] = 1
        rr.impute(self.obo, defaults)

        self.assertEqual(rr.get("DOID:3650"), 0.25, "set value should remain")
        self.assertGreater(rr.get("DOID:0060158"), 0.25,
                           "ancestors should receive greater score")
        self.assertEqual(rr.get("DOID:655"), 0.2,
                         "unrelated should stay at default")
        # ancestor that has already a higher score than what is propagated
        self.assertGreater(
            rr.get("DOID:0014667"), 0.5,
            "ancestor should receive score greater than its prior")
Beispiel #11
0
    def test_impute_fromseeds_auto(self):
        """imputing values from automatically-ordered seeds."""

        # specify an inconsistent set of values, DOID:4 is higher in tree, so cannot
        # have a lower value than DOID:0014667
        # However, low DOID:4 can impact on other branches
        rr1 = Representation(dict())
        rr1.set("DOID:0014667", 0.4).set("DOID:4", 0.1)
        rr1.impute(self.obo, self.obodef)

        # auto seeds
        rr2 = Representation(dict())
        rr2.set("DOID:0014667", 0.4).set("DOID:4", 0.1)
        rr2.impute(self.obo, self.obodef)

        # auto seeds, different initial ordering
        rr3 = Representation(dict())
        rr3.set("DOID:4", 0.1).set("DOID:0014667", 0.4)
        rr3.impute(self.obo, self.obodef)

        self.assertTrue(rr1.data == rr2.data,
                        "auto and manual should have same data")
        self.assertTrue(rr2.data == rr3.data,
                        "should be = regardless of input order")

        self.assertGreater(rr1.data["DOID:0014667"], 0.2,
                           "DOID:0014667 increase by direct evidence")
        self.assertGreater(rr1.data["DOID:4"], 0.2,
                           "DOID:4 increases driven by 0014667")
        self.assertEqual(rr1.data["DOID:11044"], 0.1,
                         "low raw DOID:4 propagates down")
Beispiel #12
0
    def test_impute_up(self):
        """updating values in representation via positive evidence."""

        rr = Representation(dict())
        rr.set("unrelated", 0.8)
        rr.set("DOID:0014667", 0.4)
        rr.impute(self.obo, self.obodef)

        self.assertEqual(rr.get("unrelated"),
                         0.8,
                         msg="out-of-ontology terms remain")
        self.assertEqual(rr.get("DOID:0014667"),
                         0.4,
                         msg="set value should remain")
        self.assertGreater(rr.get("DOID:4"),
                           0.4,
                           msg="ancestors should receive greater score")
        self.assertEqual(rr.get("DOID:0080015"),
                         0.2,
                         msg="unrelated terms get default")
        self.assertEqual(rr.get("DOID:655"),
                         0.2,
                         msg="children are unaffected")