コード例 #1
0
    def test_positive_parent_multi(self):
        """Look up a positive ancestor when a term has several parents."""

        # in this ontology Y7 is linked to both Y2 and Y1
        obo_multi = MinimalObo(join(testdir, "Ymulti.obo"))
        priors = dict.fromkeys(obo_multi.ids(), 0.0001)
        priors["Y:003"] = 0.0002
        priors["Y:005"] = 0.0002

        # two references that differ slightly from each other
        refset = ReferenceSet(dict(refA=0.5, refB=0.5),
                              ids=obo_multi.ids(),
                              row_priors=priors)
        first = Representation(name="refA")
        first.set("Y:002", 0.5)
        first.set("Y:005", 1)
        first.impute(obo_multi, priors)
        second = Representation(name="refB")
        second.set("Y:001", 0.5)
        second.impute(obo_multi, priors)
        refset.add(first)
        refset.add(second)
        refset.learn_obo(obo_multi)

        # the same query term resolves differently in each reference
        found_in_a = refset._positive_ancestor(refset.columns["refA"],
                                               refset.rows["Y:007"])
        self.assertEqual(found_in_a, refset.rows["Y:002"],
                         "Y2 is a positive ancestor")
        found_in_b = refset._positive_ancestor(refset.columns["refB"],
                                               refset.rows["Y:007"])
        self.assertEqual(found_in_b, refset.rows["Y:001"],
                         "Y1 is a positive immediate parent")
コード例 #2
0
    def test_impute_fromseeds_highfirst(self):
        """Impute from an explicit seed list made of two lower terms."""

        lower_a = "DOID:0014667"
        lower_b = "DOID:0080015"
        rep = Representation(dict())
        # DOID:4 sits above both seeded terms, so it should gain from each
        rep.set(lower_a, 0.4)
        rep.set(lower_b, 0.3)

        rep.impute(self.obo, self.obodef, seeds=[lower_a, lower_b])

        # seeded values are left as they were set
        self.assertAlmostEqual(rep.get(lower_a), 0.4, msg="should remain")
        self.assertAlmostEqual(rep.get(lower_b), 0.3, msg="should remain")
        # the ancestor combines both seeds with the 0.2 prior
        expected_ancestor = 1 - ((1 - 0.4) * (1 - 0.3) * (1 - 0.2))
        self.assertAlmostEqual(
            rep.get("DOID:4"),
            expected_ancestor,
            msg="ancestor gains from two children (and prior)")
        self.assertAlmostEqual(rep.get("DOID:655"), 0.2,
                               msg="remain; new DOID:4")
コード例 #3
0
ファイル: priors.py プロジェクト: tkonopka/phenoscoring
def get_priors_from_models(models, categories, obo, dark=1):
    """Compute cohort-wide phenotype frequencies

    Arguments:
        models        dictionary of Entity objects
        categories    set, determines what models to use in the calculation
        obo           object of class Obo
        dark          integer, dark count for phenotype normalization

    Returns:
        dict mapping phenotypes (from obo) to values [0,1]
        integer, number of models used to inform the prior
    """

    # keep only the models that satisfy the category criteria
    # (a dedicated name avoids shadowing the builtin ``all``)
    candidates = list(models.values())
    hits = filter_entities_cat(candidates, categories)

    # the ontology ids do not change below, so fetch them only once
    phenotypes = list(obo.ids())
    obodefaults = dict.fromkeys(phenotypes, 0)
    # every count starts at the dark count
    freqcounts = dict.fromkeys(phenotypes, dark)
    for entity in hits:
        # build a concise representation from the entity's own data
        rep = Representation(name=entity.id)
        for datum in entity.data:
            rep.set(datum.phenotype, datum.value)
        # expand it into a complete representation over the ontology
        rep.impute(obo, obodefaults)
        # accumulate the per-phenotype values
        for phenotype in phenotypes:
            freqcounts[phenotype] += rep.data[phenotype]

    # convert counts into frequencies
    result = counts_p(freqcounts, len(hits), dark)
    return result, len(hits)
コード例 #4
0
    def test_set_feature_float(self):
        """A value set as an integer is retrieved as an equal float."""

        rep = Representation()
        rep.set("abc", 1)
        self.assertEqual(rep.get("abc"), 1.0)
        # exact type check: subclasses of float are not accepted
        self.assertIs(type(rep.get("abc")), float)
コード例 #5
0
    def test_has(self):
        """has() distinguishes features that were set from those that were not."""

        rep = Representation(dict(xyz=0.2))
        rep.set("bob", 0.4)
        present = (("xyz", "set in constructor"), ("bob", "set manually"))
        for feature, reason in present:
            self.assertTrue(rep.has(feature), reason)
        self.assertFalse(rep.has("alice"), "not set")
コード例 #6
0
    def test_bg_nonverbose(self):
        """A model value equal to the prior leaves the posterior at the prior."""

        # the model's only phenotype carries exactly its prior value
        background_model = Representation(name="TP")
        background_model.set("Y:004", self.priors["Y:004"])
        outcome = self.rs.inference_chain(background_model, "refA",
                                          verbose=False)
        outcome.evaluate()
        self.assertEqual(outcome.posterior, outcome.prior)
コード例 #7
0
ファイル: dbhelpers.py プロジェクト: tkonopka/phenoscoring
def get_complete_null(dbpath):
    """Build the representation named "null" from stored phenotype rows.

    Reads the rows with id "null" from the ReferenceCompletePhenotypeTable
    at dbpath and copies each phenotype/value pair into a Representation.
    """

    null_rep = Representation(name="null")
    table = ReferenceCompletePhenotypeTable(dbpath)
    null_rows = DBGenerator(table, where=dict(id="null"))
    for record in null_rows.next():
        null_rep.set(record["phenotype"], record["value"])
    return null_rep
コード例 #8
0
    def test_keys(self):
        """keys() lists features from the defaults and from explicit sets."""

        rep = Representation(dict(xyz=0.2))
        rep.set("bob", 0.4).set("xyz", 0.3)
        rep.defaults(self.defaults)
        expected_keys = ["abc", "xyz", "bob"]
        self.assertEqual(rep.keys(), expected_keys,
                         "keys should have defaults and non-defaults")
コード例 #9
0
    def test_impute_fromseeds_lowfirst(self):
        """Impute from explicit seeds, listing the higher term first."""

        rep = Representation(dict())
        # deliberately inconsistent input: DOID:4 is higher in the tree, so
        # it cannot really have a lower value than DOID:0014667
        rep.set("DOID:0014667", 0.4)
        rep.set("DOID:4", 0.1)
        rep.impute(self.obo, self.obodef, seeds=["DOID:4", "DOID:0014667"])
        for child in ("DOID:0080015", "DOID:655"):
            self.assertEqual(rep.get(child), 0.1, "child of DOID:4")
コード例 #10
0
 def test_priors_reps(self):
     """Priors computed from representations use at least one of them."""

     first = Representation(name="A")
     first.set("Y:006", 1)
     second = Representation(name="B")
     second.set("Y:001", 0.8)

     # only the count of contributing representations is checked here
     _, count = get_priors_from_reps(dict(A=first, B=second), obo)
     self.assertGreater(count, 0)
コード例 #11
0
    def test_impute_down_ordering(self):
        """Imputing low values gives the same data for either seed order."""

        forward = Representation(dict())
        forward.set("DOID:3650", 0.01)
        forward.set("DOID:0014667", 0.05)
        backward = Representation(dict())
        backward.set("DOID:3650", 0.01)
        backward.set("DOID:0014667", 0.05)
        # same content, seeds given in opposite orders
        forward.impute(self.obo, self.obodef,
                       seeds=["DOID:3650", "DOID:0014667"])
        backward.impute(self.obo, self.obodef,
                        seeds=["DOID:0014667", "DOID:3650"])

        self.assertEqual(forward.data, backward.data, "all values the same")
コード例 #12
0
    def test_copy(self):
        """copy() yields an independent object with the same name and values."""

        source = Representation(self.defaults, name="hello")
        source.set("abc", 0.5)
        clone = source.copy()

        # the clone starts out matching the source
        self.assertEqual(source.name, clone.name)
        self.assertEqual(source.get("abc"), clone.get("abc"))
        # a change to the clone must not show up in the source
        clone.set("abc", 0.75)
        self.assertEqual(source.get("abc"), 0.5)
        self.assertEqual(clone.get("abc"), 0.75)
コード例 #13
0
    def test_general_representation_get(self):
        """get() resolves values from explicit sets and from defaults."""

        rep = Representation(dict(xyz=0.2))
        rep.set("bob", 0.4).set("xyz", 0.3)
        rep.defaults(self.defaults)
        checks = (
            ("bob", 0.4, "value should come from manual input"),
            ("abc", 0.1, "value should come from defaults dict"),
            ("xyz", 0.3, "value should come from manual override"),
        )
        for feature, expected, reason in checks:
            self.assertEqual(rep.get(feature), expected, reason)
コード例 #14
0
    def test_sum_with_impute(self):
        """sum() grows once the representation has been imputed."""

        rep = Representation(dict())
        rep.set("DOID:0014667", 1)
        before = rep.sum()
        rep.impute(self.obo, self.obodef)
        after = rep.sum()

        # before imputation only the single set value contributes
        self.assertEqual(before, 1, "value of one phenotype")
        self.assertGreater(
            after, 2, msg="value for one phenotype+ancestors+defaults")
コード例 #15
0
    def test_general_representation_get2(self):
        """get() resolves values when defaults are applied before the sets."""

        # mirror of the sibling get test, with defaults() called first
        rep = Representation(dict(abc=0.1, xyz=0.2))
        rep.defaults(self.defaults)
        rep.set("bob", 0.4).set("xyz", 0.3)
        checks = (
            ("bob", 0.4, "value should come from manual input"),
            ("abc", 0.1, "value should come from defaults dict"),
            ("xyz", 0.3, "value should come from manual override"),
        )
        for feature, expected, reason in checks:
            self.assertEqual(rep.get(feature), expected, reason)
コード例 #16
0
    def test_impute_up_avoid_doubles(self):
        """Positive evidence reaching an ancestor by two paths counts once."""

        # in the test ontology DOID:11044 reaches the root (DOID:4) twice:
        # directly via a shortcut, and indirectly through DOID:0080015
        rep = Representation(dict())
        rep.set("DOID:11044", 0.4)
        rep.impute(self.obo, self.obodef)

        self.assertGreater(rep.get("DOID:0080015"), 0.2,
                           "ancestor should increase")
        intermediate = rep.get("DOID:0080015")
        root = rep.get("DOID:4")
        self.assertAlmostEqual(
            intermediate, root,
            msg="4 should get bumped once, despite two paths from 11044")
コード例 #17
0
    def test_positive_parent(self):
        """Look up the nearest positive term for several query terms."""

        refset = ReferenceSet(dict(refA=0.5),
                              ids=Yobo.ids(),
                              row_priors=Ydefaults)
        reference = Representation(name="refA")
        reference.set("Y:002", 0.5)
        reference.set("Y:005", 1)
        reference.impute(Yobo, Ydefaults)
        refset.add(reference)
        refset.learn_obo(Yobo)

        column = refset.columns["refA"]
        # (query term, expected positive term, explanation)
        cases = (
            ("Y:002", "Y:002", "Y2 is itself is positive"),
            ("Y:007", "Y:002", "Y2 is immediate parent of Y7"),
            ("Y:006", "Y:005", "Y5 is immediate parent of Y6"),
        )
        for query, expected, reason in cases:
            found = refset._positive_ancestor(column, refset.rows[query])
            self.assertEqual(found, refset.rows[expected], reason)
コード例 #18
0
    def test_impute_down(self):
        """Check values after imputing a representation with a low seed.

        Sets one out-of-ontology term and one low-valued ontology term,
        imputes, and compares five values against their expectations.
        """

        rr = Representation(dict())
        rr.set("unrelated", 0.8)
        rr.set("DOID:0014667", 0.05)
        rr.impute(self.obo, self.obodef)

        # The explanation must be passed as ``msg``: the third positional
        # parameter of assertAlmostEqual is ``places``, so a positional
        # string would raise TypeError on any inexact match.
        self.assertAlmostEqual(rr.get("unrelated"), 0.8,
                               msg="out-of-ontology terms remain")
        self.assertAlmostEqual(rr.get("DOID:0014667"), 0.05,
                               msg="set value should remain")
        self.assertAlmostEqual(rr.get("DOID:4"), 0.2,
                               msg="ancestors should remain")
        self.assertAlmostEqual(rr.get("DOID:0080015"), 0.2,
                               msg="unrelated terms get default")
        self.assertAlmostEqual(rr.get("DOID:655"), 0.05,
                               msg="children are unaffected")
コード例 #19
0
ファイル: prep_refs.py プロジェクト: tkonopka/phenoscoring
def make_target_reference(reference, oomap, oo_median=None):
    """Translate one representation into the phenotypes of another ontology.

    Each phenotype in reference.data is expanded through oomap into
    (target phenotype, score) pairs. When oo_median is given, the value is
    multiplied by tanh(score / oo_median). If several source phenotypes map
    to the same target, the largest value is kept.
    """

    translated = Representation(name=reference.name)
    translated.title = reference.title
    rescale = oo_median is not None

    for source_term, source_value in reference.data.items():
        for target_term, score in oomap[source_term]:
            if rescale:
                candidate = source_value * tanh(score / oo_median)
            else:
                candidate = source_value
            # never lower a value that an earlier mapping already set
            if translated.has(target_term):
                candidate = max(candidate, translated.get(target_term))
            translated.set(target_term, candidate)

    return translated
コード例 #20
0
    def test_impute_up_always_increases(self):
        """Positive evidence raises ancestors, even above a high prior."""

        rep = Representation(dict())
        rep.set("DOID:3650", 0.25)
        # custom priors: two terms start out higher than the others
        priors = self.obodef.copy()
        priors["DOID:0014667"] = 0.5
        priors["DOID:4"] = 1
        rep.impute(self.obo, priors)

        self.assertEqual(rep.get("DOID:3650"), 0.25,
                         "set value should remain")
        self.assertGreater(rep.get("DOID:0060158"), 0.25,
                           "ancestors should receive greater score")
        self.assertEqual(rep.get("DOID:655"), 0.2,
                         "unrelated should stay at default")
        # this ancestor's prior already exceeds the propagated value
        raised = rep.get("DOID:0014667")
        self.assertGreater(
            raised, 0.5,
            "ancestor should receive score greater than its prior")
コード例 #21
0
    def test_equality(self):
        """equal() compares both the name and the content of representations."""

        base = Representation(self.defaults, name="hello")
        twin = Representation(self.defaults, name="hello")
        renamed = Representation(self.defaults, name="bye")
        altered = Representation(self.defaults, name="hello")
        altered.set("abc", 100)
        empty = Representation()
        extended = Representation(self.defaults, name="hello")
        extended.set("qqq", 20)

        self.assertTrue(base.equal(twin), "all is the same")
        # everything below must compare as unequal to the base object
        mismatches = (
            (5, "argument is not a Representation"),
            (renamed, "same content, but different name"),
            (altered, "same name, but different content"),
            (empty, "r5 is empty"),
            (extended, "r6 has more keys"),
            (range(4), "must compare to Representation"),
        )
        for other, reason in mismatches:
            self.assertFalse(base.equal(other), reason)
コード例 #22
0
    def test_impute_fromseeds_auto(self):
        """Impute without explicit seeds and compare across input orders."""

        def imputed(*assignments):
            # build a representation, set values in the given order, impute
            rep = Representation(dict())
            for term, value in assignments:
                rep.set(term, value)
            rep.impute(self.obo, self.obodef)
            return rep

        # The input is inconsistent: DOID:4 is higher in the tree, so it
        # cannot have a lower value than DOID:0014667. A low DOID:4 can
        # still affect other branches.
        # NOTE(review): the first two objects are built identically although
        # the message below speaks of "auto and manual" - confirm intent.
        rr1 = imputed(("DOID:0014667", 0.4), ("DOID:4", 0.1))
        rr2 = imputed(("DOID:0014667", 0.4), ("DOID:4", 0.1))
        # same values, set in the opposite order
        rr3 = imputed(("DOID:4", 0.1), ("DOID:0014667", 0.4))

        first_pair_same = rr1.data == rr2.data
        self.assertTrue(first_pair_same,
                        "auto and manual should have same data")
        second_pair_same = rr2.data == rr3.data
        self.assertTrue(second_pair_same,
                        "should be = regardless of input order")

        values = rr1.data
        self.assertGreater(values["DOID:0014667"], 0.2,
                           "DOID:0014667 increase by direct evidence")
        self.assertGreater(values["DOID:4"], 0.2,
                           "DOID:4 increases driven by 0014667")
        self.assertEqual(values["DOID:11044"], 0.1,
                         "low raw DOID:4 propagates down")
コード例 #23
0
 def test_underflow(self):
     """Inference returns non-negative numbers for very extreme inputs."""

     # a model with near-certain values
     model = Representation(name="underflow")
     model.set("Y:007", 0.00001).set("Y:004", 1).set("Y:003", 1)
     # the reference universe holds two annotated references; the priors
     # additionally name a null reference
     refA = Representation(name="refA").set("Y:003", 1)
     refB = Representation(name="refB").set("Y:003", 1)
     rs = ReferenceSet(dict(null=0.98, refA=0.001, refB=0.001),
                       ids=self.obo.ids())
     rs.add(refA).add(refB)
     rs.learn_obo(self.obo)
     rs.prep()

     result = rs.inference(model, verbose=True)
     # Each reference is checked once; a verbatim repeat of the refB check
     # added nothing.
     # NOTE(review): a third check (perhaps on "null") may have been
     # intended - confirm.
     for name in ("refA", "refB"):
         self.assertGreaterEqual(
             result[name], 0,
             msg="must always be a number, even if zero")
コード例 #24
0
    def test_impute_up(self):
        """Check values after imputing a representation with a high seed."""

        rep = Representation(dict())
        rep.set("unrelated", 0.8).set("DOID:0014667", 0.4)
        rep.impute(self.obo, self.obodef)

        # values that were set explicitly are retrieved unchanged
        self.assertEqual(rep.get("unrelated"), 0.8,
                         msg="out-of-ontology terms remain")
        self.assertEqual(rep.get("DOID:0014667"), 0.4,
                         msg="set value should remain")
        # the ancestor ends up above the seeded value
        self.assertGreater(rep.get("DOID:4"), 0.4,
                           msg="ancestors should receive greater score")
        # these two terms are expected at 0.2
        self.assertEqual(rep.get("DOID:0080015"), 0.2,
                         msg="unrelated terms get default")
        self.assertEqual(rep.get("DOID:655"), 0.2,
                         msg="children are unaffected")
コード例 #25
0
    def test_subset(self):
        """Shrink a reference set to a chosen list of features.

        Builds a two-reference ReferenceSet, checks its raw data, wraps it in
        a ReferenceMatrix restricted to a feature list, and checks the shape,
        row names and per-reference sums of the reduced data.
        """

        # create a reference set
        rs = ReferenceSet(OrderedDict(refA=0.5, refB=0.5),
                          ids=testfeatures,
                          row_priors=zerovals)

        # add some data to the two representations
        r1 = Representation(name="refA")
        r1.set("a", 0.1).set("b", 0.2).set("c", 0.3).set("d", 0.4)
        r2 = Representation(name="refB")
        r2.set("c", 0.6).set("d", 0.7).set("e", 0.8)
        rs.add(r1).add(r2)
        # manually create arrays with the reference set data
        expected_raw_A = [0.1, 0.2, 0.3, 0.4, 0.0]
        expected_raw_B = [0.0, 0.0, 0.6, 0.7, 0.8]
        # assertEqual performs the comparison; assertTrue would take the
        # second argument as the failure message and accept any non-empty
        # list.
        # NOTE(review): assumes rs.data[i] holds the values of the i-th
        # reference over testfeatures, as this indexing implies - confirm.
        self.assertEqual(list(rs.data[0]), expected_raw_A)
        self.assertEqual(list(rs.data[1]), expected_raw_B)
        # subset to a smaller number of features
        # myids - here c is repeated twice, z is not in the original features
        myids = ["e", "c", "a", "z", "c"]
        rs = ReferenceMatrix(rs, myids)
        # check new shape (three features and two references)
        self.assertEqual(len(rs.rows), 3)
        self.assertEqual(len(rs.row_names), 3)
        self.assertEqual(rs.data.shape, (3, 2))
        # check that the relevant rows are present
        result = set(rs.rows.keys())
        expected = set(myids)
        expected.remove("z")
        self.assertEqual(result, expected)
        # check data subset in output (compared by sum, so row order is free)
        output_A = [0.1, 0.3, 0.0]
        output_B = [0.0, 0.6, 0.8]
        self.assertEqual(sum(rs.data[:, 0]), sum(output_A))
        self.assertEqual(sum(rs.data[:, 1]), sum(output_B))
コード例 #26
0
    def test_inference_chain(self):
        """Build a verbose inference chain and inspect its attributes."""

        # two imputed references placed in one reference set
        first = Representation(name="refA")
        first.set("Y:002", 1)
        first.impute(Yobo, Ydefaults)
        second = Representation(name="refB")
        second.set("Y:001", 1)
        second.impute(Yobo, Ydefaults)
        refset = ReferenceSet(dict(refA=0.5, refB=0.5),
                              ids=Yobo.ids(),
                              row_priors=Ydefaults)
        refset.add(first)
        refset.add(second)
        refset.learn_obo(Yobo)

        # score the first reference, used as a model, against the second
        chain = refset.inference_chain(first, "refB", verbose=True)
        chain_fields = chain.__dict__
        self.assertEqual(chain_fields["model"], "refA")
        self.assertEqual(chain_fields["reference"], "refB")
        self.assertGreater(len(chain.data), 2,
                           "data chain should describe multiple features")
        head_fields = chain.data[0].__dict__
        self.assertTrue("background" in head_fields,
                        "chain data have comparison information")
        self.assertTrue("result" in head_fields,
                        "chain data have TP/FP/etc codes")
コード例 #27
0
    def test_FP_with_fp_penalty(self):
        """A lower fp_penalty gives a higher posterior for the same model."""

        # a fresh reference set with adjusted priors
        adjusted = self.priors.copy()
        adjusted["Y:003"] = 0.4
        adjusted["Y:002"] = 0.15
        adjusted["Y:007"] = 0.1
        refset = ReferenceSet(dict(null=0.4, ref=0.3),
                              ids=Yobo.ids(),
                              row_priors=adjusted)
        reference = Representation(name="ref")
        reference.set("Y:001", 1).impute(Yobo, adjusted)
        # after imputation, put Y:007 at half of its prior
        reference.set("Y:007", adjusted["Y:007"] / 2)
        refset.add(self.refnull).add(reference)
        refset.learn_obo(Yobo)

        model = Representation(name="model").set("Y:007", 0.35)
        lenient = refset.inference_chain(model, "ref",
                                         verbose=True, fp_penalty=0.1)
        lenient.evaluate_inference()
        self.assertGreater(lenient.posterior, lenient.prior)
        strict = refset.inference_chain(model, "ref",
                                        verbose=True, fp_penalty=1)
        strict.evaluate_inference()
        self.assertLess(strict.posterior, lenient.posterior)
コード例 #28
0
class ReferenceMatrixTests(unittest.TestCase):
    """Test cases for computing average representation of neighbors"""
    def setUp(self):
        """prepare a reference set with a few references."""

        self.refnull = Representation(data=null_defaults, name="null")
        self.refA = Representation(data=Ydefaults, name="refA")
        self.refA.set("Y:004", 1)
        self.refB = Representation(data=Ydefaults, name="refB")
        self.refB.set("Y:004", 1).set("Y:001", 0.6).set("Y:003", 0.5)
        self.refC = Representation(data=Ydefaults, name="refC")
        self.refC.set("Y:004", 1).set("Y:001", 0.5)
        self.refD = Representation(data=Ydefaults, name="refD")
        self.refD.set("Y:004", 0.1)
        ref_priors = dict(null=0.1, refA=0.1, refB=0.1, refC=0.1, refD=0.1)
        rs = ReferenceSet(ref_priors, ids=Ydefaults.keys())
        rs.add(self.refnull)
        rs.add(self.refA).add(self.refB)
        rs.add(self.refC).add(self.refD)
        self.rm = ReferenceMatrix(rs, list(Ydefaults.keys()))

    def test_neighbors_cosine(self):
        """identify neighboring/similar references."""

        indexes = dict()
        for _ in ["refA", "refB", "refC", "refD"]:
            indexes[_] = self.rm.columns[_]

        result = self.rm.nearest_neighbors("refC", 2)
        expected = ["refB", "refA"]
        self.assertEqual(result, expected)

    def test_average(self):
        """compute an average of several representations."""

        result = self.rm.get_average(["refB", "refC"])
        self.assertAlmostEqual(result.get("Y:004"), 1.0)
        self.assertAlmostEqual(result.get("Y:001"), 0.55)
        self.assertAlmostEqual(result.get("Y:007"), Ydefaults["Y:007"])
コード例 #29
0
    def test_set_feature(self):
        """A value stored with set() is returned unchanged by get()."""

        feature, value = "abc", 0.2
        rep = Representation()
        rep.set(feature, value)
        self.assertEqual(rep.get(feature), 0.2)