예제 #1
0
    def test_scores_bad_input(self):
        """Inference must raise when the argument is not a usable model."""

        # a bare string is passed in place of a model
        refset = ReferenceSet({"refA": 0.5, "refB": 0.5}, ids=obo.ids())
        self.assertRaises(Exception, refset.inference, "refA")
예제 #2
0
    def test_add_without_name_raises(self):
        """Adding an unnamed representation to a reference set must raise."""

        priors = {"refA": 0.5, "refB": 0.5}
        refset = ReferenceSet(priors, ids=obo.ids())
        # the representation carries data but was created without a name
        unnamed = Representation().set("DOID:0014667", 0.4)
        self.assertRaises(Exception, refset.add, unnamed)
예제 #3
0
    def setUp(self):
        """Build a prepped reference set that holds refA and refB."""

        # the priors name three references (null, refA, refB),
        # but data are only added for refA and refB
        first = Representation(data={"a": 1, "b": 0.8}, name="refA")
        first.defaults(zerovals)
        second = Representation(data={"a": 1, "d": 0.2}, name="refB")
        second.defaults(zerovals)
        ref_priors = {"null": 0.7, "refA": 0.15, "refB": 0.15}
        self.rs = ReferenceSet(ref_priors, ids=zerovals.keys())
        for ref in (first, second):
            self.rs.add(ref)
        self.rs.prep()
예제 #4
0
def dict2referenceset(repdict, feature_ids, priors):
    """Create a ReferenceSet holding every representation in a dictionary.

    :param repdict: a dictionary of Representation objects; only the
        values are used, and each one is added to the set as-is
    :param feature_ids: list with all features
    :param priors: dict linking name to a prior probability
    :return: ReferenceSet object
    """

    result = ReferenceSet(priors, feature_ids)
    # the dictionary keys are not needed, so iterate over the values only
    for representation in repdict.values():
        result.add(representation)
    return result
예제 #5
0
    def test_get_reference(self):
        """A representation added to a set can be fetched back by name."""

        # two references, each seeded with one value and then imputed
        first = Representation(name="refA").set("DOID:0014667", 0.4)
        first.impute(obo, obodefaults)
        second = Representation(name="refB").set("DOID:0080015", 0.6)
        second.impute(obo, obodefaults)

        refset = ReferenceSet({"refA": 0.5, "refB": 0.5}, ids=obo.ids())
        for ref in (first, second):
            refset.add(ref)

        fetched = refset.get_representation("refA")
        self.assertTrue(fetched.equal(first))
예제 #6
0
    def test_positive_parent_multi(self):
        """Look up positive ancestors when a term has several parents."""

        # this ontology connects Y7 to both Y2 and Y1
        multi_obo = MinimalObo(join(testdir, "Ymulti.obo"))
        defaults = dict.fromkeys(multi_obo.ids(), 0.0001)
        for term in ("Y:003", "Y:005"):
            defaults[term] = 0.0002
        # reference set with two slightly different references
        refset = ReferenceSet({"refA": 0.5, "refB": 0.5},
                              ids=multi_obo.ids(),
                              row_priors=defaults)
        ref_a = Representation(name="refA")
        ref_a.set("Y:002", 0.5).set("Y:005", 1).impute(multi_obo, defaults)
        ref_b = Representation(name="refB")
        ref_b.set("Y:001", 0.5).impute(multi_obo, defaults)
        for ref in (ref_a, ref_b):
            refset.add(ref)
        refset.learn_obo(multi_obo)

        # both lookups start from Y7, but in different reference columns
        found_a = refset._positive_ancestor(refset.columns["refA"],
                                            refset.rows["Y:007"])
        self.assertEqual(found_a, refset.rows["Y:002"],
                         "Y2 is a positive ancestor")
        found_b = refset._positive_ancestor(refset.columns["refB"],
                                            refset.rows["Y:007"])
        self.assertEqual(found_b, refset.rows["Y:001"],
                         "Y1 is a positive immediate parent")
예제 #7
0
 def test_between2(self):
     """Inference for a model that is equally similar to two references."""

     # reference universe with two annotated references
     ref_priors = {"refA": 0.5, "refB": 0.5}
     refset = ReferenceSet(ref_priors, ids=self.obo.ids(),
                           row_priors=self.obodefaults)
     for ref in (self.refA, self.refB):
         refset.add(ref)
     refset.learn_obo(self.obo)
     refset.prep()

     scores = refset.inference(self.y3model)
     self.assertAlmostEqual(scores["refA"], scores["refB"],
                            msg="equally likely")
예제 #8
0
    def setUpClass(cls):
        """Create shared phenotype priors, references and a reference set."""

        # nonzero phenotype priors
        cls.priors = {
            "Y:004": 0.66,
            "Y:005": 0.25,
            "Y:006": 0.25,
            "Y:003": 0.66,
            "Y:001": 0.33,
            "Y:002": 0.33,
            "Y:007": 0.25,
            "Y:008": 0.25,
        }

        # a reference without any data
        cls.refnull = Representation(name="null")
        # refA: a low value on Y:002
        cls.refA = Representation(name="refA")
        cls.refA.set("Y:002", 0.1).impute(Yobo, cls.priors)
        # refB: a very low value on Y:001 and a high value on Y:006
        cls.refB = Representation(name="refB")
        cls.refB.set("Y:001", 0.01).set("Y:006", 0.8).impute(Yobo, cls.priors)
        # refB2: like refB, but with less extreme values
        cls.refB2 = Representation(name="refB2")
        cls.refB2.set("Y:001", 0.1).set("Y:006", 0.5).impute(Yobo, cls.priors)

        ref_priors = {"null": 0.4, "refA": 0.3, "refB": 0.3, "refB2": 0.3}
        cls.rs = ReferenceSet(ref_priors,
                              ids=Yobo.ids(),
                              row_priors=cls.priors)
        for ref in (cls.refnull, cls.refA, cls.refB, cls.refB2):
            cls.rs.add(ref)
        cls.rs.learn_obo(Yobo)
예제 #9
0
 def setUpClass(cls):
     """Create shared priors, references and a reference set for the tests.

     The references refA and refB each carry one phenotype with value 1;
     after imputation, values that still equal the phenotype prior are
     halved so that they sit below the prior.
     """
     # set some phenotype priors that are nonzero
     cls.priors = dict()
     cls.priors["Y:004"] = 0.66
     cls.priors["Y:005"] = cls.priors["Y:006"] = 0.25
     cls.priors["Y:003"] = 0.66
     cls.priors["Y:001"] = cls.priors["Y:002"] = 0.33
     cls.priors["Y:007"] = cls.priors["Y:008"] = 0.25
     # create reference set with some strong phenotypes
     cls.refnull = Representation(name="null")
     cls.refA = Representation(name="refA")
     cls.refA.set("Y:002", 1).impute(Yobo, cls.priors)
     cls.refB = Representation(name="refB")
     cls.refB.set("Y:001", 1).impute(Yobo, cls.priors)
     # reset missing phenotypes to smaller-than-prior
     for k, v in cls.priors.items():
         if cls.refA.get(k) == v:
             cls.refA.set(k, v / 2)
         if cls.refB.get(k) == v:
             cls.refB.set(k, v / 2)
     cls.rs = ReferenceSet(dict(null=0.3, refA=0.3, refB=0.3),
                           ids=Yobo.ids(),
                           row_priors=cls.priors)
     cls.rs.add(cls.refnull).add(cls.refA).add(cls.refB)
     cls.rs.learn_obo(Yobo)
예제 #10
0
 def test_between_refs_and_null(self):
     """Inference when the model resembles two refs and there is a null."""

     # the priors cover a null and two references;
     # data are added only for the two references
     ref_priors = {"null": 0.8, "refA": 0.15, "refB": 0.15}
     refset = ReferenceSet(ref_priors, ids=self.obo.ids(),
                           row_priors=self.obodefaults)
     for ref in (self.refA, self.refB):
         refset.add(ref)
     refset.learn_obo(self.obo)
     refset.prep()

     scores = refset.inference(self.y3model)
     self.assertAlmostEqual(scores["refA"], scores["refB"],
                            msg="equally likely")
예제 #11
0
    def test_empty_representation(self):
        """A new reference set holds two data vectors, one value per id."""

        refset = ReferenceSet({"refA": 0.5, "refB": 1}, ids=obo.ids())
        expected_length = len(obo.ids())
        self.assertEqual(len(refset.data), 2, "refset should allocate memory")
        # each of the two data vectors spans all the ids
        for index in (0, 1):
            self.assertEqual(len(refset.data[index]), expected_length)
예제 #12
0
    def test_str(self):
        """The string form of a reference set mentions its references."""

        rs = ReferenceSet(dict(null=0.7, refA=0.15, refB=0.15),
                          ids=zerovals.keys())
        result = str(rs)
        # assertIn/assertNotIn report the offending string upon failure
        self.assertIn("refA", result)
        self.assertNotIn("hello", result)
예제 #13
0
 def test_difference_in_priors(self):
     """Inference when the model matches two references that differ
     in their priors."""

     # refA is assigned a weaker prior than refB
     ref_priors = {"null": 0.85, "refA": 0.05, "refB": 0.1}
     refset = ReferenceSet(ref_priors, ids=self.obo.ids(),
                           row_priors=self.obodefaults)
     for ref in (self.refA, self.refB):
         refset.add(ref)
     refset.learn_obo(self.obo)
     refset.prep()

     scores = refset.inference(self.y3model)
     self.assertLess(scores["refA"], scores["refB"],
                     msg="equal match, but A has weaker prior")
예제 #14
0
 def test_model_nodata(self):
     """Inference when references are unequal but the model has no data."""

     # the priors cover a null and two references;
     # data are added only for the two references
     ref_priors = {"null": 0.8, "refA": 0.1, "refB": 0.1}
     refset = ReferenceSet(ref_priors, ids=self.obo.ids(),
                           row_priors=self.obodefaults)
     for ref in (self.refA, self.refB):
         refset.add(ref)
     refset.learn_obo(self.obo)
     refset.prep()

     # a model that has a name, but no values
     empty_model = Representation(name="nodata")
     scores = refset.inference(empty_model)
     self.assertAlmostEqual(scores["refA"], scores["refB"],
                            msg="equally likely")
예제 #15
0
    def test_positive_parent(self):
        """Fetch an ancestor term that carries a positive value."""

        refset = ReferenceSet({"refA": 0.5}, ids=Yobo.ids(),
                              row_priors=Ydefaults)
        ref = Representation(name="refA")
        ref.set("Y:002", 0.5).set("Y:005", 1).impute(Yobo, Ydefaults)
        refset.add(ref)
        refset.learn_obo(Yobo)

        column = refset.columns["refA"]
        # (queried term, expected ancestor, failure message)
        cases = (
            ("Y:002", "Y:002", "Y2 is itself is positive"),
            ("Y:007", "Y:002", "Y2 is immediate parent of Y7"),
            ("Y:006", "Y:005", "Y5 is immediate parent of Y6"),
        )
        for query, expected, note in cases:
            found = refset._positive_ancestor(column, refset.rows[query])
            self.assertEqual(found, refset.rows[expected], note)
예제 #16
0
 def test_baddata(self):
     """inference should raise when input is bad"""

     rs = ReferenceSet(dict(refA=0.5, refB=0.5), ids=["a", "b", "c"])
     rs.prep()
     # an integer is passed in place of a model
     with self.assertRaises(Exception):
         rs.inference(5)
예제 #17
0
    def test_prep_row_priors(self):
        """prep() should set row priors for all the features."""

        # two references: both set feature a, only refB sets feature b
        first = Representation(data={"a": 1}, name="refA")
        first.defaults(zerovals)
        second = Representation(data={"a": 1, "b": 0.8}, name="refB")
        second.defaults(zerovals)
        refset = ReferenceSet({"refA": 0.5, "refB": 0.5},
                              ids=zerovals.keys())
        for ref in (first, second):
            refset.add(ref)
        # compute feature priors
        refset.prep()

        # the rows should cover all the features
        self.assertEqual(set(refset.row_names), set(zerovals.keys()))

        # look up the row positions first, then check the prior for each
        positions = [refset.rows[feature] for feature in ("a", "b", "d")]
        expectations = (
            (1, "refA and refB both have a"),
            (0.4, "only refB has b, so 0.8/2"),
            (0.2, "value is 1/num features"),
        )
        for position, (prior, note) in zip(positions, expectations):
            self.assertEqual(refset.row_priors[position], prior, note)
예제 #18
0
    def test_learn_from_obo(self):
        """learn_obo() should create parent information for all features."""

        r1 = Representation(name="refA").set("DOID:0014667", 0.4)
        rs = ReferenceSet(dict(refA=0.5, refB=0.5), ids=obo.ids())
        rs.add(r1)
        # parents are not available until an ontology is learned
        self.assertIsNone(rs.parents)
        rs.learn_obo(obo)
        # afterward there is one entry per ontology id
        self.assertEqual(len(rs.parents), len(obo.ids()))
예제 #19
0
    def test_subset(self):
        """make a refset smaller by ignoring some features."""

        # create a reference set
        rs = ReferenceSet(OrderedDict(refA=0.5, refB=0.5),
                          ids=testfeatures,
                          row_priors=zerovals)

        # add some data to the two representations
        r1 = Representation(name="refA")
        r1.set("a", 0.1).set("b", 0.2).set("c", 0.3).set("d", 0.4)
        r2 = Representation(name="refB")
        r2.set("c", 0.6).set("d", 0.7).set("e", 0.8)
        rs.add(r1).add(r2)
        # manually create arrays with the reference set data
        expected_raw_A = [0.1, 0.2, 0.3, 0.4, 0.0]
        expected_raw_B = [0.0, 0.0, 0.6, 0.7, 0.8]
        # compare content; assertTrue(x, y) would only test truthiness of x
        # and use y as the failure message
        self.assertEqual(list(rs.data[0]), expected_raw_A)
        self.assertEqual(list(rs.data[1]), expected_raw_B)
        # subset to a smaller number of features
        # myids - here c appears twice, z is not in the original features
        myids = ["e", "c", "a", "z", "c"]
        rs = ReferenceMatrix(rs, myids)
        # check new shape (three features and two references)
        self.assertEqual(len(rs.rows), 3)
        self.assertEqual(len(rs.row_names), 3)
        self.assertEqual(rs.data.shape, (3, 2))
        # check that the relevant rows are present
        result = set(rs.rows.keys())
        expected = set(myids)
        expected.remove("z")
        self.assertEqual(result, expected)
        # check data subset in output (sums are independent of row order)
        output_A = [0.1, 0.3, 0.0]
        output_B = [0.0, 0.6, 0.8]
        self.assertEqual(sum(rs.data[:, 0]), sum(output_A))
        self.assertEqual(sum(rs.data[:, 1]), sum(output_B))
예제 #20
0
def get_modelsets(dbpath, obo, partition_size=4096):
    """Create ReferenceSet objects that each hold a subset of the models.

    :param dbpath: path to phenoscoring db
    :param obo: object with ontology
    :param partition_size: number of models after which a new
        ReferenceSet is started
    :return: list of ReferenceSet objects, each with a subset of models
        (an empty list when there are no model names)
    """

    model_names = get_model_names(dbpath)
    if len(model_names) == 0:
        return []

    # partition models into chunks
    model_groups = [[]]
    for m in model_names:
        group = model_groups[-1]
        if len(group) >= partition_size:
            model_groups.append([])
            group = model_groups[-1]
        group.append(m)

    # load all model information from database
    models = get_model_representations(dbpath, obo)
    phen_priors = get_phenotype_priors(dbpath)

    # all models share one prior, which is based on the total number of
    # models rather than on the size of an individual group
    model_prior = 1 / len(model_names)

    # transfer into small-sized reference sets
    result = []
    for group in model_groups:
        refset = ReferenceSet(dict.fromkeys(group, model_prior), obo.ids())
        for m in group:
            # pop removes the representation from the dict once it is used
            model = models.pop(m)
            model.impute(obo, phen_priors)
            refset.add(model)
        result.append(refset)
    return result
예제 #21
0
    def test_add_raises(self):
        """Adding data of an unexpected type must raise."""

        refset = ReferenceSet({"refA": 0.5, "refB": 0.5}, ids=obo.ids())
        # first a number, then a plain dictionary
        plain_dict = dict.fromkeys(["DOID:0014667", "DOID:0080015"], 0)
        for bad_input in (5, plain_dict):
            with self.assertRaises(Exception):
                refset.add(bad_input)
예제 #22
0
def get_refsets(dbpath, ref_priors=None, phenotype_priors=None):
    """Create two ReferenceSet objects: general and specific phenotypes.

    :param dbpath: path to phenoscoring db
    :param ref_priors: dictionary with priors for references
        (if None, fetched from db)
    :param phenotype_priors: dictionary with priors for all features
        (if None, fetched from db)
    :return: two ReferenceSet objects (general, specific)
    """

    # at first create just a dictionary of representations
    general_dict, specific_dict = dict(), dict()

    if phenotype_priors is None:
        phenotype_priors = get_phenotype_priors(dbpath)
    if ref_priors is None:
        ref_priors = get_ref_priors(dbpath)

    # start each reference from a copy of the complete null representation
    nullrep = get_complete_null(dbpath)
    phenotypes = nullrep.keys()
    for refid in ref_priors:
        general_dict[refid] = nullrep.copy(name=refid)
        specific_dict[refid] = nullrep.copy(name=refid)

    # fill the representations with values
    phentab = ReferenceCompletePhenotypeTable(dbpath)
    if len(ref_priors) == 1:
        # with a single reference, restrict the db query to that reference
        refname = next(iter(ref_priors))
        generator = DBGenerator(phentab, where=dict(id=refname))
    else:
        generator = DBGenerator(phentab)
    for row in generator.next():
        refid, phen = row["id"], row["phenotype"]
        # skip rows for references that do not have a prior
        if refid in ref_priors:
            general_dict[refid].set(phen, row["value"])
            specific_dict[refid].set(phen, row["specific_value"])

    # transfer representations into ReferenceSets
    general = ReferenceSet(ref_priors, phenotypes, phenotype_priors)
    specific = ReferenceSet(ref_priors, phenotypes, phenotype_priors)
    for refid, general_rep in general_dict.items():
        general.add(general_rep)
        specific.add(specific_dict[refid])

    return general, specific
예제 #23
0
 def test_underflow(self):
     """attempt to get underflow in individual p."""

     # let model have very sure values
     model = Representation(name="underflow")
     model.set("Y:007", 0.00001).set("Y:004", 1).set("Y:003", 1)
     # two references with data; the priors also name a null reference
     refA = Representation(name="refA").set("Y:003", 1)
     refB = Representation(name="refB").set("Y:003", 1)
     rs = ReferenceSet(dict(null=0.98, refA=0.001, refB=0.001),
                       ids=self.obo.ids())
     rs.add(refA).add(refB)
     rs.learn_obo(self.obo)
     rs.prep()

     result = rs.inference(model, verbose=True)
     # one check per reference
     # NOTE(review): the priors also declare "null"; if inference reports
     # a score for it, consider checking that value as well
     for refname in ("refA", "refB"):
         self.assertGreaterEqual(result[refname], 0,
                                 msg="must always be a number, even if zero")
예제 #24
0
    def test_FP_can_increase(self):
        """A false positive can, in principle, yield a greater score."""

        # new reference set; only the prior for Y:002 is changed
        adjusted = self.priors.copy()
        adjusted["Y:002"] = 0.1
        ref_priors = {"null": 0.4, "refA": 0.3, "refB": 0.3}
        refset = ReferenceSet(ref_priors,
                              ids=Yobo.ids(),
                              row_priors=adjusted)
        for ref in (self.refnull, self.refA, self.refB):
            refset.add(ref)
        refset.learn_obo(Yobo)

        model = Representation(name="model").set("Y:002", 0.2)
        chain = refset.inference_chain(model, "refB",
                                       verbose=True, fp_penalty=1)
        chain.evaluate_inference()
        self.assertGreater(chain.posterior, chain.prior)
예제 #25
0
    def test_add_incrementally(self):
        """Values from representations are transferred into a reference set."""

        first = Representation(name="refA").set("DOID:0014667", 0.4)
        first.impute(obo, obodefaults)

        second = Representation(name="refB").set("DOID:0080015", 0.6)
        second.impute(obo, obodefaults)

        refset = ReferenceSet({"refA": 0.5, "refB": 0.5}, ids=obo.ids())
        for ref in (first, second):
            refset.add(ref)

        # (feature, reference, expected value, failure message)
        cases = (
            ("DOID:0014667", "refA", 0.4,
             "refset should contain inserted data"),
            ("DOID:0080015", "refB", 0.6,
             "refset should contain inserted data"),
            ("DOID:4", "refB", 0.6,
             "refset should contain imputed data"),
        )
        for feature, refname, value, note in cases:
            self.assertEqual(refset.get(feature, refname), value, note)
예제 #26
0
    def test_TP_scores_better_than_FP(self):
        """A true positive must score at least as high as a false positive."""

        # new reference set with changed priors for Y:002 and Y:007
        adjusted = self.priors.copy()
        adjusted["Y:002"] = 0.55
        adjusted["Y:007"] = 0.15
        ref_priors = {"null": 0.4, "refA": 0.3, "refB": 0.3}
        refset = ReferenceSet(ref_priors,
                              ids=Yobo.ids(),
                              row_priors=adjusted)
        for ref in (self.refnull, self.refA, self.refB):
            refset.add(ref)
        refset.learn_obo(Yobo)

        # both models are scored against refA; the first one sets Y:007,
        # the second one sets Y:002
        fp_model = Representation(name="FP").set("Y:007", 1)
        fp_chain = refset.inference_chain(fp_model, "refA",
                                          verbose=True, fp_penalty=2)
        fp_chain.evaluate_inference()
        tp_model = Representation(name="TP").set("Y:002", 1)
        tp_chain = refset.inference_chain(tp_model, "refA", verbose=True)
        tp_chain.evaluate_inference()

        self.assertGreaterEqual(tp_chain.posterior, fp_chain.posterior)
예제 #27
0
    def test_FP_with_fp_penalty(self):
        """A false positive gains more when fp_penalty is lower."""

        # new reference set with changed priors for three phenotypes
        adjusted = self.priors.copy()
        adjusted["Y:003"] = 0.4
        adjusted["Y:002"] = 0.15
        adjusted["Y:007"] = 0.1
        refset = ReferenceSet({"null": 0.4, "ref": 0.3},
                              ids=Yobo.ids(),
                              row_priors=adjusted)
        # single reference; after imputation Y:007 is set to half its prior
        reference = Representation(name="ref")
        reference.set("Y:001", 1).impute(Yobo, adjusted)
        reference.set("Y:007", adjusted["Y:007"] / 2)
        for ref in (self.refnull, reference):
            refset.add(ref)
        refset.learn_obo(Yobo)

        model = Representation(name="model").set("Y:007", 0.35)
        # a low penalty lets the posterior rise above the prior
        low_penalty = refset.inference_chain(model, "ref",
                                             verbose=True, fp_penalty=0.1)
        low_penalty.evaluate_inference()
        self.assertGreater(low_penalty.posterior, low_penalty.prior)
        # a higher penalty gives a lower posterior than the low penalty
        high_penalty = refset.inference_chain(model, "ref",
                                              verbose=True, fp_penalty=1)
        high_penalty.evaluate_inference()
        self.assertLess(high_penalty.posterior, low_penalty.posterior)
예제 #28
0
    def setUp(self):
        """Assemble a ReferenceMatrix from a null and four references."""

        self.refnull = Representation(data=null_defaults, name="null")
        # refA to refD all start from Ydefaults and then override some values
        self.refA = Representation(data=Ydefaults, name="refA")
        self.refA.set("Y:004", 1)
        self.refB = Representation(data=Ydefaults, name="refB")
        self.refB.set("Y:004", 1).set("Y:001", 0.6).set("Y:003", 0.5)
        self.refC = Representation(data=Ydefaults, name="refC")
        self.refC.set("Y:004", 1).set("Y:001", 0.5)
        self.refD = Representation(data=Ydefaults, name="refD")
        self.refD.set("Y:004", 0.1)

        # all five references have the same prior
        priors = {"null": 0.1, "refA": 0.1, "refB": 0.1,
                  "refC": 0.1, "refD": 0.1}
        refset = ReferenceSet(priors, ids=Ydefaults.keys())
        for ref in (self.refnull, self.refA, self.refB,
                    self.refC, self.refD):
            refset.add(ref)
        self.rm = ReferenceMatrix(refset, list(Ydefaults.keys()))
예제 #29
0
    def test_inference_chain(self):
        """An inference chain records the model, reference and steps."""

        # reference set with two references
        first = Representation(name="refA")
        first.set("Y:002", 1).impute(Yobo, Ydefaults)
        second = Representation(name="refB")
        second.set("Y:001", 1).impute(Yobo, Ydefaults)
        refset = ReferenceSet({"refA": 0.5, "refB": 0.5},
                              ids=Yobo.ids(),
                              row_priors=Ydefaults)
        for ref in (first, second):
            refset.add(ref)
        refset.learn_obo(Yobo)

        # use refA as the model and compare it against refB
        chain = refset.inference_chain(first, "refB", verbose=True)
        chain_fields = vars(chain)
        self.assertEqual(chain_fields["model"], "refA")
        self.assertEqual(chain_fields["reference"], "refB")
        self.assertGreater(len(chain.data), 2,
                           "data chain should describe multiple features")
        # inspect the fields recorded on the first chain item
        item_fields = vars(chain.data[0])
        self.assertTrue("background" in item_fields,
                        "chain data have comparison information")
        self.assertTrue("result" in item_fields,
                        "chain data have TP/FP/etc codes")
예제 #30
0
 def test_checkprep(self):
     """inference only works when set is prepped"""

     rs = ReferenceSet(dict(refA=0.5, refB=0.5), ids=self.obo.ids())
     # prep() is deliberately not called here.
     # Call inference(), the method used by the other tests; a misspelled
     # method name would satisfy assertRaises through an AttributeError
     # without ever testing the prep check.
     with self.assertRaises(Exception):
         rs.inference(self.y3model)