Ejemplo n.º 1
0
def get_raw_references(datapath, phenotype_set):
    """Read a phenotab file into per-disease representations.

    "Raw" phenotypes are phenotypes expressed in the original ontology.

    Args:
        datapath       path to phenotab file (read as tab-separated text)
        phenotype_set  collection of acceptable phenotypes

    Returns:
        two objects
        - dict mapping reference codes ("<Source>:<Disease_number>") to
          Representation objects holding a title and phenotype values
        - set of phenotypes that were not found in phenotype_set
    """

    references = {}
    rejected = set()
    with open_file(datapath, "rt") as handle:
        for record in csv.DictReader(handle, delimiter="\t", quotechar='"'):
            # rows whose Reference field fails validation are ignored
            if not valid_reference_id(record["Reference"]):
                continue
            phenotype = record["Phenotype"]
            if phenotype not in phenotype_set:
                rejected.add(phenotype)
                continue
            frequency = tofreq[record["Frequency"]]
            code = record["Source"] + ":" + record["Disease_number"]
            if code not in references:
                # first row for this disease: create the entry with a title
                entry = references[code] = Representation(name=code)
                entry.title = record["Disease_title"]
            references[code].set(phenotype, frequency)

    return references, rejected
Ejemplo n.º 2
0
    def test_impute_fromseeds_highfirst(self):
        """Impute from explicit seeds when only child terms carry values."""

        first_child, second_child = "DOID:0014667", "DOID:0080015"
        rr = Representation(dict())
        # data is given for two children; DOID:4 is higher in the tree,
        # so it is the term expected to gain
        rr.set(first_child, 0.4)
        rr.set(second_child, 0.3)

        rr.impute(self.obo, self.obodef, seeds=[first_child, second_child])

        # the seeded values themselves are expected to stay as they were
        self.assertAlmostEqual(rr.get(first_child), 0.4, msg="should remain")
        self.assertAlmostEqual(rr.get(second_child), 0.3, msg="should remain")
        # ancestor: complement of the product of (1 - value) over the two
        # children and a third term at 0.2
        self.assertAlmostEqual(
            rr.get("DOID:4"),
            1 - ((1 - 0.4) * (1 - 0.3) * (1 - 0.2)),
            msg="ancestor gains from two children (and prior)")
        self.assertAlmostEqual(
            rr.get("DOID:655"), 0.2, msg="remain; new DOID:4")
Ejemplo n.º 3
0
 def setUpClass(cls):
     """Run the pipeline build step and prepare fixtures shared by tests."""

     cfg = CompleteTestConfig()
     cfg.null_prior = 0.2
     cls.dbfile = cfg.db
     cls.pipeline = Phenoscoring(cfg)
     cls.pipeline.build()
     cls.obo = MinimalObo(check_file(cfg.obo, cfg.db, "obo"), True)

     # dummy default values for every ontology id: one set at 0.2, one at 0
     cls.obodefaults = dict.fromkeys(cls.obo.ids(), 0.2)
     cls.obozeros = dict.fromkeys(cls.obo.ids(), 0)

     # reference priors and the two reference sets, read via the db path
     cls.ref_priors = get_ref_priors(cfg.db)
     cls.rs, cls.rs2 = get_refsets(cfg.db, ref_priors=cls.ref_priors)
     for refset in (cls.rs, cls.rs2):
         refset.learn_obo(cls.obo)

     # for testing individual configurations
     cls.y3model = Representation(name="Y3").set("Y:003", 0.8)
     for label in ("refA", "refB"):
         ref = Representation(name=label).set("Y:002", 1)
         setattr(cls, label, ref)
         ref.defaults(cls.obozeros)
Ejemplo n.º 4
0
    def setUpClass(cls):
        """Prepare priors and a reference set (null, refA, refB, refB2)."""

        # nonzero phenotype priors; per the original note, Y:001 and
        # Y:002 are sibling terms
        cls.priors = {
            "Y:004": 0.66,
            "Y:005": 0.25,
            "Y:006": 0.25,
            "Y:003": 0.66,
            "Y:001": 0.33,
            "Y:002": 0.33,
            "Y:007": 0.25,
            "Y:008": 0.25,
        }

        cls.refnull = Representation(name="null")

        # refA: a single negative phenotype
        cls.refA = Representation(name="refA")
        cls.refA.set("Y:002", 0.1).impute(Yobo, cls.priors)

        # refB: one negative and one positive phenotype
        cls.refB = Representation(name="refB")
        cls.refB.set("Y:001", 0.01).set("Y:006", 0.8).impute(Yobo, cls.priors)

        # refB2: like refB, but the positive phenotype is weaker
        cls.refB2 = Representation(name="refB2")
        cls.refB2.set("Y:001", 0.1).set("Y:006", 0.5).impute(Yobo, cls.priors)

        ref_values = {"null": 0.4, "refA": 0.3, "refB": 0.3, "refB2": 0.3}
        cls.rs = ReferenceSet(ref_values,
                              ids=Yobo.ids(),
                              row_priors=cls.priors)
        cls.rs.add(cls.refnull).add(cls.refA).add(cls.refB).add(cls.refB2)
        cls.rs.learn_obo(Yobo)
Ejemplo n.º 5
0
 def setUpClass(cls):
     """Prepare priors and a reference set with null, refA and refB.

     refA and refB each get one phenotype set to 1 and are then imputed;
     afterwards any phenotype whose value equals its prior exactly is
     reset to half the prior.
     """

     # set some phenotype priors that are nonzero
     cls.priors = {
         "Y:004": 0.66,
         "Y:005": 0.25,
         "Y:006": 0.25,
         "Y:003": 0.66,
         "Y:001": 0.33,
         "Y:002": 0.33,
         "Y:007": 0.25,
         "Y:008": 0.25,
     }

     # create reference set with some strong phenotypes
     # (each reference is constructed exactly once)
     cls.refnull = Representation(name="null")
     cls.refA = Representation(name="refA")
     cls.refA.set("Y:002", 1).impute(Yobo, cls.priors)
     cls.refB = Representation(name="refB")
     cls.refB.set("Y:001", 1).impute(Yobo, cls.priors)

     # reset missing phenotypes to smaller-than-prior
     for k, v in cls.priors.items():
         for ref in (cls.refA, cls.refB):
             if ref.get(k) == v:
                 ref.set(k, v / 2)

     cls.rs = ReferenceSet(dict(null=0.3, refA=0.3, refB=0.3),
                           ids=Yobo.ids(),
                           row_priors=cls.priors)
     cls.rs.add(cls.refnull).add(cls.refA).add(cls.refB)
     cls.rs.learn_obo(Yobo)
Ejemplo n.º 6
0
    def test_bg_nonverbose(self):
        """A model value equal to the prior leaves the score unchanged."""

        term = "Y:004"
        neutral = Representation(name="TP")
        # use exactly the background (prior) value for the term
        neutral.set(term, self.priors[term])
        chain = self.rs.inference_chain(neutral, "refA", verbose=False)
        chain.evaluate()
        self.assertEqual(chain.posterior, chain.prior)
Ejemplo n.º 7
0
def get_complete_null(dbpath):
    """Assemble the complete representation stored under the id "null"."""

    null_rep = Representation(name="null")
    table = ReferenceCompletePhenotypeTable(dbpath)
    # restrict the generator to rows whose id is "null"
    null_rows = DBGenerator(table, where=dict(id="null"))
    for entry in null_rows.next():
        null_rep.set(entry["phenotype"], entry["value"])
    return null_rep
Ejemplo n.º 8
0
    def test_FP_same_parent(self):
        """FPs on different children of one parent score the same."""

        # both models are FP using children of a refA phenotype
        models = [Representation(name=label).set(term, 0.9)
                  for label, term in (("close", "Y:007"), ("far", "Y:008"))]
        chains = [self.rs.inference_chain(model, "refA", verbose=True)
                  for model in models]
        for chain in chains:
            chain.evaluate()
        chain_close, chain_far = chains
        self.assertEqual(chain_close.posterior, chain_far.posterior)
Ejemplo n.º 9
0
def get_concise_refdict(dbpath):
    """Collect concise reference phenotypes from the db into a dict.

    The result maps reference ids to Representation objects. An entry
    named "null" is always created up front.
    """

    refdict = {"null": Representation(name="null")}
    rows = DBGenerator(ReferenceConcisePhenotypeTable(dbpath))
    for row in rows.next():
        key = row["id"]
        target = refdict.get(key)
        if target is None:
            # first row seen for this reference id
            target = refdict[key] = Representation(name=key)
        target.set(row["phenotype"], row["value"])
    return refdict
Ejemplo n.º 10
0
    def test_prep_row_priors(self):
        """prep() produces a prior for every feature."""

        # reference universe with two annotated references
        refA = Representation(data=dict(a=1), name="refA")
        refA.defaults(zerovals)
        refB = Representation(data=dict(a=1, b=0.8), name="refB")
        refB.defaults(zerovals)
        rs = ReferenceSet({"refA": 0.5, "refB": 0.5}, ids=zerovals.keys())
        rs.add(refA).add(refB)

        # compute feature priors
        rs.prep()

        # every feature should appear among the row names
        self.assertEqual(set(rs.row_names), set(zerovals.keys()))

        # look up all row indexes before checking any prior
        index = {key: rs.rows[key] for key in ("a", "b", "d")}
        self.assertEqual(rs.row_priors[index["a"]], 1,
                         "refA and refB both have a")
        self.assertEqual(rs.row_priors[index["b"]], 0.4,
                         "only refB has b, so 0.8/2")
        self.assertEqual(rs.row_priors[index["d"]], 0.2,
                         "value is 1/num features")
Ejemplo n.º 11
0
    def test_positive_parent_multi(self):
        """Find a positive ancestor when a term has several parents."""

        # ontology in which Y7 is connected to both Y2 and Y1
        multi_obo = MinimalObo(join(testdir, "Ymulti.obo"))
        priors = dict.fromkeys(multi_obo.ids(), 0.0001)
        for term in ("Y:003", "Y:005"):
            priors[term] = 0.0002

        # two references that differ slightly
        rs = ReferenceSet({"refA": 0.5, "refB": 0.5},
                          ids=multi_obo.ids(),
                          row_priors=priors)
        refA = Representation(name="refA")
        refA.set("Y:002", 0.5).set("Y:005", 1).impute(multi_obo, priors)
        refB = Representation(name="refB")
        refB.set("Y:001", 0.5).impute(multi_obo, priors)
        rs.add(refA).add(refB)
        rs.learn_obo(multi_obo)

        from_refA = rs._positive_ancestor(rs.columns["refA"],
                                          rs.rows["Y:007"])
        self.assertEqual(from_refA, rs.rows["Y:002"],
                         "Y2 is a positive ancestor")
        from_refB = rs._positive_ancestor(rs.columns["refB"],
                                          rs.rows["Y:007"])
        self.assertEqual(from_refB, rs.rows["Y:001"],
                         "Y1 is a positive immediate parent")
Ejemplo n.º 12
0
    def test_FN_decrease_scaled_with_ref(self):
        """Same negative model value; the drop depends on the reference."""

        chains = {}
        for model_name, ref_name in (("FN1", "refB"), ("FN2", "refB2")):
            model = Representation(name=model_name).set("Y:005", 0.01)
            chain = self.rs.inference_chain(model, ref_name, verbose=True)
            chain.evaluate_inference()
            chains[ref_name] = chain
        vs_refB, vs_refB2 = chains["refB"], chains["refB2"]

        # both cases should lead to decrease in score
        self.assertLess(vs_refB.posterior, vs_refB.prior)
        self.assertLess(vs_refB2.posterior, vs_refB2.prior)
        # steeper decrease against refB, whose positive is stronger
        self.assertLess(vs_refB.posterior, vs_refB2.posterior)
Ejemplo n.º 13
0
    def test_FN_decrease_scales_with_model(self):
        """A lower negative model value decreases the score further."""

        mild = Representation(name="FN").set("Y:005", 0.01)
        strong = Representation(name="FN").set("Y:005", 0.001)
        evaluated = []
        for model in (mild, strong):
            chain = self.rs.inference_chain(model, "refB", verbose=True)
            chain.evaluate_inference()
            evaluated.append(chain)
        chain_mild, chain_strong = evaluated

        # both models should decrease score
        self.assertLess(chain_mild.posterior, chain_mild.prior)
        self.assertLess(chain_strong.posterior, chain_strong.prior)
        # the model with the smaller value should decrease more
        self.assertLess(chain_strong.posterior, chain_mild.posterior)
Ejemplo n.º 14
0
    def test_FN_can_decrease_score(self):
        """A negative model value against refB lowers the score."""

        model = Representation(name="FN").set("Y:005", 0.01)
        chain = self.rs.inference_chain(model, "refB", verbose=True)
        chain.evaluate_inference()
        posterior, prior = chain.posterior, chain.prior
        self.assertLess(posterior, prior)
Ejemplo n.º 15
0
    def test_AN_leaves_score_unchanged(self):
        """A negative model value against refA keeps the score at the prior."""

        model = Representation(name="FN").set("Y:005", 0.01)
        chain = self.rs.inference_chain(model, "refA", verbose=True)
        chain.evaluate_inference()
        posterior, prior = chain.posterior, chain.prior
        self.assertAlmostEqual(posterior, prior)
Ejemplo n.º 16
0
    def test_empty_representation(self):
        """A representation built without arguments has no data or name."""

        blank = Representation()
        item_count = len(blank.data)
        self.assertEqual(item_count, 0, "representation should be empty")
        self.assertEqual(blank.name,
                         None,
                         "representation should not have a name")
Ejemplo n.º 17
0
    def test_add_without_name_raises(self):
        """Adding an unnamed representation to a reference set raises."""

        unnamed = Representation().set("DOID:0014667", 0.4)
        rs = ReferenceSet({"refA": 0.5, "refB": 0.5}, ids=obo.ids())
        # callable form of assertRaises; equivalent to the with-block form
        self.assertRaises(Exception, rs.add, unnamed)
Ejemplo n.º 18
0
    def test_FP2_with_null(self):
        """FP2: model phenotype Y:002 (moderately high prior) vs null."""

        model = Representation(name="model").set("Y:002", 0.9)
        chain = self.rs.inference_chain(model, "null")
        chain.evaluate()
        # the score must not rise above the prior
        posterior, prior = chain.posterior, chain.prior
        self.assertLessEqual(posterior, prior)
Ejemplo n.º 19
0
    def test_FP3_with_null(self):
        """FP3: model phenotype Y:003 (fairly high prior) vs null."""

        model = Representation(name="model").set("Y:003", 0.9)
        chain = self.rs.inference_chain(model, "null", verbose=True)
        chain.evaluate()
        # the score must not rise above the prior
        posterior, prior = chain.posterior, chain.prior
        self.assertLessEqual(posterior, prior)
Ejemplo n.º 20
0
    def test_positive_parent(self):
        """Find the nearest term with a positive value, self included."""

        rs = ReferenceSet({"refA": 0.5}, ids=Yobo.ids(), row_priors=Ydefaults)
        refA = Representation(name="refA")
        refA.set("Y:002", 0.5).set("Y:005", 1).impute(Yobo, Ydefaults)
        rs.add(refA)
        rs.learn_obo(Yobo)

        refAindex = rs.columns["refA"]
        # (query term, expected positive term, assertion message)
        cases = (
            ("Y:002", "Y:002", "Y2 is itself is positive"),
            ("Y:007", "Y:002", "Y2 is immediate parent of Y7"),
            ("Y:006", "Y:005", "Y5 is immediate parent of Y6"),
        )
        for query, expected, msg in cases:
            found = rs._positive_ancestor(refAindex, rs.rows[query])
            self.assertEqual(found, rs.rows[expected], msg)
Ejemplo n.º 21
0
def get_model_representations(dbpath,
                              obo,
                              log=None,
                              log_prefix="",
                              model_names=None):
    """Load model phenotype data from the db into representations.

    Args:
        dbpath        path to the database
        obo           ontology object; used to canonicalize phenotypes and
                      to look up replacements for terms it has but does
                      not report as valid
        log           optional function called with a message for each
                      skipped phenotype
        log_prefix    text placed at the start of each log message
        model_names   model ids to load; when None, the names come from
                      get_model_names(dbpath)

    Returns:
        dict mapping each model id to its representation
    """

    if model_names is None:
        model_names = get_model_names(dbpath)
    wanted = set(model_names)
    # start every requested model with an empty representation
    result = {name: Representation(name=name) for name in model_names}
    phen_priors = get_phenotype_priors(dbpath)

    for row in DBGenerator(ModelPhenotypeTable(dbpath)).next():
        model_id = row["id"]
        phenotype = obo.canonical(row["phenotype"])
        # rows for models outside the requested set are ignored
        if model_id not in wanted:
            continue
        # a term the ontology has but does not report as valid is swapped
        # for its replacement
        if obo.has(phenotype) and not obo.valid(phenotype):
            phenotype = obo.replaced_by(phenotype)
        # phenotypes that end up as None are logged (if possible), skipped
        if phenotype is None:
            if log is not None:
                msg = ("Skipping phenotype " + row["phenotype"]
                       + " in model " + model_id)
                log(log_prefix + " - " + msg)
            continue
        result[model_id] = add_data_to_model(result[model_id], phenotype,
                                             row["value"], row["TPR"],
                                             row["FPR"], phen_priors)
    return result
Ejemplo n.º 22
0
    def __init__(self,
                 config,
                 references=None,
                 models=None,
                 log=None,
                 run_msg="Packet "):
        """A runnable class for processing a set of references and models

        :param config: object of class PhenoscoringConfig; its stamp
            attribute is stored as the timestamp for calculations
        :param references: iterable with reference names; None (the
            default) is treated as an empty collection
        :param models: iterable with model names; None (the default) is
            treated as an empty collection
        :param log: a logging function
        :param run_msg: a template for a status message
        """

        self.config = config
        self.stamp = config.stamp
        self.log = log
        self.run_msg = run_msg
        self.phen_priors = None
        self.ref_priors = None
        self.general_refset = None
        self.specific_refset = None

        # setup calculation with references and stub models; the None
        # defaults must not reach set() or the loop, which would raise
        # TypeError
        self.references = set() if references is None else set(references)
        self.models = dict()
        if models is not None:
            for model_id in models:
                self.models[model_id] = Representation(name=model_id)
Ejemplo n.º 23
0
    def test_general_representation_get(self):
        """get() after specific values are set first, defaults second."""

        rr = Representation(dict(xyz=0.2))
        rr.set("bob", 0.4)
        rr.set("xyz", 0.3)
        rr.defaults(self.defaults)

        # (key, expected value, assertion message)
        expectations = (
            ("bob", 0.4, "value should come from manual input"),
            ("abc", 0.1, "value should come from defaults dict"),
            ("xyz", 0.3, "value should come from manual override"),
        )
        for key, value, msg in expectations:
            self.assertEqual(rr.get(key), value, msg)
Ejemplo n.º 24
0
 def test_specific_term_FP_increases(self):
     """A specific model should score higher for the more similar disease."""

     model = Representation(name="specific").set("Y:008", 0.8)
     scores = self.rs.inference(model)

     d3_score = scores["DISEASE:3"]
     d1_score = scores["DISEASE:1"]
     self.assertGreater(d3_score, d1_score,
                        "D3 has Y2, which is close to Y1")
Ejemplo n.º 25
0
 def test_refset2_inference(self):
     """Inference on the specific refset is lower than on the general."""

     model = Representation(name="custom").set("Y:001", 0.8)
     general_scores = self.rs.inference(model)
     specific_scores = self.rs2.inference(model)
     # Y:001 is shared by DISEASE:1 and DISEASE:2, so specific inf down
     specific_d1 = specific_scores["DISEASE:1"]
     general_d1 = general_scores["DISEASE:1"]
     self.assertLess(specific_d1, general_d1)
Ejemplo n.º 26
0
 def test_model_rootonly(self):
     """Score a vague model; two diseases should come out about equal."""

     rr = Representation(name="vague").set("Y:004", 0.8)
     inf = self.rs.inference(rr)

     # The message must be passed by keyword: the third positional
     # argument of assertAlmostEqual is `places`, so a positional string
     # would make the comparison raise TypeError whenever the two values
     # are not exactly equal.
     self.assertAlmostEqual(inf["DISEASE:1"], inf["DISEASE:2"],
                            msg="most diseases about the same")
Ejemplo n.º 27
0
    def test_FP7_with_null(self):
        """FP7: model phenotype Y:007 (low prior) compared against null."""

        model = Representation(name="model").set("Y:007", 0.9)
        chain = self.rs.inference_chain(model, "null", verbose=True)
        chain.evaluate()
        posterior, prior = chain.posterior, chain.prior
        self.assertLess(posterior, prior, "must be strictly lower")
Ejemplo n.º 28
0
    def test_impute_fromseeds_lowfirst(self):
        """Impute from explicit seeds, with the higher term seeded first."""

        rr = Representation(dict())
        # The input is inconsistent on purpose: DOID:4 is higher in the
        # tree, so it cannot have a lower value than DOID:0014667.
        rr.set("DOID:0014667", 0.4).set("DOID:4", 0.1)
        rr.impute(self.obo, self.obodef, seeds=["DOID:4", "DOID:0014667"])

        # the two other children are expected to hold the DOID:4 value
        for child in ("DOID:0080015", "DOID:655"):
            self.assertEqual(rr.get(child), 0.1, "child of DOID:4")
Ejemplo n.º 29
0
    def test_general_representation_get2(self):
        """get() after defaults are applied first, specific values second."""

        # same checks as the defaults-last variant, with the order swapped
        rr = Representation(dict(abc=0.1, xyz=0.2))
        rr.defaults(self.defaults)
        rr.set("bob", 0.4)
        rr.set("xyz", 0.3)

        # (key, expected value, assertion message)
        expectations = (
            ("bob", 0.4, "value should come from manual input"),
            ("abc", 0.1, "value should come from defaults dict"),
            ("xyz", 0.3, "value should come from manual override"),
        )
        for key, value, msg in expectations:
            self.assertEqual(rr.get(key), value, msg)
Ejemplo n.º 30
0
    def test_TN_strong_model(self):
        """TN against a negative ref raises the score; stronger TN more."""

        # two models that match the ref phenotype as TN; the second has
        # the lower value
        weak_tn = Representation(name="model").set("Y:002", 0.01)
        strong_tn = Representation(name="model").set("Y:002", 0.001)

        # build and evaluate one inference chain per model
        evaluated = []
        for model in (weak_tn, strong_tn):
            chain = self.rs.inference_chain(model, "refA", verbose=True)
            chain.evaluate_inference()
            evaluated.append(chain)
        chain_weak, chain_strong = evaluated

        # both comparisons should lead to higher scores
        self.assertGreater(chain_weak.posterior, chain_weak.prior)
        self.assertGreater(chain_strong.posterior, chain_strong.prior)
        # the model with the lower value should end higher
        self.assertGreater(chain_strong.posterior, chain_weak.posterior)