Exemple #1
0
    def test_positive_parent_multi(self):
        """Locate positive ancestors when a term has more than one parent."""

        # Ymulti.obo is an ontology in which Y7 is linked to both Y2 and Y1
        ontology = MinimalObo(join(testdir, "Ymulti.obo"))
        priors = {term: 0.0001 for term in ontology.ids()}
        for raised in ("Y:003", "Y:005"):
            priors[raised] = 0.0002
        # two references that differ slightly from each other
        refset = ReferenceSet({"refA": 0.5, "refB": 0.5},
                              ids=ontology.ids(),
                              row_priors=priors)
        rep_a = Representation(name="refA")
        rep_a.set("Y:002", 0.5).set("Y:005", 1).impute(ontology, priors)
        rep_b = Representation(name="refB")
        rep_b.set("Y:001", 0.5).impute(ontology, priors)
        refset.add(rep_a).add(rep_b)
        refset.learn_obo(ontology)

        # (reference column, expected ancestor row, assertion message)
        expectations = (
            ("refA", "Y:002", "Y2 is a positive ancestor"),
            ("refB", "Y:001", "Y1 is a positive immediate parent"),
        )
        for column, ancestor, message in expectations:
            found = refset._positive_ancestor(refset.columns[column],
                                              refset.rows["Y:007"])
            self.assertEqual(found, refset.rows[ancestor], message)
Exemple #2
0
 def setUpClass(cls):
     """Prepare a reference set with a null and two strong references.

     Stores the phenotype priors, the three representations and the
     ReferenceSet (after learn_obo) as class attributes.
     """
     # phenotype priors that are all nonzero
     cls.priors = {
         "Y:004": 0.66,
         "Y:005": 0.25,
         "Y:006": 0.25,
         "Y:003": 0.66,
         "Y:001": 0.33,
         "Y:002": 0.33,
         "Y:007": 0.25,
         "Y:008": 0.25,
     }
     # references with some strong phenotypes; each one is built once
     cls.refnull = Representation(name="null")
     cls.refA = Representation(name="refA")
     cls.refA.set("Y:002", 1).impute(Yobo, cls.priors)
     cls.refB = Representation(name="refB")
     cls.refB.set("Y:001", 1).impute(Yobo, cls.priors)
     # values that still equal the prior are reset to half of the prior
     for phenotype, prior in cls.priors.items():
         for ref in (cls.refA, cls.refB):
             if ref.get(phenotype) == prior:
                 ref.set(phenotype, prior / 2)
     cls.rs = ReferenceSet(dict(null=0.3, refA=0.3, refB=0.3),
                           ids=Yobo.ids(),
                           row_priors=cls.priors)
     cls.rs.add(cls.refnull).add(cls.refA).add(cls.refB)
     cls.rs.learn_obo(Yobo)
Exemple #3
0
    def test_impute_fromseeds_auto(self):
        """Imputing with automatically-ordered seeds ignores input order."""

        # The raw values are inconsistent: DOID:4 is higher in the tree, so
        # it cannot end with a lower value than DOID:0014667. A low DOID:4
        # can still impact other branches.
        # NOTE(review): rr1 is built exactly like rr2 (no explicit seeds),
        # although the first assertion message mentions "manual" seeds -
        # confirm whether rr1 was meant to pass seeds=... to impute().
        rr1 = Representation(dict())
        rr1.set("DOID:0014667", 0.4).set("DOID:4", 0.1)
        rr1.impute(self.obo, self.obodef)

        # auto seeds
        rr2 = Representation(dict())
        rr2.set("DOID:0014667", 0.4).set("DOID:4", 0.1)
        rr2.impute(self.obo, self.obodef)

        # auto seeds, values set in the opposite order
        rr3 = Representation(dict())
        rr3.set("DOID:4", 0.1).set("DOID:0014667", 0.4)
        rr3.impute(self.obo, self.obodef)

        # assertEqual reports the differing values when the check fails
        self.assertEqual(rr1.data, rr2.data,
                         "auto and manual should have same data")
        self.assertEqual(rr2.data, rr3.data,
                         "should be = regardless of input order")

        self.assertGreater(rr1.data["DOID:0014667"], 0.2,
                           "DOID:0014667 increase by direct evidence")
        self.assertGreater(rr1.data["DOID:4"], 0.2,
                           "DOID:4 increases driven by 0014667")
        self.assertEqual(rr1.data["DOID:11044"], 0.1,
                         "low raw DOID:4 propagates down")
 def setUpClass(cls):
     """Build the pipeline once and load fixtures shared by the tests."""

     settings = CompleteTestConfig()
     settings.null_prior = 0.2
     cls.dbfile = settings.db
     cls.pipeline = Phenoscoring(settings)
     cls.pipeline.build()
     cls.obo = MinimalObo(check_file(settings.obo, settings.db, "obo"), True)

     # dummy default values: one dict of 0.2 and one of zeros
     cls.obodefaults = {term: 0.2 for term in cls.obo.ids()}
     cls.obozeros = {term: 0 for term in cls.obo.ids()}

     # both reference sets are loaded from the db and learn the ontology
     cls.ref_priors = get_ref_priors(settings.db)
     cls.rs, cls.rs2 = get_refsets(settings.db, ref_priors=cls.ref_priors)
     for refset in (cls.rs, cls.rs2):
         refset.learn_obo(cls.obo)

     # small representations for testing individual configurations
     cls.y3model = Representation(name="Y3").set("Y:003", 0.8)
     cls.refA = Representation(name="refA").set("Y:002", 1)
     cls.refA.defaults(cls.obozeros)
     # NOTE(review): refB uses the same phenotype as refA - confirm intended
     cls.refB = Representation(name="refB").set("Y:002", 1)
     cls.refB.defaults(cls.obozeros)
Exemple #5
0
    def test_prep_row_priors(self):
        """prep() fills in row priors for every feature."""

        # two references with annotations, both completed with zerovals
        rep_a = Representation(data=dict(a=1), name="refA")
        rep_a.defaults(zerovals)
        rep_b = Representation(data=dict(a=1, b=0.8), name="refB")
        rep_b.defaults(zerovals)
        refset = ReferenceSet({"refA": 0.5, "refB": 0.5}, ids=zerovals.keys())
        refset.add(rep_a).add(rep_b)
        # compute the feature priors
        refset.prep()

        # every feature must be present among the rows
        self.assertEqual(set(refset.row_names), set(zerovals.keys()))

        # look up all row indexes first, then check the priors one by one
        index = {feature: refset.rows[feature] for feature in ("a", "b", "d")}
        expectations = (
            ("a", 1, "refA and refB both have a"),
            ("b", 0.4, "only refB has b, so 0.8/2"),
            ("d", 0.2, "value is 1/num features"),
        )
        for feature, prior, message in expectations:
            self.assertEqual(refset.row_priors[index[feature]], prior,
                             message)
Exemple #6
0
    def setUpClass(cls):
        """Prepare a reference set whose references carry low-valued phenotypes.

        The references annotate Y:002 (refA) and Y:001 (refB, refB2);
        refB and refB2 additionally carry Y:006 at different strengths.
        """
        # phenotype priors that are all nonzero
        cls.priors = {
            "Y:004": 0.66,
            "Y:005": 0.25,
            "Y:006": 0.25,
            "Y:003": 0.66,
            "Y:001": 0.33,
            "Y:002": 0.33,
            "Y:007": 0.25,
            "Y:008": 0.25,
        }

        cls.refnull = Representation(name="null")
        # refA: a single negative phenotype
        cls.refA = Representation(name="refA")
        cls.refA.set("Y:002", 0.1).impute(Yobo, cls.priors)
        # refB: one negative and one positive phenotype
        cls.refB = Representation(name="refB")
        cls.refB.set("Y:001", 0.01).set("Y:006", 0.8).impute(Yobo, cls.priors)
        # refB2: like refB, but with a weaker positive phenotype
        cls.refB2 = Representation(name="refB2")
        cls.refB2.set("Y:001", 0.1).set("Y:006", 0.5).impute(Yobo, cls.priors)

        reference_priors = {"null": 0.4, "refA": 0.3, "refB": 0.3,
                            "refB2": 0.3}
        cls.rs = ReferenceSet(reference_priors,
                              ids=Yobo.ids(),
                              row_priors=cls.priors)
        cls.rs.add(cls.refnull).add(cls.refA).add(cls.refB).add(cls.refB2)
        cls.rs.learn_obo(Yobo)
Exemple #7
0
 def test_priors_reps(self):
     """get_priors_from_reps reports a positive count for two representations."""

     reps = {
         "A": Representation(name="A"),
         "B": Representation(name="B"),
     }
     reps["A"].set("Y:006", 1)
     reps["B"].set("Y:001", 0.8)

     # only the reported count is checked; the priors are not inspected
     _, num = get_priors_from_reps(reps, obo)
     self.assertGreater(num, 0)
Exemple #8
0
    def test_FP_same_parent(self):
        """False positives under the same parent give equal posteriors."""

        # both models are FP using children of a refA phenotype
        specs = (("close", "Y:007"), ("far", "Y:008"))
        models = [Representation(name=name).set(phenotype, 0.9)
                  for name, phenotype in specs]
        chains = [self.rs.inference_chain(model, "refA", verbose=True)
                  for model in models]
        for chain in chains:
            chain.evaluate()
        close, far = chains
        self.assertEqual(close.posterior, far.posterior)
Exemple #9
0
def get_concise_refdict(dbpath):
    """Collect concise reference phenotypes from the db into a dict.

    Returns a dict mapping reference ids to Representation objects. An
    empty representation under the key "null" is always included.
    """

    result = {"null": Representation(name="null")}
    rows = DBGenerator(ReferenceConcisePhenotypeTable(dbpath))
    for record in rows.next():
        key = record["id"]
        try:
            rep = result[key]
        except KeyError:
            # first row seen for this reference
            rep = result[key] = Representation(name=key)
        rep.set(record["phenotype"], record["value"])
    return result
Exemple #10
0
    def setUp(self):
        """Create a prepared reference set holding refA and refB."""

        # the priors name a null and two references; two are added below
        reference_priors = {"null": 0.7, "refA": 0.15, "refB": 0.15}
        rep_a = Representation(data=dict(a=1, b=0.8), name="refA")
        rep_a.defaults(zerovals)
        rep_b = Representation(data=dict(a=1, d=0.2), name="refB")
        rep_b.defaults(zerovals)
        self.rs = ReferenceSet(reference_priors, ids=zerovals.keys())
        self.rs.add(rep_a).add(rep_b)
        self.rs.prep()
Exemple #11
0
    def test_impute_down_ordering(self):
        """Imputing from low values gives the same data for any seed order."""

        forward = Representation(dict())
        forward.set("DOID:3650", 0.01).set("DOID:0014667", 0.05)
        backward = Representation(dict())
        backward.set("DOID:3650", 0.01).set("DOID:0014667", 0.05)

        # same seeds, listed in opposite orders
        forward.impute(self.obo,
                       self.obodef,
                       seeds=["DOID:3650", "DOID:0014667"])
        backward.impute(self.obo,
                        self.obodef,
                        seeds=["DOID:0014667", "DOID:3650"])

        self.assertEqual(forward.data, backward.data, "all values the same")
Exemple #12
0
    def test_get_reference(self):
        """A representation fetched from a reference set equals the original."""

        rep_a = Representation(name="refA").set("DOID:0014667", 0.4)
        rep_a.impute(obo, obodefaults)
        rep_b = Representation(name="refB").set("DOID:0080015", 0.6)
        rep_b.impute(obo, obodefaults)

        refset = ReferenceSet({"refA": 0.5, "refB": 0.5}, ids=obo.ids())
        refset.add(rep_a).add(rep_b)

        fetched = refset.get_representation("refA")
        self.assertTrue(fetched.equal(rep_a))
Exemple #13
0
    def test_FN_decrease_scales_with_model(self):
        """A lower model value for Y:005 lowers the refB posterior further."""

        weak = Representation(name="FN").set("Y:005", 0.01)
        strong = Representation(name="FN").set("Y:005", 0.001)
        chain_weak = self.rs.inference_chain(weak, "refB", verbose=True)
        chain_weak.evaluate_inference()
        chain_strong = self.rs.inference_chain(strong, "refB", verbose=True)
        chain_strong.evaluate_inference()

        # each model on its own must push the score below the prior
        for chain in (chain_weak, chain_strong):
            self.assertLess(chain.posterior, chain.prior)
        # the model with the lower value must push it down more
        self.assertLess(chain_strong.posterior, chain_weak.posterior)
Exemple #14
0
    def test_FN_decrease_scaled_with_ref(self):
        """The same low model value lowers refB more than refB2."""

        model_b = Representation(name="FN1").set("Y:005", 0.01)
        chain_b = self.rs.inference_chain(model_b, "refB", verbose=True)
        chain_b.evaluate_inference()
        model_b2 = Representation(name="FN2").set("Y:005", 0.01)
        chain_b2 = self.rs.inference_chain(model_b2, "refB2", verbose=True)
        chain_b2.evaluate_inference()

        # the score must drop below the prior against both references
        for chain in (chain_b, chain_b2):
            self.assertLess(chain.posterior, chain.prior)
        # the drop is steeper against refB, whose positive is stronger
        self.assertLess(chain_b.posterior, chain_b2.posterior)
Exemple #15
0
    def test_FP3_with_null(self):
        """A model with Y:003 does not raise the score of the null reference."""

        # Y:003 is the phenotype with a fairly high prior
        model = Representation(name="model").set("Y:003", 0.9)
        chain = self.rs.inference_chain(model, "null", verbose=True)
        chain.evaluate()
        self.assertLessEqual(chain.posterior, chain.prior)
Exemple #16
0
def get_raw_references(datapath, phenotype_set):
    """Read a tab-separated phenotype file into raw reference representations.

    Raw phenotypes are phenotypes in the original ontology. Rows whose
    "Reference" field fails valid_reference_id are ignored.

    Args:
        datapath       path to phenotab file
        phenotype_set  set of acceptable phenotypes

    Returns:
        two objects
        - dict mapping reference codes ("Source:Disease_number") to
          Representation objects carrying a title and phenotype values
        - set of phenotypes that are not in phenotype_set
    """

    references = {}
    unmapped = set()
    with open_file(datapath, "rt") as handle:
        records = csv.DictReader(handle, delimiter="\t", quotechar="\"")
        for record in records:
            if not valid_reference_id(record["Reference"]):
                continue
            phenotype = record["Phenotype"]
            if phenotype not in phenotype_set:
                unmapped.add(phenotype)
                continue
            frequency = tofreq[record["Frequency"]]
            ref_code = record["Source"] + ":" + record["Disease_number"]
            entry = references.get(ref_code)
            if entry is None:
                # first row for this reference: create it and keep its title
                entry = references[ref_code] = Representation(name=ref_code)
                entry.title = record["Disease_title"]
            entry.set(phenotype, frequency)

    return references, unmapped
Exemple #17
0
    def test_FN_can_decrease_score(self):
        """A low model value for Y:005 lowers the score against refB."""

        model = Representation(name="FN").set("Y:005", 0.01)
        chain = self.rs.inference_chain(model, "refB", verbose=True)
        chain.evaluate_inference()
        self.assertLess(chain.posterior, chain.prior)
Exemple #18
0
    def test_AN_leaves_score_unchanged(self):
        """A low model value for Y:005 leaves the refA score at its prior."""

        model = Representation(name="FN").set("Y:005", 0.01)
        chain = self.rs.inference_chain(model, "refA", verbose=True)
        chain.evaluate_inference()
        self.assertAlmostEqual(chain.posterior, chain.prior)
Exemple #19
0
    def test_impute_fromseeds_highfirst(self):
        """Imputation from manually-specified seeds raises the shared ancestor."""

        rep = Representation(dict())
        # data for two children; DOID:4 is higher in the tree, so should gain
        rep.set("DOID:0014667", 0.4)
        rep.set("DOID:0080015", 0.3)

        rep.impute(self.obo,
                   self.obodef,
                   seeds=["DOID:0014667", "DOID:0080015"])

        # the seeds themselves keep their values
        for term, value in (("DOID:0014667", 0.4), ("DOID:0080015", 0.3)):
            self.assertAlmostEqual(rep.get(term), value, msg="should remain")

        # the ancestor combines both children with the 0.2 prior
        combined = 1 - ((1 - 0.4) * (1 - 0.3) * (1 - 0.2))
        self.assertAlmostEqual(
            rep.get("DOID:4"),
            combined,
            msg="ancestor gains from two children (and prior)")
        self.assertAlmostEqual(rep.get("DOID:655"),
                               0.2,
                               msg="remain; new DOID:4")
Exemple #20
0
    def test_FP2_with_null(self):
        """A model with Y:002 does not raise the score of the null reference."""

        # Y:002 is the phenotype with a moderately high prior
        model = Representation(name="model").set("Y:002", 0.9)
        chain = self.rs.inference_chain(model, "null")
        chain.evaluate()
        self.assertLessEqual(chain.posterior, chain.prior)
Exemple #21
0
    def test_set_feature_float(self):
        """A value set as an integer is retrieved as a float."""

        rr = Representation()
        rr.set("abc", 1)
        self.assertEqual(rr.get("abc"), 1.0)
        # assertIs keeps the exact-type check and names both types on failure
        self.assertIs(type(rr.get("abc")), float)
Exemple #22
0
    def test_add_without_name_raises(self):
        """Adding an unnamed representation to a reference set raises."""

        unnamed = Representation().set("DOID:0014667", 0.4)
        refset = ReferenceSet({"refA": 0.5, "refB": 0.5}, ids=obo.ids())
        with self.assertRaises(Exception):
            refset.add(unnamed)
Exemple #23
0
    def __init__(self,
                 config,
                 references=None,
                 models=None,
                 log=None,
                 run_msg="Packet "):
        """A runnable class for processing a set of references and models

        :param config: object of class PhenoscoringConfig; its ``stamp``
            attribute is stored as the timestamp for calculations
        :param references: iterable with reference names, or None for none
        :param models: iterable with model names, or None for none
        :param log: a logging function
        :param run_msg: a template for a status message
        """

        self.config = config
        self.stamp = config.stamp
        self.log = log
        self.run_msg = run_msg
        self.phen_priors = None
        self.ref_priors = None
        self.general_refset = None
        self.specific_refset = None

        # setup calculation with references and stub models; the None
        # defaults are treated as empty collections instead of raising
        self.references = set() if references is None else set(references)
        self.models = dict()
        if models is not None:
            for name in models:
                self.models[name] = Representation(name=name)
Exemple #24
0
def get_model_representations(dbpath,
                              obo,
                              log=None,
                              log_prefix="",
                              model_names=None):
    """Load model phenotype data from the db into representations.

    When model_names is None, the names come from get_model_names(dbpath).
    Rows for other models are ignored. Rows whose phenotype resolves to
    None are skipped and, when log is given, reported through it.

    Returns a dict mapping model names to the objects produced by
    add_data_to_model, starting from an empty Representation per model.
    """

    if model_names is None:
        model_names = get_model_names(dbpath)
    wanted = set(model_names)
    result = {name: Representation(name=name) for name in model_names}
    phen_priors = get_phenotype_priors(dbpath)
    rows = DBGenerator(ModelPhenotypeTable(dbpath))
    for row in rows.next():
        model_id = row["id"]
        phenotype = obo.canonical(row["phenotype"])
        # ignore rows for models that were not requested
        if model_id not in wanted:
            continue
        # a known but invalid phenotype is swapped for its replacement
        needs_replacement = obo.has(phenotype) and not obo.valid(phenotype)
        if needs_replacement:
            phenotype = obo.replaced_by(phenotype)
        if phenotype is None:
            if log is not None:
                msg = "Skipping phenotype " + row["phenotype"]
                msg = msg + " in model " + model_id
                log(log_prefix + " - " + msg)
            continue
        result[model_id] = add_data_to_model(result[model_id], phenotype,
                                             row["value"], row["TPR"],
                                             row["FPR"], phen_priors)
    return result
Exemple #25
0
def get_priors_from_models(models, categories, obo, dark=1):
    """Compute cohort-wide phenotype frequencies

    Arguments:
        models        dictionary of Entity objects
        categories    set, determines what models to use in the calculation
        obo           object of class Obo
        dark          integer, dark count for phenotype normalization

    Returns:
        dict mapping phenotypes (from obo) to values [0,1]
        integer, number of models used to inform the prior
    """

    # keep only the models that pass the category filter
    candidates = list(models.values())
    hits = filter_entities_cat(candidates, categories)

    # every phenotype starts at the dark count
    zeros = dict.fromkeys(obo.ids(), 0)
    counts = {phenotype: dark for phenotype in list(obo.ids())}
    for entity in hits:
        # concise representation taken from the entity data
        rep = Representation(name=entity.id)
        for datum in entity.data:
            rep.set(datum.phenotype, datum.value)
        # complete it over the whole ontology, then accumulate its values
        rep.impute(obo, zeros)
        for phenotype in obo.ids():
            counts[phenotype] += rep.data[phenotype]

    # turn the accumulated counts into frequencies
    num_hits = len(hits)
    return counts_p(counts, num_hits, dark), num_hits
Exemple #26
0
    def test_empty_representation(self):
        """A default representation has no data and no name."""

        blank = Representation()
        num_values = len(blank.data)
        self.assertEqual(num_values, 0, "representation should be empty")
        self.assertEqual(blank.name, None,
                         "representation should not have a name")
Exemple #27
0
    def test_FP7_with_null(self):
        """A model with Y:007 strictly lowers the score of the null reference."""

        # Y:007 is the phenotype with a low prior
        model = Representation(name="model").set("Y:007", 0.9)
        chain = self.rs.inference_chain(model, "null", verbose=True)
        chain.evaluate()
        self.assertLess(chain.posterior, chain.prior,
                        "must be strictly lower")
 def test_specific_term_FP_increases(self):
     """A specific model (Y:008) scores DISEASE:3 above DISEASE:1."""

     model = Representation(name="specific").set("Y:008", 0.8)
     scores = self.rs.inference(model)

     score_d3, score_d1 = scores["DISEASE:3"], scores["DISEASE:1"]
     self.assertGreater(score_d3, score_d1,
                        "D3 has Y2, which is close to Y1")
 def test_refset2_inference(self):
     """The second reference set scores DISEASE:1 lower than the first."""

     model = Representation(name="custom").set("Y:001", 0.8)
     general = self.rs.inference(model)
     specific = self.rs2.inference(model)
     # Y:001 is shared by DISEASE:1 and DISEASE:2, so specific inf down
     score_specific = specific["DISEASE:1"]
     score_general = general["DISEASE:1"]
     self.assertLess(score_specific, score_general)
 def test_model_rootonly(self):
     """A vague model (Y:004 only) scores DISEASE:1 and DISEASE:2 alike."""

     rr = Representation(name="vague").set("Y:004", 0.8)
     inf = self.rs.inference(rr)

     # The message must be passed by keyword: the third positional
     # argument of assertAlmostEqual is `places`, so a positional string
     # raises TypeError whenever the two values are not exactly equal.
     self.assertAlmostEqual(inf["DISEASE:1"], inf["DISEASE:2"],
                            msg="most diseases about the same")