def test_scores_bad_input(self):
    """Inference should raise when it is handed bad input."""

    ref_priors = {"refA": 0.5, "refB": 0.5}
    refset = ReferenceSet(ref_priors, ids=obo.ids())
    # a bare string is passed where a model is expected
    with self.assertRaises(Exception):
        refset.inference("refA")
def test_add_without_name_raises(self):
    """A representation constructed without a name cannot be added."""

    unnamed = Representation().set("DOID:0014667", 0.4)
    ref_priors = {"refA": 0.5, "refB": 0.5}
    refset = ReferenceSet(ref_priors, ids=obo.ids())
    with self.assertRaises(Exception):
        refset.add(unnamed)
def setUp(self):
    """Build and prep a small reference set shared by the tests."""

    # two references carry annotations; the priors also name a "null"
    ref_a = Representation(data={"a": 1, "b": 0.8}, name="refA")
    ref_a.defaults(zerovals)
    ref_b = Representation(data={"a": 1, "d": 0.2}, name="refB")
    ref_b.defaults(zerovals)

    ref_priors = {"null": 0.7, "refA": 0.15, "refB": 0.15}
    refset = ReferenceSet(ref_priors, ids=zerovals.keys())
    self.rs = refset
    refset.add(ref_a).add(ref_b)
    refset.prep()
def dict2referenceset(repdict, feature_ids, priors):
    """Create a ReferenceSet from a dictionary of representations.

    The representations are added exactly as given; this function does
    not itself call any imputation.

    :param repdict: a dictionary of Representation objects
        (only the values are used)
    :param feature_ids: list with all features
    :param priors: dict linking name to a prior probability
    :return: ReferenceSet object
    """

    result = ReferenceSet(priors, feature_ids)
    # keys are not needed here, so iterate over the values only
    for representation in repdict.values():
        result.add(representation)
    return result
def test_get_reference(self):
    """A representation added to a set can be fetched back by name."""

    first = Representation(name="refA").set("DOID:0014667", 0.4)
    first.impute(obo, obodefaults)
    second = Representation(name="refB").set("DOID:0080015", 0.6)
    second.impute(obo, obodefaults)

    refset = ReferenceSet({"refA": 0.5, "refB": 0.5}, ids=obo.ids())
    refset.add(first).add(second)

    fetched = refset.get_representation("refA")
    self.assertTrue(fetched.equal(first))
def test_positive_parent_multi(self):
    """Find a positive ancestor when terms have multiple parents."""

    # Ymulti.obo is an ontology in which Y7 is linked to both Y2 and Y1
    multi_obo = MinimalObo(join(testdir, "Ymulti.obo"))
    multi_defaults = dict.fromkeys(multi_obo.ids(), 0.0001)
    for term in ("Y:003", "Y:005"):
        multi_defaults[term] = 0.0002

    refset = ReferenceSet({"refA": 0.5, "refB": 0.5},
                          ids=multi_obo.ids(),
                          row_priors=multi_defaults)
    # two references that differ slightly from one another
    ref_a = Representation(name="refA")
    ref_a.set("Y:002", 0.5).set("Y:005", 1).impute(multi_obo, multi_defaults)
    ref_b = Representation(name="refB")
    ref_b.set("Y:001", 0.5).impute(multi_obo, multi_defaults)
    refset.add(ref_a).add(ref_b)
    refset.learn_obo(multi_obo)

    found_a = refset._positive_ancestor(refset.columns["refA"],
                                        refset.rows["Y:007"])
    self.assertEqual(found_a, refset.rows["Y:002"],
                     "Y2 is a positive ancestor")
    found_b = refset._positive_ancestor(refset.columns["refB"],
                                        refset.rows["Y:007"])
    self.assertEqual(found_b, refset.rows["Y:001"],
                     "Y1 is a positive immediate parent")
def test_between2(self):
    """A model equally similar to two references scores them equally."""

    # the reference universe holds just the two annotated references
    refset = ReferenceSet({"refA": 0.5, "refB": 0.5},
                          ids=self.obo.ids(),
                          row_priors=self.obodefaults)
    refset.add(self.refA).add(self.refB)
    refset.learn_obo(self.obo)
    refset.prep()

    scores = refset.inference(self.y3model)
    self.assertAlmostEqual(scores["refA"], scores["refB"],
                           msg="equally likely")
def setUpClass(cls):
    """Prepare shared priors, four references, and a reference set."""

    # phenotype priors that are nonzero (Y:002 and Y:001 are siblings)
    cls.priors = {
        "Y:004": 0.66,
        "Y:005": 0.25,
        "Y:006": 0.25,
        "Y:003": 0.66,
        "Y:001": 0.33,
        "Y:002": 0.33,
        "Y:007": 0.25,
        "Y:008": 0.25,
    }

    cls.refnull = Representation(name="null")
    # refA has a negative phenotype
    cls.refA = Representation(name="refA")
    cls.refA.set("Y:002", 0.1).impute(Yobo, cls.priors)
    # refB has a negative and a positive phenotype
    cls.refB = Representation(name="refB")
    cls.refB.set("Y:001", 0.01).set("Y:006", 0.8).impute(Yobo, cls.priors)
    # refB2 has a weaker positive phenotype
    cls.refB2 = Representation(name="refB2")
    cls.refB2.set("Y:001", 0.1).set("Y:006", 0.5).impute(Yobo, cls.priors)

    ref_priors = {"null": 0.4, "refA": 0.3, "refB": 0.3, "refB2": 0.3}
    cls.rs = ReferenceSet(ref_priors, ids=Yobo.ids(), row_priors=cls.priors)
    cls.rs.add(cls.refnull).add(cls.refA).add(cls.refB).add(cls.refB2)
    cls.rs.learn_obo(Yobo)
def setUpClass(cls):
    """Prepare shared priors, three references, and a reference set.

    refA and refB each get one phenotype set to 1; every other phenotype
    whose value still equals its prior after imputation is halved.
    """

    # set some phenotype priors that are nonzero
    cls.priors = dict()
    cls.priors["Y:004"] = 0.66
    cls.priors["Y:005"] = cls.priors["Y:006"] = 0.25
    cls.priors["Y:003"] = 0.66
    cls.priors["Y:001"] = cls.priors["Y:002"] = 0.33
    cls.priors["Y:007"] = cls.priors["Y:008"] = 0.25

    # create references with some strong phenotypes
    # (refB is constructed once; a redundant second construction was dropped)
    cls.refnull = Representation(name="null")
    cls.refA = Representation(name="refA")
    cls.refA.set("Y:002", 1).impute(Yobo, cls.priors)
    cls.refB = Representation(name="refB")
    cls.refB.set("Y:001", 1).impute(Yobo, cls.priors)

    # reset missing phenotypes to smaller-than-prior
    for k, v in cls.priors.items():
        for ref in (cls.refA, cls.refB):
            if ref.get(k) == v:
                ref.set(k, v / 2)

    cls.rs = ReferenceSet(dict(null=0.3, refA=0.3, refB=0.3),
                          ids=Yobo.ids(), row_priors=cls.priors)
    cls.rs.add(cls.refnull).add(cls.refA).add(cls.refB)
    cls.rs.learn_obo(Yobo)
def test_between_refs_and_null(self):
    """Inference when the model is similar to two refs and there is a null."""

    # priors name two annotated references plus a null
    ref_priors = {"null": 0.8, "refA": 0.15, "refB": 0.15}
    refset = ReferenceSet(ref_priors,
                          ids=self.obo.ids(),
                          row_priors=self.obodefaults)
    refset.add(self.refA).add(self.refB)
    refset.learn_obo(self.obo)
    refset.prep()

    scores = refset.inference(self.y3model)
    self.assertAlmostEqual(scores["refA"], scores["refB"],
                           msg="equally likely")
def test_empty_representation(self):
    """A freshly created reference set has one data row per reference."""

    refset = ReferenceSet({"refA": 0.5, "refB": 1}, ids=obo.ids())
    feature_count = len(obo.ids())

    self.assertEqual(len(refset.data), 2, "refset should allocate memory")
    for ref_index in (0, 1):
        self.assertEqual(len(refset.data[ref_index]), feature_count)
def test_str(self):
    """The string form of a reference set mentions its references."""

    ref_priors = {"null": 0.7, "refA": 0.15, "refB": 0.15}
    refset = ReferenceSet(ref_priors, ids=zerovals.keys())
    summary = str(refset)

    self.assertTrue("refA" in summary)
    self.assertFalse("hello" in summary)
def test_difference_in_priors(self):
    """With equal matches, the reference with the lower prior scores lower."""

    # priors name two annotated references plus a null
    ref_priors = {"null": 0.85, "refA": 0.05, "refB": 0.1}
    refset = ReferenceSet(ref_priors,
                          ids=self.obo.ids(),
                          row_priors=self.obodefaults)
    refset.add(self.refA).add(self.refB)
    refset.learn_obo(self.obo)
    refset.prep()

    scores = refset.inference(self.y3model)
    self.assertLess(scores["refA"], scores["refB"],
                    msg="equal match, but A has weaker prior")
def test_model_nodata(self):
    """A model without data scores unequal references equally."""

    empty_model = Representation(name="nodata")
    # priors name two annotated references plus a null
    ref_priors = {"null": 0.8, "refA": 0.1, "refB": 0.1}
    refset = ReferenceSet(ref_priors,
                          ids=self.obo.ids(),
                          row_priors=self.obodefaults)
    refset.add(self.refA).add(self.refB)
    refset.learn_obo(self.obo)
    refset.prep()

    scores = refset.inference(empty_model)
    self.assertAlmostEqual(scores["refA"], scores["refB"],
                           msg="equally likely")
def test_positive_parent(self):
    """Look up the positive ancestor of several terms for one reference."""

    refset = ReferenceSet({"refA": 0.5}, ids=Yobo.ids(),
                          row_priors=Ydefaults)
    ref_a = Representation(name="refA")
    ref_a.set("Y:002", 0.5).set("Y:005", 1).impute(Yobo, Ydefaults)
    refset.add(ref_a)
    refset.learn_obo(Yobo)

    column = refset.columns["refA"]
    rows = refset.rows
    self.assertEqual(refset._positive_ancestor(column, rows["Y:002"]),
                     rows["Y:002"],
                     "Y2 is itself is positive")
    self.assertEqual(refset._positive_ancestor(column, rows["Y:007"]),
                     rows["Y:002"],
                     "Y2 is immediate parent of Y7")
    self.assertEqual(refset._positive_ancestor(column, rows["Y:006"]),
                     rows["Y:005"],
                     "Y5 is immediate parent of Y6")
def test_baddata(self):
    """Inference should raise when the input is not a usable model."""

    rs = ReferenceSet(dict(refA=0.5, refB=0.5), ids=["a", "b", "c"])
    rs.prep()
    # an integer is passed where a model is expected; the raised
    # exception object is not inspected, so it is not bound to a name
    with self.assertRaises(Exception):
        rs.inference(5)
def test_prep_row_priors(self):
    """prep() computes a prior for every feature (row)."""

    # two references with explicit annotations, defaults filled from zerovals
    ref_a = Representation(data={"a": 1}, name="refA")
    ref_a.defaults(zerovals)
    ref_b = Representation(data={"a": 1, "b": 0.8}, name="refB")
    ref_b.defaults(zerovals)

    refset = ReferenceSet({"refA": 0.5, "refB": 0.5}, ids=zerovals.keys())
    refset.add(ref_a).add(ref_b)
    # compute feature priors
    refset.prep()

    # every feature must be represented among the rows
    self.assertEqual(set(refset.row_names), set(zerovals.keys()))

    # features declared in representations should get reasonable priors
    index_a = refset.rows["a"]
    index_b = refset.rows["b"]
    index_d = refset.rows["d"]
    self.assertEqual(refset.row_priors[index_a], 1,
                     "refA and refB both have a")
    self.assertEqual(refset.row_priors[index_b], 0.4,
                     "only refB has b, so 0.8/2")
    self.assertEqual(refset.row_priors[index_d], 0.2,
                     "value is 1/num features")
def test_learn_from_obo(self):
    """learn_obo() fills in parents for every feature."""

    ref_a = Representation(name="refA").set("DOID:0014667", 0.4)
    refset = ReferenceSet({"refA": 0.5, "refB": 0.5}, ids=obo.ids())
    refset.add(ref_a)

    # nothing is known about parents until the ontology is learned
    self.assertEqual(refset.parents, None)
    refset.learn_obo(obo)
    self.assertEqual(len(refset.parents), len(obo.ids()))
def test_subset(self):
    """Make a refset smaller by ignoring some features."""

    # create a reference set
    rs = ReferenceSet(OrderedDict(refA=0.5, refB=0.5),
                      ids=testfeatures, row_priors=zerovals)
    # add some data to the two representations
    r1 = Representation(name="refA")
    r1.set("a", 0.1).set("b", 0.2).set("c", 0.3).set("d", 0.4)
    r2 = Representation(name="refB")
    r2.set("c", 0.6).set("d", 0.7).set("e", 0.8)
    rs.add(r1).add(r2)

    # manually create arrays with the reference set data
    expected_raw_A = [0.1, 0.2, 0.3, 0.4, 0.0]
    expected_raw_B = [0.0, 0.0, 0.6, 0.7, 0.8]
    # assertEqual, not assertTrue: assertTrue(x, y) would treat y as the
    # failure message and pass for any non-empty x
    self.assertEqual(list(rs.data[0]), expected_raw_A)
    self.assertEqual(list(rs.data[1]), expected_raw_B)

    # subset to a smaller number of features
    # myids - here c is repeated twice, z is not in the original features
    myids = ["e", "c", "a", "z", "c"]
    rs = ReferenceMatrix(rs, myids)

    # check new shape (three features and two references)
    self.assertEqual(len(rs.rows), 3)
    self.assertEqual(len(rs.row_names), 3)
    self.assertEqual(rs.data.shape, (3, 2))

    # check that the relevant rows are present
    result = set(rs.rows.keys())
    expected = set(myids)
    expected.remove("z")
    self.assertEqual(result, expected)

    # check data subset in output (compared through column sums)
    output_A = [0.1, 0.3, 0.0]
    output_B = [0.0, 0.6, 0.8]
    self.assertEqual(sum(rs.data[:, 0]), sum(output_A))
    self.assertEqual(sum(rs.data[:, 1]), sum(output_B))
def get_modelsets(dbpath, obo, partition_size=4096):
    """Create ReferenceSet objects that each hold a subset of the models.

    :param dbpath: path to phenoscoring db
    :param obo: object with ontology
    :param partition_size: number of models after which a new
        ReferenceSet is started
    :return: list of ReferenceSet objects, each with a subset of models
        (an empty list when the db holds no models)
    """

    model_names = get_model_names(dbpath)
    if len(model_names) == 0:
        return []

    # partition models into chunks
    model_groups = [[]]
    for m in model_names:
        if len(model_groups[-1]) >= partition_size:
            model_groups.append([])
        model_groups[-1].append(m)

    # load all model information from database
    models = get_model_representations(dbpath, obo)
    phen_priors = get_phenotype_priors(dbpath)

    # every model receives the same prior, based on the total model count
    model_prior = 1 / len(model_names)

    # transfer into small-sized reference sets
    result = []
    for group in model_groups:
        packet_priors = dict.fromkeys(group, model_prior)
        refset = ReferenceSet(packet_priors, obo.ids())
        for m in group:
            # pop removes the representation from the loaded dictionary
            # as soon as it is handed over to a reference set
            model = models.pop(m)
            model.impute(obo, phen_priors)
            refset.add(model)
        result.append(refset)
    return result
def test_add_raises(self):
    """Adding something that is not a representation should raise."""

    refset = ReferenceSet({"refA": 0.5, "refB": 0.5}, ids=obo.ids())

    # a plain number
    with self.assertRaises(Exception):
        refset.add(5)

    # a plain dictionary
    plain_dict = dict.fromkeys(["DOID:0014667", "DOID:0080015"], 0)
    with self.assertRaises(Exception):
        refset.add(plain_dict)
def get_refsets(dbpath, ref_priors=None, phenotype_priors=None):
    """Create ReferenceSet objects with general and specific phenotypes.

    :param dbpath: path to phenoscoring db
    :param ref_priors: dictionary with priors for references
        (if None, fetched from db)
    :param phenotype_priors: dictionary with priors for all features
        (if None, fetched from db)
    :return: two ReferenceSet objects (general, specific)
    """

    if phenotype_priors is None:
        phenotype_priors = get_phenotype_priors(dbpath)
    if ref_priors is None:
        ref_priors = get_ref_priors(dbpath)

    # at first create just dictionaries of representations,
    # one copy of the complete null per reference
    general_dict, specific_dict = {}, {}
    nullrep = get_complete_null(dbpath)
    phenotypes = nullrep.keys()
    for ref_id in ref_priors:
        general_dict[ref_id] = nullrep.copy(name=ref_id)
        specific_dict[ref_id] = nullrep.copy(name=ref_id)

    # fill the representations with values
    phentab = ReferenceCompletePhenotypeTable(dbpath)
    if len(ref_priors) == 1:
        # with a single reference, restrict the db query to that reference
        refname = next(iter(ref_priors))
        generator = DBGenerator(phentab, where=dict(id=refname))
    else:
        generator = DBGenerator(phentab)
    for row in generator.next():
        ref_id, phen = row["id"], row["phenotype"]
        # rows for references without a prior are ignored
        if ref_id in ref_priors:
            general_dict[ref_id].set(phen, row["value"])
            specific_dict[ref_id].set(phen, row["specific_value"])

    # transfer representations into ReferenceSets
    general = ReferenceSet(ref_priors, phenotypes, phenotype_priors)
    specific = ReferenceSet(ref_priors, phenotypes, phenotype_priors)
    for ref_id, general_rep in general_dict.items():
        general.add(general_rep)
        specific.add(specific_dict[ref_id])
    return general, specific
def test_underflow(self):
    """Attempt to get underflow in individual p."""

    # let model have very sure values
    model = Representation(name="underflow")
    model.set("Y:007", 0.00001).set("Y:004", 1).set("Y:003", 1)

    # priors name two annotated references plus a null
    refA = Representation(name="refA").set("Y:003", 1)
    refB = Representation(name="refB").set("Y:003", 1)
    rs = ReferenceSet(dict(null=0.98, refA=0.001, refB=0.001),
                      ids=self.obo.ids())
    rs.add(refA).add(refB)
    rs.learn_obo(self.obo)
    rs.prep()

    result = rs.inference(model, verbose=True)
    # one check per added reference (a duplicated refB check was removed)
    # NOTE(review): the duplicate may have been meant for "null" - confirm
    # whether inference reports a score under that key before adding it
    for refname in ("refA", "refB"):
        self.assertGreaterEqual(result[refname], 0,
                                msg="must always be a number, even if zero")
def test_FP_can_increase(self):
    """A false positive can, in principle, raise the score."""

    # same references as the class fixture, but a different Y:002 prior
    altered_priors = self.priors.copy()
    altered_priors["Y:002"] = 0.1
    ref_priors = {"null": 0.4, "refA": 0.3, "refB": 0.3}
    refset = ReferenceSet(ref_priors, ids=Yobo.ids(),
                          row_priors=altered_priors)
    refset.add(self.refnull).add(self.refA).add(self.refB)
    refset.learn_obo(Yobo)

    fp_model = Representation(name="model").set("Y:002", 0.2)
    chain = refset.inference_chain(fp_model, "refB",
                                   verbose=True, fp_penalty=1)
    chain.evaluate_inference()
    self.assertGreater(chain.posterior, chain.prior)
def test_add_incrementally(self):
    """Values from added representations can be read back from the set."""

    first = Representation(name="refA").set("DOID:0014667", 0.4)
    first.impute(obo, obodefaults)
    second = Representation(name="refB").set("DOID:0080015", 0.6)
    second.impute(obo, obodefaults)

    refset = ReferenceSet({"refA": 0.5, "refB": 0.5}, ids=obo.ids())
    refset.add(first).add(second)

    self.assertEqual(refset.get("DOID:0014667", "refA"), 0.4,
                     "refset should contain inserted data")
    self.assertEqual(refset.get("DOID:0080015", "refB"), 0.6,
                     "refset should contain inserted data")
    self.assertEqual(refset.get("DOID:4", "refB"), 0.6,
                     "refset should contain imputed data")
def test_TP_scores_better_than_FP(self):
    """A true positive must score at least as high as a false positive."""

    # same references as the class fixture, with altered phenotype priors
    altered_priors = self.priors.copy()
    altered_priors["Y:002"] = 0.55
    altered_priors["Y:007"] = 0.15
    ref_priors = {"null": 0.4, "refA": 0.3, "refB": 0.3}
    refset = ReferenceSet(ref_priors, ids=Yobo.ids(),
                          row_priors=altered_priors)
    refset.add(self.refnull).add(self.refA).add(self.refB)
    refset.learn_obo(Yobo)

    # both models are compared with refA, which has Y:002 equal to 1
    fp_model = Representation(name="FP").set("Y:007", 1)
    fp_chain = refset.inference_chain(fp_model, "refA",
                                      verbose=True, fp_penalty=2)
    fp_chain.evaluate_inference()

    tp_model = Representation(name="TP").set("Y:002", 1)
    tp_chain = refset.inference_chain(tp_model, "refA", verbose=True)
    tp_chain.evaluate_inference()

    self.assertGreaterEqual(tp_chain.posterior, fp_chain.posterior)
def test_FP_with_fp_penalty(self):
    """A false positive gains more when fp_penalty is lower."""

    # start from the class priors and override three of them
    altered_priors = self.priors.copy()
    altered_priors["Y:003"] = 0.4
    altered_priors["Y:002"] = 0.15
    altered_priors["Y:007"] = 0.1

    refset = ReferenceSet({"null": 0.4, "ref": 0.3}, ids=Yobo.ids(),
                          row_priors=altered_priors)
    single_ref = Representation(name="ref")
    single_ref.set("Y:001", 1).impute(Yobo, altered_priors)
    # after imputation, push Y:007 to half of its prior
    single_ref.set("Y:007", altered_priors["Y:007"] / 2)
    refset.add(self.refnull).add(single_ref)
    refset.learn_obo(Yobo)

    fp_model = Representation(name="model").set("Y:007", 0.35)

    low_penalty = refset.inference_chain(fp_model, "ref",
                                         verbose=True, fp_penalty=0.1)
    low_penalty.evaluate_inference()
    self.assertGreater(low_penalty.posterior, low_penalty.prior)

    high_penalty = refset.inference_chain(fp_model, "ref",
                                          verbose=True, fp_penalty=1)
    high_penalty.evaluate_inference()
    self.assertLess(high_penalty.posterior, low_penalty.posterior)
def setUp(self):
    """Create five references and wrap them in a ReferenceMatrix."""

    self.refnull = Representation(data=null_defaults, name="null")

    self.refA = Representation(data=Ydefaults, name="refA")
    self.refA.set("Y:004", 1)

    self.refB = Representation(data=Ydefaults, name="refB")
    self.refB.set("Y:004", 1).set("Y:001", 0.6).set("Y:003", 0.5)

    self.refC = Representation(data=Ydefaults, name="refC")
    self.refC.set("Y:004", 1).set("Y:001", 0.5)

    self.refD = Representation(data=Ydefaults, name="refD")
    self.refD.set("Y:004", 0.1)

    # all five references share the same prior
    names = ("null", "refA", "refB", "refC", "refD")
    ref_priors = {name: 0.1 for name in names}
    refset = ReferenceSet(ref_priors, ids=Ydefaults.keys())
    references = (self.refnull, self.refA, self.refB, self.refC, self.refD)
    for reference in references:
        refset.add(reference)
    self.rm = ReferenceMatrix(refset, list(Ydefaults.keys()))
def test_inference_chain(self):
    """An inference chain records the model, reference, and per-feature data."""

    # build a reference set with two references
    ref_a = Representation(name="refA")
    ref_a.set("Y:002", 1).impute(Yobo, Ydefaults)
    ref_b = Representation(name="refB")
    ref_b.set("Y:001", 1).impute(Yobo, Ydefaults)
    refset = ReferenceSet({"refA": 0.5, "refB": 0.5},
                          ids=Yobo.ids(), row_priors=Ydefaults)
    refset.add(ref_a).add(ref_b)
    refset.learn_obo(Yobo)

    # the chain object explains the scoring steps of refA against refB
    chain = refset.inference_chain(ref_a, "refB", verbose=True)
    chain_fields = chain.__dict__
    self.assertEqual(chain_fields["model"], "refA")
    self.assertEqual(chain_fields["reference"], "refB")
    self.assertGreater(len(chain.data), 2,
                       "data chain should describe multiple features")
    self.assertTrue("background" in chain.data[0].__dict__,
                    "chain data have comparison information")
    self.assertTrue("result" in chain.data[0].__dict__,
                    "chain data have TP/FP/etc codes")
def test_checkprep(self):
    """Inference only works when the set is prepped."""

    rs = ReferenceSet(dict(refA=0.5, refB=0.5), ids=self.obo.ids())
    # prep() is deliberately not called here.
    # Call inference() as the other tests do: a misspelled method name
    # would raise AttributeError and let this test pass for the wrong reason
    with self.assertRaises(Exception):
        rs.inference(self.y3model)