def test_positive_parent_multi(self):
    """fetching a parent term when terms have multiple parents."""

    # load an ontology in which Y:007 is connected to both Y:002 and Y:001
    yobo = MinimalObo(join(testdir, "Ymulti.obo"))
    priors = dict.fromkeys(yobo.ids(), 0.0001)
    priors["Y:003"] = 0.0002
    priors["Y:005"] = 0.0002
    # build two slightly different reference representations
    refset = ReferenceSet(dict(refA=0.5, refB=0.5),
                          ids=yobo.ids(), row_priors=priors)
    rep_a = Representation(name="refA")
    rep_a.set("Y:002", 0.5).set("Y:005", 1).impute(yobo, priors)
    rep_b = Representation(name="refB")
    rep_b.set("Y:001", 0.5).impute(yobo, priors)
    refset.add(rep_a).add(rep_b)
    refset.learn_obo(yobo)
    # the positive ancestor of Y:007 differs between the two references
    self.assertEqual(
        refset._positive_ancestor(refset.columns["refA"],
                                  refset.rows["Y:007"]),
        refset.rows["Y:002"],
        "Y2 is a positive ancestor")
    self.assertEqual(
        refset._positive_ancestor(refset.columns["refB"],
                                  refset.rows["Y:007"]),
        refset.rows["Y:001"],
        "Y1 is a positive immediate parent")
def test_obo_canonical(self):
    """official ids are canonical ids."""

    # both the full and the minimal parser must map official ids to themselves
    for parser_class in (Obo, MinimalObo):
        ontology = parser_class(alts_file)
        for term_id in ("AA:1", "AA:2"):
            self.assertEqual(ontology.canonical(term_id), term_id)
def setUpClass(cls):
    """Build a scoring db and load shared fixtures once for the whole class."""

    # build a complete test database from a canned configuration
    config = CompleteTestConfig()
    config.null_prior = 0.2
    cls.dbfile = config.db
    cls.pipeline = Phenoscoring(config)
    cls.pipeline.build()
    # load the ontology that the db was built against
    obopath = check_file(config.obo, config.db, "obo")
    cls.obo = MinimalObo(obopath, True)
    # a dummy set of default values
    cls.obodefaults = dict.fromkeys(cls.obo.ids(), 0.2)
    cls.obozeros = dict.fromkeys(cls.obo.ids(), 0)
    # reference priors and the two reference sets (general/specific),
    # both taught the ontology structure for inference
    cls.ref_priors = get_ref_priors(config.db)
    cls.rs, cls.rs2 = get_refsets(config.db, ref_priors=cls.ref_priors)
    cls.rs.learn_obo(cls.obo)
    cls.rs2.learn_obo(cls.obo)
    # for testing individual configurations
    cls.y3model = Representation(name="Y3").set("Y:003", 0.8)
    cls.refA = Representation(name="refA").set("Y:002", 1)
    cls.refA.defaults(cls.obozeros)
    cls.refB = Representation(name="refB").set("Y:002", 1)
    cls.refB.defaults(cls.obozeros)
def test_parsing(self):
    """parsing should return model descriptions"""

    obo = MinimalObo(obo_file)
    models = prep_IMPC(impc_file, (0.8, 0.05), 0.01, obo=obo)
    # two markers x two alleles x one zygosity, with positive and
    # negative phenotype sets -> 8 models; x3 for universal/male/female
    self.assertEqual(len(models), 24)
    alleles = {m.description["allele_id"] for m in models.values()}
    markers = {m.description["marker_id"] for m in models.values()}
    self.assertEqual(len(alleles), 2)
    self.assertEqual(len(markers), 2)
    # count the male and female models
    males = females = unspecified = 0
    for model in models.values():
        sex = model.description["sex"]
        if sex == "M":
            males += 1
        elif sex == "F":
            females += 1
        elif sex:
            unspecified += 1
    self.assertEqual(males, females, "should be paired and equal")
    self.assertEqual(males, unspecified, "should be paired and equal")
    self.assertGreater(males, 0)
def build(self): """create a db for phenoscoring, includes setup and table-filling.""" # create db with empty tables dbpath, config = self.setup() # avoid work if setup decided db exists and build can be skipped if dbpath is None: return # check prerequisite files obopath = check_file(config.obo, dbpath, "obo") refpath = check_file(config.reference_phenotypes, dbpath, "reference_phenotypes") freqpath = check_file(config.phenotype_frequencies, dbpath, "phenotype_frequencies") self.logger.msg1("Loading ontology") obo = MinimalObo(obopath, True) self.logger.msg1("Preparing phenotype frequencies") fill_phenotype_frequency_table(dbpath, freqpath) # fill database with data self.logger.msg1("Preparing references") fill_concise_reference_table(dbpath, refpath) fill_complete_reference_table(dbpath, obo, config) self._end()
def test_scaled_cooc(self):
    """write out cooc matrices"""

    obo = MinimalObo(obo_file)
    models = prep_IMPC(impc_file, (0.8, 0.05), 0.01, obo=obo)
    by_phenotype = get_models_by_phenotype(get_UA_models(models, "allele"), 1)
    # write out various types of cooc matrices
    cooc_full, phenindex = make_scaled_cooc(by_phenotype, obo, 0, "full")
    write_phenotype_cooc(cooc_full, phenindex, out_prefix + "-full")
    cooc_freq, phenindex = make_scaled_cooc(by_phenotype, obo, 0, "freq")
    write_phenotype_cooc(cooc_freq, phenindex, out_prefix + "-freq")
    cooc_simJ, phenindex = make_scaled_cooc(by_phenotype, obo, 0, "simJ")
    write_phenotype_cooc(cooc_simJ, phenindex, out_prefix + "-simJ")
    # full matrix entries must combine frequency and (1 - similarity)
    for _, i1 in phenindex.items():
        for _, i2 in phenindex.items():
            self.assertEqual(cooc_full[i1, i2],
                             cooc_freq[i1, i2] * (1 - cooc_simJ[i1, i2]))
    # check all the files exist
    self.assertTrue(exists(cooc_full_file))
    self.assertTrue(exists(cooc_freq_file))
    self.assertTrue(exists(cooc_simJ_file))
    # very gently (not rigorously) check content of files
    with open_file(cooc_full_file, "rt") as f:
        full_lines = f.read().strip().split("\n")
    self.assertGreater(len(full_lines), 2)
def export_representations(self):
    """write matrix representations for models and refs to disk"""

    # open db/config for this run
    dbpath, config = self._start()
    self.logger.msg1("Loading ontology")
    obo_path = check_file(config.obo, dbpath, "obo")
    self.obo = MinimalObo(obo_path, True)
    # write references first, then models (both rely on self.obo)
    self._export_reference_representations()
    self._export_model_representations(config)
    self._end()
def explain(self):
    """Perform a verbose calculation of inference scores.

    The prep for this function is similar as for compute(). Once the
    relevant data is loaded from the db, the calculations are performed
    and recorded manually.

    Returns a JSON string (an array with one result per model/reference
    pair), or an error message string for an invalid --explain value.
    """

    # suppress progress logging; output of this function is the JSON itself
    self.logger.verbose = False
    dbpath, config = self._start()
    if config.explain not in ["specific", "general"]:
        return "--explain must be 'general' or 'specific'"
    config.obo = check_file(config.obo, dbpath, "obo")
    # allow user to pass several model/reference pairs
    models = config.model.split(",")
    references = config.reference.split(",")
    M = len(models)
    if len(references) != M:
        raise Exception("incompatible number of models and references")
    # use the packet to load information from the db, refset and models
    # partition_size=M ensures everything lands in a single packet
    packet = prep_compute_packets(self.config,
                                  references=references,
                                  models=models,
                                  partition_size=M)[0]
    packet.prep()
    refset = packet.general_refset
    if config.explain == "specific":
        refset = packet.specific_refset
    refset.learn_obo(MinimalObo(config.obo))
    # score each (model, reference) pair and collect JSON fragments
    allresults = [None] * M
    for i, (modelid, refid) in enumerate(zip(models, references)):
        data = packet.models[modelid]
        result = refset.inference_chain(data, refid, verbose=True,
                                        fp_penalty=config.fp_penalty)
        allresults[i] = result.to_json(nodata=config.explain_nodata)
    # assemble fragments into one JSON array
    return "[" + (",".join(allresults)) + "]"
def test_minimal_obo_converts_alts(self):
    """converts alt ids into canonical ids - using MinimalObo class."""

    obo = MinimalObo(alts_file)
    # alt ids defined in the obo resolve to their canonical terms
    for alt_id, expected in (("AA:02", "AA:2"),
                             ("AA:03", "AA:3"),
                             ("AA:003", "AA:3")):
        self.assertEqual(obo.canonical(alt_id), expected)
    # second time should use cache
    self.assertEqual(obo.canonical("AA:003"), "AA:3")
    # ids absent from the obo resolve to None
    for missing_id in ("AA:000", "AA:002"):
        self.assertEqual(obo.canonical(missing_id), None)
def test_writing(self):
    """write the parsed MGI data onto files"""

    obo = MinimalObo(obo_file)
    write_models(prep_IMPC(impc_file, (0.8, 0.05), 0.01, obo=obo),
                 out_prefix)
    # both output files should appear on disk
    self.assertTrue(exists(desc_file))
    self.assertTrue(exists(pheno_file))

    def read_lines(path):
        # fetch file content as a list of lines
        with open_file(path, "rt") as f:
            return f.read().strip().split("\n")

    # description file should have 25 lines, 24 data lines plus header
    self.assertEqual(len(read_lines(desc_file)), 25)
    # phenotype file should have at least 7 lines (more)
    self.assertGreater(len(read_lines(pheno_file)), 7)
def test_imputing(self):
    """create new models based on UA."""

    obo = MinimalObo(obo_file)
    models = prep_IMPC(impc_file, (0.8, 0.05), 0.01, obo=obo)
    imputed = impute_IMPC(get_UA_models(models, "allele"), obo, 0)
    write_models(imputed, out_prefix + "-imputed")
    # check output files exist and contain proper content
    self.assertTrue(exists(imputed_desc_file))
    self.assertTrue(exists(imputed_pheno_file))
    with open_file(imputed_desc_file, "rt") as f:
        desc_lines = f.read().strip().split("\n")
    with open_file(imputed_pheno_file, "rt") as f:
        pheno_lines = f.read().strip().split("\n")
    # description file should have 3 lines, 2 desc lines plus header
    self.assertEqual(len(desc_lines), 3)
    # phenotype file should have a few lines
    self.assertGreater(len(pheno_lines), 3)
def prep(self):
    """loads initial data from the db.

    Populates phen_priors, ref_priors, general/specific refsets, and
    replaces self.models (names) with full model representations.
    """

    config = self.config
    dbpath = config.db
    # load the ontology
    obo = MinimalObo(config.obo)
    # prepare information about references
    self.phen_priors = get_phenotype_priors(dbpath)
    self.ref_priors = get_ref_priors(dbpath, self.references)
    general, specific = get_refsets(dbpath, ref_priors=self.ref_priors)
    # assign an ontology for reasoning
    specific.learn_obo(obo)
    general.learn_obo(obo)
    self.general_refset = general
    self.specific_refset = specific
    # transfer model phenotypes
    # NOTE: self.models switches type here, from name collection to
    # a mapping of full representations keyed by model name
    model_names = list(self.models.keys())
    self.models = get_model_representations(dbpath, obo, log=self.log,
                                            log_prefix=self.run_msg,
                                            model_names=model_names)
class RepresentationTests(unittest.TestCase):
    """Test cases for class Representation."""

    # shared default values used across several tests
    defaults = dict(abc=0.1, xyz=0.1)
    # file with a small obo-formatted ontology
    obofile = os.path.join("tests", "testdata", "small.obo")

    def setUp(self):
        """For setup, load a small ontology."""
        self.obo = MinimalObo(self.obofile)
        self.obodef = dict.fromkeys(self.obo.ids(), 0.2)

    def test_empty_representation(self):
        """a fresh representation holds no data and no name."""
        rr = Representation()
        self.assertEqual(len(rr.data), 0, "representation should be empty")
        self.assertEqual(rr.name, None,
                         "representation should not have a name")

    def test_named_representation(self):
        """a name passed to the constructor is recorded."""
        rr = Representation(dict(abc=1), name="rr")
        self.assertEqual(rr.name, "rr", "rep should have a name")

    def test_str(self):
        """string form of a representation includes its name."""
        ss = str(Representation(name="ABC XYZ"))
        self.assertRegex(ss, "XYZ", "representation string should have name")

    def test_set_feature(self):
        """can set and retrieve values"""
        rr = Representation()
        rr.set("abc", 0.2)
        self.assertEqual(rr.get("abc"), 0.2)

    def test_set_feature_float(self):
        """can set and retrieve values"""
        rr = Representation()
        rr.set("abc", 1)
        self.assertEqual(rr.get("abc"), 1.0)
        self.assertTrue(type(rr.get("abc")) is float)

    def test_init_float(self):
        """initializing with a dict ensure float values"""
        rr = Representation(dict(abc=1), name="rr")
        self.assertEqual(rr.get("abc"), 1.0)
        self.assertTrue(type(rr.get("abc")) is float)

    def test_general_representation_get(self):
        """setting and getting from a generic representation."""
        rr = Representation(dict(xyz=0.2))
        rr.set("bob", 0.4)
        rr.set("xyz", 0.3)
        rr.defaults(self.defaults)
        self.assertEqual(rr.get("bob"), 0.4,
                         "value should come from manual input")
        self.assertEqual(rr.get("abc"), 0.1,
                         "value should come from defaults dict")
        self.assertEqual(rr.get("xyz"), 0.3,
                         "value should come from manual override")

    def test_keys(self):
        """keys() reports default features first, then manual ones."""
        rr = Representation(dict(xyz=0.2))
        rr.set("bob", 0.4)
        rr.set("xyz", 0.3)
        rr.defaults(self.defaults)
        self.assertEqual(rr.keys(), ["abc", "xyz", "bob"],
                         "keys should have defaults and non-defaults")

    def test_has(self):
        """querying whether a value has been set."""
        rr = Representation(dict(xyz=0.2))
        rr.set("bob", 0.4)
        self.assertTrue(rr.has("xyz"), "set in constructor")
        self.assertTrue(rr.has("bob"), "set manually")
        self.assertFalse(rr.has("alice"), "not set")

    def test_equality(self):
        """checking content of representations."""
        r1 = Representation(self.defaults, name="hello")
        r2 = Representation(self.defaults, name="hello")
        r3 = Representation(self.defaults, name="bye")
        r4 = Representation(self.defaults, name="hello")
        r4.set("abc", 100)
        r5 = Representation()
        r6 = Representation(self.defaults, name="hello")
        r6.set("qqq", 20)
        self.assertTrue(r1.equal(r2), "all is the same")
        self.assertFalse(r1.equal(5), "argument is not a Representation")
        self.assertFalse(r1.equal(r3), "same content, but different name")
        self.assertFalse(r1.equal(r4), "same name, but different content")
        self.assertFalse(r1.equal(r5), "r5 is empty")
        self.assertFalse(r1.equal(r6), "r6 has more keys")
        self.assertFalse(r1.equal(range(4)), "must compare to Representation")

    def test_general_representation_get2(self):
        """setting and getting from a generic representation."""
        # Similar to previous, but setting defaults before the specifics
        rr = Representation(dict(abc=0.1, xyz=0.2))
        rr.defaults(self.defaults)
        rr.set("bob", 0.4)
        rr.set("xyz", 0.3)
        self.assertEqual(rr.get("bob"), 0.4,
                         "value should come from manual input")
        self.assertEqual(rr.get("abc"), 0.1,
                         "value should come from defaults dict")
        self.assertEqual(rr.get("xyz"), 0.3,
                         "value should come from manual override")

    def test_impute_up(self):
        """updating values in representation via positive evidence."""
        rr = Representation(dict())
        rr.set("unrelated", 0.8)
        rr.set("DOID:0014667", 0.4)
        rr.impute(self.obo, self.obodef)
        self.assertEqual(rr.get("unrelated"), 0.8,
                         msg="out-of-ontology terms remain")
        self.assertEqual(rr.get("DOID:0014667"), 0.4,
                         msg="set value should remain")
        self.assertGreater(rr.get("DOID:4"), 0.4,
                           msg="ancestors should receive greater score")
        self.assertEqual(rr.get("DOID:0080015"), 0.2,
                         msg="unrelated terms get default")
        self.assertEqual(rr.get("DOID:655"), 0.2,
                         msg="children are unaffected")

    def test_impute_up_always_increases(self):
        """updating values in representation via positive evidence."""
        rr = Representation(dict())
        rr.set("DOID:3650", 0.25)
        defaults = self.obodef.copy()
        defaults["DOID:0014667"] = 0.5
        defaults["DOID:4"] = 1
        rr.impute(self.obo, defaults)
        self.assertEqual(rr.get("DOID:3650"), 0.25,
                         "set value should remain")
        self.assertGreater(rr.get("DOID:0060158"), 0.25,
                           "ancestors should receive greater score")
        self.assertEqual(rr.get("DOID:655"), 0.2,
                         "unrelated should stay at default")
        # ancestor that has already a higher score than what is propagated
        self.assertGreater(
            rr.get("DOID:0014667"), 0.5,
            "ancestor should receive score greater than its prior")

    def test_impute_up_avoid_doubles(self):
        """updating values in representation via positive evidence in DAG"""
        rr = Representation(dict())
        # DOID:11044 in test ontology has two paths to root (DOID:4)
        # one is direct (a shortcut)
        # another path is through 0080015
        rr.set("DOID:11044", 0.4)
        rr.impute(self.obo, self.obodef)
        self.assertGreater(rr.get("DOID:0080015"), 0.2,
                           "ancestor should increase")
        self.assertAlmostEqual(
            rr.get("DOID:0080015"), rr.get("DOID:4"),
            msg="4 should get bumped once, despite two paths from 11044")

    def test_impute_down(self):
        """updating values in representation via negative evidence."""
        rr = Representation(dict())
        rr.set("unrelated", 0.8)
        rr.set("DOID:0014667", 0.05)
        rr.impute(self.obo, self.obodef)
        self.assertAlmostEqual(rr.get("unrelated"), 0.8,
                               "out-of-ontology terms remain")
        self.assertAlmostEqual(rr.get("DOID:0014667"), 0.05,
                               "set value should remain")
        self.assertAlmostEqual(rr.get("DOID:4"), 0.2,
                               "ancestors should remain")
        self.assertAlmostEqual(rr.get("DOID:0080015"), 0.2,
                               "unrelated terms get default")
        self.assertAlmostEqual(rr.get("DOID:655"), 0.05,
                               "children are unaffected")

    def test_impute_down_ordering(self):
        """updating values in representation via negative evidence."""
        r1 = Representation(dict())
        r1.set("DOID:3650", 0.01).set("DOID:0014667", 0.05)
        r2 = Representation(dict())
        r2.set("DOID:3650", 0.01).set("DOID:0014667", 0.05)
        # imputation down should not depend on order of the seeds
        r1.impute(self.obo, self.obodef, seeds=["DOID:3650", "DOID:0014667"])
        r2.impute(self.obo, self.obodef, seeds=["DOID:0014667", "DOID:3650"])
        self.assertEqual(r1.data, r2.data, "all values the same")

    def test_impute_fromseeds_highfirst(self):
        """imputing values from manually-specified seeds."""
        rr = Representation(dict())
        # specify data for two children, DOID:4 is higher in tree, so should gain
        rr.set("DOID:0014667", 0.4)
        rr.set("DOID:0080015", 0.3)
        rr.impute(self.obo, self.obodef,
                  seeds=["DOID:0014667", "DOID:0080015"])
        self.assertAlmostEqual(rr.get("DOID:0014667"), 0.4,
                               msg="should remain")
        self.assertAlmostEqual(rr.get("DOID:0080015"), 0.3,
                               msg="should remain")
        self.assertAlmostEqual(
            rr.get("DOID:4"), 1 - ((1 - 0.4) * (1 - 0.3) * (1 - 0.2)),
            msg="ancestor gains from two children (and prior)")
        self.assertAlmostEqual(rr.get("DOID:655"), 0.2,
                               msg="remain; new DOID:4")

    def test_impute_fromseeds_lowfirst(self):
        """imputing values from manually-specified seeds."""
        rr = Representation(dict())
        ## specify an inconsistent set of values, DOID:4 is higher in tree, so cannot
        ## have a lower value than DOID:0014667
        rr.set("DOID:0014667", 0.4).set("DOID:4", 0.1)
        rr.impute(self.obo, self.obodef, seeds=["DOID:4", "DOID:0014667"])
        self.assertEqual(rr.get("DOID:0080015"), 0.1, "child of DOID:4")
        self.assertEqual(rr.get("DOID:655"), 0.1, "child of DOID:4")

    def test_impute_fromseeds_auto(self):
        """imputing values from automatically-ordered seeds."""
        # specify an inconsistent set of values, DOID:4 is higher in tree, so cannot
        # have a lower value than DOID:0014667
        # However, low DOID:4 can impact on other branches
        rr1 = Representation(dict())
        rr1.set("DOID:0014667", 0.4).set("DOID:4", 0.1)
        rr1.impute(self.obo, self.obodef)  # auto seeds
        rr2 = Representation(dict())
        rr2.set("DOID:0014667", 0.4).set("DOID:4", 0.1)
        rr2.impute(self.obo, self.obodef)  # auto seeds, different initial ordering
        rr3 = Representation(dict())
        rr3.set("DOID:4", 0.1).set("DOID:0014667", 0.4)
        rr3.impute(self.obo, self.obodef)
        self.assertTrue(rr1.data == rr2.data,
                        "auto and manual should have same data")
        self.assertTrue(rr2.data == rr3.data,
                        "should be = regardless of input order")
        self.assertGreater(rr1.data["DOID:0014667"], 0.2,
                           "DOID:0014667 increase by direct evidence")
        self.assertGreater(rr1.data["DOID:4"], 0.2,
                           "DOID:4 increases driven by 0014667")
        self.assertEqual(rr1.data["DOID:11044"], 0.1,
                         "low raw DOID:4 propagates down")

    def test_sum_with_impute(self):
        """sum of values associated with the representation."""
        rr = Representation(dict())
        rr.set("DOID:0014667", 1)
        sum1 = rr.sum()
        rr.impute(self.obo, self.obodef)
        sum2 = rr.sum()
        self.assertEqual(sum1, 1, "value of one phenotype")
        self.assertGreater(sum2, 2,
                           msg="value for one phenotype+ancestors+defaults")

    def test_copy(self):
        """can copy a representation into a new object."""
        r1 = Representation(self.defaults, name="hello")
        r1.set("abc", 0.5)
        result = r1.copy()
        self.assertEqual(r1.name, result.name)
        self.assertEqual(r1.get("abc"), result.get("abc"))
        # the copy must be independent of the original
        result.set("abc", 0.75)
        self.assertEqual(r1.get("abc"), 0.5)
        self.assertEqual(result.get("abc"), 0.75)
def setUp(self):
    """have a target ontology ready"""
    # parse the ontology, then derive the EMAPA term mapping against it
    self.obo = MinimalObo(obo_file)
    self.emapa = get_emapa_map(emapa_file, self.obo)
from phenoprep.prep_refs import prep_refs, get_oo_map, valid_reference_id from phenoprep.prep_refs import prep_tech_models from phenoprep.write import write_references from obo.obo import MinimalObo from ..testhelpers import remove_if_exists from tools.files import open_file testdir = join("tests", "testdata") refs_file = join(testdir, "phenotab-small.tab") obo_file = join(testdir, "Y.obo") oo_file = join(testdir, "owlsim-small.txt") k = 2 out_prefix = join("tests", "testdata", "test-prep-phenotab") out_file = out_prefix + "-phenotypes.tsv.gz" obo = MinimalObo(obo_file, True) class ReferenceIdTests(unittest.TestCase): """Test cases for function valid_reference_id.""" def test_accepts(self): """function accepts proper reference ids.""" self.assertTrue(valid_reference_id("OMIM:10001")) self.assertTrue(valid_reference_id("DECIPHER:10")) self.assertTrue(valid_reference_id("ORPHANET:9002")) self.assertTrue(valid_reference_id("ORPHA:9002")) self.assertTrue(valid_reference_id("DISEASE:1")) def test_rejects(self):
import json
import unittest
from collections import OrderedDict
from os.path import join, exists
from scoring.referenceset import ReferenceSet
from scoring.referencematrix import ReferenceMatrix
from scoring.representation import Representation
from obo.obo import MinimalObo
from tests.testhelpers import remove_if_exists

# plain feature names for tests that do not need an ontology
testfeatures = ["a", "b", "c", "d", "e"]
# define an ontology for these tests
testdir = join("tests", "testdata")
obofile = join(testdir, "small.obo")
obo = MinimalObo(obofile)
obodefaults = dict.fromkeys(obo.ids(), 0.0)
# another ontology Y.obo
Yfile = join(testdir, "Y.obo")
Yobo = MinimalObo(Yfile)
Ydefaults = dict.fromkeys(Yobo.ids(), 0.2)
null_defaults = dict.fromkeys(Yobo.ids(), 0.001)
# set of features for this set of tests
defvals = dict.fromkeys(testfeatures, 0.2)
zerovals = dict.fromkeys(testfeatures, 0.0)
nanvals = dict.fromkeys(testfeatures, float("nan"))
# prefix for output files
outfile = join(testdir, "small_out")
import unittest
from os.path import join, exists
from obo.obo import MinimalObo
from phenoprep.prep_mgi import prep_MGI
from phenoprep.write import write_models
from ..testhelpers import remove_if_exists
from tools.files import open_file

# input fixtures and output paths shared by the tests below
testdir = join("tests", "testdata")
mgi_file = join(testdir, "MGI-GP-small.rpt")
obo_file = join(testdir, "Y.obo")
out_prefix = join(testdir, "test-prep-MGI")
desc_file = out_prefix + "-models.tsv.gz"
pheno_file = out_prefix + "-phenotypes.tsv.gz"
# ontology parsed once at module level and reused
obo = MinimalObo(obo_file)


class PrepMGITests(unittest.TestCase):
    """Test cases for parsing information from MGI tables"""

    def tearDown(self):
        """remove any written-out files (if generated)"""
        remove_if_exists(desc_file)
        remove_if_exists(pheno_file)
        pass

    def test_inputs(self):
        """check that input file exists"""
        self.assertEqual(exists(mgi_file), True, "input file should exist")
class PhenoscoringEntityTrimmingTests(unittest.TestCase):
    """Test cases for trimming phenotypes out of entity objects"""

    # small ontology shared by all tests in this class
    obofile = join("tests", "testdata", "small.obo")
    obo = MinimalObo(obofile)

    def test_trim_nothing(self):
        """trimming does nothing if there is nothing to do."""
        m = Entity("abc", "genes")
        d1 = PhenotypeDatum("DOID:3650", Experiment(1, 0.8, 0.05))
        d2 = PhenotypeDatum("DOID:11044", Experiment(1, 0.8, 0.05))
        m.add(d1).add(d2)
        self.assertEqual(len(m.data), 2)
        m.trim_ancestors(self.obo)
        self.assertEqual(len(m.data), 2)

    def test_trim_easy(self):
        """trimming eliminates root node."""
        m = Entity("abc", "genes")
        d1 = PhenotypeDatum("DOID:4", Experiment(1, 0.8, 0.05))
        d2 = PhenotypeDatum("DOID:11044", Experiment(1, 0.8, 0.05))
        m.add(d1).add(d2)
        self.assertEqual(len(m.data), 2)
        m.trim_ancestors(self.obo)
        self.assertEqual(len(m.data), 1)
        self.assertEqual(m.data[0].phenotype, "DOID:11044")

    def test_trim_easy_keep(self):
        """trimming does not eliminate node if ask to keep."""
        m = Entity("abc", "genes")
        d1 = PhenotypeDatum("DOID:4", Experiment(1, 0.8, 0.05))
        d2 = PhenotypeDatum("DOID:11044", Experiment(1, 0.8, 0.05))
        m.add(d1).add(d2)
        self.assertEqual(len(m.data), 2)
        # second argument protects DOID:4 from being trimmed
        m.trim_ancestors(self.obo, set(["DOID:4"]))
        self.assertEqual(len(m.data), 2)

    def test_trim_medium(self):
        """trimming eliminates ancestors when there are several leafs."""
        m = Entity("abc", "genes")
        d1 = PhenotypeDatum("DOID:4", Experiment(1, 0.8, 0.05))
        d2 = PhenotypeDatum("DOID:11044", Experiment(1, 0.8, 0.05))
        d3 = PhenotypeDatum("DOID:0080015", Experiment(1, 0.8, 0.05))
        d4 = PhenotypeDatum("DOID:655", Experiment(1, 0.8, 0.05))
        m.add(d1).add(d2).add(d3).add(d4)
        self.assertEqual(len(m.data), 4)
        m.trim_ancestors(self.obo)
        self.assertEqual(len(m.data), 2)
        result = set([_.phenotype for _ in m.data])
        self.assertEqual(result, set(["DOID:11044", "DOID:655"]))

    def test_trim_leave_highvalue(self):
        """trimming keeps an ancestor whose value exceeds its descendant's."""
        m = Entity("abc", "genes")
        d1 = PhenotypeDatum("DOID:4", Experiment(1, 0.8, 0.05))
        # DOID:11044 carries a lower value (0.5) than its ancestor 0080015
        d2 = PhenotypeDatum("DOID:11044", Experiment(0.5, 0.8, 0.05))
        d3 = PhenotypeDatum("DOID:0080015", Experiment(1, 0.8, 0.05))
        d4 = PhenotypeDatum("DOID:655", Experiment(1, 0.8, 0.05))
        m.add(d1).add(d2).add(d3).add(d4)
        self.assertEqual(len(m.data), 4)
        m.trim_ancestors(self.obo)
        self.assertEqual(len(m.data), 3)
        result = set([_.phenotype for _ in m.data])
        self.assertEqual(result,
                         set(["DOID:11044", "DOID:655", "DOID:0080015"]))
class OboCoreTests(unittest.TestCase):
    """Test cases for class Obo - core capabilities with minimal data parsing"""

    # load file with small ontology, skip non-essential fields
    obo = MinimalObo(smallfile)

    def test_obo_ids(self):
        """identify ids in a small ontology"""
        # ids(True) includes obsolete terms; ids() only current terms
        self.assertEqual(self.obo.ids(True), allids,
                         "parsing should identify all ids in file")
        self.assertEqual(self.obo.ids(), curids,
                         "parsing should identify all ids in file")

    def test_obo_dealing_with_none(self):
        """object None is handled gracefully via has and valid"""
        self.assertFalse(self.obo.has(None))
        self.assertFalse(self.obo.valid(None))

    def test_obo_has(self):
        """Extracting term ids from obo."""
        # test a few keys that are present
        self.assertTrue(self.obo.has("DOID:4"))
        self.assertTrue(self.obo.has("DOID:0014667"))
        self.assertTrue(self.obo.has("DOID:3653"))
        # test a few keys that are not present
        self.assertFalse(self.obo.has("wrongkey"))

    def test_obo_valid(self):
        """valid() rejects missing and obsolete terms."""
        # test a few keys that are present
        self.assertTrue(self.obo.valid("DOID:4"))
        self.assertTrue(self.obo.valid("DOID:0014667"))
        # test a few keys that are not present
        self.assertFalse(self.obo.valid("wrongkey"))
        # test an item that is present but is obsolete
        self.assertFalse(self.obo.valid("DOID:3653"))

    def test_obo_parents_of_root(self):
        """Getting parent structure."""
        self.assertEqual(self.obo.parents("DOID:4"), (),
                         "root has no parents")

    def test_obo_parents_valid(self):
        """Getting parent structure requires valid term"""
        with self.assertRaises(Exception) as e:
            self.obo.parents("bad_id")
        self.assertTrue("not present" in str(e.exception))

    def test_obo_parents_of_nonroot_nodes(self):
        """Getting parent structure."""
        self.assertEqual(self.obo.parents("DOID:0014667"), ("DOID:4", ),
                         "parent is root")
        self.assertEqual(self.obo.parents("DOID:3650"), ("DOID:0060158", ),
                         "parent is some other node")

    def test_obo_ancestors(self):
        """Getting ancestors structure."""
        self.assertEqual(sorted(self.obo.ancestors("DOID:3650")),
                         ["DOID:0014667", "DOID:0060158", "DOID:4"],
                         "ancestor traversal to root")

    def test_obo_parents_of_obsolete(self):
        """Getting parent structure of an obsolete term."""
        self.assertEqual(self.obo.parents("DOID:3653"), (),
                         "obsolete term here has no parent")

    def test_obo_children_of_root(self):
        """Getting inferred children."""
        self.assertEqual(sorted(self.obo.children("DOID:4")),
                         ["DOID:0014667", "DOID:0080015", "DOID:11044"],
                         "root has two direct children and one indirect")

    def test_obo_children_of_leaf(self):
        """Getting children of a leaf node."""
        self.assertEqual(self.obo.children("DOID:3650"), (), "no children")

    def test_obo_descendants(self):
        """Getting descendants structure."""
        self.assertEqual(sorted(self.obo.descendants("DOID:0014667")),
                         ["DOID:0060158", "DOID:3650", "DOID:655"],
                         "descendant traversal to leaves")

    def test_siblings_only_child(self):
        """Getting siblings from root or single child should be empty set."""
        self.assertEqual(self.obo.siblings("DOID:4"), ())
        self.assertEqual(self.obo.siblings("DOID:3650"), ())

    def test_siblings(self):
        """Getting siblings from node with siblings."""
        self.assertEqual(self.obo.siblings("DOID:655"), ("DOID:0060158", ))
        self.assertEqual(self.obo.siblings("DOID:0060158"), ("DOID:655", ))
        # for the next two, siblings also include nodes using shortcuts to a parent
        self.assertEqual(sorted(self.obo.siblings("DOID:0014667")),
                         ["DOID:0080015", "DOID:11044"])
        self.assertEqual(sorted(self.obo.siblings("DOID:0080015")),
                         ["DOID:0014667", "DOID:11044"])

    def test_sim_simple(self):
        """computing similarity of two terms using ancestors."""
        sim1 = self.obo.sim_jaccard("DOID:0014667", "DOID:0080015")
        self.assertEqual(sim1, 1 / 3,
                         "both terms are direct children on DOID:4")
        sim2 = self.obo.sim_jaccard("DOID:655", "DOID:0060158")
        self.assertGreater(sim2, sim1, "second set of terms is more specific")
        self.assertLess(sim2, 1, "terms are not identical so <1")

    def test_sim_self(self):
        """computing similarity of a term with itself."""
        sim = self.obo.sim_jaccard("DOID:3650", "DOID:3650")
        self.assertEqual(sim, 1, "should be 1 by definition")

    def test_sim_distant(self):
        """computing similarity of a term with another distant term."""
        sim = self.obo.sim_jaccard("DOID:3650", "DOID:11044")
        self.assertLess(sim, 0.2, "only root is shared")

    def test_obo_replaced_by_none(self):
        """replaced_by on an active term yields None."""
        self.assertEqual(self.obo.replaced_by("DOID:4"), None,
                         "active term is not replaced by anything")
def setUp(self):
    """For setup, load a small ontology."""
    # parse the test ontology; give every term a default prior of 0.2
    self.obo = MinimalObo(self.obofile)
    self.obodef = dict.fromkeys(self.obo.ids(), 0.2)
from phenoprep.priors import get_priors_from_reps from phenoprep.write import write_priors from scoring.representation import Representation from tools.files import open_file from ..testhelpers import remove_if_exists testdir = join("tests", "testdata") mgi_file = join(testdir, "MGI-GP-small.rpt") out_prefix = join(testdir, "test-prep-MGI") priors_file = out_prefix + "-priors.tsv.gz" # standard ontology obo_file = join(testdir, "Y.obo") obo = MinimalObo(obo_file) # extended ontologies with intermediate terms ext1_obo_file = join(testdir, "Y.ext1.obo") ext1_obo = MinimalObo(ext1_obo_file) ext2_obo_file = join(testdir, "Y.ext2.obo") ext2_obo = MinimalObo(ext2_obo_file) class PriorsTests(unittest.TestCase): """Test cases for obtaining prior probabilities for phenotypes""" def setUp(self): self.models = prep_MGI(mgi_file, (0.8, 0.05), obo) def tearDown(self):
# Execute the program if module is used as an executable if __name__ == "__main__": config = parser.parse_args() tprfpr = (config.tpr, config.fpr) fe = filter_entities fe_cat = filter_entities_cat threshold = config.threshold if config.action == "MGI": # action to parse mouse phenotype models from MGI check_file(config.input, required="input") check_file(config.obo) obo = MinimalObo(config.obo) models = prep_MGI(config.input, tprfpr, obo) # write out all models and subsets genotype_models = fe_cat(models, set(["genotype"])) marker_models = fe_cat(models, set(["marker"])) write_models(genotype_models, config.output + "-genotype-universal") write_models(marker_models, config.output + "-marker-universal") # compute and write priors based on certain types of models categories = set(config.priors.split(",")) priors, num_models = get_priors_from_models(models, categories, obo, dark=config.dark_count) print("Number of models used to inform prior: " + str(num_models)) write_priors(priors, config.output)