예제 #1
0
    def test_positive_parent_multi(self):
        """Locate the positive ancestor of a term that has several parents."""

        # Ymulti.obo is an ontology in which Y7 is connected to both Y2 and Y1
        multi_obo = MinimalObo(join(testdir, "Ymulti.obo"))
        priors = dict.fromkeys(multi_obo.ids(), 0.0001)
        priors.update({"Y:003": 0.0002, "Y:005": 0.0002})

        # two references with slightly different phenotype annotations
        refset = ReferenceSet(dict(refA=0.5, refB=0.5),
                              ids=multi_obo.ids(),
                              row_priors=priors)
        ref_a = Representation(name="refA")
        ref_a.set("Y:002", 0.5)
        ref_a.set("Y:005", 1)
        ref_a.impute(multi_obo, priors)
        ref_b = Representation(name="refB")
        ref_b.set("Y:001", 0.5)
        ref_b.impute(multi_obo, priors)
        refset.add(ref_a)
        refset.add(ref_b)
        refset.learn_obo(multi_obo)

        # (reference column, expected ancestor row, failure message)
        expectations = (
            ("refA", "Y:002", "Y2 is a positive ancestor"),
            ("refB", "Y:001", "Y1 is a positive immediate parent"),
        )
        for ref_name, ancestor, message in expectations:
            found = refset._positive_ancestor(refset.columns[ref_name],
                                              refset.rows["Y:007"])
            self.assertEqual(found, refset.rows[ancestor], message)
예제 #2
0
    def test_obo_canonical(self):
        """official ids map onto themselves as canonical ids."""

        # the same expectation holds for the full and the minimal parser
        for parser in (Obo, MinimalObo):
            ontology = parser(alts_file)
            for official in ("AA:1", "AA:2"):
                self.assertEqual(ontology.canonical(official), official)
예제 #3
0
 def setUpClass(cls):
     """Build the test db once and load fixtures shared by all tests.

     Stores on the class: the db path, the pipeline, the ontology, two
     dicts of per-term default values, reference priors, two reference
     sets (both taught the ontology) and a few hand-made representations.
     """

     config = CompleteTestConfig()
     config.null_prior = 0.2
     cls.dbfile = config.db
     cls.pipeline = Phenoscoring(config)
     cls.pipeline.build()
     cls.obo = MinimalObo(check_file(config.obo, config.db, "obo"), True)

     # dummy per-term default values: one set at 0.2, one set at zero
     cls.obodefaults = {term: 0.2 for term in cls.obo.ids()}
     cls.obozeros = {term: 0 for term in cls.obo.ids()}

     # priors and reference sets are read back from the freshly built db
     cls.ref_priors = get_ref_priors(config.db)
     refsets = get_refsets(config.db, ref_priors=cls.ref_priors)
     cls.rs, cls.rs2 = refsets
     for refset in (cls.rs, cls.rs2):
         refset.learn_obo(cls.obo)

     # for testing individual configurations
     cls.y3model = Representation(name="Y3").set("Y:003", 0.8)
     for ref_name in ("refA", "refB"):
         reference = Representation(name=ref_name).set("Y:002", 1)
         setattr(cls, ref_name, reference)
         reference.defaults(cls.obozeros)
예제 #4
0
    def test_parsing(self):
        """parsing should return model descriptions"""

        models = prep_IMPC(impc_file, (0.8, 0.05), 0.01,
                           obo=MinimalObo(obo_file))

        # Expected count: two markers / two alleles at one zygosity give
        # a set of 4 models with positive phenotypes and a set of 4 with
        # negative phenotypes, i.e. 8; then x3 for universal/male/female.
        self.assertEqual(len(models), 24)

        descriptions = [model.description for model in models.values()]
        alleles = {desc["allele_id"] for desc in descriptions}
        markers = {desc["marker_id"] for desc in descriptions}
        self.assertEqual(len(alleles), 2)
        self.assertEqual(len(markers), 2)

        # tally the models by sex; any other non-empty value is "unspecified"
        sexes = [desc["sex"] for desc in descriptions]
        males = sexes.count("M")
        females = sexes.count("F")
        unspecified = sum(1 for sex in sexes
                          if sex and sex not in ("M", "F"))
        self.assertEqual(males, females, "should be paired and equal")
        self.assertEqual(males, unspecified, "should be paired and equal")
        self.assertGreater(males, 0)
예제 #5
0
    def build(self):
        """Create a db for phenoscoring: set up tables, then fill them.

        Returns early (without calling _end) when setup() reports no db
        path, which signals that the db exists and the build can be skipped.
        """

        # setup creates the db with empty tables
        db, cfg = self.setup()
        if db is None:
            return

        # resolve the prerequisite files before any table is filled
        obo_file = check_file(cfg.obo, db, "obo")
        ref_file = check_file(cfg.reference_phenotypes, db,
                              "reference_phenotypes")
        freq_file = check_file(cfg.phenotype_frequencies, db,
                               "phenotype_frequencies")

        log = self.logger.msg1
        log("Loading ontology")
        ontology = MinimalObo(obo_file, True)

        log("Preparing phenotype frequencies")
        fill_phenotype_frequency_table(db, freq_file)

        # references: concise table from file, complete table via ontology
        log("Preparing references")
        fill_concise_reference_table(db, ref_file)
        fill_complete_reference_table(db, ontology, cfg)

        self._end()
예제 #6
0
    def test_scaled_cooc(self):
        """write out cooc matrices"""

        obo = MinimalObo(obo_file)
        models = prep_IMPC(impc_file, (0.8, 0.05), 0.01, obo=obo)
        by_phenotype = get_models_by_phenotype(
            get_UA_models(models, "allele"), 1)

        # compute and write out each type of cooc matrix in turn
        cooc = {}
        for kind in ("full", "freq", "simJ"):
            cooc[kind], phenindex = make_scaled_cooc(by_phenotype, obo,
                                                     0, kind)
            write_phenotype_cooc(cooc[kind], phenindex,
                                 out_prefix + "-" + kind)

        # numerics: full == freq * (1 - simJ) for every pair of phenotypes
        for row in phenindex.values():
            for col in phenindex.values():
                self.assertEqual(
                    cooc["full"][row, col],
                    cooc["freq"][row, col] * (1 - cooc["simJ"][row, col]))

        # check all the files exist
        for path in (cooc_full_file, cooc_freq_file, cooc_simJ_file):
            self.assertTrue(exists(path))

        # a gentle (not rigorous) check of the file content
        with open_file(cooc_full_file, "rt") as handle:
            lines = handle.read().strip().split("\n")
        self.assertGreater(len(lines), 2)
예제 #7
0
    def export_representations(self):
        """Write matrix representations for models and refs to disk.

        Loads the ontology into self.obo before running the two exports.
        """

        db, cfg = self._start()
        self.logger.msg1("Loading ontology")
        self.obo = MinimalObo(check_file(cfg.obo, db, "obo"), True)
        # references first, then models
        self._export_reference_representations()
        self._export_model_representations(cfg)
        self._end()
예제 #8
0
    def explain(self):
        """Perform a verbose calculation of inference scores.

        The preparation mirrors compute(): data is loaded from the db
        through a single compute packet, then an inference chain is run
        for each model/reference pair with verbose output enabled.

        Returns a string: a JSON array with one element per pair, or an
        error message when config.explain is not 'general' or 'specific'.
        Raises Exception when the numbers of models and references differ.
        """

        self.logger.verbose = False
        dbpath, config = self._start()

        if config.explain not in ("specific", "general"):
            return "--explain must be 'general' or 'specific'"
        config.obo = check_file(config.obo, dbpath, "obo")

        # several model/reference pairs can be given as comma-separated lists
        model_ids = config.model.split(",")
        ref_ids = config.reference.split(",")
        n_pairs = len(model_ids)
        if n_pairs != len(ref_ids):
            raise Exception("incompatible number of models and references")

        # one packet (partition size = number of pairs) loads everything
        packets = prep_compute_packets(self.config,
                                       references=ref_ids,
                                       models=model_ids,
                                       partition_size=n_pairs)
        packet = packets[0]
        packet.prep()
        if config.explain == "specific":
            refset = packet.specific_refset
        else:
            refset = packet.general_refset
        refset.learn_obo(MinimalObo(config.obo))

        chains = [
            refset.inference_chain(
                packet.models[model_id],
                ref_id,
                verbose=True,
                fp_penalty=config.fp_penalty,
            ).to_json(nodata=config.explain_nodata)
            for model_id, ref_id in zip(model_ids, ref_ids)
        ]

        return "[" + ",".join(chains) + "]"
예제 #9
0
    def test_minimal_obo_converts_alts(self):
        """converts alt ids into canonical ids - using MinimalObo class."""

        ontology = MinimalObo(alts_file)
        # (query id, expected canonical id), checked in this order
        cases = (
            # the next three ids are defined in the obo
            ("AA:02", "AA:2"),
            ("AA:03", "AA:3"),
            ("AA:003", "AA:3"),
            # a repeated query should use the cache
            ("AA:003", "AA:3"),
            # the next few are not in the obo
            ("AA:000", None),
            ("AA:002", None),
        )
        for query, canonical in cases:
            self.assertEqual(ontology.canonical(query), canonical)
예제 #10
0
    def test_writing(self):
        """write the parsed models onto files and read them back"""

        models = prep_IMPC(impc_file, (0.8, 0.05), 0.01,
                           obo=MinimalObo(obo_file))
        write_models(models, out_prefix)

        # both output files must exist before their content is read back
        outputs = (desc_file, pheno_file)
        for path in outputs:
            self.assertTrue(exists(path))
        line_counts = []
        for path in outputs:
            with open_file(path, "rt") as handle:
                line_counts.append(len(handle.read().strip().split("\n")))
        n_desc, n_pheno = line_counts

        # description file should have 25 lines, 24 data lines plus header
        self.assertEqual(n_desc, 25)
        # phenotype file should have more than 7 lines
        self.assertGreater(n_pheno, 7)
예제 #11
0
    def test_imputing(self):
        """create new models based on UA."""

        ontology = MinimalObo(obo_file)
        parsed = prep_IMPC(impc_file, (0.8, 0.05), 0.01, obo=ontology)
        imputed = impute_IMPC(get_UA_models(parsed, "allele"), ontology, 0)
        write_models(imputed, out_prefix + "-imputed")

        # output files must exist before their content is inspected
        outputs = (imputed_desc_file, imputed_pheno_file)
        for path in outputs:
            self.assertTrue(exists(path))
        line_counts = []
        for path in outputs:
            with open_file(path, "rt") as handle:
                line_counts.append(len(handle.read().strip().split("\n")))
        n_desc, n_pheno = line_counts

        # description file should have 3 lines, 2 desc lines plus header
        self.assertEqual(n_desc, 3)
        # phenotype file should have a few lines
        self.assertGreater(n_pheno, 3)
예제 #12
0
    def prep(self):
        """Load initial data from the db.

        Fills in phenotype priors, reference priors, the general and
        specific reference sets (both taught the ontology), and replaces
        self.models with representations fetched for the same model names.
        """

        cfg = self.config
        db = cfg.db
        ontology = MinimalObo(cfg.obo)

        # information about references
        self.phen_priors = get_phenotype_priors(db)
        self.ref_priors = get_ref_priors(db, self.references)
        general, specific = get_refsets(db, ref_priors=self.ref_priors)
        # both reference sets need the ontology for reasoning
        for refset in (specific, general):
            refset.learn_obo(ontology)
        self.general_refset, self.specific_refset = general, specific

        # transfer model phenotypes, keyed by the names held so far
        self.models = get_model_representations(
            db,
            ontology,
            log=self.log,
            log_prefix=self.run_msg,
            model_names=list(self.models.keys()))
예제 #13
0
class RepresentationTests(unittest.TestCase):
    """Test cases for class Representation."""

    # default values used by tests that do not need an ontology
    defaults = dict(abc=0.1, xyz=0.1)

    # file with a small obo-formatted ontology
    obofile = os.path.join("tests", "testdata", "small.obo")

    def setUp(self):
        """For setup, load a small ontology and a default value per term."""
        self.obo = MinimalObo(self.obofile)
        self.obodef = dict.fromkeys(self.obo.ids(), 0.2)

    def test_empty_representation(self):
        """a representation built without arguments has no data or name."""

        rr = Representation()
        self.assertEqual(len(rr.data), 0, "representation should be empty")
        self.assertEqual(rr.name, None,
                         "representation should not have a name")

    def test_named_representation(self):
        """the name given to the constructor is stored."""

        rr = Representation(dict(abc=1), name="rr")
        self.assertEqual(rr.name, "rr", "rep should have a name")

    def test_str(self):
        """the string form of a representation includes its name."""

        ss = str(Representation(name="ABC XYZ"))
        self.assertRegex(ss, "XYZ", "representation string should have name")

    def test_set_feature(self):
        """can set and retrieve values"""

        rr = Representation()
        rr.set("abc", 0.2)
        self.assertEqual(rr.get("abc"), 0.2)

    def test_set_feature_float(self):
        """values set as int are retrieved as float"""

        rr = Representation()
        rr.set("abc", 1)
        self.assertEqual(rr.get("abc"), 1.0)
        self.assertIs(type(rr.get("abc")), float)

    def test_init_float(self):
        """initializing with a dict ensure float values"""

        rr = Representation(dict(abc=1), name="rr")
        self.assertEqual(rr.get("abc"), 1.0)
        self.assertIs(type(rr.get("abc")), float)

    def test_general_representation_get(self):
        """setting and getting from a generic representation."""

        rr = Representation(dict(xyz=0.2))
        rr.set("bob", 0.4)
        rr.set("xyz", 0.3)
        rr.defaults(self.defaults)
        self.assertEqual(rr.get("bob"), 0.4,
                         "value should come from manual input")
        self.assertEqual(rr.get("abc"), 0.1,
                         "value should come from defaults dict")
        self.assertEqual(rr.get("xyz"), 0.3,
                         "value should come from manual override")

    def test_keys(self):
        """keys cover both the defaults and the manually set features."""

        rr = Representation(dict(xyz=0.2))
        rr.set("bob", 0.4)
        rr.set("xyz", 0.3)
        rr.defaults(self.defaults)
        self.assertEqual(rr.keys(), ["abc", "xyz", "bob"],
                         "keys should have defaults and non-defaults")

    def test_has(self):
        """querying whether a value has been set."""

        rr = Representation(dict(xyz=0.2))
        rr.set("bob", 0.4)
        self.assertTrue(rr.has("xyz"), "set in constructor")
        self.assertTrue(rr.has("bob"), "set manually")
        self.assertFalse(rr.has("alice"), "not set")

    def test_equality(self):
        """checking content of representations."""

        r1 = Representation(self.defaults, name="hello")
        r2 = Representation(self.defaults, name="hello")
        r3 = Representation(self.defaults, name="bye")
        r4 = Representation(self.defaults, name="hello")
        r4.set("abc", 100)
        r5 = Representation()
        r6 = Representation(self.defaults, name="hello")
        r6.set("qqq", 20)

        self.assertTrue(r1.equal(r2), "all is the same")
        self.assertFalse(r1.equal(5), "argument is not a Representation")
        self.assertFalse(r1.equal(r3), "same content, but different name")
        self.assertFalse(r1.equal(r4), "same name, but different content")
        self.assertFalse(r1.equal(r5), "r5 is empty")
        self.assertFalse(r1.equal(r6), "r6 has more keys")
        self.assertFalse(r1.equal(range(4)), "must compare to Representation")

    def test_general_representation_get2(self):
        """getting values when defaults are applied before the specifics."""

        # Similar to test_general_representation_get, but setting defaults
        # before the specifics
        rr = Representation(dict(abc=0.1, xyz=0.2))
        rr.defaults(self.defaults)
        rr.set("bob", 0.4)
        rr.set("xyz", 0.3)
        self.assertEqual(rr.get("bob"), 0.4,
                         "value should come from manual input")
        self.assertEqual(rr.get("abc"), 0.1,
                         "value should come from defaults dict")
        self.assertEqual(rr.get("xyz"), 0.3,
                         "value should come from manual override")

    def test_impute_up(self):
        """updating values in representation via positive evidence."""

        rr = Representation(dict())
        rr.set("unrelated", 0.8)
        rr.set("DOID:0014667", 0.4)
        rr.impute(self.obo, self.obodef)

        self.assertEqual(rr.get("unrelated"),
                         0.8,
                         msg="out-of-ontology terms remain")
        self.assertEqual(rr.get("DOID:0014667"),
                         0.4,
                         msg="set value should remain")
        self.assertGreater(rr.get("DOID:4"),
                           0.4,
                           msg="ancestors should receive greater score")
        self.assertEqual(rr.get("DOID:0080015"),
                         0.2,
                         msg="unrelated terms get default")
        self.assertEqual(rr.get("DOID:655"),
                         0.2,
                         msg="children are unaffected")

    def test_impute_up_always_increases(self):
        """positive evidence raises ancestors even above a high prior."""

        rr = Representation(dict())
        rr.set("DOID:3650", 0.25)
        defaults = self.obodef.copy()
        defaults["DOID:0014667"] = 0.5
        defaults["DOID:4"] = 1
        rr.impute(self.obo, defaults)

        self.assertEqual(rr.get("DOID:3650"), 0.25, "set value should remain")
        self.assertGreater(rr.get("DOID:0060158"), 0.25,
                           "ancestors should receive greater score")
        self.assertEqual(rr.get("DOID:655"), 0.2,
                         "unrelated should stay at default")
        # ancestor that has already a higher score than what is propagated
        self.assertGreater(
            rr.get("DOID:0014667"), 0.5,
            "ancestor should receive score greater than its prior")

    def test_impute_up_avoid_doubles(self):
        """updating values in representation via positive evidence in DAG"""

        rr = Representation(dict())
        # DOID:11044 in test ontology has two paths to root (DOID:4)
        # one is direct (a shortcut)
        # another path is through 0080015
        rr.set("DOID:11044", 0.4)
        rr.impute(self.obo, self.obodef)

        self.assertGreater(rr.get("DOID:0080015"), 0.2,
                           "ancestor should increase")
        self.assertAlmostEqual(
            rr.get("DOID:0080015"),
            rr.get("DOID:4"),
            msg="4 should get bumped once, despite two paths from 11044")

    def test_impute_down(self):
        """updating values in representation via negative evidence."""

        rr = Representation(dict())
        rr.set("unrelated", 0.8)
        rr.set("DOID:0014667", 0.05)
        rr.impute(self.obo, self.obodef)

        # The message must be passed as msg=: the third positional argument
        # of assertAlmostEqual is `places`, so a positional string makes
        # any non-identical pair raise TypeError instead of being compared.
        self.assertAlmostEqual(rr.get("unrelated"), 0.8,
                               msg="out-of-ontology terms remain")
        self.assertAlmostEqual(rr.get("DOID:0014667"), 0.05,
                               msg="set value should remain")
        self.assertAlmostEqual(rr.get("DOID:4"), 0.2,
                               msg="ancestors should remain")
        self.assertAlmostEqual(rr.get("DOID:0080015"), 0.2,
                               msg="unrelated terms get default")
        # the expected value equals the seed's low value, not the default
        self.assertAlmostEqual(rr.get("DOID:655"), 0.05,
                               msg="children inherit the low value")

    def test_impute_down_ordering(self):
        """negative evidence gives the same result for any seed order."""

        r1 = Representation(dict())
        r1.set("DOID:3650", 0.01).set("DOID:0014667", 0.05)
        r2 = Representation(dict())
        r2.set("DOID:3650", 0.01).set("DOID:0014667", 0.05)
        # imputation down should not depend on order of the seeds
        r1.impute(self.obo, self.obodef, seeds=["DOID:3650", "DOID:0014667"])
        r2.impute(self.obo, self.obodef, seeds=["DOID:0014667", "DOID:3650"])

        self.assertEqual(r1.data, r2.data, "all values the same")

    def test_impute_fromseeds_highfirst(self):
        """imputing values from manually-specified seeds."""

        rr = Representation(dict())
        # specify data for two children, DOID:4 is higher in tree, so should gain
        rr.set("DOID:0014667", 0.4)
        rr.set("DOID:0080015", 0.3)

        rr.impute(self.obo,
                  self.obodef,
                  seeds=["DOID:0014667", "DOID:0080015"])

        self.assertAlmostEqual(rr.get("DOID:0014667"),
                               0.4,
                               msg="should remain")
        self.assertAlmostEqual(rr.get("DOID:0080015"),
                               0.3,
                               msg="should remain")
        self.assertAlmostEqual(
            rr.get("DOID:4"),
            1 - ((1 - 0.4) * (1 - 0.3) * (1 - 0.2)),
            msg="ancestor gains from two children (and prior)")
        self.assertAlmostEqual(rr.get("DOID:655"),
                               0.2,
                               msg="remain; new DOID:4")

    def test_impute_fromseeds_lowfirst(self):
        """imputing from manual seeds with the low ancestor value first."""

        rr = Representation(dict())
        # specify an inconsistent set of values, DOID:4 is higher in tree, so cannot
        # have a lower value than DOID:0014667
        rr.set("DOID:0014667", 0.4).set("DOID:4", 0.1)
        rr.impute(self.obo, self.obodef, seeds=["DOID:4", "DOID:0014667"])
        self.assertEqual(rr.get("DOID:0080015"), 0.1, "child of DOID:4")
        self.assertEqual(rr.get("DOID:655"), 0.1, "child of DOID:4")

    def test_impute_fromseeds_auto(self):
        """imputing values from automatically-ordered seeds."""

        # specify an inconsistent set of values, DOID:4 is higher in tree, so cannot
        # have a lower value than DOID:0014667
        # However, low DOID:4 can impact on other branches
        # NOTE(review): the first assertion message below mentions "manual"
        # seeds, but rr1 is imputed without a seeds argument, same as rr2
        rr1 = Representation(dict())
        rr1.set("DOID:0014667", 0.4).set("DOID:4", 0.1)
        rr1.impute(self.obo, self.obodef)

        # auto seeds
        rr2 = Representation(dict())
        rr2.set("DOID:0014667", 0.4).set("DOID:4", 0.1)
        rr2.impute(self.obo, self.obodef)

        # auto seeds, different initial ordering
        rr3 = Representation(dict())
        rr3.set("DOID:4", 0.1).set("DOID:0014667", 0.4)
        rr3.impute(self.obo, self.obodef)

        self.assertEqual(rr1.data, rr2.data,
                         "auto and manual should have same data")
        self.assertEqual(rr2.data, rr3.data,
                         "should be = regardless of input order")

        self.assertGreater(rr1.data["DOID:0014667"], 0.2,
                           "DOID:0014667 increase by direct evidence")
        self.assertGreater(rr1.data["DOID:4"], 0.2,
                           "DOID:4 increases driven by 0014667")
        self.assertEqual(rr1.data["DOID:11044"], 0.1,
                         "low raw DOID:4 propagates down")

    def test_sum_with_impute(self):
        """sum of values associated with the representation."""

        rr = Representation(dict())
        rr.set("DOID:0014667", 1)
        sum1 = rr.sum()
        rr.impute(self.obo, self.obodef)
        sum2 = rr.sum()

        self.assertEqual(sum1, 1, "value of one phenotype")
        self.assertGreater(sum2,
                           2,
                           msg="value for one phenotype+ancestors+defaults")

    def test_copy(self):
        """can copy a representation into a new object."""

        r1 = Representation(self.defaults, name="hello")
        r1.set("abc", 0.5)
        result = r1.copy()

        self.assertEqual(r1.name, result.name)
        self.assertEqual(r1.get("abc"), result.get("abc"))
        # changing the copy must leave the original untouched
        result.set("abc", 0.75)
        self.assertEqual(r1.get("abc"), 0.5)
        self.assertEqual(result.get("abc"), 0.75)
예제 #14
0
    def setUp(self):
        """Load the target ontology and the emapa map built against it."""

        ontology = MinimalObo(obo_file)
        self.obo = ontology
        self.emapa = get_emapa_map(emapa_file, ontology)
예제 #15
0
from phenoprep.prep_refs import prep_refs, get_oo_map, valid_reference_id
from phenoprep.prep_refs import prep_tech_models
from phenoprep.write import write_references
from obo.obo import MinimalObo
from ..testhelpers import remove_if_exists
from tools.files import open_file

# input files for these tests all live in the shared test data directory
testdir = join("tests", "testdata")
refs_file, obo_file, oo_file = (
    join(testdir, filename)
    for filename in ("phenotab-small.tab", "Y.obo", "owlsim-small.txt"))
k = 2

# output files are written next to the inputs under a common prefix
out_prefix = join(testdir, "test-prep-phenotab")
out_file = out_prefix + "-phenotypes.tsv.gz"

# ontology shared by the tests in this module
obo = MinimalObo(obo_file, True)


class ReferenceIdTests(unittest.TestCase):
    """Test cases for function valid_reference_id."""
    
    def test_accepts(self):
        """function accepts proper reference ids."""

        accepted = ("OMIM:10001", "DECIPHER:10", "ORPHANET:9002",
                    "ORPHA:9002", "DISEASE:1")
        for reference_id in accepted:
            self.assertTrue(valid_reference_id(reference_id))

    def test_rejects(self):
예제 #16
0
import json
import unittest
from collections import OrderedDict
from os.path import join, exists
from scoring.referenceset import ReferenceSet
from scoring.referencematrix import ReferenceMatrix
from scoring.representation import Representation
from obo.obo import MinimalObo
from tests.testhelpers import remove_if_exists

# names of the features used in this set of tests
testfeatures = list("abcde")

# small ontology for these tests, with a zero default for every term
testdir = join("tests", "testdata")
obofile = join(testdir, "small.obo")
obo = MinimalObo(obofile)
obodefaults = {term: 0.0 for term in obo.ids()}

# another ontology, Y.obo, with two sets of per-term defaults
Yfile = join(testdir, "Y.obo")
Yobo = MinimalObo(Yfile)
Ydefaults = {term: 0.2 for term in Yobo.ids()}
null_defaults = {term: 0.001 for term in Yobo.ids()}

# per-feature values; nanvals keeps fromkeys so all keys share one NaN object
defvals = {feature: 0.2 for feature in testfeatures}
zerovals = {feature: 0.0 for feature in testfeatures}
nanvals = dict.fromkeys(testfeatures, float("nan"))

# prefix for output files
outfile = join(testdir, "small_out")
예제 #17
0
import unittest
from os.path import join, exists
from obo.obo import MinimalObo
from phenoprep.prep_mgi import prep_MGI
from phenoprep.write import write_models
from ..testhelpers import remove_if_exists
from tools.files import open_file

# input files for the MGI tests
testdir = join("tests", "testdata")
mgi_file, obo_file = (join(testdir, filename)
                      for filename in ("MGI-GP-small.rpt", "Y.obo"))

# output files share one prefix and differ only in their suffix
out_prefix = join(testdir, "test-prep-MGI")
desc_file, pheno_file = (out_prefix + suffix
                         for suffix in ("-models.tsv.gz",
                                        "-phenotypes.tsv.gz"))

# ontology loaded once at import time
obo = MinimalObo(obo_file)


class PrepMGITests(unittest.TestCase):
    """Test cases for parsing information from MGI tables"""

    def tearDown(self):
        """remove any written-out files (if generated)"""
        for written in (desc_file, pheno_file):
            remove_if_exists(written)

    def test_inputs(self):
        """check that input file exists"""

        self.assertTrue(exists(mgi_file), "input file should exist")
예제 #18
0
class PhenoscoringEntityTrimmingTests(unittest.TestCase):
    """Test cases for trimming phenotypes out of entity objects"""

    # small ontology, loaded once when the class is defined
    obofile = join("tests", "testdata", "small.obo")
    obo = MinimalObo(obofile)

    @staticmethod
    def _entity(*observations):
        """Build entity "abc" with one datum per (phenotype, value) pair."""
        entity = Entity("abc", "genes")
        for phenotype, value in observations:
            entity.add(PhenotypeDatum(phenotype,
                                      Experiment(value, 0.8, 0.05)))
        return entity

    def test_trim_nothing(self):
        """trimming does nothing if there is nothing to do."""

        entity = self._entity(("DOID:3650", 1), ("DOID:11044", 1))

        self.assertEqual(len(entity.data), 2)
        entity.trim_ancestors(self.obo)
        self.assertEqual(len(entity.data), 2)

    def test_trim_easy(self):
        """trimming eliminates root node."""

        entity = self._entity(("DOID:4", 1), ("DOID:11044", 1))

        self.assertEqual(len(entity.data), 2)
        entity.trim_ancestors(self.obo)
        self.assertEqual(len(entity.data), 1)
        self.assertEqual(entity.data[0].phenotype, "DOID:11044")

    def test_trim_easy_keep(self):
        """trimming does not eliminate a node when asked to keep it."""

        entity = self._entity(("DOID:4", 1), ("DOID:11044", 1))

        self.assertEqual(len(entity.data), 2)
        entity.trim_ancestors(self.obo, {"DOID:4"})
        self.assertEqual(len(entity.data), 2)

    def test_trim_medium(self):
        """trimming eliminates ancestors when there are several leaves."""

        entity = self._entity(("DOID:4", 1), ("DOID:11044", 1),
                              ("DOID:0080015", 1), ("DOID:655", 1))

        self.assertEqual(len(entity.data), 4)
        entity.trim_ancestors(self.obo)
        self.assertEqual(len(entity.data), 2)
        remaining = {datum.phenotype for datum in entity.data}
        self.assertEqual(remaining, {"DOID:11044", "DOID:655"})

    def test_trim_leave_highvalue(self):
        """trimming keeps DOID:0080015 when DOID:11044 has a lower value."""

        entity = self._entity(("DOID:4", 1), ("DOID:11044", 0.5),
                              ("DOID:0080015", 1), ("DOID:655", 1))

        self.assertEqual(len(entity.data), 4)
        entity.trim_ancestors(self.obo)
        self.assertEqual(len(entity.data), 3)
        remaining = {datum.phenotype for datum in entity.data}
        self.assertEqual(remaining,
                         {"DOID:11044", "DOID:655", "DOID:0080015"})
예제 #19
0
class OboCoreTests(unittest.TestCase):
    """Test cases for class Obo - core capabilities with minimal data parsing"""

    # load file with small ontology, skip non-essential fields
    obo = MinimalObo(smallfile)

    def test_obo_ids(self):
        """Identify ids in a small ontology."""

        # ids(True) is checked against allids, plain ids() against curids
        self.assertEqual(self.obo.ids(True), allids,
                         "parsing should identify all ids in file")
        self.assertEqual(self.obo.ids(), curids,
                         "parsing should identify all ids in file")

    def test_obo_dealing_with_none(self):
        """Object None is handled gracefully via has and valid."""

        self.assertFalse(self.obo.has(None))
        self.assertFalse(self.obo.valid(None))

    def test_obo_has(self):
        """Checking presence of term ids in the obo."""

        # test a few keys that are present
        for term in ("DOID:4", "DOID:0014667", "DOID:3653"):
            self.assertTrue(self.obo.has(term))
        # test a few keys that are not present
        self.assertFalse(self.obo.has("wrongkey"))

    def test_obo_valid(self):
        """Checking validity of term ids in the obo."""

        # test a few keys that are present
        for term in ("DOID:4", "DOID:0014667"):
            self.assertTrue(self.obo.valid(term))
        # test a few keys that are not present
        self.assertFalse(self.obo.valid("wrongkey"))
        # test an item that is present but is obsolete
        self.assertFalse(self.obo.valid("DOID:3653"))

    def test_obo_parents_of_root(self):
        """Getting parents of the root term."""

        self.assertEqual(self.obo.parents("DOID:4"), (), "root has no parents")

    def test_obo_parents_valid(self):
        """Getting parent structure requires valid term."""

        with self.assertRaises(Exception) as caught:
            self.obo.parents("bad_id")
        # assertIn reports the actual message when the check fails
        self.assertIn("not present", str(caught.exception))

    def test_obo_parents_of_nonroot_nodes(self):
        """Getting parents of terms below the root."""

        self.assertEqual(self.obo.parents("DOID:0014667"), ("DOID:4", ),
                         "parent is root")
        self.assertEqual(self.obo.parents("DOID:3650"), ("DOID:0060158", ),
                         "parent is some other node")

    def test_obo_ancestors(self):
        """Getting ancestors structure."""

        self.assertEqual(sorted(self.obo.ancestors("DOID:3650")),
                         ["DOID:0014667", "DOID:0060158", "DOID:4"],
                         "ancestor traversal to root")

    def test_obo_parents_of_obsolete(self):
        """Getting parents of an obsolete term."""

        self.assertEqual(self.obo.parents("DOID:3653"), (),
                         "obsolete term here has no parent")

    def test_obo_children_of_root(self):
        """Getting inferred children."""

        self.assertEqual(sorted(self.obo.children("DOID:4")),
                         ["DOID:0014667", "DOID:0080015", "DOID:11044"],
                         "root has two direct children and one indirect")

    def test_obo_children_of_leaf(self):
        """Getting children of a term that has none."""

        self.assertEqual(self.obo.children("DOID:3650"), (), "no children")

    def test_obo_descendants(self):
        """Getting descendants structure."""

        self.assertEqual(sorted(self.obo.descendants("DOID:0014667")),
                         ["DOID:0060158", "DOID:3650", "DOID:655"],
                         "descendant traversal to leaves")

    def test_siblings_only_child(self):
        """Getting siblings from root or single child should be empty."""

        for term in ("DOID:4", "DOID:3650"):
            self.assertEqual(self.obo.siblings(term), ())

    def test_siblings(self):
        """Getting siblings from node with siblings."""

        self.assertEqual(self.obo.siblings("DOID:655"), ("DOID:0060158", ))
        self.assertEqual(self.obo.siblings("DOID:0060158"), ("DOID:655", ))
        # for the next two, siblings also include nodes using shortcuts
        # to a parent
        self.assertEqual(sorted(self.obo.siblings("DOID:0014667")),
                         ["DOID:0080015", "DOID:11044"])
        self.assertEqual(sorted(self.obo.siblings("DOID:0080015")),
                         ["DOID:0014667", "DOID:11044"])

    def test_sim_simple(self):
        """Computing similarity of two terms using ancestors."""

        sim1 = self.obo.sim_jaccard("DOID:0014667", "DOID:0080015")
        # the similarity is a float ratio, so compare with a tolerance
        # instead of exact equality (msg must be passed by keyword here)
        self.assertAlmostEqual(
            sim1, 1 / 3,
            msg="both terms are direct children on DOID:4")
        sim2 = self.obo.sim_jaccard("DOID:655", "DOID:0060158")
        self.assertGreater(sim2, sim1, "second set of terms is more specific")
        self.assertLess(sim2, 1, "terms are not identical so <1")

    def test_sim_self(self):
        """Computing similarity of a term with itself."""

        sim = self.obo.sim_jaccard("DOID:3650", "DOID:3650")
        self.assertEqual(sim, 1, "should be 1 by definition")

    def test_sim_distant(self):
        """Computing similarity of a term with another distant term."""

        sim = self.obo.sim_jaccard("DOID:3650", "DOID:11044")
        self.assertLess(sim, 0.2, "only root is shared")

    def test_obo_replaced_by_none(self):
        """Getting the replacement of an active term gives None."""

        self.assertIsNone(self.obo.replaced_by("DOID:4"),
                          "active term is not replaced by anything")
예제 #20
0
 def setUp(self):
     """Load the ontology and build a table of default values for its ids."""
     ontology = MinimalObo(self.obofile)
     self.obo = ontology
     # every id reported by the ontology gets the same default of 0.2
     self.obodef = {term_id: 0.2 for term_id in ontology.ids()}
예제 #21
0
from phenoprep.priors import get_priors_from_reps
from phenoprep.write import write_priors
from scoring.representation import Representation
from tools.files import open_file
from ..testhelpers import remove_if_exists


# locations of test inputs and of files written during the tests
testdir = join("tests", "testdata")
mgi_file = join(testdir, "MGI-GP-small.rpt")
out_prefix = join(testdir, "test-prep-MGI")
priors_file = out_prefix + "-priors.tsv.gz"

# ontology files: the standard one, then two extended ontologies with
# intermediate terms
obo_file = join(testdir, "Y.obo")
ext1_obo_file = join(testdir, "Y.ext1.obo")
ext2_obo_file = join(testdir, "Y.ext2.obo")

# ontologies are parsed at module level, i.e. once when the module loads
obo = MinimalObo(obo_file)
ext1_obo = MinimalObo(ext1_obo_file)
ext2_obo = MinimalObo(ext2_obo_file)


class PriorsTests(unittest.TestCase):
    """Test cases for obtaining prior probabilities for phenotypes"""        
        
    def setUp(self):
        """Prepare models from the small MGI input file before each test."""
        # NOTE(review): presumably a (tpr, fpr) pair - confirm in prep_MGI
        tpr_fpr = (0.8, 0.05)
        self.models = prep_MGI(mgi_file, tpr_fpr, obo)
             
    def tearDown(self):
예제 #22
0
# Execute the program if module is used as an executable

if __name__ == "__main__":

    config = parser.parse_args()
    tprfpr = (config.tpr, config.fpr)
    fe = filter_entities
    fe_cat = filter_entities_cat
    threshold = config.threshold

    if config.action == "MGI":
        # action to parse mouse phenotype models from MGI

        check_file(config.input, required="input")
        check_file(config.obo)
        obo = MinimalObo(config.obo)
        models = prep_MGI(config.input, tprfpr, obo)
        # write out all models and subsets
        genotype_models = fe_cat(models, set(["genotype"]))
        marker_models = fe_cat(models, set(["marker"]))
        write_models(genotype_models, config.output + "-genotype-universal")
        write_models(marker_models, config.output + "-marker-universal")
        # compute and write priors based on certain types of models
        categories = set(config.priors.split(","))
        priors, num_models = get_priors_from_models(models,
                                                    categories,
                                                    obo,
                                                    dark=config.dark_count)
        print("Number of models used to inform prior: " + str(num_models))
        write_priors(priors, config.output)