Exemple #1
0
    def test_parsing(self):
        """Parsing the IMPC input should yield the expected model set."""

        models = prep_IMPC(impc_file, (0.8, 0.05), 0.01,
                           obo=MinimalObo(obo_file))

        # Expected count: two markers and two alleles at one zygosity give
        # a set of 4 models with positive phenotypes and a set of 4 with
        # negative phenotypes, i.e. 8. Each of those comes in three
        # variants (universal/male/female), hence 24.
        self.assertEqual(len(models), 24)

        descriptions = [model.description for model in models.values()]
        allele_ids = {desc["allele_id"] for desc in descriptions}
        marker_ids = {desc["marker_id"] for desc in descriptions}

        self.assertEqual(len(allele_ids), 2)
        self.assertEqual(len(marker_ids), 2)

        # tally the models by sex; any non-empty value other than "M" or
        # "F" is counted as unspecified
        sexes = [desc["sex"] for desc in descriptions]
        n_male = sexes.count("M")
        n_female = sexes.count("F")
        n_unspecified = sum(1 for sex in sexes
                            if sex and sex not in ("M", "F"))

        self.assertEqual(n_male, n_female, "should be paired and equal")
        self.assertEqual(n_male, n_unspecified, "should be paired and equal")
        self.assertGreater(n_male, 0)
Exemple #2
0
    def test_scaled_cooc(self):
        """Build, write out, and cross-check the scaled cooc matrices."""

        obo = MinimalObo(obo_file)
        all_models = prep_IMPC(impc_file, (0.8, 0.05), 0.01, obo=obo)
        allele_models = get_UA_models(all_models, "allele")
        by_pheno = get_models_by_phenotype(allele_models, 1)

        # build and write one matrix per scaling mode; phenindex is
        # reassigned on every pass, so the index from the last mode is
        # the one used in the checks below
        cooc = {}
        for mode, suffix in (("full", "-full"),
                             ("freq", "-freq"),
                             ("simJ", "-simJ")):
            cooc[mode], phenindex = make_scaled_cooc(by_pheno, obo, 0, mode)
            write_phenotype_cooc(cooc[mode], phenindex, out_prefix + suffix)

        # numerics: every "full" entry must equal the matching "freq"
        # entry multiplied by (1 - the matching "simJ" entry)
        for row in phenindex.values():
            for col in phenindex.values():
                full_value = cooc["full"][row, col]
                product = cooc["freq"][row, col] * (1 - cooc["simJ"][row, col])
                self.assertEqual(full_value, product)

        # all three output files must exist
        for path in (cooc_full_file, cooc_freq_file, cooc_simJ_file):
            self.assertTrue(exists(path))

        # light sanity check of the file content (not a rigorous one)
        with open_file(cooc_full_file, "rt") as handle:
            full_lines = handle.read().strip().split("\n")
        self.assertGreater(len(full_lines), 2)
Exemple #3
0
    def test_writing(self):
        """Models parsed from the IMPC input are written onto files."""

        models = prep_IMPC(impc_file, (0.8, 0.05), 0.01,
                           obo=MinimalObo(obo_file))
        write_models(models, out_prefix)

        # both output files must exist before they are read back
        for path in (desc_file, pheno_file):
            self.assertTrue(exists(path))

        with open_file(desc_file, "rt") as handle:
            desc_lines = handle.read().strip().split("\n")
        with open_file(pheno_file, "rt") as handle:
            pheno_lines = handle.read().strip().split("\n")

        # description file: one header line plus 24 data lines
        self.assertEqual(len(desc_lines), 25)
        # phenotype file: must hold more than 7 lines
        self.assertGreater(len(pheno_lines), 7)
Exemple #4
0
    def test_imputing(self):
        """Impute new models from the UA allele models and write them out."""

        obo = MinimalObo(obo_file)
        parsed = prep_IMPC(impc_file, (0.8, 0.05), 0.01, obo=obo)
        allele_models = get_UA_models(parsed, "allele")
        write_models(impute_IMPC(allele_models, obo, 0),
                     out_prefix + "-imputed")

        # the imputed output files must exist before they are read back
        for path in (imputed_desc_file, imputed_pheno_file):
            self.assertTrue(exists(path))

        with open_file(imputed_desc_file, "rt") as handle:
            desc_lines = handle.read().strip().split("\n")
        with open_file(imputed_pheno_file, "rt") as handle:
            pheno_lines = handle.read().strip().split("\n")

        # description file: one header line plus 2 description lines
        self.assertEqual(len(desc_lines), 3)
        # phenotype file: must hold more than 3 lines
        self.assertGreater(len(pheno_lines), 3)
Exemple #5
0
    def test_parsing_empty(self):
        """Parsing with no input should give back an empty dict."""

        self.assertEqual(prep_IMPC(None, None, None), {})
Exemple #6
0
        priors, num_models = get_priors_from_models(models,
                                                    categories,
                                                    obo,
                                                    dark=config.dark_count)
        print("Number of models used to inform prior: " + str(num_models))
        write_priors(priors, config.output)

    if config.action == "IMPC":
        # action to parse mouse model annotations from IMPC

        check_file(config.input, required="input")
        check_file(config.obo)
        obo = MinimalObo(config.obo)
        models = prep_IMPC(config.input,
                           tprfpr,
                           threshold,
                           simplify=config.simplify,
                           obo=obo)

        # filter functions
        def f_U_allele(x):
            """Select "allele"-category models whose sex is "U"."""
            is_allele = x.category == "allele"
            return is_allele and x.description["sex"] == "U"

        def f_S_allele(x):
            """Select "allele"-category models whose sex is not "U"."""
            is_allele = x.category == "allele"
            return is_allele and x.description["sex"] != "U"

        def f_U_marker(x):
            """Select "marker"-category models whose sex is "U"."""
            is_marker = x.category == "marker"
            return is_marker and x.description["sex"] == "U"

        def f_S_marker(x):
            """Select "marker"-category models whose sex is not "U"."""
            is_marker = x.category == "marker"
            return is_marker and x.description["sex"] != "U"