def test_parsing(self):
    """Parsing the IMPC input should yield the expected model descriptions."""
    ontology = MinimalObo(obo_file)
    models = prep_IMPC(impc_file, (0.8, 0.05), 0.01, obo=ontology)

    # Expected count: two markers and two alleles at a single zygosity,
    # once with positive phenotypes and once with negative phenotypes,
    # gives 8 models. Each exists in a universal, male, and female
    # variant, hence 8 x 3 = 24.
    self.assertEqual(len(models), 24)

    # the models should refer to exactly two alleles and two markers
    alleles = {m.description["allele_id"] for m in models.values()}
    markers = {m.description["marker_id"] for m in models.values()}
    self.assertEqual(len(alleles), 2)
    self.assertEqual(len(markers), 2)

    # tally models by sex; any other non-empty label counts as unspecified
    sexes = [m.description["sex"] for m in models.values()]
    num_male = sexes.count("M")
    num_female = sexes.count("F")
    num_other = sum(1 for s in sexes if s and s not in ("M", "F"))
    self.assertEqual(num_male, num_female, "should be paired and equal")
    self.assertEqual(num_male, num_other, "should be paired and equal")
    self.assertGreater(num_male, 0)
def test_scaled_cooc(self):
    """Build the scaled co-occurrence matrices and write them to files."""
    obo = MinimalObo(obo_file)
    models = prep_IMPC(impc_file, (0.8, 0.05), 0.01, obo=obo)
    allele_models = get_UA_models(models, "allele")
    positive_models = get_models_by_phenotype(allele_models, 1)

    # build and write each flavour of cooc matrix in turn;
    # phenindex is kept from the last call
    matrices = {}
    for kind in ("full", "freq", "simJ"):
        matrices[kind], phenindex = make_scaled_cooc(
            positive_models, obo, 0, kind
        )
        write_phenotype_cooc(matrices[kind], phenindex, out_prefix + "-" + kind)
    cooc_full = matrices["full"]
    cooc_freq = matrices["freq"]
    cooc_simJ = matrices["simJ"]

    # numeric check: each cell of the full matrix should equal the
    # frequency cell scaled by (1 - simJ cell)
    for _, row in phenindex.items():
        for _, col in phenindex.items():
            actual = cooc_full[row, col]
            scaled = cooc_freq[row, col] * (1 - cooc_simJ[row, col])
            self.assertEqual(actual, scaled)

    # all three output files should now be present
    self.assertTrue(exists(cooc_full_file))
    self.assertTrue(exists(cooc_freq_file))
    self.assertTrue(exists(cooc_simJ_file))

    # light (non-rigorous) sanity check on the content of one file
    with open_file(cooc_full_file, "rt") as handle:
        full_lines = handle.read().strip().split("\n")
    self.assertGreater(len(full_lines), 2)
def test_writing(self):
    """Models parsed from the IMPC input should be written onto files."""
    ontology = MinimalObo(obo_file)
    parsed = prep_IMPC(impc_file, (0.8, 0.05), 0.01, obo=ontology)
    write_models(parsed, out_prefix)

    # both output files must exist before reading them back
    self.assertTrue(exists(desc_file))
    self.assertTrue(exists(pheno_file))
    with open_file(desc_file, "rt") as handle:
        desc_lines = handle.read().strip().split("\n")
    with open_file(pheno_file, "rt") as handle:
        pheno_lines = handle.read().strip().split("\n")

    # description file: one header line plus 24 data lines
    self.assertEqual(len(desc_lines), 25)
    # phenotype file: expect more than 7 lines
    self.assertGreater(len(pheno_lines), 7)
def test_imputing(self):
    """Imputation should create new models from the UA allele models."""
    ontology = MinimalObo(obo_file)
    parsed = prep_IMPC(impc_file, (0.8, 0.05), 0.01, obo=ontology)
    allele_models = get_UA_models(parsed, "allele")
    imputed_models = impute_IMPC(allele_models, ontology, 0)
    write_models(imputed_models, out_prefix + "-imputed")

    # the imputed output files must exist before reading them back
    self.assertTrue(exists(imputed_desc_file))
    self.assertTrue(exists(imputed_pheno_file))
    with open_file(imputed_desc_file, "rt") as handle:
        desc_lines = handle.read().strip().split("\n")
    with open_file(imputed_pheno_file, "rt") as handle:
        pheno_lines = handle.read().strip().split("\n")

    # description file: one header line plus 2 description lines
    self.assertEqual(len(desc_lines), 3)
    # phenotype file: expect more than 3 lines
    self.assertGreater(len(pheno_lines), 3)
def test_parsing_empty(self):
    """Parsing with no input should give back an empty dict."""
    parsed = prep_IMPC(None, None, None)
    self.assertEqual(parsed, {})
priors, num_models = get_priors_from_models(models, categories, obo, dark=config.dark_count) print("Number of models used to inform prior: " + str(num_models)) write_priors(priors, config.output) if config.action == "IMPC": # action to parse mouse model annotations from IMPC check_file(config.input, required="input") check_file(config.obo) obo = MinimalObo(config.obo) models = prep_IMPC(config.input, tprfpr, threshold, simplify=config.simplify, obo=obo) # filter functions def f_U_allele(x): return x.category == "allele" and x.description["sex"] == "U" def f_S_allele(x): return x.category == "allele" and x.description["sex"] != "U" def f_U_marker(x): return x.category == "marker" and x.description["sex"] == "U" def f_S_marker(x): return x.category == "marker" and x.description["sex"] != "U"