def test_forbidden_hits(self):
    """
    Check that the hits handed over as forbidden when a LikelySystem is
    built are returned by its ``forbidden_hits`` attribute.
    """
    model = Model("foo/T2SS", 10)

    # gspD is registered as mandatory, sctJ and sctN as forbidden
    core_gspd = CoreGene(self.model_location, "gspD", self.profile_factory)
    mg_gspd = ModelGene(core_gspd, model)
    model.add_mandatory_gene(mg_gspd)

    core_sctj = CoreGene(self.model_location, "sctJ", self.profile_factory)
    mg_sctj = ModelGene(core_sctj, model)
    model.add_forbidden_gene(mg_sctj)

    core_sctn = CoreGene(self.model_location, "sctN", self.profile_factory)
    mg_sctn = ModelGene(core_sctn, model)
    model.add_forbidden_gene(mg_sctn)

    # the three hits differ only by their gene and their identifier
    hit_tail = (803, "replicon_id", 1, 1.0, 1.0, 1.0, 1.0, 10, 20)
    vh_gspd = ValidHit(Hit(core_gspd, "hit_1", *hit_tail), mg_gspd, GeneStatus.MANDATORY)
    vh_sctj = ValidHit(Hit(core_sctj, "hit_2", *hit_tail), mg_sctj, GeneStatus.FORBIDDEN)
    vh_sctn = ValidHit(Hit(core_sctn, "hit_3", *hit_tail), mg_sctn, GeneStatus.FORBIDDEN)

    likely_system = LikelySystem(model, [vh_gspd], [], [], [vh_sctj, vh_sctn])
    self.assertListEqual(likely_system.forbidden_hits, [vh_sctj, vh_sctn])
def test_UnlikelySystemSerializer_txt(self):
    """
    Serialize an UnlikelySystem holding one mandatory, two accessory and
    one forbidden hit with TxtUnikelySystemSerializer and compare the
    result with the expected text report.
    """
    model = Model("foo/FOO", 10)

    # (gene name, method registering the gene in the model, status of its hit)
    layout = (
        ("gspD", model.add_mandatory_gene, GeneStatus.MANDATORY),
        ("sctJ", model.add_accessory_gene, GeneStatus.ACCESSORY),
        ("sctN", model.add_accessory_gene, GeneStatus.ACCESSORY),
        ("abc", model.add_forbidden_gene, GeneStatus.FORBIDDEN),
    )

    # first declare all the genes of the model ...
    declared = []
    for gene_name, register, status in layout:
        core_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
        model_gene = ModelGene(core_gene, model)
        register(model_gene)
        declared.append((core_gene, model_gene, status))

    # ... then one hit per gene: hit_<n> is located at position <n>
    valid_hits = []
    for rank, (core_gene, model_gene, status) in enumerate(declared, start=1):
        hit = Hit(core_gene, f"hit_{rank}", 803, "replicon_id", rank, 1.0, 1.0, 1.0, 1.0, 10, 20)
        valid_hits.append(ValidHit(hit, model_gene, status))
    vh_gspd, vh_sctj, vh_sctn, vh_abc = valid_hits

    serializer = TxtUnikelySystemSerializer()
    unlikely = UnlikelySystem(model, [vh_gspd], [vh_sctj, vh_sctn], [], [vh_abc], ["the reason why"])
    txt = serializer.serialize(unlikely)

    # NOTE(review): the line layout of the expected report (in particular the
    # empty lines) must match the serializer output exactly -- confirm.
    expected_lines = [
        "This replicon probably not contains a system foo/FOO:",
        "the reason why",
        "",
        "system id = replicon_id_FOO_1",
        "model = foo/FOO",
        "replicon = replicon_id",
        "hits = [('hit_1', 'gspD', 1), ('hit_2', 'sctJ', 2), ('hit_3', 'sctN', 3), ('hit_4', 'abc', 4)]",
        "wholeness = 1.000",
        "",
        "mandatory genes:",
        "\t- gspD: 1 (gspD)",
        "",
        "accessory genes:",
        "\t- sctJ: 1 (sctJ)",
        "\t- sctN: 1 (sctN)",
        "",
        "neutral genes:",
        "",
        "forbidden genes:",
        "\t- abc: 1 (abc)",
        "",
        "Use ordered replicon to have better prediction.",
    ]
    expected_txt = "\n".join(expected_lines) + "\n"
    self.assertEqual(txt, expected_txt)
def test_is_Forbidden(self):
    """
    test that is_forbidden is False for a gene added to the model as
    mandatory and True for a gene added to the model as forbidden
    """
    model_foo = Model("foo", 10)

    # a mandatory gene must not be reported as forbidden
    mandatory_core = CoreGene(self.model_location, 'sctJ_FLG', self.profile_factory)
    mandatory_gene = ModelGene(mandatory_core, model_foo)
    model_foo.add_mandatory_gene(mandatory_gene)
    self.assertFalse(mandatory_gene.is_forbidden(model_foo))

    # a forbidden gene must be reported as forbidden
    forbidden_core = CoreGene(self.model_location, 'sctJ', self.profile_factory)
    forbidden_gene = ModelGene(forbidden_core, model_foo)
    model_foo.add_forbidden_gene(forbidden_gene)
    self.assertTrue(forbidden_gene.is_forbidden(model_foo))
def test_str(self):
    """
    Compare ``str(model)`` with the expected listing for a model holding
    one gene of each category (mandatory, accessory, neutral, forbidden).
    The mandatory and the accessory genes each carry one exchangeable;
    the expected text only lists the model genes themselves.
    """
    model = Model("foo/bar", 10)

    def core(name):
        # all the core genes of this test share location and profile factory
        return CoreGene(self.model_location, name, self.profile_factory)

    mandatory_gene = ModelGene(core('sctJ_FLG'), model)
    model.add_mandatory_gene(mandatory_gene)
    mandatory_gene.add_exchangeable(Exchangeable(core('sctJ'), mandatory_gene))

    accessory_gene = ModelGene(core('sctN_FLG'), model)
    model.add_accessory_gene(accessory_gene)
    accessory_gene.add_exchangeable(Exchangeable(core('sctN'), accessory_gene))

    neutral_gene = ModelGene(core('toto'), model)
    model.add_neutral_gene(neutral_gene)

    forbidden_gene = ModelGene(core('sctC'), model)
    model.add_forbidden_gene(forbidden_gene)

    # NOTE(review): one item per line is assumed for the expected listing -- confirm.
    expected_lines = [
        "name: bar",
        "fqn: foo/bar",
        "==== mandatory genes ====",
        "sctJ_FLG",
        "==== accessory genes ====",
        "sctN_FLG",
        "==== neutral genes ====",
        "toto",
        "==== forbidden genes ====",
        "sctC",
        "============== end pprint model ================",
    ]
    exp_str = "\n".join(expected_lines) + "\n"
    self.assertEqual(str(model), exp_str)
def test_reason(self):
    """
    Check that the reasons given when an UnlikelySystem is built can be
    read back through its ``reasons`` attribute.
    """
    model = Model("foo/model_A", 10)

    core_gspd = CoreGene(self.model_location, "gspD", self.profile_factory)
    mg_gspd = ModelGene(core_gspd, model)
    model.add_mandatory_gene(mg_gspd)

    core_sctj = CoreGene(self.model_location, "sctJ", self.profile_factory)
    mg_sctj = ModelGene(core_sctj, model)
    model.add_forbidden_gene(mg_sctj)

    # one mandatory hit and one forbidden hit, identical except for gene and id
    hit_tail = (803, "replicon_id", 1, 1.0, 1.0, 1.0, 1.0, 10, 20)
    vh_mandatory = ValidHit(Hit(core_gspd, "hit_1", *hit_tail), mg_gspd, GeneStatus.MANDATORY)
    vh_forbidden = ValidHit(Hit(core_sctj, "hit_2", *hit_tail), mg_sctj, GeneStatus.FORBIDDEN)

    reasons = ["forbidden gene"]
    unlikely = UnlikelySystem(model, [vh_mandatory], [], [], [vh_forbidden], reasons)
    self.assertEqual(unlikely.reasons, reasons)
def test_SystemSerializer_str(self):
    """
    Serialize a System of model foo/A (two clusters) with
    TxtSystemSerializer and compare the result with the expected text.
    The gspD hit is also part of a System of model foo/B, which the
    expected text reports next to the gene as ``[sys_id_B]``.
    """
    model_name = 'foo'
    model_location = ModelLocation(path=os.path.join(self.cfg.models_dir(), model_name))
    model_A = Model("foo/A", 10)
    model_B = Model("foo/B", 10)

    def core(name):
        return CoreGene(model_location, name, self.profile_factory)

    def gene_a_with_alternate(name, alternate_core):
        # gene of model_A carrying one exchangeable built on alternate_core
        core_gene = core(name)
        model_gene = ModelGene(core_gene, model_A)
        model_gene.add_exchangeable(Exchangeable(alternate_core, model_gene))
        return core_gene, model_gene

    def hit(core_gene, hit_id):
        return Hit(core_gene, hit_id, 803, "replicon_id", 1, 1.0, 1.0, 1.0, 1.0, 10, 20)

    # genes attached to model_B (flgB is only used as a core gene here)
    core_sctn_flg = core("sctN_FLG")
    mg_sctn_flg = ModelGene(core_sctn_flg, model_B)
    core_sctj_flg = core("sctJ_FLG")
    mg_sctj_flg = ModelGene(core_sctj_flg, model_B)
    core_flgb = core("flgB")
    core_tadz = core("tadZ")
    mg_tadz = ModelGene(core_tadz, model_B)

    # genes attached to model_A, each with one exchangeable
    core_sctn, mg_sctn = gene_a_with_alternate("sctN", core_sctn_flg)
    core_sctj, mg_sctj = gene_a_with_alternate("sctJ", core_sctj_flg)
    core_gspd, mg_gspd = gene_a_with_alternate("gspD", core_flgb)
    _, mg_abc = gene_a_with_alternate("abc", core_tadz)

    model_A.add_mandatory_gene(mg_sctn)
    model_A.add_mandatory_gene(mg_sctj)
    model_A.add_accessory_gene(mg_gspd)
    model_A.add_forbidden_gene(mg_abc)

    model_B.add_mandatory_gene(mg_sctn_flg)
    model_B.add_mandatory_gene(mg_sctj_flg)
    model_B.add_accessory_gene(mg_gspd)
    model_B.add_accessory_gene(mg_tadz)

    hit_sctj = hit(core_sctj, "hit_sctj")
    hit_sctn = hit(core_sctn, "hit_sctn")
    hit_gspd = hit(core_gspd, "hit_gspd")
    hit_sctj_flg = hit(core_sctj_flg, "hit_sctj_flg")
    hit_tadz = hit(core_tadz, "hit_tadZ")

    model_A._min_mandatory_genes_required = 2
    model_A._min_genes_required = 2
    cluster_1 = Cluster(
        [
            ValidHit(hit_sctj, mg_sctj, GeneStatus.MANDATORY),
            ValidHit(hit_sctn, mg_sctn, GeneStatus.MANDATORY),
            ValidHit(hit_gspd, mg_gspd, GeneStatus.ACCESSORY),
        ],
        model_A, self.hit_weights)
    cluster_2 = Cluster(
        [
            ValidHit(hit_sctj, mg_sctj, GeneStatus.MANDATORY),
            ValidHit(hit_sctn, mg_sctn, GeneStatus.MANDATORY),
        ],
        model_A, self.hit_weights)

    model_B._min_mandatory_genes_required = 1
    model_B._min_genes_required = 2
    cluster_3 = Cluster(
        [
            ValidHit(hit_sctj_flg, mg_sctj_flg, GeneStatus.MANDATORY),
            ValidHit(hit_tadz, mg_tadz, GeneStatus.ACCESSORY),
            ValidHit(hit_gspd, mg_gspd, GeneStatus.ACCESSORY),
        ],
        model_B, self.hit_weights)

    sys_A = System(model_A, [cluster_1, cluster_2], self.cfg.redundancy_penalty())
    sys_A.id = "sys_id_A"
    sys_B = System(model_B, [cluster_3], self.cfg.redundancy_penalty())
    sys_B.id = "sys_id_B"
    tracker = HitSystemTracker([sys_A, sys_B])
    serializer = TxtSystemSerializer()

    # NOTE(review): the line layout of the expected text (in particular the
    # empty line before each gene category) must match the serializer
    # output exactly -- confirm.
    expected_lines = [
        f"system id = {sys_A.id}",
        "model = foo/A",
        "replicon = replicon_id",
        "clusters = [('hit_sctj', 'sctJ', 1), ('hit_sctn', 'sctN', 1), ('hit_gspd', 'gspD', 1)], "
        "[('hit_sctj', 'sctJ', 1), ('hit_sctn', 'sctN', 1)]",
        "occ = 2",
        "wholeness = 1.000",
        "loci nb = 2",
        "score = 1.500",
        "",
        "mandatory genes:",
        "\t- sctN: 2 (sctN, sctN)",
        "\t- sctJ: 2 (sctJ, sctJ)",
        "",
        "accessory genes:",
        "\t- gspD: 1 (gspD [sys_id_B])",
        "",
        "neutral genes:",
    ]
    sys_str = "\n".join(expected_lines) + "\n"
    self.assertEqual(sys_str, serializer.serialize(sys_A, tracker))
def test_LikelySystemSerializer_txt(self):
    """
    Serialize a LikelySystem holding one mandatory, two accessory and one
    forbidden hit with TxtLikelySystemSerializer and compare the result
    with the expected text report (which includes a warning line about
    forbidden genes).
    """
    model = Model("foo/FOO", 10)

    def core(name):
        return CoreGene(self.model_location, name, self.profile_factory)

    def hit_at(core_gene, rank):
        # hit_<rank> is located at position <rank> on the replicon
        return Hit(core_gene, f"hit_{rank}", 803, "replicon_id", rank, 1.0, 1.0, 1.0, 1.0, 10, 20)

    core_gspd = core("gspD")
    mg_gspd = ModelGene(core_gspd, model)
    model.add_mandatory_gene(mg_gspd)

    core_sctj = core("sctJ")
    mg_sctj = ModelGene(core_sctj, model)
    model.add_accessory_gene(mg_sctj)

    core_sctn = core("sctN")
    mg_sctn = ModelGene(core_sctn, model)
    model.add_accessory_gene(mg_sctn)

    core_abc = core("abc")
    mg_abc = ModelGene(core_abc, model)
    model.add_forbidden_gene(mg_abc)

    vh_gspd = ValidHit(hit_at(core_gspd, 1), mg_gspd, GeneStatus.MANDATORY)
    vh_sctj = ValidHit(hit_at(core_sctj, 2), mg_sctj, GeneStatus.ACCESSORY)
    vh_sctn = ValidHit(hit_at(core_sctn, 3), mg_sctn, GeneStatus.ACCESSORY)
    vh_abc = ValidHit(hit_at(core_abc, 4), mg_abc, GeneStatus.FORBIDDEN)

    likely = LikelySystem(model, [vh_gspd], [vh_sctj, vh_sctn], [], [vh_abc])
    tracker = HitSystemTracker([likely])
    serializer = TxtLikelySystemSerializer()
    txt = serializer.serialize(likely, tracker)

    # NOTE(review): the line layout of the expected report (in particular the
    # empty lines) must match the serializer output exactly -- confirm.
    expected_lines = [
        "This replicon contains genetic materials needed for system foo/FOO",
        "WARNING there quorum is reached but there is also some forbidden genes.",
        "",
        "system id = replicon_id_FOO_1",
        "model = foo/FOO",
        "replicon = replicon_id",
        "hits = [('hit_1', 'gspD', 1), ('hit_2', 'sctJ', 2), ('hit_3', 'sctN', 3), ('hit_4', 'abc', 4)]",
        "wholeness = 1.000",
        "",
        "mandatory genes:",
        "\t- gspD: 1 (gspD)",
        "",
        "accessory genes:",
        "\t- sctJ: 1 (sctJ)",
        "\t- sctN: 1 (sctN)",
        "",
        "neutral genes:",
        "",
        "forbidden genes:",
        "\t- abc: 1 (abc)",
        "",
        "Use ordered replicon to have better prediction.",
    ]
    expected_txt = "\n".join(expected_lines) + "\n"
    self.assertEqual(txt, expected_txt)
def test_SolutionSerializer_tsv(self):
    """
    Serialize a solution made of two Systems (models foo/A and foo/B)
    with TsvSolutionSerializer and compare the result with the expected
    tab separated rows: one row per hit, the rows of each system being
    followed by an empty line.
    """
    model_name = 'foo'
    model_location = ModelLocation(path=os.path.join(self.cfg.models_dir(), model_name))
    model_A = Model("foo/A", 10)
    model_B = Model("foo/B", 10)

    def core(name):
        return CoreGene(model_location, name, self.profile_factory)

    def gene_a_with_alternate(name, alternate_core):
        # gene of model_A carrying one exchangeable built on alternate_core
        core_gene = core(name)
        model_gene = ModelGene(core_gene, model_A)
        model_gene.add_exchangeable(Exchangeable(alternate_core, model_gene))
        return core_gene, model_gene

    def hit(core_gene, hit_id):
        return Hit(core_gene, hit_id, 803, "replicon_id", 1, 1.0, 1.0, 1.0, 1.0, 10, 20)

    # genes attached to model_B
    core_sctn_flg = core("sctN_FLG")
    mg_sctn_flg = ModelGene(core_sctn_flg, model_B)
    core_sctj_flg = core("sctJ_FLG")
    mg_sctj_flg = ModelGene(core_sctj_flg, model_B)
    core_flgb = core("flgB")
    mg_flgb = ModelGene(core_flgb, model_B)
    core_tadz = core("tadZ")
    mg_tadz = ModelGene(core_tadz, model_B)

    # genes attached to model_A, each with one exchangeable
    core_sctn, mg_sctn = gene_a_with_alternate("sctN", core_sctn_flg)
    core_sctj, mg_sctj = gene_a_with_alternate("sctJ", core_sctj_flg)
    core_gspd, mg_gspd = gene_a_with_alternate("gspD", core_flgb)
    _, mg_abc = gene_a_with_alternate("abc", core_tadz)

    model_A.add_mandatory_gene(mg_sctn)
    model_A.add_mandatory_gene(mg_sctj)
    model_A.add_accessory_gene(mg_gspd)
    model_A.add_forbidden_gene(mg_abc)

    model_B.add_mandatory_gene(mg_sctn_flg)
    model_B.add_mandatory_gene(mg_sctj_flg)
    model_B.add_accessory_gene(mg_flgb)
    model_B.add_accessory_gene(mg_tadz)

    hit_sctj = hit(core_sctj, "hit_sctj")
    hit_sctn = hit(core_sctn, "hit_sctn")
    hit_gspd = hit(core_gspd, "hit_gspd")
    hit_sctj_flg = hit(core_sctj_flg, "hit_sctj_flg")
    hit_flgb = hit(core_flgb, "hit_flgB")
    hit_tadz = hit(core_tadz, "hit_tadZ")

    model_A._min_mandatory_genes_required = 2
    model_A._min_genes_required = 2
    cluster_1 = Cluster(
        [
            ValidHit(hit_sctj, mg_sctj, GeneStatus.MANDATORY),
            ValidHit(hit_sctn, mg_sctn, GeneStatus.MANDATORY),
            ValidHit(hit_gspd, mg_gspd, GeneStatus.ACCESSORY),
        ],
        model_A, self.hit_weights)
    cluster_2 = Cluster(
        [
            ValidHit(hit_sctj, mg_sctj, GeneStatus.MANDATORY),
            ValidHit(hit_sctn, mg_sctn, GeneStatus.MANDATORY),
        ],
        model_A, self.hit_weights)

    model_B._min_mandatory_genes_required = 1
    model_B._min_genes_required = 2
    cluster_3 = Cluster(
        [
            ValidHit(hit_sctj_flg, mg_sctj_flg, GeneStatus.MANDATORY),
            ValidHit(hit_tadz, mg_tadz, GeneStatus.ACCESSORY),
            ValidHit(hit_flgb, mg_flgb, GeneStatus.ACCESSORY),
        ],
        model_B, self.hit_weights)

    sys_A = System(model_A, [cluster_1, cluster_2], self.cfg.redundancy_penalty())
    sys_A.id = "sys_id_A"
    sys_B = System(model_B, [cluster_3], self.cfg.redundancy_penalty())
    sys_B.id = "sys_id_B"

    sol = [sys_A, sys_B]
    sol_id = '12'
    tracker = HitSystemTracker([sys_A, sys_B])
    serializer = TsvSolutionSerializer()

    # columns which are identical for every hit of this test
    hit_cols = ('803', '1.0', '1.000', '1.000', '1.000', '10', '20')
    # model_fqn, sys_id, sys_loci, sys_wholeness, sys_score, sys_occ
    sys_a_cols = ('foo/A', 'sys_id_A', '2', '1.000', '1.500', '2')
    sys_b_cols = ('foo/B', 'sys_id_B', '1', '0.750', '2.000', '1')

    def system_block(system_cols, hits):
        # one newline-terminated row per hit, then the empty line closing the system
        rows = [
            '\t'.join([sol_id, 'replicon_id', hit_id, gene, '1', *system_cols,
                       gene, status, *hit_cols, ''])
            for hit_id, gene, status in hits
        ]
        return ''.join(f"{one_row}\n" for one_row in rows) + "\n"

    sol_tsv = system_block(sys_a_cols, [
        ('hit_sctj', 'sctJ', 'mandatory'),
        ('hit_sctn', 'sctN', 'mandatory'),
        ('hit_gspd', 'gspD', 'accessory'),
        ('hit_sctj', 'sctJ', 'mandatory'),
        ('hit_sctn', 'sctN', 'mandatory'),
    ])
    sol_tsv += system_block(sys_b_cols, [
        ('hit_sctj_flg', 'sctJ_FLG', 'mandatory'),
        ('hit_tadZ', 'tadZ', 'accessory'),
        ('hit_flgB', 'flgB', 'accessory'),
    ])

    serialized = serializer.serialize(sol, sol_id, tracker)
    self.assertEqual(serialized, sol_tsv)
def test_unnlikely_systems_to_txt(self):
    """
    Write one UnlikelySystem with unlikely_systems_to_txt and compare the
    output with the expected report, then check the output produced for
    an empty list of systems.
    """
    args = argparse.Namespace()
    args.sequence_db = self.find_data("base", "test_1.fasta")
    args.db_type = 'unordered'
    args.models_dir = self.find_data('models')
    cfg = Config(MacsyDefaults(), args)

    model_name = 'foo'
    models_location = ModelLocation(path=os.path.join(args.models_dir, model_name))

    # A dedicated ProfileFactory is built from this test's own configuration:
    # the factory behaves like a singleton, so the other tests are influenced
    # by it and by its configuration
    # (for instance search_genes gets profiles without hmmer_exe).
    profile_factory = ProfileFactory(cfg)

    model = Model("foo/T2SS", 10)

    def declare(gene_name, register):
        # create the core gene, bind it to the model and register it in its category
        core_gene = CoreGene(models_location, gene_name, profile_factory)
        model_gene = ModelGene(core_gene, model)
        register(model_gene)
        return core_gene, model_gene

    core_gspd, mg_gspd = declare("gspD", model.add_mandatory_gene)
    core_sctj, mg_sctj = declare("sctJ", model.add_accessory_gene)
    core_sctc, mg_sctc = declare("sctC", model.add_neutral_gene)
    core_tadz, mg_tadz = declare("tadZ", model.add_forbidden_gene)

    # the hits share everything but their gene, identifier and sequence length
    hit_tail = ("replicon_id", 1, 1.0, 1.0, 1.0, 1.0, 10, 20)
    vh_mandatory = ValidHit(Hit(core_gspd, "hit_1", 803, *hit_tail), mg_gspd, GeneStatus.MANDATORY)
    vh_accessory = ValidHit(Hit(core_sctj, "hit_2", 804, *hit_tail), mg_sctj, GeneStatus.ACCESSORY)
    vh_neutral = ValidHit(Hit(core_sctc, "hit_3", 805, *hit_tail), mg_sctc, GeneStatus.NEUTRAL)
    vh_forbidden = ValidHit(Hit(core_tadz, "hit_4", 806, *hit_tail), mg_tadz, GeneStatus.FORBIDDEN)

    reason = "why it not a system"
    unlikely = UnlikelySystem(model, [vh_mandatory], [vh_accessory], [vh_neutral], [vh_forbidden], reason)

    # NOTE(review): the line layout of the expected report (in particular the
    # empty lines) must match what unlikely_systems_to_txt writes -- confirm.
    expected_lines = [
        f"# macsyfinder {macsypy.__version__}",
        f"# {' '.join(sys.argv)}",
        "# Unlikely Systems found:",
        "",
        "This replicon probably not contains a system foo/T2SS:",
        reason,
        "",
        "system id = replicon_id_T2SS_1",
        "model = foo/T2SS",
        "replicon = replicon_id",
        "hits = [('hit_1', 'gspD', 1), ('hit_2', 'sctJ', 1), ('hit_3', 'sctC', 1), ('hit_4', 'tadZ', 1)]",
        "wholeness = 1.000",
        "",
        "mandatory genes:",
        "\t- gspD: 1 (gspD)",
        "",
        "accessory genes:",
        "\t- sctJ: 1 (sctJ)",
        "",
        "neutral genes:",
        "\t- sctC: 1 (sctC)",
        "",
        "forbidden genes:",
        "\t- tadZ: 1 (tadZ)",
        "",
        "Use ordered replicon to have better prediction.",
        "",
        "============================================================",
    ]
    exp_txt = "\n".join(expected_lines) + "\n"

    out = StringIO()
    unlikely_systems_to_txt([unlikely], out)
    self.assertMultiLineEqual(exp_txt, out.getvalue())

    # without any system only the header and a "nothing found" line are expected
    out = StringIO()
    unlikely_systems_to_txt([], out)
    expected_out = "\n".join([
        f"# macsyfinder {macsypy.__version__}",
        f"# {' '.join(sys.argv)}",
        "# No Unlikely Systems found",
    ]) + "\n"
    self.assertEqual(expected_out, out.getvalue())
def test_likely_systems_to_tsv(self):
    """
    Write one LikelySystem with likely_systems_to_tsv and compare the
    output with the expected tab separated report, then check the output
    produced for an empty list of systems.
    """
    args = argparse.Namespace()
    args.sequence_db = self.find_data("base", "test_1.fasta")
    args.db_type = 'unordered'
    args.models_dir = self.find_data('models')
    cfg = Config(MacsyDefaults(), args)

    model_name = 'foo'
    models_location = ModelLocation(path=os.path.join(args.models_dir, model_name))

    # A dedicated ProfileFactory is built from this test's own configuration:
    # the factory behaves like a singleton, so the other tests are influenced
    # by it and by its configuration
    # (for instance search_genes gets profiles without hmmer_exe).
    profile_factory = ProfileFactory(cfg)

    model = Model("foo/T2SS", 10)

    def declare(gene_name, register):
        # create the core gene, bind it to the model and register it in its category
        core_gene = CoreGene(models_location, gene_name, profile_factory)
        model_gene = ModelGene(core_gene, model)
        register(model_gene)
        return core_gene, model_gene

    core_gspd, mg_gspd = declare("gspD", model.add_mandatory_gene)
    core_sctj, mg_sctj = declare("sctJ", model.add_accessory_gene)
    core_sctc, mg_sctc = declare("sctC", model.add_neutral_gene)
    core_tadz, mg_tadz = declare("tadZ", model.add_forbidden_gene)

    # the hits share everything but their gene, identifier and sequence length
    hit_tail = ("replicon_id", 1, 1.0, 1.0, 1.0, 1.0, 10, 20)
    vh_mandatory = ValidHit(Hit(core_gspd, "hit_1", 803, *hit_tail), mg_gspd, GeneStatus.MANDATORY)
    vh_accessory = ValidHit(Hit(core_sctj, "hit_2", 804, *hit_tail), mg_sctj, GeneStatus.ACCESSORY)
    vh_neutral = ValidHit(Hit(core_sctc, "hit_3", 805, *hit_tail), mg_sctc, GeneStatus.NEUTRAL)
    vh_forbidden = ValidHit(Hit(core_tadz, "hit_4", 806, *hit_tail), mg_tadz, GeneStatus.FORBIDDEN)

    likely = LikelySystem(model, [vh_mandatory], [vh_accessory], [vh_neutral], [vh_forbidden])

    columns = [
        "replicon", "hit_id", "gene_name", "hit_pos", "model_fqn", "sys_id",
        "sys_wholeness", "hit_gene_ref", "hit_status", "hit_seq_len",
        "hit_i_eval", "hit_score", "hit_profile_cov", "hit_seq_cov",
        "hit_begin_match", "hit_end_match", "used_in",
    ]

    def row(hit_id, gene, status, seq_len):
        # expected line for one hit, without the line terminator
        return '\t'.join([
            "replicon_id", hit_id, gene, "1", "foo/T2SS", "replicon_id_T2SS_1",
            "1.000", gene, status, seq_len, "1.0", "1.000", "1.000", "1.000",
            "10", "20", "",
        ])

    # NOTE(review): one comment per line is assumed for the file preamble -- confirm.
    preamble = "\n".join([
        f"# macsyfinder {macsypy.__version__}",
        f"# {' '.join(sys.argv)}",
        "# Likely Systems found:",
    ])
    # the forbidden hit is expected before the neutral one
    expected_rows = [
        row("hit_1", "gspD", "mandatory", "803"),
        row("hit_2", "sctJ", "accessory", "804"),
        row("hit_4", "tadZ", "forbidden", "806"),
        row("hit_3", "sctC", "neutral", "805"),
    ]
    sol_tsv = preamble + "\n\n" + "\t".join(columns) + "\n"
    sol_tsv += "".join(f"{one_row}\n" for one_row in expected_rows)
    sol_tsv += "\n"

    out = StringIO()
    tracker = HitSystemTracker([likely])
    likely_systems_to_tsv([likely], tracker, out)
    self.assertMultiLineEqual(sol_tsv, out.getvalue())

    # without any system only the header and a "nothing found" line are expected
    out = StringIO()
    likely_systems_to_tsv([], tracker, out)
    expected_out = "\n".join([
        f"# macsyfinder {macsypy.__version__}",
        f"# {' '.join(sys.argv)}",
        "# No Likely Systems found",
    ]) + "\n"
    self.assertEqual(expected_out, out.getvalue())
def test_solutions_to_tsv(self):
    """
    Write two solutions ([sys_A, sys_B] and [sys_A, sys_C]) with
    solutions_to_tsv and compare the output with the expected tab
    separated report.

    The HitSystemTracker given to solutions_to_tsv is built from sys_A and
    sys_B only; the expected ``used_in`` column is non empty for the rows
    of sys_C only.
    """
    args = argparse.Namespace()
    args.sequence_db = self.find_data("base", "test_1.fasta")
    args.db_type = 'gembase'
    args.models_dir = self.find_data('models')
    cfg = Config(MacsyDefaults(), args)

    model_name = 'foo'
    models_location = ModelLocation(path=os.path.join(args.models_dir, model_name))

    # A dedicated ProfileFactory is built from this test's own configuration:
    # the factory behaves like a singleton, so the other tests are influenced
    # by it and by its configuration
    # (for instance search_genes gets profiles without hmmer_exe).
    profile_factory = ProfileFactory(cfg)

    model_A = Model("foo/A", 10)
    model_B = Model("foo/B", 10)
    model_C = Model("foo/C", 10)

    def core(name):
        return CoreGene(models_location, name, profile_factory)

    def gene_a_with_alternate(name, alternate_core):
        # gene of model_A carrying one exchangeable built on alternate_core
        core_gene = core(name)
        model_gene = ModelGene(core_gene, model_A)
        model_gene.add_exchangeable(Exchangeable(alternate_core, model_gene))
        return core_gene, model_gene

    def hit(core_gene, hit_id):
        return Hit(core_gene, hit_id, 803, "replicon_id", 1, 1.0, 1.0, 1.0, 1.0, 10, 20)

    # genes attached to model_B
    core_sctn_flg = core("sctN_FLG")
    mg_sctn_flg = ModelGene(core_sctn_flg, model_B)
    core_sctj_flg = core("sctJ_FLG")
    mg_sctj_flg = ModelGene(core_sctj_flg, model_B)
    core_flgb = core("flgB")
    mg_flgb = ModelGene(core_flgb, model_B)
    core_tadz = core("tadZ")
    mg_tadz = ModelGene(core_tadz, model_B)

    # genes attached to model_A, each with one exchangeable
    core_sctn, mg_sctn = gene_a_with_alternate("sctN", core_sctn_flg)
    core_sctj, mg_sctj = gene_a_with_alternate("sctJ", core_sctj_flg)
    core_gspd, mg_gspd = gene_a_with_alternate("gspD", core_flgb)
    _, mg_abc = gene_a_with_alternate("abc", core_tadz)

    model_A.add_mandatory_gene(mg_sctn)
    model_A.add_mandatory_gene(mg_sctj)
    model_A.add_accessory_gene(mg_gspd)
    model_A.add_forbidden_gene(mg_abc)

    model_B.add_mandatory_gene(mg_sctn_flg)
    model_B.add_mandatory_gene(mg_sctj_flg)
    model_B.add_accessory_gene(mg_flgb)
    model_B.add_accessory_gene(mg_tadz)

    # model_C is assembled from genes created for the two other models
    model_C.add_mandatory_gene(mg_sctn_flg)
    model_C.add_mandatory_gene(mg_sctj_flg)
    model_C.add_mandatory_gene(mg_flgb)
    model_C.add_accessory_gene(mg_tadz)
    model_C.add_accessory_gene(mg_gspd)

    hit_sctj = hit(core_sctj, "hit_sctj")
    hit_sctn = hit(core_sctn, "hit_sctn")
    hit_gspd = hit(core_gspd, "hit_gspd")
    hit_sctj_flg = hit(core_sctj_flg, "hit_sctj_flg")
    hit_flgb = hit(core_flgb, "hit_flgB")
    hit_tadz = hit(core_tadz, "hit_tadZ")

    model_A._min_mandatory_genes_required = 2
    model_A._min_genes_required = 2
    hit_weights = HitWeight(**cfg.hit_weights())
    cluster_1 = Cluster(
        [
            ValidHit(hit_sctj, mg_sctj, GeneStatus.MANDATORY),
            ValidHit(hit_sctn, mg_sctn, GeneStatus.MANDATORY),
            ValidHit(hit_gspd, mg_gspd, GeneStatus.ACCESSORY),
        ],
        model_A, hit_weights)
    cluster_2 = Cluster(
        [
            ValidHit(hit_sctj, mg_sctj, GeneStatus.MANDATORY),
            ValidHit(hit_sctn, mg_sctn, GeneStatus.MANDATORY),
        ],
        model_A, hit_weights)

    model_B._min_mandatory_genes_required = 1
    model_B._min_genes_required = 2
    cluster_3 = Cluster(
        [
            ValidHit(hit_sctj_flg, mg_sctj_flg, GeneStatus.MANDATORY),
            ValidHit(hit_tadz, mg_tadz, GeneStatus.ACCESSORY),
            ValidHit(hit_flgb, mg_flgb, GeneStatus.ACCESSORY),
        ],
        model_B, hit_weights)

    model_C._min_mandatory_genes_required = 1
    model_C._min_genes_required = 2
    cluster_4 = Cluster(
        [
            ValidHit(hit_sctj_flg, mg_sctj_flg, GeneStatus.MANDATORY),
            ValidHit(hit_tadz, mg_tadz, GeneStatus.ACCESSORY),
            ValidHit(hit_flgb, mg_flgb, GeneStatus.MANDATORY),
            ValidHit(hit_gspd, mg_gspd, GeneStatus.ACCESSORY),
        ],
        model_C, hit_weights)

    sys_A = System(model_A, [cluster_1, cluster_2], cfg.redundancy_penalty())
    sys_A.id = "sys_id_A"
    sys_B = System(model_B, [cluster_3], cfg.redundancy_penalty())
    sys_B.id = "sys_id_B"
    sys_C = System(model_C, [cluster_4], cfg.redundancy_penalty())
    sys_C.id = "sys_id_C"

    sol_1 = [sys_A, sys_B]
    sol_2 = [sys_A, sys_C]
    sol_id_1 = '1'
    sol_id_2 = '2'

    columns = [
        "sol_id", "replicon", "hit_id", "gene_name", "hit_pos", "model_fqn",
        "sys_id", "sys_loci", "sys_wholeness", "sys_score", "sys_occ",
        "hit_gene_ref", "hit_status", "hit_seq_len", "hit_i_eval", "hit_score",
        "hit_profile_cov", "hit_seq_cov", "hit_begin_match", "hit_end_match",
        "used_in",
    ]
    # columns which are identical for every hit of this test
    hit_cols = ('803', '1.0', '1.000', '1.000', '1.000', '10', '20')
    # model_fqn, sys_id, sys_loci, sys_wholeness, sys_score, sys_occ
    sys_a_cols = ('foo/A', 'sys_id_A', '2', '1.000', '1.500', '2')
    sys_b_cols = ('foo/B', 'sys_id_B', '1', '0.750', '2.000', '1')
    sys_c_cols = ('foo/C', 'sys_id_C', '1', '0.800', '3.000', '1')

    # (hit_id, gene, status, used_in) for each expected row of a system
    sys_a_hits = [
        ('hit_sctj', 'sctJ', 'mandatory', ''),
        ('hit_sctn', 'sctN', 'mandatory', ''),
        ('hit_gspd', 'gspD', 'accessory', ''),
        ('hit_sctj', 'sctJ', 'mandatory', ''),
        ('hit_sctn', 'sctN', 'mandatory', ''),
    ]
    sys_b_hits = [
        ('hit_sctj_flg', 'sctJ_FLG', 'mandatory', ''),
        ('hit_tadZ', 'tadZ', 'accessory', ''),
        ('hit_flgB', 'flgB', 'accessory', ''),
    ]
    sys_c_hits = [
        ('hit_sctj_flg', 'sctJ_FLG', 'mandatory', 'sys_id_B'),
        ('hit_tadZ', 'tadZ', 'accessory', 'sys_id_B'),
        ('hit_flgB', 'flgB', 'mandatory', 'sys_id_B'),
        ('hit_gspd', 'gspD', 'accessory', 'sys_id_A'),
    ]

    def system_block(sol_id, system_cols, hits):
        # one newline-terminated row per hit, then the empty line closing the system
        rows = [
            '\t'.join([sol_id, 'replicon_id', hit_id, gene, '1', *system_cols,
                       gene, status, *hit_cols, used_in])
            for hit_id, gene, status, used_in in hits
        ]
        return ''.join(f"{one_row}\n" for one_row in rows) + "\n"

    # NOTE(review): one comment per line, directly followed by the column
    # names, is assumed for the file preamble -- confirm.
    sol_tsv = "\n".join([
        f"# macsyfinder {macsypy.__version__}",
        f"# {' '.join(sys.argv)}",
        "# Systems found:",
    ]) + "\n"
    sol_tsv += "\t".join(columns) + "\n"
    sol_tsv += system_block(sol_id_1, sys_a_cols, sys_a_hits)
    sol_tsv += system_block(sol_id_1, sys_b_cols, sys_b_hits)
    sol_tsv += system_block(sol_id_2, sys_a_cols, sys_a_hits)
    sol_tsv += system_block(sol_id_2, sys_c_cols, sys_c_hits)

    out = StringIO()
    tracker = HitSystemTracker([sys_A, sys_B])
    solutions_to_tsv([sol_1, sol_2], tracker, out)
    self.assertMultiLineEqual(sol_tsv, out.getvalue())
def test_filter(self):
    """
    Check which hits ``Model.filter`` keeps.

    Hits are created for genes attached to the model *foo/bar* (one mandatory,
    one accessory, one forbidden, one neutral and the exchangeable of the
    neutral one) and for genes attached to a second model, *foo/buz*.
    The hits returned by ``filter`` must be exactly the first group.
    """
    def core_gene(name):
        # all core genes of this test share the same location and profile factory
        return CoreGene(self.model_location, name, self.profile_factory)

    def core_hit(gene):
        # the hit id is derived from the gene name, all other fields are constant
        return CoreHit(gene, f"PSAE001c01_{gene.name}", 1, "PSAE001c01", 1, 1.0, 1.0, 1.0, 1.0, 1, 2)

    tested_model = Model("foo/bar", 10)
    other_model = Model("foo/buz", 10)

    # mandatory gene and its exchangeable
    mandatory = ModelGene(core_gene('sctJ_FLG'), tested_model)
    tested_model.add_mandatory_gene(mandatory)
    mandatory.add_exchangeable(Exchangeable(core_gene('sctJ'), mandatory))

    # accessory gene and its exchangeable
    accessory = ModelGene(core_gene('sctN_FLG'), tested_model)
    tested_model.add_accessory_gene(accessory)
    accessory.add_exchangeable(Exchangeable(core_gene('sctN'), accessory))

    # forbidden gene
    forbidden = ModelGene(core_gene('sctC'), tested_model)
    tested_model.add_forbidden_gene(forbidden)

    # neutral gene and its exchangeable
    neutral = ModelGene(core_gene('toto'), tested_model)
    tested_model.add_neutral_gene(neutral)
    neutral_alt = Exchangeable(core_gene('totote'), neutral)
    neutral.add_exchangeable(neutral_alt)

    # gene (and exchangeable) attached to the second model only
    foreign = ModelGene(core_gene('gspD'), other_model)
    foreign_alt = Exchangeable(core_gene('tadZ'), foreign)
    foreign.add_exchangeable(foreign_alt)

    kept = [core_hit(gene) for gene in (mandatory, accessory, forbidden, neutral, neutral_alt)]
    dropped = [core_hit(gene) for gene in (foreign, foreign_alt)]

    filtered = tested_model.filter(kept + dropped)
    self.assertListEqual(sorted(kept), sorted(filtered))
class MatchMakerTest(MacsyTest):
    """
    Tests of ``OrderedMatchMaker`` and ``UnorderedMatchMaker``.

    The model *foo/model_A* built in :meth:`setUp` declares

    * sctN and sctJ as mandatory genes,
    * gspD as accessory gene,
    * toto as neutral gene,
    * abc as forbidden gene,

    and each of them carries one exchangeable
    (sctN_FLG, sctJ_FLG, flgB, totote and tadZ respectively).
    """

    def setUp(self) -> None:
        """Create the configuration, the model and one hit per (exchangeable) gene."""
        args = argparse.Namespace(
            sequence_db=self.find_data("base", "test_1.fasta"),
            db_type='gembase',
            models_dir=self.find_data('models'),
        )
        self.cfg = Config(MacsyDefaults(), args)

        self.model_name = 'foo'
        self.model_location = ModelLocation(path=os.path.join(args.models_dir, self.model_name))
        self.profile_factory = ProfileFactory(self.cfg)
        self.model = Model("foo/model_A", 10)

        def core_gene(name):
            return CoreGene(self.model_location, name, self.profile_factory)

        core_sctn = core_gene("sctN")
        sctn = ModelGene(core_sctn, self.model)
        core_sctn_flg = core_gene("sctN_FLG")
        sctn.add_exchangeable(Exchangeable(core_sctn_flg, sctn))

        core_sctj = core_gene("sctJ")
        sctj = ModelGene(core_sctj, self.model)
        core_sctj_flg = core_gene("sctJ_FLG")
        sctj.add_exchangeable(Exchangeable(core_sctj_flg, sctj))

        core_gspd = core_gene("gspD")
        gspd = ModelGene(core_gspd, self.model)
        core_flgb = core_gene("flgB")
        gspd.add_exchangeable(Exchangeable(core_flgb, gspd))

        core_abc = core_gene("abc")
        abc = ModelGene(core_abc, self.model)
        core_tadz = core_gene("tadZ")
        abc.add_exchangeable(Exchangeable(core_tadz, abc))

        core_toto = core_gene("toto")
        toto = ModelGene(core_toto, self.model)
        core_totote = core_gene("totote")
        toto.add_exchangeable(Exchangeable(core_totote, toto))

        for mandatory_gene in (sctn, sctj):
            self.model.add_mandatory_gene(mandatory_gene)
        self.model.add_accessory_gene(gspd)
        self.model.add_neutral_gene(toto)
        self.model.add_forbidden_gene(abc)

        # (key in self.c_hits, core gene matched by the hit, hit identifier)
        hit_specs = (
            ('h_sctj', core_sctj, "hit_sctj"),
            ('h_sctj_flg', core_sctj_flg, "hit_sctj_flg"),
            ('h_sctn', core_sctn, "hit_sctn"),
            ('h_sctn_flg', core_sctn_flg, "hit_sctn_flg"),
            ('h_gspd', core_gspd, "hit_gspd"),
            ('h_gspd_an', core_flgb, "hit_gspd_an"),
            ('h_abc', core_abc, "hit_abc"),
            ('h_abc_ho', core_tadz, "hit_abc_ho"),
            ('h_toto', core_toto, "hit_toto"),
            ('h_toto_ho', core_totote, "hit_toto_ho"),
        )
        self.c_hits = {
            key: Hit(core, hit_id, 803, "replicon_id", 1, 1.0, 1.0, 1.0, 1.0, 10, 20)
            for key, core, hit_id in hit_specs
        }

    def test_sort_hits_by_status(self):
        """Check that ``sort_hits_by_status`` splits hits into mandatory/accessory/neutral/forbidden."""
        def gene_names(hits):
            return [h.gene.name for h in hits]

        maker = OrderedMatchMaker(self.model, self.cfg.redundancy_penalty())

        # expected groups, in the order returned by sort_hits_by_status:
        # mandatory, accessory, neutral, forbidden
        expected = (
            [self.c_hits['h_sctn'], self.c_hits['h_sctj']],
            [self.c_hits['h_gspd']],
            [self.c_hits['h_toto']],
            [self.c_hits['h_abc']],
        )
        mandatory, accessory, neutral, forbidden = maker.sort_hits_by_status(
            [hit for group in expected for hit in group])
        for exp_hits, got_hits in zip(expected, (mandatory, accessory, neutral, forbidden)):
            self.assertListEqual(gene_names(exp_hits), gene_names(got_hits))

        # same check, but the hits match the exchangeables of the model genes
        expected_exch = (
            [self.c_hits['h_sctn_flg'], self.c_hits['h_sctj_flg']],
            [self.c_hits['h_gspd_an']],
            [self.c_hits['h_toto_ho']],
            [self.c_hits['h_abc_ho']],
        )
        mandatory, accessory, neutral, forbidden = maker.sort_hits_by_status(
            [hit for group in expected_exch for hit in group])
        for exp_hits, got_hits in zip(expected_exch, (mandatory, accessory, neutral, forbidden)):
            self.assertListEqual(gene_names(exp_hits), gene_names(got_hits))

        # gene_ref of the hits sorted from exchangeables:
        # alternate_of() must give back the gene declared in the model
        for exp_hits, got_hits in zip(expected, (mandatory, accessory, neutral, forbidden)):
            self.assertListEqual(gene_names(exp_hits),
                                 [h.gene_ref.alternate_of().name for h in got_hits])

    def test_ordered_match(self):
        """Check the result of ``OrderedMatchMaker.match`` on single and multi loci clusters."""
        def set_quorum(min_mandatory, min_genes):
            self.model._min_mandatory_genes_required = min_mandatory
            self.model._min_genes_required = min_genes

        def cluster(*hit_keys):
            return Cluster([self.c_hits[key] for key in hit_keys], self.model, self.cfg.hit_weights())

        def match(*clusters):
            # a fresh match maker is used for every scenario
            maker = OrderedMatchMaker(self.model, self.cfg.redundancy_penalty())
            return maker.match(list(clusters))

        quorum_4_not_reached = ["The quorum of genes required (4) is not reached: 3"]

        #####################
        # test single locus #
        #####################

        # one mandatory gene is lacking
        set_quorum(2, 3)
        res = match(cluster('h_sctj', 'h_gspd'))
        self.assertIsInstance(res, RejectedClusters)
        self.assertEqual(res.reasons,
                         ["The quorum of mandatory genes required (2) is not reached: 1",
                          "The quorum of genes required (3) is not reached: 2"])

        # every quorum is reached
        set_quorum(2, 1)
        res = match(cluster('h_sctj', 'h_sctn', 'h_gspd'))
        self.assertIsInstance(res, System)

        # one mandatory gene is matched through its exchangeable
        set_quorum(2, 1)
        res = match(cluster('h_sctj_flg', 'h_sctn', 'h_gspd'))
        self.assertIsInstance(res, System)

        # the accessory gene is matched through its exchangeable
        set_quorum(2, 1)
        res = match(cluster('h_sctj', 'h_sctn', 'h_gspd_an'))
        self.assertIsInstance(res, System)

        # the min_genes_required quorum is not reached
        set_quorum(2, 4)
        res = match(cluster('h_sctj', 'h_sctn_flg', 'h_gspd'))
        self.assertIsInstance(res, RejectedClusters)
        self.assertListEqual(res.reasons, quorum_4_not_reached)

        # the min_genes_required quorum is still not reached with a neutral gene
        set_quorum(2, 4)
        res = match(cluster('h_sctj', 'h_sctn_flg', 'h_gspd', 'h_toto'))
        self.assertIsInstance(res, RejectedClusters)
        self.assertEqual(res.reasons, quorum_4_not_reached)

        # nor with the exchangeable of the neutral gene
        set_quorum(2, 4)
        res = match(cluster('h_sctj', 'h_sctn_flg', 'h_gspd', 'h_toto_ho'))
        self.assertIsInstance(res, RejectedClusters)
        self.assertEqual(res.reasons, quorum_4_not_reached)

        # the cluster holds a forbidden gene
        set_quorum(2, 1)
        res = match(cluster('h_sctj', 'h_sctn', 'h_gspd', 'h_abc'))
        self.assertIsInstance(res, RejectedClusters)
        self.assertEqual(res.reasons, ["There is 1 forbidden genes occurrence(s): abc"])

        # the cluster holds the exchangeable of a forbidden gene
        set_quorum(2, 1)
        res = match(cluster('h_sctj', 'h_sctn', 'h_gspd', 'h_abc_ho'))
        self.assertIsInstance(res, RejectedClusters)
        self.assertEqual(res.reasons, ["There is 1 forbidden genes occurrence(s): tadZ"])

        ###################
        # test multi loci #
        ###################

        set_quorum(2, 1)
        res = match(cluster('h_sctj', 'h_sctn'),
                    cluster('h_gspd'))
        self.assertIsInstance(res, System)

        # every gene is matched through its exchangeable
        set_quorum(2, 1)
        res = match(cluster('h_sctj_flg', 'h_sctn_flg'),
                    cluster('h_gspd_an'))
        self.assertIsInstance(res, System)

        # exchangeables plus a forbidden gene, spread over 3 clusters
        set_quorum(2, 1)
        res = match(cluster('h_sctj_flg', 'h_sctn_flg'),
                    cluster('h_gspd'),
                    cluster('h_abc'))
        self.assertEqual(res.reasons, ["There is 1 forbidden genes occurrence(s): abc"])

    def test_unordered_match(self):
        """Check the result of ``UnorderedMatchMaker.match`` on plain lists of hits."""
        def set_quorum(min_mandatory, min_genes):
            self.model._min_mandatory_genes_required = min_mandatory
            self.model._min_genes_required = min_genes

        def hits_for(*hit_keys):
            return [self.c_hits[key] for key in hit_keys]

        def match(hits):
            # a fresh match maker is used for every scenario
            return UnorderedMatchMaker(self.model).match(hits)

        quorum_4_not_reached = ["The quorum of genes required (4) is not reached: 3"]

        # one mandatory gene is lacking
        set_quorum(2, 3)
        res = match(hits_for('h_sctj', 'h_gspd'))
        self.assertIsInstance(res, UnlikelySystem)
        self.assertEqual(res.reasons,
                         ["The quorum of mandatory genes required (2) is not reached: 1",
                          "The quorum of genes required (3) is not reached: 2"])

        # every quorum is reached
        set_quorum(2, 1)
        res = match(hits_for('h_sctj', 'h_sctn', 'h_gspd'))
        self.assertIsInstance(res, LikelySystem)

        # one mandatory gene is matched through its exchangeable
        set_quorum(2, 1)
        res = match(hits_for('h_sctj_flg', 'h_sctn', 'h_gspd'))
        self.assertIsInstance(res, LikelySystem)

        # the accessory gene is matched through its exchangeable
        set_quorum(2, 1)
        res = match(hits_for('h_sctj', 'h_sctn', 'h_gspd_an'))
        self.assertIsInstance(res, LikelySystem)

        # the min_genes_required quorum is not reached
        set_quorum(2, 4)
        res = match(hits_for('h_sctj', 'h_sctn_flg', 'h_gspd'))
        self.assertIsInstance(res, UnlikelySystem)
        self.assertEqual(res.reasons, quorum_4_not_reached)

        # the min_genes_required quorum is still not reached with a neutral gene
        set_quorum(2, 4)
        res = match(hits_for('h_sctj', 'h_sctn_flg', 'h_gspd', 'h_toto'))
        self.assertIsInstance(res, UnlikelySystem)
        self.assertEqual(res.reasons, quorum_4_not_reached)

        # nor with the exchangeable of the neutral gene
        set_quorum(2, 4)
        res = match(hits_for('h_sctj', 'h_sctn_flg', 'h_gspd', 'h_toto_ho'))
        self.assertIsInstance(res, UnlikelySystem)
        self.assertEqual(res.reasons, quorum_4_not_reached)

        # the hits hold a forbidden gene
        set_quorum(2, 1)
        allowed_hits = hits_for('h_sctj', 'h_sctn', 'h_gspd')
        forbidden_hits = [self.c_hits['h_abc']]
        res = match(allowed_hits + forbidden_hits)
        self.assertIsInstance(res, LikelySystem)
        self.assertListEqual([(h.id, h.position) for h in res.hits],
                             [(h.id, h.position) for h in allowed_hits + forbidden_hits])
        self.assertListEqual(res._forbidden_hits, [self.c_hits['h_abc']])

        # the hits hold the exchangeable of a forbidden gene
        set_quorum(2, 1)
        res = match(hits_for('h_sctj', 'h_sctn', 'h_gspd', 'h_abc_ho'))
        self.assertIsInstance(res, LikelySystem)
        self.assertListEqual(res._forbidden_hits, [self.c_hits['h_abc_ho']])
def _build_systems(cfg, profile_factory):
    """
    Build the eight systems ('A' to 'H') shared as fixtures by the tests.

    :param cfg: the configuration; its ``models_dir()``, ``hit_weights()`` and
                ``redundancy_penalty()`` methods are used here.
    :param profile_factory: the profile factory handed to every ``CoreGene``.
    :return: a dict mapping a letter ('A' ... 'H') to the corresponding ``System``.
             Each system id is forced to ``replicon_id_<letter>``.
    """
    model_location = ModelLocation(path=os.path.join(cfg.models_dir(), 'foo'))

    def core_gene(name):
        return CoreGene(model_location, name, profile_factory)

    def make_hit(core, hit_id, position):
        # hits differ only by the matched gene, their id and their position
        return Hit(core, hit_id, 803, "replicon_id", position, 1.0, 1.0, 1.0, 1.0, 10, 20)

    def set_quorum(model, min_mandatory, min_genes):
        model._min_mandatory_genes_required = min_mandatory
        model._min_genes_required = min_genes

    model_A = Model("foo/A", 10)
    model_B = Model("foo/B", 10)
    model_C = Model("foo/C", 10)
    model_D = Model("foo/D", 10)
    model_E = Model("foo/E", 10)
    model_F = Model("foo/F", 10)
    model_G = Model("foo/G", 10)
    model_H = Model("foo/H", 10)

    # genes created for model B
    core_sctn_flg = core_gene("sctN_FLG")
    sctn_flg = ModelGene(core_sctn_flg, model_B)
    core_sctj_flg = core_gene("sctJ_FLG")
    sctj_flg = ModelGene(core_sctj_flg, model_B)
    core_flgb = core_gene("flgB")
    flgb = ModelGene(core_flgb, model_B)
    core_tadz = core_gene("tadZ")
    tadz = ModelGene(core_tadz, model_B)

    # genes created for model A, each one with an exchangeable
    # built on a core gene already used by model B
    core_sctn = core_gene("sctN")
    sctn = ModelGene(core_sctn, model_A)
    sctn.add_exchangeable(Exchangeable(core_sctn_flg, sctn))

    core_sctj = core_gene("sctJ")
    sctj = ModelGene(core_sctj, model_A)
    sctj.add_exchangeable(Exchangeable(core_sctj_flg, sctj))

    core_gspd = core_gene("gspD")
    gspd = ModelGene(core_gspd, model_A)
    gspd.add_exchangeable(Exchangeable(core_flgb, gspd))

    core_abc = core_gene("abc")
    abc = ModelGene(core_abc, model_A)
    abc.add_exchangeable(Exchangeable(core_tadz, abc))

    # NOTE: the registration order below (model after model, gene after gene)
    # is deliberately the same as it has always been; several genes are shared
    # between models.
    for gene in (sctn, sctj):
        model_A.add_mandatory_gene(gene)
    model_A.add_accessory_gene(gspd)
    model_A.add_forbidden_gene(abc)

    for gene in (sctn_flg, sctj_flg):
        model_B.add_mandatory_gene(gene)
    for gene in (flgb, tadz):
        model_B.add_accessory_gene(gene)

    for gene in (sctn_flg, sctj_flg, flgb):
        model_C.add_mandatory_gene(gene)
    for gene in (tadz, gspd):
        model_C.add_accessory_gene(gene)

    model_D.add_mandatory_gene(abc)
    model_D.add_accessory_gene(sctn)

    model_E.add_accessory_gene(gspd)

    model_F.add_mandatory_gene(abc)

    # same genes as C
    for gene in (sctn_flg, sctj_flg, flgb):
        model_G.add_mandatory_gene(gene)
    for gene in (tadz, gspd):
        model_G.add_accessory_gene(gene)

    # same genes as D
    model_H.add_mandatory_gene(abc)
    model_H.add_accessory_gene(sctn)

    h_sctj = make_hit(core_sctj, "hit_sctj", 1)
    h_sctn = make_hit(core_sctn, "hit_sctn", 2)
    h_gspd = make_hit(core_gspd, "hit_gspd", 3)
    h_sctj_flg = make_hit(core_sctj_flg, "hit_sctj_flg", 4)
    h_flgb = make_hit(core_flgb, "hit_flgB", 5)
    h_tadz = make_hit(core_tadz, "hit_tadZ", 6)
    h_abc = make_hit(core_abc, "hit_abc", 7)

    mandatory = GeneStatus.MANDATORY
    accessory = GeneStatus.ACCESSORY

    set_quorum(model_A, 2, 2)
    hit_weights = HitWeight(**cfg.hit_weights())
    c1 = Cluster(
        [
            ValidHit(h_sctj, sctj, mandatory),
            ValidHit(h_sctn, sctn, mandatory),
            ValidHit(h_gspd, gspd, accessory),
        ],
        model_A, hit_weights)
    c2 = Cluster(
        [
            ValidHit(h_sctj, sctj, mandatory),
            ValidHit(h_sctn, sctn, mandatory),
        ],
        model_A, hit_weights)

    set_quorum(model_B, 1, 2)
    c3 = Cluster(
        [
            ValidHit(h_sctj_flg, sctj_flg, mandatory),
            ValidHit(h_tadz, tadz, accessory),
            ValidHit(h_flgb, flgb, accessory),
        ],
        model_B, hit_weights)

    set_quorum(model_C, 1, 2)
    c4 = Cluster(
        [
            ValidHit(h_sctj_flg, sctj_flg, mandatory),
            ValidHit(h_tadz, tadz, accessory),
            ValidHit(h_flgb, flgb, mandatory),
            ValidHit(h_gspd, gspd, accessory),
        ],
        model_C, hit_weights)

    set_quorum(model_D, 1, 1)
    c5 = Cluster(
        [
            ValidHit(h_abc, abc, mandatory),
            ValidHit(h_sctn, sctn, accessory),
        ],
        model_D, hit_weights)

    set_quorum(model_E, 0, 1)
    c6 = Cluster([ValidHit(h_gspd, gspd, accessory)], model_E, hit_weights)

    set_quorum(model_F, 1, 1)
    c7 = Cluster([ValidHit(h_abc, abc, mandatory)], model_F, hit_weights)

    # (key, model, clusters, forced system id)
    system_specs = (
        ('A', model_A, [c1, c2], "replicon_id_A"),  # 5 hits
        ('B', model_B, [c3], "replicon_id_B"),      # 3 hits
        ('C', model_C, [c4], "replicon_id_C"),      # 4 hits
        ('D', model_D, [c5], "replicon_id_D"),      # 2 hits
        ('E', model_E, [c6], "replicon_id_E"),      # 1 hit
        ('F', model_F, [c7], "replicon_id_F"),      # 1 hit
        ('G', model_G, [c4], "replicon_id_G"),      # 4 hits
        ('H', model_H, [c5], "replicon_id_H"),      # 2 hits
    )
    systems = {}
    for key, model, clusters, system_id in system_specs:
        systems[key] = System(model, clusters, cfg.redundancy_penalty())
        # the id is overwritten to get stable results
        # whatever the number of tests that ran, or their order
        systems[key].id = system_id
    return systems