def test_str(self):
        model = Model("foo/T2SS", 10)
        c_gene_gspd = CoreGene(self.model_location, "gspD",
                               self.profile_factory)
        gene_gspd = ModelGene(c_gene_gspd, model)
        model.add_mandatory_gene(gene_gspd)
        c_gene_sctj = CoreGene(self.model_location, "sctJ",
                               self.profile_factory)
        gene_sctj = ModelGene(c_gene_sctj, model)
        model.add_accessory_gene(gene_sctj)
        c_gene_sctn = CoreGene(self.model_location, "sctN",
                               self.profile_factory)
        gene_sctn = ModelGene(c_gene_sctn, model)
        model.add_accessory_gene(gene_sctn)

        hit_1 = Hit(c_gene_gspd, "hit_1", 803, "replicon_id", 1, 1.0, 1.0, 1.0,
                    1.0, 10, 20)
        v_hit_1 = ValidHit(hit_1, gene_gspd, GeneStatus.MANDATORY)
        hit_2 = Hit(c_gene_sctj, "hit_2", 803, "replicon_id", 2, 1.0, 1.0, 1.0,
                    1.0, 10, 20)
        v_hit_2 = ValidHit(hit_2, gene_sctj, GeneStatus.ACCESSORY)
        hit_3 = Hit(c_gene_sctn, "hit_3", 803, "replicon_id", 3, 1.0, 1.0, 1.0,
                    1.0, 10, 20)
        v_hit_3 = ValidHit(hit_3, gene_sctn, GeneStatus.ACCESSORY)
        uls_1 = UnlikelySystem(model, [v_hit_1], [v_hit_2, v_hit_3], [], [],
                               ["reason"])

        expected_str = """(hit_1, gspD, 1), (hit_2, sctJ, 2), (hit_3, sctN, 3): These hits does not probably constitute a system because:
reason"""
        self.assertEqual(str(uls_1), expected_str)
    def test_hits(self):
        model = Model("foo/T2SS", 10)
        c_gene_gspd = CoreGene(self.model_location, "gspD",
                               self.profile_factory)
        gene_gspd = ModelGene(c_gene_gspd, model)
        model.add_mandatory_gene(gene_gspd)
        c_gene_sctj = CoreGene(self.model_location, "sctJ",
                               self.profile_factory)
        gene_sctj = ModelGene(c_gene_sctj, model)
        model.add_accessory_gene(gene_sctj)
        c_gene_sctn = CoreGene(self.model_location, "sctN",
                               self.profile_factory)
        gene_sctn = ModelGene(c_gene_sctn, model)
        model.add_accessory_gene(gene_sctn)

        hit_1 = Hit(c_gene_gspd, "hit_1", 803, "replicon_id", 1, 1.0, 1.0, 1.0,
                    1.0, 10, 20)
        v_hit_1 = ValidHit(hit_1, gene_gspd, GeneStatus.MANDATORY)
        hit_2 = Hit(c_gene_sctj, "hit_2", 803, "replicon_id", 1, 1.0, 1.0, 1.0,
                    1.0, 10, 20)
        v_hit_2 = ValidHit(hit_2, gene_sctj, GeneStatus.ACCESSORY)
        hit_3 = Hit(c_gene_sctn, "hit_3", 803, "replicon_id", 1, 1.0, 1.0, 1.0,
                    1.0, 10, 20)
        v_hit_3 = ValidHit(hit_3, gene_sctn, GeneStatus.ACCESSORY)
        uls_1 = UnlikelySystem(model, [v_hit_1], [v_hit_2, v_hit_3], [], [],
                               ["reason"])

        self.assertListEqual(uls_1.hits, [v_hit_1, v_hit_2, v_hit_3])
Exemple #3
0
    def test_UnlikelySystemSerializer_txt(self):
        model = Model("foo/FOO", 10)
        c_gene_gspd = CoreGene(self.model_location, "gspD",
                               self.profile_factory)
        gene_gspd = ModelGene(c_gene_gspd, model)
        model.add_mandatory_gene(gene_gspd)
        c_gene_sctj = CoreGene(self.model_location, "sctJ",
                               self.profile_factory)
        gene_sctj = ModelGene(c_gene_sctj, model)
        model.add_accessory_gene(gene_sctj)
        c_gene_sctn = CoreGene(self.model_location, "sctN",
                               self.profile_factory)
        gene_sctn = ModelGene(c_gene_sctn, model)
        model.add_accessory_gene(gene_sctn)
        c_gene_abc = CoreGene(self.model_location, "abc", self.profile_factory)
        gene_abc = ModelGene(c_gene_abc, model)
        model.add_forbidden_gene(gene_abc)

        hit_1 = Hit(c_gene_gspd, "hit_1", 803, "replicon_id", 1, 1.0, 1.0, 1.0,
                    1.0, 10, 20)
        v_hit_1 = ValidHit(hit_1, gene_gspd, GeneStatus.MANDATORY)
        hit_2 = Hit(c_gene_sctj, "hit_2", 803, "replicon_id", 2, 1.0, 1.0, 1.0,
                    1.0, 10, 20)
        v_hit_2 = ValidHit(hit_2, gene_sctj, GeneStatus.ACCESSORY)
        hit_3 = Hit(c_gene_sctn, "hit_3", 803, "replicon_id", 3, 1.0, 1.0, 1.0,
                    1.0, 10, 20)
        v_hit_3 = ValidHit(hit_3, gene_sctn, GeneStatus.ACCESSORY)
        hit_4 = Hit(c_gene_abc, "hit_4", 803, "replicon_id", 4, 1.0, 1.0, 1.0,
                    1.0, 10, 20)
        v_hit_4 = ValidHit(hit_4, gene_abc, GeneStatus.FORBIDDEN)
        ser = TxtUnikelySystemSerializer()

        ls_1 = UnlikelySystem(model, [v_hit_1], [v_hit_2, v_hit_3], [],
                              [v_hit_4], ["the reason why"])
        txt = ser.serialize(ls_1)
        expected_txt = """This replicon probably not contains a system foo/FOO:
the reason why

system id = replicon_id_FOO_1
model = foo/FOO
replicon = replicon_id
hits = [('hit_1', 'gspD', 1), ('hit_2', 'sctJ', 2), ('hit_3', 'sctN', 3), ('hit_4', 'abc', 4)]
wholeness = 1.000

mandatory genes:
\t- gspD: 1 (gspD)

accessory genes:
\t- sctJ: 1 (sctJ)
\t- sctN: 1 (sctN)

neutral genes:

forbidden genes:
\t- abc: 1 (abc)

Use ordered replicon to have better prediction.
"""
        self.assertEqual(txt, expected_txt)
    def test_init(self):
        model = Model("foo/model_A", 10)
        # test if id is well incremented
        c_gene_gspd = CoreGene(self.model_location, "gspD",
                               self.profile_factory)
        gene_gspd = ModelGene(c_gene_gspd, model)
        model.add_mandatory_gene(gene_gspd)
        c_gene_sctj = CoreGene(self.model_location, "sctJ",
                               self.profile_factory)
        gene_sctj = ModelGene(c_gene_sctj, model)
        model.add_accessory_gene(gene_sctj)

        hit_1 = Hit(c_gene_gspd, "hit_1", 803, "replicon_id", 1, 1.0, 1.0, 1.0,
                    1.0, 10, 20)
        v_hit_1 = ValidHit(hit_1, gene_gspd, GeneStatus.MANDATORY)
        hit_2 = Hit(c_gene_sctj, "hit_2", 803, "replicon_id", 1, 1.0, 1.0, 1.0,
                    1.0, 10, 20)
        v_hit_2 = ValidHit(hit_2, gene_sctj, GeneStatus.ACCESSORY)
        uls_1 = UnlikelySystem(model, [v_hit_1], [v_hit_2], [], [], ["reason"])
        self.assertTrue(uls_1.id.startswith('replicon_id_model_A_'))

        uls_2 = UnlikelySystem(model, [v_hit_1], [v_hit_2], [], [], ["reason"])
        self.assertEqual(int(uls_2.id.split('_')[-1]),
                         int(uls_1.id.split('_')[-1]) + 1)
    def test_reason(self):
        model = Model("foo/model_A", 10)
        # test if id is well incremented
        c_gene_gspd = CoreGene(self.model_location, "gspD",
                               self.profile_factory)
        gene_gspd = ModelGene(c_gene_gspd, model)
        model.add_mandatory_gene(gene_gspd)
        c_gene_sctj = CoreGene(self.model_location, "sctJ",
                               self.profile_factory)
        gene_sctj = ModelGene(c_gene_sctj, model)
        model.add_forbidden_gene(gene_sctj)

        hit_1 = Hit(c_gene_gspd, "hit_1", 803, "replicon_id", 1, 1.0, 1.0, 1.0,
                    1.0, 10, 20)
        v_hit_1 = ValidHit(hit_1, gene_gspd, GeneStatus.MANDATORY)
        hit_2 = Hit(c_gene_sctj, "hit_2", 803, "replicon_id", 1, 1.0, 1.0, 1.0,
                    1.0, 10, 20)
        v_hit_2 = ValidHit(hit_2, gene_sctj, GeneStatus.FORBIDDEN)
        reason_2 = ["forbidden gene"]
        uls_2 = UnlikelySystem(model, [v_hit_1], [], [], [v_hit_2], reason_2)
        self.assertEqual(uls_2.reasons, reason_2)
Exemple #6
0
    def test_unnlikely_systems_to_txt(self):
        args = argparse.Namespace()
        args.sequence_db = self.find_data("base", "test_1.fasta")
        args.db_type = 'unordered'
        args.models_dir = self.find_data('models')
        cfg = Config(MacsyDefaults(), args)

        model_name = 'foo'
        models_location = ModelLocation(
            path=os.path.join(args.models_dir, model_name))

        # we need to reset the ProfileFactory
        # because it's a like a singleton
        # so other tests are influenced by ProfileFactory and it's configuration
        # for instance search_genes get profile without hmmer_exe
        profile_factory = ProfileFactory(cfg)

        model = Model("foo/T2SS", 10)
        # test if id is well incremented
        gene_name = "gspD"
        c_gene_gspd = CoreGene(models_location, gene_name, profile_factory)
        gene_gspd = ModelGene(c_gene_gspd, model)
        model.add_mandatory_gene(gene_gspd)
        gene_name = "sctJ"
        c_gene_sctj = CoreGene(models_location, gene_name, profile_factory)
        gene_sctj = ModelGene(c_gene_sctj, model)
        model.add_accessory_gene(gene_sctj)
        gene_name = "sctC"
        c_gene_sctc = CoreGene(models_location, gene_name, profile_factory)
        gene_sctc = ModelGene(c_gene_sctc, model)
        model.add_neutral_gene(gene_sctc)
        gene_name = "tadZ"
        c_gene_tadz = CoreGene(models_location, gene_name, profile_factory)
        gene_tadz = ModelGene(c_gene_tadz, model)
        model.add_forbidden_gene(gene_tadz)

        hit_1 = Hit(c_gene_gspd, "hit_1", 803, "replicon_id", 1, 1.0, 1.0, 1.0,
                    1.0, 10, 20)
        v_hit_1 = ValidHit(hit_1, gene_gspd, GeneStatus.MANDATORY)
        hit_2 = Hit(c_gene_sctj, "hit_2", 804, "replicon_id", 1, 1.0, 1.0, 1.0,
                    1.0, 10, 20)
        v_hit_2 = ValidHit(hit_2, gene_sctj, GeneStatus.ACCESSORY)
        hit_3 = Hit(c_gene_sctc, "hit_3", 805, "replicon_id", 1, 1.0, 1.0, 1.0,
                    1.0, 10, 20)
        v_hit_3 = ValidHit(hit_3, gene_sctc, GeneStatus.NEUTRAL)
        hit_4 = Hit(c_gene_tadz, "hit_4", 806, "replicon_id", 1, 1.0, 1.0, 1.0,
                    1.0, 10, 20)
        v_hit_4 = ValidHit(hit_4, gene_tadz, GeneStatus.FORBIDDEN)
        reason = "why it not a system"
        system_1 = UnlikelySystem(model, [v_hit_1], [v_hit_2], [v_hit_3],
                                  [v_hit_4], reason)

        exp_txt = f"""# macsyfinder {macsypy.__version__}
# {' '.join(sys.argv)}
# Unlikely Systems found:

This replicon probably not contains a system foo/T2SS:
{reason}

system id = replicon_id_T2SS_1
model = foo/T2SS
replicon = replicon_id
hits = [('hit_1', 'gspD', 1), ('hit_2', 'sctJ', 1), ('hit_3', 'sctC', 1), ('hit_4', 'tadZ', 1)]
wholeness = 1.000

mandatory genes:
\t- gspD: 1 (gspD)

accessory genes:
\t- sctJ: 1 (sctJ)

neutral genes:
\t- sctC: 1 (sctC)

forbidden genes:
\t- tadZ: 1 (tadZ)

Use ordered replicon to have better prediction.

============================================================
"""

        f_out = StringIO()
        unlikely_systems_to_txt([system_1], f_out)
        self.assertMultiLineEqual(exp_txt, f_out.getvalue())

        f_out = StringIO()
        unlikely_systems_to_txt([], f_out)
        expected_out = f"""# macsyfinder {macsypy.__version__}
# {' '.join(sys.argv)}
# No Unlikely Systems found
"""
        self.assertEqual(expected_out, f_out.getvalue())