コード例 #1
0
class SerializationTest(MacsyTest):
    def setUp(self) -> None:
        args = argparse.Namespace()
        args.sequence_db = self.find_data("base", "test_1.fasta")
        args.db_type = 'gembase'
        args.models_dir = self.find_data('models')
        self.cfg = Config(MacsyDefaults(), args)

        self.model_name = 'foo'
        self.model_location = ModelLocation(
            path=os.path.join(args.models_dir, self.model_name))
        self.profile_factory = ProfileFactory(self.cfg)
        self.hit_weights = HitWeight(**self.cfg.hit_weights())
        # reset the uniq id number for AbstractSetOfHits
        # to have predictable results
        AbstractSetOfHits._id = itertools.count(1)

    def test_SystemSerializer_str(self):
        model_name = 'foo'
        model_location = ModelLocation(
            path=os.path.join(self.cfg.models_dir(), model_name))
        model_A = Model("foo/A", 10)
        model_B = Model("foo/B", 10)

        c_gene_sctn_flg = CoreGene(model_location, "sctN_FLG",
                                   self.profile_factory)
        gene_sctn_flg = ModelGene(c_gene_sctn_flg, model_B)
        c_gene_sctj_flg = CoreGene(model_location, "sctJ_FLG",
                                   self.profile_factory)
        gene_sctj_flg = ModelGene(c_gene_sctj_flg, model_B)
        c_gene_flgB = CoreGene(model_location, "flgB", self.profile_factory)
        c_gene_tadZ = CoreGene(model_location, "tadZ", self.profile_factory)
        gene_tadZ = ModelGene(c_gene_tadZ, model_B)

        c_gene_sctn = CoreGene(model_location, "sctN", self.profile_factory)
        gene_sctn = ModelGene(c_gene_sctn, model_A)
        gene_sctn_hom = Exchangeable(c_gene_sctn_flg, gene_sctn)
        gene_sctn.add_exchangeable(gene_sctn_hom)

        c_gene_sctj = CoreGene(model_location, "sctJ", self.profile_factory)
        gene_sctj = ModelGene(c_gene_sctj, model_A)
        gene_sctj_an = Exchangeable(c_gene_sctj_flg, gene_sctj)
        gene_sctj.add_exchangeable(gene_sctj_an)

        c_gene_gspd = CoreGene(model_location, "gspD", self.profile_factory)
        gene_gspd = ModelGene(c_gene_gspd, model_A)
        gene_gspd_an = Exchangeable(c_gene_flgB, gene_gspd)
        gene_gspd.add_exchangeable(gene_gspd_an)

        c_gene_abc = CoreGene(model_location, "abc", self.profile_factory)
        gene_abc = ModelGene(c_gene_abc, model_A)
        gene_abc_ho = Exchangeable(c_gene_tadZ, gene_abc)
        gene_abc.add_exchangeable(gene_abc_ho)

        model_A.add_mandatory_gene(gene_sctn)
        model_A.add_mandatory_gene(gene_sctj)
        model_A.add_accessory_gene(gene_gspd)
        model_A.add_forbidden_gene(gene_abc)

        model_B.add_mandatory_gene(gene_sctn_flg)
        model_B.add_mandatory_gene(gene_sctj_flg)
        model_B.add_accessory_gene(gene_gspd)
        model_B.add_accessory_gene(gene_tadZ)

        h_sctj = Hit(c_gene_sctj, "hit_sctj", 803, "replicon_id", 1, 1.0, 1.0,
                     1.0, 1.0, 10, 20)
        h_sctn = Hit(c_gene_sctn, "hit_sctn", 803, "replicon_id", 1, 1.0, 1.0,
                     1.0, 1.0, 10, 20)
        h_gspd = Hit(c_gene_gspd, "hit_gspd", 803, "replicon_id", 1, 1.0, 1.0,
                     1.0, 1.0, 10, 20)

        h_sctj_flg = Hit(c_gene_sctj_flg, "hit_sctj_flg", 803, "replicon_id",
                         1, 1.0, 1.0, 1.0, 1.0, 10, 20)
        h_tadZ = Hit(c_gene_tadZ, "hit_tadZ", 803, "replicon_id", 1, 1.0, 1.0,
                     1.0, 1.0, 10, 20)

        model_A._min_mandatory_genes_required = 2
        model_A._min_genes_required = 2
        c1 = Cluster([
            ValidHit(h_sctj, gene_sctj, GeneStatus.MANDATORY),
            ValidHit(h_sctn, gene_sctn, GeneStatus.MANDATORY),
            ValidHit(h_gspd, gene_gspd, GeneStatus.ACCESSORY)
        ], model_A, self.hit_weights)

        c2 = Cluster([
            ValidHit(h_sctj, gene_sctj, GeneStatus.MANDATORY),
            ValidHit(h_sctn, gene_sctn, GeneStatus.MANDATORY)
        ], model_A, self.hit_weights)

        model_B._min_mandatory_genes_required = 1
        model_B._min_genes_required = 2
        c3 = Cluster([
            ValidHit(h_sctj_flg, gene_sctj_flg, GeneStatus.MANDATORY),
            ValidHit(h_tadZ, gene_tadZ, GeneStatus.ACCESSORY),
            ValidHit(h_gspd, gene_gspd, GeneStatus.ACCESSORY)
        ], model_B, self.hit_weights)

        sys_A = System(model_A, [c1, c2], self.cfg.redundancy_penalty())
        sys_A.id = "sys_id_A"
        sys_B = System(model_B, [c3], self.cfg.redundancy_penalty())
        sys_B.id = "sys_id_B"
        hit_multi_sys_tracker = HitSystemTracker([sys_A, sys_B])
        system_serializer = TxtSystemSerializer()

        sys_str = f"""system id = {sys_A.id}
model = foo/A
replicon = replicon_id
clusters = [('hit_sctj', 'sctJ', 1), ('hit_sctn', 'sctN', 1), ('hit_gspd', 'gspD', 1)], [('hit_sctj', 'sctJ', 1), ('hit_sctn', 'sctN', 1)]
occ = 2
wholeness = 1.000
loci nb = 2
score = 1.500

mandatory genes:
\t- sctN: 2 (sctN, sctN)
\t- sctJ: 2 (sctJ, sctJ)

accessory genes:
\t- gspD: 1 (gspD [sys_id_B])

neutral genes:
"""
        self.assertEqual(
            sys_str, system_serializer.serialize(sys_A, hit_multi_sys_tracker))

    def test_SystemSerializer_tsv(self):
        model = Model("foo/T2SS", 10)
        c_gene_gspd = CoreGene(self.model_location, "gspD",
                               self.profile_factory)
        gene_gspd = ModelGene(c_gene_gspd, model)
        model.add_mandatory_gene(gene_gspd)
        c_gene_sctj = CoreGene(self.model_location, "sctJ",
                               self.profile_factory)
        gene_sctj = ModelGene(c_gene_sctj, model)
        model.add_accessory_gene(gene_sctj)
        c_gene_sctn = CoreGene(self.model_location, "sctN",
                               self.profile_factory)
        gene_sctn = ModelGene(c_gene_sctn, model)
        c_gene_sctn_flg = CoreGene(self.model_location, "sctN_FLG",
                                   self.profile_factory)
        gene_sctn_flg = Exchangeable(c_gene_sctn_flg, gene_sctn)
        gene_sctn.add_exchangeable(gene_sctn_flg)
        model.add_accessory_gene(gene_sctn)

        h_gspd = Hit(c_gene_gspd, "h_gspd", 803, "replicon_id", 10, 1.0, 1.0,
                     1.0, 1.0, 10, 20)
        v_h_gspd = ValidHit(h_gspd, gene_gspd, GeneStatus.MANDATORY)
        h_sctj = Hit(c_gene_sctj, "h_sctj", 803, "replicon_id", 20, 1.0, 1.0,
                     1.0, 1.0, 20, 30)
        v_h_sctj = ValidHit(h_sctj, gene_sctj, GeneStatus.ACCESSORY)
        h_sctn_flg = Hit(c_gene_sctn_flg, "h_sctn_flg", 803, "replicon_id", 30,
                         1.0, 1.0, 1.0, 1.0, 30, 40)
        v_h_sctn_flg = ValidHit(h_sctn_flg, gene_sctn_flg,
                                GeneStatus.ACCESSORY)
        c1 = Cluster([v_h_gspd, v_h_sctj], model, self.hit_weights)
        c2 = Cluster([v_h_sctn_flg], model, self.hit_weights)
        sys_multi_loci = System(model, [c1, c2], self.cfg.redundancy_penalty())
        hit_multi_sys_tracker = HitSystemTracker([sys_multi_loci])
        system_serializer = TsvSystemSerializer()

        sys_tsv = "\t".join([
            "replicon_id", "h_gspd", "gspD", "10", "foo/T2SS",
            sys_multi_loci.id, "1", "1.000", "1.900", "1", "gspD", "mandatory",
            "803", "1.0", "1.000", "1.000", "1.000", "10", "20", ""
        ])
        sys_tsv += "\n"
        sys_tsv += "\t".join([
            "replicon_id", "h_sctj", "sctJ", "20", "foo/T2SS",
            sys_multi_loci.id, "1", "1.000", "1.900", "1", "sctJ", "accessory",
            "803", "1.0", "1.000", "1.000", "1.000", "20", "30", ""
        ])
        sys_tsv += "\n"
        sys_tsv += "\t".join([
            "replicon_id", "h_sctn_flg", "sctN_FLG", "30", "foo/T2SS",
            sys_multi_loci.id, "1", "1.000", "1.900", "1", "sctN", "accessory",
            "803", "1.0", "1.000", "1.000", "1.000", "30", "40", ""
        ])
        sys_tsv += "\n"
        self.assertEqual(
            sys_tsv,
            system_serializer.serialize(sys_multi_loci, hit_multi_sys_tracker))

    def test_SolutionSerializer_tsv(self):
        model_name = 'foo'
        model_location = ModelLocation(
            path=os.path.join(self.cfg.models_dir(), model_name))
        model_A = Model("foo/A", 10)
        model_B = Model("foo/B", 10)

        c_gene_sctn_flg = CoreGene(model_location, "sctN_FLG",
                                   self.profile_factory)
        gene_sctn_flg = ModelGene(c_gene_sctn_flg, model_B)
        c_gene_sctj_flg = CoreGene(model_location, "sctJ_FLG",
                                   self.profile_factory)
        gene_sctj_flg = ModelGene(c_gene_sctj_flg, model_B)
        c_gene_flgB = CoreGene(model_location, "flgB", self.profile_factory)
        gene_flgB = ModelGene(c_gene_flgB, model_B)
        c_gene_tadZ = CoreGene(model_location, "tadZ", self.profile_factory)
        gene_tadZ = ModelGene(c_gene_tadZ, model_B)

        c_gene_sctn = CoreGene(model_location, "sctN", self.profile_factory)
        gene_sctn = ModelGene(c_gene_sctn, model_A)
        gene_sctn_hom = Exchangeable(c_gene_sctn_flg, gene_sctn)
        gene_sctn.add_exchangeable(gene_sctn_hom)

        c_gene_sctj = CoreGene(model_location, "sctJ", self.profile_factory)
        gene_sctj = ModelGene(c_gene_sctj, model_A)
        gene_sctj_an = Exchangeable(c_gene_sctj_flg, gene_sctj)
        gene_sctj.add_exchangeable(gene_sctj_an)

        c_gene_gspd = CoreGene(model_location, "gspD", self.profile_factory)
        gene_gspd = ModelGene(c_gene_gspd, model_A)
        gene_gspd_an = Exchangeable(c_gene_flgB, gene_gspd)
        gene_gspd.add_exchangeable(gene_gspd_an)

        c_gene_abc = CoreGene(model_location, "abc", self.profile_factory)
        gene_abc = ModelGene(c_gene_abc, model_A)
        gene_abc_ho = Exchangeable(c_gene_tadZ, gene_abc)
        gene_abc.add_exchangeable(gene_abc_ho)

        model_A.add_mandatory_gene(gene_sctn)
        model_A.add_mandatory_gene(gene_sctj)
        model_A.add_accessory_gene(gene_gspd)
        model_A.add_forbidden_gene(gene_abc)

        model_B.add_mandatory_gene(gene_sctn_flg)
        model_B.add_mandatory_gene(gene_sctj_flg)
        model_B.add_accessory_gene(gene_flgB)
        model_B.add_accessory_gene(gene_tadZ)

        h_sctj = Hit(c_gene_sctj, "hit_sctj", 803, "replicon_id", 1, 1.0, 1.0,
                     1.0, 1.0, 10, 20)
        h_sctn = Hit(c_gene_sctn, "hit_sctn", 803, "replicon_id", 1, 1.0, 1.0,
                     1.0, 1.0, 10, 20)
        h_gspd = Hit(c_gene_gspd, "hit_gspd", 803, "replicon_id", 1, 1.0, 1.0,
                     1.0, 1.0, 10, 20)

        h_sctj_flg = Hit(c_gene_sctj_flg, "hit_sctj_flg", 803, "replicon_id",
                         1, 1.0, 1.0, 1.0, 1.0, 10, 20)
        h_flgB = Hit(c_gene_flgB, "hit_flgB", 803, "replicon_id", 1, 1.0, 1.0,
                     1.0, 1.0, 10, 20)
        h_tadZ = Hit(c_gene_tadZ, "hit_tadZ", 803, "replicon_id", 1, 1.0, 1.0,
                     1.0, 1.0, 10, 20)

        model_A._min_mandatory_genes_required = 2
        model_A._min_genes_required = 2
        c1 = Cluster([
            ValidHit(h_sctj, gene_sctj, GeneStatus.MANDATORY),
            ValidHit(h_sctn, gene_sctn, GeneStatus.MANDATORY),
            ValidHit(h_gspd, gene_gspd, GeneStatus.ACCESSORY)
        ], model_A, self.hit_weights)

        c2 = Cluster([
            ValidHit(h_sctj, gene_sctj, GeneStatus.MANDATORY),
            ValidHit(h_sctn, gene_sctn, GeneStatus.MANDATORY)
        ], model_A, self.hit_weights)

        model_B._min_mandatory_genes_required = 1
        model_B._min_genes_required = 2
        c3 = Cluster([
            ValidHit(h_sctj_flg, gene_sctj_flg, GeneStatus.MANDATORY),
            ValidHit(h_tadZ, gene_tadZ, GeneStatus.ACCESSORY),
            ValidHit(h_flgB, gene_flgB, GeneStatus.ACCESSORY)
        ], model_B, self.hit_weights)

        sys_A = System(model_A, [c1, c2], self.cfg.redundancy_penalty())
        sys_A.id = "sys_id_A"
        sys_B = System(model_B, [c3], self.cfg.redundancy_penalty())
        sys_B.id = "sys_id_B"

        sol = [sys_A, sys_B]
        sol_id = '12'

        hit_multi_sys_tracker = HitSystemTracker([sys_A, sys_B])
        system_serializer = TsvSolutionSerializer()

        sol_tsv = '\t'.join([
            sol_id, 'replicon_id', 'hit_sctj', 'sctJ', '1', 'foo/A',
            'sys_id_A', '2', '1.000', '1.500', '2', 'sctJ', 'mandatory', '803',
            '1.0', '1.000', '1.000', '1.000', '10', '20', ''
        ])
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id, 'replicon_id', 'hit_sctn', 'sctN', '1', 'foo/A',
            'sys_id_A', '2', '1.000', '1.500', '2', 'sctN', 'mandatory', '803',
            '1.0', '1.000', '1.000', '1.000', '10', '20', ''
        ])
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id, 'replicon_id', 'hit_gspd', 'gspD', '1', 'foo/A',
            'sys_id_A', '2', '1.000', '1.500', '2', 'gspD', 'accessory', '803',
            '1.0', '1.000', '1.000', '1.000', '10', '20', ''
        ])
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id, 'replicon_id', 'hit_sctj', 'sctJ', '1', 'foo/A',
            'sys_id_A', '2', '1.000', '1.500', '2', 'sctJ', 'mandatory', '803',
            '1.0', '1.000', '1.000', '1.000', '10', '20', ''
        ])
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id, 'replicon_id', 'hit_sctn', 'sctN', '1', 'foo/A',
            'sys_id_A', '2', '1.000', '1.500', '2', 'sctN', 'mandatory', '803',
            '1.0', '1.000', '1.000', '1.000', '10', '20', ''
        ])
        sol_tsv += "\n"
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id, 'replicon_id', 'hit_sctj_flg', 'sctJ_FLG', '1', 'foo/B',
            'sys_id_B', '1', '0.750', '2.000', '1', 'sctJ_FLG', 'mandatory',
            '803', '1.0', '1.000', '1.000', '1.000', '10', '20', ''
        ])
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id, 'replicon_id', 'hit_tadZ', 'tadZ', '1', 'foo/B',
            'sys_id_B', '1', '0.750', '2.000', '1', 'tadZ', 'accessory', '803',
            '1.0', '1.000', '1.000', '1.000', '10', '20', ''
        ])
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id, 'replicon_id', 'hit_flgB', 'flgB', '1', 'foo/B',
            'sys_id_B', '1', '0.750', '2.000', '1', 'flgB', 'accessory', '803',
            '1.0', '1.000', '1.000', '1.000', '10', '20', ''
        ])
        sol_tsv += "\n"
        sol_tsv += "\n"
        ser = system_serializer.serialize(sol, sol_id, hit_multi_sys_tracker)
        self.assertEqual(ser, sol_tsv)

    def test_LikelySystemSerializer_txt(self):
        model = Model("foo/FOO", 10)
        c_gene_gspd = CoreGene(self.model_location, "gspD",
                               self.profile_factory)
        gene_gspd = ModelGene(c_gene_gspd, model)
        model.add_mandatory_gene(gene_gspd)
        c_gene_sctj = CoreGene(self.model_location, "sctJ",
                               self.profile_factory)
        gene_sctj = ModelGene(c_gene_sctj, model)
        model.add_accessory_gene(gene_sctj)
        c_gene_sctn = CoreGene(self.model_location, "sctN",
                               self.profile_factory)
        gene_sctn = ModelGene(c_gene_sctn, model)
        model.add_accessory_gene(gene_sctn)
        c_gene_abc = CoreGene(self.model_location, "abc", self.profile_factory)
        gene_abc = ModelGene(c_gene_abc, model)
        model.add_forbidden_gene(gene_abc)

        hit_1 = Hit(c_gene_gspd, "hit_1", 803, "replicon_id", 1, 1.0, 1.0, 1.0,
                    1.0, 10, 20)
        v_hit_1 = ValidHit(hit_1, gene_gspd, GeneStatus.MANDATORY)
        hit_2 = Hit(c_gene_sctj, "hit_2", 803, "replicon_id", 2, 1.0, 1.0, 1.0,
                    1.0, 10, 20)
        v_hit_2 = ValidHit(hit_2, gene_sctj, GeneStatus.ACCESSORY)
        hit_3 = Hit(c_gene_sctn, "hit_3", 803, "replicon_id", 3, 1.0, 1.0, 1.0,
                    1.0, 10, 20)
        v_hit_3 = ValidHit(hit_3, gene_sctn, GeneStatus.ACCESSORY)
        hit_4 = Hit(c_gene_abc, "hit_4", 803, "replicon_id", 4, 1.0, 1.0, 1.0,
                    1.0, 10, 20)
        v_hit_4 = ValidHit(hit_4, gene_abc, GeneStatus.FORBIDDEN)

        ls_1 = LikelySystem(model, [v_hit_1], [v_hit_2, v_hit_3], [],
                            [v_hit_4])
        hit_multi_sys_tracker = HitSystemTracker([ls_1])
        ser = TxtLikelySystemSerializer()

        txt = ser.serialize(ls_1, hit_multi_sys_tracker)
        expected_txt = """This replicon contains genetic materials needed for system foo/FOO
WARNING there quorum is reached but there is also some forbidden genes.

system id = replicon_id_FOO_1
model = foo/FOO
replicon = replicon_id
hits = [('hit_1', 'gspD', 1), ('hit_2', 'sctJ', 2), ('hit_3', 'sctN', 3), ('hit_4', 'abc', 4)]
wholeness = 1.000

mandatory genes:
\t- gspD: 1 (gspD)

accessory genes:
\t- sctJ: 1 (sctJ)
\t- sctN: 1 (sctN)

neutral genes:

forbidden genes:
\t- abc: 1 (abc)

Use ordered replicon to have better prediction.
"""
        self.assertEqual(txt, expected_txt)

    def test_UnlikelySystemSerializer_txt(self):
        model = Model("foo/FOO", 10)
        c_gene_gspd = CoreGene(self.model_location, "gspD",
                               self.profile_factory)
        gene_gspd = ModelGene(c_gene_gspd, model)
        model.add_mandatory_gene(gene_gspd)
        c_gene_sctj = CoreGene(self.model_location, "sctJ",
                               self.profile_factory)
        gene_sctj = ModelGene(c_gene_sctj, model)
        model.add_accessory_gene(gene_sctj)
        c_gene_sctn = CoreGene(self.model_location, "sctN",
                               self.profile_factory)
        gene_sctn = ModelGene(c_gene_sctn, model)
        model.add_accessory_gene(gene_sctn)
        c_gene_abc = CoreGene(self.model_location, "abc", self.profile_factory)
        gene_abc = ModelGene(c_gene_abc, model)
        model.add_forbidden_gene(gene_abc)

        hit_1 = Hit(c_gene_gspd, "hit_1", 803, "replicon_id", 1, 1.0, 1.0, 1.0,
                    1.0, 10, 20)
        v_hit_1 = ValidHit(hit_1, gene_gspd, GeneStatus.MANDATORY)
        hit_2 = Hit(c_gene_sctj, "hit_2", 803, "replicon_id", 2, 1.0, 1.0, 1.0,
                    1.0, 10, 20)
        v_hit_2 = ValidHit(hit_2, gene_sctj, GeneStatus.ACCESSORY)
        hit_3 = Hit(c_gene_sctn, "hit_3", 803, "replicon_id", 3, 1.0, 1.0, 1.0,
                    1.0, 10, 20)
        v_hit_3 = ValidHit(hit_3, gene_sctn, GeneStatus.ACCESSORY)
        hit_4 = Hit(c_gene_abc, "hit_4", 803, "replicon_id", 4, 1.0, 1.0, 1.0,
                    1.0, 10, 20)
        v_hit_4 = ValidHit(hit_4, gene_abc, GeneStatus.FORBIDDEN)
        ser = TxtUnikelySystemSerializer()

        ls_1 = UnlikelySystem(model, [v_hit_1], [v_hit_2, v_hit_3], [],
                              [v_hit_4], ["the reason why"])
        txt = ser.serialize(ls_1)
        expected_txt = """This replicon probably not contains a system foo/FOO:
the reason why

system id = replicon_id_FOO_1
model = foo/FOO
replicon = replicon_id
hits = [('hit_1', 'gspD', 1), ('hit_2', 'sctJ', 2), ('hit_3', 'sctN', 3), ('hit_4', 'abc', 4)]
wholeness = 1.000

mandatory genes:
\t- gspD: 1 (gspD)

accessory genes:
\t- sctJ: 1 (sctJ)
\t- sctN: 1 (sctN)

neutral genes:

forbidden genes:
\t- abc: 1 (abc)

Use ordered replicon to have better prediction.
"""
        self.assertEqual(txt, expected_txt)
コード例 #2
0
    def test_systems_to_txt(self):
        system_str = f"""# macsyfinder {macsypy.__version__}
# {' '.join(sys.argv)}
# No Systems found
"""
        f_out = StringIO()
        track_multi_systems_hit = HitSystemTracker([])
        systems_to_txt([], track_multi_systems_hit, f_out)
        self.assertMultiLineEqual(system_str, f_out.getvalue())

        args = argparse.Namespace()
        args.sequence_db = self.find_data("base", "test_1.fasta")
        args.db_type = 'gembase'
        args.models_dir = self.find_data('models')
        cfg = Config(MacsyDefaults(), args)

        model_name = 'foo'
        models_location = ModelLocation(
            path=os.path.join(args.models_dir, model_name))

        # we need to reset the ProfileFactory
        # because it's a like a singleton
        # so other tests are influenced by ProfileFactory and it's configuration
        # for instance search_genes get profile without hmmer_exe
        profile_factory = ProfileFactory(cfg)

        model = Model("foo/T2SS", 10)
        # test if id is well incremented
        gene_name = "gspD"
        c_gene_gspd = CoreGene(models_location, gene_name, profile_factory)
        gene_gspd = ModelGene(c_gene_gspd, model)
        model.add_mandatory_gene(gene_gspd)
        gene_name = "sctJ"
        c_gene_sctj = CoreGene(models_location, gene_name, profile_factory)
        gene_sctj = ModelGene(c_gene_sctj, model)
        model.add_accessory_gene(gene_sctj)

        hit_1 = Hit(c_gene_gspd, "hit_1", 803, "replicon_id", 1, 1.0, 1.0, 1.0,
                    1.0, 10, 20)
        v_hit_1 = ValidHit(hit_1, gene_gspd, GeneStatus.MANDATORY)
        hit_2 = Hit(c_gene_sctj, "hit_2", 803, "replicon_id", 1, 1.0, 1.0, 1.0,
                    1.0, 10, 20)
        v_hit_2 = ValidHit(hit_2, gene_sctj, GeneStatus.ACCESSORY)
        system_1 = System(model, [
            Cluster([v_hit_1, v_hit_2], model, HitWeight(**cfg.hit_weights()))
        ], cfg.redundancy_penalty())

        system_str = f"""# macsyfinder {macsypy.__version__}
# {' '.join(sys.argv)}
# Systems found:

system id = replicon_id_T2SS_{next(System._id) - 1}
model = foo/T2SS
replicon = replicon_id
clusters = [('hit_1', 'gspD', 1), ('hit_2', 'sctJ', 1)]
occ = 1
wholeness = 1.000
loci nb = 1
score = 1.500

mandatory genes:
\t- gspD: 1 (gspD)

accessory genes:
\t- sctJ: 1 (sctJ)

neutral genes:

============================================================
"""

        f_out = StringIO()
        track_multi_systems_hit = HitSystemTracker([system_1])
        systems_to_txt([system_1], track_multi_systems_hit, f_out)
        self.assertMultiLineEqual(system_str, f_out.getvalue())
コード例 #3
0
    def test_solutions_to_tsv(self):
        args = argparse.Namespace()
        args.sequence_db = self.find_data("base", "test_1.fasta")
        args.db_type = 'gembase'
        args.models_dir = self.find_data('models')
        cfg = Config(MacsyDefaults(), args)
        model_name = 'foo'
        models_location = ModelLocation(
            path=os.path.join(args.models_dir, model_name))

        # we need to reset the ProfileFactory
        # because it's a like a singleton
        # so other tests are influenced by ProfileFactory and it's configuration
        # for instance search_genes get profile without hmmer_exe
        profile_factory = ProfileFactory(cfg)

        model_A = Model("foo/A", 10)
        model_B = Model("foo/B", 10)
        model_C = Model("foo/C", 10)

        c_gene_sctn_flg = CoreGene(models_location, "sctN_FLG",
                                   profile_factory)
        gene_sctn_flg = ModelGene(c_gene_sctn_flg, model_B)
        c_gene_sctj_flg = CoreGene(models_location, "sctJ_FLG",
                                   profile_factory)
        gene_sctj_flg = ModelGene(c_gene_sctj_flg, model_B)
        c_gene_flgB = CoreGene(models_location, "flgB", profile_factory)
        gene_flgB = ModelGene(c_gene_flgB, model_B)
        c_gene_tadZ = CoreGene(models_location, "tadZ", profile_factory)
        gene_tadZ = ModelGene(c_gene_tadZ, model_B)

        c_gene_sctn = CoreGene(models_location, "sctN", profile_factory)
        gene_sctn = ModelGene(c_gene_sctn, model_A)
        gene_sctn_hom = Exchangeable(c_gene_sctn_flg, gene_sctn)
        gene_sctn.add_exchangeable(gene_sctn_hom)

        c_gene_sctj = CoreGene(models_location, "sctJ", profile_factory)
        gene_sctj = ModelGene(c_gene_sctj, model_A)
        gene_sctj_an = Exchangeable(c_gene_sctj_flg, gene_sctj)
        gene_sctj.add_exchangeable(gene_sctj_an)

        c_gene_gspd = CoreGene(models_location, "gspD", profile_factory)
        gene_gspd = ModelGene(c_gene_gspd, model_A)
        gene_gspd_an = Exchangeable(c_gene_flgB, gene_gspd)
        gene_gspd.add_exchangeable(gene_gspd_an)

        c_gene_abc = CoreGene(models_location, "abc", profile_factory)
        gene_abc = ModelGene(c_gene_abc, model_A)
        gene_abc_ho = Exchangeable(c_gene_tadZ, gene_abc)
        gene_abc.add_exchangeable(gene_abc_ho)

        model_A.add_mandatory_gene(gene_sctn)
        model_A.add_mandatory_gene(gene_sctj)
        model_A.add_accessory_gene(gene_gspd)
        model_A.add_forbidden_gene(gene_abc)

        model_B.add_mandatory_gene(gene_sctn_flg)
        model_B.add_mandatory_gene(gene_sctj_flg)
        model_B.add_accessory_gene(gene_flgB)
        model_B.add_accessory_gene(gene_tadZ)

        model_C.add_mandatory_gene(gene_sctn_flg)
        model_C.add_mandatory_gene(gene_sctj_flg)
        model_C.add_mandatory_gene(gene_flgB)
        model_C.add_accessory_gene(gene_tadZ)
        model_C.add_accessory_gene(gene_gspd)

        h_sctj = Hit(c_gene_sctj, "hit_sctj", 803, "replicon_id", 1, 1.0, 1.0,
                     1.0, 1.0, 10, 20)
        h_sctn = Hit(c_gene_sctn, "hit_sctn", 803, "replicon_id", 1, 1.0, 1.0,
                     1.0, 1.0, 10, 20)
        h_gspd = Hit(c_gene_gspd, "hit_gspd", 803, "replicon_id", 1, 1.0, 1.0,
                     1.0, 1.0, 10, 20)

        h_sctj_flg = Hit(c_gene_sctj_flg, "hit_sctj_flg", 803, "replicon_id",
                         1, 1.0, 1.0, 1.0, 1.0, 10, 20)
        h_flgB = Hit(c_gene_flgB, "hit_flgB", 803, "replicon_id", 1, 1.0, 1.0,
                     1.0, 1.0, 10, 20)
        h_tadZ = Hit(c_gene_tadZ, "hit_tadZ", 803, "replicon_id", 1, 1.0, 1.0,
                     1.0, 1.0, 10, 20)

        model_A._min_mandatory_genes_required = 2
        model_A._min_genes_required = 2
        hit_weights = HitWeight(**cfg.hit_weights())
        c1 = Cluster([
            ValidHit(h_sctj, gene_sctj, GeneStatus.MANDATORY),
            ValidHit(h_sctn, gene_sctn, GeneStatus.MANDATORY),
            ValidHit(h_gspd, gene_gspd, GeneStatus.ACCESSORY)
        ], model_A, hit_weights)

        c2 = Cluster([
            ValidHit(h_sctj, gene_sctj, GeneStatus.MANDATORY),
            ValidHit(h_sctn, gene_sctn, GeneStatus.MANDATORY)
        ], model_A, hit_weights)

        model_B._min_mandatory_genes_required = 1
        model_B._min_genes_required = 2
        c3 = Cluster([
            ValidHit(h_sctj_flg, gene_sctj_flg, GeneStatus.MANDATORY),
            ValidHit(h_tadZ, gene_tadZ, GeneStatus.ACCESSORY),
            ValidHit(h_flgB, gene_flgB, GeneStatus.ACCESSORY)
        ], model_B, hit_weights)

        model_C._min_mandatory_genes_required = 1
        model_C._min_genes_required = 2
        c4 = Cluster([
            ValidHit(h_sctj_flg, gene_sctj_flg, GeneStatus.MANDATORY),
            ValidHit(h_tadZ, gene_tadZ, GeneStatus.ACCESSORY),
            ValidHit(h_flgB, gene_flgB, GeneStatus.MANDATORY),
            ValidHit(h_gspd, gene_gspd, GeneStatus.ACCESSORY)
        ], model_C, hit_weights)

        sys_A = System(model_A, [c1, c2], cfg.redundancy_penalty())
        sys_A.id = "sys_id_A"
        sys_B = System(model_B, [c3], cfg.redundancy_penalty())
        sys_B.id = "sys_id_B"
        sys_C = System(model_C, [c4], cfg.redundancy_penalty())
        sys_C.id = "sys_id_C"

        sol_1 = [sys_A, sys_B]
        sol_2 = [sys_A, sys_C]
        sol_id_1 = '1'
        sol_id_2 = '2'

        sol_tsv = f"""# macsyfinder {macsypy.__version__}
# {' '.join(sys.argv)}
# Systems found:
"""
        sol_tsv += "\t".join([
            "sol_id", "replicon", "hit_id", "gene_name", "hit_pos",
            "model_fqn", "sys_id", "sys_loci", "sys_wholeness", "sys_score",
            "sys_occ", "hit_gene_ref", "hit_status", "hit_seq_len",
            "hit_i_eval", "hit_score", "hit_profile_cov", "hit_seq_cov",
            "hit_begin_match", "hit_end_match", "used_in"
        ])
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id_1, 'replicon_id', 'hit_sctj', 'sctJ', '1', 'foo/A',
            'sys_id_A', '2', '1.000', '1.500', '2', 'sctJ', 'mandatory', '803',
            '1.0', '1.000', '1.000', '1.000', '10', '20', ''
        ])
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id_1, 'replicon_id', 'hit_sctn', 'sctN', '1', 'foo/A',
            'sys_id_A', '2', '1.000', '1.500', '2', 'sctN', 'mandatory', '803',
            '1.0', '1.000', '1.000', '1.000', '10', '20', ''
        ])
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id_1, 'replicon_id', 'hit_gspd', 'gspD', '1', 'foo/A',
            'sys_id_A', '2', '1.000', '1.500', '2', 'gspD', 'accessory', '803',
            '1.0', '1.000', '1.000', '1.000', '10', '20', ''
        ])
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id_1, 'replicon_id', 'hit_sctj', 'sctJ', '1', 'foo/A',
            'sys_id_A', '2', '1.000', '1.500', '2', 'sctJ', 'mandatory', '803',
            '1.0', '1.000', '1.000', '1.000', '10', '20', ''
        ])
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id_1, 'replicon_id', 'hit_sctn', 'sctN', '1', 'foo/A',
            'sys_id_A', '2', '1.000', '1.500', '2', 'sctN', 'mandatory', '803',
            '1.0', '1.000', '1.000', '1.000', '10', '20', ''
        ])
        sol_tsv += "\n"
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id_1, 'replicon_id', 'hit_sctj_flg', 'sctJ_FLG', '1', 'foo/B',
            'sys_id_B', '1', '0.750', '2.000', '1', 'sctJ_FLG', 'mandatory',
            '803', '1.0', '1.000', '1.000', '1.000', '10', '20', ''
        ])
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id_1, 'replicon_id', 'hit_tadZ', 'tadZ', '1', 'foo/B',
            'sys_id_B', '1', '0.750', '2.000', '1', 'tadZ', 'accessory', '803',
            '1.0', '1.000', '1.000', '1.000', '10', '20', ''
        ])
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id_1, 'replicon_id', 'hit_flgB', 'flgB', '1', 'foo/B',
            'sys_id_B', '1', '0.750', '2.000', '1', 'flgB', 'accessory', '803',
            '1.0', '1.000', '1.000', '1.000', '10', '20', ''
        ])
        sol_tsv += "\n"
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id_2, 'replicon_id', 'hit_sctj', 'sctJ', '1', 'foo/A',
            'sys_id_A', '2', '1.000', '1.500', '2', 'sctJ', 'mandatory', '803',
            '1.0', '1.000', '1.000', '1.000', '10', '20', ''
        ])
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id_2, 'replicon_id', 'hit_sctn', 'sctN', '1', 'foo/A',
            'sys_id_A', '2', '1.000', '1.500', '2', 'sctN', 'mandatory', '803',
            '1.0', '1.000', '1.000', '1.000', '10', '20', ''
        ])
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id_2, 'replicon_id', 'hit_gspd', 'gspD', '1', 'foo/A',
            'sys_id_A', '2', '1.000', '1.500', '2', 'gspD', 'accessory', '803',
            '1.0', '1.000', '1.000', '1.000', '10', '20', ''
        ])
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id_2, 'replicon_id', 'hit_sctj', 'sctJ', '1', 'foo/A',
            'sys_id_A', '2', '1.000', '1.500', '2', 'sctJ', 'mandatory', '803',
            '1.0', '1.000', '1.000', '1.000', '10', '20', ''
        ])
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id_2, 'replicon_id', 'hit_sctn', 'sctN', '1', 'foo/A',
            'sys_id_A', '2', '1.000', '1.500', '2', 'sctN', 'mandatory', '803',
            '1.0', '1.000', '1.000', '1.000', '10', '20', ''
        ])
        sol_tsv += "\n"
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id_2, 'replicon_id', 'hit_sctj_flg', 'sctJ_FLG', '1', 'foo/C',
            'sys_id_C', '1', '0.800', '3.000', '1', 'sctJ_FLG', 'mandatory',
            '803', '1.0', '1.000', '1.000', '1.000', '10', '20', 'sys_id_B'
        ])
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id_2, 'replicon_id', 'hit_tadZ', 'tadZ', '1', 'foo/C',
            'sys_id_C', '1', '0.800', '3.000', '1', 'tadZ', 'accessory', '803',
            '1.0', '1.000', '1.000', '1.000', '10', '20', 'sys_id_B'
        ])
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id_2, 'replicon_id', 'hit_flgB', 'flgB', '1', 'foo/C',
            'sys_id_C', '1', '0.800', '3.000', '1', 'flgB', 'mandatory', '803',
            '1.0', '1.000', '1.000', '1.000', '10', '20', 'sys_id_B'
        ])
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id_2, 'replicon_id', 'hit_gspd', 'gspD', '1', 'foo/C',
            'sys_id_C', '1', '0.800', '3.000', '1', 'gspD', 'accessory', '803',
            '1.0', '1.000', '1.000', '1.000', '10', '20', 'sys_id_A'
        ])
        sol_tsv += "\n"
        sol_tsv += "\n"

        f_out = StringIO()
        hit_multi_sys_tracker = HitSystemTracker([sys_A, sys_B])
        solutions_to_tsv([sol_1, sol_2], hit_multi_sys_tracker, f_out)
        self.assertMultiLineEqual(sol_tsv, f_out.getvalue())
コード例 #4
0
    def test_systems_to_tsv(self):
        args = argparse.Namespace()
        args.sequence_db = self.find_data("base", "test_1.fasta")
        args.db_type = 'gembase'
        args.models_dir = self.find_data('models')
        cfg = Config(MacsyDefaults(), args)

        model_name = 'foo'
        models_location = ModelLocation(
            path=os.path.join(args.models_dir, model_name))

        # we need to reset the ProfileFactory
        # because it's a like a singleton
        # so other tests are influenced by ProfileFactory and it's configuration
        # for instance search_genes get profile without hmmer_exe
        profile_factory = ProfileFactory(cfg)

        model = Model("foo/T2SS", 10)
        gene_name = "gspD"
        c_gene_gspd = CoreGene(models_location, gene_name, profile_factory)
        gene_gspd = ModelGene(c_gene_gspd, model)
        model.add_mandatory_gene(gene_gspd)
        gene_name = "sctJ"
        c_gene_sctj = CoreGene(models_location, gene_name, profile_factory)
        gene_sctj = ModelGene(c_gene_sctj, model)
        model.add_accessory_gene(gene_sctj)

        hit_1 = Hit(c_gene_gspd, "hit_1", 803, "replicon_id", 1, 1.0, 1.0, 1.0,
                    1.0, 10, 20)
        v_hit_1 = ValidHit(hit_1, gene_gspd, GeneStatus.MANDATORY)
        hit_2 = Hit(c_gene_sctj, "hit_2", 803, "replicon_id", 1, 1.0, 1.0, 1.0,
                    1.0, 10, 20)
        v_hit_2 = ValidHit(hit_2, gene_sctj, GeneStatus.ACCESSORY)
        system_1 = System(model, [
            Cluster([v_hit_1, v_hit_2], model, HitWeight(**cfg.hit_weights()))
        ], cfg.redundancy_penalty())

        system_tsv = f"""# macsyfinder {macsypy.__version__}
# {' '.join(sys.argv)}
# Systems found:
"""
        system_tsv += "\t".join([
            "replicon", "hit_id", "gene_name", "hit_pos", "model_fqn",
            "sys_id", "sys_loci", "sys_wholeness", "sys_score", "sys_occ",
            "hit_gene_ref", "hit_status", "hit_seq_len", "hit_i_eval",
            "hit_score", "hit_profile_cov", "hit_seq_cov", "hit_begin_match",
            "hit_end_match", "used_in"
        ])
        system_tsv += "\n"
        system_tsv += "\t".join([
            "replicon_id", "hit_1", "gspD", "1", "foo/T2SS", system_1.id, "1",
            "1.000", "1.500", "1", "gspD", "mandatory", "803", "1.0", "1.000",
            "1.000", "1.000", "10", "20", ""
        ])
        system_tsv += "\n"
        system_tsv += "\t".join([
            "replicon_id", "hit_2", "sctJ", "1", "foo/T2SS", system_1.id, "1",
            "1.000", "1.500", "1", "sctJ", "accessory", "803", "1.0", "1.000",
            "1.000", "1.000", "10", "20", ""
        ])
        system_tsv += "\n\n"

        f_out = StringIO()
        track_multi_systems_hit = HitSystemTracker([system_1])
        systems_to_tsv([system_1], track_multi_systems_hit, f_out)
        self.assertMultiLineEqual(system_tsv, f_out.getvalue())

        # test No system found
        system_str = f"""# macsyfinder {macsypy.__version__}
# {' '.join(sys.argv)}
# No Systems found
"""
        f_out = StringIO()
        track_multi_systems_hit = HitSystemTracker([])
        systems_to_tsv([], track_multi_systems_hit, f_out)
        self.assertMultiLineEqual(system_str, f_out.getvalue())
コード例 #5
0
class MatchMakerTest(MacsyTest):
    def setUp(self) -> None:
        args = argparse.Namespace()
        args.sequence_db = self.find_data("base", "test_1.fasta")
        args.db_type = 'gembase'
        args.models_dir = self.find_data('models')
        self.cfg = Config(MacsyDefaults(), args)

        self.model_name = 'foo'
        self.model_location = ModelLocation(
            path=os.path.join(args.models_dir, self.model_name))
        self.profile_factory = ProfileFactory(self.cfg)

        self.model = Model("foo/model_A", 10)
        c_gene_sctn = CoreGene(self.model_location, "sctN",
                               self.profile_factory)
        gene_sctn = ModelGene(c_gene_sctn, self.model)

        c_gene_sctn_flg = CoreGene(self.model_location, "sctN_FLG",
                                   self.profile_factory)
        gene_sctn_flg = Exchangeable(c_gene_sctn_flg, gene_sctn)
        gene_sctn.add_exchangeable(gene_sctn_flg)

        c_gene_sctj = CoreGene(self.model_location, "sctJ",
                               self.profile_factory)
        gene_sctj = ModelGene(c_gene_sctj, self.model)
        c_gene_sctj_flg = CoreGene(self.model_location, "sctJ_FLG",
                                   self.profile_factory)
        gene_sctj_flg = Exchangeable(c_gene_sctj_flg, gene_sctj)
        gene_sctj.add_exchangeable(gene_sctj_flg)

        c_gene_gspd = CoreGene(self.model_location, "gspD",
                               self.profile_factory)
        gene_gspd = ModelGene(c_gene_gspd, self.model)

        c_gene_flgb = CoreGene(self.model_location, "flgB",
                               self.profile_factory)
        gene_gspd_an = Exchangeable(c_gene_flgb, gene_gspd)
        gene_gspd.add_exchangeable(gene_gspd_an)

        c_gene_abc = CoreGene(self.model_location, "abc", self.profile_factory)
        gene_abc = ModelGene(c_gene_abc, self.model)
        c_gene_tadz = CoreGene(self.model_location, "tadZ",
                               self.profile_factory)
        gene_abc_ho = Exchangeable(c_gene_tadz, gene_abc)
        gene_abc.add_exchangeable(gene_abc_ho)

        c_gene_toto = CoreGene(self.model_location, "toto",
                               self.profile_factory)
        gene_toto = ModelGene(c_gene_toto, self.model)
        c_gene_totote = CoreGene(self.model_location, "totote",
                                 self.profile_factory)
        gene_toto_ho = Exchangeable(c_gene_totote, gene_toto)
        gene_toto.add_exchangeable(gene_toto_ho)

        self.model.add_mandatory_gene(gene_sctn)
        self.model.add_mandatory_gene(gene_sctj)
        self.model.add_accessory_gene(gene_gspd)
        self.model.add_neutral_gene(gene_toto)
        self.model.add_forbidden_gene(gene_abc)

        self.c_hits = {
            'h_sctj':
            Hit(c_gene_sctj, "hit_sctj", 803, "replicon_id", 1, 1.0, 1.0, 1.0,
                1.0, 10, 20),
            'h_sctj_flg':
            Hit(c_gene_sctj_flg, "hit_sctj_flg", 803, "replicon_id", 1, 1.0,
                1.0, 1.0, 1.0, 10, 20),
            'h_sctn':
            Hit(c_gene_sctn, "hit_sctn", 803, "replicon_id", 1, 1.0, 1.0, 1.0,
                1.0, 10, 20),
            'h_sctn_flg':
            Hit(c_gene_sctn_flg, "hit_sctn_flg", 803, "replicon_id", 1, 1.0,
                1.0, 1.0, 1.0, 10, 20),
            'h_gspd':
            Hit(c_gene_gspd, "hit_gspd", 803, "replicon_id", 1, 1.0, 1.0, 1.0,
                1.0, 10, 20),
            'h_gspd_an':
            Hit(c_gene_flgb, "hit_gspd_an", 803, "replicon_id", 1, 1.0, 1.0,
                1.0, 1.0, 10, 20),
            'h_abc':
            Hit(c_gene_abc, "hit_abc", 803, "replicon_id", 1, 1.0, 1.0, 1.0,
                1.0, 10, 20),
            'h_abc_ho':
            Hit(c_gene_tadz, "hit_abc_ho", 803, "replicon_id", 1, 1.0, 1.0,
                1.0, 1.0, 10, 20),
            'h_toto':
            Hit(c_gene_toto, "hit_toto", 803, "replicon_id", 1, 1.0, 1.0, 1.0,
                1.0, 10, 20),
            'h_toto_ho':
            Hit(c_gene_totote, "hit_toto_ho", 803, "replicon_id", 1, 1.0, 1.0,
                1.0, 1.0, 10, 20),
        }

    def test_sort_hits_by_status(self):
        ordered_match_maker = OrderedMatchMaker(self.model,
                                                self.cfg.redundancy_penalty())
        mandatory_exp = [self.c_hits['h_sctn'], self.c_hits['h_sctj']]
        accessory_exp = [self.c_hits['h_gspd']]
        neutral_exp = [self.c_hits['h_toto']]
        forbidden_exp = [self.c_hits['h_abc']]

        mandatory, accessory, neutral, forbidden = ordered_match_maker.sort_hits_by_status(
            mandatory_exp + accessory_exp + neutral_exp + forbidden_exp)
        self.assertListEqual([h.gene.name for h in mandatory_exp],
                             [h.gene.name for h in mandatory])
        self.assertListEqual([h.gene.name for h in accessory_exp],
                             [h.gene.name for h in accessory])
        self.assertListEqual([h.gene.name for h in neutral_exp],
                             [h.gene.name for h in neutral])
        self.assertListEqual([h.gene.name for h in forbidden_exp],
                             [h.gene.name for h in forbidden])

        # do the same but with exchangeable
        mandatory_exp_exch = [
            self.c_hits['h_sctn_flg'], self.c_hits['h_sctj_flg']
        ]
        accessory_exp_exch = [self.c_hits['h_gspd_an']]
        neutral_exp_exch = [self.c_hits['h_toto_ho']]
        forbidden_exp_exch = [self.c_hits['h_abc_ho']]

        mandatory, accessory, neutral, forbidden = ordered_match_maker.sort_hits_by_status(
            mandatory_exp_exch + accessory_exp_exch + neutral_exp_exch +
            forbidden_exp_exch)
        self.assertListEqual([h.gene.name for h in mandatory_exp_exch],
                             [h.gene.name for h in mandatory])
        self.assertListEqual([h.gene.name for h in accessory_exp_exch],
                             [h.gene.name for h in accessory])
        self.assertListEqual([h.gene.name for h in neutral_exp_exch],
                             [h.gene.name for h in neutral])
        self.assertListEqual([h.gene.name for h in forbidden_exp_exch],
                             [h.gene.name for h in forbidden])

        # test if gene_ref is the ModelGene
        # alternate_of return the ModelGene of the function
        self.assertListEqual(
            [h.gene.name for h in mandatory_exp],
            [h.gene_ref.alternate_of().name for h in mandatory])
        self.assertListEqual(
            [h.gene.name for h in accessory_exp],
            [h.gene_ref.alternate_of().name for h in accessory])
        self.assertListEqual([h.gene.name for h in neutral_exp],
                             [h.gene_ref.alternate_of().name for h in neutral])
        self.assertListEqual(
            [h.gene.name for h in forbidden_exp],
            [h.gene_ref.alternate_of().name for h in forbidden])

    def test_ordered_match(self):

        #####################
        # test single locus #
        #####################

        # it lack one mandatory gene
        self.model._min_mandatory_genes_required = 2
        self.model._min_genes_required = 3
        c1 = Cluster([self.c_hits['h_sctj'], self.c_hits['h_gspd']],
                     self.model, self.cfg.hit_weights())
        ordered_match_maker = OrderedMatchMaker(self.model,
                                                self.cfg.redundancy_penalty())
        res = ordered_match_maker.match([c1])
        self.assertIsInstance(res, RejectedClusters)
        self.assertEqual(res.reasons, [
            "The quorum of mandatory genes required (2) is not reached: 1",
            "The quorum of genes required (3) is not reached: 2"
        ])

        # all quorum are reached
        self.model._min_mandatory_genes_required = 2
        self.model._min_genes_required = 1
        c1 = Cluster([
            self.c_hits['h_sctj'], self.c_hits['h_sctn'], self.c_hits['h_gspd']
        ], self.model, self.cfg.hit_weights())
        ordered_match_maker = OrderedMatchMaker(self.model,
                                                self.cfg.redundancy_penalty())
        res = ordered_match_maker.match([c1])
        self.assertIsInstance(res, System)

        # with one mandatory analog
        self.model._min_mandatory_genes_required = 2
        self.model._min_genes_required = 1
        c1 = Cluster([
            self.c_hits['h_sctj_flg'], self.c_hits['h_sctn'],
            self.c_hits['h_gspd']
        ], self.model, self.cfg.hit_weights())
        ordered_match_maker = OrderedMatchMaker(self.model,
                                                self.cfg.redundancy_penalty())
        res = ordered_match_maker.match([c1])
        self.assertIsInstance(res, System)

        # with one accessory analog
        self.model._min_mandatory_genes_required = 2
        self.model._min_genes_required = 1
        c1 = Cluster([
            self.c_hits['h_sctj'], self.c_hits['h_sctn'],
            self.c_hits['h_gspd_an']
        ], self.model, self.cfg.hit_weights())
        ordered_match_maker = OrderedMatchMaker(self.model,
                                                self.cfg.redundancy_penalty())
        res = ordered_match_maker.match([c1])
        self.assertIsInstance(res, System)

        # the min_gene_required quorum is not reached
        self.model._min_mandatory_genes_required = 2
        self.model._min_genes_required = 4
        c1 = Cluster([
            self.c_hits['h_sctj'], self.c_hits['h_sctn_flg'],
            self.c_hits['h_gspd']
        ], self.model, self.cfg.hit_weights())
        ordered_match_maker = OrderedMatchMaker(self.model,
                                                self.cfg.redundancy_penalty())
        res = ordered_match_maker.match([c1])
        self.assertIsInstance(res, RejectedClusters)
        self.assertListEqual(
            res.reasons,
            ["The quorum of genes required (4) is not reached: 3"])

        # the min_gene_required quorum is not reached even there is a neutral
        self.model._min_mandatory_genes_required = 2
        self.model._min_genes_required = 4
        c1 = Cluster([
            self.c_hits['h_sctj'], self.c_hits['h_sctn_flg'],
            self.c_hits['h_gspd'], self.c_hits['h_toto']
        ], self.model, self.cfg.hit_weights())
        ordered_match_maker = OrderedMatchMaker(self.model,
                                                self.cfg.redundancy_penalty())
        res = ordered_match_maker.match([c1])
        self.assertIsInstance(res, RejectedClusters)
        self.assertEqual(
            res.reasons,
            ["The quorum of genes required (4) is not reached: 3"])

        self.model._min_mandatory_genes_required = 2
        self.model._min_genes_required = 4
        c1 = Cluster([
            self.c_hits['h_sctj'], self.c_hits['h_sctn_flg'],
            self.c_hits['h_gspd'], self.c_hits['h_toto_ho']
        ], self.model, self.cfg.hit_weights())
        ordered_match_maker = OrderedMatchMaker(self.model,
                                                self.cfg.redundancy_penalty())
        res = ordered_match_maker.match([c1])
        self.assertIsInstance(res, RejectedClusters)
        self.assertEqual(
            res.reasons,
            ["The quorum of genes required (4) is not reached: 3"])

        # the cluster contain a forbidden gene
        self.model._min_mandatory_genes_required = 2
        self.model._min_genes_required = 1
        c1 = Cluster([
            self.c_hits['h_sctj'], self.c_hits['h_sctn'],
            self.c_hits['h_gspd'], self.c_hits['h_abc']
        ], self.model, self.cfg.hit_weights())
        ordered_match_maker = OrderedMatchMaker(self.model,
                                                self.cfg.redundancy_penalty())
        res = ordered_match_maker.match([c1])
        self.assertIsInstance(res, RejectedClusters)
        self.assertEqual(res.reasons,
                         ["There is 1 forbidden genes occurrence(s): abc"])

        # the cluster contain a forbidden gene homolog
        self.model._min_mandatory_genes_required = 2
        self.model._min_genes_required = 1
        c1 = Cluster([
            self.c_hits['h_sctj'], self.c_hits['h_sctn'],
            self.c_hits['h_gspd'], self.c_hits['h_abc_ho']
        ], self.model, self.cfg.hit_weights())
        ordered_match_maker = OrderedMatchMaker(self.model,
                                                self.cfg.redundancy_penalty())
        res = ordered_match_maker.match([c1])
        self.assertIsInstance(res, RejectedClusters)
        self.assertEqual(res.reasons,
                         ["There is 1 forbidden genes occurrence(s): tadZ"])

        #####################
        # test multi loci   #
        #####################
        self.model._min_mandatory_genes_required = 2
        self.model._min_genes_required = 1
        c1 = Cluster([self.c_hits['h_sctj'], self.c_hits['h_sctn']],
                     self.model, self.cfg.hit_weights())
        c2 = Cluster([self.c_hits['h_gspd']], self.model,
                     self.cfg.hit_weights())
        ordered_match_maker = OrderedMatchMaker(self.model,
                                                self.cfg.redundancy_penalty())
        res = ordered_match_maker.match([c1, c2])
        self.assertIsInstance(res, System)

        # with one analog an one homolog
        self.model._min_mandatory_genes_required = 2
        self.model._min_genes_required = 1
        c1 = Cluster([self.c_hits['h_sctj_flg'], self.c_hits['h_sctn_flg']],
                     self.model, self.cfg.hit_weights())
        c2 = Cluster([self.c_hits['h_gspd_an']], self.model,
                     self.cfg.hit_weights())
        ordered_match_maker = OrderedMatchMaker(self.model,
                                                self.cfg.redundancy_penalty())
        res = ordered_match_maker.match([c1, c2])
        self.assertIsInstance(res, System)

        # with one analog an one homolog and one forbidden in 3 clusters
        self.model._min_mandatory_genes_required = 2
        self.model._min_genes_required = 1
        c1 = Cluster([self.c_hits['h_sctj_flg'], self.c_hits['h_sctn_flg']],
                     self.model, self.cfg.hit_weights())
        c2 = Cluster([self.c_hits['h_gspd']], self.model,
                     self.cfg.hit_weights())
        c3 = Cluster([self.c_hits['h_abc']], self.model,
                     self.cfg.hit_weights())
        ordered_match_maker = OrderedMatchMaker(self.model,
                                                self.cfg.redundancy_penalty())
        res = ordered_match_maker.match([c1, c2, c3])
        self.assertEqual(res.reasons,
                         ["There is 1 forbidden genes occurrence(s): abc"])

    def test_unordered_match(self):

        # it lack one mandatory gene
        self.model._min_mandatory_genes_required = 2
        self.model._min_genes_required = 3
        hits = [self.c_hits['h_sctj'], self.c_hits['h_gspd']]
        unordered_match_maker = UnorderedMatchMaker(self.model)
        res = unordered_match_maker.match(hits)
        self.assertIsInstance(res, UnlikelySystem)
        self.assertEqual(res.reasons, [
            "The quorum of mandatory genes required (2) is not reached: 1",
            "The quorum of genes required (3) is not reached: 2"
        ])

        # all quorum are reached
        self.model._min_mandatory_genes_required = 2
        self.model._min_genes_required = 1
        hits = [
            self.c_hits['h_sctj'], self.c_hits['h_sctn'], self.c_hits['h_gspd']
        ]
        unordered_match_maker = UnorderedMatchMaker(self.model)
        res = unordered_match_maker.match(hits)
        self.assertIsInstance(res, LikelySystem)

        # with one mandatory analog
        self.model._min_mandatory_genes_required = 2
        self.model._min_genes_required = 1
        hits = [
            self.c_hits['h_sctj_flg'], self.c_hits['h_sctn'],
            self.c_hits['h_gspd']
        ]
        unordered_match_maker = UnorderedMatchMaker(self.model)
        res = unordered_match_maker.match(hits)
        self.assertIsInstance(res, LikelySystem)

        # with one accessory analog
        self.model._min_mandatory_genes_required = 2
        self.model._min_genes_required = 1
        hits = [
            self.c_hits['h_sctj'], self.c_hits['h_sctn'],
            self.c_hits['h_gspd_an']
        ]
        unordered_match_maker = UnorderedMatchMaker(self.model)
        res = unordered_match_maker.match(hits)
        self.assertIsInstance(res, LikelySystem)

        # the min_gene_required quorum is not reached
        self.model._min_mandatory_genes_required = 2
        self.model._min_genes_required = 4
        hits = [
            self.c_hits['h_sctj'], self.c_hits['h_sctn_flg'],
            self.c_hits['h_gspd']
        ]
        unordered_match_maker = UnorderedMatchMaker(self.model)
        res = unordered_match_maker.match(hits)
        self.assertIsInstance(res, UnlikelySystem)
        self.assertEqual(
            res.reasons,
            ["The quorum of genes required (4) is not reached: 3"])

        # the min_gene_required quorum is not reached even there is a neutral
        self.model._min_mandatory_genes_required = 2
        self.model._min_genes_required = 4
        hits = [
            self.c_hits['h_sctj'], self.c_hits['h_sctn_flg'],
            self.c_hits['h_gspd'], self.c_hits['h_toto']
        ]
        unordered_match_maker = UnorderedMatchMaker(self.model)
        res = unordered_match_maker.match(hits)
        self.assertIsInstance(res, UnlikelySystem)
        self.assertEqual(
            res.reasons,
            ["The quorum of genes required (4) is not reached: 3"])

        self.model._min_mandatory_genes_required = 2
        self.model._min_genes_required = 4
        hits = [
            self.c_hits['h_sctj'], self.c_hits['h_sctn_flg'],
            self.c_hits['h_gspd'], self.c_hits['h_toto_ho']
        ]
        unordered_match_maker = UnorderedMatchMaker(self.model)
        res = unordered_match_maker.match(hits)
        self.assertIsInstance(res, UnlikelySystem)
        self.assertEqual(
            res.reasons,
            ["The quorum of genes required (4) is not reached: 3"])

        # the hits contain a forbidden gene
        self.model._min_mandatory_genes_required = 2
        self.model._min_genes_required = 1
        allowed_hits = [
            self.c_hits['h_sctj'], self.c_hits['h_sctn'], self.c_hits['h_gspd']
        ]
        forbidden_hits = [self.c_hits['h_abc']]
        unordered_match_maker = UnorderedMatchMaker(self.model)
        res = unordered_match_maker.match(allowed_hits + forbidden_hits)
        self.assertIsInstance(res, LikelySystem)
        self.assertListEqual([(h.id, h.position) for h in res.hits],
                             [(h.id, h.position)
                              for h in allowed_hits + forbidden_hits])
        self.assertListEqual(res._forbidden_hits, [self.c_hits['h_abc']])

        # the cluster contain a forbidden gene homolog
        self.model._min_mandatory_genes_required = 2
        self.model._min_genes_required = 1
        hits = [
            self.c_hits['h_sctj'], self.c_hits['h_sctn'],
            self.c_hits['h_gspd'], self.c_hits['h_abc_ho']
        ]

        unordered_match_maker = UnorderedMatchMaker(self.model)
        res = unordered_match_maker.match(hits)
        self.assertIsInstance(res, LikelySystem)
        self.assertListEqual(res._forbidden_hits, [self.c_hits['h_abc_ho']])
コード例 #6
0
class SerializationTest(MacsyTest):
    def setUp(self) -> None:
        args = argparse.Namespace()
        args.sequence_db = self.find_data("base", "test_1.fasta")
        args.db_type = 'gembase'
        args.models_dir = self.find_data('models')
        self.cfg = Config(MacsyDefaults(), args)

        self.model_name = 'foo'
        self.model_location = ModelLocation(
            path=os.path.join(args.models_dir, self.model_name))
        self.profile_factory = ProfileFactory(self.cfg)
        self.hit_weights = HitWeight(**self.cfg.hit_weights())
        # reset the uniq id number for AbstractUnordered
        # to have predictable results for (Likely/Unlikely)Systems
        System._id = itertools.count(1)
        AbstractUnordered._id = itertools.count(1)

    def test_SystemSerializer_str(self):
        model_name = 'foo'
        model_location = ModelLocation(
            path=os.path.join(self.cfg.models_dir()[0], model_name))
        model_A = Model("foo/A", 10)
        model_B = Model("foo/B", 10)

        c_gene_sctn_flg = CoreGene(model_location, "sctN_FLG",
                                   self.profile_factory)
        gene_sctn_flg = ModelGene(c_gene_sctn_flg, model_B)
        c_gene_sctj_flg = CoreGene(model_location, "sctJ_FLG",
                                   self.profile_factory)
        gene_sctj_flg = ModelGene(c_gene_sctj_flg, model_B)
        c_gene_flgB = CoreGene(model_location, "flgB", self.profile_factory)
        c_gene_tadZ = CoreGene(model_location, "tadZ", self.profile_factory)
        gene_tadZ = ModelGene(c_gene_tadZ, model_B)

        c_gene_sctn = CoreGene(model_location, "sctN", self.profile_factory)
        gene_sctn = ModelGene(c_gene_sctn, model_A)
        gene_sctn_hom = Exchangeable(c_gene_sctn_flg, gene_sctn)
        gene_sctn.add_exchangeable(gene_sctn_hom)

        c_gene_sctj = CoreGene(model_location, "sctJ", self.profile_factory)
        gene_sctj = ModelGene(c_gene_sctj, model_A)
        gene_sctj_an = Exchangeable(c_gene_sctj_flg, gene_sctj)
        gene_sctj.add_exchangeable(gene_sctj_an)

        c_gene_gspd = CoreGene(model_location, "gspD", self.profile_factory)
        gene_gspd = ModelGene(c_gene_gspd, model_A)
        gene_gspd_an = Exchangeable(c_gene_flgB, gene_gspd)
        gene_gspd.add_exchangeable(gene_gspd_an)

        c_gene_abc = CoreGene(model_location, "abc", self.profile_factory)
        gene_abc = ModelGene(c_gene_abc, model_A)
        gene_abc_ho = Exchangeable(c_gene_tadZ, gene_abc)
        gene_abc.add_exchangeable(gene_abc_ho)

        model_A.add_mandatory_gene(gene_sctn)
        model_A.add_mandatory_gene(gene_sctj)
        model_A.add_accessory_gene(gene_gspd)
        model_A.add_forbidden_gene(gene_abc)

        model_B.add_mandatory_gene(gene_sctn_flg)
        model_B.add_mandatory_gene(gene_sctj_flg)
        model_B.add_accessory_gene(gene_gspd)
        model_B.add_accessory_gene(gene_tadZ)

        h_sctj = CoreHit(c_gene_sctj, "hit_sctj", 803, "replicon_id", 1, 1.0,
                         1.0, 1.0, 1.0, 10, 20)
        h_sctn = CoreHit(c_gene_sctn, "hit_sctn", 803, "replicon_id", 1, 1.0,
                         1.0, 1.0, 1.0, 10, 20)
        h_gspd = CoreHit(c_gene_gspd, "hit_gspd", 803, "replicon_id", 1, 1.0,
                         1.0, 1.0, 1.0, 10, 20)

        h_sctj_flg = CoreHit(c_gene_sctj_flg, "hit_sctj_flg", 803,
                             "replicon_id", 1, 1.0, 1.0, 1.0, 1.0, 10, 20)
        h_tadZ = CoreHit(c_gene_tadZ, "hit_tadZ", 803, "replicon_id", 1, 1.0,
                         1.0, 1.0, 1.0, 10, 20)

        model_A._min_mandatory_genes_required = 2
        model_A._min_genes_required = 2
        c1 = Cluster([
            ModelHit(h_sctj, gene_sctj, GeneStatus.MANDATORY),
            ModelHit(h_sctn, gene_sctn, GeneStatus.MANDATORY),
            ModelHit(h_gspd, gene_gspd, GeneStatus.ACCESSORY)
        ], model_A, self.hit_weights)

        c2 = Cluster([
            ModelHit(h_sctj, gene_sctj, GeneStatus.MANDATORY),
            ModelHit(h_sctn, gene_sctn, GeneStatus.MANDATORY)
        ], model_A, self.hit_weights)

        model_B._min_mandatory_genes_required = 1
        model_B._min_genes_required = 2
        c3 = Cluster([
            ModelHit(h_sctj_flg, gene_sctj_flg, GeneStatus.MANDATORY),
            ModelHit(h_tadZ, gene_tadZ, GeneStatus.ACCESSORY),
            ModelHit(h_gspd, gene_gspd, GeneStatus.ACCESSORY)
        ], model_B, self.hit_weights)

        sys_A = System(model_A, [c1, c2], self.cfg.redundancy_penalty())
        sys_A.id = "sys_id_A"
        sys_B = System(model_B, [c3], self.cfg.redundancy_penalty())
        sys_B.id = "sys_id_B"
        hit_multi_sys_tracker = HitSystemTracker([sys_A, sys_B])
        system_serializer = TxtSystemSerializer()

        sys_str = f"""system id = {sys_A.id}
model = foo/A
replicon = replicon_id
clusters = [('hit_sctj', 'sctJ', 1), ('hit_sctn', 'sctN', 1), ('hit_gspd', 'gspD', 1)], [('hit_sctj', 'sctJ', 1), ('hit_sctn', 'sctN', 1)]
occ = 2
wholeness = 1.000
loci nb = 2
score = 1.500

mandatory genes:
\t- sctN: 2 (sctN, sctN)
\t- sctJ: 2 (sctJ, sctJ)

accessory genes:
\t- gspD: 1 (gspD [sys_id_B])

neutral genes:
"""
        self.assertEqual(
            sys_str, system_serializer.serialize(sys_A, hit_multi_sys_tracker))

    def test_SystemSerializer_tsv(self):
        model = Model("foo/T2SS", 10)
        c_gene_gspd = CoreGene(self.model_location, "gspD",
                               self.profile_factory)
        gene_gspd = ModelGene(c_gene_gspd, model)
        model.add_mandatory_gene(gene_gspd)
        c_gene_sctj = CoreGene(self.model_location, "sctJ",
                               self.profile_factory)
        gene_sctj = ModelGene(c_gene_sctj, model)
        model.add_accessory_gene(gene_sctj)
        c_gene_sctn = CoreGene(self.model_location, "sctN",
                               self.profile_factory)
        gene_sctn = ModelGene(c_gene_sctn, model, loner=True)
        c_gene_sctn_flg = CoreGene(self.model_location, "sctN_FLG",
                                   self.profile_factory)
        gene_sctn_flg = Exchangeable(c_gene_sctn_flg, gene_sctn)
        gene_sctn.add_exchangeable(gene_sctn_flg)
        model.add_accessory_gene(gene_sctn)

        #CoreHit(gene, hit_id, hit_seq_length, replicon_name, position, i_eval, score,
        #         profile_coverage, sequence_coverage, begin_match, end_match
        #                                                           pos      score
        ch_gspd = CoreHit(c_gene_gspd, "h_gspd", 803, "replicon_id", 10, 1.0,
                          1.0, 1.0, 1.0, 10, 20)
        mh_gspd = ModelHit(ch_gspd,
                           gene_ref=gene_gspd,
                           gene_status=GeneStatus.MANDATORY)
        ch_sctj = CoreHit(c_gene_sctj, "h_sctj", 803, "replicon_id", 20, 1.0,
                          1.0, 1.0, 1.0, 20, 30)
        mh_sctj = ModelHit(ch_sctj,
                           gene_ref=gene_sctj,
                           gene_status=GeneStatus.ACCESSORY)

        ch_sctn_flg = CoreHit(c_gene_sctn_flg, "h_sctn_flg", 803,
                              "replicon_id", 40, 1.0, 1.0, 1.0, 1.0, 30, 40)
        mh_sctn_flg = ModelHit(ch_sctn_flg,
                               gene_ref=gene_sctn_flg,
                               gene_status=GeneStatus.ACCESSORY)
        ch_sctn = CoreHit(c_gene_sctn, "h_sctn", 803, "replicon_id", 80, 1.0,
                          1.0, 1.0, 1.0, 30, 40)
        mh_sctn = Loner(ch_sctn,
                        gene_ref=gene_sctn,
                        gene_status=GeneStatus.ACCESSORY,
                        counterpart=[mh_sctn_flg])

        c1 = Cluster([mh_gspd, mh_sctj], model, self.hit_weights)
        c2 = Cluster([mh_sctn], model, self.hit_weights)
        sys_multi_loci = System(model, [c1, c2], self.cfg.redundancy_penalty())
        # score                         1.5 .35 = 1.85
        hit_multi_sys_tracker = HitSystemTracker([sys_multi_loci])
        system_serializer = TsvSystemSerializer()

        sys_tsv = "\t".join([
            "replicon_id", "h_gspd", "gspD", "10", "foo/T2SS",
            sys_multi_loci.id, "1", "1", "1.000", "1.850", "1", "gspD",
            "mandatory", "803", "1.0", "1.000", "1.000", "1.000", "10", "20",
            "", ""
        ])
        sys_tsv += "\n"
        sys_tsv += "\t".join([
            "replicon_id", "h_sctj", "sctJ", "20", "foo/T2SS",
            sys_multi_loci.id, "1", "1", "1.000", "1.850", "1", "sctJ",
            "accessory", "803", "1.0", "1.000", "1.000", "1.000", "20", "30",
            "", ""
        ])
        sys_tsv += "\n"
        sys_tsv += "\t".join([
            "replicon_id", "h_sctn", "sctN", "80", "foo/T2SS",
            sys_multi_loci.id, "1", "-1", "1.000", "1.850", "1", "sctN",
            "accessory", "803", "1.0", "1.000", "1.000", "1.000", "30", "40",
            "h_sctn_flg", ""
        ])
        sys_tsv += "\n"
        self.maxDiff = None
        self.assertEqual(
            sys_tsv,
            system_serializer.serialize(sys_multi_loci, hit_multi_sys_tracker))

    def test_SolutionSerializer_tsv(self):
        model_name = 'foo'
        model_location = ModelLocation(
            path=os.path.join(self.cfg.models_dir()[0], model_name))

        ###########
        # Model B #
        ###########
        model_B = Model("foo/B", 10)
        c_gene_sctn_flg = CoreGene(model_location, "sctN_FLG",
                                   self.profile_factory)
        gene_sctn_flg = ModelGene(c_gene_sctn_flg, model_B)
        c_gene_sctj_flg = CoreGene(model_location, "sctJ_FLG",
                                   self.profile_factory)
        gene_sctj_flg = ModelGene(c_gene_sctj_flg, model_B)
        c_gene_flgB = CoreGene(model_location, "flgB", self.profile_factory)
        gene_flgB = ModelGene(c_gene_flgB, model_B)
        c_gene_tadZ = CoreGene(model_location, "tadZ", self.profile_factory)
        gene_tadZ = ModelGene(c_gene_tadZ, model_B)

        model_B.add_mandatory_gene(gene_sctn_flg)
        model_B.add_mandatory_gene(gene_sctj_flg)
        model_B.add_accessory_gene(gene_flgB)
        model_B.add_accessory_gene(gene_tadZ)

        ###########
        # Model A #
        ###########
        model_A = Model("foo/A", 10)
        c_gene_sctn = CoreGene(model_location, "sctN", self.profile_factory)
        gene_sctn = ModelGene(c_gene_sctn, model_A)
        gene_sctn_hom = Exchangeable(c_gene_sctn_flg, gene_sctn)
        gene_sctn.add_exchangeable(gene_sctn_hom)

        c_gene_sctj = CoreGene(model_location, "sctJ", self.profile_factory)
        gene_sctj = ModelGene(c_gene_sctj, model_A)
        gene_sctj_an = Exchangeable(c_gene_sctj_flg, gene_sctj)
        gene_sctj.add_exchangeable(gene_sctj_an)

        c_gene_gspd = CoreGene(model_location, "gspD", self.profile_factory)
        gene_gspd = ModelGene(c_gene_gspd, model_A)
        gene_gspd_an = Exchangeable(c_gene_flgB, gene_gspd)
        gene_gspd.add_exchangeable(gene_gspd_an)

        c_gene_abc = CoreGene(model_location, "abc", self.profile_factory)
        gene_abc = ModelGene(c_gene_abc, model_A, loner=True)
        gene_abc_ho = Exchangeable(c_gene_tadZ, gene_abc)
        gene_abc.add_exchangeable(gene_abc_ho)

        model_A.add_mandatory_gene(gene_sctn)
        model_A.add_mandatory_gene(gene_sctj)
        model_A.add_accessory_gene(gene_gspd)
        model_A.add_accessory_gene(gene_abc)

        #       CoreHit(gene, hit_id, hit_seq_length, replicon_name, position, i_eval, score,
        #         profile_coverage, sequence_coverage, begin_match, end_match
        #                                                           pos      score
        h_sctj = CoreHit(c_gene_sctj, "hit_sctj", 803, "replicon_id", 1, 1.0,
                         1.0, 1.0, 1.0, 10, 20)
        mh_sctj = ModelHit(h_sctj, gene_sctj, GeneStatus.MANDATORY)
        h_sctn = CoreHit(c_gene_sctn, "hit_sctn", 803, "replicon_id", 2, 1.0,
                         1.0, 1.0, 1.0, 10, 20)
        mh_sctn = ModelHit(h_sctn, gene_sctn, GeneStatus.MANDATORY)
        h_gspd = CoreHit(c_gene_gspd, "hit_gspd", 803, "replicon_id", 3, 1.0,
                         1.0, 1.0, 1.0, 10, 20)
        mh_gspd = ModelHit(h_gspd, gene_gspd, GeneStatus.ACCESSORY)

        h_sctj_flg = CoreHit(c_gene_sctj_flg, "hit_sctj_flg", 803,
                             "replicon_id", 10, 1.0, 1.0, 1.0, 1.0, 10, 20)
        h_flgB = CoreHit(c_gene_flgB, "hit_flgB", 803, "replicon_id", 11, 1.0,
                         1.0, 1.0, 1.0, 10, 20)
        h_abc = CoreHit(c_gene_abc, "hit_abc", 803, "replicon_id", 20, 1.0,
                        1.0, 1.0, 1.0, 10, 20)
        h_abc2 = CoreHit(c_gene_abc, "hit_abc2", 803, "replicon_id", 50, 1.0,
                         1.0, 1.0, 1.0, 10, 20)
        h_tadZ = CoreHit(c_gene_tadZ, "hit_tadZ", 803, "replicon_id", 40, 1.0,
                         1.0, 1.0, 1.0, 10, 20)
        mh_sctj_flg = ModelHit(h_sctj_flg, gene_sctj_flg, GeneStatus.MANDATORY)
        mh_flgB = ModelHit(h_flgB, gene_flgB, GeneStatus.ACCESSORY)
        mh_abc = ModelHit(h_abc, gene_abc, GeneStatus.ACCESSORY)
        mh_abc2 = ModelHit(h_abc2, gene_abc, GeneStatus.ACCESSORY)
        mh_tadZ = ModelHit(h_tadZ, gene_tadZ, GeneStatus.ACCESSORY)

        model_A._min_mandatory_genes_required = 2
        model_A._min_genes_required = 2
        c1 = Cluster([mh_sctj, mh_sctn, mh_gspd], model_A, self.hit_weights)
        c2 = Cluster([mh_sctj, mh_sctn], model_A, self.hit_weights)
        c3 = Cluster([
            Loner(h_abc,
                  gene_ref=gene_abc,
                  gene_status=GeneStatus.ACCESSORY,
                  counterpart=[mh_abc2])
        ], model_A, self.hit_weights)

        model_B._min_mandatory_genes_required = 1
        model_B._min_genes_required = 2
        c5 = Cluster([mh_sctj_flg, mh_tadZ, mh_flgB], model_B,
                     self.hit_weights)

        sys_A = System(model_A, [c1, c2, c3], self.cfg.redundancy_penalty())
        # score =               2.5, 2 , 0.35 = 4.85 - (2 * 1.5) = 1.85

        sys_A.id = "sys_id_A"
        sys_B = System(model_B, [c5], self.cfg.redundancy_penalty())
        # score =                2.0
        sys_B.id = "sys_id_B"

        sol = Solution([sys_A, sys_B])
        sol_id = '12'

        hit_multi_sys_tracker = HitSystemTracker([sys_A, sys_B])
        sol_serializer = TsvSolutionSerializer()

        sol_tsv = '\t'.join([
            sol_id, 'replicon_id', 'hit_sctj', 'sctJ', '1', 'foo/A',
            'sys_id_A', '2', '1', '1.000', '1.850', '2', 'sctJ', 'mandatory',
            '803', '1.0', '1.000', '1.000', '1.000', '10', '20', '', ''
        ])
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id, 'replicon_id', 'hit_sctn', 'sctN', '2', 'foo/A',
            'sys_id_A', '2', '1', '1.000', '1.850', '2', 'sctN', 'mandatory',
            '803', '1.0', '1.000', '1.000', '1.000', '10', '20', '', ''
        ])
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id, 'replicon_id', 'hit_gspd', 'gspD', '3', 'foo/A',
            'sys_id_A', '2', '1', '1.000', '1.850', '2', 'gspD', 'accessory',
            '803', '1.0', '1.000', '1.000', '1.000', '10', '20', '', ''
        ])
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id, 'replicon_id', 'hit_sctj', 'sctJ', '1', 'foo/A',
            'sys_id_A', '2', '2', '1.000', '1.850', '2', 'sctJ', 'mandatory',
            '803', '1.0', '1.000', '1.000', '1.000', '10', '20', '', ''
        ])
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id, 'replicon_id', 'hit_sctn', 'sctN', '2', 'foo/A',
            'sys_id_A', '2', '2', '1.000', '1.850', '2', 'sctN', 'mandatory',
            '803', '1.0', '1.000', '1.000', '1.000', '10', '20', '', ''
        ])
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id, 'replicon_id', 'hit_abc', 'abc', '20', 'foo/A', 'sys_id_A',
            '2', '-1', '1.000', '1.850', '2', 'abc', 'accessory', '803', '1.0',
            '1.000', '1.000', '1.000', '10', '20', 'hit_abc2', ''
        ])
        sol_tsv += "\n"
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id, 'replicon_id', 'hit_sctj_flg', 'sctJ_FLG', '10', 'foo/B',
            'sys_id_B', '1', '1', '0.750', '2.000', '1', 'sctJ_FLG',
            'mandatory', '803', '1.0', '1.000', '1.000', '1.000', '10', '20',
            '', ''
        ])
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id, 'replicon_id', 'hit_flgB', 'flgB', '11', 'foo/B',
            'sys_id_B', '1', '1', '0.750', '2.000', '1', 'flgB', 'accessory',
            '803', '1.0', '1.000', '1.000', '1.000', '10', '20', '', ''
        ])
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id, 'replicon_id', 'hit_tadZ', 'tadZ', '40', 'foo/B',
            'sys_id_B', '1', '1', '0.750', '2.000', '1', 'tadZ', 'accessory',
            '803', '1.0', '1.000', '1.000', '1.000', '10', '20', '', ''
        ])
        sol_tsv += "\n"
        sol_tsv += "\n"
        ser = sol_serializer.serialize(sol, sol_id, hit_multi_sys_tracker)
        self.maxDiff = None
        self.assertEqual(ser, sol_tsv)

    def test_LikelySystemSerializer_txt(self):
        model = Model("foo/FOO", 10)
        c_gene_gspd = CoreGene(self.model_location, "gspD",
                               self.profile_factory)
        gene_gspd = ModelGene(c_gene_gspd, model)
        model.add_mandatory_gene(gene_gspd)
        c_gene_sctj = CoreGene(self.model_location, "sctJ",
                               self.profile_factory)
        gene_sctj = ModelGene(c_gene_sctj, model)
        model.add_accessory_gene(gene_sctj)
        c_gene_sctn = CoreGene(self.model_location, "sctN",
                               self.profile_factory)
        gene_sctn = ModelGene(c_gene_sctn, model)
        model.add_accessory_gene(gene_sctn)
        c_gene_abc = CoreGene(self.model_location, "abc", self.profile_factory)
        gene_abc = ModelGene(c_gene_abc, model)
        model.add_forbidden_gene(gene_abc)

        hit_1 = CoreHit(c_gene_gspd, "hit_1", 803, "replicon_id", 1, 1.0, 1.0,
                        1.0, 1.0, 10, 20)
        v_hit_1 = ModelHit(hit_1, gene_gspd, GeneStatus.MANDATORY)
        hit_2 = CoreHit(c_gene_sctj, "hit_2", 803, "replicon_id", 2, 1.0, 1.0,
                        1.0, 1.0, 10, 20)
        v_hit_2 = ModelHit(hit_2, gene_sctj, GeneStatus.ACCESSORY)
        hit_3 = CoreHit(c_gene_sctn, "hit_3", 803, "replicon_id", 3, 1.0, 1.0,
                        1.0, 1.0, 10, 20)
        v_hit_3 = ModelHit(hit_3, gene_sctn, GeneStatus.ACCESSORY)
        hit_4 = CoreHit(c_gene_abc, "hit_4", 803, "replicon_id", 4, 1.0, 1.0,
                        1.0, 1.0, 10, 20)
        v_hit_4 = ModelHit(hit_4, gene_abc, GeneStatus.FORBIDDEN)

        ls_1 = LikelySystem(model, [v_hit_1], [v_hit_2, v_hit_3], [],
                            [v_hit_4])
        hit_multi_sys_tracker = HitSystemTracker([ls_1])
        ser = TxtLikelySystemSerializer()

        txt = ser.serialize(ls_1, hit_multi_sys_tracker)
        expected_txt = """This replicon contains genetic materials needed for system foo/FOO
WARNING there quorum is reached but there is also some forbidden genes.

system id = replicon_id_FOO_1
model = foo/FOO
replicon = replicon_id
hits = [('hit_1', 'gspD', 1), ('hit_2', 'sctJ', 2), ('hit_3', 'sctN', 3), ('hit_4', 'abc', 4)]
wholeness = 1.000

mandatory genes:
\t- gspD: 1 (gspD)

accessory genes:
\t- sctJ: 1 (sctJ)
\t- sctN: 1 (sctN)

neutral genes:

forbidden genes:
\t- abc: 1 (abc)

Use ordered replicon to have better prediction.
"""
        self.assertEqual(txt, expected_txt)

    def test_UnlikelySystemSerializer_txt(self):
        model = Model("foo/FOO", 10)
        c_gene_gspd = CoreGene(self.model_location, "gspD",
                               self.profile_factory)
        gene_gspd = ModelGene(c_gene_gspd, model)
        model.add_mandatory_gene(gene_gspd)
        c_gene_sctj = CoreGene(self.model_location, "sctJ",
                               self.profile_factory)
        gene_sctj = ModelGene(c_gene_sctj, model)
        model.add_accessory_gene(gene_sctj)
        c_gene_sctn = CoreGene(self.model_location, "sctN",
                               self.profile_factory)
        gene_sctn = ModelGene(c_gene_sctn, model)
        model.add_accessory_gene(gene_sctn)
        c_gene_abc = CoreGene(self.model_location, "abc", self.profile_factory)
        gene_abc = ModelGene(c_gene_abc, model)
        model.add_forbidden_gene(gene_abc)

        hit_1 = CoreHit(c_gene_gspd, "hit_1", 803, "replicon_id", 1, 1.0, 1.0,
                        1.0, 1.0, 10, 20)
        v_hit_1 = ModelHit(hit_1, gene_gspd, GeneStatus.MANDATORY)
        hit_2 = CoreHit(c_gene_sctj, "hit_2", 803, "replicon_id", 2, 1.0, 1.0,
                        1.0, 1.0, 10, 20)
        v_hit_2 = ModelHit(hit_2, gene_sctj, GeneStatus.ACCESSORY)
        hit_3 = CoreHit(c_gene_sctn, "hit_3", 803, "replicon_id", 3, 1.0, 1.0,
                        1.0, 1.0, 10, 20)
        v_hit_3 = ModelHit(hit_3, gene_sctn, GeneStatus.ACCESSORY)
        hit_4 = CoreHit(c_gene_abc, "hit_4", 803, "replicon_id", 4, 1.0, 1.0,
                        1.0, 1.0, 10, 20)
        v_hit_4 = ModelHit(hit_4, gene_abc, GeneStatus.FORBIDDEN)
        ser = TxtUnikelySystemSerializer()

        ls_1 = UnlikelySystem(model, [v_hit_1], [v_hit_2, v_hit_3], [],
                              [v_hit_4], ["the reason why"])
        txt = ser.serialize(ls_1)
        expected_txt = """This replicon probably not contains a system foo/FOO:
the reason why

system id = replicon_id_FOO_1
model = foo/FOO
replicon = replicon_id
hits = [('hit_1', 'gspD', 1), ('hit_2', 'sctJ', 2), ('hit_3', 'sctN', 3), ('hit_4', 'abc', 4)]
wholeness = 1.000

mandatory genes:
\t- gspD: 1 (gspD)

accessory genes:
\t- sctJ: 1 (sctJ)
\t- sctN: 1 (sctN)

neutral genes:

forbidden genes:
\t- abc: 1 (abc)

Use ordered replicon to have better prediction.
"""
        self.assertEqual(txt, expected_txt)

    def test_SpecialHitSerializer_tsv(self):
        args = argparse.Namespace()
        args.sequence_db = self.find_data("base", "test_1.fasta")
        args.db_type = 'gembase'
        args.models_dir = self.find_data('models')
        cfg = Config(MacsyDefaults(), args)

        model_name = 'foo'
        models_location = ModelLocation(
            path=os.path.join(args.models_dir, model_name))

        # we need to reset the ProfileFactory
        # because it's a like a singleton
        # so other tests are influenced by ProfileFactory and it's configuration
        # for instance search_genes get profile without hmmer_exe
        profile_factory = ProfileFactory(cfg)
        model = Model("foo/T2SS", 10)

        gene_name = "gspD"
        cg_gspd = CoreGene(models_location, gene_name, profile_factory)
        mg_gspd = ModelGene(cg_gspd, model, loner=True)

        gene_name = "sctJ"
        cg_sctj = CoreGene(models_location, gene_name, profile_factory)
        mg_sctj = ModelGene(cg_sctj, model)

        gene_name = "abc"
        cg_abc = CoreGene(models_location, gene_name, profile_factory)
        mg_abc = ModelGene(cg_abc, model)

        model.add_mandatory_gene(mg_gspd)
        model.add_accessory_gene(mg_sctj)
        model.add_accessory_gene(mg_abc)

        chit_abc = CoreHit(cg_abc, "hit_abc", 803, "replicon_id", 3, 1.0, 1.0,
                           1.0, 1.0, 10, 20)
        chit_sctj = CoreHit(cg_sctj, "hit_sctj", 803, "replicon_id", 4, 1.0,
                            1.0, 1.0, 1.0, 10, 20)
        chit_gspd1 = CoreHit(cg_gspd, "hit_gspd1", 803, "replicon_id", 20, 1.0,
                             2.0, 1.0, 1.0, 10, 20)
        chit_gspd2 = CoreHit(cg_gspd, "hit_gspd2", 803, "replicon_id", 30, 1.0,
                             3.0, 1.0, 1.0, 10, 20)
        mhit_abc = ModelHit(chit_abc, mg_abc, GeneStatus.ACCESSORY)
        mhit_sctj = ModelHit(chit_sctj, mg_sctj, GeneStatus.ACCESSORY)
        mhit_gspd1 = ModelHit(chit_gspd1, mg_gspd, GeneStatus.MANDATORY)
        mhit_gspd2 = ModelHit(chit_gspd2, mg_gspd, GeneStatus.MANDATORY)
        l_gspd1 = Loner(mhit_gspd1, counterpart=[mhit_gspd2])
        l_gspd2 = Loner(mhit_gspd2, counterpart=[mhit_gspd1])
        ser = TsvSpecialHitSerializer()
        txt = ser.serialize([l_gspd1, l_gspd2])

        expected_txt = "\t".join([
            'replicon', 'model_fqn', 'function', 'gene_name', 'hit_id',
            'hit_pos', 'hit_status', 'hit_seq_len', 'hit_i_eval', 'hit_score',
            'hit_profile_cov', 'hit_seq_cov', 'hit_begin_match',
            'hit_end_match'
        ])
        expected_txt += "\n"
        expected_txt += "\t".join([
            'replicon_id', 'foo/T2SS', 'gspD', 'gspD', 'hit_gspd1', '20',
            'mandatory', '803', '1.000e+00', '2.000', '1.000', '1.000', '10',
            '20'
        ])
        expected_txt += "\n"
        expected_txt += "\t".join([
            'replicon_id', 'foo/T2SS', 'gspD', 'gspD', 'hit_gspd2', '30',
            'mandatory', '803', '1.000e+00', '3.000', '1.000', '1.000', '10',
            '20'
        ])
        expected_txt += "\n"
        self.maxDiff = None
        self.assertEqual(txt, expected_txt)