Ejemplo n.º 1
0
    def test_fulfilled_function(self):
        """
        Exercise ``Cluster.fulfilled_function`` on three situations:
        a gene with its own hit in the cluster, a gene with no hit at all,
        and a gene only represented by a hit on its exchangeable.
        """
        t2ss = Model("foo/T2SS", 11)

        core_gspd, core_sctc, core_sctj, core_sctj_flg = (
            CoreGene(self.model_location, name, self.profile_factory)
            for name in ("gspD", "sctC", "sctJ", "sctJ_FLG")
        )

        gspd = ModelGene(core_gspd, t2ss)
        sctc = ModelGene(core_sctc, t2ss)
        sctj = ModelGene(core_sctj, t2ss)
        # sctJ_FLG is registered as an exchangeable of sctJ
        sctj_flg = Exchangeable(core_sctj_flg, sctj)
        sctj.add_exchangeable(sctj_flg)

        # Hit positional values, presumably:
        #   gene, hit_id, hit_seq_length, replicon_name, position, i_eval, score,
        #   profile_coverage, sequence_coverage, begin_match, end_match
        # (11 values are passed; confirm the names against Hit's signature)
        hit_gspd = ValidHit(
            Hit(core_gspd, "h10", 10, "replicon_1", 10, 1.0, 10.0, 1.0, 1.0, 10, 20),
            gspd,
            GeneStatus.MANDATORY)
        hit_sctc = ValidHit(
            Hit(core_sctc, "h20", 10, "replicon_1", 20, 1.0, 20.0, 1.0, 1.0, 10, 20),
            sctc,
            GeneStatus.MANDATORY)

        # Cluster with hits on gspD and sctC only: sctJ has no hit
        cluster = Cluster([hit_gspd, hit_sctc], t2ss, self.hit_weights)
        self.assertTrue(cluster.fulfilled_function(gspd))
        self.assertFalse(cluster.fulfilled_function(sctj))

        # Cluster holding a hit on sctJ_FLG, the exchangeable of sctJ
        hit_sctj_flg = ValidHit(
            Hit(core_sctj_flg, "h50", 10, "replicon_1", 50, 1.0, 50.0, 1.0, 1.0, 10, 20),
            sctj_flg,
            GeneStatus.ACCESSORY)
        cluster = Cluster([hit_gspd, hit_sctj_flg], t2ss, self.hit_weights)
        self.assertTrue(cluster.fulfilled_function(sctj))
Ejemplo n.º 2
0
    def test_str(self):
        """
        """
        model_foo = Model("foo", 10)

        gene_name = 'sctJ_FLG'
        c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
        sctJ_FLG = ModelGene(c_gene, model_foo)

        gene_name = 'sctJ'
        c_sctJ = CoreGene(self.model_location, gene_name, self.profile_factory)
        homolog = Exchangeable(c_sctJ, sctJ_FLG)
        sctJ_FLG.add_exchangeable(homolog)

        gene_name = 'sctN'
        c_sctN = CoreGene(self.model_location, gene_name, self.profile_factory)
        analog = Exchangeable(c_sctN, sctJ_FLG)
        sctJ_FLG.add_exchangeable(analog)
        s = """name : sctJ_FLG
inter_gene_max_space: 10
    exchangeables: sctJ, sctN"""
        self.assertEqual(str(sctJ_FLG), s)

        gene_name = 'sctJ_FLG'
        c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
        sctJ_FLG = ModelGene(c_gene,
                             model_foo,
                             loner=True,
                             multi_system=True,
                             inter_gene_max_space=10)
        s = """name : sctJ_FLG
inter_gene_max_space: 10
loner
multi_system"""
        self.assertEqual(str(sctJ_FLG), s)
Ejemplo n.º 3
0
    def test_SystemSerializer_tsv(self):
        """
        Serialize a system made of two clusters with ``TsvSystemSerializer``
        and compare the output with the expected tab-separated lines
        (one line per hit, each terminated by a newline).
        """
        t2ss = Model("foo/T2SS", 10)

        # gspD: mandatory gene
        core_gspd = CoreGene(self.model_location, "gspD", self.profile_factory)
        gspd = ModelGene(core_gspd, t2ss)
        t2ss.add_mandatory_gene(gspd)

        # sctJ: accessory gene
        core_sctj = CoreGene(self.model_location, "sctJ", self.profile_factory)
        sctj = ModelGene(core_sctj, t2ss)
        t2ss.add_accessory_gene(sctj)

        # sctN: accessory gene with sctN_FLG as exchangeable
        core_sctn = CoreGene(self.model_location, "sctN", self.profile_factory)
        sctn = ModelGene(core_sctn, t2ss)
        core_sctn_flg = CoreGene(self.model_location, "sctN_FLG", self.profile_factory)
        sctn_flg = Exchangeable(core_sctn_flg, sctn)
        sctn.add_exchangeable(sctn_flg)
        t2ss.add_accessory_gene(sctn)

        hit_gspd = ValidHit(
            Hit(core_gspd, "h_gspd", 803, "replicon_id", 10, 1.0, 1.0, 1.0, 1.0, 10, 20),
            gspd,
            GeneStatus.MANDATORY)
        hit_sctj = ValidHit(
            Hit(core_sctj, "h_sctj", 803, "replicon_id", 20, 1.0, 1.0, 1.0, 1.0, 20, 30),
            sctj,
            GeneStatus.ACCESSORY)
        hit_sctn_flg = ValidHit(
            Hit(core_sctn_flg, "h_sctn_flg", 803, "replicon_id", 30, 1.0, 1.0, 1.0, 1.0, 30, 40),
            sctn_flg,
            GeneStatus.ACCESSORY)

        first_locus = Cluster([hit_gspd, hit_sctj], t2ss, self.hit_weights)
        second_locus = Cluster([hit_sctn_flg], t2ss, self.hit_weights)
        system = System(t2ss, [first_locus, second_locus], self.cfg.redundancy_penalty())
        tracker = HitSystemTracker([system])
        serializer = TsvSystemSerializer()

        # One list of cells per expected output line; the third line reports
        # the hit on sctN_FLG with "sctN" in the gene-reference cell.
        expected_rows = [
            ["replicon_id", "h_gspd", "gspD", "10", "foo/T2SS",
             system.id, "1", "1.000", "1.900", "1", "gspD", "mandatory",
             "803", "1.0", "1.000", "1.000", "1.000", "10", "20", ""],
            ["replicon_id", "h_sctj", "sctJ", "20", "foo/T2SS",
             system.id, "1", "1.000", "1.900", "1", "sctJ", "accessory",
             "803", "1.0", "1.000", "1.000", "1.000", "20", "30", ""],
            ["replicon_id", "h_sctn_flg", "sctN_FLG", "30", "foo/T2SS",
             system.id, "1", "1.000", "1.900", "1", "sctN", "accessory",
             "803", "1.0", "1.000", "1.000", "1.000", "30", "40", ""],
        ]
        expected = "".join("\t".join(cells) + "\n" for cells in expected_rows)

        self.assertEqual(expected, serializer.serialize(system, tracker))
Ejemplo n.º 4
0
    def setUp(self) -> None:
        """
        Build the fixtures shared by the tests of this case.

        Sets on ``self``:

        * ``models_location``: ModelLocation of the 'foo' models directory
        * ``profile_factory``: ProfileFactory built from the test configuration
        * ``mhit_1`` .. ``mhit_5``: ModelHit objects
        * ``ms_1`` .. ``ms_5``: MultiSystem objects wrapping the same core hits
        """
        args = argparse.Namespace()
        args.sequence_db = self.find_data("base", "test_1.fasta")
        args.db_type = 'gembase'
        args.models_dir = self.find_data('models')
        cfg = Config(MacsyDefaults(), args)

        model_name = 'foo'
        self.models_location = ModelLocation(path=os.path.join(args.models_dir, model_name))

        model = Model(model_name, 10)
        # A new ProfileFactory is created for every test. Per the original
        # author's note it behaves like a singleton, so a factory configured
        # by another test (for instance profiles obtained without hmmer_exe)
        # would otherwise influence this one.
        # It is built once only: the former extra local instance was unused.
        self.profile_factory = ProfileFactory(cfg)

        gene_name = "gspD"
        c_gene_gspd = CoreGene(self.models_location, gene_name, self.profile_factory)
        gene_gspd = ModelGene(c_gene_gspd, model, multi_system=True)

        gene_name = "sctJ"
        c_gene_sctj = CoreGene(self.models_location, gene_name, self.profile_factory)
        gene_sctj = ModelGene(c_gene_sctj, model, multi_system=True)

        # sctN is only known to the model as an exchangeable of sctJ
        gene_name = "sctN"
        c_gene_sctn = CoreGene(self.models_location, gene_name, self.profile_factory)
        gene_sctn = Exchangeable(c_gene_sctn, gene_sctj)
        gene_sctj.add_exchangeable(gene_sctn)

        model.add_mandatory_gene(gene_gspd)
        model.add_accessory_gene(gene_sctj)

        #        CoreHit(gene, hit_id, hit_seq_length, replicon_name, position, i_eval, score,
        #                       profile_coverage, sequence_coverage, begin_match, end_match
        #                                                        pos      score
        chit_1 = CoreHit(c_gene_gspd, "hit_1", 803, "replicon_id", 2, 1.0, 1.0, 1.0, 1.0, 10, 20)
        chit_2 = CoreHit(c_gene_sctj, "hit_2", 803, "replicon_id", 3, 1.0, 1.0, 1.0, 1.0, 10, 20)
        chit_3 = CoreHit(c_gene_gspd, "hit_3", 803, "replicon_id", 10, 1.0, 3.0, 1.0, 1.0, 10, 20)
        chit_4 = CoreHit(c_gene_sctn, "hit_4", 803, "replicon_id", 14, 1.0, 4.0, 1.0, 1.0, 10, 20)
        chit_5 = CoreHit(c_gene_gspd, "hit_5", 803, "replicon_id", 20, 1.0, 2.0, 1.0, 1.0, 10, 20)

        # The same five core hits wrapped as ModelHit ...
        self.mhit_1 = ModelHit(chit_1, gene_gspd, GeneStatus.MANDATORY)
        self.mhit_2 = ModelHit(chit_2, gene_sctj, GeneStatus.ACCESSORY)
        self.mhit_3 = ModelHit(chit_3, gene_gspd, GeneStatus.MANDATORY)
        self.mhit_4 = ModelHit(chit_4, gene_sctn, GeneStatus.ACCESSORY)
        self.mhit_5 = ModelHit(chit_5, gene_gspd, GeneStatus.MANDATORY)

        # ... and as MultiSystem
        self.ms_1 = MultiSystem(chit_1, gene_ref=gene_gspd, gene_status=GeneStatus.MANDATORY)
        self.ms_2 = MultiSystem(chit_2, gene_ref=gene_sctj, gene_status=GeneStatus.ACCESSORY)
        self.ms_3 = MultiSystem(chit_3, gene_ref=gene_gspd, gene_status=GeneStatus.MANDATORY)
        self.ms_4 = MultiSystem(chit_4, gene_ref=gene_sctn, gene_status=GeneStatus.ACCESSORY)
        self.ms_5 = MultiSystem(chit_5, gene_ref=gene_gspd, gene_status=GeneStatus.MANDATORY)
Ejemplo n.º 5
0
    def test_alternate_of(self):
        """
        Assert that ``alternate_of()`` called on a ModelGene which owns an
        exchangeable returns that ModelGene itself.
        """
        foo = Model("foo", 10)

        core_sctj = CoreGene(self.model_location, 'sctJ', self.profile_factory)
        reference = ModelGene(core_sctj, foo)

        core_sctj_flg = CoreGene(self.model_location, 'sctJ_FLG', self.profile_factory)
        alternative = Exchangeable(core_sctj_flg, reference)
        reference.add_exchangeable(alternative)

        self.assertEqual(reference.alternate_of(), reference)
Ejemplo n.º 6
0
    def test_add_exchangeable(self):
        """
        Add one exchangeable to a ModelGene and assert that ``exchangeables``
        then holds exactly that object.
        """
        foo = Model("foo", 10)
        core_sctj = CoreGene(self.model_location, 'sctJ', self.profile_factory)
        reference = ModelGene(core_sctj, foo)

        core_sctj_flg = CoreGene(self.model_location, 'sctJ_FLG', self.profile_factory)
        alternative = Exchangeable(core_sctj_flg, reference)
        reference.add_exchangeable(alternative)

        registered = reference.exchangeables
        self.assertEqual(len(registered), 1)
        self.assertEqual(registered[0], alternative)
Ejemplo n.º 7
0
    def test_alternate_of(self):
        """
        Assert that ``alternate_of()`` called on an Exchangeable returns the
        ModelGene it was created for.
        """
        t2ss = Model("T2SS", 10)

        core_sctj_flg = CoreGene(self.model_location, 'sctJ_FLG', self.profile_factory)
        reference = ModelGene(core_sctj_flg, t2ss)

        core_sctj = CoreGene(self.model_location, 'sctJ', self.profile_factory)
        alternative = Exchangeable(core_sctj, reference)
        reference.add_exchangeable(alternative)

        self.assertEqual(alternative.alternate_of(), reference)
Ejemplo n.º 8
0
    def test_exhangeables(self):
        """
        Add two exchangeables to a ModelGene and assert that ``exchangeables``
        lists them in insertion order.
        """
        foo = Model("foo", 10)

        core_sctn = CoreGene(self.model_location, 'sctN', self.profile_factory)
        reference = ModelGene(core_sctn, foo)

        core_sctj_flg = CoreGene(self.model_location, 'sctJ_FLG', self.profile_factory)
        core_sctj = CoreGene(self.model_location, 'sctJ', self.profile_factory)

        # sctJ is added first, sctJ_FLG second
        added = []
        for core in (core_sctj, core_sctj_flg):
            alternative = Exchangeable(core, reference)
            reference.add_exchangeable(alternative)
            added.append(alternative)

        self.assertEqual(reference.exchangeables, added)
Ejemplo n.º 9
0
    def test_str(self):
        model_fqn = "foo/bar"
        model = Model(model_fqn, 10)
        gene_name = 'sctJ_FLG'
        c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
        mandatory_gene = ModelGene(c_gene, model)
        model.add_mandatory_gene(mandatory_gene)
        homolog_name = 'sctJ'
        c_gene_homolg = CoreGene(self.model_location, homolog_name, self.profile_factory)
        homolog = Exchangeable(c_gene_homolg, mandatory_gene)
        mandatory_gene.add_exchangeable(homolog)

        gene_name = 'sctN_FLG'
        c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
        accessory_gene = ModelGene(c_gene, model)
        model.add_accessory_gene(accessory_gene)
        analog_name = 'sctN'
        c_gene_analog = CoreGene(self.model_location, analog_name, self.profile_factory)
        analog = Exchangeable(c_gene_analog, accessory_gene)
        accessory_gene.add_exchangeable(analog)

        gene_name = 'toto'
        c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
        neutral_gene = ModelGene(c_gene, model)
        model.add_neutral_gene(neutral_gene)

        gene_name = 'sctC'
        c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
        forbidden_gene = ModelGene(c_gene, model)
        model.add_forbidden_gene(forbidden_gene)

        exp_str = """name: bar
fqn: foo/bar
==== mandatory genes ====
sctJ_FLG
==== accessory genes ====
sctN_FLG
==== neutral genes ====
toto
==== forbidden genes ====
sctC
============== end pprint model ================
"""
        self.assertEqual(str(model), exp_str)
Ejemplo n.º 10
0
    def test_is_exchangeable(self):
        """
        Assert that ``is_exchangeable`` is false on ModelGene objects and
        true on an Exchangeable object.
        """
        foo = Model("foo", 10)

        core_sctn = CoreGene(self.model_location, 'sctN', self.profile_factory)
        sctn = ModelGene(core_sctn, foo)

        core_sctj_flg = CoreGene(self.model_location, 'sctJ_FLG', self.profile_factory)
        sctj_flg = ModelGene(core_sctj_flg, foo)

        core_sctj = CoreGene(self.model_location, 'sctJ', self.profile_factory)
        sctj = ModelGene(core_sctj, foo)
        # The exchangeable is built on the same core gene as the sctJ_FLG ModelGene
        alternative = Exchangeable(core_sctj_flg, sctj)
        sctj.add_exchangeable(alternative)

        self.assertFalse(sctj_flg.is_exchangeable)
        self.assertFalse(sctj.is_exchangeable)
        self.assertTrue(alternative.is_exchangeable)
        self.assertFalse(sctn.is_exchangeable)
Ejemplo n.º 11
0
    def test_get_gene(self):
        """
        Check ``Model.get_gene`` for every gene category.

        For each ``add_<category>_gene`` method the private ``_<category>_genes``
        lists are emptied, the gene is added through that method, and the
        lookup by name is asserted. An unknown name must raise ``KeyError``.
        The same loop is then repeated to look up an exchangeable by its name.
        """
        model = Model("foo", 10)
        ref_name = 'sctJ_FLG'
        core_ref = CoreGene(self.model_location, ref_name, self.profile_factory)
        reference = ModelGene(core_ref, model)

        adders = [getattr(model, f'add_{category}_gene') for category in model.gene_category]
        for add_gene in adders:
            # start from a model with no gene in any category
            for category in model.gene_category:
                setattr(model, f'_{category}_genes', [])
            add_gene(reference)
            self.assertEqual(reference, model.get_gene(ref_name))

        with self.assertRaises(KeyError):
            model.get_gene('bar')

        alt_name = 'sctJ'
        core_alt = CoreGene(self.model_location, alt_name, self.profile_factory)
        alternative = Exchangeable(core_alt, reference)
        reference.add_exchangeable(alternative)

        adders = [getattr(model, f'add_{category}_gene') for category in model.gene_category]
        for add_gene in adders:
            for category in model.gene_category:
                setattr(model, f'_{category}_genes', [])
            add_gene(reference)
            self.assertEqual(alternative, model.get_gene(alt_name))
Ejemplo n.º 12
0
    def test_SystemSerializer_tsv(self):
        """
        Serialize a system made of one regular cluster and one loner cluster
        with ``TsvSystemSerializer`` and compare the output with the expected
        tab-separated lines (one line per hit, each terminated by a newline).
        """
        t2ss = Model("foo/T2SS", 10)

        # gspD: mandatory gene
        core_gspd = CoreGene(self.model_location, "gspD", self.profile_factory)
        gspd = ModelGene(core_gspd, t2ss)
        t2ss.add_mandatory_gene(gspd)

        # sctJ: accessory gene
        core_sctj = CoreGene(self.model_location, "sctJ", self.profile_factory)
        sctj = ModelGene(core_sctj, t2ss)
        t2ss.add_accessory_gene(sctj)

        # sctN: accessory loner gene with sctN_FLG as exchangeable
        core_sctn = CoreGene(self.model_location, "sctN", self.profile_factory)
        sctn = ModelGene(core_sctn, t2ss, loner=True)
        core_sctn_flg = CoreGene(self.model_location, "sctN_FLG", self.profile_factory)
        sctn_flg = Exchangeable(core_sctn_flg, sctn)
        sctn.add_exchangeable(sctn_flg)
        t2ss.add_accessory_gene(sctn)

        # CoreHit(gene, hit_id, hit_seq_length, replicon_name, position, i_eval, score,
        #         profile_coverage, sequence_coverage, begin_match, end_match)
        core_hit_gspd = CoreHit(core_gspd, "h_gspd", 803, "replicon_id", 10, 1.0, 1.0, 1.0, 1.0, 10, 20)
        hit_gspd = ModelHit(core_hit_gspd,
                            gene_ref=gspd,
                            gene_status=GeneStatus.MANDATORY)
        core_hit_sctj = CoreHit(core_sctj, "h_sctj", 803, "replicon_id", 20, 1.0, 1.0, 1.0, 1.0, 20, 30)
        hit_sctj = ModelHit(core_hit_sctj,
                            gene_ref=sctj,
                            gene_status=GeneStatus.ACCESSORY)

        core_hit_sctn_flg = CoreHit(core_sctn_flg, "h_sctn_flg", 803, "replicon_id", 40, 1.0, 1.0, 1.0, 1.0, 30, 40)
        hit_sctn_flg = ModelHit(core_hit_sctn_flg,
                                gene_ref=sctn_flg,
                                gene_status=GeneStatus.ACCESSORY)
        core_hit_sctn = CoreHit(core_sctn, "h_sctn", 803, "replicon_id", 80, 1.0, 1.0, 1.0, 1.0, 30, 40)
        # The loner on sctN lists the sctN_FLG hit as its counterpart
        loner_sctn = Loner(core_hit_sctn,
                           gene_ref=sctn,
                           gene_status=GeneStatus.ACCESSORY,
                           counterpart=[hit_sctn_flg])

        regular_cluster = Cluster([hit_gspd, hit_sctj], t2ss, self.hit_weights)
        loner_cluster = Cluster([loner_sctn], t2ss, self.hit_weights)
        system = System(t2ss, [regular_cluster, loner_cluster], self.cfg.redundancy_penalty())
        # original author's note on the score: 1.5 .35 = 1.85
        tracker = HitSystemTracker([system])
        serializer = TsvSystemSerializer()

        # One list of cells per expected output line
        expected_rows = [
            ["replicon_id", "h_gspd", "gspD", "10", "foo/T2SS",
             system.id, "1", "1", "1.000", "1.850", "1", "gspD",
             "mandatory", "803", "1.0", "1.000", "1.000", "1.000", "10", "20",
             "", ""],
            ["replicon_id", "h_sctj", "sctJ", "20", "foo/T2SS",
             system.id, "1", "1", "1.000", "1.850", "1", "sctJ",
             "accessory", "803", "1.0", "1.000", "1.000", "1.000", "20", "30",
             "", ""],
            ["replicon_id", "h_sctn", "sctN", "80", "foo/T2SS",
             system.id, "1", "-1", "1.000", "1.850", "1", "sctN",
             "accessory", "803", "1.0", "1.000", "1.000", "1.000", "30", "40",
             "h_sctn_flg", ""],
        ]
        expected = "".join("\t".join(cells) + "\n" for cells in expected_rows)

        # show the complete diff on failure
        self.maxDiff = None
        self.assertEqual(expected, serializer.serialize(system, tracker))
Ejemplo n.º 13
0
    def test_SolutionSerializer_tsv(self):
        """
        Serialize a solution holding two systems with ``TsvSolutionSerializer``
        and compare the output with the expected tab-separated text.

        System A (model foo/A) is built from two regular clusters plus a
        cluster holding one Loner; system B (model foo/B) from one cluster.
        In the expected text each hit line ends with a newline and each
        system's group of lines is followed by one empty line.
        """
        foo_location = ModelLocation(
            path=os.path.join(self.cfg.models_dir()[0], 'foo'))

        # ---------------- model B ----------------
        model_B = Model("foo/B", 10)
        core_sctn_flg = CoreGene(foo_location, "sctN_FLG", self.profile_factory)
        b_sctn_flg = ModelGene(core_sctn_flg, model_B)
        core_sctj_flg = CoreGene(foo_location, "sctJ_FLG", self.profile_factory)
        b_sctj_flg = ModelGene(core_sctj_flg, model_B)
        core_flgB = CoreGene(foo_location, "flgB", self.profile_factory)
        b_flgB = ModelGene(core_flgB, model_B)
        core_tadZ = CoreGene(foo_location, "tadZ", self.profile_factory)
        b_tadZ = ModelGene(core_tadZ, model_B)

        model_B.add_mandatory_gene(b_sctn_flg)
        model_B.add_mandatory_gene(b_sctj_flg)
        model_B.add_accessory_gene(b_flgB)
        model_B.add_accessory_gene(b_tadZ)

        # ---------------- model A ----------------
        # Every gene of model A gets one exchangeable built on a core gene
        # that is also used by model B.
        model_A = Model("foo/A", 10)
        core_sctn = CoreGene(foo_location, "sctN", self.profile_factory)
        a_sctn = ModelGene(core_sctn, model_A)
        a_sctn_alt = Exchangeable(core_sctn_flg, a_sctn)
        a_sctn.add_exchangeable(a_sctn_alt)

        core_sctj = CoreGene(foo_location, "sctJ", self.profile_factory)
        a_sctj = ModelGene(core_sctj, model_A)
        a_sctj_alt = Exchangeable(core_sctj_flg, a_sctj)
        a_sctj.add_exchangeable(a_sctj_alt)

        core_gspd = CoreGene(foo_location, "gspD", self.profile_factory)
        a_gspd = ModelGene(core_gspd, model_A)
        a_gspd_alt = Exchangeable(core_flgB, a_gspd)
        a_gspd.add_exchangeable(a_gspd_alt)

        core_abc = CoreGene(foo_location, "abc", self.profile_factory)
        a_abc = ModelGene(core_abc, model_A, loner=True)
        a_abc_alt = Exchangeable(core_tadZ, a_abc)
        a_abc.add_exchangeable(a_abc_alt)

        model_A.add_mandatory_gene(a_sctn)
        model_A.add_mandatory_gene(a_sctj)
        model_A.add_accessory_gene(a_gspd)
        model_A.add_accessory_gene(a_abc)

        # CoreHit(gene, hit_id, hit_seq_length, replicon_name, position, i_eval, score,
        #         profile_coverage, sequence_coverage, begin_match, end_match)
        ch_sctj = CoreHit(core_sctj, "hit_sctj", 803, "replicon_id", 1, 1.0, 1.0, 1.0, 1.0, 10, 20)
        hit_sctj = ModelHit(ch_sctj, a_sctj, GeneStatus.MANDATORY)
        ch_sctn = CoreHit(core_sctn, "hit_sctn", 803, "replicon_id", 2, 1.0, 1.0, 1.0, 1.0, 10, 20)
        hit_sctn = ModelHit(ch_sctn, a_sctn, GeneStatus.MANDATORY)
        ch_gspd = CoreHit(core_gspd, "hit_gspd", 803, "replicon_id", 3, 1.0, 1.0, 1.0, 1.0, 10, 20)
        hit_gspd = ModelHit(ch_gspd, a_gspd, GeneStatus.ACCESSORY)

        ch_sctj_flg = CoreHit(core_sctj_flg, "hit_sctj_flg", 803, "replicon_id", 10, 1.0, 1.0, 1.0, 1.0, 10, 20)
        ch_flgB = CoreHit(core_flgB, "hit_flgB", 803, "replicon_id", 11, 1.0, 1.0, 1.0, 1.0, 10, 20)
        ch_abc = CoreHit(core_abc, "hit_abc", 803, "replicon_id", 20, 1.0, 1.0, 1.0, 1.0, 10, 20)
        ch_abc2 = CoreHit(core_abc, "hit_abc2", 803, "replicon_id", 50, 1.0, 1.0, 1.0, 1.0, 10, 20)
        ch_tadZ = CoreHit(core_tadZ, "hit_tadZ", 803, "replicon_id", 40, 1.0, 1.0, 1.0, 1.0, 10, 20)
        hit_sctj_flg = ModelHit(ch_sctj_flg, b_sctj_flg, GeneStatus.MANDATORY)
        hit_flgB = ModelHit(ch_flgB, b_flgB, GeneStatus.ACCESSORY)
        # NOTE(review): hit_abc is not referenced afterwards (the Loner below
        # is built from ch_abc directly); kept as in the original.
        hit_abc = ModelHit(ch_abc, a_abc, GeneStatus.ACCESSORY)
        hit_abc2 = ModelHit(ch_abc2, a_abc, GeneStatus.ACCESSORY)
        hit_tadZ = ModelHit(ch_tadZ, b_tadZ, GeneStatus.ACCESSORY)

        # ---------------- clusters and systems ----------------
        model_A._min_mandatory_genes_required = 2
        model_A._min_genes_required = 2
        a_locus_1 = Cluster([hit_sctj, hit_sctn, hit_gspd], model_A, self.hit_weights)
        a_locus_2 = Cluster([hit_sctj, hit_sctn], model_A, self.hit_weights)
        loner_abc = Loner(ch_abc,
                          gene_ref=a_abc,
                          gene_status=GeneStatus.ACCESSORY,
                          counterpart=[hit_abc2])
        a_loner_cluster = Cluster([loner_abc], model_A, self.hit_weights)

        model_B._min_mandatory_genes_required = 1
        model_B._min_genes_required = 2
        b_locus = Cluster([hit_sctj_flg, hit_tadZ, hit_flgB], model_B, self.hit_weights)

        sys_A = System(model_A, [a_locus_1, a_locus_2, a_loner_cluster],
                       self.cfg.redundancy_penalty())
        # original author's note on the score:
        #   2.5, 2 , 0.35 = 4.85 - (2 * 1.5) = 1.85
        sys_A.id = "sys_id_A"
        sys_B = System(model_B, [b_locus], self.cfg.redundancy_penalty())
        # original author's note on the score: 2.0
        sys_B.id = "sys_id_B"

        solution = Solution([sys_A, sys_B])
        sol_id = '12'

        tracker = HitSystemTracker([sys_A, sys_B])
        serializer = TsvSolutionSerializer()

        # ---------------- expected text ----------------
        # Cells shared by every expected line (all hits were created with the
        # same length, scores, coverages and match bounds).
        shared = ['803', '1.0', '1.000', '1.000', '1.000', '10', '20']
        rows_sys_A = [
            [sol_id, 'replicon_id', 'hit_sctj', 'sctJ', '1', 'foo/A',
             'sys_id_A', '2', '1', '1.000', '1.850', '2', 'sctJ', 'mandatory',
             *shared, '', ''],
            [sol_id, 'replicon_id', 'hit_sctn', 'sctN', '2', 'foo/A',
             'sys_id_A', '2', '1', '1.000', '1.850', '2', 'sctN', 'mandatory',
             *shared, '', ''],
            [sol_id, 'replicon_id', 'hit_gspd', 'gspD', '3', 'foo/A',
             'sys_id_A', '2', '1', '1.000', '1.850', '2', 'gspD', 'accessory',
             *shared, '', ''],
            [sol_id, 'replicon_id', 'hit_sctj', 'sctJ', '1', 'foo/A',
             'sys_id_A', '2', '2', '1.000', '1.850', '2', 'sctJ', 'mandatory',
             *shared, '', ''],
            [sol_id, 'replicon_id', 'hit_sctn', 'sctN', '2', 'foo/A',
             'sys_id_A', '2', '2', '1.000', '1.850', '2', 'sctN', 'mandatory',
             *shared, '', ''],
            [sol_id, 'replicon_id', 'hit_abc', 'abc', '20', 'foo/A',
             'sys_id_A', '2', '-1', '1.000', '1.850', '2', 'abc', 'accessory',
             *shared, 'hit_abc2', ''],
        ]
        rows_sys_B = [
            [sol_id, 'replicon_id', 'hit_sctj_flg', 'sctJ_FLG', '10', 'foo/B',
             'sys_id_B', '1', '1', '0.750', '2.000', '1', 'sctJ_FLG', 'mandatory',
             *shared, '', ''],
            [sol_id, 'replicon_id', 'hit_flgB', 'flgB', '11', 'foo/B',
             'sys_id_B', '1', '1', '0.750', '2.000', '1', 'flgB', 'accessory',
             *shared, '', ''],
            [sol_id, 'replicon_id', 'hit_tadZ', 'tadZ', '40', 'foo/B',
             'sys_id_B', '1', '1', '0.750', '2.000', '1', 'tadZ', 'accessory',
             *shared, '', ''],
        ]
        # every line ends with "\n"; every system group is closed by one more "\n"
        expected = "".join(
            "".join('\t'.join(cells) + "\n" for cells in rows) + "\n"
            for rows in (rows_sys_A, rows_sys_B)
        )

        serialized = serializer.serialize(solution, sol_id, tracker)
        # show the complete diff on failure
        self.maxDiff = None
        self.assertEqual(serialized, expected)
Ejemplo n.º 14
0
    def test_SystemSerializer_str(self):
        model_name = 'foo'
        model_location = ModelLocation(
            path=os.path.join(self.cfg.models_dir(), model_name))
        model_A = Model("foo/A", 10)
        model_B = Model("foo/B", 10)

        c_gene_sctn_flg = CoreGene(model_location, "sctN_FLG",
                                   self.profile_factory)
        gene_sctn_flg = ModelGene(c_gene_sctn_flg, model_B)
        c_gene_sctj_flg = CoreGene(model_location, "sctJ_FLG",
                                   self.profile_factory)
        gene_sctj_flg = ModelGene(c_gene_sctj_flg, model_B)
        c_gene_flgB = CoreGene(model_location, "flgB", self.profile_factory)
        c_gene_tadZ = CoreGene(model_location, "tadZ", self.profile_factory)
        gene_tadZ = ModelGene(c_gene_tadZ, model_B)

        c_gene_sctn = CoreGene(model_location, "sctN", self.profile_factory)
        gene_sctn = ModelGene(c_gene_sctn, model_A)
        gene_sctn_hom = Exchangeable(c_gene_sctn_flg, gene_sctn)
        gene_sctn.add_exchangeable(gene_sctn_hom)

        c_gene_sctj = CoreGene(model_location, "sctJ", self.profile_factory)
        gene_sctj = ModelGene(c_gene_sctj, model_A)
        gene_sctj_an = Exchangeable(c_gene_sctj_flg, gene_sctj)
        gene_sctj.add_exchangeable(gene_sctj_an)

        c_gene_gspd = CoreGene(model_location, "gspD", self.profile_factory)
        gene_gspd = ModelGene(c_gene_gspd, model_A)
        gene_gspd_an = Exchangeable(c_gene_flgB, gene_gspd)
        gene_gspd.add_exchangeable(gene_gspd_an)

        c_gene_abc = CoreGene(model_location, "abc", self.profile_factory)
        gene_abc = ModelGene(c_gene_abc, model_A)
        gene_abc_ho = Exchangeable(c_gene_tadZ, gene_abc)
        gene_abc.add_exchangeable(gene_abc_ho)

        model_A.add_mandatory_gene(gene_sctn)
        model_A.add_mandatory_gene(gene_sctj)
        model_A.add_accessory_gene(gene_gspd)
        model_A.add_forbidden_gene(gene_abc)

        model_B.add_mandatory_gene(gene_sctn_flg)
        model_B.add_mandatory_gene(gene_sctj_flg)
        model_B.add_accessory_gene(gene_gspd)
        model_B.add_accessory_gene(gene_tadZ)

        h_sctj = Hit(c_gene_sctj, "hit_sctj", 803, "replicon_id", 1, 1.0, 1.0,
                     1.0, 1.0, 10, 20)
        h_sctn = Hit(c_gene_sctn, "hit_sctn", 803, "replicon_id", 1, 1.0, 1.0,
                     1.0, 1.0, 10, 20)
        h_gspd = Hit(c_gene_gspd, "hit_gspd", 803, "replicon_id", 1, 1.0, 1.0,
                     1.0, 1.0, 10, 20)

        h_sctj_flg = Hit(c_gene_sctj_flg, "hit_sctj_flg", 803, "replicon_id",
                         1, 1.0, 1.0, 1.0, 1.0, 10, 20)
        h_tadZ = Hit(c_gene_tadZ, "hit_tadZ", 803, "replicon_id", 1, 1.0, 1.0,
                     1.0, 1.0, 10, 20)

        model_A._min_mandatory_genes_required = 2
        model_A._min_genes_required = 2
        c1 = Cluster([
            ValidHit(h_sctj, gene_sctj, GeneStatus.MANDATORY),
            ValidHit(h_sctn, gene_sctn, GeneStatus.MANDATORY),
            ValidHit(h_gspd, gene_gspd, GeneStatus.ACCESSORY)
        ], model_A, self.hit_weights)

        c2 = Cluster([
            ValidHit(h_sctj, gene_sctj, GeneStatus.MANDATORY),
            ValidHit(h_sctn, gene_sctn, GeneStatus.MANDATORY)
        ], model_A, self.hit_weights)

        model_B._min_mandatory_genes_required = 1
        model_B._min_genes_required = 2
        c3 = Cluster([
            ValidHit(h_sctj_flg, gene_sctj_flg, GeneStatus.MANDATORY),
            ValidHit(h_tadZ, gene_tadZ, GeneStatus.ACCESSORY),
            ValidHit(h_gspd, gene_gspd, GeneStatus.ACCESSORY)
        ], model_B, self.hit_weights)

        sys_A = System(model_A, [c1, c2], self.cfg.redundancy_penalty())
        sys_A.id = "sys_id_A"
        sys_B = System(model_B, [c3], self.cfg.redundancy_penalty())
        sys_B.id = "sys_id_B"
        hit_multi_sys_tracker = HitSystemTracker([sys_A, sys_B])
        system_serializer = TxtSystemSerializer()

        sys_str = f"""system id = {sys_A.id}
model = foo/A
replicon = replicon_id
clusters = [('hit_sctj', 'sctJ', 1), ('hit_sctn', 'sctN', 1), ('hit_gspd', 'gspD', 1)], [('hit_sctj', 'sctJ', 1), ('hit_sctn', 'sctN', 1)]
occ = 2
wholeness = 1.000
loci nb = 2
score = 1.500

mandatory genes:
\t- sctN: 2 (sctN, sctN)
\t- sctJ: 2 (sctJ, sctJ)

accessory genes:
\t- gspD: 1 (gspD [sys_id_B])

neutral genes:
"""
        self.assertEqual(
            sys_str, system_serializer.serialize(sys_A, hit_multi_sys_tracker))
Ejemplo n.º 15
0
    def test_SolutionSerializer_tsv(self):
        """
        Serialize a solution made of two systems with TsvSolutionSerializer
        and compare the text with the expected tab separated rows.

        System A (model foo/A) is built from two clusters, system B
        (model foo/B) from a single one.
        """
        location = ModelLocation(
            path=os.path.join(self.cfg.models_dir(), 'foo'))
        model_A = Model("foo/A", 10)
        model_B = Model("foo/B", 10)

        def core(name):
            # all core genes share the same location and profile factory
            return CoreGene(location, name, self.profile_factory)

        def exchangeable_gene(core_gene, alternate_core):
            # gene of model A which can be replaced by *alternate_core*
            gene = ModelGene(core_gene, model_A)
            gene.add_exchangeable(Exchangeable(alternate_core, gene))
            return gene

        def make_hit(core_gene, hit_id):
            # the hits of this test differ only by their gene and identifier
            return Hit(core_gene, hit_id, 803, "replicon_id", 1, 1.0, 1.0,
                       1.0, 1.0, 10, 20)

        # genes of model B
        core_sctn_flg = core("sctN_FLG")
        b_sctn_flg = ModelGene(core_sctn_flg, model_B)
        core_sctj_flg = core("sctJ_FLG")
        b_sctj_flg = ModelGene(core_sctj_flg, model_B)
        core_flgB = core("flgB")
        b_flgB = ModelGene(core_flgB, model_B)
        core_tadZ = core("tadZ")
        b_tadZ = ModelGene(core_tadZ, model_B)

        # genes of model A, each one with an exchangeable
        core_sctn = core("sctN")
        a_sctn = exchangeable_gene(core_sctn, core_sctn_flg)
        core_sctj = core("sctJ")
        a_sctj = exchangeable_gene(core_sctj, core_sctj_flg)
        core_gspd = core("gspD")
        a_gspd = exchangeable_gene(core_gspd, core_flgB)
        a_abc = exchangeable_gene(core("abc"), core_tadZ)

        for mandatory in (a_sctn, a_sctj):
            model_A.add_mandatory_gene(mandatory)
        model_A.add_accessory_gene(a_gspd)
        model_A.add_forbidden_gene(a_abc)

        for mandatory in (b_sctn_flg, b_sctj_flg):
            model_B.add_mandatory_gene(mandatory)
        for accessory in (b_flgB, b_tadZ):
            model_B.add_accessory_gene(accessory)

        hit_sctj = make_hit(core_sctj, "hit_sctj")
        hit_sctn = make_hit(core_sctn, "hit_sctn")
        hit_gspd = make_hit(core_gspd, "hit_gspd")
        hit_sctj_flg = make_hit(core_sctj_flg, "hit_sctj_flg")
        hit_flgB = make_hit(core_flgB, "hit_flgB")
        hit_tadZ = make_hit(core_tadZ, "hit_tadZ")

        # the quorum is set directly on the private attributes of the models
        model_A._min_mandatory_genes_required = 2
        model_A._min_genes_required = 2
        cluster_1 = Cluster(
            [ValidHit(hit_sctj, a_sctj, GeneStatus.MANDATORY),
             ValidHit(hit_sctn, a_sctn, GeneStatus.MANDATORY),
             ValidHit(hit_gspd, a_gspd, GeneStatus.ACCESSORY)],
            model_A, self.hit_weights)
        cluster_2 = Cluster(
            [ValidHit(hit_sctj, a_sctj, GeneStatus.MANDATORY),
             ValidHit(hit_sctn, a_sctn, GeneStatus.MANDATORY)],
            model_A, self.hit_weights)

        model_B._min_mandatory_genes_required = 1
        model_B._min_genes_required = 2
        cluster_3 = Cluster(
            [ValidHit(hit_sctj_flg, b_sctj_flg, GeneStatus.MANDATORY),
             ValidHit(hit_tadZ, b_tadZ, GeneStatus.ACCESSORY),
             ValidHit(hit_flgB, b_flgB, GeneStatus.ACCESSORY)],
            model_B, self.hit_weights)

        sys_A = System(model_A, [cluster_1, cluster_2],
                       self.cfg.redundancy_penalty())
        sys_A.id = "sys_id_A"
        sys_B = System(model_B, [cluster_3], self.cfg.redundancy_penalty())
        sys_B.id = "sys_id_B"

        sol = [sys_A, sys_B]
        sol_id = '12'

        hit_multi_sys_tracker = HitSystemTracker([sys_A, sys_B])
        serializer = TsvSolutionSerializer()

        # Expected output: for each system, the columns shared by all its rows
        # (model_fqn, sys_id, sys_loci, sys_wholeness, sys_score, sys_occ)
        # followed by one (hit_id, gene_name, hit_status) triplet per row.
        expected_layout = (
            (('foo/A', 'sys_id_A', '2', '1.000', '1.500', '2'),
             (('hit_sctj', 'sctJ', 'mandatory'),
              ('hit_sctn', 'sctN', 'mandatory'),
              ('hit_gspd', 'gspD', 'accessory'),
              ('hit_sctj', 'sctJ', 'mandatory'),
              ('hit_sctn', 'sctN', 'mandatory'))),
            (('foo/B', 'sys_id_B', '1', '0.750', '2.000', '1'),
             (('hit_sctj_flg', 'sctJ_FLG', 'mandatory'),
              ('hit_tadZ', 'tadZ', 'accessory'),
              ('hit_flgB', 'flgB', 'accessory'))),
        )
        pieces = []
        for (fqn, sys_id, loci, wholeness, score, occ), rows in expected_layout:
            for hit_id, gene_name, status in rows:
                fields = [
                    sol_id, 'replicon_id', hit_id, gene_name, '1', fqn,
                    sys_id, loci, wholeness, score, occ, gene_name, status,
                    '803', '1.0', '1.000', '1.000', '1.000', '10', '20', ''
                ]
                pieces.append('\t'.join(fields) + "\n")
            # an empty line closes the rows of each system
            pieces.append("\n")
        sol_tsv = ''.join(pieces)

        ser = serializer.serialize(sol, sol_id, hit_multi_sys_tracker)
        self.assertEqual(ser, sol_tsv)
Ejemplo n.º 16
0
    def setUp(self) -> None:
        """
        Prepare the configuration, the model ``foo/model_A`` and a set of hits.

        The model gets five genes, each one carrying one exchangeable, and
        ``self.c_hits`` maps a short key to a hit for every reference gene
        and every exchangeable.
        """
        args = argparse.Namespace()
        args.sequence_db = self.find_data("base", "test_1.fasta")
        args.db_type = 'gembase'
        args.models_dir = self.find_data('models')
        self.cfg = Config(MacsyDefaults(), args)

        self.model_name = 'foo'
        self.model_location = ModelLocation(
            path=os.path.join(args.models_dir, self.model_name))
        self.profile_factory = ProfileFactory(self.cfg)

        self.model = Model("foo/model_A", 10)

        def gene_with_exchangeable(ref_name, alt_name):
            # build the reference gene first, then attach its exchangeable
            ref_core = CoreGene(self.model_location, ref_name,
                                self.profile_factory)
            ref_gene = ModelGene(ref_core, self.model)
            alt_core = CoreGene(self.model_location, alt_name,
                                self.profile_factory)
            ref_gene.add_exchangeable(Exchangeable(alt_core, ref_gene))
            return ref_core, alt_core, ref_gene

        core_sctn, core_sctn_flg, sctn = gene_with_exchangeable("sctN", "sctN_FLG")
        core_sctj, core_sctj_flg, sctj = gene_with_exchangeable("sctJ", "sctJ_FLG")
        core_gspd, core_flgb, gspd = gene_with_exchangeable("gspD", "flgB")
        core_abc, core_tadz, abc = gene_with_exchangeable("abc", "tadZ")
        core_toto, core_totote, toto = gene_with_exchangeable("toto", "totote")

        self.model.add_mandatory_gene(sctn)
        self.model.add_mandatory_gene(sctj)
        self.model.add_accessory_gene(gspd)
        self.model.add_neutral_gene(toto)
        self.model.add_forbidden_gene(abc)

        # (key in self.c_hits, core gene of the hit, hit identifier)
        hit_specs = (
            ('h_sctj', core_sctj, "hit_sctj"),
            ('h_sctj_flg', core_sctj_flg, "hit_sctj_flg"),
            ('h_sctn', core_sctn, "hit_sctn"),
            ('h_sctn_flg', core_sctn_flg, "hit_sctn_flg"),
            ('h_gspd', core_gspd, "hit_gspd"),
            ('h_gspd_an', core_flgb, "hit_gspd_an"),
            ('h_abc', core_abc, "hit_abc"),
            ('h_abc_ho', core_tadz, "hit_abc_ho"),
            ('h_toto', core_toto, "hit_toto"),
            ('h_toto_ho', core_totote, "hit_toto_ho"),
        )
        # every hit shares the same numeric fields, only gene and id change
        self.c_hits = {
            key: Hit(core_gene, hit_id, 803, "replicon_id", 1, 1.0, 1.0, 1.0,
                     1.0, 10, 20)
            for key, core_gene, hit_id in hit_specs
        }
Ejemplo n.º 17
0
def _build_clusters(cfg, profile_factory):
    """
    Build the models, hits and clusters used as fixtures by the tests.

    :param cfg: configuration object; this function calls ``cfg.models_dir()``
                (and takes its first element) and ``cfg.hit_weights()``.
    :param profile_factory: the profile factory handed to every CoreGene.
    :return: a tuple ``(models, clusters)`` where ``models`` is a dict with the
             keys 'A' to 'O' and ``clusters`` a dict with the keys 'c1' to 'c26'.
    """

    model_name = 'foo'
    model_location = ModelLocation(
        path=os.path.join(cfg.models_dir()[0], model_name))

    models = {}
    # the core genes are created once and shared by all the models below
    cg_sctn_flg = CoreGene(model_location, "sctN_FLG", profile_factory)
    cg_sctj_flg = CoreGene(model_location, "sctJ_FLG", profile_factory)
    cg_flgB = CoreGene(model_location, "flgB", profile_factory)
    cg_tadZ = CoreGene(model_location, "tadZ", profile_factory)
    cg_sctn = CoreGene(model_location, "sctN", profile_factory)
    cg_sctj = CoreGene(model_location, "sctJ", profile_factory)
    cg_gspd = CoreGene(model_location, "gspD", profile_factory)
    cg_abc = CoreGene(model_location, "abc", profile_factory)
    cg_sctc = CoreGene(model_location, "sctC", profile_factory)

    ###########
    # Model A #
    ###########
    # every gene of model A carries one exchangeable
    models['A'] = Model("foo/A", 10)
    mgA_sctn = ModelGene(cg_sctn, models['A'])
    mgA_sctn_hom = Exchangeable(cg_sctn_flg, mgA_sctn)
    mgA_sctn.add_exchangeable(mgA_sctn_hom)
    mgA_sctj = ModelGene(cg_sctj, models['A'])
    mgA_sctj_an = Exchangeable(cg_sctj_flg, mgA_sctj)
    mgA_sctj.add_exchangeable(mgA_sctj_an)
    mgA_gspd = ModelGene(cg_gspd, models['A'])
    mgA_gspd_an = Exchangeable(cg_flgB, mgA_gspd)
    mgA_gspd.add_exchangeable(mgA_gspd_an)
    mgA_abc = ModelGene(cg_abc, models['A'])
    mgA_abc_ho = Exchangeable(cg_tadZ, mgA_abc)
    mgA_abc.add_exchangeable(mgA_abc_ho)

    models['A'].add_mandatory_gene(mgA_sctn)
    models['A'].add_mandatory_gene(mgA_sctj)
    models['A'].add_accessory_gene(mgA_gspd)
    models['A'].add_forbidden_gene(mgA_abc)

    # the quorum is set directly on the private attributes of the model
    models['A']._min_mandatory_genes_required = 2
    models['A']._min_genes_required = 2

    ###########
    # Model B #
    ###########
    models['B'] = Model("foo/B", 10)
    mgB_sctn_flg = ModelGene(cg_sctn_flg, models['B'])
    mgB_sctj_flg = ModelGene(cg_sctj_flg, models['B'])
    mgB_flgB = ModelGene(cg_flgB, models['B'])
    mgB_tadZ = ModelGene(cg_tadZ, models['B'])

    models['B'].add_mandatory_gene(mgB_sctn_flg)
    models['B'].add_mandatory_gene(mgB_sctj_flg)
    models['B'].add_accessory_gene(mgB_flgB)
    models['B'].add_accessory_gene(mgB_tadZ)

    models['B']._min_mandatory_genes_required = 1
    models['B']._min_genes_required = 2

    ###########
    # Model C #
    ###########
    models['C'] = Model("foo/C", 10)
    mgC_sctn_flg = ModelGene(cg_sctn_flg, models['C'])
    mgC_sctj_flg = ModelGene(cg_sctj_flg, models['C'])
    mgC_flgB = ModelGene(cg_flgB, models['C'])
    mgC_tadZ = ModelGene(cg_tadZ, models['C'])
    mgC_gspd = ModelGene(cg_gspd, models['C'])

    models['C'].add_mandatory_gene(mgC_sctn_flg)
    models['C'].add_mandatory_gene(mgC_sctj_flg)
    models['C'].add_mandatory_gene(mgC_flgB)
    models['C'].add_accessory_gene(mgC_tadZ)
    models['C'].add_accessory_gene(mgC_gspd)

    models['C']._min_mandatory_genes_required = 1
    models['C']._min_genes_required = 2

    ###########
    # Model D #
    ###########
    models['D'] = Model("foo/D", 10)
    mgD_abc = ModelGene(cg_abc, models['D'])
    mgD_sctn = ModelGene(cg_sctn, models['D'])
    models['D'].add_mandatory_gene(mgD_abc)
    models['D'].add_accessory_gene(mgD_sctn)

    models['D']._min_mandatory_genes_required = 1
    models['D']._min_genes_required = 1
    ###########
    # Model E #
    ###########
    # model E has a single accessory gene and no mandatory gene
    models['E'] = Model("foo/E", 10)
    mgE_gspd = ModelGene(cg_gspd, models['E'])
    models['E'].add_accessory_gene(mgE_gspd)

    models['E']._min_mandatory_genes_required = 0
    models['E']._min_genes_required = 1

    ###########
    # Model F #
    ###########
    models['F'] = Model("foo/F", 10)
    mgF_abc = ModelGene(cg_abc, models['F'])
    models['F'].add_mandatory_gene(mgF_abc)

    models['F']._min_mandatory_genes_required = 1
    models['F']._min_genes_required = 1

    #####################
    # Model G idem as C #
    #####################
    # same genes as model C, but the _min_* attributes are not overridden here
    # and no cluster below is built on model G
    models['G'] = Model("foo/G", 10)
    mgG_sctn_flg = ModelGene(cg_sctn_flg, models['G'])
    mgG_sctj_flg = ModelGene(cg_sctj_flg, models['G'])
    mgG_flgB = ModelGene(cg_flgB, models['G'])
    mgG_tadZ = ModelGene(cg_tadZ, models['G'])
    mgG_gspd = ModelGene(cg_gspd, models['G'])
    models['G'].add_mandatory_gene(mgG_sctn_flg)
    models['G'].add_mandatory_gene(mgG_sctj_flg)
    models['G'].add_mandatory_gene(mgG_flgB)
    models['G'].add_accessory_gene(mgG_tadZ)
    models['G'].add_accessory_gene(mgG_gspd)

    #####################
    # Model H idem as D #
    #####################
    # no cluster below is built on model H
    models['H'] = Model("foo/H", 10)
    mgH_abc = ModelGene(cg_abc, models['H'])
    mgH_sctn = ModelGene(cg_sctn, models['H'])
    models['H'].add_mandatory_gene(mgH_abc)
    models['H'].add_accessory_gene(mgH_sctn)

    models['H']._min_mandatory_genes_required = 1
    models['H']._min_genes_required = 1

    ###########
    # Model I #
    ###########
    models['I'] = Model("foo/I", 10)
    mgI_abc = ModelGene(cg_abc, models['I'])
    mgI_flgB = ModelGene(cg_flgB, models['I'])
    mgI_tadZ = ModelGene(cg_tadZ, models['I'])
    models['I'].add_mandatory_gene(mgI_abc)
    models['I'].add_mandatory_gene(mgI_flgB)
    models['I'].add_accessory_gene(mgI_tadZ)

    models['I']._min_mandatory_genes_required = 1
    models['I']._min_genes_required = 1

    ###########
    # model J #
    ###########
    models['J'] = Model("foo/J", 10)
    mgJ_abc = ModelGene(cg_abc, models['J'])
    mgJ_gspd = ModelGene(cg_gspd, models['J'])
    mgJ_tadZ = ModelGene(cg_tadZ, models['J'])
    mgJ_sctc = ModelGene(cg_sctc, models['J'])
    models['J'].add_mandatory_gene(mgJ_abc)
    models['J'].add_mandatory_gene(mgJ_gspd)
    models['J'].add_accessory_gene(mgJ_tadZ)
    models['J'].add_accessory_gene(mgJ_sctc)

    models['J']._min_mandatory_genes_required = 1
    models['J']._min_genes_required = 1

    ###########
    # model K #
    ###########
    models['K'] = Model("foo/K", 10)
    mgK_flgB = ModelGene(cg_flgB, models['K'])
    mgK_sctn_flg = ModelGene(cg_sctn_flg, models['K'])
    mgK_sctj_flg = ModelGene(cg_sctj_flg, models['K'])
    mgK_sctn = ModelGene(cg_sctn, models['K'])
    models['K'].add_mandatory_gene(mgK_flgB)
    models['K'].add_mandatory_gene(mgK_sctn_flg)
    models['K'].add_accessory_gene(mgK_sctj_flg)
    models['K'].add_accessory_gene(mgK_sctn)

    models['K']._min_mandatory_genes_required = 1
    models['K']._min_genes_required = 1

    ###########
    # model L #
    ###########
    # same genes as model K, except that sctN is created with loner=True;
    # the _min_* attributes are not overridden here
    models['L'] = Model("foo/L", 10)
    mgL_flgB = ModelGene(cg_flgB, models['L'])
    mgL_sctn_flg = ModelGene(cg_sctn_flg, models['L'])
    mgL_sctj_flg = ModelGene(cg_sctj_flg, models['L'])
    mgL_sctn = ModelGene(cg_sctn, models['L'], loner=True)
    models['L'].add_mandatory_gene(mgL_flgB)
    models['L'].add_mandatory_gene(mgL_sctn_flg)
    models['L'].add_accessory_gene(mgL_sctj_flg)
    models['L'].add_accessory_gene(mgL_sctn)

    ###########
    # model M #
    ###########
    # NOTE(review): the model stored under the key 'M' is created with the
    # fqn "foo/L", the same as model 'L' above. This looks like a copy/paste
    # slip ("foo/M" expected?) - confirm with the tests using this fixture
    # before changing it.
    models['M'] = Model("foo/L", 10)
    mgM_sctj = ModelGene(cg_sctj, models['M'])
    mgM_gspd = ModelGene(cg_gspd, models['M'])
    mgM_sctn = ModelGene(cg_sctn, models['M'], multi_system=True)
    mgM_tadZ = ModelGene(cg_tadZ, models['M'])
    mgM_abc = ModelGene(cg_abc, models['M'])
    models['M'].add_mandatory_gene(mgM_sctj)
    models['M'].add_mandatory_gene(mgM_gspd)
    models['M'].add_accessory_gene(mgM_sctn)
    models['M'].add_accessory_gene(mgM_tadZ)
    models['M'].add_accessory_gene(mgM_abc)

    ###########
    # model N #
    ###########
    # two genes of model N (sctN and tadZ) are created with loner=True
    models['N'] = Model("foo/N", 10)
    mgN_flgB = ModelGene(cg_flgB, models['N'])
    mgN_sctn_flg = ModelGene(cg_sctn_flg, models['N'])
    mgN_sctj = ModelGene(cg_sctj, models['N'])
    mgN_sctj_flg = ModelGene(cg_sctj_flg, models['N'])
    mgN_sctn = ModelGene(cg_sctn, models['N'], loner=True)
    mgN_tadZ = ModelGene(cg_tadZ, models['N'], loner=True)
    models['N'].add_mandatory_gene(mgN_flgB)
    models['N'].add_mandatory_gene(mgN_sctn_flg)
    models['N'].add_accessory_gene(mgN_sctj)
    models['N'].add_accessory_gene(mgN_sctj_flg)
    models['N'].add_accessory_gene(mgN_sctn)
    models['N'].add_accessory_gene(mgN_tadZ)

    ###########
    # model O #
    ###########
    # model O mixes multi_system and loner genes, exchangeables
    # and a neutral gene
    models['O'] = Model("foo/O", 10)
    mgO_sctj = ModelGene(cg_sctj, models['O'], multi_system=True)
    mgO_sctj_flg = Exchangeable(cg_sctj_flg, mgO_sctj)
    mgO_sctj.add_exchangeable(mgO_sctj_flg)
    mgO_gspd = ModelGene(cg_gspd, models['O'], loner=True, multi_system=True)
    mgO_sctn = ModelGene(cg_sctn, models['O'], multi_system=True)
    mgO_sctn_flg = Exchangeable(cg_sctn_flg, mgO_sctn)
    mgO_sctn.add_exchangeable(mgO_sctn_flg)
    mgO_tadZ = ModelGene(cg_tadZ, models['O'], loner=True)
    mgO_abc = ModelGene(cg_abc, models['O'])

    models['O'].add_mandatory_gene(mgO_sctj)
    models['O'].add_mandatory_gene(mgO_gspd)
    models['O'].add_accessory_gene(mgO_sctn)
    models['O'].add_accessory_gene(mgO_tadZ)
    models['O'].add_neutral_gene(mgO_abc)

    # One core hit per core gene; the fifth positional argument (1 to 9)
    # is the only numeric field which differs from one hit to the other.
    # NOTE(review): ch_sctj is assigned twice. The first object (position 1)
    # is overwritten by the second one (position 5) before it is ever used,
    # so every cluster below gets the sctJ hit at position 5 - confirm this
    # is intended.
    ch_sctj = CoreHit(cg_sctj, "hit_sctj", 803, "replicon_id", 1, 1.0, 1.0,
                      1.0, 1.0, 10, 20)
    ch_sctn = CoreHit(cg_sctn, "hit_sctn", 803, "replicon_id", 2, 1.0, 1.0,
                      1.0, 1.0, 10, 20)
    ch_gspd = CoreHit(cg_gspd, "hit_gspd", 803, "replicon_id", 3, 1.0, 1.0,
                      1.0, 1.0, 10, 20)
    ch_sctn_flg = CoreHit(cg_sctn_flg, "hit_sctn_flg", 803, "replicon_id", 4,
                          1.0, 1.0, 1.0, 1.0, 10, 20)
    ch_sctj = CoreHit(cg_sctj, "hit_sctj", 803, "replicon_id", 5, 1.0, 1.0,
                      1.0, 1.0, 10, 20)
    ch_sctj_flg = CoreHit(cg_sctj_flg, "hit_sctj_flg", 803, "replicon_id", 6,
                          1.0, 1.0, 1.0, 1.0, 10, 20)
    ch_flgB = CoreHit(cg_flgB, "hit_flgB", 803, "replicon_id", 7, 1.0, 1.0,
                      1.0, 1.0, 10, 20)
    ch_tadZ = CoreHit(cg_tadZ, "hit_tadZ", 803, "replicon_id", 8, 1.0, 1.0,
                      1.0, 1.0, 10, 20)
    ch_abc = CoreHit(cg_abc, "hit_abc", 803, "replicon_id", 9, 1.0, 1.0, 1.0,
                     1.0, 10, 20)

    hit_weights = HitWeight(**cfg.hit_weights())

    clusters = {}
    # c1, c2: model A
    clusters['c1'] = Cluster([
        ModelHit(ch_sctj, gene_ref=mgA_sctj, gene_status=GeneStatus.MANDATORY),
        ModelHit(ch_sctn, gene_ref=mgA_sctn, gene_status=GeneStatus.MANDATORY),
        ModelHit(ch_gspd, gene_ref=mgA_gspd, gene_status=GeneStatus.ACCESSORY)
    ], models['A'], hit_weights)
    clusters['c2'] = Cluster([
        ModelHit(ch_sctj, gene_ref=mgA_sctj, gene_status=GeneStatus.MANDATORY),
        ModelHit(ch_sctn, gene_ref=mgA_sctn, gene_status=GeneStatus.MANDATORY)
    ], models['A'], hit_weights)

    # c3: model B
    clusters['c3'] = Cluster([
        ModelHit(ch_sctj_flg,
                 gene_ref=mgB_sctj_flg,
                 gene_status=GeneStatus.MANDATORY),
        ModelHit(ch_tadZ, gene_ref=mgB_tadZ, gene_status=GeneStatus.ACCESSORY),
        ModelHit(ch_flgB, gene_ref=mgB_flgB, gene_status=GeneStatus.ACCESSORY)
    ], models['B'], hit_weights)

    # c4: model C
    clusters['c4'] = Cluster([
        ModelHit(ch_sctj_flg,
                 gene_ref=mgC_sctj_flg,
                 gene_status=GeneStatus.MANDATORY),
        ModelHit(ch_tadZ, gene_ref=mgC_tadZ, gene_status=GeneStatus.ACCESSORY),
        ModelHit(ch_flgB, gene_ref=mgC_flgB, gene_status=GeneStatus.MANDATORY),
        ModelHit(ch_gspd, gene_ref=mgC_gspd, gene_status=GeneStatus.ACCESSORY)
    ], models['C'], hit_weights)

    # c5: model D
    clusters['c5'] = Cluster([
        ModelHit(ch_abc, gene_ref=mgD_abc, gene_status=GeneStatus.MANDATORY),
        ModelHit(ch_sctn, gene_ref=mgD_sctn, gene_status=GeneStatus.ACCESSORY)
    ], models['D'], hit_weights)

    # c6: model E
    clusters['c6'] = Cluster([
        ModelHit(ch_gspd, gene_ref=mgE_gspd, gene_status=GeneStatus.ACCESSORY)
    ], models['E'], hit_weights)

    # c7: model F
    clusters['c7'] = Cluster(
        [ModelHit(ch_abc, gene_ref=mgF_abc, gene_status=GeneStatus.MANDATORY)],
        models['F'], hit_weights)

    # c8: model I
    clusters['c8'] = Cluster([
        ModelHit(ch_flgB, gene_ref=mgI_flgB, gene_status=GeneStatus.MANDATORY),
        ModelHit(ch_tadZ, gene_ref=mgI_tadZ, gene_status=GeneStatus.ACCESSORY)
    ], models['I'], hit_weights)

    # c9: model J
    clusters['c9'] = Cluster([
        ModelHit(ch_abc, gene_ref=mgJ_abc, gene_status=GeneStatus.MANDATORY),
        ModelHit(ch_tadZ, gene_ref=mgJ_tadZ, gene_status=GeneStatus.ACCESSORY)
    ], models['J'], hit_weights)

    # c10: model K
    clusters['c10'] = Cluster([
        ModelHit(ch_flgB, gene_ref=mgK_flgB, gene_status=GeneStatus.MANDATORY),
        ModelHit(ch_sctn, gene_ref=mgK_sctn, gene_status=GeneStatus.ACCESSORY)
    ], models['K'], hit_weights)
    # c11 to c13: model L; c13 wraps the sctN hit in a Loner
    clusters['c11'] = Cluster([
        ModelHit(ch_flgB, gene_ref=mgL_flgB, gene_status=GeneStatus.MANDATORY),
        ModelHit(ch_sctn_flg,
                 gene_ref=mgL_sctn_flg,
                 gene_status=GeneStatus.MANDATORY)
    ], models['L'], hit_weights)
    clusters['c12'] = Cluster([
        ModelHit(ch_sctj_flg,
                 gene_ref=mgL_sctj_flg,
                 gene_status=GeneStatus.ACCESSORY),
        ModelHit(ch_sctn, gene_ref=mgL_sctn, gene_status=GeneStatus.ACCESSORY)
    ], models['L'], hit_weights)
    clusters['c13'] = Cluster(
        [Loner(ch_sctn, gene_ref=mgL_sctn, gene_status=GeneStatus.ACCESSORY)],
        models['L'], hit_weights)

    # c14 to c16: model M; the sctN hit is wrapped in a MultiSystem
    # NOTE(review): in c14 the gspD hit gets the status ACCESSORY whereas
    # mgM_gspd is added to model M as a mandatory gene - confirm it is intended.
    clusters['c14'] = Cluster([
        ModelHit(ch_sctj, mgM_sctj, gene_status=GeneStatus.MANDATORY),
        MultiSystem(
            ch_sctn, gene_ref=mgM_sctn, gene_status=GeneStatus.ACCESSORY),
        ModelHit(ch_gspd, gene_ref=mgM_gspd, gene_status=GeneStatus.ACCESSORY)
    ], models['M'], hit_weights)
    clusters['c15'] = Cluster([
        ModelHit(ch_tadZ, gene_ref=mgM_tadZ, gene_status=GeneStatus.ACCESSORY),
        ModelHit(ch_abc, gene_ref=mgM_abc, gene_status=GeneStatus.ACCESSORY)
    ], models['M'], hit_weights)
    clusters['c16'] = Cluster([
        MultiSystem(
            ch_sctn, gene_ref=mgM_sctn, gene_status=GeneStatus.ACCESSORY)
    ], models['M'], hit_weights)

    # c17 to c20: model N
    # NOTE(review): c17, c18 and c19 are attached to model N but several of
    # their hits reference the gene objects of model L (mgL_flgB,
    # mgL_sctn_flg, mgL_sctj_flg, mgL_sctn) instead of the mgN_* ones.
    # In c18 both hits get the status MANDATORY although sctJ and sctJ_FLG
    # are added to model N as accessory genes. Possibly copy/paste
    # leftovers - confirm before changing, tests may rely on it.
    clusters['c17'] = Cluster([
        ModelHit(ch_flgB, mgL_flgB, GeneStatus.MANDATORY),
        ModelHit(ch_sctn_flg, mgL_sctn_flg, GeneStatus.MANDATORY)
    ], models['N'], hit_weights)
    clusters['c18'] = Cluster([
        ModelHit(ch_sctj, mgN_sctj, GeneStatus.MANDATORY),
        ModelHit(ch_sctj_flg, mgL_sctj_flg, GeneStatus.MANDATORY)
    ], models['N'], hit_weights)
    clusters['c19'] = Cluster([Loner(ch_sctn, mgL_sctn, GeneStatus.ACCESSORY)],
                              models['N'], hit_weights)
    clusters['c20'] = Cluster([Loner(ch_tadZ, mgN_tadZ, GeneStatus.ACCESSORY)],
                              models['N'], hit_weights)
    # c21 to c26: model O
    clusters['c21'] = Cluster([
        ModelHit(ch_sctj, mgO_sctj, GeneStatus.MANDATORY),
        ModelHit(ch_abc, mgO_abc, GeneStatus.NEUTRAL),
        ModelHit(ch_tadZ, mgO_tadZ, GeneStatus.ACCESSORY)
    ], models['O'], hit_weights)
    clusters['c22'] = Cluster([
        ModelHit(ch_sctn_flg, mgO_sctn_flg, GeneStatus.ACCESSORY),
        ModelHit(ch_gspd, mgO_gspd, GeneStatus.MANDATORY),
        ModelHit(ch_tadZ, mgO_tadZ, GeneStatus.ACCESSORY)
    ], models['O'], hit_weights)
    # c23 to c26 each hold a single hit, wrapped in a Loner or a MultiSystem
    clusters['c23'] = Cluster(
        [Loner(ch_gspd, mgO_gspd, gene_status=GeneStatus.MANDATORY)],
        models['O'], hit_weights)
    clusters['c24'] = Cluster(
        [MultiSystem(ch_gspd, mgO_gspd, gene_status=GeneStatus.MANDATORY)],
        models['O'], hit_weights)
    clusters['c25'] = Cluster(
        [MultiSystem(ch_sctn, mgO_sctn, gene_status=GeneStatus.ACCESSORY)],
        models['O'], hit_weights)
    clusters['c26'] = Cluster([
        MultiSystem(
            ch_sctj_flg, mgO_sctj_flg, gene_status=GeneStatus.MANDATORY)
    ], models['O'], hit_weights)
    return models, clusters
Ejemplo n.º 18
0
    def test_filter(self):
        """
        Model.filter must keep the hits matching the genes of the model
        (exchangeables included) and discard the hits of another model.
        """
        model = Model("foo/bar", 10)
        model_2 = Model("foo/buz", 10)

        def core(name):
            # all core genes share the same location and profile factory
            return CoreGene(self.model_location, name, self.profile_factory)

        # mandatory gene and its exchangeable
        sctJ_FLG = ModelGene(core('sctJ_FLG'), model)
        model.add_mandatory_gene(sctJ_FLG)
        sctJ_FLG.add_exchangeable(Exchangeable(core('sctJ'), sctJ_FLG))

        # accessory gene and its exchangeable
        sctN_FLG = ModelGene(core('sctN_FLG'), model)
        model.add_accessory_gene(sctN_FLG)
        sctN_FLG.add_exchangeable(Exchangeable(core('sctN'), sctN_FLG))

        # forbidden gene
        sctC = ModelGene(core('sctC'), model)
        model.add_forbidden_gene(sctC)

        # neutral gene and its exchangeable
        toto = ModelGene(core('toto'), model)
        model.add_neutral_gene(toto)
        totote = Exchangeable(core('totote'), toto)
        toto.add_exchangeable(totote)

        # gene and exchangeable which belong to the second model only
        gspd = ModelGene(core('gspD'), model_2)
        tadz = Exchangeable(core('tadZ'), gspd)
        gspd.add_exchangeable(tadz)

        def hits_for(genes):
            # one hit per gene, all on the same replicon and position
            return [
                CoreHit(gene, f"PSAE001c01_{gene.name}", 1, "PSAE001c01", 1,
                        1.0, 1.0, 1.0, 1.0, 1, 2)
                for gene in genes
            ]

        hit_to_keep = hits_for((sctJ_FLG, sctN_FLG, sctC, toto, totote))
        hit_to_filter_out = hits_for((gspd, tadz))

        filtered_hits = model.filter(hit_to_keep + hit_to_filter_out)

        self.assertListEqual(sorted(hit_to_keep), sorted(filtered_hits))
Ejemplo n.º 19
0
    def test_solutions_to_tsv(self):
        args = argparse.Namespace()
        args.sequence_db = self.find_data("base", "test_1.fasta")
        args.db_type = 'gembase'
        args.models_dir = self.find_data('models')
        cfg = Config(MacsyDefaults(), args)
        model_name = 'foo'
        models_location = ModelLocation(
            path=os.path.join(args.models_dir, model_name))

        # we need to reset the ProfileFactory
        # because it's a like a singleton
        # so other tests are influenced by ProfileFactory and it's configuration
        # for instance search_genes get profile without hmmer_exe
        profile_factory = ProfileFactory(cfg)

        model_A = Model("foo/A", 10)
        model_B = Model("foo/B", 10)
        model_C = Model("foo/C", 10)

        c_gene_sctn_flg = CoreGene(models_location, "sctN_FLG",
                                   profile_factory)
        gene_sctn_flg = ModelGene(c_gene_sctn_flg, model_B)
        c_gene_sctj_flg = CoreGene(models_location, "sctJ_FLG",
                                   profile_factory)
        gene_sctj_flg = ModelGene(c_gene_sctj_flg, model_B)
        c_gene_flgB = CoreGene(models_location, "flgB", profile_factory)
        gene_flgB = ModelGene(c_gene_flgB, model_B)
        c_gene_tadZ = CoreGene(models_location, "tadZ", profile_factory)
        gene_tadZ = ModelGene(c_gene_tadZ, model_B)

        c_gene_sctn = CoreGene(models_location, "sctN", profile_factory)
        gene_sctn = ModelGene(c_gene_sctn, model_A)
        gene_sctn_hom = Exchangeable(c_gene_sctn_flg, gene_sctn)
        gene_sctn.add_exchangeable(gene_sctn_hom)

        c_gene_sctj = CoreGene(models_location, "sctJ", profile_factory)
        gene_sctj = ModelGene(c_gene_sctj, model_A)
        gene_sctj_an = Exchangeable(c_gene_sctj_flg, gene_sctj)
        gene_sctj.add_exchangeable(gene_sctj_an)

        c_gene_gspd = CoreGene(models_location, "gspD", profile_factory)
        gene_gspd = ModelGene(c_gene_gspd, model_A)
        gene_gspd_an = Exchangeable(c_gene_flgB, gene_gspd)
        gene_gspd.add_exchangeable(gene_gspd_an)

        c_gene_abc = CoreGene(models_location, "abc", profile_factory)
        gene_abc = ModelGene(c_gene_abc, model_A)
        gene_abc_ho = Exchangeable(c_gene_tadZ, gene_abc)
        gene_abc.add_exchangeable(gene_abc_ho)

        model_A.add_mandatory_gene(gene_sctn)
        model_A.add_mandatory_gene(gene_sctj)
        model_A.add_accessory_gene(gene_gspd)
        model_A.add_forbidden_gene(gene_abc)

        model_B.add_mandatory_gene(gene_sctn_flg)
        model_B.add_mandatory_gene(gene_sctj_flg)
        model_B.add_accessory_gene(gene_flgB)
        model_B.add_accessory_gene(gene_tadZ)

        model_C.add_mandatory_gene(gene_sctn_flg)
        model_C.add_mandatory_gene(gene_sctj_flg)
        model_C.add_mandatory_gene(gene_flgB)
        model_C.add_accessory_gene(gene_tadZ)
        model_C.add_accessory_gene(gene_gspd)

        h_sctj = Hit(c_gene_sctj, "hit_sctj", 803, "replicon_id", 1, 1.0, 1.0,
                     1.0, 1.0, 10, 20)
        h_sctn = Hit(c_gene_sctn, "hit_sctn", 803, "replicon_id", 1, 1.0, 1.0,
                     1.0, 1.0, 10, 20)
        h_gspd = Hit(c_gene_gspd, "hit_gspd", 803, "replicon_id", 1, 1.0, 1.0,
                     1.0, 1.0, 10, 20)

        h_sctj_flg = Hit(c_gene_sctj_flg, "hit_sctj_flg", 803, "replicon_id",
                         1, 1.0, 1.0, 1.0, 1.0, 10, 20)
        h_flgB = Hit(c_gene_flgB, "hit_flgB", 803, "replicon_id", 1, 1.0, 1.0,
                     1.0, 1.0, 10, 20)
        h_tadZ = Hit(c_gene_tadZ, "hit_tadZ", 803, "replicon_id", 1, 1.0, 1.0,
                     1.0, 1.0, 10, 20)

        model_A._min_mandatory_genes_required = 2
        model_A._min_genes_required = 2
        hit_weights = HitWeight(**cfg.hit_weights())
        c1 = Cluster([
            ValidHit(h_sctj, gene_sctj, GeneStatus.MANDATORY),
            ValidHit(h_sctn, gene_sctn, GeneStatus.MANDATORY),
            ValidHit(h_gspd, gene_gspd, GeneStatus.ACCESSORY)
        ], model_A, hit_weights)

        c2 = Cluster([
            ValidHit(h_sctj, gene_sctj, GeneStatus.MANDATORY),
            ValidHit(h_sctn, gene_sctn, GeneStatus.MANDATORY)
        ], model_A, hit_weights)

        model_B._min_mandatory_genes_required = 1
        model_B._min_genes_required = 2
        c3 = Cluster([
            ValidHit(h_sctj_flg, gene_sctj_flg, GeneStatus.MANDATORY),
            ValidHit(h_tadZ, gene_tadZ, GeneStatus.ACCESSORY),
            ValidHit(h_flgB, gene_flgB, GeneStatus.ACCESSORY)
        ], model_B, hit_weights)

        model_C._min_mandatory_genes_required = 1
        model_C._min_genes_required = 2
        c4 = Cluster([
            ValidHit(h_sctj_flg, gene_sctj_flg, GeneStatus.MANDATORY),
            ValidHit(h_tadZ, gene_tadZ, GeneStatus.ACCESSORY),
            ValidHit(h_flgB, gene_flgB, GeneStatus.MANDATORY),
            ValidHit(h_gspd, gene_gspd, GeneStatus.ACCESSORY)
        ], model_C, hit_weights)

        sys_A = System(model_A, [c1, c2], cfg.redundancy_penalty())
        sys_A.id = "sys_id_A"
        sys_B = System(model_B, [c3], cfg.redundancy_penalty())
        sys_B.id = "sys_id_B"
        sys_C = System(model_C, [c4], cfg.redundancy_penalty())
        sys_C.id = "sys_id_C"

        sol_1 = [sys_A, sys_B]
        sol_2 = [sys_A, sys_C]
        sol_id_1 = '1'
        sol_id_2 = '2'

        sol_tsv = f"""# macsyfinder {macsypy.__version__}
# {' '.join(sys.argv)}
# Systems found:
"""
        sol_tsv += "\t".join([
            "sol_id", "replicon", "hit_id", "gene_name", "hit_pos",
            "model_fqn", "sys_id", "sys_loci", "sys_wholeness", "sys_score",
            "sys_occ", "hit_gene_ref", "hit_status", "hit_seq_len",
            "hit_i_eval", "hit_score", "hit_profile_cov", "hit_seq_cov",
            "hit_begin_match", "hit_end_match", "used_in"
        ])
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id_1, 'replicon_id', 'hit_sctj', 'sctJ', '1', 'foo/A',
            'sys_id_A', '2', '1.000', '1.500', '2', 'sctJ', 'mandatory', '803',
            '1.0', '1.000', '1.000', '1.000', '10', '20', ''
        ])
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id_1, 'replicon_id', 'hit_sctn', 'sctN', '1', 'foo/A',
            'sys_id_A', '2', '1.000', '1.500', '2', 'sctN', 'mandatory', '803',
            '1.0', '1.000', '1.000', '1.000', '10', '20', ''
        ])
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id_1, 'replicon_id', 'hit_gspd', 'gspD', '1', 'foo/A',
            'sys_id_A', '2', '1.000', '1.500', '2', 'gspD', 'accessory', '803',
            '1.0', '1.000', '1.000', '1.000', '10', '20', ''
        ])
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id_1, 'replicon_id', 'hit_sctj', 'sctJ', '1', 'foo/A',
            'sys_id_A', '2', '1.000', '1.500', '2', 'sctJ', 'mandatory', '803',
            '1.0', '1.000', '1.000', '1.000', '10', '20', ''
        ])
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id_1, 'replicon_id', 'hit_sctn', 'sctN', '1', 'foo/A',
            'sys_id_A', '2', '1.000', '1.500', '2', 'sctN', 'mandatory', '803',
            '1.0', '1.000', '1.000', '1.000', '10', '20', ''
        ])
        sol_tsv += "\n"
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id_1, 'replicon_id', 'hit_sctj_flg', 'sctJ_FLG', '1', 'foo/B',
            'sys_id_B', '1', '0.750', '2.000', '1', 'sctJ_FLG', 'mandatory',
            '803', '1.0', '1.000', '1.000', '1.000', '10', '20', ''
        ])
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id_1, 'replicon_id', 'hit_tadZ', 'tadZ', '1', 'foo/B',
            'sys_id_B', '1', '0.750', '2.000', '1', 'tadZ', 'accessory', '803',
            '1.0', '1.000', '1.000', '1.000', '10', '20', ''
        ])
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id_1, 'replicon_id', 'hit_flgB', 'flgB', '1', 'foo/B',
            'sys_id_B', '1', '0.750', '2.000', '1', 'flgB', 'accessory', '803',
            '1.0', '1.000', '1.000', '1.000', '10', '20', ''
        ])
        sol_tsv += "\n"
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id_2, 'replicon_id', 'hit_sctj', 'sctJ', '1', 'foo/A',
            'sys_id_A', '2', '1.000', '1.500', '2', 'sctJ', 'mandatory', '803',
            '1.0', '1.000', '1.000', '1.000', '10', '20', ''
        ])
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id_2, 'replicon_id', 'hit_sctn', 'sctN', '1', 'foo/A',
            'sys_id_A', '2', '1.000', '1.500', '2', 'sctN', 'mandatory', '803',
            '1.0', '1.000', '1.000', '1.000', '10', '20', ''
        ])
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id_2, 'replicon_id', 'hit_gspd', 'gspD', '1', 'foo/A',
            'sys_id_A', '2', '1.000', '1.500', '2', 'gspD', 'accessory', '803',
            '1.0', '1.000', '1.000', '1.000', '10', '20', ''
        ])
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id_2, 'replicon_id', 'hit_sctj', 'sctJ', '1', 'foo/A',
            'sys_id_A', '2', '1.000', '1.500', '2', 'sctJ', 'mandatory', '803',
            '1.0', '1.000', '1.000', '1.000', '10', '20', ''
        ])
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id_2, 'replicon_id', 'hit_sctn', 'sctN', '1', 'foo/A',
            'sys_id_A', '2', '1.000', '1.500', '2', 'sctN', 'mandatory', '803',
            '1.0', '1.000', '1.000', '1.000', '10', '20', ''
        ])
        sol_tsv += "\n"
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id_2, 'replicon_id', 'hit_sctj_flg', 'sctJ_FLG', '1', 'foo/C',
            'sys_id_C', '1', '0.800', '3.000', '1', 'sctJ_FLG', 'mandatory',
            '803', '1.0', '1.000', '1.000', '1.000', '10', '20', 'sys_id_B'
        ])
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id_2, 'replicon_id', 'hit_tadZ', 'tadZ', '1', 'foo/C',
            'sys_id_C', '1', '0.800', '3.000', '1', 'tadZ', 'accessory', '803',
            '1.0', '1.000', '1.000', '1.000', '10', '20', 'sys_id_B'
        ])
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id_2, 'replicon_id', 'hit_flgB', 'flgB', '1', 'foo/C',
            'sys_id_C', '1', '0.800', '3.000', '1', 'flgB', 'mandatory', '803',
            '1.0', '1.000', '1.000', '1.000', '10', '20', 'sys_id_B'
        ])
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id_2, 'replicon_id', 'hit_gspd', 'gspD', '1', 'foo/C',
            'sys_id_C', '1', '0.800', '3.000', '1', 'gspD', 'accessory', '803',
            '1.0', '1.000', '1.000', '1.000', '10', '20', 'sys_id_A'
        ])
        sol_tsv += "\n"
        sol_tsv += "\n"

        f_out = StringIO()
        hit_multi_sys_tracker = HitSystemTracker([sys_A, sys_B])
        solutions_to_tsv([sol_1, sol_2], hit_multi_sys_tracker, f_out)
        self.assertMultiLineEqual(sol_tsv, f_out.getvalue())
Ejemplo n.º 20
0
    def test_score(self):
        """
        Check the value of Cluster.score for several combinations of hits.

        The cases covered are: mandatory and accessory hits only, an extra
        neutral hit, duplicated hits, an exchangeable (homolog) standing for
        an accessory gene, a cluster made of a single loner multi-system hit,
        a second access to an already computed score, and a hit with a
        forbidden status, which must raise a MacsypyError.
        """
        model = Model("foo/T2SS", 10)
        c_gene_gspd = CoreGene(self.model_location, "gspD", self.profile_factory)
        gene_gspd = ModelGene(c_gene_gspd, model)
        model.add_mandatory_gene(gene_gspd)

        c_gene_tadZ = CoreGene(self.model_location, "tadZ", self.profile_factory)
        gene_tadZ = ModelGene(c_gene_tadZ, model)
        model.add_mandatory_gene(gene_tadZ)

        # NOTE(review): these locals are named *sctj* but the core gene loaded
        # is "sctC" -- confirm that this is intended
        c_gene_sctj = CoreGene(self.model_location, "sctC", self.profile_factory)
        gene_sctj = ModelGene(c_gene_sctj, model)

        c_gene_sctJ_FLG = CoreGene(self.model_location, "sctJ_FLG", self.profile_factory)

        # sctJ_FLG is declared as exchangeable for the accessory gene above
        analog_sctJ_FLG = Exchangeable(c_gene_sctJ_FLG, gene_sctj)
        gene_sctj.add_exchangeable(analog_sctJ_FLG)
        model.add_accessory_gene(gene_sctj)

        # accessory loner gene, with sctN_FLG as exchangeable
        c_gene_sctn = CoreGene(self.model_location, "sctN", self.profile_factory)
        gene_sctn = ModelGene(c_gene_sctn, model, loner=True)
        c_gene_sctn_FLG = CoreGene(self.model_location, "sctN_FLG", self.profile_factory)
        homolog_sctn_FLG = Exchangeable(c_gene_sctn_FLG, gene_sctn)
        gene_sctn.add_exchangeable(homolog_sctn_FLG)
        model.add_accessory_gene(gene_sctn)

        c_gene_toto = CoreGene(self.model_location, "toto", self.profile_factory)
        gene_toto = ModelGene(c_gene_toto, model)
        model.add_neutral_gene(gene_toto)

        # mandatory gene which is both loner and multi_system
        c_gene_flie = CoreGene(self.model_location, "fliE", self.profile_factory)
        gene_flie = ModelGene(c_gene_flie, model, loner=True, multi_system=True)
        model.add_mandatory_gene(gene_flie)

        h_gspd = Hit(c_gene_gspd, "h_gspd", 10, "replicon_id", 1, 1.0, 1.0, 1.0, 1.0, 10, 20)
        v_h_gspd = ValidHit(h_gspd, gene_gspd, GeneStatus.MANDATORY)
        h_tadz = Hit(c_gene_tadZ, "h_tadz", 20, "replicon_id", 1, 1.0, 1.0, 1.0, 1.0, 10, 20)
        v_h_tadz = ValidHit(h_tadz, gene_tadZ, GeneStatus.MANDATORY)

        h_sctj = Hit(c_gene_sctj, "h_sctj", 30, "replicon_id", 1, 1.0, 1.0, 1.0, 1.0, 10, 20)
        v_h_sctj = ValidHit(h_sctj, gene_sctj, GeneStatus.ACCESSORY)
        # only referenced by the disabled "analog" case further down
        h_sctj_an = Hit(c_gene_sctJ_FLG, "h_sctj_an", 30, "replicon_id", 1, 1.0, 1.0, 1.0, 1.0, 10, 20)
        v_h_sctj_an = ValidHit(h_sctj_an, analog_sctJ_FLG, GeneStatus.ACCESSORY)

        h_sctn = Hit(c_gene_sctn, "sctn", 40, "replicon_id", 1, 1.0, 1.0, 1.0, 1.0, 10, 20)
        v_h_sctn = ValidHit(h_sctn, gene_sctn, GeneStatus.ACCESSORY)
        h_sctn_hom = Hit(c_gene_sctn_FLG, "h_scth_hom", 30, "replicon_id", 1, 1.0, 1.0, 1.0, 1.0, 10, 20)
        v_h_sctn_hom = ValidHit(h_sctn_hom, homolog_sctn_FLG, GeneStatus.ACCESSORY)

        # the hit for the neutral gene is built on the "toto" core gene
        # (it was previously built on the sctN core gene by mistake)
        h_toto = Hit(c_gene_toto, "toto", 50, "replicon_id", 1, 1.0, 1.0, 1.0, 1.0, 10, 20)
        v_h_toto = ValidHit(h_toto, gene_toto, GeneStatus.NEUTRAL)

        h_flie = Hit(c_gene_flie, "h_flie", 100, "replicon_id", 1, 1.0, 1.0, 1.0, 1.0, 10, 20)
        v_h_flie = ValidHit(h_flie, gene_flie, GeneStatus.MANDATORY)

        # 2 mandatory, 2 accessory no analog/homolog
        c1 = Cluster([v_h_gspd, v_h_tadz, v_h_sctj, v_h_sctn], model, self.hit_weights)
        self.assertEqual(c1.score, 3.0)

        # 2 mandatory, 2 accessory 1 neutral, no analog/homolog
        # the expected score is the same as without the neutral hit
        c1 = Cluster([v_h_gspd, v_h_tadz, v_h_sctj, v_h_sctn, v_h_toto], model, self.hit_weights)
        self.assertEqual(c1.score, 3.0)

        # 1 mandatory + 1 mandatory duplicated 1 time
        # 1 accessory + 1 accessory duplicated 1 time
        # no analog/homolog
        # the expected score is the same as without the duplicated hits
        c1 = Cluster([v_h_gspd, v_h_tadz, v_h_sctj, v_h_sctn, v_h_gspd, v_h_sctn], model, self.hit_weights)
        self.assertEqual(c1.score, 3.0)

        # 2 mandatory
        # 1 accessory + 1 accessory homolog
        c1 = Cluster([v_h_gspd, v_h_tadz, v_h_sctj, v_h_sctn_hom], model, self.hit_weights)
        self.assertEqual(c1.score, 2.9)

        # NOTE(review): the case below is disabled, the reason is not recorded here
        # # 2 mandatory
        # # 1 accessory + 1 accessory analog of the 1rst accessory
        # c1 = Cluster([v_h_gspd, v_h_tadz, v_h_sctj, v_h_sctj_an], model, self.hit_weights)
        # self.assertEqual(c1.score, 2.5)

        # test loners multi system
        c1 = Cluster([v_h_flie], model, self.hit_weights)
        self.assertEqual(c1.score, 0.7)

        # test the cache score
        # (second access on the same cluster must give the same value)
        self.assertEqual(c1.score, 0.7)

        # a hit whose status is forbidden cannot be scored
        non_valid_hit = ValidHit(h_sctn, gene_sctn, GeneStatus.FORBIDDEN)
        c1 = Cluster([v_h_gspd, non_valid_hit, v_h_tadz], model, self.hit_weights)
        with self.assertRaises(MacsypyError) as ctx:
            c1.score
        self.assertEqual(str(ctx.exception),
                         "a Cluster contains hit which is neither mandatory nor accessory")
Ejemplo n.º 21
0
def _build_systems(cfg, profile_factory):
    """
    Assemble the eight systems, labelled 'A' to 'H', used as test fixtures.

    All models, genes, hits, clusters and systems are created anew on each
    call. The id of every system is overwritten with a fixed value
    ("replicon_id_A" ... "replicon_id_H") so that results stay stable
    whatever the number of tests ran before, or their order.

    :param cfg: the configuration; ``models_dir()``, ``hit_weights()`` and
                ``redundancy_penalty()`` are called on it
    :param profile_factory: the profile factory handed to every CoreGene
    :return: the systems, indexed by their one letter label
    :rtype: dict
    """
    location = ModelLocation(path=os.path.join(cfg.models_dir(), 'foo'))

    mod_a = Model("foo/A", 10)
    mod_b = Model("foo/B", 10)
    mod_c = Model("foo/C", 10)
    mod_d = Model("foo/D", 10)
    mod_e = Model("foo/E", 10)
    mod_f = Model("foo/F", 10)
    mod_g = Model("foo/G", 10)
    mod_h = Model("foo/H", 10)

    def core(name):
        # core gene *name* taken from the 'foo' model location
        return CoreGene(location, name, profile_factory)

    def exchange(substitute, reference):
        # register the core gene *substitute* as exchangeable for *reference*
        reference.add_exchangeable(Exchangeable(substitute, reference))

    def hit(core_gene, hit_id, position):
        # only the gene, the id and the position differ from one hit to another
        return Hit(core_gene, hit_id, 803, "replicon_id", position,
                   1.0, 1.0, 1.0, 1.0, 10, 20)

    def mandatory(a_hit, gene):
        return ValidHit(a_hit, gene, GeneStatus.MANDATORY)

    def accessory(a_hit, gene):
        return ValidHit(a_hit, gene, GeneStatus.ACCESSORY)

    def quorum(model, min_mandatory, min_genes):
        # set directly the private thresholds of the model
        model._min_mandatory_genes_required = min_mandatory
        model._min_genes_required = min_genes

    # genes bound to model B
    core_sctn_flg = core("sctN_FLG")
    sctn_flg = ModelGene(core_sctn_flg, mod_b)
    core_sctj_flg = core("sctJ_FLG")
    sctj_flg = ModelGene(core_sctj_flg, mod_b)
    core_flgb = core("flgB")
    flgb = ModelGene(core_flgb, mod_b)
    core_tadz = core("tadZ")
    tadz = ModelGene(core_tadz, mod_b)

    # genes bound to model A, each of them gets one exchangeable
    core_sctn = core("sctN")
    sctn = ModelGene(core_sctn, mod_a)
    exchange(core_sctn_flg, sctn)

    core_sctj = core("sctJ")
    sctj = ModelGene(core_sctj, mod_a)
    exchange(core_sctj_flg, sctj)

    core_gspd = core("gspD")
    gspd = ModelGene(core_gspd, mod_a)
    exchange(core_flgb, gspd)

    core_abc = core("abc")
    abc = ModelGene(core_abc, mod_a)
    exchange(core_tadz, abc)

    # composition of each model
    for gene in (sctn, sctj):
        mod_a.add_mandatory_gene(gene)
    mod_a.add_accessory_gene(gspd)
    mod_a.add_forbidden_gene(abc)

    for gene in (sctn_flg, sctj_flg):
        mod_b.add_mandatory_gene(gene)
    for gene in (flgb, tadz):
        mod_b.add_accessory_gene(gene)

    for gene in (sctn_flg, sctj_flg, flgb):
        mod_c.add_mandatory_gene(gene)
    for gene in (tadz, gspd):
        mod_c.add_accessory_gene(gene)

    mod_d.add_mandatory_gene(abc)
    mod_d.add_accessory_gene(sctn)

    mod_e.add_accessory_gene(gspd)

    mod_f.add_mandatory_gene(abc)

    # G has the same composition as C
    for gene in (sctn_flg, sctj_flg, flgb):
        mod_g.add_mandatory_gene(gene)
    for gene in (tadz, gspd):
        mod_g.add_accessory_gene(gene)

    # H has the same composition as D
    mod_h.add_mandatory_gene(abc)
    mod_h.add_accessory_gene(sctn)

    # one hit per core gene, at positions 1 to 7
    hit_sctj = hit(core_sctj, "hit_sctj", 1)
    hit_sctn = hit(core_sctn, "hit_sctn", 2)
    hit_gspd = hit(core_gspd, "hit_gspd", 3)
    hit_sctj_flg = hit(core_sctj_flg, "hit_sctj_flg", 4)
    hit_flgb = hit(core_flgb, "hit_flgB", 5)
    hit_tadz = hit(core_tadz, "hit_tadZ", 6)
    hit_abc = hit(core_abc, "hit_abc", 7)

    # clusters; the thresholds of a model are set just before its cluster(s)
    quorum(mod_a, 2, 2)
    weights = HitWeight(**cfg.hit_weights())
    clst_1 = Cluster(
        [mandatory(hit_sctj, sctj),
         mandatory(hit_sctn, sctn),
         accessory(hit_gspd, gspd)],
        mod_a, weights)
    clst_2 = Cluster(
        [mandatory(hit_sctj, sctj),
         mandatory(hit_sctn, sctn)],
        mod_a, weights)

    quorum(mod_b, 1, 2)
    clst_3 = Cluster(
        [mandatory(hit_sctj_flg, sctj_flg),
         accessory(hit_tadz, tadz),
         accessory(hit_flgb, flgb)],
        mod_b, weights)

    quorum(mod_c, 1, 2)
    clst_4 = Cluster(
        [mandatory(hit_sctj_flg, sctj_flg),
         accessory(hit_tadz, tadz),
         mandatory(hit_flgb, flgb),
         accessory(hit_gspd, gspd)],
        mod_c, weights)

    quorum(mod_d, 1, 1)
    clst_5 = Cluster(
        [mandatory(hit_abc, abc),
         accessory(hit_sctn, sctn)],
        mod_d, weights)

    quorum(mod_e, 0, 1)
    clst_6 = Cluster([accessory(hit_gspd, gspd)], mod_e, weights)

    quorum(mod_f, 1, 1)
    clst_7 = Cluster([mandatory(hit_abc, abc)], mod_f, weights)

    # label, model and clusters of each system, in creation order
    blueprint = (
        ('A', mod_a, [clst_1, clst_2]),  # 5 hits
        ('B', mod_b, [clst_3]),          # 3 hits
        ('C', mod_c, [clst_4]),          # 4 hits
        ('D', mod_d, [clst_5]),          # 2 hits
        ('E', mod_e, [clst_6]),          # 1 hit
        ('F', mod_f, [clst_7]),          # 1 hit
        ('G', mod_g, [clst_4]),          # 4 hits
        ('H', mod_h, [clst_5]),          # 2 hits
    )

    systems = {}
    for label, model, clusters in blueprint:
        system = System(model, clusters, cfg.redundancy_penalty())
        # the id is forced to get stable results
        # whatever the number of tests ran or the tests order
        system.id = f"replicon_id_{label}"
        systems[label] = system

    return systems