Exemple #1
0
 def test_hash(self):
     model_bar = Model('Foo/bar', 10)
     model_bar_bis = Model('Foo/bar', 10)
     model_buz = Model('Foo/buz', 10)
     self.assertTrue(isinstance(hash(model_bar), int))
     self.assertEqual(hash(model_bar), hash(model_bar_bis))
     self.assertNotEqual(hash(model_bar), hash(model_buz))
Exemple #2
0
    def test_UnlikelySystemSerializer_txt(self):
        model = Model("foo/FOO", 10)
        c_gene_gspd = CoreGene(self.model_location, "gspD",
                               self.profile_factory)
        gene_gspd = ModelGene(c_gene_gspd, model)
        model.add_mandatory_gene(gene_gspd)
        c_gene_sctj = CoreGene(self.model_location, "sctJ",
                               self.profile_factory)
        gene_sctj = ModelGene(c_gene_sctj, model)
        model.add_accessory_gene(gene_sctj)
        c_gene_sctn = CoreGene(self.model_location, "sctN",
                               self.profile_factory)
        gene_sctn = ModelGene(c_gene_sctn, model)
        model.add_accessory_gene(gene_sctn)
        c_gene_abc = CoreGene(self.model_location, "abc", self.profile_factory)
        gene_abc = ModelGene(c_gene_abc, model)
        model.add_forbidden_gene(gene_abc)

        hit_1 = Hit(c_gene_gspd, "hit_1", 803, "replicon_id", 1, 1.0, 1.0, 1.0,
                    1.0, 10, 20)
        v_hit_1 = ValidHit(hit_1, gene_gspd, GeneStatus.MANDATORY)
        hit_2 = Hit(c_gene_sctj, "hit_2", 803, "replicon_id", 2, 1.0, 1.0, 1.0,
                    1.0, 10, 20)
        v_hit_2 = ValidHit(hit_2, gene_sctj, GeneStatus.ACCESSORY)
        hit_3 = Hit(c_gene_sctn, "hit_3", 803, "replicon_id", 3, 1.0, 1.0, 1.0,
                    1.0, 10, 20)
        v_hit_3 = ValidHit(hit_3, gene_sctn, GeneStatus.ACCESSORY)
        hit_4 = Hit(c_gene_abc, "hit_4", 803, "replicon_id", 4, 1.0, 1.0, 1.0,
                    1.0, 10, 20)
        v_hit_4 = ValidHit(hit_4, gene_abc, GeneStatus.FORBIDDEN)
        ser = TxtUnikelySystemSerializer()

        ls_1 = UnlikelySystem(model, [v_hit_1], [v_hit_2, v_hit_3], [],
                              [v_hit_4], ["the reason why"])
        txt = ser.serialize(ls_1)
        expected_txt = """This replicon probably not contains a system foo/FOO:
the reason why

system id = replicon_id_FOO_1
model = foo/FOO
replicon = replicon_id
hits = [('hit_1', 'gspD', 1), ('hit_2', 'sctJ', 2), ('hit_3', 'sctN', 3), ('hit_4', 'abc', 4)]
wholeness = 1.000

mandatory genes:
\t- gspD: 1 (gspD)

accessory genes:
\t- sctJ: 1 (sctJ)
\t- sctN: 1 (sctN)

neutral genes:

forbidden genes:
\t- abc: 1 (abc)

Use ordered replicon to have better prediction.
"""
        self.assertEqual(txt, expected_txt)
 def test_iter(self):
     systems = [Model('foo', 10), Model('bar', 10)]
     for s in systems:
         self.system_bank.add_model(s)
     i = 0
     for s in self.system_bank:
         self.assertIn(s, systems)
         i += 1
     self.assertEqual(i, len(systems))
Exemple #4
0
    def test_max_nb_genes(self):
        model_fqn = 'foo/bar'
        inter_gene_max_space = 40
        max_nb_genes_xml = 10
        model = Model(model_fqn, inter_gene_max_space, max_nb_genes=max_nb_genes_xml)
        self.assertEqual(model.max_nb_genes, max_nb_genes_xml)
        model = Model(model_fqn, inter_gene_max_space)
        self.assertIsNone(model.max_nb_genes)

        self.clean_working_dir()
Exemple #5
0
    def test_min_mandatory_genes_required(self):
        model_fqn = 'foo/bar'
        min_mandatory_genes_required_xml = 40
        model = Model(model_fqn, 10, min_mandatory_genes_required=min_mandatory_genes_required_xml)
        gene_name = 'sctJ_FLG'
        c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
        gene = ModelGene(c_gene, model)
        model.add_mandatory_gene(gene)
        self.assertEqual(model.min_mandatory_genes_required, min_mandatory_genes_required_xml)

        system = Model(model_fqn, 10)
        self.assertEqual(system.min_mandatory_genes_required, len(system.mandatory_genes))

        self.clean_working_dir()
    def test_hits(self):
        model = Model("foo/T2SS", 10)
        c_gene_gspd = CoreGene(self.model_location, "gspD",
                               self.profile_factory)
        gene_gspd = ModelGene(c_gene_gspd, model)
        model.add_mandatory_gene(gene_gspd)
        c_gene_sctj = CoreGene(self.model_location, "sctJ",
                               self.profile_factory)
        gene_sctj = ModelGene(c_gene_sctj, model)
        model.add_accessory_gene(gene_sctj)
        c_gene_sctn = CoreGene(self.model_location, "sctN",
                               self.profile_factory)
        gene_sctn = ModelGene(c_gene_sctn, model)
        model.add_accessory_gene(gene_sctn)

        hit_1 = Hit(c_gene_gspd, "hit_1", 803, "replicon_id", 1, 1.0, 1.0, 1.0,
                    1.0, 10, 20)
        v_hit_1 = ValidHit(hit_1, gene_gspd, GeneStatus.MANDATORY)
        hit_2 = Hit(c_gene_sctj, "hit_2", 803, "replicon_id", 1, 1.0, 1.0, 1.0,
                    1.0, 10, 20)
        v_hit_2 = ValidHit(hit_2, gene_sctj, GeneStatus.ACCESSORY)
        hit_3 = Hit(c_gene_sctn, "hit_3", 803, "replicon_id", 1, 1.0, 1.0, 1.0,
                    1.0, 10, 20)
        v_hit_3 = ValidHit(hit_3, gene_sctn, GeneStatus.ACCESSORY)
        rc = RejectedClusters(model, [
            Cluster([v_hit_1, v_hit_2], model, self.hit_weights),
            Cluster([v_hit_3], model, self.hit_weights)
        ], ["bla bla"])

        self.assertEqual(rc.hits, [v_hit_1, v_hit_2, v_hit_3])
        self.assertEqual(rc.reasons, ["bla bla"])
    def test_hits(self):
        model = Model("foo/T2SS", 10)
        c_gene_gspd = CoreGene(self.model_location, "gspD",
                               self.profile_factory)
        gene_gspd = ModelGene(c_gene_gspd, model)
        model.add_mandatory_gene(gene_gspd)
        c_gene_sctj = CoreGene(self.model_location, "sctJ",
                               self.profile_factory)
        gene_sctj = ModelGene(c_gene_sctj, model)
        model.add_accessory_gene(gene_sctj)
        c_gene_sctn = CoreGene(self.model_location, "sctN",
                               self.profile_factory)
        gene_sctn = ModelGene(c_gene_sctn, model)
        model.add_accessory_gene(gene_sctn)

        hit_1 = Hit(c_gene_gspd, "hit_1", 803, "replicon_id", 1, 1.0, 1.0, 1.0,
                    1.0, 10, 20)
        v_hit_1 = ValidHit(hit_1, gene_gspd, GeneStatus.MANDATORY)
        hit_2 = Hit(c_gene_sctj, "hit_2", 803, "replicon_id", 1, 1.0, 1.0, 1.0,
                    1.0, 10, 20)
        v_hit_2 = ValidHit(hit_2, gene_sctj, GeneStatus.ACCESSORY)
        hit_3 = Hit(c_gene_sctn, "hit_3", 803, "replicon_id", 1, 1.0, 1.0, 1.0,
                    1.0, 10, 20)
        v_hit_3 = ValidHit(hit_3, gene_sctn, GeneStatus.ACCESSORY)
        ls_1 = LikelySystem(model, [v_hit_1], [v_hit_2, v_hit_3], [], [])

        self.assertListEqual(ls_1.hits, [v_hit_1, v_hit_2, v_hit_3])
Exemple #8
0
    def setUp(self) -> None:
        args = argparse.Namespace()
        args.sequence_db = self.find_data("base", "test_1.fasta")
        args.db_type = 'gembase'
        args.models_dir = self.find_data('models')
        cfg = Config(MacsyDefaults(), args)

        model_name = 'foo'
        models_location = ModelLocation(path=os.path.join(args.models_dir, model_name))

        model = Model("foo/T2SS", 10)
        profile_factory = ProfileFactory(cfg)

        gene_name = "gspD"
        self.cg_gspd = CoreGene(models_location, gene_name, profile_factory)
        self.mg_gspd = ModelGene(self.cg_gspd, model, multi_system=True)

        gene_name = "sctJ"
        self.cg_sctj = CoreGene(models_location, gene_name, profile_factory)
        self.mg_sctj = ModelGene(self.cg_sctj, model, loner=True)

        gene_name = "abc"
        self.cg_abc = CoreGene(models_location, gene_name, profile_factory)
        self.mg_abc = ModelGene(self.cg_abc, model, multi_model=True)

        model.add_mandatory_gene(self.mg_gspd)
        model.add_accessory_gene(self.mg_sctj)
        model.add_accessory_gene(self.mg_abc)

        self.chit_1 = CoreHit(self.cg_gspd, "hit_1", 803, "replicon_id", 2, 1.0, 1.0, 1.0, 1.0, 10, 20)
        self.chit_2 = CoreHit(self.cg_sctj, "hit_2", 803, "replicon_id", 3, 1.0, 1.0, 1.0, 1.0, 10, 20)
        self.chit_3 = CoreHit(self.cg_abc, "hit_3", 803, "replicon_id", 4, 1.0, 1.0, 1.0, 1.0, 10, 20)
    def test_get_loners(self):
        model = Model("foo/T2SS", 11)
        # handle name, topology type, and min/max positions in the sequence dataset for a replicon and list of genes.
        # each genes is representing by a tuple (seq_id, length)"""
        rep_info = RepliconInfo('linear', 1, 60, [(f"g_{i}", i * 10) for i in range(1, 7)])

        core_genes = []
        model_genes = []
        for g_name in ('gspD', 'sctC', 'sctJ', 'sctN', 'abc'):
            core_gene = CoreGene(self.model_location, g_name, self.profile_factory)
            core_genes.append(core_gene)
            model_genes.append(ModelGene(core_gene, model))
        model_genes[3]._loner = True
        model_genes[4]._loner = True

        model.add_mandatory_gene(model_genes[0])
        model.add_mandatory_gene(model_genes[1])
        model.add_accessory_gene(model_genes[2])
        model.add_accessory_gene(model_genes[3])
        model.add_neutral_gene(model_genes[4])

        #     Hit(gene, model, hit_id, hit_seq_length, replicon_name, position, i_eval, score,
        #         profile_coverage, sequence_coverage, begin_match, end_match
        h10 = Hit(core_genes[0], "h10", 10, "replicon_1", 10, 1.0, 10.0, 1.0, 1.0, 10, 20)
        h20 = Hit(core_genes[1], "h20", 10, "replicon_1", 20, 1.0, 20.0, 1.0, 1.0, 10, 20)
        h30 = Hit(core_genes[2], "h30", 10, "replicon_1", 30, 1.0, 30.0, 1.0, 1.0, 10, 20)
        h61 = Hit(core_genes[3], "h61", 10, "replicon_1", 60, 1.0, 61.0, 1.0, 1.0, 10, 20)
        h80 = Hit(core_genes[4], "h80", 10, "replicon_1", 80, 1.0, 80.0, 1.0, 1.0, 10, 20)

        # loners are clusters of one hit
        loners = get_loners([h10, h20, h30, h61, h80], model, self.hit_weights)
        hit_from_clusters = [h.hits[0] for h in loners]
        self.assertListEqual(hit_from_clusters, [h61, h80])
    def test_str(self):
        model = Model("foo/T2SS", 10)
        c_gene_gspd = CoreGene(self.model_location, "gspD",
                               self.profile_factory)
        gene_gspd = ModelGene(c_gene_gspd, model)
        model.add_mandatory_gene(gene_gspd)
        c_gene_sctj = CoreGene(self.model_location, "sctJ",
                               self.profile_factory)
        gene_sctj = ModelGene(c_gene_sctj, model)
        model.add_accessory_gene(gene_sctj)
        c_gene_sctn = CoreGene(self.model_location, "sctN",
                               self.profile_factory)
        gene_sctn = ModelGene(c_gene_sctn, model)
        model.add_accessory_gene(gene_sctn)

        hit_1 = Hit(c_gene_gspd, "hit_1", 803, "replicon_id", 1, 1.0, 1.0, 1.0,
                    1.0, 10, 20)
        v_hit_1 = ValidHit(hit_1, gene_gspd, GeneStatus.MANDATORY)
        hit_2 = Hit(c_gene_sctj, "hit_2", 803, "replicon_id", 2, 1.0, 1.0, 1.0,
                    1.0, 10, 20)
        v_hit_2 = ValidHit(hit_2, gene_sctj, GeneStatus.ACCESSORY)
        hit_3 = Hit(c_gene_sctn, "hit_3", 803, "replicon_id", 3, 1.0, 1.0, 1.0,
                    1.0, 10, 20)
        v_hit_3 = ValidHit(hit_3, gene_sctn, GeneStatus.ACCESSORY)
        uls_1 = UnlikelySystem(model, [v_hit_1], [v_hit_2, v_hit_3], [], [],
                               ["reason"])

        expected_str = """(hit_1, gspD, 1), (hit_2, sctJ, 2), (hit_3, sctN, 3): These hits does not probably constitute a system because:
reason"""
        self.assertEqual(str(uls_1), expected_str)
    def test_str(self):
        model = Model("foo/T2SS", 10)
        c_gene_gspd = CoreGene(self.model_location, "gspD",
                               self.profile_factory)
        gene_gspd = ModelGene(c_gene_gspd, model)
        model.add_mandatory_gene(gene_gspd)
        c_gene_sctj = CoreGene(self.model_location, "sctJ",
                               self.profile_factory)
        gene_sctj = ModelGene(c_gene_sctj, model)
        model.add_accessory_gene(gene_sctj)
        c_gene_sctn = CoreGene(self.model_location, "sctN",
                               self.profile_factory)
        gene_sctn = ModelGene(c_gene_sctn, model)
        model.add_accessory_gene(gene_sctn)

        hit_1 = Hit(c_gene_gspd, "hit_1", 803, "replicon_id", 1, 1.0, 1.0, 1.0,
                    1.0, 10, 20)
        v_hit_1 = ValidHit(hit_1, gene_gspd, GeneStatus.MANDATORY)
        hit_2 = Hit(c_gene_sctj, "hit_2", 803, "replicon_id", 1, 1.0, 1.0, 1.0,
                    1.0, 10, 20)
        v_hit_2 = ValidHit(hit_2, gene_sctj, GeneStatus.ACCESSORY)
        hit_3 = Hit(c_gene_sctn, "hit_3", 803, "replicon_id", 1, 1.0, 1.0, 1.0,
                    1.0, 10, 20)
        v_hit_3 = ValidHit(hit_3, gene_sctn, GeneStatus.ACCESSORY)
        ls_1 = LikelySystem(model, [v_hit_1], [v_hit_2, v_hit_3], [], [])
        expected_str = ', '.join([
            f"({h.id}, {h.gene.name}, {h.position})"
            for h in (v_hit_1, v_hit_2, v_hit_3)
        ])
        self.assertEqual(str(ls_1), expected_str)
    def test_merge(self):
        model = Model("foo/T2SS", 11)

        c_gene_1 = CoreGene(self.model_location, "gspD", self.profile_factory)
        c_gene_2 = CoreGene(self.model_location, "sctC", self.profile_factory)
        c_gene_3 = CoreGene(self.model_location, "sctJ", self.profile_factory)

        gene_1 = ModelGene(c_gene_1, model)
        gene_2 = ModelGene(c_gene_2, model)
        gene_3 = ModelGene(c_gene_3, model)

        #     Hit(gene, model, hit_id, hit_seq_length, replicon_name, position, i_eval, score,
        #         profile_coverage, sequence_coverage, begin_match, end_match
        h10 = Hit(c_gene_1, "h10", 10, "replicon_1", 10, 1.0, 10.0, 1.0, 1.0, 10, 20)
        v_h10 = ValidHit(h10, gene_1, GeneStatus.MANDATORY)
        h20 = Hit(c_gene_2, "h20", 10, "replicon_1", 20, 1.0, 20.0, 1.0, 1.0, 10, 20)
        v_h20 = ValidHit(h20, gene_2, GeneStatus.MANDATORY)
        h30 = Hit(c_gene_3, "h30", 10, "replicon_1", 30, 1.0, 30.0, 1.0, 1.0, 10, 20)
        v_h30 = ValidHit(h30, gene_3, GeneStatus.ACCESSORY)
        h50 = Hit(c_gene_3, "h50", 10, "replicon_1", 50, 1.0, 50.0, 1.0, 1.0, 10, 20)
        v_h50 = ValidHit(h50, gene_3, GeneStatus.ACCESSORY)

        c1 = Cluster([v_h10, v_h20], model, self.hit_weights)
        c2 = Cluster([v_h30, v_h50], model, self.hit_weights)
        c1.merge(c2)
        self.assertListEqual(c1.hits, [v_h10, v_h20, v_h30, v_h50])

        c1 = Cluster([v_h10, v_h20], model, self.hit_weights)
        c2 = Cluster([v_h30, v_h50], model, self.hit_weights)
        c2.merge(c1)
        self.assertListEqual(c2.hits, [v_h30, v_h50, v_h10, v_h20])

        c1 = Cluster([v_h10, v_h20], model, self.hit_weights)
        c2 = Cluster([v_h30, v_h50], model, self.hit_weights)
        c1.merge(c2, before=True)
        self.assertListEqual(c1.hits, [v_h30, v_h50, v_h10, v_h20])

        model_2 = Model("foo/T3SS", 11)
        c_gene_3 = CoreGene(self.model_location, "sctJ", self.profile_factory)
        gene_3 = ModelGene(c_gene_3, model)

        h30 = Hit(c_gene_3, "h30", 10, "replicon_2", 30, 1.0, 30.0, 1.0, 1.0, 10, 20)
        v_h30 = ValidHit(h30, gene_3, GeneStatus.ACCESSORY)
        h50 = Hit(c_gene_3, "h50", 10, "replicon_2", 50, 1.0, 50.0, 1.0, 1.0, 10, 20)
        v_h50 = ValidHit(h50, gene_3, GeneStatus.ACCESSORY)
        c3 = Cluster([v_h30, v_h50], model_2, self.hit_weights)
        with self.assertRaises(MacsypyError) as ctx:
            c1.merge(c3)
        self.assertEqual(str(ctx.exception), "Try to merge Clusters from different model")
Exemple #13
0
 def test_accessor_mutator(self):
     model = Model("foo", 10)
     gene_name = 'sctJ_FLG'
     c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
     gene = ModelGene(c_gene, model)
     categories = set(model.gene_category)
     for cat in categories:
         other_cat = categories - {cat}
         getattr(model, f'add_{cat}_gene')(gene)
         self.assertEqual(getattr(model, f'{cat}_genes'), [gene])
         for other in other_cat:
             self.assertEqual(getattr(model, f'{other}_genes'), [])
         # don't forget to reset the model to avoid
         # to accumulate genes
         model = Model("foo", 10)
Exemple #14
0
    def test_add_get_gene(self):
        gene_name = 'sctJ_FLG'
        with self.assertRaises(KeyError) as ctx:
            self.gene_bank[f"foo/{gene_name}"]
        self.assertEqual(str(ctx.exception),
                         f"\"No such gene 'foo/{gene_name}' in this bank\"")
        model_foo = Model(self.model_name, 10)

        self.gene_bank.add_new_gene(self.model_location, gene_name,
                                    self.profile_factory)

        gene_from_bank = self.gene_bank[(model_foo.family_name, gene_name)]
        self.assertTrue(isinstance(gene_from_bank, CoreGene))
        self.assertEqual(gene_from_bank.name, gene_name)
        gbk_contains_before = list(self.gene_bank)
        self.gene_bank.add_new_gene(self.model_location, gene_name,
                                    self.profile_factory)
        gbk_contains_after = list(self.gene_bank)
        self.assertEqual(gbk_contains_before, gbk_contains_after)

        gene_name = "bar"
        with self.assertRaises(MacsypyError) as ctx:
            self.gene_bank.add_new_gene(self.model_location, gene_name,
                                        self.profile_factory)
        self.assertEqual(str(ctx.exception),
                         f"'{self.model_name}/{gene_name}': No such profile")
Exemple #15
0
    def test_execute_hmm_with_GA(self):
        for db_type in ("gembase", "ordered_replicon", "unordered"):
            self.cfg._set_db_type(db_type)
            model = Model("foo/T2SS", 10)

            gene_name = 'T5aSS_PF03797'
            c_gene = CoreGene(self.model_location, gene_name,
                              self.profile_factory)
            gene = ModelGene(c_gene, model)

            # case GA threshold in profile
            profile_path = self.model_location.get_profile("T5aSS_PF03797")
            profile = Profile(gene, self.cfg, profile_path)
            report = profile.execute()
            hmmer_raw_out = profile.hmm_raw_output
            with open(hmmer_raw_out, 'r') as hmmer_raw_out_file:
                first_l = hmmer_raw_out_file.readline()
                # a hmmsearch output file has been produced
                self.assertTrue(
                    first_l.startswith(
                        "# hmmsearch :: search profile(s) against a sequence database"
                    ))
                for i in range(5):
                    # skip 4 lines
                    l = hmmer_raw_out_file.readline()
                # a hmmsearch used the abc profile line should become with: "# query HMM file: {the path tp hmm profile used}"
                self.assertTrue(l.find(profile_path) != -1)
                for i in range(3):
                    # skip 2 lines
                    l = hmmer_raw_out_file.readline()
                self.assertEqual(
                    "# model-specific thresholding:     GA cutoffs", l.strip())
            # test if profile is executed only once per run
            report_bis = profile.execute()
            self.assertIs(report, report_bis)
    def test_contains(self):
        model = Model("foo/T2SS", 11)

        c_gene_1 = CoreGene(self.model_location, "gspD", self.profile_factory)
        c_gene_2 = CoreGene(self.model_location, "sctC", self.profile_factory)
        c_gene_3 = CoreGene(self.model_location, "sctJ", self.profile_factory)

        gene_1 = ModelGene(c_gene_1, model)
        gene_2 = ModelGene(c_gene_2, model)
        gene_3 = ModelGene(c_gene_3, model)

        #     Hit(gene, model, hit_id, hit_seq_length, replicon_name, position, i_eval, score,
        #         profile_coverage, sequence_coverage, begin_match, end_match
        h10 = Hit(c_gene_1, "h10", 10, "replicon_1", 10, 1.0, 10.0, 1.0, 1.0, 10, 20)
        v_h10 = ValidHit(h10, gene_1, GeneStatus.MANDATORY)
        h20 = Hit(c_gene_2, "h20", 10, "replicon_1", 20, 1.0, 20.0, 1.0, 1.0, 10, 20)
        v_h20 = ValidHit(h20, gene_2, GeneStatus.MANDATORY)
        h30 = Hit(c_gene_3, "h30", 10, "replicon_1", 30, 1.0, 30.0, 1.0, 1.0, 10, 20)
        v_h30 = ValidHit(h30, gene_3, GeneStatus.ACCESSORY)
        h50 = Hit(c_gene_3, "h50", 10, "replicon_1", 50, 1.0, 50.0, 1.0, 1.0, 10, 20)
        v_h50 = ValidHit(h50, gene_3, GeneStatus.ACCESSORY)
        c1 = Cluster([v_h10, v_h20, v_h50], model, self.hit_weights)

        self.assertTrue(v_h10 in c1)
        self.assertFalse(v_h30 in c1)
Exemple #17
0
    def test_execute_hmm_protected_path(self):
        # create a hmmdir with space in name
        self.cfg.hmmer_dir = lambda: 'hmmer results'
        # create sequence_db path with space in path
        seq_path = os.path.join(self.cfg.working_dir(), "test test1.fasta")
        shutil.copyfile(self.find_data("base", "test_1.fasta"), seq_path)
        self.cfg._set_sequence_db(seq_path)

        model = Model("foo/T2SS", 10)
        gene_name = 'T5aSS_PF03797'
        c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
        gene = ModelGene(c_gene, model)

        # case GA threshold in profile
        profile_path = self.model_location.get_profile("T5aSS_PF03797")
        profile = Profile(gene, self.cfg, profile_path)
        report = profile.execute()
        hmmer_raw_out = profile.hmm_raw_output
        with open(hmmer_raw_out, 'r') as hmmer_raw_out_file:
            first_l = hmmer_raw_out_file.readline()
            # a hmmsearch output file has been produced
            self.assertTrue(
                first_l.startswith(
                    "# hmmsearch :: search profile(s) against a sequence database"
                ))
            for i in range(5):
                # skip 4 lines
                l = hmmer_raw_out_file.readline()
            # a hmmsearch used the abc profile line should become with: "# query HMM file: {the path tp hmm profile used}"
            self.assertTrue(l.find(profile_path) != -1)
            for i in range(3):
                # skip 2 lines
                l = hmmer_raw_out_file.readline()
            self.assertEqual("# model-specific thresholding:     GA cutoffs",
                             l.strip())
    def test_fulfilled_function(self):
        model = Model("foo/T2SS", 11)

        c_gene_1 = CoreGene(self.model_location, "gspD", self.profile_factory)
        c_gene_2 = CoreGene(self.model_location, "sctC", self.profile_factory)
        c_gene_3 = CoreGene(self.model_location, "sctJ", self.profile_factory)
        c_gene_4 = CoreGene(self.model_location, "sctJ_FLG", self.profile_factory)

        gene_1 = ModelGene(c_gene_1, model)
        gene_2 = ModelGene(c_gene_2, model)
        gene_3 = ModelGene(c_gene_3, model)
        gene_4 = Exchangeable(c_gene_4, gene_3)
        gene_3.add_exchangeable(gene_4)

        #     Hit(gene, model, hit_id, hit_seq_length, replicon_name, position, i_eval, score,
        #         profile_coverage, sequence_coverage, begin_match, end_match
        h10 = Hit(c_gene_1, "h10", 10, "replicon_1", 10, 1.0, 10.0, 1.0, 1.0, 10, 20)
        v_h10 = ValidHit(h10, gene_1, GeneStatus.MANDATORY)
        h20 = Hit(c_gene_2, "h20", 10, "replicon_1", 20, 1.0, 20.0, 1.0, 1.0, 10, 20)
        v_h20 = ValidHit(h20, gene_2, GeneStatus.MANDATORY)

        c = Cluster([v_h10, v_h20], model, self.hit_weights)

        self.assertTrue(c.fulfilled_function(gene_1))
        self.assertFalse(c.fulfilled_function(gene_3))

        h50 = Hit(c_gene_4, "h50", 10, "replicon_1", 50, 1.0, 50.0, 1.0, 1.0, 10, 20)
        v_h50 = ValidHit(h50, gene_4, GeneStatus.ACCESSORY)
        c = Cluster([v_h10, v_h50], model, self.hit_weights)
        self.assertTrue(c.fulfilled_function(gene_3))
Exemple #19
0
    def test_execute_hmmer_failed(self):
        fake_hmmer = os.path.join(tempfile.gettempdir(), 'hmmer_failed')
        with open(fake_hmmer, 'w') as hmmer:
            hmmer.write("""#! {}
import sys
sys.exit(127)
""".format(sysconfig.sys.executable))
        try:
            os.chmod(hmmer.name, 0o755)
            self.cfg._options['hmmer'] = hmmer.name
            model = Model("foo/T2SS", 10)

            gene_name = 'abc'
            c_gene = CoreGene(self.model_location, gene_name,
                              self.profile_factory)
            gene = ModelGene(c_gene, model)

            path = self.model_location.get_profile("abc", )
            profile = Profile(gene, self.cfg, path)
            with self.catch_log():
                with self.assertRaisesRegex(
                        RuntimeError, "an error occurred during Hmmer "
                        "execution: command = .* : return code = 127 .*"
                ) as ctx:
                    profile.execute()

        finally:
            try:
                os.unlink(fake_hmmer)
            except Exception:
                pass
Exemple #20
0
    def test_execute_hmm_w_GA_n_nocutga(self):
        # case GA threshold in profile but --no-cut-ga is set
        args = argparse.Namespace()
        args.sequence_db = self.find_data("base", "test_1.fasta")
        args.db_type = 'gembase'
        args.models_dir = self.find_data('models')
        args.res_search_dir = tempfile.gettempdir()
        args.log_level = 0
        args.e_value_search = 0.5
        args.no_cut_ga = True
        cfg = Config(MacsyDefaults(), args)

        model = Model("foo/T2SS", 10)
        gene_name = 'T5aSS_PF03797'
        c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
        gene = ModelGene(c_gene, model)
        profile_path = self.model_location.get_profile("T5aSS_PF03797")
        profile = Profile(gene, cfg, profile_path)
        report = profile.execute()
        hmmer_raw_out = profile.hmm_raw_output
        with open(hmmer_raw_out, 'r') as hmmer_raw_out_file:
            for i in range(9):
                l = hmmer_raw_out_file.readline()
            self.assertEqual(
                "# sequence reporting threshold:    E-value <= 0.5", l.strip())
Exemple #21
0
    def test_execute_hmm_wo_GA(self):
        # case cut-ga but no GA threshold in hmmprofile
        model = Model("foo/T2SS", 10)
        gene_name = 'abc'
        c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
        gene = ModelGene(c_gene, model)

        # case -cut-ga and GA threshold in profile
        profile_path = self.model_location.get_profile("abc")
        profile = Profile(gene, self.cfg, profile_path)

        with self.catch_log() as log:
            report = profile.execute()

        hmmer_raw_out = profile.hmm_raw_output
        with open(hmmer_raw_out, 'r') as hmmer_raw_out_file:
            first_l = hmmer_raw_out_file.readline()
            # a hmmsearch output file has been produced
            self.assertTrue(
                first_l.startswith(
                    "# hmmsearch :: search profile(s) against a sequence database"
                ))
            for i in range(5):
                # skip 4 lines
                l = hmmer_raw_out_file.readline()
            # a hmmsearch used the abc profile line should become with: "# query HMM file: {the path tp hmm profile used}"
            self.assertTrue(l.find(profile_path) != -1)
            for i in range(3):
                # skip 2 lines
                l = hmmer_raw_out_file.readline()
            self.assertEqual(
                '# sequence reporting threshold:    E-value <= 0.1', l.strip())
Exemple #22
0
    def test_str(self):
        """
        """
        model_foo = Model("foo", 10)

        gene_name = 'sctJ_FLG'
        c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
        sctJ_FLG = ModelGene(c_gene, model_foo)

        gene_name = 'sctJ'
        c_sctJ = CoreGene(self.model_location, gene_name, self.profile_factory)
        homolog = Exchangeable(c_sctJ, sctJ_FLG)
        sctJ_FLG.add_exchangeable(homolog)

        gene_name = 'sctN'
        c_sctN = CoreGene(self.model_location, gene_name, self.profile_factory)
        analog = Exchangeable(c_sctN, sctJ_FLG)
        sctJ_FLG.add_exchangeable(analog)
        s = """name : sctJ_FLG
inter_gene_max_space: 10
    exchangeables: sctJ, sctN"""
        self.assertEqual(str(sctJ_FLG), s)

        gene_name = 'sctJ_FLG'
        c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
        sctJ_FLG = ModelGene(c_gene,
                             model_foo,
                             loner=True,
                             multi_system=True,
                             inter_gene_max_space=10)
        s = """name : sctJ_FLG
inter_gene_max_space: 10
loner
multi_system"""
        self.assertEqual(str(sctJ_FLG), s)
Exemple #23
0
    def test_inter_gene_max_space(self):
        model_fqn = 'foo/bar'
        inter_gene_max_space_xml = 40
        # test inter_gene_max_space from xml
        model = Model(model_fqn, inter_gene_max_space_xml)
        self.assertEqual(model.inter_gene_max_space, inter_gene_max_space_xml)

        self.clean_working_dir()
Exemple #24
0
    def test_SystemSerializer_tsv(self):
        model = Model("foo/T2SS", 10)
        c_gene_gspd = CoreGene(self.model_location, "gspD",
                               self.profile_factory)
        gene_gspd = ModelGene(c_gene_gspd, model)
        model.add_mandatory_gene(gene_gspd)
        c_gene_sctj = CoreGene(self.model_location, "sctJ",
                               self.profile_factory)
        gene_sctj = ModelGene(c_gene_sctj, model)
        model.add_accessory_gene(gene_sctj)
        c_gene_sctn = CoreGene(self.model_location, "sctN",
                               self.profile_factory)
        gene_sctn = ModelGene(c_gene_sctn, model)
        c_gene_sctn_flg = CoreGene(self.model_location, "sctN_FLG",
                                   self.profile_factory)
        gene_sctn_flg = Exchangeable(c_gene_sctn_flg, gene_sctn)
        gene_sctn.add_exchangeable(gene_sctn_flg)
        model.add_accessory_gene(gene_sctn)

        h_gspd = Hit(c_gene_gspd, "h_gspd", 803, "replicon_id", 10, 1.0, 1.0,
                     1.0, 1.0, 10, 20)
        v_h_gspd = ValidHit(h_gspd, gene_gspd, GeneStatus.MANDATORY)
        h_sctj = Hit(c_gene_sctj, "h_sctj", 803, "replicon_id", 20, 1.0, 1.0,
                     1.0, 1.0, 20, 30)
        v_h_sctj = ValidHit(h_sctj, gene_sctj, GeneStatus.ACCESSORY)
        h_sctn_flg = Hit(c_gene_sctn_flg, "h_sctn_flg", 803, "replicon_id", 30,
                         1.0, 1.0, 1.0, 1.0, 30, 40)
        v_h_sctn_flg = ValidHit(h_sctn_flg, gene_sctn_flg,
                                GeneStatus.ACCESSORY)
        c1 = Cluster([v_h_gspd, v_h_sctj], model, self.hit_weights)
        c2 = Cluster([v_h_sctn_flg], model, self.hit_weights)
        sys_multi_loci = System(model, [c1, c2], self.cfg.redundancy_penalty())
        hit_multi_sys_tracker = HitSystemTracker([sys_multi_loci])
        system_serializer = TsvSystemSerializer()

        sys_tsv = "\t".join([
            "replicon_id", "h_gspd", "gspD", "10", "foo/T2SS",
            sys_multi_loci.id, "1", "1.000", "1.900", "1", "gspD", "mandatory",
            "803", "1.0", "1.000", "1.000", "1.000", "10", "20", ""
        ])
        sys_tsv += "\n"
        sys_tsv += "\t".join([
            "replicon_id", "h_sctj", "sctJ", "20", "foo/T2SS",
            sys_multi_loci.id, "1", "1.000", "1.900", "1", "sctJ", "accessory",
            "803", "1.0", "1.000", "1.000", "1.000", "20", "30", ""
        ])
        sys_tsv += "\n"
        sys_tsv += "\t".join([
            "replicon_id", "h_sctn_flg", "sctN_FLG", "30", "foo/T2SS",
            sys_multi_loci.id, "1", "1.000", "1.900", "1", "sctN", "accessory",
            "803", "1.0", "1.000", "1.000", "1.000", "30", "40", ""
        ])
        sys_tsv += "\n"
        self.assertEqual(
            sys_tsv,
            system_serializer.serialize(sys_multi_loci, hit_multi_sys_tracker))
Exemple #25
0
 def test_unknown_attribute(self):
     model_foo = Model("foo", 10)
     gene_name = 'sctJ_FLG'
     c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
     gene = ModelGene(c_gene, model_foo)
     with self.assertRaises(AttributeError) as ctx:
         gene.foo
     self.assertEqual(str(ctx.exception), "'ModelGene' object has no attribute 'foo'")
Exemple #26
0
    def test_multi_loci(self):
        model_fqn = 'foo/True'
        inter_gene_max_space = 40
        model = Model(model_fqn, inter_gene_max_space, multi_loci=True)
        self.assertTrue(model.multi_loci)
        model_fqn = 'foo/False'
        inter_gene_max_space = 40
        model = Model(model_fqn, inter_gene_max_space)
        self.assertFalse(model.multi_loci)

        self.clean_working_dir()

        self.args.multi_loci = 'foo/False'

        model_fqn = 'foo/False'
        inter_gene_max_space = 40
        model = Model(model_fqn, inter_gene_max_space, multi_loci=False)
        self.assertFalse(model.multi_loci)
Exemple #27
0
 def test_model(self):
     """
     test getter/setter for model property
     """
     model_foo = Model("foo", 10)
     gene_name = 'sctJ_FLG'
     c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
     sctJ_FLG = ModelGene(c_gene, model_foo)
     self.assertEqual(sctJ_FLG.model, model_foo)
Exemple #28
0
    def test_get_best_hits_4_func(self):
        model = Model("foo/T2SS", 10)
        gene_name = "gspD"
        c_gene_gspd = CoreGene(self.models_location, gene_name, self.profile_factory)
        gene_gspd = ModelGene(c_gene_gspd, model, loner=True)

        #        gene, model, id,            hit_seq_len, replicon_name, position, i_eval,
        #        score,      profil_coverage,      sequence_coverage,     begin,end
        ######################
        # based on the score #
        ######################
        h0 = CoreHit(gene_gspd, "PSAE001c01_006940", 803, "PSAE001c01", 3450, float(1.2e-234),
                     10, float(1.000000), (741.0 - 104.0 + 1) / 803, 104, 741)
        h1 = CoreHit(gene_gspd, "PSAE001c01_013980", 759, "PSAE001c01", 3450, float(3.7e-76),
                     11, float(1.000000), (736.0 - 105.0 + 1) / 759, 105, 736)
        m0 = ModelHit(h0, gene_gspd, GeneStatus.ACCESSORY)
        m1 = ModelHit(h1, gene_gspd, GeneStatus.ACCESSORY)
        l0 = Loner(h0, gene_ref=gene_gspd, gene_status=GeneStatus.ACCESSORY, counterpart=[m1])
        l1 = Loner(h1, gene_ref=gene_gspd, gene_status=GeneStatus.ACCESSORY, counterpart=[m0])
        l = get_best_hit_4_func(gene_name, [l0, l1])
        self.assertEqual(l, l1)

        #######################
        # based on the i_eval #
        #######################
        h0 = CoreHit(gene_gspd, "PSAE001c01_006940", 803, "PSAE001c01", 3450, 10,
                     10, float(1.000000), (741.0 - 104.0 + 1) / 803, 104, 741)
        h1 = CoreHit(gene_gspd, "PSAE001c01_013980", 759, "PSAE001c01", 3450, 11,
                     10, float(1.000000), (736.0 - 105.0 + 1) / 759, 105, 736)
        m0 = ModelHit(h0, gene_gspd, GeneStatus.ACCESSORY)
        m1 = ModelHit(h1, gene_gspd, GeneStatus.ACCESSORY)
        l0 = Loner(h0, gene_ref=gene_gspd, gene_status=GeneStatus.ACCESSORY, counterpart=[m1])
        l1 = Loner(h1, gene_ref=gene_gspd, gene_status=GeneStatus.ACCESSORY, counterpart=[m0])

        l = get_best_hit_4_func(gene_name, [l0, l1], key='i_eval')
        self.assertEqual(l, l0)

        #################################
        # based on the profile_coverage #
        #################################
        h0 = CoreHit(gene_gspd, "PSAE001c01_006940", 803, "PSAE001c01", 3450, 10,
                     10, 10, (741.0 - 104.0 + 1) / 803, 104, 741)
        h1 = CoreHit(gene_gspd, "PSAE001c01_013980", 759, "PSAE001c01", 3450, 10,
                     10, 11, (736.0 - 105.0 + 1) / 759, 105, 736)
        m0 = ModelHit(h0, gene_gspd, GeneStatus.ACCESSORY)
        m1 = ModelHit(h1, gene_gspd, GeneStatus.ACCESSORY)
        l0 = Loner(h0, gene_ref=gene_gspd, gene_status=GeneStatus.ACCESSORY, counterpart=[m1])
        l1 = Loner(h1, gene_ref=gene_gspd, gene_status=GeneStatus.ACCESSORY, counterpart=[m0])

        l = get_best_hit_4_func(gene_name, [l0, l1], key='profile_coverage')
        self.assertEqual(l, l1)

        # bad criterion
        with self.assertRaises(MacsypyError) as ctx:
            get_best_hits([l0, l1], key='nimportnaoik')
        self.assertEqual('The criterion for Hits comparison nimportnaoik does not exist or is not available.\n'
                         'It must be either "score", "i_eval" or "profile_coverage".', str(ctx.exception))
Exemple #29
0
 def test_init(self):
     model_foo = Model("foo", 10)
     gene_name = 'sctJ_FLG'
     c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
     gene_1 = ModelGene(c_gene, model_foo)
     with self.assertRaises(MacsypyError) as ctx:
         ModelGene(gene_1, model_foo)
     self.assertEqual(str(ctx.exception),
                      "The ModeleGene gene argument must be a CoreGene not <class 'macsypy.gene.ModelGene'>.")
Exemple #30
0
    def test_str(self):
        model_fqn = "foo/bar"
        model = Model(model_fqn, 10)
        gene_name = 'sctJ_FLG'
        c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
        mandatory_gene = ModelGene(c_gene, model)
        model.add_mandatory_gene(mandatory_gene)
        homolog_name = 'sctJ'
        c_gene_homolg = CoreGene(self.model_location, homolog_name, self.profile_factory)
        homolog = Exchangeable(c_gene_homolg, mandatory_gene)
        mandatory_gene.add_exchangeable(homolog)

        gene_name = 'sctN_FLG'
        c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
        accessory_gene = ModelGene(c_gene, model)
        model.add_accessory_gene(accessory_gene)
        analog_name = 'sctN'
        c_gene_analog = CoreGene(self.model_location, analog_name, self.profile_factory)
        analog = Exchangeable(c_gene_analog, accessory_gene)
        accessory_gene.add_exchangeable(analog)

        gene_name = 'toto'
        c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
        neutral_gene = ModelGene(c_gene, model)
        model.add_neutral_gene(neutral_gene)

        gene_name = 'sctC'
        c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
        forbidden_gene = ModelGene(c_gene, model)
        model.add_forbidden_gene(forbidden_gene)

        exp_str = """name: bar
fqn: foo/bar
==== mandatory genes ====
sctJ_FLG
==== accessory genes ====
sctN_FLG
==== neutral genes ====
toto
==== forbidden genes ====
sctC
============== end pprint model ================
"""
        self.assertEqual(str(model), exp_str)