Exemple #1
0
class TestCoreGene(MacsyTest):
    """Tests for CoreGene built from the 'foo' model location of the test data."""

    def setUp(self):
        """Create the configuration, the model location and the profile factory."""
        args = argparse.Namespace()
        args.sequence_db = self.find_data("base", "test_1.fasta")
        args.db_type = 'gembase'
        args.models_dir = self.find_data('models')
        args.res_search_dir = tempfile.gettempdir()
        args.log_level = 30
        self.cfg = Config(MacsyDefaults(), args)
        self.model_name = 'foo'
        self.model_location = ModelLocation(path=os.path.join(args.models_dir, self.model_name))
        self.profile_factory = ProfileFactory(self.cfg)

    def tearDown(self):
        """Remove the configuration's working directory, ignoring failures."""
        try:
            shutil.rmtree(self.cfg.working_dir())
        except Exception:
            # Cleanup is best effort: a failure here must not fail the test.
            # Catching Exception instead of using a bare except lets
            # KeyboardInterrupt and SystemExit propagate.
            pass

    def test_core_gene(self):
        # check the name, model family and profile exposed by a CoreGene
        model_fqn = "foo/bar"
        model = Model(model_fqn, 10)
        gene_name = 'toto'
        cg = CoreGene(self.model_location, gene_name, self.profile_factory)
        self.assertEqual(cg.name, gene_name)
        self.assertEqual(cg.model_family_name, model.family_name)
        self.assertEqual(cg.profile, self.profile_factory.get_profile(cg, self.model_location))

        # two core genes built from the same arguments share the same hash
        cg2 = CoreGene(self.model_location, gene_name, self.profile_factory)
        self.assertIsInstance(hash(cg), int)
        self.assertEqual(hash(cg), hash(cg2))

        # a core gene with another name gets another hash
        gene_name = 'totote'
        cg3 = CoreGene(self.model_location, gene_name, self.profile_factory)
        self.assertNotEqual(hash(cg), hash(cg3))
class TestReport(MacsyTest):
    """Fixture preparing an output directory, a configuration and the indexes."""

    def setUp(self):
        """Prepare a fresh output directory holding a copy of the sequence database."""
        args = argparse.Namespace(
            db_type='gembase',
            models_dir=self.find_data('models'),
            res_search_dir=tempfile.gettempdir(),
            log_level=30,
        )
        args.out_dir = os.path.join(args.res_search_dir, 'test_macsyfinder_Report')
        out_dir = args.out_dir

        # always start from an empty output directory
        if os.path.exists(out_dir):
            shutil.rmtree(out_dir)
        os.mkdir(out_dir)

        # the configuration points to a copy of the sequence database
        # located in the output directory
        source_db = self.find_data("base", "test_base.fa")
        shutil.copy(source_db, out_dir)
        args.sequence_db = os.path.join(out_dir, os.path.basename(source_db))
        self.cfg = Config(MacsyDefaults(), args)

        hmmer_path = os.path.join(self.cfg.out_dir(), self.cfg.hmmer_dir())
        os.mkdir(hmmer_path)

        self.model_name = 'foo'
        model_path = os.path.join(args.models_dir, self.model_name)
        self.model_location = ModelLocation(path=model_path)

        # The ProfileFactory must be reset for each test: it behaves like
        # a singleton, so other tests would be influenced by a shared factory
        # and its configuration (for instance search_genes would get
        # profiles without hmmer_exe).
        self.profile_factory = ProfileFactory(self.cfg)

        Indexes(self.cfg).build()

    def tearDown(self):
        """Delete the working directory of the configuration, ignoring failures."""
        try:
            shutil.rmtree(self.cfg.working_dir())
        except Exception:
            # best-effort cleanup
            pass
 def test_working_dir(self):
     # the working directory of the configuration must equal its output directory
     config = Config(self.defaults, self.parsed_args)
     out_dir = config.out_dir()
     self.assertEqual(out_dir, config.working_dir())
class TestModelParser(MacsyTest):
    def setUp(self):
        """Create the configuration, the banks, the registry and the parser under test."""
        macsy_defaults = MacsyDefaults()
        self.args = argparse.Namespace(
            sequence_db=self.find_data("base", "test_1.fasta"),
            db_type='gembase',
            models_dir=self.find_data('models'),
            res_search_dir=tempfile.gettempdir(),
        )
        self.cfg = Config(macsy_defaults, self.args)

        self.model_bank = ModelBank()
        self.gene_bank = GeneBank()
        self.profile_factory = ProfileFactory(self.cfg)

        # register every model location found in the models directory
        self.model_registry = ModelRegistry()
        for location in scan_models_dir(self.args.models_dir):
            self.model_registry.add(location)

        self.parser = DefinitionParser(self.cfg,
                                       self.model_bank,
                                       self.gene_bank,
                                       self.model_registry,
                                       self.profile_factory)

    def tearDown(self):
        """Remove the configuration's working directory, ignoring failures."""
        try:
            shutil.rmtree(self.cfg.working_dir())
        except Exception:
            # Cleanup is best effort: a failure here must not fail the test.
            # Catching Exception instead of using a bare except lets
            # KeyboardInterrupt and SystemExit propagate.
            pass

    def test_parse_with_exchangeable(self):
        # parse 'foo/model_1' then check the model attributes, the genes of each
        # presence category and the exchangeables attached to 'sctJ_FLG'
        model_name = 'model_1'
        model_family = 'foo'
        fqn = f"{model_family}/{model_name}"
        models_2_detect = [self.model_registry['foo'].get_definition(fqn)]
        self.parser.parse(models_2_detect)
        self.assertEqual(len(self.model_bank), 1)

        m1 = self.model_bank[fqn]
        self.assertEqual(m1.name, model_name)
        self.assertEqual(m1.fqn, fqn)
        self.assertEqual(m1.inter_gene_max_space, 20)
        self.assertEqual(m1.min_mandatory_genes_required, 2)
        self.assertEqual(m1.min_genes_required, 4)
        self.assertTrue(m1.multi_loci)

        self.assertEqual(len(m1.mandatory_genes), 2)
        mandatory_genes_name = sorted([g.name for g in m1.mandatory_genes])
        theoric_list = sorted(["sctJ_FLG", "sctN_FLG"])
        self.assertListEqual(mandatory_genes_name, theoric_list)

        self.assertEqual(len(m1.accessory_genes), 2)
        accessory_genes_name = sorted([g.name for g in m1.accessory_genes])
        theoric_list = sorted(["flgB", "flgC"])
        self.assertListEqual(accessory_genes_name, theoric_list)

        self.assertEqual(len(m1.neutral_genes), 2)
        neutral_genes_name = sorted([g.name for g in m1.neutral_genes])
        theoric_list = sorted(["fliE", "tadZ"])
        self.assertListEqual(neutral_genes_name, theoric_list)

        self.assertEqual(len(m1.forbidden_genes), 1)
        forbidden_genes_name = sorted([g.name for g in m1.forbidden_genes])
        theoric_list = sorted(["sctC"])
        self.assertListEqual(forbidden_genes_name, theoric_list)

        sctJ_FLG = m1.get_gene('sctJ_FLG')
        sctJ_FLG_exchangeables = sctJ_FLG.exchangeables
        self.assertEqual(len(sctJ_FLG_exchangeables), 2)
        self.assertEqual(sctJ_FLG_exchangeables[0].name, 'sctJ')
        self.assertEqual(sctJ_FLG_exchangeables[1].name, 'abc')
        # assertIsInstance reports the actual type when the check fails
        self.assertIsInstance(sctJ_FLG_exchangeables[0], Exchangeable)
        self.assertIsInstance(sctJ_FLG_exchangeables[0]._gene, CoreGene)
        self.assertIsInstance(sctJ_FLG_exchangeables[0].alternate_of(), ModelGene)
        self.assertTrue(sctJ_FLG_exchangeables[0].loner)
        self.assertFalse(sctJ_FLG.is_exchangeable)
        sctJ = m1.get_gene('sctJ')
        self.assertTrue(sctJ.is_exchangeable)

    def test_exchangeable_inheritance(self):
        # check the loner, multi_system, multi_model and inter_gene_max_space
        # attributes of the genes of 'foo/model_1' and of their exchangeables
        models_2_detect = [
            self.model_registry['foo'].get_definition('foo/model_1')
        ]
        self.parser.parse(models_2_detect)
        m1 = self.model_bank['foo/model_1']

        sctJ = m1.get_gene('sctJ')
        self.assertTrue(sctJ.is_exchangeable)
        self.assertTrue(sctJ.loner)
        self.assertTrue(sctJ.multi_system)
        self.assertFalse(sctJ.multi_model)
        sctJ_FLG = m1.get_gene('sctJ_FLG')
        self.assertTrue(sctJ_FLG.multi_system)
        abc = m1.get_gene('abc')
        self.assertFalse(abc.multi_system)

        sctN = m1.get_gene('sctN')
        sctN_FLG = m1.get_gene('sctN_FLG')

        self.assertFalse(sctN_FLG.loner)
        self.assertTrue(sctN.loner)
        self.assertIsNone(sctN_FLG.inter_gene_max_space)
        self.assertEqual(sctN.inter_gene_max_space, 10)
        self.assertFalse(sctN_FLG.multi_model)
        self.assertFalse(sctN.multi_model)
        gspD = m1.get_gene('gspD')
        self.assertFalse(sctN_FLG.multi_system)
        self.assertTrue(gspD.multi_model)
        self.assertTrue(gspD.multi_system)

    def test_model_w_unkown_attr(self):
        # a definition with the attribute 'multi-loci' must be rejected with a MacsypyError
        fqn = 'foo/model_w_unknown_attribute'
        definitions = [self.model_registry['foo'].get_definition(fqn)]
        expected_msg = (
            "unable to parse model definition 'foo/model_w_unknown_attribute' : "
            "The model definition model_w_unknown_attribute.xml has an unknow attribute 'multi-loci'. "
            "Please fix the definition."
        )
        with self.assertRaises(MacsypyError) as ctx, self.catch_log():
            self.parser.parse(definitions)
        self.assertEqual(str(ctx.exception), expected_msg)

    def test_gene_w_unkown_attr(self):
        # a gene with the attribute 'multi-system' must be rejected with a MacsypyError
        fqn = 'foo/gene_w_unknown_attribute'
        definitions = [self.model_registry['foo'].get_definition(fqn)]
        expected_msg = (
            "unable to parse model definition 'foo/gene_w_unknown_attribute' : "
            "The model definition gene_w_unknown_attribute.xml has an unknown attribute 'multi-system' for a gene."
            " Please fix the definition."
        )
        with self.assertRaises(MacsypyError) as ctx, self.catch_log():
            self.parser.parse(definitions)
        self.assertEqual(str(ctx.exception), expected_msg)

    def test_wo_presence(self):
        # a gene declared without presence must raise a SyntaxError
        fqn = 'foo/fail_wo_presence'
        definitions = [self.model_registry['foo'].get_definition(fqn)]
        expected_msg = "Invalid model definition 'foo/fail_wo_presence': gene 'sctN_FLG' without presence"
        with self.assertRaises(SyntaxError) as ctx, self.catch_log():
            self.parser.parse(definitions)
        self.assertEqual(str(ctx.exception), expected_msg)

    def test_invalid_presence(self):
        # a presence value outside the allowed ones ('foo_bar') must raise a SyntaxError
        fqn = 'foo/fail_invalid_presence'
        definitions = [self.model_registry['foo'].get_definition(fqn)]
        expected_msg = (
            "Invalid model 'fail_invalid_presence' definition: presence value must be either: "
            "'mandatory', 'accessory', 'neutral', 'forbidden' not foo_bar"
        )
        with self.assertRaises(SyntaxError) as ctx, self.catch_log():
            self.parser.parse(definitions)
        self.assertEqual(str(ctx.exception), expected_msg)

    def test_gene_no_name(self):
        # a gene declared without name must raise a SyntaxError
        fqn = 'foo/gene_no_name'
        definitions = [self.model_registry['foo'].get_definition(fqn)]
        expected_msg = "Invalid model definition 'foo/gene_no_name': gene without name"
        with self.assertRaises(SyntaxError) as ctx, self.catch_log():
            self.parser.parse(definitions)
        self.assertEqual(str(ctx.exception), expected_msg)

    def test_invalid_homolog(self):
        # parsing 'foo/invalid_homolog' must fail because 'foo/foo_bar' has no profile
        fqn = 'foo/invalid_homolog'
        definitions = [self.model_registry['foo'].get_definition(fqn)]
        expected_msg = "'foo/foo_bar': No such profile"
        with self.assertRaises(MacsypyError) as ctx:
            self.parser.parse(definitions)
        self.assertEqual(str(ctx.exception), expected_msg)

    def test_invalid_homolog_2(self):
        # parsing 'foo/invalid_homolog_2' must fail on a gene without name
        fqn = 'foo/invalid_homolog_2'
        definitions = [self.model_registry['foo'].get_definition(fqn)]
        expected_msg = "Invalid model definition 'foo/invalid_homolog_2': gene without name"
        with self.assertRaises(SyntaxError) as raised, self.catch_log():
            self.parser.parse(definitions)
        self.assertEqual(str(raised.exception), expected_msg)

    def test_bad_min_genes_required(self):
        # min_genes_required (16) above the number of accessory and mandatory
        # components (6) must raise a ModelInconsistencyError
        fqn = 'foo/bad_min_genes_required'
        definitions = [self.model_registry['foo'].get_definition(fqn)]
        expected_msg = (
            'model \'bad_min_genes_required\' is not consistent: min_genes_required 16 must be lesser '
            'or equal than the number of "accessory" and "mandatory" components in the model: 6'
        )
        with self.assertRaises(ModelInconsistencyError) as ctx, self.catch_log():
            self.parser.parse(definitions)
        self.assertEqual(str(ctx.exception), expected_msg)

    def test_bad_min_genes_required_2(self):
        # a non integer min_genes_required (16.5) must raise a SyntaxError
        fqn = 'foo/bad_min_genes_required_2'
        definitions = [self.model_registry['foo'].get_definition(fqn)]
        expected_pattern = ("Invalid model definition (.*): "
                            "min_genes_required must be an integer: 16.5")
        with self.catch_log(), self.assertRaisesRegex(SyntaxError, expected_pattern):
            self.parser.parse(definitions)

    def test_bad_min_mandatory_genes_required(self):
        # parsing 'foo/bad_min_mandatory_genes_required' must raise
        # a ModelInconsistencyError with the message below
        fqn = 'foo/bad_min_mandatory_genes_required'
        definitions = [self.model_registry['foo'].get_definition(fqn)]
        expected_msg = (
            'model \'bad_min_mandatory_genes_required\' is not consistent: min_genes_required 16 must '
            'be lesser or equal than the number of "accessory" and "mandatory" components in the model: 6'
        )
        with self.catch_log(), self.assertRaises(ModelInconsistencyError) as ctx:
            self.parser.parse(definitions)
        self.assertEqual(str(ctx.exception), expected_msg)

    def test_bad_min_mandatory_genes_required_2(self):
        # min_genes_required ('6') lower than min_mandatory_genes_required ('8')
        # must raise a ModelInconsistencyError
        fqn = 'foo/bad_min_mandatory_genes_required_2'
        definitions = [self.model_registry['foo'].get_definition(fqn)]
        expected_msg = (
            "foo/bad_min_mandatory_genes_required_2: min_genes_required '6' must be greater or equal"
            " than min_mandatory_genes_required '8'"
        )
        with self.assertRaises(ModelInconsistencyError) as ctx, self.catch_log():
            # Note from the original author: the error is raised by the System
            # initialization, which occurs before check_consistency, so the last
            # check there,
            # not(model.min_mandatory_genes_required <= model.min_genes_required),
            # seems to be useless.
            self.parser.parse(definitions)
        self.assertEqual(str(ctx.exception), expected_msg)

    def test_bad_min_mandatory_genes_required_4(self):
        # a non integer min_mandatory_genes_required (12.5) must raise a SyntaxError
        fqn = 'foo/bad_min_mandatory_genes_required_4'
        definitions = [self.model_registry['foo'].get_definition(fqn)]
        expected_pattern = ("Invalid model definition (.*): "
                            "min_mandatory_genes_required must be an integer: 12.5")
        with self.assertRaisesRegex(SyntaxError, expected_pattern), self.catch_log():
            self.parser.parse(definitions)

    def test_min_mandatory_genes_required_lesser_than_mandatory_genes(self):
        # min_mandatory_genes_required (6) above the number of mandatory
        # components (5) must raise a ModelInconsistencyError
        fqn = 'foo/bad_min_mandatory_genes_required_3'
        definitions = [self.model_registry['foo'].get_definition(fqn)]
        expected_msg = (
            "model 'bad_min_mandatory_genes_required_3' is not consistent:"
            " 'min_mandatory_genes_required': 6 must be lesser or equal than the number of 'mandatory' "
            "components in the model: 5"
        )
        with self.assertRaises(ModelInconsistencyError) as ctx, self.catch_log():
            self.parser.parse(definitions)
        self.assertEqual(str(ctx.exception), expected_msg)

    def test_only_one_accessory(self):
        # a model made of a single gene whose status is not 'mandatory'
        # must raise a ModelInconsistencyError
        model_2_detect = [
            self.model_registry['foo'].get_definition('foo/only_one_accessory')
        ]
        with self.assertRaises(ModelInconsistencyError) as context:
            with self.catch_log():
                self.parser.parse(model_2_detect)
        # plain literals: the message has no placeholder, so no f-string is needed,
        # and implicit concatenation inside the parentheses needs no backslash
        self.assertEqual(str(context.exception),
                         "model 'only_one_accessory' is not consistent: there is only one gene in your model. "
                         "So its status should be 'mandatory'.")

    def test_bad_max_nb_genes(self):
        # a non integer max_nb_genes ('HOHOHO') must raise a SyntaxError
        # whose message holds the path of the definition file
        definitions = [self.model_registry['foo'].get_definition('foo/bad_max_nb_genes')]
        with self.assertRaises(SyntaxError) as ctx, self.catch_log():
            self.parser.parse(definitions)

        definition = definitions[0]
        model_name, def_name = definition.split_fqn(definition.fqn)
        def_path = os.path.join(self.cfg.models_dir()[0], model_name, 'definitions', def_name)
        expected_msg = "Invalid model definition ({0}.xml): max_nb_genes must be an integer: HOHOHO".format(def_path)
        self.assertEqual(str(ctx.exception), expected_msg)

    def test_bad_inter_gene_max_space(self):
        # a non integer inter_gene_max_space (12.5) must raise a SyntaxError
        # whose message holds the path of the definition file
        fqn = 'foo/bad_inter_gene_max_space'
        family, definition_name = fqn.split('/')
        definitions = [self.model_registry['foo'].get_definition(fqn)]
        with self.assertRaises(SyntaxError) as ctx, self.catch_log():
            self.parser.parse(definitions)

        def_path = os.path.join(self.cfg.models_dir()[0], family,
                                'definitions', definition_name + ".xml")
        expected_msg = "Invalid model definition ({}): inter_gene_max_space must be an integer: 12.5".format(def_path)
        self.assertEqual(str(ctx.exception), expected_msg)

    def test_no_inter_gene_max_space(self):
        # parsing 'foo/no_inter_gene_max_space' must raise a SyntaxError stating
        # that inter_gene_max_space must be defined
        fqn = 'foo/no_inter_gene_max_space'
        definitions = [self.model_registry['foo'].get_definition(fqn)]
        with self.assertRaises(SyntaxError) as ctx, self.catch_log():
            self.parser.parse(definitions)

        def_path = os.path.join(self.cfg.models_dir()[0],
                                "foo/definitions/no_inter_gene_max_space.xml")
        expected_msg = "Invalid model definition ({}): inter_gene_max_space must be defined".format(def_path)
        self.assertEqual(str(ctx.exception), expected_msg)

    def test_loner(self):
        # 'flgC' is a loner in neither model; 'tadZ' is a loner in model_5 only
        expectations = (
            ('foo/model_5', self.assertTrue),
            ('foo/model_6', self.assertFalse),
        )
        for fqn, check_tadZ in expectations:
            self.parser.parse([self.model_registry['foo'].get_definition(fqn)])
            model = self.model_bank[fqn]
            flgC = model.get_gene('flgC')
            self.assertFalse(flgC.loner)
            tadZ = model.get_gene('tadZ')
            check_tadZ(tadZ.loner)

    def test_multi_system(self):
        # in 'foo/model_5', 'fliE' is multi_system whereas 'flgC' is not
        fqn = 'foo/model_5'
        self.parser.parse([self.model_registry['foo'].get_definition(fqn)])

        model = self.model_bank[fqn]
        self.assertFalse(model.get_gene('flgC').multi_system)
        self.assertTrue(model.get_gene('fliE').multi_system)

    def test_multi_model(self):
        # in 'foo/model_5', 'abc' is multi_model whereas 'flgC' is not
        fqn = 'foo/model_5'
        self.parser.parse([self.model_registry['foo'].get_definition(fqn)])

        model = self.model_bank[fqn]
        self.assertFalse(model.get_gene('flgC').multi_model)
        self.assertTrue(model.get_gene('abc').multi_model)

    def test_gene_inter_gene_max_space(self):
        # check inter_gene_max_space at the model level and at the gene level
        # for 'foo/model_5' and 'foo/model_6' parsed together
        registry = self.model_registry['foo']
        definitions = []
        for fqn in ('foo/model_5', 'foo/model_6'):
            definitions.append(registry.get_definition(fqn))
        self.parser.parse(definitions)

        model_5 = self.model_bank['foo/model_5']
        self.assertEqual(model_5.name, 'model_5')
        self.assertEqual(model_5.fqn, 'foo/model_5')
        self.assertEqual(model_5.inter_gene_max_space, 20)
        flgB_5 = model_5.get_gene('flgB')
        flgC_5 = model_5.get_gene('flgC')
        self.assertIsNone(flgB_5.inter_gene_max_space)
        self.assertEqual(flgC_5.inter_gene_max_space, 2)

        model_6 = self.model_bank['foo/model_6']
        flgC_6 = model_6.get_gene('flgC')
        self.assertEqual(flgC_6.inter_gene_max_space, 12)

    def test_inter_gene_max_space_cfg(self):
        # inter_gene_max_space given in the configuration
        # must override the value read from the xml
        fqn = 'foo/model_5'
        self.args.inter_gene_max_space = [[fqn, '222']]

        # rebuild the configuration, the banks, the registry and the parser
        # (the profile factory created in setUp is reused)
        self.cfg = Config(MacsyDefaults(), self.args)
        self.model_bank = ModelBank()
        self.gene_bank = GeneBank()
        self.model_registry = ModelRegistry()
        for location in scan_models_dir(self.args.models_dir):
            self.model_registry.add(location)
        self.parser = DefinitionParser(self.cfg,
                                       self.model_bank,
                                       self.gene_bank,
                                       self.model_registry,
                                       self.profile_factory)

        self.parser.parse([self.model_registry['foo'].get_definition(fqn)])
        model = self.model_bank[fqn]
        self.assertEqual(model.inter_gene_max_space, 222)

    def test_min_mandatory_genes_required_cfg(self):
        # min_mandatory_genes_required given in the configuration
        # must override the value read from the xml
        fqn = 'foo/model_5'
        self.args.min_mandatory_genes_required = [[fqn, '3']]

        # rebuild the configuration, the banks, the registry and the parser
        # (the profile factory created in setUp is reused)
        self.cfg = Config(MacsyDefaults(), self.args)
        self.model_bank = ModelBank()
        self.gene_bank = GeneBank()
        self.model_registry = ModelRegistry()
        for location in scan_models_dir(self.args.models_dir):
            self.model_registry.add(location)
        self.parser = DefinitionParser(self.cfg,
                                       self.model_bank,
                                       self.gene_bank,
                                       self.model_registry,
                                       self.profile_factory)

        self.parser.parse([self.model_registry['foo'].get_definition(fqn)])
        model = self.model_bank[fqn]
        self.assertEqual(model.min_mandatory_genes_required, 3)

    def test_min_genes_required_cfg(self):
        # test min_genes_required is specified from configuration
        # so this value must override the value read from xml
        model_fqn = 'foo/model_5'

        min_genes_required = [[model_fqn, '4']]
        self.args.min_genes_required = min_genes_required

        # rebuild the configuration, the banks, the registry and the parser
        # (the profile factory created in setUp is reused)
        self.cfg = Config(MacsyDefaults(), self.args)
        self.model_bank = ModelBank()
        self.gene_bank = GeneBank()
        self.model_registry = ModelRegistry()
        models_location = scan_models_dir(self.args.models_dir)
        for ml in models_location:
            self.model_registry.add(ml)
        self.parser = DefinitionParser(self.cfg, self.model_bank,
                                       self.gene_bank, self.model_registry,
                                       self.profile_factory)

        models_2_detect = [
            self.model_registry['foo'].get_definition(model_fqn)
        ]
        self.parser.parse(models_2_detect)
        m = self.model_bank[model_fqn]
        self.assertEqual(m.min_genes_required, 4)

    def test_max_nb_genes_cfg(self):
        # check max_nb_genes read from the xml, then overridden by the configuration
        self.model_bank = ModelBank()
        self.gene_bank = GeneBank()
        self.model_registry = ModelRegistry()
        models_location = scan_models_dir(self.args.models_dir)
        for ml in models_location:
            self.model_registry.add(ml)

        # max_nb_genes is specified in xml
        # no user configuration on this
        model_fqn = 'foo/model_6'  # 4 genes in this model but xml specify 3
        self.cfg = Config(MacsyDefaults(), self.args)
        self.parser = DefinitionParser(self.cfg, self.model_bank,
                                       self.gene_bank, self.model_registry,
                                       self.profile_factory)

        models_2_detect = [
            self.model_registry['foo'].get_definition(model_fqn)
        ]
        self.parser.parse(models_2_detect)
        m = self.model_bank[model_fqn]
        self.assertEqual(m.max_nb_genes, 3)

        # max_nb_genes is specified from configuration
        # so this value must override the value read from xml
        model_fqn = 'foo/model_5'  # 4 genes in this model
        max_nb_genes = [[model_fqn, '6']]
        self.args.max_nb_genes = max_nb_genes
        self.cfg = Config(MacsyDefaults(), self.args)
        self.parser = DefinitionParser(self.cfg, self.model_bank,
                                       self.gene_bank, self.model_registry,
                                       self.profile_factory)

        models_2_detect = [
            self.model_registry['foo'].get_definition(model_fqn)
        ]
        self.parser.parse(models_2_detect)
        m = self.model_bank[model_fqn]
        self.assertEqual(m.max_nb_genes, 6)

    def test_multi_loci_cfg(self):
        # multi_loci given in the configuration
        # must override the value read from the xml
        fqn = 'foo/model_5'
        self.args.multi_loci = fqn

        # rebuild the configuration, the banks, the registry and the parser
        # (the profile factory created in setUp is reused)
        self.cfg = Config(MacsyDefaults(), self.args)
        self.model_bank = ModelBank()
        self.gene_bank = GeneBank()
        self.model_registry = ModelRegistry()
        for location in scan_models_dir(self.args.models_dir):
            self.model_registry.add(location)
        self.parser = DefinitionParser(self.cfg,
                                       self.model_bank,
                                       self.gene_bank,
                                       self.model_registry,
                                       self.profile_factory)

        self.parser.parse([self.model_registry['foo'].get_definition(fqn)])
        model = self.model_bank[fqn]
        self.assertTrue(model.multi_loci)

    def test_bad_gene_inter_gene_max_space_2(self):
        # a non integer inter_gene_max_space (2.5) must raise a SyntaxError
        fqn = 'foo/bad_inter_gene_max_space_2'
        definitions = [self.model_registry['foo'].get_definition(fqn)]
        expected_msg = ("Invalid model definition 'bad_inter_gene_max_space_2': "
                        "inter_gene_max_space must be an integer: 2.5")
        with self.assertRaises(SyntaxError) as raised, self.catch_log():
            self.parser.parse(definitions)

        self.assertEqual(str(raised.exception), expected_msg)

    def test_bad_exchangeable_inter_gene_max_space(self):
        # a non integer inter_gene_max_space (1.5) must raise a SyntaxError
        definitions = [
            self.model_registry['foo'].get_definition('foo/bad_exchangeable_inter_gene_max_space')
        ]
        expected_msg = ("Invalid model definition 'bad_exchangeable_inter_gene_max_space': "
                        "inter_gene_max_space must be an integer: 1.5")
        with self.assertRaises(SyntaxError) as raised, self.catch_log():
            self.parser.parse(definitions)
        self.assertEqual(str(raised.exception), expected_msg)

    def test_parse_model_old_syntax(self):
        # each definition written with an old syntax must raise a MacsypyError
        # and the reason must be logged on the 'macsypy' logger
        old_definitions = [
            # the attribute vers is not set
            ('model_old_1', 'is not versioned'),
            # the root is system instead of model
            ('model_old_2', 'is obsolete'),
            # there is still a system_ref attribute
            ('model_old_3', 'is obsolete'),
            # there is still a homologs tag
            ('model_old_4', 'is obsolete'),
            # there is still an analogs tag
            ('model_old_5', 'is obsolete'),
        ]
        for def_name, reason in old_definitions:
            model_fqn = f"foo/{def_name}"
            # subTest reports every failing definition instead of stopping at the first one
            with self.subTest(model_fqn=model_fqn):
                models_2_detect = [
                    self.model_registry['foo'].get_definition(model_fqn)
                ]
                with self.catch_log(log_name='macsypy') as log:
                    with self.assertRaises(MacsypyError):
                        self.parser.parse(models_2_detect)
                    # the log must be read while it is still captured
                    log_msg = log.get_value().strip()
                self.assertEqual(
                    log_msg,
                    f"unable to parse model definition '{model_fqn}' : "
                    f"The model definition {def_name}.xml {reason}. Please update your model."
                )
Exemple #5
0
class TestProfile(MacsyTest):
    """
    Tests of Profile: length, GA threshold detection, string representation
    and execution of hmmer (header of the raw output and failure modes).
    """

    def setUp(self):
        """
        Build a 'gembase' configuration on test_1.fasta with a freshly created,
        empty working directory, the 'foo' model location and a profile factory.
        """
        args = argparse.Namespace()
        args.sequence_db = self.find_data("base", "test_1.fasta")
        args.db_type = 'gembase'
        args.models_dir = self.find_data('models')
        args.res_search_dir = tempfile.gettempdir()
        args.log_level = 50
        self.cfg = Config(MacsyDefaults(), args)

        # every test starts from an empty working directory
        work_dir = self.cfg.working_dir()
        if os.path.exists(work_dir):
            shutil.rmtree(work_dir)
        os.makedirs(work_dir)

        self.model_name = 'foo'
        self.model_location = ModelLocation(
            path=os.path.join(args.models_dir, self.model_name))
        self.profile_factory = ProfileFactory(self.cfg)

    def tearDown(self):
        """
        Remove the working directory (best effort).
        """
        # ignore_errors keeps the cleanup best-effort without a bare except
        # which would also swallow KeyboardInterrupt / SystemExit
        shutil.rmtree(self.cfg.working_dir(), ignore_errors=True)

    def _make_gene(self, gene_name, model):
        """
        :param str gene_name: the name of the gene to create
        :param model: the Model the gene is attached to
        :return: a ModelGene wrapping a new CoreGene named *gene_name*
        """
        core_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
        return ModelGene(core_gene, model)

    @staticmethod
    def _hmmer_header(hmmer_raw_out, nb_lines=9):
        """
        :param str hmmer_raw_out: the path of a hmmer raw output file
        :param int nb_lines: the number of lines to read
        :return: the *nb_lines* first lines of *hmmer_raw_out*
                 (empty strings if the file is shorter)
        :rtype: list of str
        """
        with open(hmmer_raw_out, 'r') as raw_file:
            return [raw_file.readline() for _ in range(nb_lines)]

    def _check_hmmer_header(self, hmmer_raw_out, profile_path, threshold_line):
        """
        Check the header of a hmmsearch raw output.

        :param str hmmer_raw_out: the path of the hmmer raw output file
        :param str profile_path: the profile path expected on the 6th line
        :param str threshold_line: the expected 9th line (stripped)
        """
        header = self._hmmer_header(hmmer_raw_out)
        # 1st line: a hmmsearch output file has been produced
        self.assertTrue(
            header[0].startswith(
                "# hmmsearch :: search profile(s) against a sequence database"
            ))
        # 6th line: must contain the path of the hmm profile used
        self.assertIn(profile_path, header[5])
        # 9th line: the thresholding used for the search
        self.assertEqual(threshold_line, header[8].strip())

    def test_len(self):
        """
        test the length of the 'abc' profile
        """
        gene_name = 'abc'
        gene = self._make_gene(gene_name, Model("foo/T2SS", 10))
        path = self.model_location.get_profile(gene_name)
        profile = Profile(gene, self.cfg, path)
        self.assertEqual(len(profile), 501)

    def test_ga_threshold(self):
        """
        test ga_threshold for profiles without GA, with a well formed GA
        and with a malformed GA (a message must be logged)
        """
        model = Model("foo/T2SS", 10)

        well_formed = (
            ('abc', self.assertFalse),           # No GA threshold
            ('T5aSS_PF03797', self.assertTrue),  # GA threshold line ends with ;
            ('PF05930.13', self.assertTrue),     # GA threshold line does NOT end with ;
        )
        for gene_name, check in well_formed:
            gene = self._make_gene(gene_name, model)
            path = self.model_location.get_profile(gene_name)
            profile = Profile(gene, self.cfg, path)
            check(profile.ga_threshold)

        malformed = (
            # GA threshold invalid format string instead float
            ('bad_GA',
             "bad_GA GA score is not well formatted expected 2 floats got ''22.00'' ''23.00''.\n"
             "GA score will not used for gene 'bad_GA'."),
            # GA threshold invalid format only one score
            ('bad_GA_2',
             "bad_GA_2 GA score is not well formatted. expected: 'GA float float' got 'GA    22.00'.\n"
             "GA score will not used for gene 'bad_GA_2'."),
        )
        for gene_name, expected_msg in malformed:
            with self.catch_log(log_name='macsypy'):
                # When a CoreGene is created a Profile is automatically instantiated
                # so the log is muted to not pollute the output
                c_gene = CoreGene(self.model_location, gene_name,
                                  self.profile_factory)
            gene = ModelGene(c_gene, model)
            path = self.model_location.get_profile(gene_name)

            with self.catch_log(log_name='macsypy') as log:
                profile = Profile(gene, self.cfg, path)
                catch_msg = log.get_value().strip()
            self.assertFalse(profile.ga_threshold)
            self.assertEqual(catch_msg, expected_msg)

    def test_str(self):
        """
        test the string representation: '<gene name> : <profile path>'
        """
        gene_name = 'abc'
        gene = self._make_gene(gene_name, Model("foo/T2SS", 10))
        path = self.model_location.get_profile(gene_name)
        profile = Profile(gene, self.cfg, path)
        self.assertEqual(str(profile), f"{gene.name} : {path}")

    @unittest.skipIf(not which('hmmsearch'), 'hmmsearch not found in PATH')
    def test_execute_hmm_with_GA(self):
        """
        case GA threshold in profile: for each db_type the GA cutoffs are used
        and a second call to execute returns the very same report
        """
        gene_name = 'T5aSS_PF03797'
        for db_type in ("gembase", "ordered_replicon", "unordered"):
            # subTest reports which db_type failed
            with self.subTest(db_type=db_type):
                self.cfg._set_db_type(db_type)
                gene = self._make_gene(gene_name, Model("foo/T2SS", 10))

                profile_path = self.model_location.get_profile(gene_name)
                profile = Profile(gene, self.cfg, profile_path)
                report = profile.execute()
                self._check_hmmer_header(
                    profile.hmm_raw_output,
                    profile_path,
                    "# model-specific thresholding:     GA cutoffs")
                # test if profile is executed only once per run
                report_bis = profile.execute()
                self.assertIs(report, report_bis)

    @unittest.skipIf(not which('hmmsearch'), 'hmmsearch not found in PATH')
    def test_execute_hmm_protected_path(self):
        """
        case GA threshold in profile, with spaces in the hmmer dir name
        and in the path of the sequence db
        """
        # create a hmmdir with space in name
        self.cfg.hmmer_dir = lambda: 'hmmer results'
        # create sequence_db path with space in path
        seq_path = os.path.join(self.cfg.working_dir(), "test test1.fasta")
        shutil.copyfile(self.find_data("base", "test_1.fasta"), seq_path)
        self.cfg._set_sequence_db(seq_path)

        gene_name = 'T5aSS_PF03797'
        gene = self._make_gene(gene_name, Model("foo/T2SS", 10))

        profile_path = self.model_location.get_profile(gene_name)
        profile = Profile(gene, self.cfg, profile_path)
        profile.execute()
        self._check_hmmer_header(
            profile.hmm_raw_output,
            profile_path,
            "# model-specific thresholding:     GA cutoffs")

    @unittest.skipIf(not which('hmmsearch'), 'hmmsearch not found in PATH')
    def test_execute_hmm_w_GA_n_nocutga(self):
        """
        case GA threshold in profile but --no-cut-ga is set:
        the e_value_search threshold must be used
        """
        args = argparse.Namespace()
        args.sequence_db = self.find_data("base", "test_1.fasta")
        args.db_type = 'gembase'
        args.models_dir = self.find_data('models')
        args.res_search_dir = tempfile.gettempdir()
        args.log_level = 0
        args.e_value_search = 0.5
        args.no_cut_ga = True
        cfg = Config(MacsyDefaults(), args)
        # NOTE(review): this config may use another working dir than self.cfg,
        # which tearDown does not remove; clean it too (no-op if it is the same)
        self.addCleanup(shutil.rmtree, cfg.working_dir(), ignore_errors=True)

        gene_name = 'T5aSS_PF03797'
        gene = self._make_gene(gene_name, Model("foo/T2SS", 10))
        profile_path = self.model_location.get_profile(gene_name)
        profile = Profile(gene, cfg, profile_path)
        profile.execute()
        # only the 9th line (thresholding) is checked here
        threshold_line = self._hmmer_header(profile.hmm_raw_output)[-1]
        self.assertEqual(
            "# sequence reporting threshold:    E-value <= 0.5",
            threshold_line.strip())

    @unittest.skipIf(not which('hmmsearch'), 'hmmsearch not found in PATH')
    def test_execute_hmm_wo_GA(self):
        """
        case cut-ga but no GA threshold in hmm profile:
        the E-value threshold must be used
        """
        gene_name = 'abc'
        gene = self._make_gene(gene_name, Model("foo/T2SS", 10))

        profile_path = self.model_location.get_profile(gene_name)
        profile = Profile(gene, self.cfg, profile_path)

        with self.catch_log():
            profile.execute()

        self._check_hmmer_header(
            profile.hmm_raw_output,
            profile_path,
            '# sequence reporting threshold:    E-value <= 0.1')

    def test_execute_unknown_binary(self):
        """
        execute must raise a RuntimeError when the hmmer option
        is set to 'Nimportnaoik'
        """
        self.cfg._options['hmmer'] = "Nimportnaoik"
        gene_name = 'abc'
        gene = self._make_gene(gene_name, Model("foo/T2SS", 10))

        path = self.model_location.get_profile(gene_name)
        profile = Profile(gene, self.cfg, path)
        with self.catch_log():
            with self.assertRaises(RuntimeError):
                profile.execute()

    def test_execute_hmmer_failed(self):
        """
        execute must raise a RuntimeError reporting the return code
        when the hmmer executable exits with 127
        """
        # local import: do not rely on the undocumented 'sysconfig.sys' attribute
        import sys

        fake_hmmer = os.path.join(tempfile.gettempdir(), 'hmmer_failed')
        try:
            # a fake hmmer: a python script which always exits with 127
            with open(fake_hmmer, 'w') as script:
                script.write(f"#! {sys.executable}\nimport sys\nsys.exit(127)\n")
            os.chmod(fake_hmmer, 0o755)
            self.cfg._options['hmmer'] = fake_hmmer

            gene_name = 'abc'
            gene = self._make_gene(gene_name, Model("foo/T2SS", 10))

            path = self.model_location.get_profile(gene_name)
            profile = Profile(gene, self.cfg, path)
            with self.catch_log():
                with self.assertRaisesRegex(
                        RuntimeError, "an error occurred during Hmmer "
                        "execution: command = .* : return code = 127 .*"):
                    profile.execute()
        finally:
            try:
                os.unlink(fake_hmmer)
            except OSError:
                # best effort: the fake executable may not have been created
                pass
Exemple #6
0
class TestModelGene(MacsyTest):
    """
    Tests of ModelGene: construction, hash, exchangeables, membership in the
    gene categories of a Model and the loner / multi_system /
    inter_gene_max_space properties.
    """

    def setUp(self):
        """
        Build a 'gembase' configuration on test_1.fasta, the 'foo' model
        location and a profile factory.
        """
        args = argparse.Namespace()
        args.sequence_db = self.find_data("base", "test_1.fasta")
        args.db_type = 'gembase'
        args.models_dir = self.find_data('models')
        args.res_search_dir = tempfile.gettempdir()
        args.log_level = 30
        self.cfg = Config(MacsyDefaults(), args)
        self.model_name = 'foo'
        self.model_location = ModelLocation(
            path=os.path.join(args.models_dir, self.model_name))
        self.profile_factory = ProfileFactory(self.cfg)

    def tearDown(self):
        """
        Remove the working directory (best effort).
        """
        # ignore_errors keeps the cleanup best-effort without a bare except
        # which would also swallow KeyboardInterrupt / SystemExit
        shutil.rmtree(self.cfg.working_dir(), ignore_errors=True)

    def _core_gene(self, gene_name):
        """
        :param str gene_name: the name of the gene
        :return: a new CoreGene named *gene_name* located in the 'foo' models
        """
        return CoreGene(self.model_location, gene_name, self.profile_factory)

    def _model_gene(self, gene_name, model, **kwargs):
        """
        :param str gene_name: the name of the gene
        :param model: the Model the gene is attached to
        :param kwargs: extra keyword arguments passed as is to ModelGene
        :return: a ModelGene wrapping a new CoreGene named *gene_name*
        """
        return ModelGene(self._core_gene(gene_name), model, **kwargs)

    def test_init(self):
        """
        test that a ModelGene cannot be built on another ModelGene
        """
        model_foo = Model("foo", 10)
        gene_1 = self._model_gene('sctJ_FLG', model_foo)
        with self.assertRaises(MacsypyError) as ctx:
            ModelGene(gene_1, model_foo)
        self.assertEqual(
            str(ctx.exception),
            "The ModeleGene gene argument must be a CoreGene not <class 'macsypy.gene.ModelGene'>."
        )

    def test_hash(self):
        """
        test that the hash is an int and differs between 2 ModelGene
        built on the same CoreGene and the same model
        """
        model_foo = Model("foo", 10)
        c_gene = self._core_gene('sctJ_FLG')
        gene_1 = ModelGene(c_gene, model_foo)
        gene_2 = ModelGene(c_gene, model_foo)

        self.assertTrue(isinstance(hash(gene_1), int))
        self.assertEqual(hash(gene_1), hash(gene_1))
        self.assertNotEqual(hash(gene_1), hash(gene_2))

    def test_unknown_attribute(self):
        """
        test that accessing an unknown attribute raises an AttributeError
        """
        gene = self._model_gene('sctJ_FLG', Model("foo", 10))
        with self.assertRaises(AttributeError) as ctx:
            gene.foo
        self.assertEqual(str(ctx.exception),
                         "'ModelGene' object has no attribute 'foo'")

    def test_add_exchangeable(self):
        """
        test that an added exchangeable is listed in exchangeables
        """
        gene_ref = self._model_gene('sctJ', Model("foo", 10))
        homolog = Exchangeable(self._core_gene('sctJ_FLG'), gene_ref)

        gene_ref.add_exchangeable(homolog)
        self.assertEqual(len(gene_ref.exchangeables), 1)
        self.assertEqual(gene_ref.exchangeables[0], homolog)

    def test_exhangeables(self):
        """
        test that exchangeables are returned in the order they were added
        """
        sctn = self._model_gene('sctN', Model("foo", 10))
        c_sctj_flg = self._core_gene('sctJ_FLG')
        c_sctj = self._core_gene('sctJ')

        homolog_1 = Exchangeable(c_sctj, sctn)
        sctn.add_exchangeable(homolog_1)
        homolog_2 = Exchangeable(c_sctj_flg, sctn)
        sctn.add_exchangeable(homolog_2)
        self.assertEqual(sctn.exchangeables, [homolog_1, homolog_2])

    def test_is_exchangeable(self):
        """
        test that is_exchangeable is True only for an Exchangeable
        """
        model_foo = Model("foo", 10)
        sctn = self._model_gene('sctN', model_foo)

        c_sctj_flg = self._core_gene('sctJ_FLG')
        sctj_flg = ModelGene(c_sctj_flg, model_foo)

        sctj = self._model_gene('sctJ', model_foo)
        homolog = Exchangeable(c_sctj_flg, sctj)
        sctj.add_exchangeable(homolog)

        self.assertFalse(sctj_flg.is_exchangeable)
        self.assertFalse(sctj.is_exchangeable)
        self.assertTrue(homolog.is_exchangeable)
        self.assertFalse(sctn.is_exchangeable)

    def test_alternate_of(self):
        """
        test that alternate_of of a ModelGene returns the gene itself
        """
        sctj = self._model_gene('sctJ', Model("foo", 10))
        analog = Exchangeable(self._core_gene('sctJ_FLG'), sctj)
        sctj.add_exchangeable(analog)
        self.assertEqual(sctj.alternate_of(), sctj)

    def test_model(self):
        """
        test getter for model property
        """
        model_foo = Model("foo", 10)
        sctj_flg = self._model_gene('sctJ_FLG', model_foo)
        self.assertEqual(sctj_flg.model, model_foo)

    def test_loner(self):
        """
        test getter for loner property
        """
        model_foo = Model("foo", 10)
        sctj_flg = self._model_gene('sctJ_FLG', model_foo)
        self.assertFalse(sctj_flg.loner)

        sctj = self._model_gene('sctJ', model_foo, loner=True)
        self.assertTrue(sctj.loner)

    def test_is_mandatory(self):
        """
        test if gene belong to model mandatory genes
        """
        model_foo = Model("foo", 10)

        sctj_flg = self._model_gene('sctJ_FLG', model_foo)
        model_foo.add_mandatory_gene(sctj_flg)
        self.assertTrue(sctj_flg.is_mandatory(model_foo))

        sctj = self._model_gene('sctJ', model_foo)
        model_foo.add_accessory_gene(sctj)
        self.assertFalse(sctj.is_mandatory(model_foo))

    def test_is_accessory(self):
        """
        test if gene belong to model accessory genes
        """
        model_foo = Model("foo", 10)

        sctj_flg = self._model_gene('sctJ_FLG', model_foo)
        model_foo.add_mandatory_gene(sctj_flg)
        self.assertFalse(sctj_flg.is_accessory(model_foo))

        sctj = self._model_gene('sctJ', model_foo)
        model_foo.add_accessory_gene(sctj)
        self.assertTrue(sctj.is_accessory(model_foo))

    def test_is_Forbidden(self):
        """
        test if gene belong to model forbidden genes
        """
        model_foo = Model("foo", 10)

        sctj_flg = self._model_gene('sctJ_FLG', model_foo)
        model_foo.add_mandatory_gene(sctj_flg)
        self.assertFalse(sctj_flg.is_forbidden(model_foo))

        sctj = self._model_gene('sctJ', model_foo)
        model_foo.add_forbidden_gene(sctj)
        self.assertTrue(sctj.is_forbidden(model_foo))

    def test_multi_system(self):
        """
        test getter for multi_system property
        """
        model_foo = Model("foo", 10)

        sctj_flg = self._model_gene('sctJ_FLG', model_foo)
        self.assertFalse(sctj_flg.multi_system)

        sctj = self._model_gene('sctJ', model_foo, multi_system=True)
        self.assertTrue(sctj.multi_system)

    def test_inter_gene_max_space(self):
        """
        test getter for inter_gene_max_space property:
        the value of the model unless the gene defines its own
        """
        system_inter_gene_max_space = 40
        gene_inter_gene_max_space = 50
        model_foo = Model("foo", system_inter_gene_max_space)

        sctj_flg = self._model_gene('sctJ_FLG', model_foo)
        self.assertEqual(sctj_flg.inter_gene_max_space,
                         system_inter_gene_max_space)

        sctj = self._model_gene('sctJ', model_foo,
                                inter_gene_max_space=gene_inter_gene_max_space)
        self.assertEqual(sctj.inter_gene_max_space, gene_inter_gene_max_space)

    def test_str(self):
        """
        test the string representation of a gene with exchangeables
        and of a loner, multi_system gene
        """
        model_foo = Model("foo", 10)

        sctj_flg = self._model_gene('sctJ_FLG', model_foo)
        homolog = Exchangeable(self._core_gene('sctJ'), sctj_flg)
        sctj_flg.add_exchangeable(homolog)
        analog = Exchangeable(self._core_gene('sctN'), sctj_flg)
        sctj_flg.add_exchangeable(analog)

        expected = ("name : sctJ_FLG\n"
                    "inter_gene_max_space: 10\n"
                    "    exchangeables: sctJ, sctN")
        self.assertEqual(str(sctj_flg), expected)

        sctj_flg = self._model_gene('sctJ_FLG',
                                    model_foo,
                                    loner=True,
                                    multi_system=True,
                                    inter_gene_max_space=10)
        expected = ("name : sctJ_FLG\n"
                    "inter_gene_max_space: 10\n"
                    "loner\n"
                    "multi_system")
        self.assertEqual(str(sctj_flg), expected)
class Test(MacsyTest):
    """
    Tests of Indexes: look up and build of the index file (.idx)
    located next to the sequence db.
    """

    def setUp(self):
        """
        Build a configuration on a copy of test_1.fasta located in a fresh
        'test_macsyfinder_indexes' directory.
        """
        self.cfg = self._build_config("test_1.fasta")

    def tearDown(self):
        """
        Remove the working directory (best effort).
        """
        # ignore_errors keeps the cleanup best-effort without a bare except
        # which would also swallow KeyboardInterrupt / SystemExit
        shutil.rmtree(self.cfg.working_dir(), ignore_errors=True)

    def _build_config(self, seq_name):
        """
        (Re)create the 'test_macsyfinder_indexes' directory in the temporary
        directory, copy the sequence file *seq_name* into it and build a
        'gembase' configuration using this copy as sequence db.

        :param str seq_name: the name of the sequence file in the 'base' data
        :return: the configuration
        """
        args = argparse.Namespace()

        args.db_type = 'gembase'
        args.e_value_res = 1
        args.i_evalue_sel = 0.5
        args.models_dir = self.find_data('models')
        args.res_search_suffix = ''
        args.log_level = 30

        args.out_dir = os.path.join(tempfile.gettempdir(), 'test_macsyfinder_indexes')
        if os.path.exists(args.out_dir):
            shutil.rmtree(args.out_dir)
        os.makedirs(args.out_dir)
        seq_db = self.find_data("base", seq_name)
        shutil.copy(seq_db, args.out_dir)

        args.sequence_db = os.path.join(args.out_dir, os.path.basename(seq_db))
        return Config(MacsyDefaults(), args)

    def _idx_path(self, idx):
        """
        :param idx: the Indexes object
        :return: the path '<directory of the sequence db>/<idx.name>.idx'
        :rtype: str
        """
        return os.path.join(os.path.dirname(self.cfg.sequence_db()), idx.name + ".idx")

    def test_find_my_indexes(self):
        """
        find_my_indexes returns None when there is no index file
        and the path of the index file once it exists
        """
        idx = Indexes(self.cfg)
        self.assertIsNone(idx.find_my_indexes())
        new_idx = self._idx_path(idx)
        with open(new_idx, 'w'):
            pass
        self.assertEqual(idx.find_my_indexes(), new_idx)

    def test_build_no_idx(self):
        """
        build creates the index file next to the sequence db
        """
        idx = Indexes(self.cfg)
        idx.build()
        my_idx = idx.find_my_indexes()
        self.assertEqual(my_idx, self._idx_path(idx))

    def test_build_with_idx(self):
        """
        build keeps an existing index file (here an empty one) untouched
        """
        idx = Indexes(self.cfg)
        with open(self._idx_path(idx), 'w'):
            pass
        idx.build()
        my_idx = idx.find_my_indexes()
        self.assertEqual(os.path.getsize(my_idx), 0)

    def test_build_force(self):
        """
        build with force=True produces a non empty index file
        """
        idx = Indexes(self.cfg)
        idx.build(force=True)
        my_idx = idx.find_my_indexes()
        self.assertNotEqual(os.path.getsize(my_idx), 0)

    @unittest.skipIf(platform.system() == 'Windows' or os.getuid() == 0, 'Skip test on Windows or if run as root')
    def test_build_not_writable(self):
        """
        build raises an IOError when the directory of the sequence db
        is not writable
        """
        # Skipped on Windows, since setting the folder permissions does not affect the files inside.
        # In a Singularity container the tests are run as root and this test makes no sense.
        idx = Indexes(self.cfg)
        idx_dir = os.path.dirname(self.cfg.sequence_db())
        os.chmod(idx_dir, 0o000)
        try:
            with self.assertRaises(IOError) as ctx:
                with self.catch_log():
                    idx.build()
            # raw string: '\(' is an invalid escape sequence in a regular string literal
            self.assertRegex(str(ctx.exception),
                             r"cannot build indexes, \(.+/test_macsyfinder_indexes\) is not writable")
        finally:
            # restore the permissions so that tearDown can remove the directory
            os.chmod(idx_dir, 0o777)

    def test_build_my_indexes(self):
        """
        _build_my_indexes raises a MacsypyError on test_base_with_errors.fa
        """
        cfg = self._build_config("test_base_with_errors.fa")

        idx = Indexes(cfg)
        with self.assertRaises(MacsypyError) as e:
            with self.catch_log():
                idx._build_my_indexes()
        self.assertTrue(str(e.exception).startswith("unable to index the sequence dataset:"))
Exemple #8
0
class TestModel(MacsyTest):

    def setUp(self):
        """
        Build a 'gembase' configuration on test_1.fasta with a fresh
        'test_macsyfinder_Model' output directory, the 'foo' model location
        and a profile factory.
        """
        tmp_dir = tempfile.gettempdir()
        self.args = argparse.Namespace(
            sequence_db=self.find_data("base", "test_1.fasta"),
            db_type='gembase',
            models_dir=self.find_data('models'),
            res_search_dir=tmp_dir,
            log_level=30,
            out_dir=os.path.join(tmp_dir, 'test_macsyfinder_Model'),
        )
        # always start from an empty output directory
        out_dir = self.args.out_dir
        if os.path.exists(out_dir):
            shutil.rmtree(out_dir)
        os.mkdir(out_dir)

        self.cfg = Config(MacsyDefaults(), self.args)
        self.model_name = 'foo'
        models_path = os.path.join(self.args.models_dir, self.model_name)
        self.model_location = ModelLocation(path=models_path)
        self.profile_factory = ProfileFactory(self.cfg)


    def tearDown(self):
        """
        Remove the working directory once the test is over.
        """
        self.clean_working_dir()

    def clean_working_dir(self):
        """
        Remove the working directory of the configuration (best effort):
        a missing or not removable directory is silently ignored.
        """
        # ignore_errors keeps the cleanup best-effort without a bare except
        # which would also swallow KeyboardInterrupt / SystemExit
        shutil.rmtree(self.cfg.working_dir(), ignore_errors=True)

    def test_fqn(self):
        """
        test the fqn and name properties: the model 'foo/bla' is named 'bla'
        """
        expected_fqn = 'foo/bla'
        model = Model(expected_fqn, 10)
        self.assertEqual(model.fqn, expected_fqn)
        self.assertEqual(model.name, 'bla')


    def test_inter_gene_max_space(self):
        """
        test that inter_gene_max_space is the value given at the model creation
        """
        expected_space = 40
        model = Model('foo/bar', expected_space)
        self.assertEqual(model.inter_gene_max_space, expected_space)

        self.clean_working_dir()


    def test_min_genes_required(self):
        """
        test min_genes_required: the value given at the model creation,
        otherwise the number of mandatory genes
        """
        fqn = 'foo/model_1'
        explicit_quorum = 40
        explicit_model = Model(fqn, 10, min_genes_required=explicit_quorum)
        core_gene = CoreGene(self.model_location, 'sctJ_FLG', self.profile_factory)
        explicit_model.add_mandatory_gene(ModelGene(core_gene, explicit_model))
        self.assertEqual(explicit_model.min_genes_required, explicit_quorum)

        # no explicit value
        default_model = Model(fqn, 10)
        self.assertEqual(default_model.min_genes_required,
                         len(default_model.mandatory_genes))

        self.clean_working_dir()


    def test_min_mandatory_genes_required(self):
        """
        test min_mandatory_genes_required: the value given at the model
        creation, otherwise the number of mandatory genes
        """
        fqn = 'foo/bar'
        explicit_quorum = 40
        explicit_model = Model(fqn, 10, min_mandatory_genes_required=explicit_quorum)
        core_gene = CoreGene(self.model_location, 'sctJ_FLG', self.profile_factory)
        explicit_model.add_mandatory_gene(ModelGene(core_gene, explicit_model))
        self.assertEqual(explicit_model.min_mandatory_genes_required, explicit_quorum)

        # no explicit value
        default_model = Model(fqn, 10)
        self.assertEqual(default_model.min_mandatory_genes_required,
                         len(default_model.mandatory_genes))

        self.clean_working_dir()


    def test_max_nb_genes(self):
        """
        test max_nb_genes: the value given at the model creation, otherwise
        0 for a model without gene and 2 once one mandatory and one accessory
        genes have been added
        """
        fqn = 'foo/bar'
        max_space = 40
        explicit_max = 10
        explicit_model = Model(fqn, max_space, max_nb_genes=explicit_max)
        self.assertEqual(explicit_model.max_nb_genes, explicit_max)

        # no explicit value
        default_model = Model(fqn, max_space)
        self.assertEqual(default_model.max_nb_genes, 0)

        sctc = ModelGene(CoreGene(self.model_location, "sctC", self.profile_factory),
                         default_model)
        abc = ModelGene(CoreGene(self.model_location, "abc", self.profile_factory),
                        default_model)
        default_model.add_mandatory_gene(sctc)
        default_model.add_accessory_gene(abc)
        self.assertEqual(default_model.max_nb_genes, 2)

        self.clean_working_dir()


    def test_multi_loci(self):
        """
        test multi_loci: True when set at the model creation,
        False when omitted or explicitly set to False
        """
        max_space = 40
        multi_loci_model = Model('foo/True', max_space, multi_loci=True)
        self.assertTrue(multi_loci_model.multi_loci)

        default_model = Model('foo/False', max_space)
        self.assertFalse(default_model.multi_loci)

        self.clean_working_dir()

        # NOTE(review): no Config is rebuilt from self.args after this
        # assignment, so it looks like it has no effect here - confirm
        self.args.multi_loci = 'foo/False'

        single_locus_model = Model('foo/False', max_space, multi_loci=False)
        self.assertFalse(single_locus_model.multi_loci)


    def test_accessor_mutator(self):
        """
        test that add_<category>_gene fills <category>_genes only,
        for each gene category of the model
        """
        model = Model("foo", 10)
        core_gene = CoreGene(self.model_location, 'sctJ_FLG', self.profile_factory)
        gene = ModelGene(core_gene, model)
        all_categories = set(model.gene_category)
        for category in all_categories:
            add_gene = getattr(model, f'add_{category}_gene')
            add_gene(gene)
            self.assertEqual(getattr(model, f'{category}_genes'), [gene])
            # the other categories must stay empty
            for untouched in all_categories - {category}:
                self.assertEqual(getattr(model, f'{untouched}_genes'), [])
            # use a fresh model for the next category
            # otherwise the genes would accumulate
            model = Model("foo", 10)

    def test_get_gene(self):
        """
        test that get_gene retrieves by name a gene, or one of its
        exchangeables, whatever its category and raises a KeyError
        for an unknown name
        """
        model = Model("foo", 10)
        gene_name = 'sctJ_FLG'
        gene = ModelGene(CoreGene(self.model_location, gene_name, self.profile_factory),
                         model)

        def empty_categories():
            # reset every gene category of the model
            for category in model.gene_category:
                setattr(model, f'_{category}_genes', [])

        adders = [getattr(model, f'add_{category}_gene') for category in model.gene_category]
        for add_gene in adders:
            empty_categories()
            add_gene(gene)
            self.assertEqual(gene, model.get_gene(gene_name))

        with self.assertRaises(KeyError):
            model.get_gene('bar')

        homolog_name = 'sctJ'
        homolog = Exchangeable(
            CoreGene(self.model_location, homolog_name, self.profile_factory),
            gene)
        gene.add_exchangeable(homolog)
        for add_gene in adders:
            empty_categories()
            add_gene(gene)
            self.assertEqual(homolog, model.get_gene(homolog_name))


    def test_str(self):
        model_fqn = "foo/bar"
        model = Model(model_fqn, 10)
        gene_name = 'sctJ_FLG'
        c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
        mandatory_gene = ModelGene(c_gene, model)
        model.add_mandatory_gene(mandatory_gene)
        homolog_name = 'sctJ'
        c_gene_homolg = CoreGene(self.model_location, homolog_name, self.profile_factory)
        homolog = Exchangeable(c_gene_homolg, mandatory_gene)
        mandatory_gene.add_exchangeable(homolog)

        gene_name = 'sctN_FLG'
        c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
        accessory_gene = ModelGene(c_gene, model)
        model.add_accessory_gene(accessory_gene)
        analog_name = 'sctN'
        c_gene_analog = CoreGene(self.model_location, analog_name, self.profile_factory)
        analog = Exchangeable(c_gene_analog, accessory_gene)
        accessory_gene.add_exchangeable(analog)

        gene_name = 'toto'
        c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
        neutral_gene = ModelGene(c_gene, model)
        model.add_neutral_gene(neutral_gene)

        gene_name = 'sctC'
        c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
        forbidden_gene = ModelGene(c_gene, model)
        model.add_forbidden_gene(forbidden_gene)

        exp_str = """name: bar
fqn: foo/bar
==== mandatory genes ====
sctJ_FLG
==== accessory genes ====
sctN_FLG
==== neutral genes ====
toto
==== forbidden genes ====
sctC
============== end pprint model ================
"""
        self.assertEqual(str(model), exp_str)

    def test_eq(self):
        """Two models built with the same arguments must compare equal."""
        first, second = Model("aaa", 10), Model("aaa", 10)
        self.assertEqual(first, second)

    def test_lt(self):
        """The model named "aaa" must sort before the model named "zzz"."""
        lower, upper = Model("aaa", 10), Model("zzz", 10)
        self.assertLess(lower, upper)

    def test_gt(self):
        """The model named "zzz" must sort after the model named "aaa"."""
        lower, upper = Model("aaa", 10), Model("zzz", 10)
        self.assertGreater(upper, lower)

    def test_filter(self):
        """
        ``Model.filter`` must keep the hits whose gene (or exchangeable) belongs to
        the model and drop the hits on genes attached to another model.
        """
        model = Model("foo/bar", 10)
        other_model = Model("foo/buz", 10)

        def core(name):
            # CoreGene bound to the location/profile factory of the fixture
            return CoreGene(self.model_location, name, self.profile_factory)

        def attach_exchangeable(name, reference):
            # build an exchangeable for *reference* and register it on that gene
            alternate = Exchangeable(core(name), reference)
            reference.add_exchangeable(alternate)
            return alternate

        def hit_on(gene):
            return CoreHit(gene, f"PSAE001c01_{gene.name}",
                           1, "PSAE001c01", 1, 1.0, 1.0, 1.0, 1.0, 1, 2)

        # genes of the model under test
        sctJ_FLG = ModelGene(core('sctJ_FLG'), model)
        model.add_mandatory_gene(sctJ_FLG)
        attach_exchangeable('sctJ', sctJ_FLG)

        sctN_FLG = ModelGene(core('sctN_FLG'), model)
        model.add_accessory_gene(sctN_FLG)
        attach_exchangeable('sctN', sctN_FLG)

        sctC = ModelGene(core('sctC'), model)
        model.add_forbidden_gene(sctC)

        toto = ModelGene(core('toto'), model)
        model.add_neutral_gene(toto)
        totote = attach_exchangeable('totote', toto)

        # genes which belong to the second model only
        gspd = ModelGene(core('gspD'), other_model)
        tadz = attach_exchangeable('tadZ', gspd)

        expected_hits = [hit_on(gene) for gene in (sctJ_FLG, sctN_FLG, sctC, toto, totote)]
        foreign_hits = [hit_on(gene) for gene in (gspd, tadz)]

        filtered_hits = model.filter(expected_hits + foreign_hits)

        self.assertListEqual(sorted(expected_hits), sorted(filtered_hits))


    def test_hash(self):
        """
        The hash of a model must be an int, identical for two models built with
        the same arguments and different for models with different fqn.
        """
        bar, bar_twin, buz = [Model(fqn, 10) for fqn in ('Foo/bar', 'Foo/bar', 'Foo/buz')]
        self.assertIsInstance(hash(bar), int)
        self.assertEqual(hash(bar), hash(bar_twin))
        self.assertNotEqual(hash(bar), hash(buz))
Exemple #9
0
def _ensure_usable_working_dir(working_dir):
    """
    Create *working_dir* when it does not exist, otherwise check that it can be used.

    :param str working_dir: the path of the directory where the results are written
    :raise ValueError: if *working_dir* exists and is not a directory,
                       or is a directory which already contains something
    """
    if not os.path.exists(working_dir):
        os.makedirs(working_dir)
    elif not os.path.isdir(working_dir):
        raise ValueError(
            f"'{working_dir}' already exists and is not a directory")
    elif os.listdir(working_dir):
        raise ValueError(
            f"'{working_dir}' already exists and is not a empty")


def _check_search_options(parser, parsed_args):
    """
    Check that the options needed to run a search have been provided.

    Nothing is checked when ``--previous-run`` is set. Otherwise the help is printed
    and an error is raised for the first missing option among
    ``--models``, ``--sequence-db`` and ``--db-type``.

    :param parser: the command line parser (used to print the help)
    :param parsed_args: the parsed command line arguments
    :raise OptionError: if a required option is missing
    """
    if parsed_args.previous_run:
        return
    required = (('models', '--models'),
                ('sequence_db', '--sequence-db'),
                ('db_type', '--db-type'))
    for attr, option in required:
        if not getattr(parsed_args, attr):
            parser.print_help()
            print()
            # the user needs the message, not the stack trace
            sys.tracebacklimit = 0
            raise OptionError(
                f"argument {option} or --previous-run is required.")


def _compute_best_solutions(all_systems, logger):
    """
    Search the best combination of systems, replicon by replicon.

    :param all_systems: the systems found by the search
    :param logger: the logger used to report progress
    :return: a tuple of 2 lists: all the equivalent best solutions of every replicon,
             and one best solution picked for each replicon
    """
    import time

    best_solutions = []
    one_best_solution = []
    # NOTE(review): itertools.groupby only groups *consecutive* items, this assumes
    # all_systems is already ordered by replicon_name - confirm against search_systems
    for rep_name, syst_group in itertools.groupby(
            all_systems, key=lambda s: s.replicon_name):
        syst_group = list(syst_group)
        logger.info(
            f"Computing best solutions for {rep_name} (nb of systems {len(syst_group)})"
        )
        t0 = time.time()
        best_sol_4_1_replicon, score = find_best_solutions(syst_group)
        t1 = time.time()
        logger.info(
            f"It took {t1 - t0:.2f}sec to find best solution ({score:.2f}) for replicon {rep_name}"
        )
        # if several solutions are equivalent (same number of systems and same score)
        # store all of them in best_solutions => all_best_solutions.tsv
        # and pick the first one in one_best_solution => best_solution.tsv
        best_solutions.extend(best_sol_4_1_replicon)
        one_best_solution.append(best_sol_4_1_replicon[0])
    return best_solutions, one_best_solution


def _write_ordered_results(config, all_systems, rejected_clusters, track_multi_systems_hit,
                           best_solutions, one_best_solution, logger):
    """
    Write the result files of a search on ordered or gembase replicons:
    all_systems.txt, all_systems.tsv, rejected_clusters.txt,
    all_best_solutions.tsv and best_solution.tsv.

    *rejected_clusters* is sorted in place before being written.
    """
    working_dir = config.working_dir()

    with open(os.path.join(working_dir, "all_systems.txt"), "w") as sys_file:
        systems_to_txt(all_systems, track_multi_systems_hit, sys_file)

    with open(os.path.join(working_dir, "all_systems.tsv"), "w") as tsv_file:
        systems_to_tsv(all_systems, track_multi_systems_hit, tsv_file)

    with open(os.path.join(working_dir, "rejected_clusters.txt"), "w") as clst_file:
        rejected_clusters.sort(key=lambda clst: (
            clst.replicon_name, clst.model, clst.hits))
        rejected_clst_to_txt(rejected_clusters, clst_file)
    if not (all_systems or rejected_clusters):
        logger.info("No Systems found in this dataset.")

    with open(os.path.join(working_dir, "all_best_solutions.tsv"), "w") as tsv_file:
        solutions_to_tsv(best_solutions, track_multi_systems_hit, tsv_file)

    # flatten the list of solutions in a list of systems and sort it
    best_systems = [syst for sol in one_best_solution for syst in sol]
    best_systems.sort(
        key=lambda syst: (syst.replicon_name, syst.position[0],
                          syst.model.fqn, -syst.score))
    with open(os.path.join(working_dir, "best_solution.tsv"), "w") as tsv_file:
        systems_to_tsv(best_systems, track_multi_systems_hit, tsv_file)


def _write_unordered_results(config, all_systems, rejected_clusters,
                             track_multi_systems_hit, logger):
    """
    Write the result files of a search on unordered replicons:
    all_systems.txt, all_systems.tsv and uncomplete_systems.txt.
    """
    working_dir = config.working_dir()

    with open(os.path.join(working_dir, "all_systems.txt"), "w") as sys_file:
        likely_systems_to_txt(all_systems, track_multi_systems_hit, sys_file)

    # forbidden = [s for s in all_systems if s.forbidden_occ]
    # system_filename = os.path.join(config.working_dir(), "forbidden_components.tsv")
    # with open(system_filename, "w") as sys_file:
    #     likely_systems_to_tsv(forbidden, track_multi_systems_hit, sys_file)

    with open(os.path.join(working_dir, "all_systems.tsv"), "w") as sys_file:
        likely_systems_to_tsv(all_systems, track_multi_systems_hit, sys_file)

    with open(os.path.join(working_dir, "uncomplete_systems.txt"), "w") as clst_file:
        unlikely_systems_to_txt(rejected_clusters, clst_file)

    if not (all_systems or rejected_clusters):
        logger.info("No Systems found in this dataset.")


def main(args=None, loglevel=None):
    """
    Main entry point of MacSyFinder.

    Parse the command line, prepare the working directory and the loggers,
    check the options, then run the search of systems and write the results
    in the working directory.

    :param args: the arguments passed on the command line without the program name
                 (``sys.argv[1:]`` is used when *args* is None)
    :type args: List of string
    :param loglevel: the output verbosity. If set it takes precedence over
                     the level coming from the configuration.
    :type loglevel: a positive int or a string among 'DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'
    :raise ValueError: if the working directory exists and is not an empty directory
    :raise OptionError: if an option required to run a search is missing
    """
    args = sys.argv[1:] if args is None else args
    parser, parsed_args = parse_args(args)

    defaults = MacsyDefaults()
    config = Config(defaults, parsed_args)

    ###########################
    # creation of working dir
    ###########################
    _ensure_usable_working_dir(config.working_dir())

    ################
    # init loggers #
    ################
    macsypy.init_logger(log_file=os.path.join(config.working_dir(),
                                              config.log_file()),
                        out=not config.mute())
    if not loglevel:
        # the log level is specified by the args options
        macsypy.logger_set_level(level=config.log_level())
    else:
        # used by unit tests to mute or unmute logs
        macsypy.logger_set_level(level=loglevel)

    logger = logging.getLogger('macsypy.macsyfinder')

    if parsed_args.list_models:
        print(list_models(parsed_args), file=sys.stdout)
        sys.exit(0)
    else:
        _check_search_options(parser, parsed_args)

        # use the logger created above, like every other message of this function
        logger.info(f"command used: {' '.join(sys.argv)}")

        models = ModelBank()
        genes = GeneBank()
        profile_factory = ProfileFactory(config)
        macsypy.hit.hit_weight = macsypy.hit.HitWeight(itself=3,
                                                       exchangeable=.75,
                                                       mandatory=2,
                                                       accessory=.25,
                                                       neutral=1.5)

        logger.info("\n{:#^70}".format(" Searching systems "))
        all_systems, rejected_clusters = search_systems(
            config, models, genes, profile_factory, logger)

        track_multi_systems_hit = HitSystemTracker(all_systems)
        if config.db_type() in ('gembase', 'ordered_replicon'):
            #############################
            # Ordered/Gembase replicons #
            #############################
            logger.info("\n{:#^70}".format(" Computing best solutions "))
            best_solutions, one_best_solution = _compute_best_solutions(all_systems, logger)

            logger.info("\n{:#^70}".format(" Writing down results "))
            _write_ordered_results(config, all_systems, rejected_clusters,
                                   track_multi_systems_hit,
                                   best_solutions, one_best_solution, logger)
        else:
            #######################
            # Unordered replicons #
            #######################
            logger.info("\n{:#^70}".format(" Writing down results "))
            _write_unordered_results(config, all_systems, rejected_clusters,
                                     track_multi_systems_hit, logger)

    logger.info("END")
class Test(MacsyTest):
    """
    Tests of RepliconDB on the gembase data set ``test_base.fa``.

    Several tests replace ``RepliconDB.__init__`` by a lightweight initializer
    which leaves the internal ``_DB`` dict empty, so the private ``_fill_*``
    methods can be exercised one by one. The real initializer is restored
    in :meth:`tearDown`.
    """

    def __init__(self, methodName='runTest'):
        # zero-argument super(): it does not rely on the module level name ``Test``,
        # which may be rebound by another test class with the same name
        super().__init__(methodName)

        def fake_init(obj, cfg):
            # same attributes as a RepliconDB, but _DB is left empty
            obj.cfg = cfg
            obj._idx = Indexes(cfg)
            obj.sequence_idx = obj._idx.find_my_indexes()
            obj.topology_file = cfg.topology_file()
            obj._DB = {}

        self.fake_init = fake_init
        self.real_init = RepliconDB.__init__

    def setUp(self):
        """Copy the sequence base in a fresh output directory, build its indexes
        and define the genes expected for each replicon."""
        self.args = argparse.Namespace()
        self.args.db_type = 'gembase'
        self.args.models_dir = self.find_data('models')
        self.args.res_search_dir = tempfile.gettempdir()
        self.args.log_level = 30
        self.args.out_dir = os.path.join(self.args.res_search_dir,
                                         'test_macsyfinder_repliconDB')
        if os.path.exists(self.args.out_dir):
            shutil.rmtree(self.args.out_dir)
        os.mkdir(self.args.out_dir)

        seq_db = self.find_data("base", "test_base.fa")
        shutil.copy(seq_db, self.args.out_dir)
        self.args.sequence_db = os.path.join(self.args.out_dir,
                                             os.path.basename(seq_db))
        self.cfg = Config(MacsyDefaults(), self.args)

        # NOTE: the last entry of each list below is duplicated on purpose,
        # it is what the tests expect from RepliconDB
        self.ESCO030p01_genes = [
            ('000010', 886), ('000020', 291),
            ('000030', 656), ('000040', 500),
            ('000050', 407), ('000060', 144),
            ('000070', 183), ('000080', 121),
            ('000090', 199), ('000100', 325),
            ('000110', 425), ('000120', 171),
            ('000130', 277), ('000140', 133),
            ('000150', 108), ('000160', 295),
            ('000170', 273), ('000180', 367),
            ('000190', 573), ('000200', 343),
            ('000210', 295), ('000220', 108),
            ('000230', 117), ('000240', 153),
            ('000250', 479), ('000260', 706),
            ('000270', 998), ('000280', 171),
            ('000290', 108), ('000300', 295),
            ('000310', 165), ('000320', 243),
            ('000330', 295), ('000340', 108),
            ('000350', 1755), ('000360', 248),
            ('000370', 286), ('000380', 186),
            ('000390', 83), ('000400', 153),
            ('000410', 69), ('000420', 295),
            ('000430', 108), ('000440', 145),
            ('000450', 59), ('000460', 124),
            ('000470', 246), ('000480', 325),
            ('000490', 54), ('000500', 95),
            ('000510', 83),
            ('000520', 56), ('000530', 401),
            ('000540', 320), ('000550', 256),
            ('000560', 73), ('000570', 144),
            ('000580', 258), ('000590', 133),
            ('000600', 140), ('000610', 63),
            ('000620', 138), ('000630', 68),
            ('000640', 169), ('000650', 127),
            ('000660', 295), ('000670', 108),
            ('000670', 108),
        ]

        self.PSAE001c01_genes = [
            ('006940', 803), ('013980', 759),
            ('017350', 600), ('018920', 776),
            ('026600', 273), ('031420', 658),
            ('043580', 416), ('051090', 714),
            ('055870', 449), ('055880', 447),
            ('055890', 588), ('055900', 292),
            ('055910', 262), ('055920', 166),
            ('055930', 288), ('055940', 194),
            ('055950', 567), ('055960', 188),
            ('055970', 247), ('055980', 252),
            ('055990', 455), ('056000', 450),
            ('056010', 260), ('056020', 246),
            ('056030', 70), ('056040', 133),
            ('056050', 284), ('056060', 585),
            ('056070', 435), ('056080', 342),
            ('056090', 252), ('056100', 122),
            ('056110', 213), ('056120', 400),
            ('056130', 134), ('056140', 138),
            ('056150', 397), ('056160', 298),
            ('056170', 186), ('056180', 445),
            ('056190', 414), ('056200', 132),
            ('056210', 674), ('056220', 319),
            ('056230', 394), ('056240', 207),
            ('056250', 401), ('056260', 611),
            ('056270', 257), ('056280', 169),
            ('056290', 454), ('056300', 141),
            ('056310', 458), ('056320', 286),
            ('056330', 514), ('056340', 178),
            ('056350', 156), ('056360', 85),
            ('056370', 289), ('056380', 126),
            ('056390', 290), ('056400', 262),
            ('056410', 214), ('056420', 630),
            ('056430', 127), ('056440', 455),
            ('056440', 455),
        ]
        self.NCDB_genes = [
            ('056134', 289), ('056135', 126), ('056136', 290),
            ('056137', 262), ('056138', 214), ('056139', 630),
            ('056140', 127), ('056141', 803), ('056141', 803),
        ]

        self.idx = Indexes(self.cfg)
        self.idx.build()

    def tearDown(self):
        """Remove the working directory (best effort) and always restore
        the real ``RepliconDB.__init__``."""
        try:
            shutil.rmtree(self.cfg.working_dir())
        except OSError:
            # the directory may already be gone, the cleanup is only best effort
            pass
        finally:
            # must happen whatever the outcome of the cleanup,
            # otherwise the fake initializer would leak in the other tests
            RepliconDB.__init__ = self.real_init

    def _expected_infos(self):
        """
        :return: the RepliconInfo expected for each replicon of the test base
                 when no topology file is provided, indexed by replicon name.
        :rtype: dict
        """
        topology = self.cfg.replicon_topology()
        return {
            'ESCO030p01': RepliconInfo(topology, 1, 67, self.ESCO030p01_genes),
            'PSAE001c01': RepliconInfo(topology, 68, 133, self.PSAE001c01_genes),
            'NC_xxxxx_xx': RepliconInfo("circular", 134, 141, self.NCDB_genes),
        }

    def _check_replicon(self, info, topology, first, last, genes):
        """Assert the topology, the min/max positions and the genes of *info*."""
        self.assertEqual(info.topology, topology)
        self.assertEqual(info.min, first)
        self.assertEqual(info.max, last)
        self.assertEqual(info.genes, genes)

    def test_fill_topology(self):
        """``_fill_topology`` must return what is written in the topology file."""
        self.args.topology_file = self.args.sequence_db + ".topo"
        db_send = {'ESCO030p01': 'circular', 'PSAE001c01': 'linear'}
        with open(self.args.topology_file, 'w') as f:
            for replicon, topology in db_send.items():
                f.write(f'{replicon} : {topology}\n')

        cfg = Config(MacsyDefaults(), self.args)
        RepliconDB.__init__ = self.fake_init
        db = RepliconDB(cfg)
        rcv_topo = db._fill_topology()
        self.assertDictEqual(db_send, rcv_topo)

    def test_fill_ordered_replicon_min_max(self):
        """``_fill_ordered_min_max`` must register one single replicon
        spanning the whole ordered replicon base."""
        seq_ori = self.find_data("base", "ordered_replicon_base.fasta")
        shutil.copy(seq_ori, self.args.out_dir)
        self.args.sequence_db = os.path.join(self.args.out_dir,
                                             os.path.basename(seq_ori))
        cfg = Config(MacsyDefaults(), self.args)

        idx = Indexes(cfg)
        idx.build()
        RepliconDB.__init__ = self.fake_init
        db = RepliconDB(cfg)
        db._fill_ordered_min_max(cfg.replicon_topology())

        self.assertEqual(len(db._DB), 1)
        rep = db[RepliconDB.ordered_replicon_name]
        self.assertEqual(rep.topology, cfg.replicon_topology())
        self.assertEqual(rep.min, 1)
        self.assertEqual(rep.max, 52)

    def test_fill_gembase_min_max_default_topology(self):
        """Without topology information every replicon is expected to be circular."""
        RepliconDB.__init__ = self.fake_init
        db = RepliconDB(self.cfg)
        db._fill_gembase_min_max({}, self.cfg.replicon_topology())
        self.assertEqual(len(db._DB), 3)
        self.assertEqual(set(db._DB.keys()),
                         {'ESCO030p01', 'PSAE001c01', 'NC_xxxxx_xx'})
        self._check_replicon(db['ESCO030p01'], 'circular', 1, 67, self.ESCO030p01_genes)
        self._check_replicon(db['PSAE001c01'], 'circular', 68, 133, self.PSAE001c01_genes)
        self._check_replicon(db['NC_xxxxx_xx'], 'circular', 134, 141, self.NCDB_genes)

    def test_fill_gembase_min_max_oredered_replicon(self):
        """Parsing an ordered replicon base as a gembase must raise a MacsypyError."""
        seq_ori = self.find_data("base", "ordered_replicon_base.fasta")
        shutil.copy(seq_ori, self.args.out_dir)
        self.args.sequence_db = os.path.join(self.args.out_dir,
                                             os.path.basename(seq_ori))
        cfg = Config(MacsyDefaults(), self.args)

        idx = Indexes(cfg)
        idx.build()
        RepliconDB.__init__ = self.fake_init
        db = RepliconDB(cfg)
        with self.assertRaises(MacsypyError) as ctx:
            # the captured log is not inspected, only the exception matters here
            with self.catch_log():
                db._fill_gembase_min_max({}, self.cfg.replicon_topology())
        self.assertEqual(
            str(ctx.exception),
            f"Error during sequence-db '{self.args.sequence_db}' parsing. "
            f"Are you sure db-type is 'gembase'?")

    def test_fill_gembase_min_max_with_topology(self):
        """The topology read from the topology file is applied to the replicons it lists,
        the replicon which is not listed gets the topology passed as default."""
        self.args.topology_file = self.args.sequence_db + ".topo"
        with open(self.args.topology_file, 'w') as f:
            f.write(
                '# topology file\nESCO030p01 : circular\nPSAE001c01 : linear\n'
            )
        cfg = Config(MacsyDefaults(), self.args)
        RepliconDB.__init__ = self.fake_init
        db = RepliconDB(cfg)
        topo_dict = db._fill_topology()
        db._fill_gembase_min_max(topo_dict, 'circular')
        self.assertEqual(len(db._DB), 3)
        self.assertEqual(set(db._DB.keys()),
                         {'ESCO030p01', 'PSAE001c01', 'NC_xxxxx_xx'})
        self._check_replicon(db['ESCO030p01'], 'circular', 1, 67, self.ESCO030p01_genes)
        self._check_replicon(db['PSAE001c01'], 'linear', 68, 133, self.PSAE001c01_genes)
        self._check_replicon(db['NC_xxxxx_xx'], 'circular', 134, 141, self.NCDB_genes)

    def test_in(self):
        """``in`` must be True for the replicons of the base only."""
        db = RepliconDB(self.cfg)
        self.assertIn('ESCO030p01', db)
        self.assertIn('PSAE001c01', db)
        self.assertIn('NC_xxxxx_xx', db)
        self.assertNotIn('toto', db)

    def test_getitem(self):
        """``db[name]`` must return the matching RepliconInfo or raise KeyError."""
        db = RepliconDB(self.cfg)
        expected = self._expected_infos()
        self.assertEqual(expected['ESCO030p01'], db['ESCO030p01'])
        self.assertEqual(expected['PSAE001c01'], db['PSAE001c01'])
        self.assertEqual(expected['NC_xxxxx_xx'], db['NC_xxxxx_xx'])
        self.assertRaises(KeyError, db.__getitem__, 'foo')

    def test_get(self):
        """``db.get`` must behave like ``dict.get``, default value included."""
        db = RepliconDB(self.cfg)
        expected = self._expected_infos()
        self.assertEqual(expected['ESCO030p01'], db.get('ESCO030p01'))
        self.assertEqual(expected['PSAE001c01'], db.get('PSAE001c01'))
        self.assertEqual(expected['NC_xxxxx_xx'], db.get('NC_xxxxx_xx', 'foo'))
        self.assertIsNone(db.get('foo'))
        self.assertEqual('bar', db.get('foo', 'bar'))

    def test_items(self):
        """``db.items()`` must provide every (name, RepliconInfo) pair, in any order."""
        db = RepliconDB(self.cfg)
        expected = self._expected_infos()
        self.assertCountEqual(list(db.items()),
                              [('ESCO030p01', expected['ESCO030p01']),
                               ('NC_xxxxx_xx', expected['NC_xxxxx_xx']),
                               ('PSAE001c01', expected['PSAE001c01'])])

    def test_iteritems(self):
        """``db.iteritems()`` must yield the (name, RepliconInfo) pairs
        in the order ESCO030p01, PSAE001c01, NC_xxxxx_xx."""
        db = RepliconDB(self.cfg)
        expected = self._expected_infos()
        iter_items = db.iteritems()
        for name in ('ESCO030p01', 'PSAE001c01', 'NC_xxxxx_xx'):
            item = (name, expected[name])
            with self.subTest(item=item):
                self.assertEqual(next(iter_items), item)

    def test_names(self):
        """``db.replicon_names()`` must list the replicon names in this exact order."""
        db = RepliconDB(self.cfg)
        exp_name = ['ESCO030p01', 'PSAE001c01', 'NC_xxxxx_xx']
        self.assertListEqual(db.replicon_names(), exp_name)

    def test_replicon_infos(self):
        """``db.replicon_infos()`` must provide every RepliconInfo, in any order."""
        db = RepliconDB(self.cfg)
        expected = self._expected_infos()
        values = db.replicon_infos()
        self.assertCountEqual(values, [expected['ESCO030p01'],
                                       expected['NC_xxxxx_xx'],
                                       expected['PSAE001c01']])
class Test(MacsyTest):
    """Tests for ``RepliconDB`` run against the ``test_base.fa`` gembase bank.

    Some tests replace ``RepliconDB.__init__`` with a light-weight stand-in so
    that the private ``_fill_*`` methods can be called directly on an empty
    database. ``tearDown`` always puts the real constructor back.
    """

    def __init__(self, methodName='runTest'):
        """Prepare the stand-in constructor and remember the real one."""
        super().__init__(methodName)

        def fake_init(obj, cfg):
            # Sets the attributes below but leaves ``_DB`` empty, so a test
            # can decide itself which ``_fill_*`` method populates it.
            obj.cfg = cfg
            idx = Indexes(cfg)
            obj.sequence_idx = idx.find_my_indexes()
            obj.topology_file = cfg.topology_file()
            obj._DB = {}

        self.fake_init = fake_init
        # kept so that tearDown can undo the monkey-patching done by a test
        self.real_init = RepliconDB.__init__

    def setUp(self):
        """Create a fresh output dir, copy the bank into it and build its index."""
        self.args = argparse.Namespace()
        self.args.db_type = 'gembase'
        self.args.models_dir = self.find_data('models')
        self.args.res_search_dir = tempfile.gettempdir()
        self.args.log_level = 30
        self.args.out_dir = os.path.join(self.args.res_search_dir,
                                         'test_macsyfinder_repliconDB')
        if os.path.exists(self.args.out_dir):
            shutil.rmtree(self.args.out_dir)
        os.mkdir(self.args.out_dir)

        seq_db = self.find_data("base", "test_base.fa")
        shutil.copy(seq_db, self.args.out_dir)
        self.args.sequence_db = os.path.join(self.args.out_dir,
                                             os.path.basename(seq_db))
        self.cfg = Config(MacsyDefaults(), self.args)

        # Expected genes of each replicon. The last entry of every list is
        # present twice.
        # NOTE(review): presumably this mirrors how RepliconDB currently
        # collects the genes of a replicon -- confirm before "fixing" it.
        self.ESCO030p01_genes = [('000010', '886'), ('000020', '291'),
                                 ('000030', '656'), ('000040', '500'),
                                 ('000050', '407'), ('000060', '144'),
                                 ('000070', '183'), ('000080', '121'),
                                 ('000090', '199'), ('000100', '325'),
                                 ('000110', '425'), ('000120', '171'),
                                 ('000130', '277'), ('000140', '133'),
                                 ('000150', '108'), ('000160', '295'),
                                 ('000170', '273'), ('000180', '367'),
                                 ('000190', '573'), ('000200', '343'),
                                 ('000210', '295'), ('000220', '108'),
                                 ('000230', '117'), ('000240', '153'),
                                 ('000250', '479'), ('000260', '706'),
                                 ('000270', '998'), ('000280', '171'),
                                 ('000290', '108'), ('000300', '295'),
                                 ('000310', '165'), ('000320', '243'),
                                 ('000330', '295'), ('000340', '108'),
                                 ('000350', '1755'), ('000360', '248'),
                                 ('000370', '286'), ('000380', '186'),
                                 ('000390', '83'), ('000400', '153'),
                                 ('000410', '69'), ('000420', '295'),
                                 ('000430', '108'), ('000440', '145'),
                                 ('000450', '59'), ('000460', '124'),
                                 ('000470', '246'), ('000480', '325'),
                                 ('000490', '54'), ('000500', '95'),
                                 ('000510', '83'), ('000520', '56'),
                                 ('000530', '401'), ('000540', '320'),
                                 ('000550', '256'), ('000560', '73'),
                                 ('000570', '144'), ('000580', '258'),
                                 ('000590', '133'), ('000600', '140'),
                                 ('000610', '63'), ('000620', '138'),
                                 ('000630', '68'), ('000640', '169'),
                                 ('000650', '127'), ('000660', '295'),
                                 ('000670', '108'), ('000670', '108')]

        self.PSAE001c01_genes = [('006940', '803'), ('013980', '759'),
                                 ('017350', '600'), ('018920', '776'),
                                 ('026600', '273'), ('031420', '658'),
                                 ('043580', '416'), ('051090', '714'),
                                 ('055870', '449'), ('055880', '447'),
                                 ('055890', '588'), ('055900', '292'),
                                 ('055910', '262'), ('055920', '166'),
                                 ('055930', '288'), ('055940', '194'),
                                 ('055950', '567'), ('055960', '188'),
                                 ('055970', '247'), ('055980', '252'),
                                 ('055990', '455'), ('056000', '450'),
                                 ('056010', '260'), ('056020', '246'),
                                 ('056030', '70'), ('056040', '133'),
                                 ('056050', '284'), ('056060', '585'),
                                 ('056070', '435'), ('056080', '342'),
                                 ('056090', '252'), ('056100', '122'),
                                 ('056110', '213'), ('056120', '400'),
                                 ('056130', '134'), ('056140', '138'),
                                 ('056150', '397'), ('056160', '298'),
                                 ('056170', '186'), ('056180', '445'),
                                 ('056190', '414'), ('056200', '132'),
                                 ('056210', '674'), ('056220', '319'),
                                 ('056230', '394'), ('056240', '207'),
                                 ('056250', '401'), ('056260', '611'),
                                 ('056270', '257'), ('056280', '169'),
                                 ('056290', '454'), ('056300', '141'),
                                 ('056310', '458'), ('056320', '286'),
                                 ('056330', '514'), ('056340', '178'),
                                 ('056350', '156'), ('056360', '85'),
                                 ('056370', '289'), ('056380', '126'),
                                 ('056390', '290'), ('056400', '262'),
                                 ('056410', '214'), ('056420', '630'),
                                 ('056430', '127'), ('056440', '455'),
                                 ('056440', '455')]
        self.NCDB_genes = [('056134', '289'), ('056135', '126'),
                           ('056136', '290'), ('056137', '262'),
                           ('056138', '214'), ('056139', '630'),
                           ('056140', '127'), ('056141', '803'),
                           ('056141', '803')]

        idx = Indexes(self.cfg)
        idx._build_my_indexes()

    def tearDown(self):
        """Remove the working directory and restore the real ``RepliconDB.__init__``."""
        try:
            # Best effort: a cleanup problem must not fail the test, but
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            shutil.rmtree(self.cfg.working_dir(), ignore_errors=True)
        finally:
            # must happen whatever the cleanup did, otherwise the patched
            # constructor would leak into the following tests
            RepliconDB.__init__ = self.real_init

    def _expected_replicons(self):
        """Return the expected ``{replicon name: RepliconInfo}`` for ``self.cfg``."""
        default_topology = self.cfg.replicon_topology()
        return {
            'ESCO030p01': RepliconInfo(default_topology, 1, 67,
                                       self.ESCO030p01_genes),
            'PSAE001c01': RepliconInfo(default_topology, 68, 133,
                                       self.PSAE001c01_genes),
            'NC_xxxxx_xx': RepliconInfo("circular", 134, 141,
                                        self.NCDB_genes),
        }

    def _check_replicon(self, db, name, topology, first, last, genes):
        """Assert that ``db[name]`` has the given topology, min, max and genes."""
        replicon = db[name]
        self.assertEqual(replicon.topology, topology)
        self.assertEqual(replicon.min, first)
        self.assertEqual(replicon.max, last)
        self.assertEqual(replicon.genes, genes)

    def test_fill_topology(self):
        """``_fill_topology`` reads back the ``name : topology`` lines of the topology file."""
        self.args.topology_file = self.args.sequence_db + ".topo"
        db_send = {'ESCO030p01': 'circular', 'PSAE001c01': 'linear'}
        with open(self.args.topology_file, 'w') as f:
            for name, topology in db_send.items():
                f.write(f'{name} : {topology}\n')

        cfg = Config(MacsyDefaults(), self.args)
        RepliconDB.__init__ = self.fake_init
        db = RepliconDB(cfg)
        rcv_topo = db._fill_topology()
        self.assertDictEqual(db_send, rcv_topo)

    def test_fill_ordered_replicon_min_max(self):
        """``_fill_ordered_min_max`` stores a single replicon spanning positions 1 to 52."""
        seq_ori = self.find_data("base", "ordered_replicon_base.fasta")
        shutil.copy(seq_ori, self.args.out_dir)
        self.args.sequence_db = os.path.join(self.args.out_dir,
                                             os.path.basename(seq_ori))
        cfg = Config(MacsyDefaults(), self.args)

        idx = Indexes(cfg)
        idx._build_my_indexes()
        RepliconDB.__init__ = self.fake_init
        db = RepliconDB(cfg)
        db._fill_ordered_min_max(cfg.replicon_topology())

        self.assertEqual(len(db._DB), 1)
        rep = db[RepliconDB.ordered_replicon_name]
        self.assertEqual(rep.topology, cfg.replicon_topology())
        self.assertEqual(rep.min, 1)
        self.assertEqual(rep.max, 52)

    def test_fill_gembase_min_max_default_topology(self):
        """Without topology entries every replicon is expected to be 'circular'."""
        RepliconDB.__init__ = self.fake_init
        db = RepliconDB(self.cfg)
        db._fill_gembase_min_max({}, self.cfg.replicon_topology())
        self.assertEqual(len(db._DB), 3)
        self.assertEqual(set(db._DB.keys()),
                         {'ESCO030p01', 'PSAE001c01', 'NC_xxxxx_xx'})
        self._check_replicon(db, 'ESCO030p01', 'circular', 1, 67,
                             self.ESCO030p01_genes)
        self._check_replicon(db, 'PSAE001c01', 'circular', 68, 133,
                             self.PSAE001c01_genes)
        self._check_replicon(db, 'NC_xxxxx_xx', 'circular', 134, 141,
                             self.NCDB_genes)

    def test_fill_gembase_min_max_with_topology(self):
        """Topology file entries win; replicons absent from it get the default passed in."""
        self.args.topology_file = self.args.sequence_db + ".topo"
        with open(self.args.topology_file, 'w') as f:
            f.write(
                '# topology file\nESCO030p01 : circular\nPSAE001c01 : linear\n'
            )
        cfg = Config(MacsyDefaults(), self.args)
        RepliconDB.__init__ = self.fake_init
        db = RepliconDB(cfg)
        topo_dict = db._fill_topology()
        db._fill_gembase_min_max(topo_dict, 'circular')
        self.assertEqual(len(db._DB), 3)
        self.assertEqual(set(db._DB.keys()),
                         {'ESCO030p01', 'PSAE001c01', 'NC_xxxxx_xx'})
        self._check_replicon(db, 'ESCO030p01', 'circular', 1, 67,
                             self.ESCO030p01_genes)
        self._check_replicon(db, 'PSAE001c01', 'linear', 68, 133,
                             self.PSAE001c01_genes)
        self._check_replicon(db, 'NC_xxxxx_xx', 'circular', 134, 141,
                             self.NCDB_genes)

    def test_in(self):
        """``in`` is true for the three replicon names and false for an unknown one."""
        db = RepliconDB(self.cfg)
        self.assertIn('ESCO030p01', db)
        self.assertIn('PSAE001c01', db)
        self.assertIn('NC_xxxxx_xx', db)
        self.assertNotIn('toto', db)

    def test_getitem(self):
        """``db[name]`` returns the matching RepliconInfo and raises KeyError otherwise."""
        db = RepliconDB(self.cfg)
        expected = self._expected_replicons()
        self.assertEqual(expected['ESCO030p01'], db['ESCO030p01'])
        self.assertEqual(expected['PSAE001c01'], db['PSAE001c01'])
        self.assertEqual(expected['NC_xxxxx_xx'], db['NC_xxxxx_xx'])
        self.assertRaises(KeyError, db.__getitem__, 'foo')

    def test_get(self):
        """``get`` returns the RepliconInfo, or the default (None if omitted) for unknown names."""
        db = RepliconDB(self.cfg)
        expected = self._expected_replicons()
        self.assertEqual(expected['ESCO030p01'], db.get('ESCO030p01'))
        self.assertEqual(expected['PSAE001c01'], db.get('PSAE001c01'))
        # the default must be ignored when the name is known
        self.assertEqual(expected['NC_xxxxx_xx'], db.get('NC_xxxxx_xx', 'foo'))
        self.assertIsNone(db.get('foo'))
        self.assertEqual('bar', db.get('foo', 'bar'))

    def test_items(self):
        """``items()`` holds every (name, RepliconInfo) pair, in any order."""
        db = RepliconDB(self.cfg)
        expected = self._expected_replicons()
        self.assertCountEqual(list(db.items()), list(expected.items()))

    def test_iteritems(self):
        """Iterating over ``items()`` yields every (name, RepliconInfo) pair, in any order."""
        db = RepliconDB(self.cfg)
        expected = self._expected_replicons()
        self.assertCountEqual(iter(db.items()), list(expected.items()))

    def test_replicon_infos(self):
        """``replicon_infos()`` holds one RepliconInfo per replicon, in any order."""
        db = RepliconDB(self.cfg)
        expected = self._expected_replicons()
        values = db.replicon_infos()
        self.assertCountEqual(values, list(expected.values()))
class TestSearchGenes(MacsyTest):
    """Tests for ``search_genes``: a plain hmmsearch run and a run recovering a previous job."""

    def setUp(self):
        """Create ``tmp_dir/job_1`` as output dir, build the bank index and a profile factory."""
        self.tmp_dir = os.path.join(tempfile.gettempdir(),
                                    'test_macsyfinder_search_genes')
        if os.path.exists(self.tmp_dir):
            shutil.rmtree(self.tmp_dir)
        os.mkdir(self.tmp_dir)

        args = argparse.Namespace()
        args.sequence_db = self.find_data("base", "test_base.fa")
        args.db_type = 'gembase'
        args.models_dir = self.find_data('models')
        args.log_level = 30
        args.out_dir = os.path.join(self.tmp_dir, 'job_1')
        args.res_search_dir = args.out_dir
        os.mkdir(args.out_dir)

        self.cfg = Config(MacsyDefaults(), args)

        self.model_name = 'foo'
        self.model_location = ModelLocation(
            path=os.path.join(args.models_dir, self.model_name))

        idx = Indexes(self.cfg)
        idx._build_my_indexes()
        self.profile_factory = ProfileFactory(self.cfg)

    def tearDown(self):
        """Remove ``tmp_dir`` and with it every job directory created below it."""
        # job_1 (setUp) and job_2 (test_search_recover) are both created under
        # tmp_dir, so removing it leaves nothing behind. Best effort: a cleanup
        # problem must not fail the test.
        shutil.rmtree(self.tmp_dir, ignore_errors=True)

    @staticmethod
    def _expected_abc_hit(gene):
        """Return the single hit expected when searching the 'abc' profile in test_base.fa."""
        return Hit(gene, "ESCO030p01_000260", 706, "ESCO030p01", 26,
                   1.000e-200, 660.800, 1.000, 0.714,
                   160, 663)

    @unittest.skipIf(not which('hmmsearch'), 'hmmsearch not found in PATH')
    def test_search(self):
        """Searching the 'abc' gene gives one report whose first hit is the expected one."""
        gene_name = "abc"
        c_gene_abc = CoreGene(self.model_location, gene_name,
                              self.profile_factory)
        report = search_genes([c_gene_abc], self.cfg)
        expected_hit = self._expected_abc_hit(c_gene_abc)
        self.assertEqual(len(report), 1)
        self.assertEqual(expected_hit, report[0].hits[0])

    @unittest.skipIf(not which('hmmsearch'), 'hmmsearch not found in PATH')
    def test_search_recover(self):
        """A second job pointing at the first one as previous run gives the same hit."""
        # first job searching using hmmsearch; only its output files matter here
        gene_name = "abc"
        c_gene_abc = CoreGene(self.model_location, gene_name,
                              self.profile_factory)
        search_genes([c_gene_abc], self.cfg)
        expected_hit = self._expected_abc_hit(c_gene_abc)

        # second job using recover
        # disable hmmer to be sure that the test uses the recover inner function
        self.cfg.hmmer = lambda: "hmmer_disable"
        # and create a new dir for the second job
        previous_job_path = self.cfg.working_dir()
        self.cfg.previous_run = lambda: previous_job_path
        self.cfg.out_dir = lambda: os.path.join(self.tmp_dir, 'job_2')
        os.mkdir(self.cfg.out_dir())

        # rerun with previous run
        # but we have to reset the profile attached to the gene gene._profile._report
        self.profile_factory = ProfileFactory(self.cfg)
        c_gene_abc = CoreGene(self.model_location, gene_name,
                              self.profile_factory)
        report = search_genes([c_gene_abc], self.cfg)
        self.assertEqual(len(report), 1)
        self.assertEqual(expected_hit, report[0].hits[0])
class TestIndex(MacsyTest):
    """Tests for ``Indexes``: locating, building, validating and iterating a sequence index."""

    # Index entries (sequence id, length, rank) expected for test_1.fasta.
    # Shared by test_build_with_idx (file content) and test_iter (iteration).
    _EXPECTED_ENTRIES = (
        ('VICH001.B.00001.C001_01359', 200, 1),
        ('VICH001.B.00001.C001_01360', 484, 2),
        ('VICH001.B.00001.C001_01361', 406, 3),
        ('VICH001.B.00001.C001_01390', 326, 4),
        ('VICH001.B.00001.C001_01391', 54, 5),
        ('VICH001.B.00001.C001_01392', 206, 6),
        ('VICH001.B.00001.C001_01393', 477, 7),
        ('VICH001.B.00001.C001_01394', 126, 8),
        ('VICH001.B.00001.C001_01395', 405, 9),
        ('VICH001.B.00001.C001_01396', 572, 10),
        ('VICH001.B.00001.C001_01397', 721, 11),
        ('VICH001.B.00001.C001_01398', 467, 12),
        ('VICH001.B.00001.C001_01399', 720, 13),
        ('VICH001.B.00001.C001_01400', 559, 14),
        ('VICH001.B.00001.C001_01401', 153, 15),
        ('VICH001.B.00001.C001_01402', 4558, 16),
        ('VICH001.B.00001.C001_01500', 120, 17),
        ('VICH001.B.00001.C001_01501', 344, 18),
        ('VICH001.B.00001.C001_01502', 478, 19),
        ('VICH001.B.00001.C001_01503', 724, 20),
        ('VICH001.B.00001.C001_01504', 309, 21),
        ('VICH001.B.00001.C001_01505', 390, 22),
        ('VICH001.B.00001.C001_01506', 419, 23),
        ('VICH001.B.00001.C001_01540', 353, 24),
        ('VICH001.B.00001.C001_01541', 229, 25),
        ('VICH001.B.00001.C001_01542', 267, 26),
        ('VICH001.B.00001.C001_01543', 328, 27),
        ('VICH001.B.00001.C001_01544', 258, 28),
        ('VICH001.B.00001.C001_01545', 228, 29),
        ('VICH001.B.00001.C001_01546', 538, 30),
        ('VICH001.B.00001.C001_01547', 77, 31),
        ('VICH001.B.00001.C001_01548', 476, 32),
        ('VICH001.B.00001.C001_01549', 324, 33),
        ('VICH001.B.00001.C001_01550', 387, 34),
        ('VICH001.B.00001.C001_01551', 382, 35),
        ('VICH001.B.00001.C001_01552', 149, 36),
        ('VICH001.B.00001.C001_01553', 319, 37),
        ('VICH001.B.00001.C001_01554', 237, 38),
        ('VICH001.B.00001.C001_01555', 74, 39),
        ('VICH001.B.00001.C001_01556', 362, 40),
        ('VICH001.B.00001.C001_01557', 170, 41),
        ('VICH001.B.00001.C001_01558', 77, 42),
        ('VICH001.B.00001.C001_01559', 296, 43),
        ('VICH001.B.00001.C001_01560', 405, 44),
        ('VICH001.B.00001.C001_01561', 182, 45),
        ('VICH001.B.00001.C001_01562', 445, 46),
        ('VICH001.B.00001.C001_01563', 212, 47),
        ('VICH001.B.00001.C001_01564', 387, 48),
        ('VICH001.B.00001.C001_01565', 414, 49),
    )

    def setUp(self):
        """Create a fresh output dir holding a copy of test_1.fasta and build the config."""
        args = argparse.Namespace()
        args.db_type = 'gembase'
        args.models_dir = self.find_data('models')
        args.out_dir = os.path.join(tempfile.gettempdir(),
                                    'test_macsyfinder_indexes')
        if os.path.exists(args.out_dir):
            shutil.rmtree(args.out_dir)
        os.makedirs(args.out_dir)
        seq_db = self.find_data("base", "test_1.fasta")
        shutil.copy(seq_db, args.out_dir)

        args.index_dir = args.out_dir
        args.sequence_db = os.path.join(args.out_dir, os.path.basename(seq_db))

        self.cfg = Config(MacsyDefaults(), args)

    def tearDown(self):
        """Remove the working directory (best effort)."""
        # A cleanup problem must not fail the test, but KeyboardInterrupt and
        # SystemExit are no longer swallowed as with a bare ``except``.
        shutil.rmtree(self.cfg.working_dir(), ignore_errors=True)

    def _make_args(self, sequence_db, index_dir=None):
        """Return parsed-like arguments; ``index_dir`` is left unset when None."""
        args = argparse.Namespace()
        args.db_type = 'gembase'
        args.models_dir = self.find_data('models')
        args.out_dir = os.path.join(tempfile.gettempdir(),
                                    'test_macsyfinder_indexes')
        args.sequence_db = sequence_db
        if index_dir is not None:
            args.index_dir = index_dir
        return args

    def test_find_my_indexes(self):
        """``find_my_indexes`` is None until ``<name>.idx`` exists next to the bank."""
        idx = Indexes(self.cfg)
        self.assertIsNone(idx.find_my_indexes())
        new_idx = os.path.join(os.path.dirname(self.cfg.sequence_db()),
                               idx.name + ".idx")
        # an empty file is enough to be found
        with open(new_idx, 'w'):
            pass
        self.assertEqual(idx.find_my_indexes(), new_idx)

    def test_build_no_idx(self):
        """``build`` returns the path of the index created next to the bank."""
        idx = Indexes(self.cfg)
        my_idx = idx.build()
        self.assertEqual(
            my_idx,
            os.path.join(os.path.dirname(self.cfg.sequence_db()),
                         idx.name + ".idx"))

    def test_build_with_idx(self):
        """``build`` keeps an index in the current format and rebuilds old-format ones."""
        idx = Indexes(self.cfg)
        idx_path = os.path.join(os.path.dirname(self.cfg.sequence_db()),
                                idx.name + ".idx")
        sep = idx._field_separator
        old_format_msg = f"The '{idx_path}' index file is in old format. Force index building."

        # case new style idx: the file must be left untouched (same size)
        idx_content_new = f"{self.cfg.sequence_db()}\nVICH001.B.00001.C001_01359{sep}200{sep}1\n"
        with open(idx_path, 'w') as idx_file:
            idx_file.write(idx_content_new)
        idx.build()
        self.assertEqual(os.path.getsize(idx_path), len(idx_content_new))

        # case old style no path as first line
        with open(idx_path, 'w') as idx_file:
            idx_content_old = "VICH001.B.00001.C001_01359;200;1\n"
            idx_file.write(idx_content_old)
        with self.catch_log(log_name='macsypy') as log:
            _ = idx.build()
            log_msg = log.get_value().strip()
        self.assertEqual(log_msg, old_format_msg)

        # case old style bad separator
        with open(idx_path, 'w') as idx_file:
            idx_content_old = f"{self.cfg.sequence_db()}\nVICH001.B.00001.C001_01359;200;1\n"
            idx_file.write(idx_content_old)
        with self.catch_log(log_name='macsypy') as log:
            _ = idx.build()
            log_msg = log.get_value().strip()
        self.assertEqual(log_msg, old_format_msg)

        # case idx seems valid read it
        with open(idx_path) as idx_file_test:
            data = idx_file_test.read()

        # first line is the bank path, then one "id<sep>length<sep>rank" line per sequence
        new_content = f"{self.cfg.sequence_db()}\n" + "".join(
            f"{seq_id}{sep}{length}{sep}{rank}\n"
            for seq_id, length, rank in self._EXPECTED_ENTRIES)

        self.assertEqual(data, new_content)

    def test_build_force(self):
        """``build(force=True)`` produces a non-empty index file."""
        idx = Indexes(self.cfg)
        idx.build(force=True)
        my_idx = idx.find_my_indexes()
        self.assertNotEqual(os.path.getsize(my_idx), 0)

    @unittest.skipIf(platform.system() == 'Windows' or os.getuid() == 0,
                     'Skip test on Windows or if run as root')
    def test_build_not_writable(self):
        """``build`` raises IOError when the directory of the bank is not writable."""
        # Skipped on Windows, since setting the folder permissions does not
        # affect the files inside. In a Singularity container the tests are
        # run as root, where this test makes no sense.
        idx = Indexes(self.cfg)
        idx_dir = os.path.dirname(self.cfg.sequence_db())
        os.chmod(idx_dir, 0o000)
        try:
            with self.assertRaises(IOError) as ctx:
                with self.catch_log():
                    idx.build()
            self.assertEqual(f"The '{idx_dir}' dir is not writable.",
                             str(ctx.exception))
        finally:
            # give the rights back so that the directory can be cleaned up
            os.chmod(idx_dir, 0o777)

    @unittest.skipIf(platform.system() == 'Windows' or os.getuid() == 0,
                     'Skip test on Windows or if run as root')
    def test_index_dir(self):
        """``_index_dir`` resolves the index directory and checks it only as far as needed."""
        out_dir = os.path.join(tempfile.gettempdir(),
                               'test_macsyfinder_indexes')
        seq_name = os.path.basename(self.cfg.sequence_db())

        # case --index-dir is not specified and the sequence-db dir is not writable
        args = self._make_args(os.path.join(out_dir, seq_name))
        cfg = Config(MacsyDefaults(), args)
        idx = Indexes(cfg)
        index_dir = idx._index_dir(build=False)
        expc_idx_dir = os.path.dirname(cfg.sequence_db())
        self.assertEqual(index_dir, expc_idx_dir)
        try:
            os.chmod(index_dir, 0o000)
            with self.assertRaises(ValueError) as ctx:
                _ = idx._index_dir(build=True)
            self.assertEqual(
                f"The '{index_dir}' dir is not writable. Change rights or specify --index-dir.",
                str(ctx.exception))
        finally:
            os.chmod(index_dir, 0o777)

        args = self._make_args(os.path.join(out_dir, seq_name),
                               index_dir=os.path.join(out_dir, 'index_dir'))
        cfg = Config(MacsyDefaults(), args)
        idx = Indexes(cfg)

        # case --index-dir does not exist
        with self.assertRaises(ValueError) as ctx:
            _ = idx._index_dir(build=False)
        self.assertEqual(str(ctx.exception),
                         f"No such directory: {args.index_dir}")

        # case --index-dir is not writable
        os.makedirs(args.index_dir)
        os.chmod(args.index_dir, 0o000)
        try:
            # reading only: the missing write permission does not matter
            index_dir = idx._index_dir(build=False)
            self.assertEqual(index_dir, args.index_dir)

            # building the index: the write permission is required
            with self.assertRaises(ValueError) as ctx:
                _ = idx._index_dir(build=True)
            self.assertEqual(str(ctx.exception),
                             f"The '{index_dir}' dir is not writable.")
        finally:
            os.chmod(args.index_dir, 0o777)

        # case the sequence_db value is just a filename not a path
        current_dir = os.getcwd()
        try:
            os.chdir(out_dir)
            args = self._make_args(seq_name)
            cfg = Config(MacsyDefaults(), args)
            idx = Indexes(cfg)
            idx_dir = idx._index_dir(build=True)
            self.assertEqual(idx_dir, os.getcwd())
        finally:
            os.chdir(current_dir)

    def test_build_my_indexes(self):
        """``_build_my_indexes`` raises MacsypyError on the ``test_base_with_errors.fa`` bank."""
        args = argparse.Namespace()
        args.db_type = 'gembase'

        args.out_dir = os.path.join(tempfile.gettempdir(),
                                    'test_macsyfinder_indexes')
        if os.path.exists(args.out_dir):
            shutil.rmtree(args.out_dir)
        os.makedirs(args.out_dir)
        seq_db = self.find_data("base", "test_base_with_errors.fa")
        shutil.copy(seq_db, args.out_dir)
        args.sequence_db = os.path.join(args.out_dir, os.path.basename(seq_db))
        # replaces the config made by setUp, so tearDown cleans this one
        self.cfg = Config(MacsyDefaults(), args)

        idx = Indexes(self.cfg)
        with self.assertRaises(MacsypyError) as e:
            # the directory for the index exists and is writable but
            # the sequence file is corrupted and cannot be read correctly
            with self.catch_log():
                idx._build_my_indexes(args.out_dir)
        self.assertTrue(
            str(e.exception).startswith(
                "unable to index the sequence dataset:"))

    def test_iter(self):
        """Iterating raises before ``build`` and yields (id, length, rank) tuples after it."""
        idx = Indexes(self.cfg)
        with self.assertRaises(MacsypyError) as ctx:
            next(iter(idx))

        self.assertEqual(str(ctx.exception), 'Build index before to use it.')

        idx.build()
        self.assertListEqual(list(iter(idx)), list(self._EXPECTED_ENTRIES))
Exemple #14
0
class TestProfile(MacsyTest):

    def setUp(self):
        # Build the fixtures shared by every test of this class: a configuration
        # pointing at the test data, an empty working directory, the location of
        # the 'foo' models and a profile factory.
        args = argparse.Namespace()
        args.sequence_db = self.find_data("base", "test_1.fasta")
        args.db_type = 'gembase'
        args.models_dir = self.find_data('models')
        args.res_search_dir = tempfile.gettempdir()
        args.log_level = 0
        self.cfg = Config(MacsyDefaults(), args)

        # Start from an empty working directory: leftovers of a previous
        # (possibly aborted) run are removed before the directory is recreated.
        working_dir = self.cfg.working_dir()
        if os.path.exists(working_dir):
            shutil.rmtree(working_dir)
        os.makedirs(working_dir)

        self.model_name = 'foo'
        self.model_location = ModelLocation(path=os.path.join(args.models_dir, self.model_name))
        self.profile_factory = ProfileFactory(self.cfg)


    def tearDown(self):
        # Remove the working directory used by the test.
        # Cleanup is best effort: a directory which is already gone or cannot be
        # deleted must not turn a passing test into an error. Only filesystem
        # errors are silenced, so KeyboardInterrupt / SystemExit still propagate.
        try:
            shutil.rmtree(self.cfg.working_dir())
        except OSError:
            pass


    def test_len(self):
        # len() of the Profile built for the 'abc' gene is expected to be 501.
        t2ss = Model("foo/T2SS", 10)
        name = 'abc'
        core = CoreGene(self.model_location, name, self.profile_factory)
        abc_gene = ModelGene(core, t2ss)
        hmm_path = self.model_location.get_profile(name)

        abc_profile = Profile(abc_gene, self.cfg, hmm_path)
        self.assertEqual(len(abc_profile), 501)


    def test_ga_threshold(self):
        # ga_threshold is expected to be falsy for the 'abc' profile
        # and truthy for the 'T5aSS_PF03797' one.
        t2ss = Model("foo/T2SS", 10)
        expectations = (
            ('abc', self.assertFalse),
            ('T5aSS_PF03797', self.assertTrue),
        )
        for name, check in expectations:
            core = CoreGene(self.model_location, name, self.profile_factory)
            current_gene = ModelGene(core, t2ss)
            hmm_path = self.model_location.get_profile(name)
            check(Profile(current_gene, self.cfg, hmm_path).ga_threshold)

    def test_str(self):
        # str(profile) is expected to read "<gene name> : <path of the hmm profile>".
        t2ss = Model("foo/T2SS", 10)
        core = CoreGene(self.model_location, 'abc', self.profile_factory)
        abc_gene = ModelGene(core, t2ss)
        hmm_path = self.model_location.get_profile("abc")

        abc_profile = Profile(abc_gene, self.cfg, hmm_path)
        expected = "{0} : {1}".format(abc_gene.name, hmm_path)
        self.assertEqual(str(abc_profile), expected)


    @unittest.skipUnless(which('hmmsearch'), 'hmmsearch not found in PATH')
    def test_execute(self):
        # For each db_type, run Profile.execute and inspect the header of the raw
        # hmmsearch output in three situations:
        #   1. the profile has a GA threshold          -> "GA cutoffs" is reported
        #   2. same profile but no_cut_ga is set       -> E-value <= 0.5 is reported
        #   3. the profile has no GA threshold ('abc') -> E-value <= 0.1 is reported
        banner = "# hmmsearch :: search profile(s) against a sequence database"

        def read_lines(handle, count):
            # consume `count` lines from handle and hand back the last one read
            last = None
            for _ in range(count):
                last = handle.readline()
            return last

        def check_header(raw_out_path, hmm_path, threshold_line):
            with open(raw_out_path, 'r') as raw_out:
                # line 1: a hmmsearch output file has been produced
                self.assertTrue(raw_out.readline().startswith(banner))
                # line 6 must mention the hmm profile used
                # (it looks like "# query HMM file: {path of the hmm profile}")
                self.assertIn(hmm_path, read_lines(raw_out, 5))
                # line 9 tells which threshold hmmsearch applied
                self.assertEqual(threshold_line, read_lines(raw_out, 3).strip())

        for db_type in ("gembase", "ordered_replicon", "unordered"):
            self.cfg._set_db_type(db_type)
            t2ss = Model("foo/T2SS", 10)

            # case 1: GA threshold in profile
            ga_name = 'T5aSS_PF03797'
            ga_core = CoreGene(self.model_location, ga_name, self.profile_factory)
            ga_gene = ModelGene(ga_core, t2ss)
            ga_path = self.model_location.get_profile(ga_name)
            ga_profile = Profile(ga_gene, self.cfg, ga_path)
            first_report = ga_profile.execute()
            check_header(ga_profile.hmm_raw_output, ga_path,
                         "# model-specific thresholding:     GA cutoffs")
            # a profile is executed only once per run:
            # a second call must hand back the very same report object
            self.assertIs(first_report, ga_profile.execute())

            # case 2: GA threshold in profile but --no-cut-ga is set
            # NOTE(review): this configuration always uses db_type 'gembase',
            # whatever the db_type of the current iteration -- confirm it is intended
            no_ga_args = argparse.Namespace(
                sequence_db=self.find_data("base", "test_1.fasta"),
                db_type='gembase',
                models_dir=self.find_data('models'),
                res_search_dir=tempfile.gettempdir(),
                log_level=0,
                e_value_search=0.5,
                no_cut_ga=True,
            )
            no_ga_cfg = Config(MacsyDefaults(), no_ga_args)
            no_ga_profile = Profile(ga_gene, no_ga_cfg, ga_path)
            no_ga_profile.execute()
            with open(no_ga_profile.hmm_raw_output, 'r') as raw_out:
                self.assertEqual("# sequence reporting threshold:    E-value <= 0.5",
                                 read_lines(raw_out, 9).strip())

            # case 3: cut-ga but no GA threshold in the hmm profile
            plain_name = 'abc'
            plain_core = CoreGene(self.model_location, plain_name, self.profile_factory)
            plain_gene = ModelGene(plain_core, t2ss)
            plain_path = self.model_location.get_profile(plain_name)
            plain_profile = Profile(plain_gene, self.cfg, plain_path)
            with self.catch_log():
                plain_profile.execute()
            check_header(plain_profile.hmm_raw_output, plain_path,
                         '# sequence reporting threshold:    E-value <= 0.1')


    def test_execute_unknown_binary(self):
        # Profile.execute is expected to raise RuntimeError when the 'hmmer'
        # option is set to "Nimportnaoik" (presumably not an existing executable).
        self.cfg._options['hmmer'] = "Nimportnaoik"

        t2ss = Model("foo/T2SS", 10)
        core = CoreGene(self.model_location, 'abc', self.profile_factory)
        abc_gene = ModelGene(core, t2ss)
        hmm_path = self.model_location.get_profile("abc")
        abc_profile = Profile(abc_gene, self.cfg, hmm_path)

        with self.catch_log(), self.assertRaises(RuntimeError):
            abc_profile.execute()


    def test_execute_hmmer_failed(self):
        fake_hmmer = os.path.join(tempfile.gettempdir(), 'hmmer_failed')
        with open(fake_hmmer, 'w') as hmmer:
            hmmer.write("""#! {}
import sys
sys.exit(127)
""".format(sysconfig.sys.executable))
        try:
            os.chmod(hmmer.name, 0o755)
            self.cfg._options['hmmer'] = hmmer.name
            model = Model("foo/T2SS", 10)

            gene_name = 'abc'
            c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
            gene = ModelGene(c_gene, model)

            path = self.model_location.get_profile("abc", )
            profile = Profile(gene, self.cfg, path)
            with self.catch_log():
                with self.assertRaisesRegex(RuntimeError,
                                            "an error occurred during Hmmer "
                                            "execution: command = .* : return code = 127 .*") as ctx:
                    profile.execute()

        finally:
            try:
                os.unlink(fake_hmmer)
            except Exception:
                pass