Python which Examples

Programming Language: Python

Namespace/Package Name: tests

Method/Function: which

Examples at hotexamples.com: 12

Python which - 12 examples found. These are the top rated real world Python examples of tests.which extracted from open source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

File: test_local_max.py Project: gem-pasteur/Integron_Finder

    def setUp(self):
        if 'INTEGRON_HOME' in os.environ:
            self.integron_home = os.environ['INTEGRON_HOME']
            self.local_install = True
        else:
            self.local_install = False
            self.integron_home = os.path.normpath(os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..')))

        self.tmp_dir = os.path.join(tempfile.gettempdir(), 'tmp_test_integron_finder')
        if os.path.exists(self.tmp_dir) and os.path.isdir(self.tmp_dir):
            shutil.rmtree(self.tmp_dir)
        os.makedirs(self.tmp_dir)

        self.cmsearch = which('cmsearch')
        self.out_dir = self.tmp_dir
        self.model_attc_path = self.find_data(os.path.join('Models', 'attc_4.cm'))
        self.cpu_nb = 1
        replicon_name = 'lian.001.c02.10'
        replicon_path = self.find_data(os.path.join('Replicons', replicon_name + '.fst'))
        topologies = Topology('lin')
        with FastaIterator(replicon_path) as sequences_db:
            sequences_db.topologies = topologies
            self.replicon = next(sequences_db)
        self.evalue_attc = 1.
        self.max_attc_size = 200
        self.min_attc_size = 40
        self.length_cm = 47  # length in 'CLEN' (value for model attc_4.cm)
        self.call = call_wrapper()
        infernal.read_infernal = read_infernal_mock(self.tmp_dir)

Example #2

Show file

    def setUp(self):
        if 'INTEGRON_HOME' in os.environ:
            self.integron_home = os.environ['INTEGRON_HOME']
            self.local_install = True
        else:
            self.local_install = False
            self.integron_home = os.path.normpath(
                os.path.abspath(
                    os.path.join(os.path.dirname(__file__), '..', '..')))

        self.tmp_dir = os.path.join(tempfile.gettempdir(),
                                    'tmp_test_integron_finder')
        if os.path.exists(self.tmp_dir) and os.path.isdir(self.tmp_dir):
            shutil.rmtree(self.tmp_dir)
        os.makedirs(self.tmp_dir)

        self.cmsearch = which('cmsearch')
        self.out_dir = self.tmp_dir
        self.model_attc_path = self.find_data(
            os.path.join('Models', 'attc_4.cm'))
        self.cpu_nb = 1
        replicon_name = 'lian.001.c02.10'
        replicon_path = self.find_data(
            os.path.join('Replicons', replicon_name + '.fst'))
        topologies = Topology('lin')
        with FastaIterator(replicon_path) as sequences_db:
            sequences_db.topologies = topologies
            self.replicon = next(sequences_db)
        self.evalue_attc = 1.
        self.max_attc_size = 200
        self.min_attc_size = 40
        self.length_cm = 47  # length in 'CLEN' (value for model attc_4.cm)
        self.call = call_wrapper()
        infernal.read_infernal = read_infernal_mock(self.tmp_dir)

Example #3

Show file

File: test_find_integrons.py Project: orangeSi/Integron_Finder

    def setUp(self):
        if 'INTEGRON_HOME' in os.environ:
            self.integron_home = os.environ['INTEGRON_HOME']
            self.local_install = True
        else:
            self.local_install = False
            self.integron_home = os.path.normpath(
                os.path.abspath(
                    os.path.join(os.path.dirname(__file__), '..', '..')))

        self.tmp_dir = os.path.join(tempfile.gettempdir(),
                                    'tmp_test_integron_finder')
        os.makedirs(self.tmp_dir)

        integron_finder.PRODIGAL = which('prodigal')
        integron_finder.HMMSEARCH = which('hmmsearch')
        integron_finder.N_CPU = '1'
        integron_finder.MODEL_DIR = os.path.join(self.integron_home, "data",
                                                 "Models")
        integron_finder.MODEL_integrase = os.path.join(
            integron_finder.MODEL_DIR, "integron_integrase.hmm")
        integron_finder.MODEL_phage_int = os.path.join(
            integron_finder.MODEL_DIR, "phage-int.hmm")
        integron_finder.MODEL_attc = os.path.join(self.integron_home, 'data',
                                                  'Models', 'attc_4.cm')

        self.columns = [
            'pos_beg', 'pos_end', 'strand', 'evalue', 'type_elt', 'model',
            'distance_2attC', 'annotation'
        ]
        self.dtype = {
            "pos_beg": 'int',
            "pos_end": 'int',
            "strand": 'int',
            "evalue": 'float',
            "type_elt": 'str',
            "annotation": 'str',
            "model": 'str',
            "distance_2attC": 'float'
        }

Example #4

Show file

File: test_acba.py Project: orangeSi/Integron_Finder

 def setUp(self):
     if 'INTEGRON_HOME' in os.environ:
         self.integron_home = os.environ['INTEGRON_HOME']
         self.local_install = True
     else:
         self.local_install = False
         self.integron_home = os.path.normpath(
             os.path.abspath(
                 os.path.join(os.path.dirname(__file__), '..'
                              '..')))
     self.tmp_dir = tempfile.gettempdir()
     self.bin = os.path.join(
         self.integron_home, 'integron_finder'
     ) if self.local_install else which('integron_finder')

Example #5

Show file

File: test_find_attc.py Project: orangeSi/Integron_Finder

    def setUp(self):
        if 'INTEGRON_HOME' in os.environ:
            self.integron_home = os.environ['INTEGRON_HOME']
            self.local_install = True
        else:
            self.local_install = False
            self.integron_home = os.path.normpath(
                os.path.abspath(
                    os.path.join(os.path.dirname(__file__), '..', '..')))

        self.tmp_dir = os.path.join(tempfile.gettempdir(),
                                    'tmp_test_integron_finder')
        os.makedirs(self.tmp_dir)
        integron_finder.CMSEARCH = which('cmsearch')
        integron_finder.N_CPU = '1'
        integron_finder.MODEL_attc = os.path.join(self.integron_home, 'data',
                                                  'Models', 'attc_4.cm')

Example #6

Show file

class TestProfile(MacsyTest):
    def setUp(self):
        args = argparse.Namespace()
        args.sequence_db = self.find_data("base", "test_1.fasta")
        args.db_type = 'gembase'
        args.models_dir = self.find_data('models')
        args.res_search_dir = tempfile.gettempdir()
        args.log_level = 50
        self.cfg = Config(MacsyDefaults(), args)

        if os.path.exists(self.cfg.working_dir()):
            shutil.rmtree(self.cfg.working_dir())
        os.makedirs(self.cfg.working_dir())

        self.model_name = 'foo'
        self.model_location = ModelLocation(
            path=os.path.join(args.models_dir, self.model_name))
        self.profile_factory = ProfileFactory(self.cfg)

    def tearDown(self):
        try:
            shutil.rmtree(self.cfg.working_dir())
        except:
            pass

    def test_len(self):
        model = Model("foo/T2SS", 10)

        gene_name = 'abc'
        c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
        gene = ModelGene(c_gene, model)

        path = self.model_location.get_profile("abc")
        profile = Profile(gene, self.cfg, path)
        self.assertEqual(len(profile), 501)

    def test_ga_threshold(self):
        # No GA threshold
        model = Model("foo/T2SS", 10)

        gene_name = 'abc'
        c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
        gene = ModelGene(c_gene, model)
        path = self.model_location.get_profile(gene_name)
        profile = Profile(gene, self.cfg, path)
        self.assertFalse(profile.ga_threshold)

        # GA threshold line ends with ;
        gene_name = 'T5aSS_PF03797'
        c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
        gene = ModelGene(c_gene, model)
        path = self.model_location.get_profile(gene_name)
        profile = Profile(gene, self.cfg, path)
        self.assertTrue(profile.ga_threshold)

        # GA threshold line do NOT ends with ;
        gene_name = 'PF05930.13'
        c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
        gene = ModelGene(c_gene, model)
        path = self.model_location.get_profile(gene_name)
        profile = Profile(gene, self.cfg, path)
        self.assertTrue(profile.ga_threshold)

        # GA threshold invalid format string instead float
        gene_name = 'bad_GA'
        with self.catch_log(log_name='macsypy'):
            # When a CoreGene is created a Profile is automatically instanciated
            # So I mute the log to do not polute output
            c_gene = CoreGene(self.model_location, gene_name,
                              self.profile_factory)
        gene = ModelGene(c_gene, model)
        path = self.model_location.get_profile(gene_name)

        with self.catch_log(log_name='macsypy') as log:
            profile = Profile(gene, self.cfg, path)
            catch_msg = log.get_value().strip()
        self.assertFalse(profile.ga_threshold)
        self.assertEqual(
            catch_msg,
            "bad_GA GA score is not well formatted expected 2 floats got ''22.00'' ''23.00''.\n"
            "GA score will not used for gene 'bad_GA'.")

        # GA threshold invalid format only one score
        gene_name = 'bad_GA_2'
        with self.catch_log(log_name='macsypy'):
            # When a CoreGene is created a Profile is automatically instanciated
            # So I mute the log to do not polute output
            c_gene = CoreGene(self.model_location, gene_name,
                              self.profile_factory)
        gene = ModelGene(c_gene, model)
        path = self.model_location.get_profile(gene_name)

        with self.catch_log(log_name='macsypy') as log:
            profile = Profile(gene, self.cfg, path)
            catch_msg = log.get_value().strip()
        self.assertFalse(profile.ga_threshold)
        self.assertEqual(
            catch_msg,
            "bad_GA_2 GA score is not well formatted. expected: 'GA float float' got 'GA    22.00'.\n"
            "GA score will not used for gene 'bad_GA_2'.")

    def test_str(self):
        model = Model("foo/T2SS", 10)

        gene_name = 'abc'
        c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
        gene = ModelGene(c_gene, model)

        path = self.model_location.get_profile("abc")
        profile = Profile(gene, self.cfg, path)
        s = "{0} : {1}".format(gene.name, path)
        self.assertEqual(str(profile), s)

    @unittest.skipIf(not which('hmmsearch'), 'hmmsearch not found in PATH')
    def test_execute_hmm_with_GA(self):
        for db_type in ("gembase", "ordered_replicon", "unordered"):
            self.cfg._set_db_type(db_type)
            model = Model("foo/T2SS", 10)

            gene_name = 'T5aSS_PF03797'
            c_gene = CoreGene(self.model_location, gene_name,
                              self.profile_factory)
            gene = ModelGene(c_gene, model)

            # case GA threshold in profile
            profile_path = self.model_location.get_profile("T5aSS_PF03797")
            profile = Profile(gene, self.cfg, profile_path)
            report = profile.execute()
            hmmer_raw_out = profile.hmm_raw_output
            with open(hmmer_raw_out, 'r') as hmmer_raw_out_file:
                first_l = hmmer_raw_out_file.readline()
                # a hmmsearch output file has been produced
                self.assertTrue(
                    first_l.startswith(
                        "# hmmsearch :: search profile(s) against a sequence database"
                    ))
                for i in range(5):
                    # skip 4 lines
                    l = hmmer_raw_out_file.readline()
                # a hmmsearch used the abc profile line should become with: "# query HMM file: {the path tp hmm profile used}"
                self.assertTrue(l.find(profile_path) != -1)
                for i in range(3):
                    # skip 2 lines
                    l = hmmer_raw_out_file.readline()
                self.assertEqual(
                    "# model-specific thresholding:     GA cutoffs", l.strip())
            # test if profile is executed only once per run
            report_bis = profile.execute()
            self.assertIs(report, report_bis)

    @unittest.skipIf(not which('hmmsearch'), 'hmmsearch not found in PATH')
    def test_execute_hmm_protected_path(self):
        # create a hmmdir with space in name
        self.cfg.hmmer_dir = lambda: 'hmmer results'
        # create sequence_db path with space in path
        seq_path = os.path.join(self.cfg.working_dir(), "test test1.fasta")
        shutil.copyfile(self.find_data("base", "test_1.fasta"), seq_path)
        self.cfg._set_sequence_db(seq_path)

        model = Model("foo/T2SS", 10)
        gene_name = 'T5aSS_PF03797'
        c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
        gene = ModelGene(c_gene, model)

        # case GA threshold in profile
        profile_path = self.model_location.get_profile("T5aSS_PF03797")
        profile = Profile(gene, self.cfg, profile_path)
        report = profile.execute()
        hmmer_raw_out = profile.hmm_raw_output
        with open(hmmer_raw_out, 'r') as hmmer_raw_out_file:
            first_l = hmmer_raw_out_file.readline()
            # a hmmsearch output file has been produced
            self.assertTrue(
                first_l.startswith(
                    "# hmmsearch :: search profile(s) against a sequence database"
                ))
            for i in range(5):
                # skip 4 lines
                l = hmmer_raw_out_file.readline()
            # a hmmsearch used the abc profile line should become with: "# query HMM file: {the path tp hmm profile used}"
            self.assertTrue(l.find(profile_path) != -1)
            for i in range(3):
                # skip 2 lines
                l = hmmer_raw_out_file.readline()
            self.assertEqual("# model-specific thresholding:     GA cutoffs",
                             l.strip())

    @unittest.skipIf(not which('hmmsearch'), 'hmmsearch not found in PATH')
    def test_execute_hmm_w_GA_n_nocutga(self):
        # case GA threshold in profile but --no-cut-ga is set
        args = argparse.Namespace()
        args.sequence_db = self.find_data("base", "test_1.fasta")
        args.db_type = 'gembase'
        args.models_dir = self.find_data('models')
        args.res_search_dir = tempfile.gettempdir()
        args.log_level = 0
        args.e_value_search = 0.5
        args.no_cut_ga = True
        cfg = Config(MacsyDefaults(), args)

        model = Model("foo/T2SS", 10)
        gene_name = 'T5aSS_PF03797'
        c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
        gene = ModelGene(c_gene, model)
        profile_path = self.model_location.get_profile("T5aSS_PF03797")
        profile = Profile(gene, cfg, profile_path)
        report = profile.execute()
        hmmer_raw_out = profile.hmm_raw_output
        with open(hmmer_raw_out, 'r') as hmmer_raw_out_file:
            for i in range(9):
                l = hmmer_raw_out_file.readline()
            self.assertEqual(
                "# sequence reporting threshold:    E-value <= 0.5", l.strip())

    @unittest.skipIf(not which('hmmsearch'), 'hmmsearch not found in PATH')
    def test_execute_hmm_wo_GA(self):
        # case cut-ga but no GA threshold in hmmprofile
        model = Model("foo/T2SS", 10)
        gene_name = 'abc'
        c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
        gene = ModelGene(c_gene, model)

        # case -cut-ga and GA threshold in profile
        profile_path = self.model_location.get_profile("abc")
        profile = Profile(gene, self.cfg, profile_path)

        with self.catch_log() as log:
            report = profile.execute()

        hmmer_raw_out = profile.hmm_raw_output
        with open(hmmer_raw_out, 'r') as hmmer_raw_out_file:
            first_l = hmmer_raw_out_file.readline()
            # a hmmsearch output file has been produced
            self.assertTrue(
                first_l.startswith(
                    "# hmmsearch :: search profile(s) against a sequence database"
                ))
            for i in range(5):
                # skip 4 lines
                l = hmmer_raw_out_file.readline()
            # a hmmsearch used the abc profile line should become with: "# query HMM file: {the path tp hmm profile used}"
            self.assertTrue(l.find(profile_path) != -1)
            for i in range(3):
                # skip 2 lines
                l = hmmer_raw_out_file.readline()
            self.assertEqual(
                '# sequence reporting threshold:    E-value <= 0.1', l.strip())

    def test_execute_unknown_binary(self):
        self.cfg._options['hmmer'] = "Nimportnaoik"
        model = Model("foo/T2SS", 10)

        gene_name = 'abc'
        c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
        gene = ModelGene(c_gene, model)

        path = self.model_location.get_profile("abc", )
        profile = Profile(gene, self.cfg, path)
        with self.catch_log():
            with self.assertRaises(RuntimeError):
                profile.execute()

    def test_execute_hmmer_failed(self):
        fake_hmmer = os.path.join(tempfile.gettempdir(), 'hmmer_failed')
        with open(fake_hmmer, 'w') as hmmer:
            hmmer.write("""#! {}
import sys
sys.exit(127)
""".format(sysconfig.sys.executable))
        try:
            os.chmod(hmmer.name, 0o755)
            self.cfg._options['hmmer'] = hmmer.name
            model = Model("foo/T2SS", 10)

            gene_name = 'abc'
            c_gene = CoreGene(self.model_location, gene_name,
                              self.profile_factory)
            gene = ModelGene(c_gene, model)

            path = self.model_location.get_profile("abc", )
            profile = Profile(gene, self.cfg, path)
            with self.catch_log():
                with self.assertRaisesRegex(
                        RuntimeError, "an error occurred during Hmmer "
                        "execution: command = .* : return code = 127 .*"
                ) as ctx:
                    profile.execute()

        finally:
            try:
                os.unlink(fake_hmmer)
            except Exception:
                pass

Example #7

Show file

File: test_functional_test.py Project: karubiotools/macsyfinder

class Test(MacsyTest):
    def setUp(self):
        self.tmp_dir = tempfile.gettempdir()
        # reset AbstractSetOfHits internal id to have predictable results (Systems, ...) id
        # it's works only if there is only one replicon
        # for gembase the order is not guarantee

        AbstractSetOfHits._id = itertools.count(1)
        self.all_systems_tsv = "all_systems.tsv"
        self.all_systems_txt = "all_systems.txt"
        self.all_best_solutions = "all_best_solutions.tsv"
        self.best_solution = "best_solution.tsv"
        self.rejected_clusters = "rejected_clusters.txt"
        self.uncomplete_systems = "uncomplete_systems.txt"

    def tearDown(self):
        try:
            pass
            # self.out_dir is set in self._macsyfinder_run
            shutil.rmtree(self.out_dir)
        except:
            pass

    @unittest.skipIf(not which('hmmsearch'), 'hmmsearch not found in PATH')
    def test_gembase(self):
        """

        """
        expected_result_dir = self.find_data("functional_test_gembase")
        args = "--db-type=gembase " \
               f"--models-dir={self.find_data('models')} " \
               "--models TFF-SF Archaeal-T4P ComM MSH T2SS T4bP T4P Tad " \
               "--out-dir={out_dir} " \
               f"--previous-run {expected_result_dir} " \
               "--relative-path"

        self._macsyfinder_run(args)
        for file_name in (self.all_systems_tsv, self.all_best_solutions,
                          self.best_solution):
            with self.subTest(file_name=file_name):
                expected_result = self.find_data(expected_result_dir,
                                                 file_name)
                get_results = os.path.join(self.out_dir, file_name)
                self.assertTsvEqual(expected_result, get_results, comment="#")
        expected_result = self.find_data(expected_result_dir,
                                         self.rejected_clusters)
        get_results = os.path.join(self.out_dir, self.rejected_clusters)
        self.assertFileEqual(expected_result, get_results, comment="#")

    def test_only_loners(self):
        expected_result_dir = self.find_data("functional_tests_only_loners")
        args = "--db-type ordered_replicon " \
               "--replicon-topology linear  " \
               f"--models-dir {self.find_data('models')} " \
               "-m test_loners MOB_cf_T5SS " \
               "-o {out_dir} " \
               f"--previous-run {expected_result_dir} " \
               "--relative-path"
        self._macsyfinder_run(args)

        for file_name in (self.all_systems_tsv, self.all_best_solutions,
                          self.best_solution, self.rejected_clusters):
            with self.subTest(file_name=file_name):
                expected_result = self.find_data(expected_result_dir,
                                                 file_name)
                get_results = os.path.join(self.out_dir, file_name)
                self.assertFileEqual(expected_result, get_results, comment="#")

    def test_ordered_circular(self):
        # genetic organization of test_3.fasta
        # gene       abc    mfp    omf    omf    abc    gspd
        # gene id   01397  01398  01548  01562  01399  01400
        # pos        8      9      19     27     37     38
        # clst                 ]               [
        # syst (abc,37),  (gspd, 38), (abc,2), (mfp,3)

        expected_result_dir = self.find_data(
            "functional_test_ordered_circular")
        # TODO how to specify multi_loci = false when multi_loci =True is set in xml
        args = "--db-type ordered_replicon " \
               "--replicon-topology circular  " \
               f"--models-dir {self.find_data('models')} " \
               "-m functional T12SS-simple-exch " \
               "-o {out_dir} " \
               f"--previous-run {expected_result_dir} " \
               "--relative-path"
        self._macsyfinder_run(args)

        for file_name in (self.all_systems_tsv, self.all_best_solutions,
                          self.best_solution, self.rejected_clusters):
            with self.subTest(file_name=file_name):
                expected_result = self.find_data(expected_result_dir,
                                                 file_name)
                get_results = os.path.join(self.out_dir, file_name)
                self.assertFileEqual(expected_result, get_results, comment="#")

    def test_ordered_linear(self):
        # genetic organization of test_3.fasta
        # gene       abc    mfp    omf    omf    abc    gspd
        # gene id   01397  01398  01548  01562  01399  01400
        # pos        8      9      19     27     37     38
        # clst    [            ]               [           ]
        # syst  no system

        expected_result_dir = self.find_data("functional_test_ordered_linear")
        # TODO how to specify multi_loci = false when multi_loci =True is set in xml
        args = "--db-type ordered_replicon " \
               "--replicon-topology linear  " \
               f"--models-dir {self.find_data('models')} " \
               "-m functional T12SS-simple-exch " \
               "-o {out_dir} " \
               f"--previous-run {expected_result_dir} " \
               "--relative-path"
        self._macsyfinder_run(args)

        for file_name in (self.all_systems_tsv, self.all_best_solutions,
                          self.best_solution, self.rejected_clusters):
            with self.subTest(file_name=file_name):
                expected_result = self.find_data(expected_result_dir,
                                                 file_name)
                get_results = os.path.join(self.out_dir, file_name)
                self.assertFileEqual(expected_result, get_results, comment="#")

    def test_ordered_multi_system(self):
        # genetic organization of test_1.fasta
        #
        # gene       omf    mfp    abc    mfp    abc    gspd   omf    omf    omf
        # gene id   01360  01361  01397  01398  01399  01400  01506  01548  01562
        # pos         2      3     11     12     13      14    23     32     46
        # clst      [         ]   [                        ]  [  ]   [  ]   [  ]
        # syst                    [abc    mfp    abc    gspd   omf    omf    omf]

        expected_result_dir = self.find_data(
            "functional_test_ordered_multi_system")
        # TODO how to specify multi_loci = false when multi_loci =True is set in xml
        args = "--db-type ordered_replicon " \
               f"--models-dir {self.find_data('models')} " \
               "-m functional T12SS-multi-syst-exch " \
               "-o {out_dir} " \
               f"--previous-run {expected_result_dir} " \
               "--relative-path"
        self._macsyfinder_run(args)

        for file_name in (self.all_systems_tsv, self.all_best_solutions,
                          self.best_solution, self.rejected_clusters):
            with self.subTest(file_name=file_name):
                expected_result = self.find_data(expected_result_dir,
                                                 file_name)
                get_results = os.path.join(self.out_dir, file_name)
                self.assertFileEqual(expected_result, get_results, comment="#")

    def test_ordered_multi_system_loner_in_clust(self):
        # genetic organization of test_2.fasta
        #
        # gene       abc    mfp    abc    gspd   omf    omf    omf
        # gene id   01397  01398  01399  01400  01506  01548  01562
        # pos        8      9      10      11    13     29     43
        # clst     [                               ]   [  ]   [  ]
        # syst     [abc    mfp    abc    gspd   omf]

        expected_result_dir = self.find_data(
            "functional_test_ordered_multi_system_loner_in_clust")
        # TODO how to specify multi_loci = false when multi_loci =True is set in xml
        args = "--db-type ordered_replicon " \
               f"--models-dir {self.find_data('models')} " \
               "-m functional T12SS-multi-syst-exch " \
               "-o {out_dir} " \
               f"--previous-run {expected_result_dir} " \
               "--relative-path"
        self._macsyfinder_run(args)

        for file_name in (self.all_systems_tsv, self.all_best_solutions,
                          self.best_solution, self.rejected_clusters):
            with self.subTest(file_name=file_name):
                expected_result = self.find_data(expected_result_dir,
                                                 file_name)
                get_results = os.path.join(self.out_dir, file_name)
                self.assertFileEqual(expected_result, get_results, comment="#")

    def test_ordered_multi_loci(self):
        # genetic organization of test_4.fasta
        #
        # gene       abc    mfp    abc    gspd   omf    omf
        # gene id   01397  01398  01399  01400  01548  01562
        # pos        6      7      14      15    26     40
        # clst     [         ]   [           ]
        # syst    abc, mfp, abc, gspd, omf, omf

        expected_result_dir = self.find_data(
            "functional_test_ordered_multi_loci")
        args = "--db-type ordered_replicon " \
               f"--models-dir {self.find_data('models')} " \
               "-m functional T12SS-simple-exch " \
               "-o {out_dir} " \
               "--multi-loci functional/T12SS-simple-exch " \
               f"--previous-run {expected_result_dir} " \
               "--relative-path"

        self._macsyfinder_run(args)

        for file_name in (self.all_systems_tsv, self.all_best_solutions,
                          self.best_solution, self.rejected_clusters):
            with self.subTest(file_name=file_name):
                expected_result = self.find_data(expected_result_dir,
                                                 file_name)
                get_results = os.path.join(self.out_dir, file_name)
                self.assertFileEqual(expected_result, get_results, comment="#")

    def test_ordered_single_loci(self):
        # genetic organization of test_4.fasta
        #
        # gene       abc    mfp    abc    gspd   omf    omf
        # gene id   01397  01398  01399  01400  01548  01562
        # pos        6      7      14      15    26     40
        # clst     [         ]   [           ]
        # syst    no system

        expected_result_dir = self.find_data(
            "functional_test_ordered_single_loci")
        args = "--db-type ordered_replicon " \
               f"--models-dir {self.find_data('models')} " \
               "-m functional T12SS-simple-exch " \
               "-o {out_dir} " \
               f"--previous-run {expected_result_dir} " \
               "--relative-path"

        self._macsyfinder_run(args)

        for file_name in (self.all_systems_tsv, self.all_best_solutions,
                          self.best_solution, self.rejected_clusters):
            with self.subTest(file_name=file_name):
                expected_result = self.find_data(expected_result_dir,
                                                 file_name)
                get_results = os.path.join(self.out_dir, file_name)
                self.assertFileEqual(expected_result, get_results, comment="#")

    def test_unordered(self):
        # genetic organization of test_4.fasta
        #
        # gene       abc    mfp    abc    gspd   omf    omf
        # gene id   01397  01398  01399  01400  01548  01562
        # pos        6      7      14      15    26     40
        # syst    abc    mfp    abc    gspd   omf    omf
        expected_result_dir = self.find_data("functional_test_unordered")
        args = "--db-type unordered " \
               f"--models-dir {self.find_data('models')} " \
               "-m functional T12SS-simple-exch " \
               "-o {out_dir} " \
               f"--previous-run {expected_result_dir} " \
               "--relative-path"
        self._macsyfinder_run(args)

        for file_name in (self.all_systems_tsv, self.all_systems_txt,
                          self.uncomplete_systems):
            with self.subTest(file_name=file_name):
                expected_result = self.find_data(expected_result_dir,
                                                 file_name)
                get_results = os.path.join(self.out_dir, file_name)
                self.assertFileEqual(expected_result, get_results, comment="#")

    def test_working_dir_exists(self):
        args = f"--sequence-db {self.find_data('base', 'one_replicon.fasta')} " \
               "--db-type ordered_replicon " \
               f"--models-dir {self.find_data('models')} " \
               "-m functional T12SS " \
               "-o {out_dir}"

        self.out_dir = os.path.join(self.tmp_dir,
                                    'macsyfinder_working_dir_exists')
        os.makedirs(self.out_dir)
        open(os.path.join(self.out_dir, 'toto.empty'), 'w').close()

        args = args.format(out_dir=self.out_dir)
        with self.assertRaises(ValueError) as ctx:
            macsyfinder.main(args=args.split(), loglevel='ERROR')
        self.assertEqual(
            str(ctx.exception),
            f"'{self.out_dir}' already exists and is not a empty")

    def test_working_dir_exists_and_not_dir(self):
        args = f"--sequence-db {self.find_data('base', 'one_replicon.fasta')} " \
               "--db-type ordered_replicon " \
               f"--models-dir {self.find_data('models')} " \
               "-m functional T12SS " \
               "-o {out_dir} "

        self.out_dir = os.path.join(
            self.tmp_dir, 'macsyfinder_working_dir_exists_and_not_dir')
        try:
            open(self.out_dir, 'w').close()

            args = args.format(out_dir=self.out_dir)
            with self.assertRaises(ValueError) as ctx:
                macsyfinder.main(args=args.split(), loglevel='ERROR')
            self.assertEqual(
                str(ctx.exception),
                f"'{self.out_dir}' already exists and is not a directory")
        finally:
            os.unlink(self.out_dir)

    def test_no_models(self):
        args = f"--sequence-db {self.find_data('base', 'one_replicon.fasta')} " \
               "--db-type ordered_replicon " \
               f"--models-dir {self.find_data('models')} " \
               "-o {out_dir} "

        self.out_dir = os.path.join(self.tmp_dir, 'macsyfinder_no_models')
        args = args.format(out_dir=self.out_dir)
        with self.catch_io(out=True):
            with self.assertRaises(OptionError) as ctx:
                macsyfinder.main(args=args.split(), loglevel='ERROR')
        self.assertEqual(str(ctx.exception),
                         "argument --models or --previous-run is required.")

    def test_no_seq_db(self):
        args = "--db-type ordered_replicon " \
               f"--models-dir {self.find_data('models')} " \
               "-m functional T12SS " \
               "-o {out_dir} "

        self.out_dir = os.path.join(self.tmp_dir, 'macsyfinder_no_seq_db')

        args = args.format(out_dir=self.out_dir)
        with self.catch_io(out=True):
            with self.assertRaises(OptionError) as ctx:
                macsyfinder.main(args=args.split(), loglevel='ERROR')
        self.assertEqual(
            str(ctx.exception),
            "argument --sequence-db or --previous-run is required.")

    def test_no_db_type(self):
        args = f"--sequence-db {self.find_data('base', 'one_replicon.fasta')} " \
               f"--models-dir {self.find_data('models')} " \
               "-m functional T12SS " \
               "-o {out_dir} "

        self.out_dir = os.path.join(self.tmp_dir, 'macsyfinder_no_db_type')
        args = args.format(out_dir=self.out_dir)
        with self.catch_io(out=True):
            with self.assertRaises(OptionError) as ctx:
                macsyfinder.main(args=args.split(), loglevel='ERROR')
        self.assertEqual(str(ctx.exception),
                         "argument --db-type or --previous-run is required.")

    def test_model_unknown(self):
        args = f"--sequence-db {self.find_data('base', 'one_replicon.fasta')} " \
               "--db-type ordered_replicon " \
               f"--models-dir {self.find_data('models')} " \
               "-m functional Unknown_model " \
               "-o {out_dir}"

        self.out_dir = os.path.join(self.tmp_dir, 'macsyfinder_model_unkwon')
        os.makedirs(self.out_dir)

        args = args.format(out_dir=self.out_dir)
        with self.assertRaises(ValueError) as ctx:
            macsyfinder.main(args=args.split(), loglevel='ERROR')
        self.assertEqual(str(ctx.exception),
                         "Unknown_model does not match with any definitions")

    def _macsyfinder_run(self, args_tpl):
        # get the name of the calling function
        test_name = inspect.stack()[1].function
        self.out_dir = os.path.join(self.tmp_dir,
                                    'macsyfinder_{}'.format(test_name))
        os.makedirs(self.out_dir)
        args = args_tpl.format(out_dir=self.out_dir)
        # print("\n############################################")
        # print(args)
        # print("##############################################")
        macsyfinder.main(args=args.split(), loglevel='ERROR')

Example #8

Show file

class TestMacsyfinder(MacsyTest):
    def setUp(self):
        self.tmp_dir = tempfile.mkdtemp()
        AbstractSetOfHits._id = itertools.count(1)

    def tearDown(self):
        try:
            shutil.rmtree(self.tmp_dir)
        except:
            pass

    def test_list_models(self):
        cmd_args = argparse.Namespace()
        cmd_args.models_dir = os.path.join(self._data_dir, 'fake_model_dir')
        cmd_args.list_models = True
        rcv_list_models = list_models(cmd_args)
        exp_list_models = """set_1
      /def_1_1
      /def_1_2
      /def_1_3
      /def_1_4
set_2
      /level_1
              /def_1_1
              /def_1_2
              /level_2
                      /def_2_3
                      /def_2_4
"""
        self.assertEqual(exp_list_models, rcv_list_models)

    def test_systems_to_txt(self):
        system_str = f"""# macsyfinder {macsypy.__version__}
# {' '.join(sys.argv)}
# No Systems found
"""
        f_out = StringIO()
        track_multi_systems_hit = HitSystemTracker([])
        systems_to_txt([], track_multi_systems_hit, f_out)
        self.assertMultiLineEqual(system_str, f_out.getvalue())

        args = argparse.Namespace()
        args.sequence_db = self.find_data("base", "test_1.fasta")
        args.db_type = 'gembase'
        args.models_dir = self.find_data('models')
        cfg = Config(MacsyDefaults(), args)

        model_name = 'foo'
        models_location = ModelLocation(
            path=os.path.join(args.models_dir, model_name))

        # we need to reset the ProfileFactory
        # because it's a like a singleton
        # so other tests are influenced by ProfileFactory and it's configuration
        # for instance search_genes get profile without hmmer_exe
        profile_factory = ProfileFactory(cfg)

        model = Model("foo/T2SS", 10)
        # test if id is well incremented
        gene_name = "gspD"
        c_gene_gspd = CoreGene(models_location, gene_name, profile_factory)
        gene_gspd = ModelGene(c_gene_gspd, model)
        model.add_mandatory_gene(gene_gspd)
        gene_name = "sctJ"
        c_gene_sctj = CoreGene(models_location, gene_name, profile_factory)
        gene_sctj = ModelGene(c_gene_sctj, model)
        model.add_accessory_gene(gene_sctj)

        hit_1 = Hit(c_gene_gspd, "hit_1", 803, "replicon_id", 1, 1.0, 1.0, 1.0,
                    1.0, 10, 20)
        v_hit_1 = ValidHit(hit_1, gene_gspd, GeneStatus.MANDATORY)
        hit_2 = Hit(c_gene_sctj, "hit_2", 803, "replicon_id", 1, 1.0, 1.0, 1.0,
                    1.0, 10, 20)
        v_hit_2 = ValidHit(hit_2, gene_sctj, GeneStatus.ACCESSORY)
        system_1 = System(model, [
            Cluster([v_hit_1, v_hit_2], model, HitWeight(**cfg.hit_weights()))
        ], cfg.redundancy_penalty())

        system_str = f"""# macsyfinder {macsypy.__version__}
# {' '.join(sys.argv)}
# Systems found:

system id = replicon_id_T2SS_{next(System._id) - 1}
model = foo/T2SS
replicon = replicon_id
clusters = [('hit_1', 'gspD', 1), ('hit_2', 'sctJ', 1)]
occ = 1
wholeness = 1.000
loci nb = 1
score = 1.500

mandatory genes:
\t- gspD: 1 (gspD)

accessory genes:
\t- sctJ: 1 (sctJ)

neutral genes:

============================================================
"""

        f_out = StringIO()
        track_multi_systems_hit = HitSystemTracker([system_1])
        systems_to_txt([system_1], track_multi_systems_hit, f_out)
        self.assertMultiLineEqual(system_str, f_out.getvalue())

    def test_systems_to_tsv(self):
        args = argparse.Namespace()
        args.sequence_db = self.find_data("base", "test_1.fasta")
        args.db_type = 'gembase'
        args.models_dir = self.find_data('models')
        cfg = Config(MacsyDefaults(), args)

        model_name = 'foo'
        models_location = ModelLocation(
            path=os.path.join(args.models_dir, model_name))

        # we need to reset the ProfileFactory
        # because it's a like a singleton
        # so other tests are influenced by ProfileFactory and it's configuration
        # for instance search_genes get profile without hmmer_exe
        profile_factory = ProfileFactory(cfg)

        model = Model("foo/T2SS", 10)
        gene_name = "gspD"
        c_gene_gspd = CoreGene(models_location, gene_name, profile_factory)
        gene_gspd = ModelGene(c_gene_gspd, model)
        model.add_mandatory_gene(gene_gspd)
        gene_name = "sctJ"
        c_gene_sctj = CoreGene(models_location, gene_name, profile_factory)
        gene_sctj = ModelGene(c_gene_sctj, model)
        model.add_accessory_gene(gene_sctj)

        hit_1 = Hit(c_gene_gspd, "hit_1", 803, "replicon_id", 1, 1.0, 1.0, 1.0,
                    1.0, 10, 20)
        v_hit_1 = ValidHit(hit_1, gene_gspd, GeneStatus.MANDATORY)
        hit_2 = Hit(c_gene_sctj, "hit_2", 803, "replicon_id", 1, 1.0, 1.0, 1.0,
                    1.0, 10, 20)
        v_hit_2 = ValidHit(hit_2, gene_sctj, GeneStatus.ACCESSORY)
        system_1 = System(model, [
            Cluster([v_hit_1, v_hit_2], model, HitWeight(**cfg.hit_weights()))
        ], cfg.redundancy_penalty())

        system_tsv = f"""# macsyfinder {macsypy.__version__}
# {' '.join(sys.argv)}
# Systems found:
"""
        system_tsv += "\t".join([
            "replicon", "hit_id", "gene_name", "hit_pos", "model_fqn",
            "sys_id", "sys_loci", "sys_wholeness", "sys_score", "sys_occ",
            "hit_gene_ref", "hit_status", "hit_seq_len", "hit_i_eval",
            "hit_score", "hit_profile_cov", "hit_seq_cov", "hit_begin_match",
            "hit_end_match", "used_in"
        ])
        system_tsv += "\n"
        system_tsv += "\t".join([
            "replicon_id", "hit_1", "gspD", "1", "foo/T2SS", system_1.id, "1",
            "1.000", "1.500", "1", "gspD", "mandatory", "803", "1.0", "1.000",
            "1.000", "1.000", "10", "20", ""
        ])
        system_tsv += "\n"
        system_tsv += "\t".join([
            "replicon_id", "hit_2", "sctJ", "1", "foo/T2SS", system_1.id, "1",
            "1.000", "1.500", "1", "sctJ", "accessory", "803", "1.0", "1.000",
            "1.000", "1.000", "10", "20", ""
        ])
        system_tsv += "\n\n"

        f_out = StringIO()
        track_multi_systems_hit = HitSystemTracker([system_1])
        systems_to_tsv([system_1], track_multi_systems_hit, f_out)
        self.assertMultiLineEqual(system_tsv, f_out.getvalue())

        # test No system found
        system_str = f"""# macsyfinder {macsypy.__version__}
# {' '.join(sys.argv)}
# No Systems found
"""
        f_out = StringIO()
        track_multi_systems_hit = HitSystemTracker([])
        systems_to_tsv([], track_multi_systems_hit, f_out)
        self.assertMultiLineEqual(system_str, f_out.getvalue())

    def test_solutions_to_tsv(self):
        args = argparse.Namespace()
        args.sequence_db = self.find_data("base", "test_1.fasta")
        args.db_type = 'gembase'
        args.models_dir = self.find_data('models')
        cfg = Config(MacsyDefaults(), args)
        model_name = 'foo'
        models_location = ModelLocation(
            path=os.path.join(args.models_dir, model_name))

        # we need to reset the ProfileFactory
        # because it's a like a singleton
        # so other tests are influenced by ProfileFactory and it's configuration
        # for instance search_genes get profile without hmmer_exe
        profile_factory = ProfileFactory(cfg)

        model_A = Model("foo/A", 10)
        model_B = Model("foo/B", 10)
        model_C = Model("foo/C", 10)

        c_gene_sctn_flg = CoreGene(models_location, "sctN_FLG",
                                   profile_factory)
        gene_sctn_flg = ModelGene(c_gene_sctn_flg, model_B)
        c_gene_sctj_flg = CoreGene(models_location, "sctJ_FLG",
                                   profile_factory)
        gene_sctj_flg = ModelGene(c_gene_sctj_flg, model_B)
        c_gene_flgB = CoreGene(models_location, "flgB", profile_factory)
        gene_flgB = ModelGene(c_gene_flgB, model_B)
        c_gene_tadZ = CoreGene(models_location, "tadZ", profile_factory)
        gene_tadZ = ModelGene(c_gene_tadZ, model_B)

        c_gene_sctn = CoreGene(models_location, "sctN", profile_factory)
        gene_sctn = ModelGene(c_gene_sctn, model_A)
        gene_sctn_hom = Exchangeable(c_gene_sctn_flg, gene_sctn)
        gene_sctn.add_exchangeable(gene_sctn_hom)

        c_gene_sctj = CoreGene(models_location, "sctJ", profile_factory)
        gene_sctj = ModelGene(c_gene_sctj, model_A)
        gene_sctj_an = Exchangeable(c_gene_sctj_flg, gene_sctj)
        gene_sctj.add_exchangeable(gene_sctj_an)

        c_gene_gspd = CoreGene(models_location, "gspD", profile_factory)
        gene_gspd = ModelGene(c_gene_gspd, model_A)
        gene_gspd_an = Exchangeable(c_gene_flgB, gene_gspd)
        gene_gspd.add_exchangeable(gene_gspd_an)

        c_gene_abc = CoreGene(models_location, "abc", profile_factory)
        gene_abc = ModelGene(c_gene_abc, model_A)
        gene_abc_ho = Exchangeable(c_gene_tadZ, gene_abc)
        gene_abc.add_exchangeable(gene_abc_ho)

        model_A.add_mandatory_gene(gene_sctn)
        model_A.add_mandatory_gene(gene_sctj)
        model_A.add_accessory_gene(gene_gspd)
        model_A.add_forbidden_gene(gene_abc)

        model_B.add_mandatory_gene(gene_sctn_flg)
        model_B.add_mandatory_gene(gene_sctj_flg)
        model_B.add_accessory_gene(gene_flgB)
        model_B.add_accessory_gene(gene_tadZ)

        model_C.add_mandatory_gene(gene_sctn_flg)
        model_C.add_mandatory_gene(gene_sctj_flg)
        model_C.add_mandatory_gene(gene_flgB)
        model_C.add_accessory_gene(gene_tadZ)
        model_C.add_accessory_gene(gene_gspd)

        h_sctj = Hit(c_gene_sctj, "hit_sctj", 803, "replicon_id", 1, 1.0, 1.0,
                     1.0, 1.0, 10, 20)
        h_sctn = Hit(c_gene_sctn, "hit_sctn", 803, "replicon_id", 1, 1.0, 1.0,
                     1.0, 1.0, 10, 20)
        h_gspd = Hit(c_gene_gspd, "hit_gspd", 803, "replicon_id", 1, 1.0, 1.0,
                     1.0, 1.0, 10, 20)

        h_sctj_flg = Hit(c_gene_sctj_flg, "hit_sctj_flg", 803, "replicon_id",
                         1, 1.0, 1.0, 1.0, 1.0, 10, 20)
        h_flgB = Hit(c_gene_flgB, "hit_flgB", 803, "replicon_id", 1, 1.0, 1.0,
                     1.0, 1.0, 10, 20)
        h_tadZ = Hit(c_gene_tadZ, "hit_tadZ", 803, "replicon_id", 1, 1.0, 1.0,
                     1.0, 1.0, 10, 20)

        model_A._min_mandatory_genes_required = 2
        model_A._min_genes_required = 2
        hit_weights = HitWeight(**cfg.hit_weights())
        c1 = Cluster([
            ValidHit(h_sctj, gene_sctj, GeneStatus.MANDATORY),
            ValidHit(h_sctn, gene_sctn, GeneStatus.MANDATORY),
            ValidHit(h_gspd, gene_gspd, GeneStatus.ACCESSORY)
        ], model_A, hit_weights)

        c2 = Cluster([
            ValidHit(h_sctj, gene_sctj, GeneStatus.MANDATORY),
            ValidHit(h_sctn, gene_sctn, GeneStatus.MANDATORY)
        ], model_A, hit_weights)

        model_B._min_mandatory_genes_required = 1
        model_B._min_genes_required = 2
        c3 = Cluster([
            ValidHit(h_sctj_flg, gene_sctj_flg, GeneStatus.MANDATORY),
            ValidHit(h_tadZ, gene_tadZ, GeneStatus.ACCESSORY),
            ValidHit(h_flgB, gene_flgB, GeneStatus.ACCESSORY)
        ], model_B, hit_weights)

        model_C._min_mandatory_genes_required = 1
        model_C._min_genes_required = 2
        c4 = Cluster([
            ValidHit(h_sctj_flg, gene_sctj_flg, GeneStatus.MANDATORY),
            ValidHit(h_tadZ, gene_tadZ, GeneStatus.ACCESSORY),
            ValidHit(h_flgB, gene_flgB, GeneStatus.MANDATORY),
            ValidHit(h_gspd, gene_gspd, GeneStatus.ACCESSORY)
        ], model_C, hit_weights)

        sys_A = System(model_A, [c1, c2], cfg.redundancy_penalty())
        sys_A.id = "sys_id_A"
        sys_B = System(model_B, [c3], cfg.redundancy_penalty())
        sys_B.id = "sys_id_B"
        sys_C = System(model_C, [c4], cfg.redundancy_penalty())
        sys_C.id = "sys_id_C"

        sol_1 = [sys_A, sys_B]
        sol_2 = [sys_A, sys_C]
        sol_id_1 = '1'
        sol_id_2 = '2'

        sol_tsv = f"""# macsyfinder {macsypy.__version__}
# {' '.join(sys.argv)}
# Systems found:
"""
        sol_tsv += "\t".join([
            "sol_id", "replicon", "hit_id", "gene_name", "hit_pos",
            "model_fqn", "sys_id", "sys_loci", "sys_wholeness", "sys_score",
            "sys_occ", "hit_gene_ref", "hit_status", "hit_seq_len",
            "hit_i_eval", "hit_score", "hit_profile_cov", "hit_seq_cov",
            "hit_begin_match", "hit_end_match", "used_in"
        ])
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id_1, 'replicon_id', 'hit_sctj', 'sctJ', '1', 'foo/A',
            'sys_id_A', '2', '1.000', '1.500', '2', 'sctJ', 'mandatory', '803',
            '1.0', '1.000', '1.000', '1.000', '10', '20', ''
        ])
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id_1, 'replicon_id', 'hit_sctn', 'sctN', '1', 'foo/A',
            'sys_id_A', '2', '1.000', '1.500', '2', 'sctN', 'mandatory', '803',
            '1.0', '1.000', '1.000', '1.000', '10', '20', ''
        ])
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id_1, 'replicon_id', 'hit_gspd', 'gspD', '1', 'foo/A',
            'sys_id_A', '2', '1.000', '1.500', '2', 'gspD', 'accessory', '803',
            '1.0', '1.000', '1.000', '1.000', '10', '20', ''
        ])
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id_1, 'replicon_id', 'hit_sctj', 'sctJ', '1', 'foo/A',
            'sys_id_A', '2', '1.000', '1.500', '2', 'sctJ', 'mandatory', '803',
            '1.0', '1.000', '1.000', '1.000', '10', '20', ''
        ])
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id_1, 'replicon_id', 'hit_sctn', 'sctN', '1', 'foo/A',
            'sys_id_A', '2', '1.000', '1.500', '2', 'sctN', 'mandatory', '803',
            '1.0', '1.000', '1.000', '1.000', '10', '20', ''
        ])
        sol_tsv += "\n"
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id_1, 'replicon_id', 'hit_sctj_flg', 'sctJ_FLG', '1', 'foo/B',
            'sys_id_B', '1', '0.750', '2.000', '1', 'sctJ_FLG', 'mandatory',
            '803', '1.0', '1.000', '1.000', '1.000', '10', '20', ''
        ])
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id_1, 'replicon_id', 'hit_tadZ', 'tadZ', '1', 'foo/B',
            'sys_id_B', '1', '0.750', '2.000', '1', 'tadZ', 'accessory', '803',
            '1.0', '1.000', '1.000', '1.000', '10', '20', ''
        ])
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id_1, 'replicon_id', 'hit_flgB', 'flgB', '1', 'foo/B',
            'sys_id_B', '1', '0.750', '2.000', '1', 'flgB', 'accessory', '803',
            '1.0', '1.000', '1.000', '1.000', '10', '20', ''
        ])
        sol_tsv += "\n"
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id_2, 'replicon_id', 'hit_sctj', 'sctJ', '1', 'foo/A',
            'sys_id_A', '2', '1.000', '1.500', '2', 'sctJ', 'mandatory', '803',
            '1.0', '1.000', '1.000', '1.000', '10', '20', ''
        ])
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id_2, 'replicon_id', 'hit_sctn', 'sctN', '1', 'foo/A',
            'sys_id_A', '2', '1.000', '1.500', '2', 'sctN', 'mandatory', '803',
            '1.0', '1.000', '1.000', '1.000', '10', '20', ''
        ])
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id_2, 'replicon_id', 'hit_gspd', 'gspD', '1', 'foo/A',
            'sys_id_A', '2', '1.000', '1.500', '2', 'gspD', 'accessory', '803',
            '1.0', '1.000', '1.000', '1.000', '10', '20', ''
        ])
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id_2, 'replicon_id', 'hit_sctj', 'sctJ', '1', 'foo/A',
            'sys_id_A', '2', '1.000', '1.500', '2', 'sctJ', 'mandatory', '803',
            '1.0', '1.000', '1.000', '1.000', '10', '20', ''
        ])
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id_2, 'replicon_id', 'hit_sctn', 'sctN', '1', 'foo/A',
            'sys_id_A', '2', '1.000', '1.500', '2', 'sctN', 'mandatory', '803',
            '1.0', '1.000', '1.000', '1.000', '10', '20', ''
        ])
        sol_tsv += "\n"
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id_2, 'replicon_id', 'hit_sctj_flg', 'sctJ_FLG', '1', 'foo/C',
            'sys_id_C', '1', '0.800', '3.000', '1', 'sctJ_FLG', 'mandatory',
            '803', '1.0', '1.000', '1.000', '1.000', '10', '20', 'sys_id_B'
        ])
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id_2, 'replicon_id', 'hit_tadZ', 'tadZ', '1', 'foo/C',
            'sys_id_C', '1', '0.800', '3.000', '1', 'tadZ', 'accessory', '803',
            '1.0', '1.000', '1.000', '1.000', '10', '20', 'sys_id_B'
        ])
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id_2, 'replicon_id', 'hit_flgB', 'flgB', '1', 'foo/C',
            'sys_id_C', '1', '0.800', '3.000', '1', 'flgB', 'mandatory', '803',
            '1.0', '1.000', '1.000', '1.000', '10', '20', 'sys_id_B'
        ])
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            sol_id_2, 'replicon_id', 'hit_gspd', 'gspD', '1', 'foo/C',
            'sys_id_C', '1', '0.800', '3.000', '1', 'gspD', 'accessory', '803',
            '1.0', '1.000', '1.000', '1.000', '10', '20', 'sys_id_A'
        ])
        sol_tsv += "\n"
        sol_tsv += "\n"

        f_out = StringIO()
        hit_multi_sys_tracker = HitSystemTracker([sys_A, sys_B])
        solutions_to_tsv([sol_1, sol_2], hit_multi_sys_tracker, f_out)
        self.assertMultiLineEqual(sol_tsv, f_out.getvalue())

    def test_rejected_clst_to_txt(self):
        args = argparse.Namespace()
        args.sequence_db = self.find_data("base", "test_1.fasta")
        args.db_type = 'gembase'
        args.models_dir = self.find_data('models')
        args.res_search_dir = "blabla"

        cfg = Config(MacsyDefaults(), args)
        model_name = 'foo'
        models_location = ModelLocation(
            path=os.path.join(args.models_dir, model_name))
        profile_factory = ProfileFactory(cfg)

        model = Model("foo/T2SS", 11)

        gene_name = "gspD"
        c_gene_gspd = CoreGene(models_location, gene_name, profile_factory)
        gene_1 = ModelGene(c_gene_gspd, model)
        gene_name = "sctC"
        c_gene_sctc = CoreGene(models_location, gene_name, profile_factory)
        gene_2 = ModelGene(c_gene_sctc, model)
        model.add_mandatory_gene(gene_1)
        model.add_accessory_gene(gene_2)

        #     Hit(gene, model, hit_id, hit_seq_length, replicon_name, position, i_eval, score,
        #         profile_coverage, sequence_coverage, begin_match, end_match
        h10 = Hit(c_gene_gspd, "h10", 10, "replicon_1", 10, 1.0, 10.0, 1.0,
                  1.0, 10, 20)
        v_h10 = ValidHit(h10, gene_1, GeneStatus.MANDATORY)
        h20 = Hit(c_gene_sctc, "h20", 10, "replicon_1", 20, 1.0, 20.0, 1.0,
                  1.0, 10, 20)
        v_h20 = ValidHit(h20, gene_2, GeneStatus.ACCESSORY)
        h40 = Hit(c_gene_gspd, "h10", 10, "replicon_1", 40, 1.0, 10.0, 1.0,
                  1.0, 10, 20)
        v_h40 = ValidHit(h40, gene_1, GeneStatus.MANDATORY)
        h50 = Hit(c_gene_sctc, "h20", 10, "replicon_1", 50, 1.0, 20.0, 1.0,
                  1.0, 10, 20)
        v_h50 = ValidHit(h50, gene_2, GeneStatus.ACCESSORY)
        hit_weights = HitWeight(**cfg.hit_weights())
        c1 = Cluster([v_h10, v_h20], model, hit_weights)
        c2 = Cluster([v_h40, v_h50], model, hit_weights)
        r_c = RejectedClusters(model, [c1, c2],
                               ["The reasons to reject this clusters"])

        rej_clst_str = f"""# macsyfinder {macsypy.__version__}
# {' '.join(sys.argv)}
# Rejected clusters:

Cluster:
- model = T2SS
- replicon = replicon_1
- hits = (h10, gspD, 10), (h20, sctC, 20)
Cluster:
- model = T2SS
- replicon = replicon_1
- hits = (h10, gspD, 40), (h20, sctC, 50)
These clusters have been rejected because:
\t- The reasons to reject this clusters
============================================================
"""

        f_out = StringIO()
        rejected_clst_to_txt([r_c], f_out)
        self.maxDiff = None
        self.assertMultiLineEqual(rej_clst_str, f_out.getvalue())

        rej_clst_str = f"""# macsyfinder {macsypy.__version__}
# {' '.join(sys.argv)}
# No Rejected clusters
"""
        f_out = StringIO()
        rejected_clst_to_txt([], f_out)
        self.assertMultiLineEqual(rej_clst_str, f_out.getvalue())

    def test_likely_systems_to_txt(self):
        args = argparse.Namespace()
        args.sequence_db = self.find_data("base", "test_1.fasta")
        args.db_type = 'unordered'
        args.models_dir = self.find_data('models')
        cfg = Config(MacsyDefaults(), args)

        model_name = 'foo'
        models_location = ModelLocation(
            path=os.path.join(args.models_dir, model_name))

        # we need to reset the ProfileFactory
        # because it's a like a singleton
        # so other tests are influenced by ProfileFactory and it's configuration
        # for instance search_genes get profile without hmmer_exe
        profile_factory = ProfileFactory(cfg)

        model = Model("foo/T2SS", 10)
        # test if id is well incremented
        gene_name = "gspD"
        c_gene_gspd = CoreGene(models_location, gene_name, profile_factory)
        gene_gspd = ModelGene(c_gene_gspd, model)
        model.add_mandatory_gene(gene_gspd)
        gene_name = "sctJ"
        c_gene_sctj = CoreGene(models_location, gene_name, profile_factory)
        gene_sctj = ModelGene(c_gene_sctj, model)
        model.add_accessory_gene(gene_sctj)
        gene_name = "sctC"
        c_gene_sctc = CoreGene(models_location, gene_name, profile_factory)
        gene_sctc = ModelGene(c_gene_sctc, model)
        model.add_neutral_gene(gene_sctc)
        gene_name = "tadZ"
        c_gene_tadz = CoreGene(models_location, gene_name, profile_factory)
        gene_tadz = ModelGene(c_gene_tadz, model)
        model.add_forbidden_gene(gene_tadz)

        hit_1 = Hit(c_gene_gspd, "hit_1", 803, "replicon_id", 1, 1.0, 1.0, 1.0,
                    1.0, 10, 20)
        v_hit_1 = ValidHit(hit_1, gene_gspd, GeneStatus.MANDATORY)
        hit_2 = Hit(c_gene_sctj, "hit_2", 804, "replicon_id", 1, 1.0, 1.0, 1.0,
                    1.0, 10, 20)
        v_hit_2 = ValidHit(hit_2, gene_sctj, GeneStatus.ACCESSORY)
        hit_3 = Hit(c_gene_sctc, "hit_3", 805, "replicon_id", 1, 1.0, 1.0, 1.0,
                    1.0, 10, 20)
        v_hit_3 = ValidHit(hit_3, gene_sctc, GeneStatus.NEUTRAL)
        hit_4 = Hit(c_gene_tadz, "hit_4", 806, "replicon_id", 1, 1.0, 1.0, 1.0,
                    1.0, 10, 20)
        v_hit_4 = ValidHit(hit_4, gene_tadz, GeneStatus.FORBIDDEN)

        system_1 = LikelySystem(model, [v_hit_1], [v_hit_2], [v_hit_3],
                                [v_hit_4])

        system_str = f"""# macsyfinder {macsypy.__version__}
# {' '.join(sys.argv)}
# Systems found:

This replicon contains genetic materials needed for system foo/T2SS
WARNING there quorum is reached but there is also some forbidden genes.

system id = replicon_id_T2SS_1
model = foo/T2SS
replicon = replicon_id
hits = [('hit_1', 'gspD', 1), ('hit_2', 'sctJ', 1), ('hit_3', 'sctC', 1), ('hit_4', 'tadZ', 1)]
wholeness = 1.000

mandatory genes:
\t- gspD: 1 (gspD)

accessory genes:
\t- sctJ: 1 (sctJ)

neutral genes:
\t- sctC: 1 (sctC)

forbidden genes:
\t- tadZ: 1 (tadZ)

Use ordered replicon to have better prediction.

"""

        f_out = StringIO()
        track_multi_systems_hit = HitSystemTracker([system_1])
        likely_systems_to_txt([system_1], track_multi_systems_hit, f_out)
        self.assertMultiLineEqual(system_str, f_out.getvalue())

        f_out = StringIO()
        likely_systems_to_txt([], track_multi_systems_hit, f_out)
        expected_out = f"""# macsyfinder {macsypy.__version__}
# {' '.join(sys.argv)}
# No Likely Systems found
"""
        self.assertEqual(expected_out, f_out.getvalue())

    def test_likely_systems_to_tsv(self):
        args = argparse.Namespace()
        args.sequence_db = self.find_data("base", "test_1.fasta")
        args.db_type = 'unordered'
        args.models_dir = self.find_data('models')
        cfg = Config(MacsyDefaults(), args)

        model_name = 'foo'
        models_location = ModelLocation(
            path=os.path.join(args.models_dir, model_name))

        # we need to reset the ProfileFactory
        # because it's a like a singleton
        # so other tests are influenced by ProfileFactory and it's configuration
        # for instance search_genes get profile without hmmer_exe
        profile_factory = ProfileFactory(cfg)

        model = Model("foo/T2SS", 10)
        # test if id is well incremented
        gene_name = "gspD"
        c_gene_gspd = CoreGene(models_location, gene_name, profile_factory)
        gene_gspd = ModelGene(c_gene_gspd, model)
        model.add_mandatory_gene(gene_gspd)
        gene_name = "sctJ"
        c_gene_sctj = CoreGene(models_location, gene_name, profile_factory)
        gene_sctj = ModelGene(c_gene_sctj, model)
        model.add_accessory_gene(gene_sctj)
        gene_name = "sctC"
        c_gene_sctc = CoreGene(models_location, gene_name, profile_factory)
        gene_sctc = ModelGene(c_gene_sctc, model)
        model.add_neutral_gene(gene_sctc)
        gene_name = "tadZ"
        c_gene_tadz = CoreGene(models_location, gene_name, profile_factory)
        gene_tadz = ModelGene(c_gene_tadz, model)
        model.add_forbidden_gene(gene_tadz)

        hit_1 = Hit(c_gene_gspd, "hit_1", 803, "replicon_id", 1, 1.0, 1.0, 1.0,
                    1.0, 10, 20)
        v_hit_1 = ValidHit(hit_1, gene_gspd, GeneStatus.MANDATORY)
        hit_2 = Hit(c_gene_sctj, "hit_2", 804, "replicon_id", 1, 1.0, 1.0, 1.0,
                    1.0, 10, 20)
        v_hit_2 = ValidHit(hit_2, gene_sctj, GeneStatus.ACCESSORY)
        hit_3 = Hit(c_gene_sctc, "hit_3", 805, "replicon_id", 1, 1.0, 1.0, 1.0,
                    1.0, 10, 20)
        v_hit_3 = ValidHit(hit_3, gene_sctc, GeneStatus.NEUTRAL)
        hit_4 = Hit(c_gene_tadz, "hit_4", 806, "replicon_id", 1, 1.0, 1.0, 1.0,
                    1.0, 10, 20)
        v_hit_4 = ValidHit(hit_4, gene_tadz, GeneStatus.FORBIDDEN)

        system_1 = LikelySystem(model, [v_hit_1], [v_hit_2], [v_hit_3],
                                [v_hit_4])

        sol_tsv = f"""# macsyfinder {macsypy.__version__}
# {' '.join(sys.argv)}
# Likely Systems found:"""
        sol_tsv += "\n\n"
        sol_tsv += "\t".join([
            "replicon", "hit_id", "gene_name", "hit_pos", "model_fqn",
            "sys_id", "sys_wholeness", "hit_gene_ref", "hit_status",
            "hit_seq_len", "hit_i_eval", "hit_score", "hit_profile_cov",
            "hit_seq_cov", "hit_begin_match", "hit_end_match", "used_in"
        ])
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            "replicon_id", "hit_1", "gspD", "1", "foo/T2SS",
            "replicon_id_T2SS_1", "1.000", "gspD", "mandatory", "803", "1.0",
            "1.000", "1.000", "1.000", "10", "20", ""
        ])
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            "replicon_id", "hit_2", "sctJ", "1", "foo/T2SS",
            "replicon_id_T2SS_1", "1.000", "sctJ", "accessory", "804", "1.0",
            "1.000", "1.000", "1.000", "10", "20", ""
        ])
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            "replicon_id", "hit_4", "tadZ", "1", "foo/T2SS",
            "replicon_id_T2SS_1", "1.000", "tadZ", "forbidden", "806", "1.0",
            "1.000", "1.000", "1.000", "10", "20", ""
        ])
        sol_tsv += "\n"
        sol_tsv += '\t'.join([
            "replicon_id", "hit_3", "sctC", "1", "foo/T2SS",
            "replicon_id_T2SS_1", "1.000", "sctC", "neutral", "805", "1.0",
            "1.000", "1.000", "1.000", "10", "20", ""
        ])
        sol_tsv += "\n"
        sol_tsv += "\n"

        f_out = StringIO()
        track_multi_systems_hit = HitSystemTracker([system_1])
        likely_systems_to_tsv([system_1], track_multi_systems_hit, f_out)
        self.assertMultiLineEqual(sol_tsv, f_out.getvalue())

        f_out = StringIO()
        likely_systems_to_tsv([], track_multi_systems_hit, f_out)
        expected_out = f"""# macsyfinder {macsypy.__version__}
# {' '.join(sys.argv)}
# No Likely Systems found
"""
        self.assertEqual(expected_out, f_out.getvalue())

    def test_unnlikely_systems_to_txt(self):
        args = argparse.Namespace()
        args.sequence_db = self.find_data("base", "test_1.fasta")
        args.db_type = 'unordered'
        args.models_dir = self.find_data('models')
        cfg = Config(MacsyDefaults(), args)

        model_name = 'foo'
        models_location = ModelLocation(
            path=os.path.join(args.models_dir, model_name))

        # we need to reset the ProfileFactory
        # because it's a like a singleton
        # so other tests are influenced by ProfileFactory and it's configuration
        # for instance search_genes get profile without hmmer_exe
        profile_factory = ProfileFactory(cfg)

        model = Model("foo/T2SS", 10)
        # test if id is well incremented
        gene_name = "gspD"
        c_gene_gspd = CoreGene(models_location, gene_name, profile_factory)
        gene_gspd = ModelGene(c_gene_gspd, model)
        model.add_mandatory_gene(gene_gspd)
        gene_name = "sctJ"
        c_gene_sctj = CoreGene(models_location, gene_name, profile_factory)
        gene_sctj = ModelGene(c_gene_sctj, model)
        model.add_accessory_gene(gene_sctj)
        gene_name = "sctC"
        c_gene_sctc = CoreGene(models_location, gene_name, profile_factory)
        gene_sctc = ModelGene(c_gene_sctc, model)
        model.add_neutral_gene(gene_sctc)
        gene_name = "tadZ"
        c_gene_tadz = CoreGene(models_location, gene_name, profile_factory)
        gene_tadz = ModelGene(c_gene_tadz, model)
        model.add_forbidden_gene(gene_tadz)

        hit_1 = Hit(c_gene_gspd, "hit_1", 803, "replicon_id", 1, 1.0, 1.0, 1.0,
                    1.0, 10, 20)
        v_hit_1 = ValidHit(hit_1, gene_gspd, GeneStatus.MANDATORY)
        hit_2 = Hit(c_gene_sctj, "hit_2", 804, "replicon_id", 1, 1.0, 1.0, 1.0,
                    1.0, 10, 20)
        v_hit_2 = ValidHit(hit_2, gene_sctj, GeneStatus.ACCESSORY)
        hit_3 = Hit(c_gene_sctc, "hit_3", 805, "replicon_id", 1, 1.0, 1.0, 1.0,
                    1.0, 10, 20)
        v_hit_3 = ValidHit(hit_3, gene_sctc, GeneStatus.NEUTRAL)
        hit_4 = Hit(c_gene_tadz, "hit_4", 806, "replicon_id", 1, 1.0, 1.0, 1.0,
                    1.0, 10, 20)
        v_hit_4 = ValidHit(hit_4, gene_tadz, GeneStatus.FORBIDDEN)
        reason = "why it not a system"
        system_1 = UnlikelySystem(model, [v_hit_1], [v_hit_2], [v_hit_3],
                                  [v_hit_4], reason)

        exp_txt = f"""# macsyfinder {macsypy.__version__}
# {' '.join(sys.argv)}
# Unlikely Systems found:

This replicon probably not contains a system foo/T2SS:
{reason}

system id = replicon_id_T2SS_1
model = foo/T2SS
replicon = replicon_id
hits = [('hit_1', 'gspD', 1), ('hit_2', 'sctJ', 1), ('hit_3', 'sctC', 1), ('hit_4', 'tadZ', 1)]
wholeness = 1.000

mandatory genes:
\t- gspD: 1 (gspD)

accessory genes:
\t- sctJ: 1 (sctJ)

neutral genes:
\t- sctC: 1 (sctC)

forbidden genes:
\t- tadZ: 1 (tadZ)

Use ordered replicon to have better prediction.

============================================================
"""

        f_out = StringIO()
        unlikely_systems_to_txt([system_1], f_out)
        self.assertMultiLineEqual(exp_txt, f_out.getvalue())

        f_out = StringIO()
        unlikely_systems_to_txt([], f_out)
        expected_out = f"""# macsyfinder {macsypy.__version__}
# {' '.join(sys.argv)}
# No Unlikely Systems found
"""
        self.assertEqual(expected_out, f_out.getvalue())

    def test_parse_args(self):
        command_line = "macsyfinder --sequence-db test_1.fasta --db-type=gembase --models-dir data/models/ " \
                       "--models functional all -w 4 --out test_1-all"
        parser, args = parse_args(command_line.split()[1:])
        self.assertIsNone(args.cfg_file)
        self.assertIsNone(args.coverage_profile)
        self.assertIsNone(args.hmmer)
        self.assertIsNone(args.i_evalue_sel)
        self.assertIsNone(args.inter_gene_max_space)
        self.assertIsNone(args.max_nb_genes)
        self.assertIsNone(args.min_genes_required)
        self.assertIsNone(args.min_mandatory_genes_required)
        self.assertIsNone(args.multi_loci)
        self.assertIsNone(args.previous_run)
        self.assertIsNone(args.profile_suffix)
        self.assertIsNone(args.replicon_topology)
        self.assertIsNone(args.res_extract_suffix)
        self.assertIsNone(args.res_search_suffix)
        self.assertIsNone(args.topology_file)
        self.assertFalse(args.idx)
        self.assertFalse(args.list_models)
        self.assertFalse(args.mute)
        self.assertFalse(args.relative_path)
        self.assertEqual(args.db_type, 'gembase')
        self.assertEqual(args.models_dir, 'data/models/')
        self.assertEqual(args.out_dir, 'test_1-all')
        self.assertEqual(args.sequence_db, 'test_1.fasta')
        self.assertEqual(args.verbosity, 0)
        self.assertEqual(args.worker, 4)

        self.assertListEqual(args.models, [['functional', 'all']])

        command_line = "macsyfinder --sequence-db test_!.fasta " \
                       "--db-type=ordered_replicon --models-dir data/models/ " \
                       "--models functional all -w 4 --out test_1-all " \
                       "--mute --multi-loci TXSscan/T2SS,TXSScan/T3SS --relative-path"
        parser, args = parse_args(command_line.split()[1:])
        self.assertEqual(args.db_type, 'ordered_replicon')
        self.assertEqual(args.multi_loci, "TXSscan/T2SS,TXSScan/T3SS")
        self.assertTrue(args.relative_path)
        self.assertTrue(args.mute)

        command_line = "macsyfinder --sequence-db test_1.dasta " \
                       "--db-type=ordered_replicon --models-dir data/models/ " \
                       "--i-evalue-sel=0.5 " \
                       "--min-genes-required TXSScan/T2SS 15 --min-genes-required TXSScan/Flagellum 10"
        parser, args = parse_args(command_line.split()[1:])
        self.assertEqual(args.i_evalue_sel, 0.5)
        self.assertListEqual(
            args.min_genes_required,
            [['TXSScan/T2SS', '15'], ['TXSScan/Flagellum', '10']])

    @unittest.skipIf(not which('hmmsearch'), 'hmmsearch not found in PATH')
    def test_search_systems(self):
        logger = logging.getLogger('macsypy.macsyfinder')
        macsypy.logger_set_level(level='ERROR')
        defaults = MacsyDefaults()

        out_dir = os.path.join(self.tmp_dir, 'macsyfinder_test_search_systems')
        os.mkdir(out_dir)

        # test gembase replicon
        seq_db = self.find_data('base', 'VICH001.B.00001.C001.prt')
        model_dir = self.find_data('data_set', 'models')
        args = f"--sequence-db {seq_db} --db-type=gembase --models-dir {model_dir} --models set_1 all -w 4 -o {out_dir}"

        _, parsed_args = parse_args(args.split())
        config = Config(defaults, parsed_args)
        model_bank = ModelBank()
        gene_bank = GeneBank()
        profile_factory = ProfileFactory(config)

        systems, rejected_clst = search_systems(config, model_bank, gene_bank,
                                                profile_factory, logger)
        expected_sys_id = [
            'VICH001.B.00001.C001_MSH_5', 'VICH001.B.00001.C001_MSH_7',
            'VICH001.B.00001.C001_T4P_25', 'VICH001.B.00001.C001_T4P_23',
            'VICH001.B.00001.C001_T4P_21', 'VICH001.B.00001.C001_T4P_22',
            'VICH001.B.00001.C001_T4P_17', 'VICH001.B.00001.C001_T4P_16',
            'VICH001.B.00001.C001_T4bP_26', 'VICH001.B.00001.C001_T4P_24',
            'VICH001.B.00001.C001_T4P_18', 'VICH001.B.00001.C001_T4P_19',
            'VICH001.B.00001.C001_T4P_20', 'VICH001.B.00001.C001_T2SS_10',
            'VICH001.B.00001.C001_T2SS_9'
        ]
        self.assertListEqual([s.id for s in systems], expected_sys_id)

        expected_scores = [
            10.5, 10.0, 12.0, 9.5, 9.0, 8.5, 6.0, 5.0, 5.5, 10.5, 7.5, 7.0,
            8.0, 8.3, 7.5
        ]
        self.assertListEqual([s.score for s in systems], expected_scores)
        self.assertEqual(len(rejected_clst), 11)

        # test hits but No Systems
        args = f"--sequence-db {seq_db} --db-type=gembase --models-dir {model_dir} --models set_1 Tad -w 4 -o {out_dir}"
        _, parsed_args = parse_args(args.split())
        config = Config(defaults, parsed_args)
        model_bank = ModelBank()
        gene_bank = GeneBank()
        profile_factory = ProfileFactory(config)
        systems, rejected_clst = search_systems(config, model_bank, gene_bank,
                                                profile_factory, logger)
        self.assertEqual(systems, [])

        # test No hits
        seq_db = self.find_data('base', 'test_1.fasta')
        args = f"--sequence-db {seq_db} --db-type=gembase --models-dir {model_dir} --models set_1 T4bP -w 4 -o {out_dir}"
        _, parsed_args = parse_args(args.split())
        config = Config(defaults, parsed_args)
        model_bank = ModelBank()
        gene_bank = GeneBank()
        profile_factory = ProfileFactory(config)
        systems, rejected_clst = search_systems(config, model_bank, gene_bank,
                                                profile_factory, logger)
        self.assertEqual(systems, [])
        self.assertEqual(rejected_clst, [])

    def test_search_systems_unordered(self):
        logger = logging.getLogger('macsypy.macsyfinder')
        macsypy.logger_set_level(level='ERROR')
        defaults = MacsyDefaults()

        out_dir = os.path.join(self.tmp_dir, 'macsyfinder_test_search_systems')
        os.mkdir(out_dir)
        seq_db = self.find_data('base', 'VICH001.B.00001.C001.prt')
        model_dir = self.find_data('data_set', 'models')
        # test unordered replicon
        args = f"--sequence-db {seq_db} --db-type=unordered --models-dir {model_dir} --models set_1 all -w 4 -o {out_dir}"

        _, parsed_args = parse_args(args.split())
        config = Config(defaults, parsed_args)
        model_bank = ModelBank()
        gene_bank = GeneBank()
        profile_factory = ProfileFactory(config)

        systems, uncomplete_sys = search_systems(config, model_bank, gene_bank,
                                                 profile_factory, logger)
        expected_sys_id = [
            'Unordered_T2SS_4', 'Unordered_MSH_3', 'Unordered_T4P_5',
            'Unordered_T4bP_6'
        ]
        self.assertListEqual([s.id for s in systems], expected_sys_id)

        expected_uncomplete_sys_id = [
            'Unordered_Archaeal-T4P_1', 'Unordered_ComM_2', 'Unordered_Tad_7'
        ]
        self.assertListEqual([s.id for s in uncomplete_sys],
                             expected_uncomplete_sys_id)

    def test_search_systems_model_unknown(self):
        logger = logging.getLogger('macsypy.macsyfinder')
        macsypy.logger_set_level(level='ERROR')
        defaults = MacsyDefaults()

        out_dir = os.path.join(self.tmp_dir, 'macsyfinder_test_search_systems')
        os.mkdir(out_dir)
        seq_db = self.find_data('base', 'test_1.fasta')
        model_dir = self.find_data('data_set', 'models')
        args = f"--sequence-db {seq_db} --db-type=gembase --models-dir {model_dir} --models nimporaoik -w 4 -o {out_dir}"

        _, parsed_args = parse_args(args.split())
        config = Config(defaults, parsed_args)
        model_bank = ModelBank()
        gene_bank = GeneBank()
        profile_factory = ProfileFactory(config)

        exit_ori = sys.exit
        sys.exit = self.fake_exit
        try:
            with self.assertRaises(TypeError) as ctx:
                _ = search_systems(config, model_bank, gene_bank,
                                   profile_factory, logger)
            self.assertEqual(
                str(ctx.exception),
                "macsyfinder: \"No such model definition: 'nimporaoik'\"")
        finally:
            sys.exit = exit_ori

Example #9

Show file

File: test_search_genes.py Project: karubiotools/macsyfinder

class TestSearchGenes(MacsyTest):
    def setUp(self):
        self.tmp_dir = os.path.join(tempfile.gettempdir(),
                                    'test_macsyfinder_search_genes')
        if os.path.exists(self.tmp_dir):
            shutil.rmtree(self.tmp_dir)
        os.mkdir(self.tmp_dir)

        args = argparse.Namespace()
        args.sequence_db = self.find_data("base", "test_base.fa")
        args.db_type = 'gembase'
        args.models_dir = self.find_data('models')
        args.log_level = 30
        args.out_dir = os.path.join(self.tmp_dir, 'job_1')
        args.res_search_dir = args.out_dir
        os.mkdir(args.out_dir)

        self.cfg = Config(MacsyDefaults(), args)

        self.model_name = 'foo'
        self.model_location = ModelLocation(
            path=os.path.join(args.models_dir, self.model_name))

        idx = Indexes(self.cfg)
        idx._build_my_indexes()
        self.profile_factory = ProfileFactory(self.cfg)

    def tearDown(self):
        try:
            shutil.rmtree(self.cfg.working_dir())
            #pass
        except:
            pass

    @unittest.skipIf(not which('hmmsearch'), 'hmmsearch not found in PATH')
    def test_search(self):
        gene_name = "abc"
        c_gene_abc = CoreGene(self.model_location, gene_name,
                              self.profile_factory)
        report = search_genes([c_gene_abc], self.cfg)
        expected_hit = [
            Hit(c_gene_abc, "ESCO030p01_000260", 706, "ESCO030p01", 26,
                float(1.000e-200), float(660.800), float(1.000), float(0.714),
                160, 663)
        ]
        self.assertEqual(len(report), 1)
        self.assertEqual(expected_hit[0], report[0].hits[0])

    @unittest.skipIf(not which('hmmsearch'), 'hmmsearch not found in PATH')
    def test_search_recover(self):
        # first job searching using hmmsearch
        gene_name = "abc"
        c_gene_abc = CoreGene(self.model_location, gene_name,
                              self.profile_factory)
        report = search_genes([c_gene_abc], self.cfg)
        expected_hit = [
            Hit(c_gene_abc, "ESCO030p01_000260", 706, "ESCO030p01", 26,
                float(1.000e-200), float(660.800), float(1.000), float(0.714),
                160, 663)
        ]

        # second job using recover
        # disable hmmer to be sure that test use the recover inner function
        self.cfg.hmmer = lambda: "hmmer_disable"
        # and create a new dir for the second job
        previous_job_path = self.cfg.working_dir()
        self.cfg.previous_run = lambda: previous_job_path
        self.cfg.out_dir = lambda: os.path.join(self.tmp_dir, 'job_2')
        os.mkdir(self.cfg.out_dir())

        # rerun with previous run
        # but we have to reset the profile attached to the gene gene._profile._report
        self.profile_factory = ProfileFactory(self.cfg)
        c_gene_abc = CoreGene(self.model_location, gene_name,
                              self.profile_factory)
        report = search_genes([c_gene_abc], self.cfg)
        self.assertEqual(len(report), 1)
        self.assertEqual(expected_hit[0], report[0].hits[0])

Example #10

Show file

class TestProfile(MacsyTest):

    def setUp(self):
        args = argparse.Namespace()
        args.sequence_db = self.find_data("base", "test_1.fasta")
        args.db_type = 'gembase'
        args.models_dir = self.find_data('models')
        args.res_search_dir = tempfile.gettempdir()
        args.log_level = 0
        self.cfg = Config(MacsyDefaults(), args)

        if os.path.exists(self.cfg.working_dir()):
            shutil(self.cfg.working_dir())
        os.makedirs(self.cfg.working_dir())

        self.model_name = 'foo'
        self.model_location = ModelLocation(path=os.path.join(args.models_dir, self.model_name))
        self.profile_factory = ProfileFactory(self.cfg)


    def tearDown(self):
        try:
            shutil.rmtree(self.cfg.working_dir())
        except:
            pass


    def test_len(self):
        model = Model("foo/T2SS", 10)

        gene_name = 'abc'
        c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
        gene = ModelGene(c_gene, model)

        path = self.model_location.get_profile("abc")
        profile = Profile(gene, self.cfg, path)
        self.assertEqual(len(profile), 501)


    def test_ga_threshold(self):
        model = Model("foo/T2SS", 10)
        gene_name = 'abc'
        c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
        gene = ModelGene(c_gene, model)

        path = self.model_location.get_profile("abc")
        profile = Profile(gene, self.cfg, path)
        self.assertFalse(profile.ga_threshold)

        gene_name = 'T5aSS_PF03797'
        c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
        gene = ModelGene(c_gene, model)

        path = self.model_location.get_profile("T5aSS_PF03797")
        profile = Profile(gene, self.cfg, path)
        self.assertTrue(profile.ga_threshold)

    def test_str(self):
        model = Model("foo/T2SS", 10)

        gene_name = 'abc'
        c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
        gene = ModelGene(c_gene, model)

        path = self.model_location.get_profile("abc")
        profile = Profile(gene, self.cfg, path)
        s = "{0} : {1}".format(gene.name, path)
        self.assertEqual(str(profile), s)


    @unittest.skipIf(not which('hmmsearch'), 'hmmsearch not found in PATH')
    def test_execute(self):
        for db_type in ("gembase", "ordered_replicon", "unordered"):
            self.cfg._set_db_type(db_type)
            model = Model("foo/T2SS", 10)

            gene_name = 'T5aSS_PF03797'
            c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
            gene = ModelGene(c_gene, model)

            # case GA threshold in profile
            profile_path = self.model_location.get_profile("T5aSS_PF03797")
            profile = Profile(gene, self.cfg, profile_path)
            report = profile.execute()
            hmmer_raw_out = profile.hmm_raw_output
            with open(hmmer_raw_out, 'r') as hmmer_raw_out_file:
                first_l = hmmer_raw_out_file.readline()
                # a hmmsearch output file has been produced
                self.assertTrue(first_l.startswith("# hmmsearch :: search profile(s) against a sequence database"))
                for i in range(5):
                    # skip 4 lines
                    l = hmmer_raw_out_file.readline()
                # a hmmsearch used the abc profile line should become with: "# query HMM file: {the path tp hmm profile used}"
                self.assertTrue(l.find(profile_path) != -1)
                for i in range(3):
                    # skip 2 lines
                    l = hmmer_raw_out_file.readline()
                self.assertEqual("# model-specific thresholding:     GA cutoffs", l.strip())
            # test if profile is executed only once per run
            report_bis = profile.execute()
            self.assertIs(report, report_bis)

            # case GA threshold in profile but --no-cut-ga is set
            args = argparse.Namespace()
            args.sequence_db = self.find_data("base", "test_1.fasta")
            args.db_type = 'gembase'
            args.models_dir = self.find_data('models')
            args.res_search_dir = tempfile.gettempdir()
            args.log_level = 0
            args.e_value_search = 0.5
            args.no_cut_ga = True
            cfg = Config(MacsyDefaults(), args)

            profile = Profile(gene, cfg, profile_path)
            report = profile.execute()
            hmmer_raw_out = profile.hmm_raw_output
            with open(hmmer_raw_out, 'r') as hmmer_raw_out_file:
                for i in range(9):
                    l = hmmer_raw_out_file.readline()
                self.assertEqual("# sequence reporting threshold:    E-value <= 0.5", l.strip())


            # case cut-ga but no GA threshold in hmmprofile
            gene_name = 'abc'
            c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
            gene = ModelGene(c_gene, model)

            # case -cut-ga and GA threshold in profile
            profile_path = self.model_location.get_profile("abc")
            profile = Profile(gene, self.cfg, profile_path)

            with self.catch_log() as log:
                report = profile.execute()

            hmmer_raw_out = profile.hmm_raw_output
            with open(hmmer_raw_out, 'r') as hmmer_raw_out_file:
                first_l = hmmer_raw_out_file.readline()
                # a hmmsearch output file has been produced
                self.assertTrue(first_l.startswith("# hmmsearch :: search profile(s) against a sequence database"))
                for i in range(5):
                    # skip 4 lines
                    l = hmmer_raw_out_file.readline()
                # a hmmsearch used the abc profile line should become with: "# query HMM file: {the path tp hmm profile used}"
                self.assertTrue(l.find(profile_path) != -1)
                for i in range(3):
                    # skip 2 lines
                    l = hmmer_raw_out_file.readline()
                self.assertEqual('# sequence reporting threshold:    E-value <= 0.1', l.strip())


    def test_execute_unknown_binary(self):
        self.cfg._options['hmmer'] = "Nimportnaoik"
        model = Model("foo/T2SS", 10)

        gene_name = 'abc'
        c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
        gene = ModelGene(c_gene, model)

        path = self.model_location.get_profile("abc", )
        profile = Profile(gene, self.cfg, path)
        with self.catch_log():
            with self.assertRaises(RuntimeError):
                profile.execute()


    def test_execute_hmmer_failed(self):
        fake_hmmer = os.path.join(tempfile.gettempdir(), 'hmmer_failed')
        with open(fake_hmmer, 'w') as hmmer:
            hmmer.write("""#! {}
import sys
sys.exit(127)
""".format(sysconfig.sys.executable))
        try:
            os.chmod(hmmer.name, 0o755)
            self.cfg._options['hmmer'] = hmmer.name
            model = Model("foo/T2SS", 10)

            gene_name = 'abc'
            c_gene = CoreGene(self.model_location, gene_name, self.profile_factory)
            gene = ModelGene(c_gene, model)

            path = self.model_location.get_profile("abc", )
            profile = Profile(gene, self.cfg, path)
            with self.catch_log():
                with self.assertRaisesRegex(RuntimeError,
                                            "an error occurred during Hmmer "
                                            "execution: command = .* : return code = 127 .*") as ctx:
                    profile.execute()

        finally:
            try:
                os.unlink(fake_hmmer)
            except Exception:
                pass

Example #11

Show file

        print("#" * 70)

        ############## WORKAROUND ##################
        # integron_finder is a script not a lib
        # to test each function in integron_finder,
        # we need to import integron_finder
        # so we need to transform the script in lib
        # and ad it to the path somewhere a user can write
        # the fisrt element of path is integron_finder.tests

        if not 'INTEGRON_HOME' in os.environ:
            INTEGRON_HOME = os.path.abspath(
                os.path.join(os.path.dirname(__file__), '..'))
            sys.path.append(INTEGRON_HOME)
            from tests import which
            integron_finder_script = which('integron_finder')
            if integron_finder_script is None:
                raise RuntimeError(
                    'Cannot find integron_finder, do you set INTEGRON_HOME')
        else:
            INTEGRON_HOME = os.environ['INTEGRON_HOME']
            integron_finder_script = os.path.join(INTEGRON_HOME,
                                                  'integron_finder')

        fake_lib = os.path.join(INTEGRON_HOME, 'tests', 'fake_lib')
        integron_finder_lib = os.path.join(fake_lib, 'integron_finder.py')
        if not os.path.exists(fake_lib):
            os.mkdir(fake_lib)
        if os.path.exists(integron_finder_lib):
            os.unlink(integron_finder_lib)

Example #12

Show file

    def setUp(self):
        if 'INTEGRON_HOME' in os.environ:
            self.integron_home = os.environ['INTEGRON_HOME']
            self.local_install = True
        else:
            self.local_install = False
            self.integron_home = os.path.normpath(
                os.path.abspath(
                    os.path.join(os.path.dirname(__file__), '..', '..')))

        self.tmp_dir = os.path.join(tempfile.gettempdir(),
                                    'tmp_test_integron_finder')
        os.makedirs(self.tmp_dir)

        integron_finder.PRODIGAL = which('prodigal')
        integron_finder.HMMSEARCH = which('hmmsearch')
        integron_finder.N_CPU = '1'
        integron_finder.MODEL_DIR = os.path.join(self.integron_home, "data",
                                                 "Models")
        integron_finder.MODEL_integrase = os.path.join(
            integron_finder.MODEL_DIR, "integron_integrase.hmm")
        integron_finder.MODEL_phage_int = os.path.join(
            integron_finder.MODEL_DIR, "phage-int.hmm")
        integron_finder.MODEL_attc = os.path.join(self.integron_home, 'data',
                                                  'Models', 'attc_4.cm')

        integron_finder.circular = True
        integron_finder.out_dir = self.tmp_dir
        integron_finder.CMSEARCH = which('cmsearch')
        integron_finder.evalue_attc = 1.
        integron_finder.max_attc_size = 200
        integron_finder.min_attc_size = 40
        integron_finder.length_cm = 47  # length in 'CLEN' (value for model attc_4.cm)
        integron_finder.DISTANCE_THRESHOLD = 4000  # (4kb at least between 2 different arrays)

        self.columns = [
            'pos_beg', 'pos_end', 'strand', 'evalue', 'type_elt', 'model',
            'distance_2attC', 'annotation'
        ]
        self.dtype = {
            "pos_beg": 'int',
            "pos_end": 'int',
            "strand": 'int',
            "evalue": 'float',
            "type_elt": 'str',
            "annotation": 'str',
            "model": 'str',
            "distance_2attC": 'float'
        }

        self.max_dtype = {
            'Accession_number': 'str',
            'cm_attC': 'str',
            'cm_debut': 'int',
            'cm_fin': 'int',
            'pos_beg': 'int',
            'pos_end': 'int',
            'sens': 'str',
            'evalue': 'float'
        }
        self.max_cols = [
            'Accession_number', 'cm_attC', 'cm_debut', 'cm_fin', 'pos_beg',
            'pos_end', 'sens', 'evalue'
        ]