def test_MacsyDefaults_virtual_env(self):
        """
        Check MacsyDefaults while ``VIRTUAL_ENV`` points to a temporary directory.

        A ``share/macsyfinder/models`` tree is created inside the fake
        virtualenv, the expected ``system_models_dir`` is adjusted to it,
        then MacsyDefaults is compared with the expected values, both
        without arguments and with ``previous_run``/``worker`` overridden.

        The ``VIRTUAL_ENV`` variable is always put back in its initial state
        (restored, or removed if it was not defined) when the test ends.
        """
        # None means the variable was not defined before the test
        virtual_env = os.environ.get('VIRTUAL_ENV')

        with tempfile.TemporaryDirectory() as fake_virtual_env:
            os.environ['VIRTUAL_ENV'] = fake_virtual_env
            # from here the environment is modified:
            # everything runs under the try so it is always restored
            try:
                system_models_dir = os.path.join(fake_virtual_env, 'share',
                                                 'macsyfinder', 'models')
                os.makedirs(system_models_dir)
                # only the directories which really exist are expected
                self.defaults['system_models_dir'] = [
                    path for path in (system_models_dir,
                                      os.path.join(os.path.expanduser('~'),
                                                   '.macsyfinder', 'models'))
                    if os.path.exists(path)
                ]

                defaults = MacsyDefaults()
                self.maxDiff = None
                self.assertDictEqual(defaults, self.defaults)

                new_defaults = {k: v for k, v in self.defaults.items()}
                new_defaults['previous_run'] = True
                new_defaults['worker'] = 5
                defaults = MacsyDefaults(previous_run=True, worker=5)
                self.assertDictEqual(defaults, new_defaults)
            finally:
                if virtual_env is None:
                    # the variable did not exist: do not leak the fake path
                    # (which points to a deleted directory) to other tests
                    os.environ.pop('VIRTUAL_ENV', None)
                else:
                    os.environ['VIRTUAL_ENV'] = virtual_env
    def test_MacsyDefaults_no_virtual_env(self):
        """
        Check MacsyDefaults while the ``VIRTUAL_ENV`` variable is not defined.

        The expected ``system_models_dir`` is built from ``/share/macsyfinder``,
        ``/usr/local/share/macsyfinder`` and ``~/.macsyfinder/models``, keeping
        only the paths which exist. MacsyDefaults is then compared with the
        expected values, both without arguments and with
        ``previous_run``/``worker`` overridden.

        The test also works when it is not run from a virtualenv, and
        ``VIRTUAL_ENV`` is restored at the end if it was defined.
        """
        # None means the variable was not defined before the test
        virtual_env = os.environ.get('VIRTUAL_ENV')
        common_path = os.path.join('share', 'macsyfinder')
        prefixes = ('/', os.path.join('/', 'usr', 'local'))
        system_models_dir = [
            os.path.join(root, common_path) for root in prefixes
        ]
        system_models_dir.append(
            os.path.join(os.path.expanduser('~'), '.macsyfinder', 'models'))
        self.defaults['system_models_dir'] = [
            path for path in system_models_dir if os.path.exists(path)
        ]

        # pop with a default: a plain "del" raises KeyError
        # when the tests are run outside of a virtualenv
        os.environ.pop('VIRTUAL_ENV', None)
        try:
            defaults = MacsyDefaults()
            self.maxDiff = None
            self.assertDictEqual(defaults, self.defaults)

            new_defaults = {k: v for k, v in self.defaults.items()}
            new_defaults['previous_run'] = True
            new_defaults['worker'] = 5
            defaults = MacsyDefaults(previous_run=True, worker=5)
            self.assertDictEqual(defaults, new_defaults)
        finally:
            if virtual_env is not None:
                os.environ['VIRTUAL_ENV'] = virtual_env
    def test_MacsyDefaults(self):
        """
        MacsyDefaults must match the expected defaults, with and without
        overriding ``previous_run`` and ``worker``.
        """
        self.assertDictEqual(MacsyDefaults(), self.defaults)

        # same expectations, except for the two overridden entries
        expected = dict(self.defaults.items())
        expected['previous_run'] = True
        expected['worker'] = 5
        overridden = MacsyDefaults(previous_run=True, worker=5)
        self.assertDictEqual(overridden, expected)
Example #4
0
 def setUp(self):
     """
     Remember the current directory, create an empty scratch directory
     and prepare default values and an empty arguments namespace.
     """
     self._current_dir = os.getcwd()
     scratch_dir = os.path.join(tempfile.gettempdir(),
                                'test_macsyfinder_Config')
     self.tmp_dir = scratch_dir
     # drop what a previous run may have left behind
     if os.path.exists(scratch_dir):
         shutil.rmtree(scratch_dir)
     os.mkdir(scratch_dir)
     self.defaults = MacsyDefaults()
     self.parsed_args = Namespace()
    def setUp(self):
        """
        Create an empty scratch directory with a ``job_1`` output directory,
        build the configuration, the model location of ``foo``, the indexes
        and the profile factory.
        """
        scratch_dir = os.path.join(tempfile.gettempdir(),
                                   'test_macsyfinder_search_genes')
        self.tmp_dir = scratch_dir
        # drop what a previous run may have left behind
        if os.path.exists(scratch_dir):
            shutil.rmtree(scratch_dir)
        os.mkdir(scratch_dir)

        job_dir = os.path.join(scratch_dir, 'job_1')
        args = argparse.Namespace(
            sequence_db=self.find_data("base", "test_base.fa"),
            db_type='gembase',
            models_dir=self.find_data('models'),
            log_level=30,
            out_dir=job_dir,
            res_search_dir=job_dir,
        )
        os.mkdir(job_dir)

        self.cfg = Config(MacsyDefaults(), args)

        self.model_name = 'foo'
        model_path = os.path.join(args.models_dir, self.model_name)
        self.model_location = ModelLocation(path=model_path)

        indexes = Indexes(self.cfg)
        indexes._build_my_indexes()
        self.profile_factory = ProfileFactory(self.cfg)
Example #6
0
    def test_build_my_indexes(self):
        """
        Building the indexes from a corrupted sequence file must raise a
        MacsypyError whose message starts with
        "unable to index the sequence dataset:".
        """
        out_dir = os.path.join(tempfile.gettempdir(),
                               'test_macsyfinder_indexes')
        args = argparse.Namespace()
        args.db_type = 'gembase'
        args.out_dir = out_dir

        # start from an empty output directory
        if os.path.exists(out_dir):
            shutil.rmtree(out_dir)
        os.makedirs(out_dir)

        # work on a copy of the sequence file located in the output directory
        corrupted_db = self.find_data("base", "test_base_with_errors.fa")
        shutil.copy(corrupted_db, out_dir)
        args.sequence_db = os.path.join(out_dir,
                                        os.path.basename(corrupted_db))
        self.cfg = Config(MacsyDefaults(), args)

        indexes = Indexes(self.cfg)
        # the directory for index exists and is writable but
        # the sequence file is corrupted and cannot be read correctly
        with self.assertRaises(MacsypyError) as ctx, self.catch_log():
            indexes._build_my_indexes(args.out_dir)

        message = str(ctx.exception)
        self.assertTrue(
            message.startswith("unable to index the sequence dataset:"))
Example #7
0
    def test_set_base_options(self):
        """
        Run set_base_options with scripted answers and check the printed
        section title and the values stored in the 'base' section.
        """
        msf_cfg.theme = msf_cfg.Theme()
        parser = msf_cfg.ConfigParserWithComments()
        defaults = MacsyDefaults()
        # answers returned, in this order, by the replacement of msf_cfg.ask
        answers = iter([
            "Yes",  # enter section ?
            "ordered",  # db_type
            "linear",  # replicon_topology
            "None",  # sequence_db
        ])

        def scripted_ask(*_args, **_kwargs):
            return next(answers)

        original_ask = msf_cfg.ask
        try:
            msf_cfg.ask = scripted_ask
            with self.catch_io(out=True):
                msf_cfg.set_base_options(parser, defaults)
                printed = sys.stdout.getvalue().strip()
            expected_title = f"{msf_cfg.theme.SECTION}Configuring base options:{msf_cfg.theme.RESET}"
            self.assertEqual(printed, expected_title)
            self.assertTrue(parser.has_section("base"))
            self.assertEqual(parser.get("base", "db_type"), "ordered")
            self.assertEqual(parser.get("base", "replicon_topology"),
                             "linear")
            self.assertEqual(parser.get("base", "sequence_db"), "None")
        finally:
            # always put back the real ask function
            msf_cfg.ask = original_ask
Example #8
0
    def test_set_general_options(self):
        """
        Run set_general_options with scripted answers and check the printed
        section title and the values stored in the 'general' section.
        """
        msf_cfg.theme = msf_cfg.Theme()
        parser = msf_cfg.ConfigParserWithComments()
        defaults = MacsyDefaults()
        # answers returned, in this order, by the replacement of msf_cfg.ask
        answers = iter([
            "Yes",  # enter section ?
            'warning',  # log_level
            0,  # worker
            True  # mute
        ])

        def scripted_ask(*_args, **_kwargs):
            return next(answers)

        original_ask = msf_cfg.ask
        try:
            msf_cfg.ask = scripted_ask
            with self.catch_io(out=True):
                msf_cfg.set_general_options(parser, defaults)
                printed = sys.stdout.getvalue().strip()
            expected_title = f"{msf_cfg.theme.SECTION}Configuring general options:{msf_cfg.theme.RESET}"
            self.assertEqual(printed, expected_title)
            self.assertTrue(parser.has_section("general"))
            self.assertEqual(parser.get("general", "log_level"), "warning")
            self.assertEqual(parser.get("general", "worker"), "0")
            self.assertEqual(parser.get("general", "mute"), "True")
        finally:
            # always put back the real ask function
            msf_cfg.ask = original_ask
Example #9
0
    def test_set_section_use_defaults(self):
        """
        Call set_section with use_defaults=True: the section must be created,
        the 'hmmer' option must not be stored in it, and only the section
        title must be printed.
        """
        msf_cfg.theme = msf_cfg.Theme()
        parser = msf_cfg.ConfigParserWithComments()
        section = "new_section"
        hmmer_option = {
            "question": "that is the question",
            "validator": msf_cfg.check_str,
            "default": "Yes",
            "explanation": ""
        }
        options = {'hmmer': hmmer_option}
        defaults = MacsyDefaults(hmmer="Yes")

        with self.catch_io(out=True):
            msf_cfg.set_section(section, options, parser, defaults,
                                use_defaults=True)
            printed = sys.stdout.getvalue().strip()

        self.assertTrue(parser.has_section(section))
        self.assertFalse(parser.has_option(section, 'hmmer'))

        expected_title = f"{msf_cfg.theme.SECTION}Configuring new_section options:{msf_cfg.theme.RESET}"
        self.assertEqual(printed, expected_title)
 def test_fill_gembase_min_max_with_topology(self):
     """
     Write a topology file for two replicons, fill the RepliconDB from it
     with 'circular' as second argument of _fill_gembase_min_max, then
     check topology, min, max and genes of the three replicons.
     """
     topology_path = self.args.sequence_db + ".topo"
     self.args.topology_file = topology_path
     with open(topology_path, 'w') as topology_file:
         topology_file.write(
             '# topology file\nESCO030p01 : circular\nPSAE001c01 : linear\n'
         )
     cfg = Config(MacsyDefaults(), self.args)
     # replace the constructor by the fake one provided by the test case
     RepliconDB.__init__ = self.fake_init
     db = RepliconDB(cfg)
     topologies = db._fill_topology()
     db._fill_gembase_min_max(topologies, 'circular')

     self.assertEqual(len(db._DB), 3)
     self.assertEqual(set(db._DB.keys()),
                      {'ESCO030p01', 'PSAE001c01', 'NC_xxxxx_xx'})

     # replicon described as circular in the topology file
     esco = db['ESCO030p01']
     self.assertEqual(esco.topology, 'circular')
     self.assertEqual(esco.min, 1)
     self.assertEqual(esco.max, 67)
     self.assertEqual(esco.genes, self.ESCO030p01_genes)

     # replicon described as linear in the topology file
     psae = db['PSAE001c01']
     self.assertEqual(psae.topology, 'linear')
     self.assertEqual(psae.min, 68)
     self.assertEqual(psae.max, 133)
     self.assertEqual(psae.genes, self.PSAE001c01_genes)

     # replicon which is not listed in the topology file
     unlisted = db['NC_xxxxx_xx']
     self.assertEqual(unlisted.topology, 'circular')
     self.assertEqual(unlisted.min, 134)
     self.assertEqual(unlisted.max, 141)
     self.assertEqual(unlisted.genes, self.NCDB_genes)
Example #11
0
    def test_get_def_to_detect(self):
        """
        Check get_def_to_detect with explicitly named definitions, with 'all',
        and with definitions which do not exist (ValueError expected).
        """
        fake_models_dir = os.path.join(self._data_dir, 'fake_model_dir')
        cmd_args = argparse.Namespace()
        cmd_args.models_dir = fake_models_dir
        cmd_args.models = [('set_1', 'def_1_1', 'def_1_2', 'def_1_3')]
        config = Config(MacsyDefaults(models_dir=fake_models_dir), cmd_args)

        registry = ModelRegistry()
        for location in scan_models_dir(cmd_args.models_dir):
            registry.add(location)
        set_1 = registry['set_1']

        # case where models are specified on command line
        requested = ['def_1_1', 'def_1_2', 'def_1_3']
        found = get_def_to_detect([('set_1', requested)], registry)
        expected = []
        for fqn in ('set_1/def_1_1', 'set_1/def_1_2', 'set_1/def_1_3'):
            expected.append(set_1.get_definition(fqn))
        self.assertListEqual(found, expected)

        # case we search all models
        found = get_def_to_detect([('set_1', ['all'])], registry)
        self.assertListEqual(found, set_1.get_all_definitions())

        # case the required models do not exist
        with self.assertRaises(ValueError):
            get_def_to_detect([('set_1', ['FOO', 'BAR'])], registry)
    def test_build_my_db(self):
        """
        _build_my_db on the gspD hmmer output must return a dict with one
        entry per expected hit identifier, every value being None.
        """
        args = argparse.Namespace(
            db_type='gembase',
            models_dir=self.find_data('models'),
            log_level=30,
        )
        self.cfg = Config(MacsyDefaults(), args)
        hmmer_output = self.find_data(
            os.path.join('hmm', 'gspD.search_hmm.out'))

        profile = macsyprofile.HmmProfile("gspD", 596, hmmer_output,
                                          self.cfg)
        db = profile._build_my_db(hmmer_output)

        expected_ids = (
            'PSAE001c01_031420',
            'PSAE001c01_051090',
            'PSAE001c01_018920',
            'PSAE001c01_043580',
            'PSAE001c01_017350',
            'PSAE001c01_013980',
            'PSAE001c01_026600',
            'NC_xxxxx_xx_056141',
            'PSAE001c01_006940',
        )
        # the values are not filled yet: every identifier maps to None
        self.assertDictEqual(db, dict.fromkeys(expected_ids))
    def test_min_genes_required_cfg(self):
        """
        min_genes_required given in the configuration must overload the
        value read from the xml definition.
        """
        model_fqn = 'foo/model_5'
        self.args.min_genes_required = [[model_fqn, '4']]

        self.cfg = Config(MacsyDefaults(), self.args)
        self.model_bank = ModelBank()
        self.gene_bank = GeneBank()
        self.model_registry = ModelRegistry()
        for location in scan_models_dir(self.args.models_dir):
            self.model_registry.add(location)
        self.parser = DefinitionParser(self.cfg, self.model_bank,
                                       self.gene_bank, self.model_registry,
                                       self.profile_factory)

        definition = self.model_registry['foo'].get_definition(model_fqn)
        self.parser.parse([definition])

        parsed_model = self.model_bank[model_fqn]
        self.assertEqual(parsed_model.min_genes_required, 4)
Example #14
0
    def test_execute_hmm_w_GA_n_nocutga(self):
        """
        Case GA threshold in profile but --no-cut-ga is set: the 9th line of
        the raw hmmer output must report the E-value threshold (0.5).
        """
        args = argparse.Namespace(
            sequence_db=self.find_data("base", "test_1.fasta"),
            db_type='gembase',
            models_dir=self.find_data('models'),
            res_search_dir=tempfile.gettempdir(),
            log_level=0,
            e_value_search=0.5,
            no_cut_ga=True,
        )
        cfg = Config(MacsyDefaults(), args)

        model = Model("foo/T2SS", 10)
        core_gene = CoreGene(self.model_location, 'T5aSS_PF03797',
                             self.profile_factory)
        gene = ModelGene(core_gene, model)
        profile_path = self.model_location.get_profile("T5aSS_PF03797")
        profile = Profile(gene, cfg, profile_path)
        _ = profile.execute()

        raw_output_path = profile.hmm_raw_output
        with open(raw_output_path, 'r') as raw_output:
            # read 9 lines, only the last one is kept
            for _ in range(9):
                threshold_line = raw_output.readline()
        self.assertEqual(
            "# sequence reporting threshold:    E-value <= 0.5",
            threshold_line.strip())
    def test_fill_my_db(self):
        """
        After _fill_my_db, every hit identifier found in the gspD hmmer output
        must be associated with the expected pair of integers.
        """
        args = argparse.Namespace(
            db_type='gembase',
            models_dir=self.find_data('models'),
            log_level=30,
            sequence_db=self.find_data("base", "test_base.fa"),
            index_dir=self.tmpdir,
        )
        cfg = Config(MacsyDefaults(), args)
        hmmer_output = self.find_data('hmm', 'gspD.search_hmm.out')

        sequence_index = Indexes(cfg).build()
        profile = macsyprofile.HmmProfile("gspD", 596, hmmer_output, cfg)

        db = profile._build_my_db(hmmer_output)
        # _fill_my_db modifies db in place
        profile._fill_my_db(sequence_index, db)

        expected = {
            'PSAE001c01_031420': (658, 73),
            'PSAE001c01_051090': (714, 75),
            'PSAE001c01_018920': (776, 71),
            'PSAE001c01_043580': (416, 74),
            'PSAE001c01_017350': (600, 70),
            'PSAE001c01_013980': (759, 69),
            'PSAE001c01_026600': (273, 72),
            'NC_xxxxx_xx_056141': (803, 141),
            'PSAE001c01_006940': (803, 68),
        }
        self.assertDictEqual(db, expected)
Example #16
0
    def test_search_systems_unordered(self):
        """
        Run search_systems on an unordered replicon and check the identifiers
        of the systems returned in each of the two result lists.
        """
        logger = logging.getLogger('macsypy.macsyfinder')
        macsypy.logger_set_level(level='ERROR')
        defaults = MacsyDefaults()

        out_dir = os.path.join(self.tmp_dir, 'macsyfinder_test_search_systems')
        os.mkdir(out_dir)
        seq_db = self.find_data('base', 'VICH001.B.00001.C001.prt')
        model_dir = self.find_data('data_set', 'models')
        # test unordered replicon
        cmd_line = f"--sequence-db {seq_db} --db-type=unordered --models-dir {model_dir} --models set_1 all -w 4 -o {out_dir}"

        _, parsed_args = parse_args(cmd_line.split())
        config = Config(defaults, parsed_args)
        model_bank = ModelBank()
        gene_bank = GeneBank()
        profile_factory = ProfileFactory(config)

        found, uncomplete = search_systems(config, model_bank, gene_bank,
                                           profile_factory, logger)

        found_ids = [system.id for system in found]
        self.assertListEqual(found_ids, [
            'Unordered_T2SS_4',
            'Unordered_MSH_3',
            'Unordered_T4P_5',
            'Unordered_T4bP_6',
        ])

        uncomplete_ids = [system.id for system in uncomplete]
        self.assertListEqual(uncomplete_ids, [
            'Unordered_Archaeal-T4P_1',
            'Unordered_ComM_2',
            'Unordered_Tad_7',
        ])
Example #17
0
    def test_search_systems_model_unknown(self):
        """
        Ask for a model which does not exist, with sys.exit replaced by
        self.fake_exit: a TypeError carrying the 'No such model definition'
        message is expected.
        """
        logger = logging.getLogger('macsypy.macsyfinder')
        macsypy.logger_set_level(level='ERROR')
        defaults = MacsyDefaults()

        out_dir = os.path.join(self.tmp_dir, 'macsyfinder_test_search_systems')
        os.mkdir(out_dir)
        seq_db = self.find_data('base', 'test_1.fasta')
        model_dir = self.find_data('data_set', 'models')
        cmd_line = f"--sequence-db {seq_db} --db-type=gembase --models-dir {model_dir} --models nimporaoik -w 4 -o {out_dir}"

        _, parsed_args = parse_args(cmd_line.split())
        config = Config(defaults, parsed_args)
        model_bank = ModelBank()
        gene_bank = GeneBank()
        profile_factory = ProfileFactory(config)

        real_exit = sys.exit
        sys.exit = self.fake_exit
        try:
            with self.assertRaises(TypeError) as ctx:
                search_systems(config, model_bank, gene_bank,
                               profile_factory, logger)
            expected_msg = "macsyfinder: \"No such model definition: 'nimporaoik'\""
            self.assertEqual(str(ctx.exception), expected_msg)
        finally:
            # always put back the real sys.exit
            sys.exit = real_exit
Example #18
0
    def setUp(self) -> None:
        """
        Build a 'foo/T2SS' model with gspD (mandatory) and sctJ (accessory),
        four core hits and the model hits wrapping them.
        """
        args = argparse.Namespace(
            sequence_db=self.find_data("base", "test_1.fasta"),
            db_type='gembase',
            models_dir=self.find_data('models'),
        )
        cfg = Config(MacsyDefaults(), args)

        models_location = ModelLocation(path=os.path.join(args.models_dir, 'foo'))

        model = Model("foo/T2SS", 10)
        profile_factory = ProfileFactory(cfg)

        self.cg_gspd = CoreGene(models_location, "gspD", profile_factory)
        self.mg_gspd = ModelGene(self.cg_gspd, model, loner=True, multi_system=True)

        self.cg_sctj = CoreGene(models_location, "sctJ", profile_factory)
        self.mg_sctj = ModelGene(self.cg_sctj, model)

        model.add_mandatory_gene(self.mg_gspd)
        model.add_accessory_gene(self.mg_sctj)

        def core_hit(core_gene, hit_id, position):
            # the hits only differ by gene, identifier and position
            return CoreHit(core_gene, hit_id, 803, "replicon_id", position, 1.0, 1.0, 1.0, 1.0, 10, 20)

        self.chit_1 = core_hit(self.cg_gspd, "hit_1", 2)
        self.chit_2 = core_hit(self.cg_sctj, "hit_2", 3)
        self.chit_3 = core_hit(self.cg_gspd, "hit_3", 10)
        self.chit_4 = core_hit(self.cg_gspd, "hit_4", 20)

        self.mhit_1 = ModelHit(self.chit_1, self.mg_gspd, GeneStatus.MANDATORY)
        self.mhit_2 = ModelHit(self.chit_2, self.mg_sctj, GeneStatus.ACCESSORY)
        self.mhit_3 = ModelHit(self.chit_3, self.mg_gspd, GeneStatus.MANDATORY)
        self.mhit_4 = ModelHit(self.chit_4, self.mg_gspd, GeneStatus.MANDATORY)
Example #19
0
    def setUp(self):
        """
        Create an empty output directory holding a copy of the sequence file,
        then build the configuration, the hmmer directory, the model location
        of 'foo', the profile factory and the indexes.
        """
        search_dir = tempfile.gettempdir()
        out_dir = os.path.join(search_dir, 'test_macsyfinder_Report')
        args = argparse.Namespace(
            db_type='gembase',
            models_dir=self.find_data('models'),
            res_search_dir=search_dir,
            log_level=30,
            out_dir=out_dir,
        )
        # start from an empty output directory
        if os.path.exists(out_dir):
            shutil.rmtree(out_dir)
        os.mkdir(out_dir)

        # work on a copy of the sequence file located in the output directory
        seq_db = self.find_data("base", "test_base.fa")
        shutil.copy(seq_db, out_dir)
        args.sequence_db = os.path.join(out_dir, os.path.basename(seq_db))
        self.cfg = Config(MacsyDefaults(), args)

        hmmer_dir = os.path.join(self.cfg.out_dir(), self.cfg.hmmer_dir())
        os.mkdir(hmmer_dir)

        self.model_name = 'foo'
        model_path = os.path.join(args.models_dir, self.model_name)
        self.model_location = ModelLocation(path=model_path)

        # we need to reset the ProfileFactory
        # because it is like a singleton
        # so other tests are influenced by ProfileFactory and its configuration
        # for instance search_genes get profile without hmmer_exe
        self.profile_factory = ProfileFactory(self.cfg)

        Indexes(self.cfg).build()
Example #20
0
    def setUp(self) -> None:
        """
        Build a 'foo/T2SS' model with gspD (mandatory) and sctJ (accessory)
        and one hit for each of these two genes.
        """
        args = argparse.Namespace(
            sequence_db=self.find_data("base", "test_1.fasta"),
            db_type='gembase',
            models_dir=self.find_data('models'),
        )
        cfg = Config(MacsyDefaults(), args)

        models_location = ModelLocation(path=os.path.join(args.models_dir, 'foo'))

        model = Model("foo/T2SS", 10)
        profile_factory = ProfileFactory(cfg)

        self.c_gene_gspd = CoreGene(models_location, "gspD", profile_factory)
        self.gene_gspd = ModelGene(self.c_gene_gspd, model)

        self.c_gene_sctj = CoreGene(models_location, "sctJ", profile_factory)
        self.gene_sctj = ModelGene(self.c_gene_sctj, model)

        model.add_mandatory_gene(self.gene_gspd)
        model.add_accessory_gene(self.gene_sctj)

        def make_hit(core_gene, hit_id, position):
            # the hits only differ by gene, identifier and position
            return Hit(core_gene, hit_id, 803, "replicon_id", position, 1.0, 1.0, 1.0, 1.0, 10, 20)

        self.hit_1 = make_hit(self.c_gene_gspd, "hit_1", 2)
        self.hit_2 = make_hit(self.c_gene_sctj, "hit_2", 3)
    def test_inter_gene_max_space_cfg(self):
        """
        inter_gene_max_space given in the configuration must overload the
        value read from the xml definition.
        """
        model_fqn = 'foo/model_5'
        self.args.inter_gene_max_space = [[model_fqn, '222']]

        self.cfg = Config(MacsyDefaults(), self.args)
        self.model_bank = ModelBank()
        self.gene_bank = GeneBank()
        self.model_registry = ModelRegistry()
        for location in scan_models_dir(self.args.models_dir):
            self.model_registry.add(location)
        self.parser = DefinitionParser(self.cfg, self.model_bank,
                                       self.gene_bank, self.model_registry,
                                       self.profile_factory)

        definition = self.model_registry['foo'].get_definition(model_fqn)
        self.parser.parse([definition])

        parsed_model = self.model_bank[model_fqn]
        self.assertEqual(parsed_model.inter_gene_max_space, 222)
Example #22
0
def main(args=None) -> None:
    """
    The main entrypoint of the script.

    Parse the arguments, select the color theme, ask for a confirmation
    when 'macsyfinder.conf' already exists, then configure each group of
    options and write the configuration file.

    :param args: the command line arguments, sys.argv[1:] is used when None
    """
    global theme

    if args is None:
        args = sys.argv[1:]
    parsed_args = parse_args(args)

    col_init()
    if parsed_args.no_color:
        theme = Theme(ERROR=Style.BRIGHT,
                      WARN='',
                      SECTION='',
                      RESET=Style.RESET_ALL,
                      RETRY='',
                      QUESTION='',
                      EMPHASIZE='',
                      EXPLANATION='',
                      DEFAULT='')
    elif parsed_args.white_bg:
        theme = Theme(ERROR=Style.BRIGHT + Fore.RED,
                      WARN=Fore.LIGHTRED_EX,
                      RESET=Style.RESET_ALL,
                      RETRY=Style.BRIGHT + Fore.MAGENTA,
                      QUESTION=Style.BRIGHT,
                      EMPHASIZE=Style.BRIGHT,
                      EXPLANATION=Style.RESET_ALL,
                      DEFAULT=Style.BRIGHT + Fore.LIGHTBLACK_EX)
    # otherwise the theme is left untouched:
    # parsed_args.dark_bg is always True,
    # it is in the options only for coherence and is the default

    config = ConfigParserWithComments()
    defaults = MacsyDefaults()
    conf_path = 'macsyfinder.conf'

    if os.path.exists(conf_path):
        answer = ask(f"The '{conf_path}' already exists Overwrite/abort",
                     check_choice,
                     expected=["O", "a"],
                     default="O",
                     question_color=theme.QUESTION)
        if answer == "a":
            sys.exit(1)

    print(prolog())
    # the groups of options are configured in this order
    section_setters = (
        set_path_options,
        set_hmmer_options,
        set_score_options,
        set_general_options,
        set_base_options,
    )
    for configure_section in section_setters:
        configure_section(config, defaults, use_defaults=parsed_args.defaults)
    serialize(config, conf_path)
    print(epilog(conf_path))
 def setUp(self):
     """Build the configuration and an empty ModelBank."""
     args = argparse.Namespace(
         sequence_db=self.find_data("base", "test_1.fasta"),
         db_type='gembase',
         models_dir=self.find_data('models'),
         res_search_dir=tempfile.gettempdir(),
         log_level=30,
     )
     self.cfg = Config(MacsyDefaults(), args)
     self.system_bank = ModelBank()
 def test_MacsyDefaults_with_MACSY_DATA(self):
     """
     Replace macsypy.config.__MACSY_DATA__ by 'niportnaoik' and check that
     MacsyDefaults matches the expected defaults with models_dir set to
     'niportnaoik/data/models'. The original value is put back at the end.
     """
     import macsypy.config as macsy_config
     saved_macsy_data = macsy_config.__MACSY_DATA__
     macsy_config.__MACSY_DATA__ = 'niportnaoik'
     self.defaults['models_dir'] = 'niportnaoik/data/models'
     try:
         self.assertDictEqual(MacsyDefaults(), self.defaults)
     finally:
         macsy_config.__MACSY_DATA__ = saved_macsy_data
Example #25
0
    def setUp(self) -> None:
        """
        Build a 'foo' model with gspD (mandatory, multi_system) and sctJ
        (accessory, multi_system, with sctN as exchangeable), five core hits,
        and for each of them a ModelHit and a MultiSystem.
        """
        args = argparse.Namespace(
            sequence_db=self.find_data("base", "test_1.fasta"),
            db_type='gembase',
            models_dir=self.find_data('models'),
        )
        cfg = Config(MacsyDefaults(), args)

        model_name = 'foo'
        self.models_location = ModelLocation(path=os.path.join(args.models_dir, model_name))

        # we need to reset the ProfileFactory
        # because it is like a singleton
        # so other tests are influenced by ProfileFactory and its configuration
        # for instance search_genes get profile without hmmer_exe
        profile_factory = ProfileFactory(cfg)

        model = Model(model_name, 10)
        self.profile_factory = ProfileFactory(cfg)

        def core_gene(name):
            return CoreGene(self.models_location, name, self.profile_factory)

        c_gspd = core_gene("gspD")
        m_gspd = ModelGene(c_gspd, model, multi_system=True)

        c_sctj = core_gene("sctJ")
        m_sctj = ModelGene(c_sctj, model, multi_system=True)

        # sctN can be exchanged with sctJ
        c_sctn = core_gene("sctN")
        m_sctn = Exchangeable(c_sctn, m_sctj)
        m_sctj.add_exchangeable(m_sctn)

        model.add_mandatory_gene(m_gspd)
        model.add_accessory_gene(m_sctj)

        def core_hit(gene, hit_id, position, score):
            # CoreHit(gene, hit_id, hit_seq_length, replicon_name, position, i_eval, score,
            #         profile_coverage, sequence_coverage, begin_match, end_match)
            return CoreHit(gene, hit_id, 803, "replicon_id", position, 1.0, score, 1.0, 1.0, 10, 20)

        core_1 = core_hit(c_gspd, "hit_1", 2, 1.0)
        core_2 = core_hit(c_sctj, "hit_2", 3, 1.0)
        core_3 = core_hit(c_gspd, "hit_3", 10, 3.0)
        core_4 = core_hit(c_sctn, "hit_4", 14, 4.0)
        core_5 = core_hit(c_gspd, "hit_5", 20, 2.0)

        mandatory = GeneStatus.MANDATORY
        accessory = GeneStatus.ACCESSORY

        self.mhit_1 = ModelHit(core_1, m_gspd, mandatory)
        self.mhit_2 = ModelHit(core_2, m_sctj, accessory)
        self.mhit_3 = ModelHit(core_3, m_gspd, mandatory)
        self.mhit_4 = ModelHit(core_4, m_sctn, accessory)
        self.mhit_5 = ModelHit(core_5, m_gspd, mandatory)

        self.ms_1 = MultiSystem(core_1, gene_ref=m_gspd, gene_status=mandatory)
        self.ms_2 = MultiSystem(core_2, gene_ref=m_sctj, gene_status=accessory)
        self.ms_3 = MultiSystem(core_3, gene_ref=m_gspd, gene_status=mandatory)
        self.ms_4 = MultiSystem(core_4, gene_ref=m_sctn, gene_status=accessory)
        self.ms_5 = MultiSystem(core_5, gene_ref=m_gspd, gene_status=mandatory)
    def setUp(self) -> None:
        """
        Build the configuration, the model location of 'foo'
        and the profile factory.
        """
        args = argparse.Namespace(
            sequence_db=self.find_data("base", "test_1.fasta"),
            db_type='gembase',
            models_dir=self.find_data('models'),
        )
        self.cfg = Config(MacsyDefaults(), args)

        self.model_name = 'foo'
        model_path = os.path.join(args.models_dir, self.model_name)
        self.model_location = ModelLocation(path=model_path)
        self.profile_factory = ProfileFactory(self.cfg)
def main(args=None, log_level=None) -> None:
    """
    main entry point to macsy_merge_results

    Check (or create) the output directory, set up the loggers,
    then merge the results directories.

    :param args: the arguments passed on the command line
    :type args: list of str
    :param log_level: the output verbosity
    :type log_level: a positive int or a string among 'DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'
    :raise IOError: when the output directory cannot be created,
                    is not a directory or is not writable
    """
    global _log

    if args is None:
        args = sys.argv[1:]
    parsed_args = parse_args(args)

    outdir_err = None
    if os.path.exists(parsed_args.out_dir):
        if not os.path.isdir(parsed_args.out_dir):
            outdir_err = f"{parsed_args.out_dir} is not a directory"
        elif not os.access(parsed_args.out_dir, os.W_OK):
            outdir_err = f"{parsed_args.out_dir} is not writable"
    else:
        try:
            os.mkdir(parsed_args.out_dir)
        except PermissionError as err:
            outdir_err = f"Cannot create {parsed_args.out_dir} : {err}"

    if outdir_err:
        # the regular logger is not initialized yet:
        # report the problem with a dedicated handler on stdout
        formatter = colorlog.ColoredFormatter("%(log_color)s%(message)s",
                                              datefmt=None,
                                              reset=True,
                                              log_colors={'CRITICAL': 'bold_red'},
                                              secondary_log_colors={},
                                              style='%'
                                              )
        handler = colorlog.StreamHandler(sys.stdout)
        handler.setFormatter(formatter)
        log = colorlog.getLogger('macsypy.merge')
        log.addHandler(handler)
        log.critical(outdir_err)
        # do not display the traceback, the message has already been logged
        sys.tracebacklimit = 0
        raise IOError() from None

    log_file = os.path.join(parsed_args.out_dir, 'macsy_merge_results.out')
    macsypy.init_logger(log_file=log_file, out=not parsed_args.mute)
    _log = colorlog.getLogger('macsypy.merge')

    if not log_level:
        # logs are specified from args options
        config = MacsyDefaults()
        log_level = max(config.log_level - (10 * parsed_args.verbose) + (10 * parsed_args.quiet), 1)
    # when log_level is given (used by unit tests to mute or unmute logs)
    # it is applied as is
    macsypy.logger_set_level(log_level)

    merge_results(parsed_args.results_dirs, out_dir=parsed_args.out_dir)
    def test_fill_topology(self):
        """
        Write a topology file then check that _fill_topology
        returns the same replicon/topology mapping.
        """
        sent = {'ESCO030p01': 'circular', 'PSAE001c01': 'linear'}
        topology_path = self.args.sequence_db + ".topo"
        self.args.topology_file = topology_path
        with open(topology_path, 'w') as topology_file:
            # one "replicon : topology" line per entry
            topology_file.writelines(
                '{0} : {1}\n'.format(replicon, topology)
                for replicon, topology in sent.items())

        cfg = Config(MacsyDefaults(), self.args)
        # replace the constructor by the fake one provided by the test case
        RepliconDB.__init__ = self.fake_init
        db = RepliconDB(cfg)
        received = db._fill_topology()
        self.assertDictEqual(sent, received)
Example #29
0
    def setUp(self) -> None:
        """
        Build the arguments, the configuration, the model location of 'foo',
        the profile factory and the hit weights taken from the configuration.
        """
        self.args = argparse.Namespace(
            sequence_db=self.find_data("base", "test_1.fasta"),
            db_type='gembase',
            models_dir=self.find_data('models'),
            res_search_dir="blabla",
        )

        self.cfg = Config(MacsyDefaults(), self.args)
        self.model_name = 'foo'
        model_path = os.path.join(self.args.models_dir, self.model_name)
        self.model_location = ModelLocation(path=model_path)
        self.profile_factory = ProfileFactory(self.cfg)
        weights = self.cfg.hit_weights()
        self.hit_weights = HitWeight(**weights)
Example #30
0
 def test_hit_weight_default(self):
     """
     Check the values of the HitWeight built from the hit weights
     of a configuration which does not override them.
     """
     args = argparse.Namespace(
         sequence_db=self.find_data("base", "test_1.fasta"),
         db_type='gembase',
         models_dir=self.find_data('models'),
     )
     cfg = Config(MacsyDefaults(), args)
     weights = HitWeight(**cfg.hit_weights())
     self.assertEqual(weights.mandatory, 1)
     self.assertEqual(weights.accessory, 0.5)
     self.assertEqual(weights.itself, 1)
     self.assertEqual(weights.exchangeable, 0.8)
     self.assertEqual(weights.loner_multi_system, 0.7)