Example 1
def main(args=None) -> None:
    """
    Main entry point.

    :param args: the arguments passed on the command line (before parsing)
    :type args: list of str
    :rtype: None
    """
    global _log
    args = sys.argv[1:] if args is None else args
    parser = build_arg_parser()
    parsed_args = parser.parse_args(args)

    log_level = verbosity_to_log_level(parsed_args.verbose)
    # set up the loggers for the macsypy package
    macsypy.init_logger()
    macsypy.logger_set_level(level=log_level)
    # set logger for this script
    _log = init_logger(verbosity_to_log_level(parsed_args.verbose))

    if 'func' in parsed_args:
        parsed_args.func(parsed_args)
        _log.debug("'{}' command completed successfully.".format(cmd_name(parsed_args)))
    else:
        parser.print_help()
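The verbosity_to_log_level() helper used above is not defined in this snippet; a minimal sketch of such a mapping, assuming the usual -v/-vv counting, could look like this (hypothetical, not the library's actual implementation):

import logging

def verbosity_to_log_level(verbosity: int) -> int:
    """Hypothetical sketch: map a -v/-vv count to a logging level."""
    # Start from WARNING and drop one level (10) per -v, never going below DEBUG.
    return max(logging.WARNING - 10 * verbosity, logging.DEBUG)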
Example 2
    def setUp(self):
        self.tmp_dir = os.path.join(tempfile.gettempdir(),
                                    'test_macsyfinder_search_genes')
        if os.path.exists(self.tmp_dir):
            shutil.rmtree(self.tmp_dir)
        os.mkdir(self.tmp_dir)

        macsypy.init_logger()
        macsypy.logger_set_level(30)

        args = argparse.Namespace()
        args.sequence_db = self.find_data("base", "test_base.fa")
        args.db_type = 'gembase'
        args.models_dir = self.find_data('models')
        args.log_level = 30
        args.out_dir = os.path.join(self.tmp_dir, 'job_1')
        args.res_search_dir = args.out_dir
        args.no_cut_ga = True
        args.index_dir = self.tmp_dir
        os.mkdir(args.out_dir)

        self.cfg = Config(MacsyDefaults(), args)

        self.model_name = 'foo'
        self.model_location = ModelLocation(
            path=os.path.join(args.models_dir, self.model_name))

        idx = Indexes(self.cfg)
        idx.build()
        self.profile_factory = ProfileFactory(self.cfg)
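A matching tearDown is not part of this snippet; a minimal sketch (relying only on the shutil import already used in setUp) would simply remove the temporary directory:

    def tearDown(self):
        # Clean up the temporary working directory created in setUp (sketch).
        shutil.rmtree(self.tmp_dir, ignore_errors=True)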
Example 3
    def setUp(self) -> None:
        self.tmpdir = os.path.join(tempfile.gettempdir(), 'macsy_test_package')
        if os.path.exists(self.tmpdir) and os.path.isdir(self.tmpdir):
            shutil.rmtree(self.tmpdir)
        os.makedirs(self.tmpdir)

        macsypy.init_logger()
        macsypy.logger_set_level(30)
        logger = colorlog.getLogger('macsypy.package')
        package._log = logger
        logger = colorlog.getLogger('macsypy.model_conf_parser')
        model_conf_parser._log = logger
        self.metadata = {"maintainer": {"name": "auth_name",
                                    "email": "*****@*****.**"},
                         "short_desc": "this is a short description of the repos",
                         "vers": "0.0b2",
                         "cite": ["bla bla",
                                  "link to publication",
                                  """ligne 1
ligne 2
ligne 3 et bbbbb
"""],
                         "doc": "http://link/to/the/documentation",
                         "license": "CC BY-NC-SA 4.0 (https://creativecommons.org/licenses/by-nc-sa/4.0/)",
                         "copyright": "2019, Institut Pasteur, CNRS"
                          }
Example 4
 def test_logger_set_level_bad_level(self):
     macsypy.init_logger()
     with self.assertRaises(ValueError) as ctx:
         macsypy.logger_set_level(level=-1)
     self.assertEqual(
         str(ctx.exception),
         'Level must be NOTSET, DEBUG, INFO, WARNING, ERROR, CRITICAL or a positive integer'
     )
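The error message above also documents which values are accepted; a quick sketch of valid calls (using only the functions already shown in these examples):

macsypy.init_logger()
macsypy.logger_set_level(level='DEBUG')   # a symbolic level name
macsypy.logger_set_level(level=15)        # or any positive integer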
Example 5
def main(args=None, log_level=None) -> None:
    """
    Main entry point for macsy_merge_results.

    :param args: the arguments passed on the command line
    :type args: list of str
    :param log_level: the output verbosity
    :type log_level: a positive int or a string among 'DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'
    """
    global _log

    args = sys.argv[1:] if args is None else args
    parsed_args = parse_args(args)

    outdir_err = None
    if not os.path.exists(parsed_args.out_dir):
        try:
            os.mkdir(parsed_args.out_dir)
        except PermissionError as err:
            outdir_err = f"Cannot create {parsed_args.out_dir} : {err}"
    elif not os.path.isdir(parsed_args.out_dir):
        outdir_err = f"{parsed_args.out_dir} is not a directory"
    elif not os.access(parsed_args.out_dir, os.W_OK):
        outdir_err = f"{parsed_args.out_dir} is not writable"
    if outdir_err:
        log = colorlog.getLogger('macsypy.merge')
        stdout_handler = colorlog.StreamHandler(sys.stdout)
        stdout_formatter = colorlog.ColoredFormatter("%(log_color)s%(message)s",
                                                     datefmt=None,
                                                     reset=True,
                                                     log_colors={'CRITICAL': 'bold_red'},
                                                     secondary_log_colors={},
                                                     style='%'
                                                     )
        stdout_handler.setFormatter(stdout_formatter)
        log.addHandler(stdout_handler)
        log.critical(outdir_err)
        sys.tracebacklimit = 0
        raise IOError() from None

    macsypy.init_logger(log_file=os.path.join(parsed_args.out_dir, 'macsy_merge_results.out'),
                        out=not parsed_args.mute)
    _log = colorlog.getLogger('macsypy.merge')

    if not log_level:
        # the log level is derived from the command-line options
        config = MacsyDefaults()
        log_level = max(config.log_level - (10 * parsed_args.verbose) + (10 * parsed_args.quiet), 1)
        macsypy.logger_set_level(log_level)
    else:
        # used by unit tests to mute or unmute logs
        macsypy.logger_set_level(log_level)

    merge_results(parsed_args.results_dirs, out_dir=parsed_args.out_dir)
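The log-level arithmetic above can be checked with a small worked example (a sketch, assuming the MacsyDefaults log_level default is logging.WARNING, i.e. 30, as suggested by the tests further down):

import logging

default_level = 30                         # assumed default (logging.WARNING)
verbose, quiet = 2, 0                      # e.g. '-vv' on the command line
log_level = max(default_level - (10 * verbose) + (10 * quiet), 1)
assert log_level == logging.DEBUG          # 30 - 20 + 0 = 10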
Example 6
 def test_logger_set_level_handlers(self):
     macsypy.init_logger()
     macsypy.logger_set_level(level='DEBUG')
     logger = colorlog.getLogger('macsypy')
     self.assertEqual(
         logger.handlers[0].formatter.log_colors, {
             'DEBUG': 'cyan',
             'INFO': 'green',
             'WARNING': 'yellow',
             'ERROR': 'red',
             'CRITICAL': 'bold_red'
         })
Example 7
 def setUp(self) -> None:
      # we need an ugly trick with the loggers
      # because loggers are singletons and
      # trigger side effects in other unit tests:
      # for instance, if this test runs after test_macsypy,
      # where the loggers are also tested, the model_conf_parser
      # logger is not replaced by the logger set in setUp,
      # and then catch_log no longer works
     macsypy.init_logger()
     macsypy.logger_set_level(logging.INFO)
     logger = colorlog.getLogger('macsypy')
     macsypy.model_conf_parser._log = logger
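The comments above rely on loggers being singletons per name: fetching the same name twice returns the same object, which is why the module-level _log references have to be reassigned explicitly. A quick illustration (sketch):

import colorlog

a = colorlog.getLogger('macsypy')
b = colorlog.getLogger('macsypy')
assert a is b   # same underlying logger object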
Example 8
    def setUp(self):
        macsypy.init_logger()
        logger = colorlog.getLogger('macsypy.registries')
        registries._log = logger
        self.tmp_dir = tempfile.mkdtemp()
        self.root_models_dir = os.path.join(self.tmp_dir, 'models')
        os.mkdir(self.root_models_dir)

        self.simple_models = {
            'name': 'simple',
            'profiles': ('prof_1.hmm', 'prof_2.hmm'),
            'not_profiles': ('not_a_profile', ),
            'definitions': {
                'def_1.xml': None,
                'def_2.xml': None
            },
            'not_definitions': {
                'not_a_def': None
            },
        }

        self.complex_models = {
            'name': 'complex',
            'profiles': ('prof_1.hmm', 'prof_2.hmm'),
            'not_profiles': ('not_a_profile', ),
            'definitions': {
                'subdef_1': {
                    'def_1_1.xml': None,
                    'def_1_2.xml': None
                },
                'subdef_2': {
                    'def_2_1.xml': None,
                    'def_2_2.xml': None,
                    'sub_subdef': {
                        'def_2_3_1.xml': None,
                        'def_2_3_2.xml': None
                    }
                },
            },
            'not_definitions': {
                'subdef_1': {
                    'not_a_def': None
                },
                'subdef_2': {
                    'not_a_def': None
                }
            },
        }
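The 'definitions' entries above mirror an on-disk layout: a dict value is a sub-directory, a None value is a plain file. A hypothetical helper (create_tree is not part of the original snippet) to materialize such a tree could look like this:

import os

def create_tree(root, tree):
    """Hypothetical sketch: create a directory per dict value and an empty file per None value."""
    for name, content in tree.items():
        path = os.path.join(root, name)
        if content is None:
            open(path, 'w').close()   # empty placeholder file
        else:
            os.mkdir(path)
            create_tree(path, content)

# e.g. create_tree(os.path.join(self.root_models_dir, 'complex', 'definitions'),
#                  self.complex_models['definitions'])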
Example 9
def main(args=None, loglevel=None):
    """
    Main entry point for MacSyFinder: performs some checks before launching :func:`main_search_systems`,
    which is the function that actually performs the search.

    :param args: the arguments passed on the command line, without the program name
    :type args: list of str
    :param loglevel: the output verbosity
    :type loglevel: a positive int or a string among 'DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'
    """
    args = sys.argv[1:] if args is None else args
    parser, parsed_args = parse_args(args)

    defaults = MacsyDefaults()
    config = Config(defaults, parsed_args)

    ###########################
    # creation of working dir
    ###########################
    working_dir = config.working_dir()
    if not os.path.exists(working_dir):
        os.makedirs(working_dir)
    else:
        if os.path.isdir(working_dir):
            if os.listdir(working_dir):
                raise ValueError(
                    f"'{working_dir}' already exists and is not a empty")
        else:
            raise ValueError(
                f"'{working_dir}' already exists and is not a directory")

    ################
    # init loggers #
    ################
    macsypy.init_logger(log_file=os.path.join(config.working_dir(),
                                              config.log_file()),
                        out=not config.mute())
    if not loglevel:
        # the log level is derived from the command-line options
        macsypy.logger_set_level(level=config.log_level())
    else:
        # used by unit tests to mute or unmute logs
        macsypy.logger_set_level(level=loglevel)

    logger = logging.getLogger('macsypy.macsyfinder')

    if parsed_args.list_models:
        print(list_models(parsed_args), file=sys.stdout)
        sys.exit(0)
    else:
        if not parsed_args.previous_run and not parsed_args.models:
            parser.print_help()
            print()
            sys.tracebacklimit = 0
            raise OptionError(
                "argument --models or --previous-run is required.")
        elif not parsed_args.previous_run and not parsed_args.sequence_db:
            parser.print_help()
            print()
            sys.tracebacklimit = 0
            raise OptionError(
                "argument --sequence-db or --previous-run is required.")
        elif not parsed_args.previous_run and not parsed_args.db_type:
            parser.print_help()
            print()
            sys.tracebacklimit = 0
            raise OptionError(
                "argument --db-type or --previous-run is required.")

        _log.info(f"command used: {' '.join(sys.argv)}")

        models = ModelBank()
        genes = GeneBank()
        profile_factory = ProfileFactory(config)
        macsypy.hit.hit_weight = macsypy.hit.HitWeight(itself=3,
                                                       exchangeable=.75,
                                                       mandatory=2,
                                                       accessory=.25,
                                                       neutral=1.5)

        logger.info("\n{:#^70}".format(" Searching systems "))
        all_systems, rejected_clusters = search_systems(
            config, models, genes, profile_factory, logger)

        track_multi_systems_hit = HitSystemTracker(all_systems)
        if config.db_type() in ('gembase', 'ordered_replicon'):
            #############################
            # Ordered/Gembase replicons #
            #############################

            ###########################
            # select the best systems #
            ###########################
            logger.info("\n{:#^70}".format(" Computing best solutions "))
            best_solutions = []
            one_best_solution = []

            # group the systems found by replicon
            # before searching for the best system combination
            import time  # local import, used only to time the solution search below
            for rep_name, syst_group in itertools.groupby(
                    all_systems, key=lambda s: s.replicon_name):
                syst_group = list(syst_group)
                logger.info(
                    f"Computing best solutions for {rep_name} (nb of systems {len(syst_group)})"
                )
                t0 = time.time()
                best_sol_4_1_replicon, score = find_best_solutions(syst_group)
                t1 = time.time()
                logger.info(
                    f"It took {t1 - t0:.2f}sec to find best solution ({score:.2f}) for replicon {rep_name}"
                )
                # if several solutions are equivalent (same number of systems and same score),
                # store all the equivalent solutions in best_solutions => all_best_systems
                # and pick one of them in one_best_solution => best_systems
                best_solutions.extend(best_sol_4_1_replicon)
                one_best_solution.append(best_sol_4_1_replicon[0])

            ##############################
            # Write the results in files #
            ##############################
            logger.info("\n{:#^70}".format(" Writing down results "))
            system_filename = os.path.join(config.working_dir(),
                                           "all_systems.txt")
            tsv_filename = os.path.join(config.working_dir(),
                                        "all_systems.tsv")

            with open(system_filename, "w") as sys_file:
                systems_to_txt(all_systems, track_multi_systems_hit, sys_file)

            with open(tsv_filename, "w") as tsv_file:
                systems_to_tsv(all_systems, track_multi_systems_hit, tsv_file)

            cluster_filename = os.path.join(config.working_dir(),
                                            "rejected_clusters.txt")
            with open(cluster_filename, "w") as clst_file:
                rejected_clusters.sort(key=lambda clst: (
                    clst.replicon_name, clst.model, clst.hits))
                rejected_clst_to_txt(rejected_clusters, clst_file)
            if not (all_systems or rejected_clusters):
                logger.info("No Systems found in this dataset.")

            tsv_filename = os.path.join(config.working_dir(),
                                        "all_best_solutions.tsv")
            with open(tsv_filename, "w") as tsv_file:
                solutions_to_tsv(best_solutions, track_multi_systems_hit,
                                 tsv_file)

            tsv_filename = os.path.join(config.working_dir(),
                                        "best_solution.tsv")
            with open(tsv_filename, "w") as tsv_file:
                # flatten the list and sort it
                one_best_solution = [
                    syst for sol in one_best_solution for syst in sol
                ]
                one_best_solution.sort(
                    key=lambda syst: (syst.replicon_name, syst.position[0],
                                      syst.model.fqn, -syst.score))
                systems_to_tsv(one_best_solution, track_multi_systems_hit,
                               tsv_file)
        else:
            #######################
            # Unordered replicons #
            #######################

            ##############################
            # Write the results in files #
            ##############################
            logger.info("\n{:#^70}".format(" Writing down results "))

            system_filename = os.path.join(config.working_dir(),
                                           "all_systems.txt")
            with open(system_filename, "w") as sys_file:
                likely_systems_to_txt(all_systems, track_multi_systems_hit,
                                      sys_file)

            # forbidden = [s for s in all_systems if s.forbidden_occ]
            # system_filename = os.path.join(config.working_dir(), "forbidden_components.tsv")
            # with open(system_filename, "w") as sys_file:
            #     likely_systems_to_tsv(forbidden, track_multi_systems_hit, sys_file)

            system_filename = os.path.join(config.working_dir(),
                                           "all_systems.tsv")
            with open(system_filename, "w") as sys_file:
                likely_systems_to_tsv(all_systems, track_multi_systems_hit,
                                      sys_file)

            cluster_filename = os.path.join(config.working_dir(),
                                            "uncomplete_systems.txt")
            with open(cluster_filename, "w") as clst_file:
                unlikely_systems_to_txt(rejected_clusters, clst_file)

            if not (all_systems or rejected_clusters):
                logger.info("No Systems found in this dataset.")

    logger.info("END")
Example 10
 def test_logger_set_level_error(self):
     macsypy.init_logger()
     macsypy.logger_set_level(level='ERROR')
     logger = logging.getLogger('macsypy')
     self.assertEqual(logger.getEffectiveLevel(), logging.ERROR)
Example 11
 def test_logger_set_level_default(self):
     macsypy.init_logger()
     macsypy.logger_set_level()
     logger = logging.getLogger('macsypy')
     self.assertEqual(logger.getEffectiveLevel(), logging.INFO)
Example 12
 def test_init_logger_logfile(self):
     with tempfile.NamedTemporaryFile() as fp:
         handlers = macsypy.init_logger(log_file=fp.name)
         self.assertEqual(len(handlers), 2)
         self.assertTrue(isinstance(handlers[1], logging.FileHandler))
Example 13
 def test_init_logger_no_out(self):
     handlers = macsypy.init_logger(out=False)
     self.assertEqual(len(handlers), 1)
     self.assertTrue(isinstance(handlers[0], logging.NullHandler))
Example 14
 def test_init_logger_default(self):
     handlers = macsypy.init_logger()
     self.assertEqual(len(handlers), 1)
     self.assertTrue(isinstance(handlers[0], logging.StreamHandler))
     logger = logging.getLogger('macsypy')
     self.assertEqual(logger.getEffectiveLevel(), logging.WARNING)