Code example #1
 def test_set_log_level(self):
     """_set_log_level accepts numeric levels and level names, rejects unknown names."""
     config = Config(self.defaults, self.parsed_args)
     # valid inputs: a numeric level, then a standard level name
     for value, expected in ((20, 20), ('WARNING', 30)):
         config._set_log_level(value)
         self.assertEqual(config.log_level(), expected)
     # an unknown level name must be rejected with an explicit message
     with self.assertRaises(ValueError) as ctx:
         config._set_log_level('FOO')
     self.assertEqual(str(ctx.exception),
                      "Invalid value for log_level: FOO.")
Code example #2
def _make_working_dir(config):
    """
    Create the working directory, or fail early if the path is unusable.

    :param config: the macsyfinder run configuration
    :return: the path of the (now existing, usable) working directory
    :rtype: str
    :raise ValueError: if the path exists and is either not a directory
                       or a non-empty directory
    """
    working_dir = config.working_dir()
    if not os.path.exists(working_dir):
        os.makedirs(working_dir)
    elif not os.path.isdir(working_dir):
        raise ValueError(
            f"'{working_dir}' already exists and is not a directory")
    elif os.listdir(working_dir):
        # fixed message (was the ungrammatical "is not a empty")
        raise ValueError(
            f"'{working_dir}' already exists and is not an empty directory")
    return working_dir


def _check_mandatory_options(parser, parsed_args):
    """
    Ensure the mandatory command-line options are present.

    Unless --previous-run is given, --models, --sequence-db and --db-type
    are all required.

    :param parser: the argument parser (used to print the help on failure)
    :param parsed_args: the parsed command-line arguments
    :raise OptionError: if a mandatory option is missing
    """
    missing = None
    if not parsed_args.previous_run:
        if not parsed_args.models:
            missing = "--models"
        elif not parsed_args.sequence_db:
            missing = "--sequence-db"
        elif not parsed_args.db_type:
            missing = "--db-type"
    if missing:
        parser.print_help()
        print()
        # keep the traceback out of the user's face: this is a usage error
        sys.tracebacklimit = 0
        raise OptionError(f"argument {missing} or --previous-run is required.")


def main(args=None, loglevel=None):
    """
    main entry point to MacSyFinder do some check before to launch :func:`main_search_systems` which is
    the real function that perform a search

    :param args: the arguments passed on the command line without the program name
    :type args: List of string
    :param loglevel: the output verbosity
    :type loglevel: a positive int or a string among 'DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'
    :raise ValueError: if the working directory cannot be used
    :raise OptionError: if a mandatory command-line option is missing
    """
    import time  # hoisted: was imported in the middle of the function body

    args = sys.argv[1:] if args is None else args
    parser, parsed_args = parse_args(args)

    defaults = MacsyDefaults()
    config = Config(defaults, parsed_args)

    ###########################
    # creation of working dir #
    ###########################
    working_dir = _make_working_dir(config)

    ################
    # init loggers #
    ################
    macsypy.init_logger(log_file=os.path.join(working_dir, config.log_file()),
                        out=not config.mute())
    if loglevel:
        # used by unit tests to mute or unmute logs
        macsypy.logger_set_level(level=loglevel)
    else:
        # verbosity comes from the command-line options
        macsypy.logger_set_level(level=config.log_level())

    logger = logging.getLogger('macsypy.macsyfinder')

    if parsed_args.list_models:
        print(list_models(parsed_args), file=sys.stdout)
        sys.exit(0)

    _check_mandatory_options(parser, parsed_args)

    _log.info(f"command used: {' '.join(sys.argv)}")

    models = ModelBank()
    genes = GeneBank()
    profile_factory = ProfileFactory(config)
    # weights used to score the hits during the systems search
    macsypy.hit.hit_weight = macsypy.hit.HitWeight(itself=3,
                                                   exchangeable=.75,
                                                   mandatory=2,
                                                   accessory=.25,
                                                   neutral=1.5)

    logger.info("\n{:#^70}".format(" Searching systems "))
    all_systems, rejected_clusters = search_systems(
        config, models, genes, profile_factory, logger)

    track_multi_systems_hit = HitSystemTracker(all_systems)
    if config.db_type() in ('gembase', 'ordered_replicon'):
        #############################
        # Ordered/Gembase replicons #
        #############################

        ###########################
        # select the best systems #
        ###########################
        logger.info("\n{:#^70}".format(" Computing best solutions "))
        best_solutions = []
        one_best_solution = []

        # group systems found by replicon before searching the best
        # system combination
        # NOTE(review): groupby only groups consecutive items, so this
        # assumes all_systems is already ordered by replicon_name — confirm
        # against search_systems
        for rep_name, syst_group in itertools.groupby(
                all_systems, key=lambda s: s.replicon_name):
            syst_group = list(syst_group)
            logger.info(
                f"Computing best solutions for {rep_name} (nb of systems {len(syst_group)})"
            )
            t0 = time.time()
            best_sol_4_1_replicon, score = find_best_solutions(syst_group)
            t1 = time.time()
            logger.info(
                f"It took {t1 - t0:.2f}sec to find best solution ({score:.2f}) for replicon {rep_name}"
            )
            # if several solutions are equivalent (same number of systems and
            # same score) store all of them in best_solutions and pick one
            # representative in one_best_solution
            best_solutions.extend(best_sol_4_1_replicon)
            one_best_solution.append(best_sol_4_1_replicon[0])

        ##############################
        # Write the results in files #
        ##############################
        logger.info("\n{:#^70}".format(" Writing down results "))
        system_filename = os.path.join(working_dir, "all_systems.txt")
        tsv_filename = os.path.join(working_dir, "all_systems.tsv")

        with open(system_filename, "w") as sys_file:
            systems_to_txt(all_systems, track_multi_systems_hit, sys_file)

        with open(tsv_filename, "w") as tsv_file:
            systems_to_tsv(all_systems, track_multi_systems_hit, tsv_file)

        cluster_filename = os.path.join(working_dir, "rejected_clusters.txt")
        with open(cluster_filename, "w") as clst_file:
            # sort for a deterministic, reproducible output order
            rejected_clusters.sort(key=lambda clst: (
                clst.replicon_name, clst.model, clst.hits))
            rejected_clst_to_txt(rejected_clusters, clst_file)
        if not (all_systems or rejected_clusters):
            logger.info("No Systems found in this dataset.")

        tsv_filename = os.path.join(working_dir, "all_best_solutions.tsv")
        with open(tsv_filename, "w") as tsv_file:
            solutions_to_tsv(best_solutions, track_multi_systems_hit,
                             tsv_file)

        tsv_filename = os.path.join(working_dir, "best_solution.tsv")
        with open(tsv_filename, "w") as tsv_file:
            # flatten the per-replicon lists then sort them
            one_best_solution = [
                syst for sol in one_best_solution for syst in sol
            ]
            one_best_solution.sort(
                key=lambda syst: (syst.replicon_name, syst.position[0],
                                  syst.model.fqn, -syst.score))
            systems_to_tsv(one_best_solution, track_multi_systems_hit,
                           tsv_file)
    else:
        #######################
        # Unordered replicons #
        #######################

        ##############################
        # Write the results in files #
        ##############################
        logger.info("\n{:#^70}".format(" Writing down results "))

        system_filename = os.path.join(working_dir, "all_systems.txt")
        with open(system_filename, "w") as sys_file:
            likely_systems_to_txt(all_systems, track_multi_systems_hit,
                                  sys_file)

        system_filename = os.path.join(working_dir, "all_systems.tsv")
        with open(system_filename, "w") as sys_file:
            likely_systems_to_tsv(all_systems, track_multi_systems_hit,
                                  sys_file)

        cluster_filename = os.path.join(working_dir, "uncomplete_systems.txt")
        with open(cluster_filename, "w") as clst_file:
            unlikely_systems_to_txt(rejected_clusters, clst_file)

        if not (all_systems or rejected_clusters):
            logger.info("No Systems found in this dataset.")

    logger.info("END")