def test_logger_info(capsys):
    """
    Test that when logger is initialized with "INFO" level, it does not return DEBUG info.
    """
    logfile = os.path.join(GENEPATH, "logfile_test.txt")
    utils.init_logger(logfile, logging.INFO, "info")
    logger = logging.getLogger("info")
    # Emit one record per level ('details' is a project-specific custom level)
    for emit, text in ((logger.debug, "info debug"),
                       (logger.details, "info details"),
                       (logger.info, "info info"),
                       (logger.warning, "info warning"),
                       (logger.error, "info error"),
                       (logger.critical, "info critical")):
        emit(text)
    out, err = capsys.readouterr()
    assert "info info" in out
    for text in ("info error", "info critical"):
        assert text in err
    # .log file keeps INFO and above
    with open(logfile + ".log", "r") as logf:
        for suffix in (" :: INFO :: info info\n",
                       " :: WARNING :: info warning\n",
                       " :: ERROR :: info error\n",
                       " :: CRITICAL :: info critical\n"):
            assert logf.readline().endswith(suffix)
    # At INFO level, no .details nor .debug file is created
    assert not os.path.isfile(logfile + ".log.details")
    assert not os.path.isfile(logfile + ".log.debug")
    # .log.err file keeps WARNING and above
    with open(logfile + ".log.err", "r") as logf:
        for suffix in (" :: WARNING :: info warning\n",
                       " :: ERROR :: info error\n",
                       " :: CRITICAL :: info critical\n"):
            assert logf.readline().endswith(suffix)
# Example #2
def setup_teardown_module():
    """
    Remove log files at the end of this test module

    Before each test:
    - init logger
    - create directory to put generated files

    After:
    - remove all log files
    - remove directory with generated results
    """
    # Setup: logger + working directory for generated files
    utils.init_logger(LOGFILE_BASE, logging.DEBUG, 'test_post_mmseq',
                      verbose=1)
    os.mkdir(GENEPATH)
    print("setup")

    yield

    # Teardown: drop generated results, then every leftover log file
    shutil.rmtree(GENEPATH)
    for logfile in LOGFILES:
        if os.path.exists(logfile):
            os.remove(logfile)
    print("teardown")
# Example #3
def test_run_prokka_out_problem_running():
    """
    Check that when a problem occurs while trying to run prokka, run_prokka returns False,
    and the error message indicating to read in the log why it couldn't run
    """
    logger = my_logger("test_run_prokka_out_problem_running")
    utils.init_logger(LOGFILE_BASE, 0, 'test_run_prokka_out_problem_running')
    # H299_H561bis.fasta is the genome expected to make prokka fail
    gpath = os.path.join(GEN_PATH, "H299_H561bis.fasta")
    cores_prokka = 2
    name = "test_runprokka_H299-error"
    force = False
    nbcont = 3
    trn_file = "nofile.trn"
    arguments = (gpath, GENEPATH, cores_prokka, name, force, nbcont, trn_file,
                 logger[0])
    assert not afunc.run_prokka(arguments)
    # Expected log records: start message, prokka command, error message
    q = logger[0]
    assert q.qsize() == 3
    assert q.get().message.startswith("Start annotating")
    assert q.get().message == (
        "Prokka command: prokka "
        "--outdir test/data/annotate/generated_by_unit-tests/"
        "H299_H561bis.fasta-prokkaRes --cpus 2 "
        "--prefix test_runprokka_H299-error "
        "--centre prokka test/data/annotate/genomes/H299_H561bis.fasta")
    assert q.get().message == (
        "Error while trying to run prokka on test_runprokka_H299-error "
        "from test/data/annotate/genomes/H299_H561bis.fasta")
def test_logger_critical(capsys):
    """
    Test that when logger is initialized with "CRITICAL" level, it only returns
    CRITICAL information.

    NOTE(review): the assertions below also expect INFO/WARNING/ERROR records
    on stdout/stderr and in the .log/.log.err files, which contradicts the
    summary above — presumably init_logger's stream/file handlers use fixed
    levels and `level` only controls the optional .details/.debug outputs;
    confirm against utils.init_logger.
    """
    logfile = os.path.join(GENEPATH, "logfile_test.txt")
    level = logging.CRITICAL
    utils.init_logger(logfile, level, "crit")
    logger = logging.getLogger("crit")
    # Emit one record per level ('details' is a project-specific custom level)
    logger.debug("info debug")
    logger.details("info details")
    logger.info("info info")
    logger.warning("info warning")
    logger.error("info error")
    logger.critical("info critical")
    out, err = capsys.readouterr()
    assert "info info" in out
    assert "info error" in err
    assert "info critical" in err
    # Only .log and .log.err must exist (files containing "fuse" are ignored)
    files = os.listdir(GENEPATH)
    files = [f for f in files if "fuse" not in f]
    assert len(files) == 2
    with open(logfile + ".log", "r") as logf:
        assert logf.readline().endswith(" :: INFO :: info info\n")
        assert logf.readline().endswith(" :: WARNING :: info warning\n")
        assert logf.readline().endswith(" :: ERROR :: info error\n")
        assert logf.readline().endswith(" :: CRITICAL :: info critical\n")
    # No .details file at this level
    assert not os.path.isfile(logfile + ".log.details")
    with open(logfile + ".log.err", "r") as logf:
        assert logf.readline().endswith(" :: WARNING :: info warning\n")
        assert logf.readline().endswith(" :: ERROR :: info error\n")
        assert logf.readline().endswith(" :: CRITICAL :: info critical\n")
# Example #5
def test_run_prokka_out_exists_ok():
    """
    Test that when the output directory already exists, and files inside are OK,
    run_prokka returns True, with a warning message indicating that prokka did not rerun.
    """
    logger = my_logger("test_run_prokka_out_exists_ok")
    utils.init_logger(LOGFILE_BASE, 0, 'prokka_out_exists_ok')
    # No real genome needed: existing prokka results in TEST_DIR are reused
    gpath = "path/to/nogenome/original_name.fna"
    arguments = (gpath, TEST_DIR, 1, "prokka_out_for_test", False, 6,
                 "nofile.trn", logger[0])
    assert afunc.run_prokka(arguments)

    # Expected records: start, folder-exists warning, reuse notice, end
    q = logger[0]
    assert q.qsize() == 4
    for expected_start in (
            "Start annotating",
            "Prokka results folder test/data/annotate/"
            "test_files/"
            "original_name.fna-prokkaRes already exists.",
            "Prokka did not run again, formatting step used already "
            "generated results of Prokka in "
            "test/data/annotate/test_files/original_name.fna-prokkaRes.",
            "End annotating"):
        assert q.get().message.startswith(expected_start)
# Example #6
def test_run_prodigal_out_exists_ok():
    """
    Test that when the output directory already exists, and files inside are OK,
    run_prodigal returns True, with a warning message indicating that prodigal did not rerun.
    """
    logger = my_logger("test_run_prodigal_out_exists_ok")
    utils.init_logger(LOGFILE_BASE, 0, 'prodigal_out_exists_ok')
    # No real genome needed: existing prodigal results in TEST_DIR are reused
    trn_file = os.path.join(TEST_DIR, "A_H738-and-B2_A3_5.fna.trn")
    arguments = ("path/to/nogenome/original_name.fna", TEST_DIR, 1,
                 "prodigal.outtest.ok", False, 7, trn_file, logger[0])
    assert afunc.run_prodigal(arguments)

    # Expected records: start, folder-exists warning, reuse notice, end
    q = logger[0]
    assert q.qsize() == 4
    for expected_start in (
            "Start annotating prodigal.outtest.ok (from "
            "path/to/nogenome/original_name.fna sequence) with Prodigal",
            "Prodigal results folder test/data/annotate/test_files/"
            "original_name.fna-prodigalRes already exists.",
            "Prodigal did not run again. Formatting step will use "
            "already generated results of Prodigal in "
            "test/data/annotate/test_files/"
            "original_name.fna-prodigalRes.",
            "End annotating"):
        assert q.get().message.startswith(expected_start)
def test_logger_warning_verbose2(capsys):
    """
    Test that when logger is initialized with "WARNING" level, it does not return
    anything in stdout, as DEBUG and INFO are not returned.

    NOTE(review): the assertions below expect "info info" and "info details"
    in stdout and "info warning" in stderr, which contradicts the summary
    above — presumably verbose=2 forces info/details onto stdout regardless
    of `level`; confirm against utils.init_logger.
    """
    logfile = os.path.join(GENEPATH, "logfile_test.txt")
    level = logging.WARNING
    utils.init_logger(logfile, level, "warn", verbose=2)
    logger = logging.getLogger("warn")
    # Emit one record per level ('details' is a project-specific custom level)
    logger.debug("info debug")
    logger.details("info details")
    logger.info("info info")
    logger.warning("info warning")
    logger.error("info error")
    logger.critical("info critical")
    out, err = capsys.readouterr()
    assert "info info" in out
    assert "info details" in out
    assert "info error" in err
    assert "info warning" in err
    assert "info critical" in err
    # Only .log and .log.err must exist (files containing "fuse" are ignored)
    files = os.listdir(GENEPATH)
    files = [f for f in files if "fuse" not in f]
    assert len(files) == 2
    with open(logfile + ".log", "r") as logf:
        assert logf.readline().endswith(" :: INFO :: info info\n")
        assert logf.readline().endswith(" :: WARNING :: info warning\n")
        assert logf.readline().endswith(" :: ERROR :: info error\n")
        assert logf.readline().endswith(" :: CRITICAL :: info critical\n")
    # No .details file at this level
    assert not os.path.isfile(logfile + ".log.details")
    with open(logfile + ".log.err", "r") as logf:
        assert logf.readline().endswith(" :: WARNING :: info warning\n")
        assert logf.readline().endswith(" :: ERROR :: info error\n")
        assert logf.readline().endswith(" :: CRITICAL :: info critical\n")
# Example #8
def test_run_prokka_out_doesnt_exist_ok():
    """
    Test that when the output directory does not exist, it creates it, and runs prokka
    with all expected outfiles
    """
    logger = my_logger("test_run_prokka_out_doesnt_exist")
    utils.init_logger(LOGFILE_BASE, 0, 'test_run_prokka_out_doesnt_exist')
    gpath = os.path.join(GEN_PATH, "H299_H561.fasta")
    out_dir = os.path.join(GENEPATH, "H299_H561.fasta-prokkaRes")
    cores_prokka = 2
    name = "test_runprokka_H299"
    force = False
    nbcont = 3
    trn_file = "nofile.trn"
    arguments = (gpath, GENEPATH, cores_prokka, name, force, nbcont, trn_file,
                 logger[0])
    assert afunc.run_prokka(arguments)
    # Check content of tbl, ffn and faa files
    exp_dir = os.path.join(EXP_DIR,
                           "H299_H561.fasta-short-contig.fna-prokkaRes",
                           "test_runprokka_H299")
    out_tbl = os.path.join(out_dir, name + ".tbl")
    out_faa = os.path.join(out_dir, name + ".faa")
    out_ffn = os.path.join(out_dir, name + ".ffn")
    assert os.path.isfile(out_tbl)
    # For tbl file, check that, at least, the 3 contigs were considered,
    # and that the number of CDS is as expected.
    # Before, we checked that the output was exactly as expected. But it
    # changes with the different versions of prokka, so we cannot compare
    # the whole file.
    with open(out_tbl, "r") as outt:
        lines = [line.strip() for line in outt]
        # 3 contigs -> 3 'Feature' lines
        assert sum(1 for line in lines if 'Feature' in line) == 3
        # 16 CDS entries expected
        assert sum(1 for line in lines if "CDS" in line) == 16
    # Check that faa and ffn files are as expected
    assert os.path.isfile(out_faa)
    assert tutil.compare_order_content(exp_dir + ".faa", out_faa)
    assert os.path.isfile(out_ffn)
    assert tutil.compare_order_content(exp_dir + ".ffn", out_ffn)
    # Expected log records: start, prokka command, end
    q = logger[0]
    assert q.qsize() == 3
    assert q.get().message.startswith("Start annotating")
    assert q.get().message == (
        "Prokka command: prokka "
        "--outdir test/data/annotate/generated_by_unit-tests/"
        "H299_H561.fasta-prokkaRes --cpus 2 --prefix test_runprokka_H299 "
        "--centre prokka test/data/annotate/genomes/H299_H561.fasta")
    assert q.get().message.startswith("End annotating")
# Example #9
 def make_logger(name="test_post_mmseq"):
     """
     Create logger according to name given
     """
     logfile_base = "log_" + name
     level = logging.DEBUG
     utils.init_logger(logfile_base, level, name, verbose=0, quiet=False)
     return logfile_base
# Example #10
def test_run_prodigal_out_exists_force():
    """
    Test that when the output directory already exists with wrong files, but force is on,
    prodigal is rerun and outputs the right files
    """
    logger = my_logger("test_run_prodigal_out_exists_force")
    utils.init_logger(LOGFILE_BASE, 0, 'force')
    gpath = os.path.join(GEN_PATH, "H299_H561.fasta")
    out_prodir = os.path.join(GENEPATH, "H299_H561.fasta-prodigalRes")
    name = "test_runprodigal_H299"
    # Put empty gff, faa, ffn files in prodigal output dir, to check that they are overridden
    os.makedirs(out_prodir)
    open(os.path.join(out_prodir, name + ".gff"), "w").close()
    open(os.path.join(out_prodir, name + ".faa"), "w").close()
    open(os.path.join(out_prodir, name + ".ffn"), "w").close()
    cores_prodigal = 2
    force = True
    nbcont = 3
    trn_file = os.path.join(TEST_DIR, "A_H738-and-B2_A3_5.fna.trn")
    arguments = (gpath, GENEPATH, cores_prodigal, name, force, nbcont,
                 trn_file, logger[0])
    assert afunc.run_prodigal(arguments)
    # As we used 'force', faa and ffn files, which were empty, must have been
    # replaced by the prodigal output
    exp_dir = os.path.join(EXP_DIR, "H299_H561.fasta-prodigalRes",
                           "ESCO.1015.00001")
    out_faa = os.path.join(out_prodir, name + ".faa")
    out_ffn = os.path.join(out_prodir, name + ".ffn")
    # Check that faa and ffn files are as expected
    assert os.path.isfile(out_faa)
    assert tutil.compare_order_content(exp_dir + ".faa", out_faa)
    assert os.path.isfile(out_ffn)
    assert tutil.compare_order_content(exp_dir + ".ffn", out_ffn)
    # Expected log records: force warning, start, command, end
    q = logger[0]
    assert q.qsize() == 4
    assert q.get().message.startswith(
        "Prodigal results folder already exists, but is "
        "removed because --force option was used")
    assert q.get().message.startswith(
        "Start annotating test_runprodigal_H299 (from test/data/"
        "annotate/genomes/H299_H561.fasta sequence) "
        "with Prodigal")
    assert q.get().message.startswith(
        "Prodigal command: prodigal -i test/data/annotate/genomes/"
        "H299_H561.fasta -d test/data/annotate/"
        "generated_by_unit-tests/H299_H561.fasta-prodigalRes/"
        "test_runprodigal_H299.ffn -a test/data/annotate/"
        "generated_by_unit-tests/H299_H561.fasta-prodigalRes/"
        "test_runprodigal_H299.faa -f gff -o test/data/annotate/"
        "generated_by_unit-tests/H299_H561.fasta-prodigalRes/"
        "test_runprodigal_H299.gff -t "
        "test/data/annotate/test_files/A_H738-and-B2_A3_5.fna.trn "
        "-q")
    assert q.get().message.startswith(
        "End annotating test_runprodigal_H299 "
        "(from test/data/annotate/genomes/H299_H561.fasta)")
def test_log_listen(capsys):
    """
    Check that when we log to a queue listener, and then handle the logs
    via logger_thread, the logs appear.
    """
    import multiprocessing
    import threading

    # Create Queue, QueueHandler, and log messages to it
    m = multiprocessing.Manager()
    q = m.Queue()
    qh = logging.handlers.QueueHandler(q)
    root = logging.getLogger()
    root.setLevel(logging.DEBUG)
    # Drop any handlers left over from previous tests, so records go only
    # to the queue handler added below
    root.handlers = []
    logging.addLevelName(utils.detail_lvl(), "DETAIL")
    root.addHandler(qh)
    logger = logging.getLogger('process')
    logger.debug("debug message")
    logger.log(utils.detail_lvl(), "detail message")
    logger.info("info message")
    logger.warning("warning message")
    logger.error("error message")
    logger.critical("critical message")
    # None is the sentinel telling logger_thread to stop consuming the queue
    q.put(None)

    # Initialize real logger
    logfile = os.path.join(GENEPATH, "logfile_test.txt")
    utils.init_logger(logfile, 0, '')

    # Listen to QueueHandler and handle messages to stdout/stderr/files
    lp = threading.Thread(target=utils.logger_thread, args=(q, ))
    lp.start()
    # Wait until every queued record has been handled before asserting
    lp.join()

    out, err = capsys.readouterr()
    assert "info message" in out
    assert "error message" in err
    assert "critical message" in err
    # .log file: INFO and above (no debug/detail lines)
    with open(logfile + ".log", "r") as logf:
        assert logf.readline().endswith(" :: INFO :: info message\n")
        assert logf.readline().endswith(" :: WARNING :: warning message\n")
        assert logf.readline().endswith(" :: ERROR :: error message\n")
        assert logf.readline().endswith(" :: CRITICAL :: critical message\n")
    # .log.details file: DETAIL and above (debug still excluded)
    with open(logfile + ".log.details") as logf:
        assert logf.readline().endswith(" :: DETAIL :: detail message\n")
        assert logf.readline().endswith(" :: INFO :: info message\n")
        assert logf.readline().endswith(" :: WARNING :: warning message\n")
        assert logf.readline().endswith(" :: ERROR :: error message\n")
        assert logf.readline().endswith(" :: CRITICAL :: critical message\n")
    # .log.err file: WARNING and above
    with open(logfile + ".log.err", "r") as logf:
        assert logf.readline().endswith(" :: WARNING :: warning message\n")
        assert logf.readline().endswith(" :: ERROR :: error message\n")
        assert logf.readline().endswith(" :: CRITICAL :: critical message\n")
def test_logger_exists(capsys):
    """
    Test that when the logfiles already exist, it creates new ones with a timestamp added
    """
    import glob
    logfile = os.path.join(GENEPATH, "logfile_test.txt")
    # Pre-create empty log files so init_logger must pick new names
    for ext in (".log", ".log.details", ".log.debug", ".log.err"):
        open(logfile + ext, "w").close()
    utils.init_logger(logfile, logging.DEBUG, "already_exist", verbose=1)
    logger = logging.getLogger("already_exist")
    logger.debug("info debug")
    logger.details("info details")
    logger.info("info info")
    logger.warning("info warning")
    logger.error("info error")
    logger.critical("info critical")
    out, err = capsys.readouterr()
    assert "info info" in out
    for msg in ("info warning", "info error", "info critical"):
        assert msg in err
    # Check that initial log files are still empty
    for ext in (".log", ".log.debug", ".log.err", ".log.details"):
        with open(logfile + ext, "r") as logf:
            assert logf.readlines() == []
    # Check for new .log file, remove the one which is empty
    logs = glob.glob(logfile + "*" + ".log")
    assert len(logs) == 2
    logs.remove(logfile + ".log")
    with open(logs[0], "r") as logf:
        for suffix in (" :: INFO :: info info\n",
                       " :: WARNING :: info warning\n",
                       " :: ERROR :: info error\n",
                       " :: CRITICAL :: info critical\n"):
            assert logf.readline().endswith(suffix)
    # Same thing for .log.err file
    logs_err = glob.glob(logfile + "*" + ".log.err")
    assert len(logs_err) == 2
    logs_err.remove(logfile + ".log.err")
    with open(logs_err[0], "r") as logf:
        for suffix in (" :: WARNING :: info warning\n",
                       " :: ERROR :: info error\n",
                       " :: CRITICAL :: info critical\n"):
            assert logf.readline().endswith(suffix)
def test_logger_verbose2(capsys):
    """
    Test that logger is initialized as expected.
    """
    logfile = os.path.join(GENEPATH, "logfile_test.txt")
    utils.init_logger(logfile, logging.DEBUG, "toto", verbose=2)
    logger = logging.getLogger("toto")
    logger.debug("info debug")
    logger.details("info details")
    logger.info("info info")
    logger.warning("info warning")
    logger.error("info error")
    logger.critical("info critical")
    out, err = capsys.readouterr()
    for msg in ("info debug", "info details", "info info"):
        assert msg in out
    for msg in ("info warning", "info error", "info critical"):
        assert msg in err
    # .log file: INFO and above
    with open(logfile + ".log", "r") as logf:
        for suffix in (" :: INFO :: info info\n",
                       " :: WARNING :: info warning\n",
                       " :: ERROR :: info error\n",
                       " :: CRITICAL :: info critical\n"):
            assert logf.readline().endswith(suffix)
    # .log.details file: DETAIL and above
    with open(logfile + ".log.details") as logf:
        for suffix in (" :: DETAIL :: info details\n",
                       " :: INFO :: info info\n",
                       " :: WARNING :: info warning\n",
                       " :: ERROR :: info error\n",
                       " :: CRITICAL :: info critical\n"):
            assert logf.readline().endswith(suffix)
    # .log.err file: WARNING and above
    with open(logfile + ".log.err", "r") as logf:
        for suffix in (" :: WARNING :: info warning\n",
                       " :: ERROR :: info error\n",
                       " :: CRITICAL :: info critical\n"):
            assert logf.readline().endswith(suffix)
    # .log.debug file: every level, tagged with the emitting logger's name
    with open(logfile + ".log.debug") as logf:
        for lvl, msg in (("DEBUG", "info debug"),
                         ("DETAIL", "info details"),
                         ("INFO", "info info"),
                         ("WARNING", "info warning"),
                         ("ERROR", "info error"),
                         ("CRITICAL", "info critical")):
            assert logf.readline().endswith(
                " :: %s (from toto logger) :: %s\n" % (lvl, msg))
# Example #14
def test_run_prodigal_small():
    """
    Test that when the output directory does not exist, it creates it, and runs prodigal
    with all expected outfiles. Here, we run prodigal with --small option (on a small genome)
    """
    logger = my_logger("test_run_prodigal_small")
    utils.init_logger(LOGFILE_BASE, 0, 'test_run_prodigal_small')
    gpath = os.path.join(GEN_PATH, "H299_H561.fasta")
    out_dir = os.path.join(GENEPATH, "H299_H561.fasta-prodigalRes")
    cores_prodigal = 2
    name = "test_runprodigal_small_H299"
    force = False
    # "small option" as trn_file triggers prodigal's meta mode (-p meta, see
    # the asserted command below) instead of using a training file
    trn_file = "small option"
    nbcont = 3
    arguments = (gpath, GENEPATH, cores_prodigal, name, force, nbcont,
                 trn_file, logger[0])
    assert afunc.run_prodigal(arguments)

    # Check content of ffn, faa and gff files
    exp_dir = os.path.join(EXP_DIR, "H299_H561.fasta_small-prodigalRes",
                           "test_runprodigal_small_H299")
    out_faa = os.path.join(out_dir, name + ".faa")
    out_ffn = os.path.join(out_dir, name + ".ffn")
    out_gff = os.path.join(out_dir, name + ".gff")
    # Check that faa, ffn and gff files are as expected
    assert os.path.isfile(out_faa)
    assert tutil.compare_order_content(exp_dir + ".faa", out_faa)
    assert os.path.isfile(out_ffn)
    assert tutil.compare_order_content(exp_dir + ".ffn", out_ffn)
    # (fixed copy-paste: the original re-checked out_ffn here instead of out_gff)
    assert os.path.isfile(out_gff)
    assert tutil.compare_order_content(exp_dir + ".gff", out_gff)
    # Check logs
    q = logger[0]
    assert q.qsize() == 3
    assert q.get().message.startswith("Start annotating")
    prodigal_cmd = q.get().message
    assert ("Prodigal command: prodigal -i test/data/annotate/genomes/"
            "H299_H561.fasta -d test/data/annotate/"
            "generated_by_unit-tests/H299_H561.fasta-prodigalRes/"
            "test_runprodigal_small_H299.ffn -a test/data/annotate/"
            "generated_by_unit-tests/H299_H561.fasta-prodigalRes/"
            "test_runprodigal_small_H299.faa -f gff -o test/data/annotate/"
            "generated_by_unit-tests/H299_H561.fasta-prodigalRes/"
            "test_runprodigal_small_H299.gff -p meta -q") in prodigal_cmd
    assert q.get().message.startswith("End annotating")
# Example #15
def test_run_prodigal_out_doesnt_exist():
    """
    Test that when the output directory does not exist, it creates it, and runs prodigal
    with all expected outfiles
    """
    logger = my_logger("test_run_prodigal_out_doesnt_exist")
    utils.init_logger(LOGFILE_BASE, 0, 'test_run_prodigal_out_doesnt_exist')
    gpath = os.path.join(GEN_PATH, "H299_H561.fasta")
    out_dir = os.path.join(GENEPATH, "H299_H561.fasta-prodigalRes")
    cores_prodigal = 2
    name = "test_runprodigal_H299"
    force = False
    trn_file = os.path.join(TEST_DIR, "A_H738-and-B2_A3_5.fna.trn")
    nbcont = 3
    arguments = (gpath, GENEPATH, cores_prodigal, name, force, nbcont,
                 trn_file, logger[0])
    assert afunc.run_prodigal(arguments)
    # Check content of ffn, faa and gff files
    exp_dir = os.path.join(EXP_DIR, "H299_H561.fasta-prodigalRes",
                           "ESCO.1015.00001")
    out_faa = os.path.join(out_dir, name + ".faa")
    out_ffn = os.path.join(out_dir, name + ".ffn")
    out_gff = os.path.join(out_dir, name + ".gff")
    # Check that faa, ffn and gff files are as expected
    assert os.path.isfile(out_faa)
    assert tutil.compare_order_content(exp_dir + ".faa", out_faa)
    assert os.path.isfile(out_ffn)
    assert tutil.compare_order_content(exp_dir + ".ffn", out_ffn)
    # (fixed copy-paste: the original re-checked out_ffn here instead of out_gff)
    assert os.path.isfile(out_gff)
    assert tutil.compare_order_content(exp_dir + ".gff", out_gff)
    # Expected log records: start, prodigal command, end
    q = logger[0]
    assert q.qsize() == 3
    assert q.get().message.startswith("Start annotating")
    assert q.get().message == (
        "Prodigal command: prodigal -i test/data/annotate/genomes/"
        "H299_H561.fasta -d test/data/annotate/"
        "generated_by_unit-tests/H299_H561.fasta-prodigalRes/"
        "test_runprodigal_H299.ffn -a test/data/annotate/"
        "generated_by_unit-tests/H299_H561.fasta-prodigalRes/"
        "test_runprodigal_H299.faa -f gff -o test/data/annotate/"
        "generated_by_unit-tests/H299_H561.fasta-prodigalRes/"
        "test_runprodigal_H299.gff -t "
        "test/data/annotate/test_files/A_H738-and-B2_A3_5.fna.trn "
        "-q")
    assert q.get().message.startswith("End annotating")
# Example #16
def test_run_prokka_out_exists_error():
    """
    Test that when the output directory already exists, and 1 file is missing,
    run_prokka returns False, and writes the warning message saying that prokka did not
    rerun, + the warning message for the missing file(s).
    """
    logger = my_logger("test_run_prokka_out_exists_error")
    utils.init_logger(LOGFILE_BASE, 0, 'prokka_out_error')
    ori_prok_dir = os.path.join(TEST_DIR, "original_name.fna-prokkaRes")
    ori_name = "prokka_out_for_test"
    new_prok_dir = os.path.join(GENEPATH, "original_name-error-prokkaRes")
    name = "prokka_out_for_test-wrongCDS"
    os.makedirs(new_prok_dir)
    # Copy every result file except .tbl -> run_prokka must notice it missing
    for ext in (".fna", ".ffn", ".faa", ".gff"):
        shutil.copyfile(os.path.join(ori_prok_dir, ori_name + ext),
                        os.path.join(new_prok_dir, name + ext))
    gpath = "path/to/nogenome/original_name-error"
    arguments = (gpath, GENEPATH, 1, name, False, 6, "nofile.trn", logger[0])
    assert not afunc.run_prokka(arguments)
    q = logger[0]
    assert q.qsize() == 4
    # start annotating :
    assert q.get().message.startswith("Start annotating")
    # warning prokka results folder exists:
    assert q.get().message == (
        "Prokka results folder test/data/annotate/generated_by_unit-tests/"
        "original_name-error-prokkaRes already exists.")
    # error, no tbl file
    assert (q.get().message
            == "prokka_out_for_test-wrongCDS original_name-error: no .tbl file")
    # warning, files in outdir are not as expected
    assert q.get().message.startswith(
        "Problems in the files contained in your already existing "
        "output dir (test/data/annotate/generated_by_unit-tests/"
        "original_name-error-prokkaRes)")
# Example #17
def test_run_prodigal_out_exists_error():
    """
    Test that when the output directory already exists, and 1 file is missing,
    run_prodigal returns False, and writes the warning message saying that prodigal did not
    rerun, + the warning message for the missing file(s).
    """
    logger = my_logger("test_run_prodigal_out_exists_error")
    utils.init_logger(LOGFILE_BASE, 0, 'prodigal_out_error')
    ori_prok_dir = os.path.join(TEST_DIR, "original_name.fna-prodigalRes")
    ori_name = "prodigal.outtest.ok"
    new_prok_dir = os.path.join(GENEPATH, "original_name-error-prodigalRes")
    name = "prodigal_out_for_test-wrongCDS"
    os.makedirs(new_prok_dir)
    # Copy valid ffn/faa, but leave the gff empty -> must be flagged as error
    for ext in (".ffn", ".faa"):
        shutil.copyfile(os.path.join(ori_prok_dir, ori_name + ext),
                        os.path.join(new_prok_dir, name + ext))
    open(os.path.join(new_prok_dir, name + ".gff"), "w").close()
    gpath = "path/to/nogenome/original_name-error"
    trn_file = os.path.join(TEST_DIR, "A_H738-and-B2_A3_5.fna.trn")
    arguments = (gpath, GENEPATH, 1, name, False, 7, trn_file, logger[0])
    assert not afunc.run_prodigal(arguments)
    q = logger[0]
    assert q.qsize() == 4
    # start annotating :
    assert q.get().message.startswith("Start annotating")
    # warning prodigal results folder exists:
    assert q.get().message == (
        "Prodigal results folder test/data/annotate/"
        "generated_by_unit-tests/"
        "original_name-error-prodigalRes already exists.")
    # error, empty gff
    assert q.get().message == (
        "Genome prodigal_out_for_test-wrongCDS (from original_name-error): "
        "At least one of your Prodigal result file is empty.")
    # warning, files in outdir are not as expected
    assert q.get().message.startswith(
        "Problems in the files contained in your already existing "
        "output dir (test/data/annotate/generated_by_unit-tests/"
        "original_name-error-prodigalRes")
# Example #18
def test_run_prodigal_noout_notrain():
    """
    Prodigal result directory does not exist (not already run)
    training file does not exist (probably, problem while trying to train)
    -> return  False
    """
    logger = my_logger("test_run_prodigal_out_exists_error")
    utils.init_logger(LOGFILE_BASE, 0, 'prodigal_out_error')
    # "ghost_trn_file" does not exist on disk -> run_prodigal must bail out
    arguments = ("path/to/nogenome/original_name-error", GENEPATH, 1,
                 "prodigal_out_for_test-wrongCDS", False, 7,
                 "ghost_trn_file", logger[0])
    assert not afunc.run_prodigal(arguments)
    # Nothing must have been logged to the queue
    assert logger[0].qsize() == 0
# Example #19
def test_run_all_prodigal_error_train():
    """
    Check that when we want to train on a genome but it fails, it returns False for all genomes
    Here, it fails because genome to train on is too small
    """
    logger = my_logger("test_run_all_parallel_more_threads")
    utils.init_logger(LOGFILE_BASE, 0, 'test_run_all_parallel_more_threads')
    # genomes = {genome: [name, gpath, annot_path, size, nbcont, l90]}
    genome1 = "H299_H561.fasta"
    genome2 = "A_H738.fasta"
    gpath1 = os.path.join(GEN_PATH, genome1)
    gpath2 = os.path.join(GEN_PATH, genome2)
    genomes = {
        genome1: ["test_runall_1by1_1", gpath1, gpath1, 12656, 3, 0],
        genome2: ["test_runall_1by1_2", gpath2, gpath2, 456464645, 1, 465]
    }
    final = afunc.run_annotation_all(genomes,
                                     8,        # threads
                                     False,    # force
                                     GENEPATH,
                                     genome1,  # genome used for training
                                     prodigal_only=True,
                                     quiet=True)
    # Training failed -> no genome is annotated
    assert not final[genome1]
    assert not final[genome2]
    q = logger[0]
    assert q.qsize() == 4
    for expected in (
            "Annotating all genomes with prodigal",
            "Prodigal will train using "
            "test/data/annotate/genomes/H299_H561.fasta",
            "prodigal command: prodigal -i "
            "test/data/annotate/genomes/H299_H561.fasta -t "
            "test/data/annotate/generated_by_unit-tests/H299_H561.fasta.trn",
            "Error while trying to train prodigal on H299_H561.fasta. See "
            "test/data/annotate/generated_by_unit-tests/"
            "H299_H561.fasta.trn-prodigal-train.log.err."):
        assert q.get().message == expected
# Example #20
def test_run_prodigal_out_problem_running():
    """
    Check that when a problem occurs while trying to run prodigal, run_prodigal returns False,
    and the error message indicating to read in the log why it couldn't run
    """
    logger = my_logger("test_run_prodigal_out_problem_running")
    utils.init_logger(LOGFILE_BASE, 0, 'test_run_prodigal_out_problem_running')
    gpath = os.path.join(GEN_PATH, "H299_H561bis.fasta")
    trn_file = os.path.join(TEST_DIR, "A_H738-and-B2_A3_5.fna.trn")
    logf = os.path.join(GENEPATH, "H299_H561bis.fasta-prodigal.log")
    # arguments = (gpath, outdir, cores, name, force, nbcont, trn_file, queue)
    arguments = (gpath, GENEPATH, 2, "test_runprodigal_H299-error", False, 3,
                 trn_file, logger[0])
    assert not afunc.run_prodigal(arguments)
    # Output directory was created but must have stayed empty
    outdir = os.path.join(GENEPATH, "H299_H561bis.fasta-prodigalRes")
    assert os.listdir(outdir) == []
    # Check logs: start, command, then error pointing to the log file
    q = logger[0]
    assert q.qsize() == 3
    assert q.get().message.startswith("Start annotating")
    assert q.get().message.startswith(
        "Prodigal command: prodigal -i test/data/annotate/genomes/"
        "H299_H561bis.fasta -d test/data/annotate/"
        "generated_by_unit-tests/H299_H561bis.fasta-prodigalRes/"
        "test_runprodigal_H299-error.ffn -a test/data/annotate/"
        "generated_by_unit-tests/H299_H561bis.fasta-prodigalRes/"
        "test_runprodigal_H299-error.faa -f gff -o test/data/annotate/"
        "generated_by_unit-tests/H299_H561bis.fasta-prodigalRes/"
        "test_runprodigal_H299-error.gff -t "
        "test/data/annotate/test_files/A_H738-and-B2_A3_5.fna.trn "
        "-q")
    assert q.get().message.startswith(
        "Error while trying to run prodigal. See test/data/"
        "annotate/generated_by_unit-tests/"
        "H299_H561bis.fasta-prodigal.log.err.")
def test_log_no_listen(capsys):
    """
    Check that when we log to a queue listener, but never listen to the queue,
    there is nothing in stderr/stdout/files
    """
    import multiprocessing

    # Send 6 records (debug -> critical) through a QueueHandler on the root logger
    manager = multiprocessing.Manager()
    queue = manager.Queue()
    root = logging.getLogger()
    root.setLevel(logging.DEBUG)
    root.handlers = []
    logging.addLevelName(utils.detail_lvl(), "DETAIL")
    root.addHandler(logging.handlers.QueueHandler(queue))
    logger = logging.getLogger('process')
    logger.debug("debug message")
    logger.log(utils.detail_lvl(), "detail message")
    logger.info("info message")
    logger.warning("warning message")
    logger.error("error message")
    logger.critical("critical message")
    queue.put(None)

    # Initialize real logger (but no listener consumes the queue)
    logfile = os.path.join(GENEPATH, "test_log_listen")
    utils.init_logger(logfile, 0, '')

    # 6 records + the None sentinel are still queued: nothing was emitted anywhere
    assert queue.qsize() == 7
    captured_out, captured_err = capsys.readouterr()
    assert captured_out == ""
    assert captured_err == ""
    for suffix in (".log", ".log.details", ".log.err"):
        with open(logfile + suffix, "r") as logf:
            assert logf.readlines() == []
Exemple #22
0
def test_run_all_prodigal_train_exists_ok():
    """
    Check that when the training file already exists in the output directory,
    prodigal training is skipped and the existing file is reused to annotate
    all genomes.
    Here, genome1 (toto.fasta) fails at annotation (presumably because
    toto.fasta does not exist in the genomes folder — confirm against GEN_PATH
    contents), while genome2 (A_H738.fasta) is annotated successfully.
    """
    logger = my_logger("test_run_prodigal_train_exist_error")
    utils.init_logger(LOGFILE_BASE, 0, 'test_run_prodigal_train_exist_error')
    # genomes = {genome: [name, gpath, annot_path, size, nbcont, l90]}
    genome1 = "toto.fasta"
    gpath1 = os.path.join(GEN_PATH, genome1)
    genome2 = "A_H738.fasta"
    gpath2 = os.path.join(GEN_PATH, genome2)
    genomes = {
        genome1: ["test_runall_1by1_1", gpath1, gpath1, 12656, 3, 0],
        genome2: ["test_runall_1by1_2", gpath2, gpath2, 456464645, 1, 465]
    }
    threads = 8
    force = False
    trn_gname = genome1
    # Copy trn file to outdir, so that panacota detects that it already exists
    orig_trn_file = os.path.join(TEST_DIR, "A_H738-and-B2_A3_5.fna.trn")
    trn_file = os.path.join(GENEPATH, "toto.fasta.trn")
    shutil.copyfile(orig_trn_file, trn_file)
    # Run annotation all
    final = afunc.run_annotation_all(genomes,
                                     threads,
                                     force,
                                     GENEPATH,
                                     trn_gname,
                                     prodigal_only=True,
                                     quiet=False)
    # genome1 annotation fails, genome2 succeeds (training file was reused)
    assert not final[genome1]
    assert final[genome2]
    q = logger[0]
    assert q.qsize() == 9
    # First 3 messages arrive in a deterministic order
    assert q.get().message == "Annotating all genomes with prodigal"
    assert q.get().message == ("Prodigal will train using "
                               "test/data/annotate/genomes/toto.fasta")
    assert q.get().message == (
        "A training file already exists (test/data/annotate/"
        "generated_by_unit-tests/toto.fasta.trn). It will be used "
        "to annotate all genomes.")
    # Check that all messages exist. We cannot know in which order,
    # as 'genomes' is a dict, hence unordered, and as computation is done in parallel
    messages = []
    for i in range(6):
        a = q.get().message
        messages.append(a)
    # Check start annotation messages
    message_start_annot1 = (
        "Start annotating test_runall_1by1_1 "
        "(from test/data/annotate/genomes/toto.fasta sequence) "
        "with Prodigal")
    message_start_annot2 = (
        "Start annotating test_runall_1by1_2 "
        "(from test/data/annotate/genomes/A_H738.fasta sequence) "
        "with Prodigal")
    assert message_start_annot1 in messages
    assert message_start_annot2 in messages
    # Prodigal cmd
    message_cmd1 = (
        "Prodigal command: prodigal -i test/data/annotate/genomes/toto.fasta "
        "-d test/data/annotate/generated_by_unit-tests/toto.fasta-prodigalRes/"
        "test_runall_1by1_1.ffn -a test/data/annotate/generated_by_unit-tests/"
        "toto.fasta-prodigalRes/test_runall_1by1_1.faa -f gff "
        "-o test/data/annotate/generated_by_unit-tests/"
        "toto.fasta-prodigalRes/test_runall_1by1_1.gff -t "
        "test/data/annotate/generated_by_unit-tests/toto.fasta.trn -q")
    message_cmd2 = (
        "Prodigal command: prodigal -i test/data/annotate/genomes/A_H738.fasta "
        "-d test/data/annotate/generated_by_unit-tests/A_H738.fasta-prodigalRes/"
        "test_runall_1by1_2.ffn -a test/data/annotate/generated_by_unit-tests/"
        "A_H738.fasta-prodigalRes/test_runall_1by1_2.faa -f gff "
        "-o test/data/annotate/generated_by_unit-tests/A_H738.fasta-prodigalRes/"
        "test_runall_1by1_2.gff -t "
        "test/data/annotate/generated_by_unit-tests/toto.fasta.trn -q")
    assert message_cmd1 in messages
    assert message_cmd2 in messages
    # End messages: genome1 errors out, genome2 ends normally
    message_end_annot1 = ("Error while trying to run prodigal. See "
                          "test/data/annotate/generated_by_unit-tests/"
                          "toto.fasta-prodigal.log.err.")
    message_end_annot2 = (
        "End annotating test_runall_1by1_2 (from test/data/annotate/genomes/"
        "A_H738.fasta)")
    assert message_end_annot1 in messages
    assert message_end_annot2 in messages
Exemple #23
0
def main(cmd, ncbi_species_name, ncbi_species_taxid, ncbi_taxid, ncbi_strains,
         levels, ncbi_section, outdir, tmp_dir, threads, norefseq, db_dir,
         only_mash, info_file, l90, nbcont, cutn, min_dist, max_dist, verbose,
         quiet):
    """
    Main method, constructing the draft dataset for the given species

    verbosity:
    - default 0 : stdout contains INFO, stderr contains ERROR, .log contains INFO and more, .log.err contains warning and more
    - 1: same as 0 + WARNING in stderr
    - 2: same as 1 + DETAILS in stdout + DETAILS in .log.details
    - >=15: same as 2 + Add DEBUG in stdout + create .log.debug with everything from info to debug


    Parameters
    ----------
    cmd : str
        command line used to launch this program
    ncbi_species_name : str
        name of species to download, as given by NCBI
    ncbi_species_taxid : int
        species taxid given in NCBI
    ncbi_taxid : int
        NCBI taxid (sub-species)
    ncbi_strains : str
        specific strains to download
    levels: str
        Level of assembly to download. Choice between 'all', 'complete', 'chromosome',
        'scaffold', 'contig'. Default is 'all'
    outdir : str
        path to output directory (where created database will be saved).
    tmp_dir : str
        Path to directory where tmp files are saved (sequences split at each row of 5 'N')
    threads : int
        max number of threads to use
    norefseq : bool
        True if user does not want to download again the database
    db_dir : str
        Name of the folder where already downloaded fasta files are saved.
    only_mash : bool
        True if user user already has the database and quality of each genome (L90, #contigs etc.)
    info_file : str
        File containing information on QC if it was already ran before (columns to_annotate,
        gsize, nb_conts and L90).
    l90 : int
        Max L90 allowed to keep a genome
    nbcont : int
        Max number of contigs allowed to keep a genome
    cutn : int
        cut at each when there are 'cutn' N in a row. Don't cut if equal to 0
    min_dist : int
        lower limit of distance between 2 genomes to keep them
    max_dist : int
        upper limit of distance between 2 genomes to keep them (default is 0.06)
    verbose : int
        verbosity:
        - default 0 : stdout contains INFO, stderr contains ERROR, .log contains INFO and more,
          .log.err contains warning and more
        - 1: same as 0 + WARNING in stderr
        - 2: same as 1 + DETAILS in stdout + DETAILS in .log.details
        - >=15: same as 2 + Add DEBUG in stdout + create .log.debug with everything
          from info to debug
    quiet : bool
        True if nothing must be sent to stdout/stderr, False otherwise

    Returns
    -------
    str
        path to the info file listing genomes kept after the mash filtering step
    """

    # get species name in NCBI format
    # -> will be used to name output directory
    # -> will be used to download summary file if given species corresponds to NCBI name
    if ncbi_species_name:
        species_linked = "_".join(ncbi_species_name.split())
        species_linked = "_".join(species_linked.split("/"))

    # if species name not given by user, use species taxID (if given) to name output directory
    elif ncbi_species_taxid:
        species_linked = str(ncbi_species_taxid)
    # if neither species name nor species taxid given by user, use taxID (if given)
    # to name output directory
    elif ncbi_taxid:
        species_linked = str(ncbi_taxid)
    # If no species nor taxID, get specific strain names
    elif ncbi_strains:
        if os.path.isfile(ncbi_strains):
            species_linked = os.path.basename(ncbi_strains)
            species_linked = os.path.splitext(species_linked)[0]
        else:
            # Sanitize strain list so it can be used as a directory name:
            # spaces -> '_', '/' -> '-', ',' -> '_and_'
            species_linked = "_".join(ncbi_strains.split())
            species_linked = "-".join(species_linked.split("/"))
            species_linked = "_and_".join(species_linked.split(","))
    # if neither speName, speID, taxID nor strainName given (--norefseq, mashonly), name is NA
    else:
        species_linked = "NA"
    # Default outdir is species name if given, or species taxID
    if not outdir:
        outdir = species_linked
    # Default tmp_dir is outdir/tmp_files
    if not tmp_dir:
        tmp_dir = os.path.join(outdir, "tmp_files")
    # directory that will be created by ncbi_genome_download
    ncbidir = os.path.join(outdir, ncbi_section, "bacteria")
    os.makedirs(outdir, exist_ok=True)
    os.makedirs(tmp_dir, exist_ok=True)

    # Initialize logger
    # set level of logger: level is the minimum level that will be considered.
    if verbose <= 1:
        level = logging.INFO
    # for verbose = 2, ignore only debug
    if verbose >= 2 and verbose < 15:
        level = utils.detail_lvl()  # int corresponding to detail level
    # for verbose >= 15, write everything
    if verbose >= 15:
        level = logging.DEBUG
    logfile_base = os.path.join(outdir,
                                "PanACoTA_prepare_{}").format(species_linked)
    logfile_base, logger = utils.init_logger(logfile_base,
                                             level,
                                             'prepare',
                                             log_details=True,
                                             verbose=verbose,
                                             quiet=quiet)

    # Message on what will be done (cmd, cores used)
    logger.info(f'PanACoTA version {version}')
    logger.info("Command used\n \t > " + cmd)
    message = f"'PanACoTA prepare' will run on {threads} "
    message += f"cores" if threads > 1 else "core"
    logger.info(message)

    # Start prepare step
    # Run more than only mash filter (!only_mash):
    # - start from QC and mash (norefseq)
    # - start from genome download (!norefseq))
    if not only_mash:
        # Not only mash, so a new info file will be created. If the user still gave an info
        # file (he will be warned that it will be ignored), rename it with '.back'
        # to avoid erasing it
        if info_file and os.path.isfile(info_file):
            os.rename(info_file, info_file + ".back")

        # 'norefseq = True" : Do not download genomes, just do QC and mash filter on given genomes
        # -> if not, error and exit
        if norefseq:
            logger.warning(f'You asked to skip {ncbi_section} downloads.')

            # -> if db_dir given, watch for sequences there. If does not exist, error and exit
            # (user gave a directory (even if it does not exist), so we won't look for
            # the sequences in other folders)
            if db_dir:
                if not os.path.exists(db_dir):
                    logger.error(
                        f"Database folder {db_dir} supposed to contain fasta "
                        "sequences does not "
                        "exist. Please give a valid folder, or leave the default "
                        "directory (no '-d' option).")
                    sys.exit(1)
            # -> If user did not give db_dir, genomes could be in
            # outdir/Database_init/<genome_name>.fna
            else:
                db_dir = os.path.join(outdir, "Database_init")
                # If it does not exist, check if default compressed files folder exists.
                if not os.path.exists(db_dir):
                    logger.warning(
                        f"Database folder {db_dir} supposed to contain fasta "
                        "sequences does not "
                        "exist. We will check if the download folder (with compressed "
                        "sequences) exists.")
                    # -> if not in database_init, genomes must be in
                    # outdir/refeq/bacteria/<genome_name>.fna.gz. In that case,
                    # uncompress and add them to Database_init
                    if not os.path.exists(ncbidir):
                        logger.error(
                            f"Folder {ncbidir} does not exist. You do not have any "
                            "genome to analyse. Possible reasons:\n"
                            "- if you want to rerun analysis in the same folder as "
                            "sequences were downloaded (my_outdir/Database_init or "
                            f"my_outdir/{ncbi_section}), make sure you have '-o my_outdir' "
                            "option\n"
                            "- if you want to rerun analysis and save them in a new "
                            "output folder called 'new_outdir', make sure you have "
                            "'-o new_outdir' option, "
                            "and you specified where the uncompressed sequences to "
                            "use are ('-d sequence_database_path'). ")
                        sys.exit(1)
                    # add genomes from refseq/bacteria folder to Database_init
                    nb_gen, _ = dgf.to_database(outdir, ncbi_section)
        # No sequence: Do all steps -> download, QC, mash filter
        else:
            # Download all genomes of the given taxID
            db_dir, nb_gen = dgf.download_from_ncbi(species_linked,
                                                    ncbi_section,
                                                    ncbi_species_name,
                                                    ncbi_species_taxid,
                                                    ncbi_taxid, ncbi_strains,
                                                    levels, outdir, threads)
            logger.info(f"{nb_gen} {ncbi_section} genome(s) downloaded")

        # Now that genomes are downloaded and uncompressed, check their quality to remove bad ones
        genomes = fg.check_quality(species_linked, db_dir, tmp_dir, l90,
                                   nbcont, cutn)

    # Do only mash filter. Genomes must be already downloaded, and there must be a file with
    # all information on these genomes (L90 etc.)
    else:
        logger.warning('You asked to run only mash steps.')
        # NOTE(review): assumes info_file is a valid path string here;
        # os.path.exists(None) would raise TypeError — presumably enforced
        # by the argument parser. Confirm against the CLI layer.
        if not os.path.exists(
                info_file):  # info-file missing -> error and exit
            logger.error(
                f"Your info file {info_file} does not exist. Please provide the  "
                "right name/path, or remove the '--mash-only option to rerun "
                "quality control.")
            sys.exit(1)
        logger.info(("You want to run only mash steps. Getting information "
                     "from {}").format(info_file))
        genomes = utils.read_genomes_info(
            info_file,
            species_linked,
        )

    # Run Mash
    # genomes : {genome_file: [genome_name, orig_name, path_to_seq_to_annotate, size, nbcont, l90]}
    # sorted_genome : [genome_file] ordered by L90/nbcont (keys of genomes)
    sorted_genomes = fg.sort_genomes_minhash(genomes, l90, nbcont)

    # Write discarded genomes to a file -> orig_name, to_annotate, gsize, nb_conts, L90
    discQC = f"by-L90_nbcont-{species_linked}.txt"
    utils.write_genomes_info(genomes, sorted_genomes, discQC, outdir)

    # Remove genomes not corresponding to mash filters
    removed = fg.iterative_mash(sorted_genomes, genomes, outdir,
                                species_linked, min_dist, max_dist, threads,
                                quiet)
    # Write list of genomes kept, and list of genomes discarded by mash step
    info_file = fg.write_outputfiles(genomes, sorted_genomes, removed, outdir,
                                     species_linked, min_dist, max_dist)
    logger.info("End")
    return info_file
Exemple #24
0
def test_run_prokka_out_exists_force():
    """
    Test that when the output directory already exists with wrong files, but force is on,
    prokka is rerun and outputs the right files
    """
    logger = my_logger("test_run_prokka_out_exists_force")
    utils.init_logger(LOGFILE_BASE, 0, 'force')
    gpath = os.path.join(GEN_PATH, "H299_H561.fasta")
    out_prokdir = os.path.join(GENEPATH, "H299_H561.fasta-prokkaRes")
    name = "test_runprokka_H299"
    # Pre-create empty tbl, faa, ffn files in prokka output dir, to check that
    # they are overridden when --force is used
    os.makedirs(out_prokdir)
    for ext in (".tbl", ".faa", ".ffn"):
        open(os.path.join(out_prokdir, name + ext), "w").close()
    # arguments = (gpath, outdir, cores, name, force, nbcont, trn_file, queue)
    arguments = (gpath, GENEPATH, 2, name, True, 3, "nofile.trn", logger[0])
    assert afunc.run_prokka(arguments)
    # As we used 'force', the empty tbl/faa/ffn files must have been replaced
    # by the prokka output
    exp_dir = os.path.join(EXP_DIR,
                           "H299_H561.fasta-short-contig.fna-prokkaRes",
                           "test_runprokka_H299")
    out_tbl = os.path.join(out_prokdir, name + ".tbl")
    out_faa = os.path.join(out_prokdir, name + ".faa")
    out_ffn = os.path.join(out_prokdir, name + ".ffn")
    assert os.path.isfile(out_tbl)
    # For the tbl file, only check that the 3 contigs were considered and that
    # the number of CDS is as expected: the exact file content changes between
    # prokka versions, so the whole file cannot be compared.
    with open(out_tbl, "r") as outt:
        lines = [line.strip() for line in outt.readlines()]
        assert sum('Feature' in line for line in lines) == 3
        assert sum("CDS" in line for line in lines) == 16
    # Check that faa and ffn files are as expected
    assert os.path.isfile(out_faa)
    assert tutil.compare_order_content(exp_dir + ".faa", out_faa)
    assert os.path.isfile(out_ffn)
    assert tutil.compare_order_content(exp_dir + ".ffn", out_ffn)
    # Check the 4 expected log messages, in order
    q = logger[0]
    assert q.qsize() == 4
    assert q.get().message.startswith(
        "Start annotating test_runprokka_H299 from test/data/"
        "annotate/genomes/H299_H561.fasta with Prokka")
    assert q.get().message == (
        "Prokka results folder already exists, but removed because "
        "--force option used")
    assert q.get().message == (
        "Prokka command: prokka "
        "--outdir test/data/annotate/generated_by_unit-tests/"
        "H299_H561.fasta-prokkaRes --cpus 2 --prefix test_runprokka_H299 "
        "--centre prokka test/data/annotate/genomes/H299_H561.fasta")
    assert q.get().message.startswith(
        "End annotating test_runprokka_H299 "
        "from test/data/annotate/genomes/H299_H561.fasta")
Exemple #25
0
def test_run_all_prodigal_outexists_error():
    """
    trn file already exists, and output folder too. No force option. Output folder is empty
    -> error message while checking prodigal
    """
    logger = my_logger("test_run_all_parallel_more_threads")
    utils.init_logger(LOGFILE_BASE, 0, 'test_run_all_parallel_more_threads')
    # genomes = {genome: [name, gpath, annot_path, size, nbcont, l90]}
    genome1 = "toto.fasta"
    genome2 = "A_H738.fasta"
    genomes = {
        genome1: ["test_runall_1by1_1", genome1, genome1, 12656, 3, 0],
        genome2: ["test_runall_1by1_2", genome2, genome2, 456464645, 1, 465]
    }
    # Pre-create empty prodigal result directories for both genomes
    for res_dir in ("A_H738.fasta-prodigalRes", "toto.fasta-prodigalRes"):
        os.makedirs(os.path.join(GENEPATH, res_dir))
    # Add an existing training file in the output folder
    shutil.copyfile(os.path.join(TEST_DIR, "A_H738-and-B2_A3_5.fna.trn"),
                    os.path.join(GENEPATH, "toto.fasta.trn"))
    final = afunc.run_annotation_all(genomes,
                                     1,
                                     False,
                                     GENEPATH,
                                     genome1,
                                     prodigal_only=True,
                                     quiet=False)
    # Both genomes fail: their result folders exist but contain no valid files
    assert not final[genome1]
    assert not final[genome2]
    q = logger[0]
    assert q.qsize() == 15
    # First 3 messages arrive in a deterministic order
    assert q.get().message == "Annotating all genomes with prodigal"
    assert q.get().message == "Prodigal will train using toto.fasta"
    assert q.get().message == (
        "A training file already exists (test/data/annotate/"
        "generated_by_unit-tests/toto.fasta.trn). It will "
        "be used to annotate all genomes.")
    # Remaining 12 messages: order unknown, as 'genomes' is a dict (unordered)
    # and computation is done in parallel
    messages = [q.get().message for _ in range(12)]
    expected_msgs = [
        ("Start annotating test_runall_1by1_1 "
         "(from toto.fasta sequence) with Prodigal"),
        ("Prodigal results folder test/data/annotate/generated_by_unit-tests/"
         "toto.fasta-prodigalRes already exists."),
        "test_runall_1by1_1 toto.fasta: no or several .faa file(s)",
        "test_runall_1by1_1 toto.fasta: no or several .ffn file(s)",
        "test_runall_1by1_1 toto.fasta: no or several .gff file(s)",
        ("Problems in the files contained in your already existing output dir "
         "(test/data/annotate/generated_by_unit-tests/toto.fasta-prodigalRes). "
         "Please check it, or remove it to re-annotate."),
        ("Start annotating test_runall_1by1_2 "
         "(from A_H738.fasta sequence) with Prodigal"),
        ("Problems in the files contained in your already existing output dir "
         "(test/data/annotate/generated_by_unit-tests/A_H738.fasta-prodigalRes). "
         "Please check it, or remove it to re-annotate."),
    ]
    for expected in expected_msgs:
        assert expected in messages
Exemple #26
0
def test_run_all_1by1_prokka():
    """
    Check that when running with 3 threads (not parallel), prokka runs as expected,
    and returns True for each genome
    -> Runs 1 by 1, with prokka using 3 cpus
    Start and end must be ordered: (start1, end1, start2, end2) or (start2, end2, start1, end1)
    """
    logger = my_logger("test_runall_1by1_1")
    utils.init_logger(LOGFILE_BASE, 0, 'test_run_all_1by1')
    # genomes = {genome: [name, gpath, annot_path, size, nbcont, l90]}
    genome1 = "H299_H561.fasta"
    gpath1 = os.path.join(GEN_PATH, genome1)
    genome2 = "A_H738.fasta"
    gpath2 = os.path.join(GEN_PATH, genome2)
    genomes = {
        genome1: ["test_runall_1by1_1", gpath1, gpath1, 12656, 3, 0],
        genome2: ["test_runall_1by1_2", gpath2, gpath2, 456464645, 1, 465]
    }
    threads = 3
    force = False
    trn_file = "nofile.trn"
    annot_folder = os.path.join(GENEPATH, "annot-folder")
    os.makedirs(annot_folder)
    final = afunc.run_annotation_all(genomes, threads, force, annot_folder,
                                     trn_file)
    assert final[genome1]
    assert final[genome2]
    q = logger[0]
    assert q.qsize() == 7
    assert q.get().message == 'Annotating all genomes with prokka'
    # Messages for start and end annotation of the different genomes
    message_start_annot1 = (
        "Start annotating test_runall_1by1_1 test/data/annotate/genomes/"
        "H299_H561.fasta")
    message_cmd1 = (
        "Prokka command: prokka --outdir test/data/annotate/generated_by_unit-tests/"
        "annot-folder/H299_H561.fasta-prokkaRes --cpus 3")
    message_end_annot1 = (
        "End annotating test_runall_1by1_1 from test/data/annotate/genomes/"
        "H299_H561.fasta.")
    message_start_annot2 = (
        "Start annotating test_runall_1by1_2 test/data/annotate/genomes/"
        "A_H738.fasta")
    message_cmd2 = (
        "Prokka command: prokka --outdir test/data/annotate/generated_by_unit-tests/"
        "annot-folder/A_H738.fasta-prokkaRes --cpus 3")
    message_end_annot2 = (
        "End annotating test_runall_1by1_2 from test/data/annotate/genomes/"
        "A_H738.fasta.")
    # Check logs. Given that it is executed in parallel, we cannot know in which
    # order the 2 genomes appear; but running 1 by 1, each (start, cmd, end)
    # triplet is contiguous.
    # BUG FIX: the original asserts were written as
    # "assert qget == message_start_annot1 or message_start_annot2", which due
    # to operator precedence parses as "(qget == a) or b" and is always truthy
    # (b is a non-empty string). Use membership tests instead.
    qget = q.get().message
    assert qget in (message_start_annot1, message_start_annot2)
    if qget == message_start_annot1:
        # Ending annotation of first genome (same genome as started because running 1by1)
        assert q.get().message.startswith(message_cmd1)
        assert q.get().message == message_end_annot1
    else:
        assert q.get().message.startswith(message_cmd2)
        assert q.get().message == message_end_annot2
    qget2 = q.get().message
    assert qget2 in (message_start_annot1, message_start_annot2)
    if qget2 == message_start_annot2:
        # Ending annotation of second genome (same genome as started because running 1by1)
        assert q.get().message.startswith(message_cmd2)
        assert q.get().message == message_end_annot2
    else:
        assert q.get().message.startswith(message_cmd1)
        assert q.get().message == message_end_annot1
Exemple #27
0
def test_run_all_prokka_parallel_less_threads():
    """
    Check that there is no problem when running with less threads than genomes (each genomes
    uses 2 threads)
    Genomes H299 and A_H738 should run well, but genomes genome* have problems (no CDS found),
    so check_prokka should return false.
    """
    logger = my_logger("test_run_all_parallel_more_threads")
    utils.init_logger(LOGFILE_BASE, 0, 'test_run_all_4threads')
    # genomes = {genome: [name, gpath, annot_path, size, nbcont, l90]}
    gnames = [
        "H299_H561.fasta", "A_H738.fasta", "genome1.fasta", "genome2.fasta",
        "genome3.fasta"
    ]
    gpaths = [os.path.join(GEN_PATH, name) for name in gnames]
    sizes = [12656, 456464645, 456464645, 456464645, 456464645]
    nbconts = [3, 1, 4, 3, 1]
    genomes = {}
    for num, (gname, gpath, gsize, gcont) in enumerate(
            zip(gnames, gpaths, sizes, nbconts), start=1):
        genomes[gname] = ["test_runall_1by1_" + str(num), gpath, gpath,
                          gsize, gcont, 1]
    final = afunc.run_annotation_all(genomes, 4, False, GENEPATH,
                                     "nofile.trn")
    # The 2 first genomes are annotated correctly, the 3 others fail
    assert final[gnames[0]]
    assert final[gnames[1]]
    for failed_genome in gnames[2:]:
        assert not final[failed_genome]
    q = logger[0]
    # Check size of logs
    # -> starting log -> 1 log
    # -> for each genome ok (2 first ones): start annotate, prokka cmd, end annotate -> 6 logs
    # -> for each genome not ok (3 others):
    #           start annotate, prokka cmd, problem, end annotate -> 12 logs
    assert q.qsize() == 19
    assert q.get().message == "Annotating all genomes with prokka"
    # Remaining 18 messages: we cannot know in which order they arrive,
    # as 'genomes' is a dict (unordered) and computation is done in parallel
    messages = [q.get().message for _ in range(18)]
    expected_msgs = [
        # start-annotation messages
        ("Start annotating test_runall_1by1_1 "
         "from test/data/annotate/genomes/H299_H561.fasta "
         "with Prokka"),
        ("Start annotating test_runall_1by1_2 "
         "from test/data/annotate/genomes/A_H738.fasta "
         "with Prokka"),
        ("Start annotating test_runall_1by1_4 "
         "from test/data/annotate/genomes/genome2.fasta "
         "with Prokka"),
        # prokka command messages
        ("Prokka command: prokka --outdir test/data/annotate/generated_by_unit-tests/"
         "H299_H561.fasta-prokkaRes --cpus 2 --prefix test_runall_1by1_1 "
         "--centre prokka test/data/annotate/genomes/H299_H561.fasta"),
        ("Prokka command: prokka --outdir test/data/annotate/generated_by_unit-tests/"
         "A_H738.fasta-prokkaRes --cpus 2 --prefix test_runall_1by1_2 "
         "--centre prokka test/data/annotate/genomes/A_H738.fasta"),
        ("Prokka command: prokka --outdir test/data/annotate/generated_by_unit-tests/"
         "genome1.fasta-prokkaRes --cpus 2 --prefix test_runall_1by1_3 "
         "--centre prokka test/data/annotate/genomes/genome1.fasta"),
        # end-annotation messages
        ("End annotating test_runall_1by1_1 from "
         "test/data/annotate/genomes/H299_H561.fasta."),
        ("End annotating test_runall_1by1_3 from "
         "test/data/annotate/genomes/genome1.fasta."),
        ("End annotating test_runall_1by1_5 from "
         "test/data/annotate/genomes/genome3.fasta."),
        # error messages for the 3 failing genomes
        "test_runall_1by1_3 genome1.fasta: several .faa files",
        "test_runall_1by1_4 genome2.fasta: several .faa files",
        "test_runall_1by1_5 genome3.fasta: several .faa files",
    ]
    for expected in expected_msgs:
        assert expected in messages
Exemple #28
0
def main(cmd, pangenome, tol, multi, mixed, outputdir, lstinfo_file, floor,
         verbose, quiet):
    """
    Read pangenome and deduce Persistent genome according to the user criteria

    Parameters
    ----------
    cmd : str
        command line used to launch this program (logged for traceability)
    pangenome : str
        file containing pangenome
    tol : float
        min % of genomes present in a family to consider it as persistent (between 0 and 1)
    multi : bool
        True if multigenic families are allowed, False otherwise
    mixed : bool
        True if mixed families are allowed, False otherwise
    outputdir : str or None
        Specific directory for the generated persistent genome. If not given, pangenome directory is used.
    lstinfo_file : str
        list of genomes to include in the core/persistent genome. If not given, include all genomes of pan
    floor : bool
        Require at least floor(nb_genomes*tol) genomes if True, ceil(nb_genomes*tol) if False
    verbose : int
        verbosity:
        - default 0 : stdout contains INFO, stderr contains ERROR.
        - 1: stdout contains INFO, stderr contains WARNING and ERROR
        - 2: stdout contains (DEBUG), DETAIL and INFO, stderr contains WARNING and ERROR
        - >=15: Add DEBUG in stdout
    quiet : bool
        True if nothing must be sent to stdout/stderr, False otherwise

    Returns
    -------
    str
        path to the file containing the generated persistent genome
    """
    # import needed packages
    import logging
    from PanACoTA import utils
    from PanACoTA import utils_pangenome as utilsp
    import PanACoTA.corepers_module.persistent_functions as pers
    from PanACoTA import __version__ as version

    # get pangenome name info
    pan_dir, base_pan = os.path.split(pangenome)
    # As documented: when no output directory is given, fall back to the
    # pangenome directory ("." if pangenome is a bare filename).
    if not outputdir:
        outputdir = pan_dir or "."
    if lstinfo_file:
        _, base_lst = os.path.split(lstinfo_file)
    else:
        base_lst = "all"
    # Define output filename
    output_name = f"PersGenome_{base_pan}-{base_lst}_"
    if floor:
        output_name += "F"
    output_name += str(tol)
    if multi:
        output_name += "-multi.lst"
    elif mixed:
        output_name += "-mixed.lst"
    else:
        output_name += ".lst"
    # Define output directory and filename path.
    # exist_ok avoids the race between an isdir check and makedirs.
    os.makedirs(outputdir, exist_ok=True)
    outputfile = os.path.join(outputdir, output_name)
    logfile_base = os.path.join(outputdir, "PanACoTA-corepers")
    # level is the minimum level that will be considered.
    # for verbose = 0 or 1, ignore details and debug, start from info
    if verbose <= 1:
        level = logging.INFO
    # for verbose = 2 (up to 14), ignore only debug
    elif verbose < 15:
        level = 15  # int corresponding to detail level
    # for verbose >= 15, write everything
    else:
        level = logging.DEBUG
    utils.init_logger(logfile_base,
                      level,
                      'corepers',
                      verbose=verbose,
                      quiet=quiet)
    logger = logging.getLogger("corepers")
    logger.info(f'PanACoTA version {version}')
    logger.info("Command used\n \t > " + cmd)

    logger.info(get_info(tol, multi, mixed, floor))

    # Read pangenome
    fams_by_strain, families, all_strains = utilsp.read_pangenome(
        pangenome, logger)
    # If list of genomes given, get subset of previous dicts, including only the genomes asked
    if lstinfo_file:
        fams_by_strain, families, all_strains = pers.get_subset_genomes(
            fams_by_strain, families, lstinfo_file)
    # Generate persistent genome
    fams = pers.get_pers(fams_by_strain, families, len(all_strains), tol,
                         multi, mixed, floor)
    # Write persistent genome to file
    pers.write_persistent(fams, outputfile)
    logger.info("Persistent genome step done.")
    return outputfile
Exemple #29
0
def test_run_all_prodigal():
    """
    Check that there is no problem when running prodigal on all genomes
    Start and end are not necessarily in the same order (ex: start1, start2, end2, end1)
    """
    # Grab the log queue so we can inspect every emitted record
    log_q = my_logger("test_run_all_parallel_more_threads")[0]
    utils.init_logger(LOGFILE_BASE, 0, 'test_run_all_parallel_more_threads')
    # genomes = {genome: [name, gpath, annot_path, size, nbcont, l90]}
    name1, name2 = "H299_H561.fasta", "A_H738.fasta"
    path1 = os.path.join(GEN_PATH, name1)
    path2 = os.path.join(GEN_PATH, name2)
    genomes = {
        name1: ["test_runall_1by1_1", path1, path1, 12656, 3, 0],
        name2: ["test_runall_1by1_2", path2, path2, 456464645, 1, 465],
    }
    # 8 threads, no force, train prodigal on the second genome
    final = afunc.run_annotation_all(genomes,
                                     8,
                                     False,
                                     GENEPATH,
                                     name2,
                                     prodigal_only=True,
                                     quiet=True)
    assert final[name1]
    assert final[name2]
    # The first 4 records (global start + training) arrive in a fixed order
    assert log_q.qsize() == 10
    assert log_q.get().message == "Annotating all genomes with prodigal"
    assert log_q.get(
    ).message == "Prodigal will train using test/data/annotate/genomes/A_H738.fasta"
    assert log_q.get().message == (
        "prodigal command: prodigal -i "
        "test/data/annotate/genomes/A_H738.fasta -t "
        "test/data/annotate/generated_by_unit-tests/A_H738.fasta.trn")
    assert log_q.get(
    ).message == "End training on test/data/annotate/genomes/A_H738.fasta"
    # Drain the 6 per-genome records; their order is unknown (parallel run)
    messages = [log_q.get().message for _ in range(6)]
    start1 = (
        "Start annotating test_runall_1by1_1 "
        "(from test/data/annotate/genomes/H299_H561.fasta sequence) "
        "with Prodigal")
    start2 = (
        "Start annotating test_runall_1by1_2 "
        "(from test/data/annotate/genomes/A_H738.fasta sequence) "
        "with Prodigal")
    # Check that all messages exist. We cannot know in which order,
    # as 'genomes' is a dict, hence unordered, and as computation is done in parallel
    assert start1 in messages
    assert start2 in messages
    # Prodigal cmd
    cmd1 = (
        "Prodigal command: prodigal -i test/data/annotate/genomes/H299_H561.fasta "
        "-d test/data/annotate/generated_by_unit-tests/H299_H561.fasta-prodigalRes/"
        "test_runall_1by1_1.ffn -a test/data/annotate/generated_by_unit-tests/"
        "H299_H561.fasta-prodigalRes/test_runall_1by1_1.faa -f gff "
        "-o test/data/annotate/generated_by_unit-tests/"
        "H299_H561.fasta-prodigalRes/test_runall_1by1_1.gff -t "
        "test/data/annotate/generated_by_unit-tests/A_H738.fasta.trn -q")
    cmd2 = (
        "Prodigal command: prodigal -i test/data/annotate/genomes/A_H738.fasta "
        "-d test/data/annotate/generated_by_unit-tests/A_H738.fasta-prodigalRes/"
        "test_runall_1by1_2.ffn -a test/data/annotate/generated_by_unit-tests/"
        "A_H738.fasta-prodigalRes/test_runall_1by1_2.faa -f gff "
        "-o test/data/annotate/generated_by_unit-tests/A_H738.fasta-prodigalRes/"
        "test_runall_1by1_2.gff -t "
        "test/data/annotate/generated_by_unit-tests/A_H738.fasta.trn -q")
    assert cmd1 in messages
    assert cmd2 in messages
    end1 = (
        "End annotating test_runall_1by1_1 (from test/data/annotate/genomes/"
        "H299_H561.fasta)")
    end2 = (
        "End annotating test_runall_1by1_2 (from test/data/annotate/genomes/"
        "A_H738.fasta)")
    assert end1 in messages
    assert end2 in messages
Exemple #30
0
def test_run_all_parallel_prokka_more_threads():
    """
    Check that there is no problem when running with more threads than genomes
    (6 threads and 2 genome: each genome uses 3 threads)
    Genomes H299 should run well but genome1.fasta should get an error
    """
    # Grab the log queue so we can inspect every emitted record
    log_q = my_logger("test_run_all_parallel_more_threads")[0]
    utils.init_logger(LOGFILE_BASE, 0, 'test_run_all_4threads')
    # genomes = {genome: [name, gpath, size, nbcont, l90]}
    name_ok, name_ko = "H299_H561.fasta", "genome1.fasta"
    path_ok = os.path.join(GEN_PATH, name_ok)
    path_ko = os.path.join(GEN_PATH, name_ko)
    genomes = {
        name_ok: ["test_runall_1by1_1", path_ok, path_ok, 12656, 3, 1],
        name_ko: ["test_runall_1by1_2", path_ko, path_ko, 456464645, 4, 1],
    }
    # 6 threads, no force, training file does not exist
    final = afunc.run_annotation_all(genomes, 6, False, GENEPATH,
                                     "nofile.trn")
    assert final[name_ok]
    assert not final[name_ko]
    # Check size of logs
    # -> starting log -> 1 log
    # -> for genome ok : start annotate, prokka cmd, end annotate -> 3 logs
    # -> for genome not ok : start annotate, prokka cmd, problem, end annotate -> 4 logs
    assert log_q.qsize() == 8
    assert log_q.get().message == "Annotating all genomes with prokka"
    # Drain the 7 per-genome records; their order is unknown (parallel run)
    messages = [log_q.get().message for _ in range(7)]
    start1 = ("Start annotating test_runall_1by1_1 "
              "from test/data/annotate/genomes/H299_H561.fasta "
              "with Prokka")
    start2 = ("Start annotating test_runall_1by1_2 "
              "from test/data/annotate/genomes/genome1.fasta "
              "with Prokka")
    # Check that all messages exist. We cannot know in which order,
    # as 'genomes' is a dict, hence unordered, and as computation is done in parallel
    assert start1 in messages
    assert start2 in messages
    # messages Prokka cmd
    cmd1 = (
        "Prokka command: prokka --outdir test/data/annotate/generated_by_unit-tests/"
        "H299_H561.fasta-prokkaRes --cpus 3 --prefix test_runall_1by1_1 "
        "--centre prokka test/data/annotate/genomes/H299_H561.fasta")
    cmd2 = (
        "Prokka command: prokka --outdir test/data/annotate/generated_by_unit-tests/"
        "genome1.fasta-prokkaRes --cpus 3 --prefix test_runall_1by1_2 "
        "--centre prokka test/data/annotate/genomes/genome1.fasta")
    assert cmd1 in messages
    assert cmd2 in messages
    # Messages end annotation cmd
    end1 = ("End annotating test_runall_1by1_1 from "
            "test/data/annotate/genomes/H299_H561.fasta.")
    end2 = ("End annotating test_runall_1by1_2 from "
            "test/data/annotate/genomes/genome1.fasta.")
    assert end1 in messages
    assert end2 in messages
    # Messages error annotation cmd
    err1 = "test_runall_1by1_2 genome1.fasta: several .faa files"
    assert err1 in messages