예제 #1
0
def test_main_novalid_genome_frominfo(capsys):
    """
    Run the pipeline from an info file (``from_info``), without list file nor database
    path, when no genome listed in that info file is found: the program must exit,
    write an error message to stderr, and create none of the result folders.
    """
    info_path = os.path.join(TEST_DIR, "lstinfo-no-genome.lst")
    # list_file and db_path are both None: genomes only come from the info file
    with pytest.raises(SystemExit):
        annot.main("cmd", None, None, GENEPATH, "TOTO", "1205", from_info=info_path,
                   prodigal_only=True)
    _, stderr = capsys.readouterr()

    # The error must be reported on stderr
    expected_msg = ("No genome listed in test/data/annotate/test_files/lstinfo-no-genome.lst "
                    "was found.")
    assert expected_msg in stderr

    # None of the result folders must have been created
    for folder in ("Proteins", "gff3", "LSTINFO"):
        assert not os.path.isdir(os.path.join(GENEPATH, folder))

    # The tmp_files folder must not contain anything
    assert not os.listdir(os.path.join(GENEPATH, "tmp_files"))
예제 #2
0
def test_main_existing_prodigaldir_errorannot(capsys):
    """
    Test that, when the pipeline is run with a given prodigal dir where the prodigal
    results of both genomes are incomplete (.faa file removed for the first one, .gff
    file removed for the second one), the program exits, the Proteins and Replicons
    folders are not created, and the problems are reported in the detailed log file.
    """
    list_file = os.path.join(TEST_DIR, "list_genomes-func-test-exist_dir.txt")

    # Create directory with all files needed for the test
    genome_path_used = os.path.join(GENEPATH, "genomes")
    os.makedirs(genome_path_used)
    ori_genome1 = os.path.join(GEN_PATH, "B2_A3_5.fasta-changeName.fna")
    ori_prod_g1 = os.path.join(EXP_DIR, "B2_A3_5.fasta-changeName.fna-prodigalRes")
    used_genome1 = os.path.join(genome_path_used, "B2_A3_5.fasta-changeName.fna")
    used_prod_g1 = used_genome1 + "-prodigalRes"
    # Copy original fasta file to genepath/genomes
    shutil.copyfile(ori_genome1, used_genome1)
    # Copy prodigal results to genepath/genomes/gname-prodigalRes
    shutil.copytree(ori_prod_g1, used_prod_g1)
    # Same thing for 2nd genome
    ori_genome2 = os.path.join(GEN_PATH, "H299_H561.fasta")
    ori_prod_g2 = os.path.join(EXP_DIR, "H299_H561.fasta-prodigalRes")
    used_genome2 = os.path.join(genome_path_used, "H299_H561.fasta")
    used_prod_g2 = used_genome2 + "-prodigalRes"
    shutil.copyfile(ori_genome2, used_genome2)
    shutil.copytree(ori_prod_g2, used_prod_g2)

    # Remove faa file of genome1, so that its existing prodigal results are incomplete
    os.remove(os.path.join(used_prod_g1, "ESCO.1116.00002.faa"))
    # Remove gff file of genome2, so that its existing prodigal results are incomplete
    os.remove(os.path.join(used_prod_g2, "ESCO.1015.00001.gff"))

    # Run annotation: it must stop the program
    name = "ESCO"
    date = "0417"
    with pytest.raises(SystemExit):
        annot.main("cmd", list_file, genome_path_used, GENEPATH, name, date, cutn=0,
                   res_annot_dir=genome_path_used, prodigal_only=True, verbose=15)

    # Check that Proteins and Replicons folders are not created
    prot_dir = os.path.join(GENEPATH, "Proteins")
    assert not os.path.isdir(prot_dir)
    rep_dir = os.path.join(GENEPATH, "Replicons")
    assert not os.path.isdir(rep_dir)

    # Check, in the detailed log file, that nothing was formatted and why
    logfile = os.path.join(GENEPATH,
                           "PanACoTA-annotate_list_genomes-func-test-exist_dir.log.details")
    with open(logfile, "r") as lc:
        # Join the lines only once; every expected message is searched in this text
        log_text = ' '.join(lc.readlines())
    assert ("Error: No genome was correctly annotated, "
            "no need to format them") in log_text
    assert ("Prodigal results folder test/data/annotate/generated_by_func-tests/genomes/"
            "H299_H561.fasta-prodigalRes already exists.") in log_text
    assert ("ESCO.1116.00002 B2_A3_5.fasta-changeName.fna: "
            "no or several .faa file(s)") in log_text
    assert ("ESCO.1015.00001 H299_H561.fasta: "
            "no or several .gff file(s)") in log_text
예제 #3
0
def test_run_exist_resdir(caplog):
    """
    When the given result directory already holds a .prt file in its Proteins folder,
    the program must exit with an error message, and leave the existing file in place.
    """
    proteins_dir = os.path.join(GENEPATH, "Proteins")
    existing_prt = os.path.join(proteins_dir, "toto.prt")
    # Prepare an output directory whose Proteins folder contains an (empty) .prt file
    os.makedirs(proteins_dir)
    with open(existing_prt, "w"):
        pass

    with pytest.raises(SystemExit):
        annot.main("cmd", "list_file.lst", "path/db", GENEPATH, "toto", "0123")

    expected_msg = ("ERROR: Your output directory already has .prt files in the "
                    "Proteins folder. Provide another result directory, or remove the "
                    "files in this one.")
    assert expected_msg in caplog.text
    # The pre-existing file is still there
    assert os.path.isfile(existing_prt)
예제 #4
0
def test_main_novalid_genome(capsys):
    """
    When no genome of the list file is found in the given database folder, the program
    must exit and write an explicit error message to stderr.
    """
    genomes_list = os.path.join(TEST_DIR, "list_no_genome.txt")
    with pytest.raises(SystemExit):
        annot.main("cmd", genomes_list, GEN_PATH, GENEPATH, "ESCO", "0417")
    _, stderr = capsys.readouterr()
    expected_msg = ("We did not find any genome listed in test/data/annotate/test_files/"
                    "list_no_genome.txt "
                    "in the folder test/data/annotate/genomes. Please check your list to give valid "
                    "genome names.")
    assert expected_msg in stderr
예제 #5
0
def test_main_frominfo(capsys):
    """
    Run the pipeline from an info file (no list file, no database path), with
    prodigal only.

    Checks the value returned by main, the message written to stdout, the number of
    files in each result folder, and the content of the generated LSTINFO file
    (compared to a reference file).
    """
    info_path = os.path.join(TEST_DIR, "lstinfo.lst")
    generated_lstinfo = os.path.join(GENEPATH, "LSTINFO-lstinfo.lst")
    returned = annot.main("cmd", None, None, GENEPATH, "TOTO", "1205", from_info=info_path,
                          prodigal_only=True)
    assert returned == (generated_lstinfo, 3)
    stdout, _ = capsys.readouterr()
    # Expected message on stdout
    assert "Generating distribution of L90 and #contigs graphs." in stdout

    # Each result folder must contain 3 entries
    for folder in ("Proteins", "gff3", "LSTINFO"):
        assert len(os.listdir(os.path.join(GENEPATH, folder))) == 3

    # Generated LSTINFO file (genome names, L90/nbcont values) must match the reference
    reference_lstinfo = os.path.join(EXP_DIR, "exp_LSTINFO-test-main-frominfo.lst")
    assert tutil.compare_order_content(reference_lstinfo, generated_lstinfo)
예제 #6
0
def test_main_onexistingprodigaldir_train_exists(capsys):
    """
    Run the pipeline (prodigal only) on a result folder which already contains the
    prodigal results of both genomes, as well as a training file.

    Expected, according to the log messages checked below:

    - the existing training file is used
    - existing prodigal result folders are detected, prodigal does not run again
    - genomes are formatted

    2 genomes in list file: B2_A3_5.fasta-changeName.fna and H299_H561.fasta
    """
    # Result folder, filled with copies of the reference prodigalRes folders
    prodigal_dir = os.path.join(GENEPATH, "results-prodigal")
    os.makedirs(prodigal_dir)
    for res_name in ("B2_A3_5.fasta-changeName.fna-prodigalRes",
                     "H299_H561.fasta-prodigalRes"):
        shutil.copytree(os.path.join(EXP_DIR, res_name), os.path.join(prodigal_dir, res_name))
    # Add an (empty) training file in the result folder
    with open(os.path.join(prodigal_dir, "H299_H561.fasta.trn"), "w"):
        pass

    # Run the pipeline
    genomes_list = os.path.join(TEST_DIR, "list_genomes-func-test-exist_dir.txt")
    generated_lst = os.path.join(GENEPATH, "LSTINFO-list_genomes-func-test-exist_dir.lst")
    reference_lst = os.path.join(EXP_DIR, "exp_LSTINFO-func-annot_exists-prokkadir.lst")
    returned = annot.main("cmd", genomes_list, GEN_PATH, GENEPATH, "ESCO", "0417", cutn=0,
                          res_annot_dir=prodigal_dir, verbose=3, prodigal_only=True)
    assert returned == (generated_lst, 2)
    capsys.readouterr()
    # tmp_files folder must be empty (annotation results are in the given result folder)
    assert not os.listdir(os.path.join(GENEPATH, "tmp_files"))
    # LSTINFO file exists and has the expected content
    assert os.path.isfile(generated_lst)
    assert tutil.compare_order_content(generated_lst, reference_lst)

    # Check messages of the detailed log file
    details_log = os.path.join(GENEPATH,
                               "PanACoTA-annotate_list_genomes-func-test-exist_dir.log.details")
    with open(details_log, "r") as log_handle:
        log_text = " ".join(log_handle.readlines())
    expected_messages = [
        ("A training file already exists (test/data/annotate/generated_by_func-tests/"
         "results-prodigal/H299_H561.fasta.trn). It will be used to annotate "
         "all genomes."),
        ("Prodigal results folder test/data/annotate/generated_by_func-tests/"
         "results-prodigal/H299_H561.fasta-prodigalRes "
         "already exists"),
        ("Prodigal results folder test/data/annotate/generated_by_func-tests/"
         "results-prodigal/B2_A3_5.fasta-changeName.fna-prodigalRes "
         "already exists"),
        ("Prodigal did not run again. Formatting step will use already generated results of "
         "Prodigal in test/data/annotate/generated_by_func-tests/results-prodigal/"
         "H299_H561.fasta-prodigalRes. "
         "If you want to re-run Prodigal, first remove this result folder, or use '-F' or "
         "'--force' option."),
        "Formatting all genomes",
        "Annotation step done",
    ]
    for message in expected_messages:
        assert message in log_text
예제 #7
0
def test_main_qc():
    """
    Test that when only QC is run (``qc_only=True``), main returns ``("", 0)`` and writes:
    - the list of all genomes with their characteristics
    - the list of genomes that would be discarded for annotation
    - the 2 png files
    """
    list_file = os.path.join(TEST_DIR, "list_genomes-func-test-default.txt")
    name = "ESCO"
    cutn = 0
    l90 = 1
    date = "0417"
    assert annot.main("cmd", list_file, GEN_PATH, GENEPATH, name, date, l90=l90,
                      cutn=cutn, qc_only=True) == ("", 0)
    # Check files are here
    lstfile = os.path.join(GENEPATH, "ALL-GENOMES-info-list_genomes-func-test-default.lst")
    exp_lstfile = os.path.join(EXP_DIR, "exp_ALL-GENOMES-QC.lst")
    discardedfile = os.path.join(GENEPATH, "discarded-list_genomes-func-test-default.lst")
    exp_discarded = os.path.join(EXP_DIR, "exp_discarded_QC.lst")
    assert os.path.isfile(lstfile)
    assert os.path.isfile(discardedfile)
    assert os.path.isfile(os.path.join(GENEPATH,
                          "QC_L90-list_genomes-func-test-default.png"))
    assert os.path.isfile(os.path.join(GENEPATH,
                          "QC_nb-contigs-list_genomes-func-test-default.png"))
    # Check content of both list files (all genomes, and discarded genomes)
    assert tutil.compare_file_content(lstfile, exp_lstfile)
    assert tutil.compare_file_content(discardedfile, exp_discarded)
예제 #8
0
def test_main_existing_prokkadir_errorannot():
    """
    Run the pipeline on a folder already containing prokka results for both genomes,
    the results of one of them being incomplete (its .tbl file has been removed).

    Only the other genome must be formatted, without being annotated again, and the
    problem must be reported in the detailed log file.
    """
    genomes_list = os.path.join(TEST_DIR, "list_genomes-func-test-exist_dir.txt")

    # Working folder: each genome sequence, next to its '<genome>-prokkaRes' folder
    work_dir = os.path.join(GENEPATH, "genomes")
    os.makedirs(work_dir)
    for genome in ("B2_A3_5.fasta-changeName.fna", "H299_H561.fasta"):
        local_genome = os.path.join(work_dir, genome)
        # fasta file first, then the folder with its prokka results
        shutil.copyfile(os.path.join(GEN_PATH, genome), local_genome)
        shutil.copytree(os.path.join(EXP_DIR, genome + "-prokkaRes"),
                        local_genome + "-prokkaRes")

    # Make prokka results of the first genome incomplete by removing its tbl file
    os.remove(os.path.join(work_dir, "B2_A3_5.fasta-changeName.fna-prokkaRes",
                           "ESCO.1116.00002.tbl"))

    # Run annotation: only 1 genome is expected in the output
    expected_info = os.path.join(GENEPATH, "LSTINFO-list_genomes-func-test-exist_dir.lst")
    returned = annot.main("cmd", genomes_list, work_dir, GENEPATH, "ESCO", "0417", cutn=0,
                          res_annot_dir=work_dir)
    assert returned == (expected_info, 1)

    # Only 1 genome formatted: 1 file in each of those result folders
    for folder in ("Proteins", "Replicons"):
        assert len(os.listdir(os.path.join(GENEPATH, folder))) == 1

    details_log = os.path.join(GENEPATH,
                               "PanACoTA-annotate_list_genomes-func-test-exist_dir.log.details")
    with open(details_log, "r") as log_handle:
        log_text = " ".join(log_handle.readlines())
    # Existing results of the formatted genome were detected
    assert ("Prokka results folder "
            "test/data/annotate/generated_by_func-tests/genomes/H299_H561.fasta-prokkaRes "
            "already exists") in log_text
    # The other genome is reported as having problems in its prokka results
    assert "ESCO.1116.00002 B2_A3_5.fasta-changeName.fna: no .tbl file" in log_text
    assert ("Problems in the files contained in your already existing output dir "
            "(test/data/annotate/generated_by_func-tests/genomes/"
            "B2_A3_5.fasta-changeName.fna-prokkaRes). Please check it, "
            "or remove it to re-annotate.") in log_text
예제 #9
0
def test_main_given_tmp_verbose3(capsys):
    """
    Run the pipeline with a tmp folder given by the user and ``verbose=3``.

    Checks that:

    - main returns the LSTINFO file path together with 3 genomes
    - warnings are written to stderr (toto.fst, listed in the list file, does not exist
      and is ignored when concatenating)
    - concatenated and split sequences are written in the given tmp folder
    - the prokka result folder is created in the given tmp folder
    """
    genomes_list = os.path.join(TEST_DIR, "list_genomes-func-test-default.txt")
    given_tmp = os.path.join(GENEPATH, "tmp_funcGivenTmp")
    expected_info = os.path.join(GENEPATH, "LSTINFO-list_genomes-func-test-default.lst")
    # l90 (10) is given as a positional argument
    returned = annot.main("cmd", genomes_list, GEN_PATH, GENEPATH, "ESCO", "0417", 10,
                          cutn=3, tmp_dir=given_tmp, verbose=3)
    assert returned == (expected_info, 3)
    _, stderr = capsys.readouterr()

    # Warnings must be written to stderr
    assert "WARNING" in stderr
    assert ("toto.fst genome file does not exist. Its file will be ignored when "
            "concatenating ['A_H738.fasta', 'genome1.fasta', 'toto.fst']") in stderr

    # Concatenated sequences are in the given tmp folder
    for concatenated in ("A_H738.fasta-all.fna", "H299_H561.fasta-all.fna"):
        assert os.path.isfile(os.path.join(given_tmp, concatenated))
    # 6 fna files in total in this folder, 4 of them being split sequences
    assert len(glob.glob(os.path.join(given_tmp, '*.fna'))) == 6
    assert len(glob.glob(os.path.join(given_tmp, '*split3N.fna'))) == 4

    # Content of split sequences (contig headers) must match the reference files,
    # for the concatenated genome as well as for the complete genome
    for split_name in ("A_H738.fasta-all.fna_prokka-split3N.fna",
                       "complete_genome.fna_prokka-split3N.fna"):
        reference = os.path.join(EXP_DIR, "exp_" + split_name)
        generated = os.path.join(given_tmp, split_name)
        assert tutil.compare_order_content(reference, generated)

    # Prokka result folder is in the given tmp folder, named after the split sequence;
    # there is none named after the H299_H561 concatenated file
    assert os.path.isdir(os.path.join(given_tmp,
                                      "A_H738.fasta-all.fna_prokka-split3N.fna-prokkaRes"))
    assert not os.path.isdir(os.path.join(given_tmp, "H299_H561.fasta-all.fna-prokkaRes"))
예제 #10
0
def test_main_all_discard_nbcont(capsys):
    """
    Run the pipeline with a threshold of 0 for the number of contigs: main must return
    ``("", 0)``, and a message saying that no genome is kept for annotation must be
    written to stdout.
    """
    genomes_list = os.path.join(TEST_DIR, "list_genomes-func-test-default.txt")
    returned = annot.main("cmd", genomes_list, GEN_PATH, GENEPATH, "ESCO", "0417", nbcont=0,
                          cutn=0)
    assert returned == ("", 0)
    # The 2 concatenated genomes must have been written in the tmp folder
    tmp_folder = os.path.join(GENEPATH, "tmp_files")
    for concatenated in ("H299_H561.fasta-all.fna", "A_H738.fasta-all.fna"):
        assert os.path.isfile(os.path.join(tmp_folder, concatenated))
    stdout, _ = capsys.readouterr()
    assert 'No genome kept for annotation' in stdout
예제 #11
0
def test_main_onexistingprokkadir(capsys):
    """
    Run the pipeline with, as annotation folder, a folder already containing the prokka
    results of the genomes: existing results are detected, prokka does not run
    again (see log messages checked below), and the 2 genomes are formatted.
    """
    genomes_list = os.path.join(TEST_DIR, "list_genomes-func-test-exist_dir.txt")
    generated_lst = os.path.join(GENEPATH, "LSTINFO-list_genomes-func-test-exist_dir.lst")
    reference_lst = os.path.join(EXP_DIR, "exp_LSTINFO-func-annot_exists-prokkadir.lst")
    returned = annot.main("cmd", genomes_list, GEN_PATH, GENEPATH, "ESCO", "0417", cutn=0,
                          res_annot_dir=EXP_DIR, verbose=3)
    assert returned == (generated_lst, 2)
    capsys.readouterr()
    # tmp_files folder must be empty (prokka results are in another folder)
    assert not os.listdir(os.path.join(GENEPATH, "tmp_files"))
    # LSTINFO file exists and has the expected content
    assert os.path.isfile(generated_lst)
    assert tutil.compare_order_content(generated_lst, reference_lst)

    # Check messages of the detailed log file
    details_log = os.path.join(GENEPATH,
                               "PanACoTA-annotate_list_genomes-func-test-exist_dir.log.details")
    with open(details_log, "r") as log_handle:
        log_text = " ".join(log_handle.readlines())
    expected_messages = [
        ("Prokka results folder "
         "test/data/annotate/exp_files/B2_A3_5.fasta-changeName.fna-prokkaRes "
         "already exists"),
        ("Prokka did not run again, formatting step used already generated results of Prokka "
         "in test/data/annotate/exp_files/H299_H561.fasta-prokkaRes. "
         "If you want to re-run prokka, first remove this result folder, or use '-F' or "
         "'--force' option if you want to rerun prokka "
         "for all genomes."),
    ]
    for message in expected_messages:
        assert message in log_text
예제 #12
0
def test_main_existing_prokkadir_errorformat():
    """
    Run the pipeline on a folder already containing prokka results for both genomes.
    For the first genome, the results copied come from another sequence (the
    'short-contig' reference folder), so that the formatting step fails for it.

    Only 1 genome must be returned as formatted, and the formatting problem must be
    reported in the detailed log file.
    """
    genomes_list = os.path.join(TEST_DIR, "list_genomes-func-test-exist_dir.txt")

    # Working folder with, for each genome: its sequence, and its '<genome>-prokkaRes'
    # folder in which the sequence is also copied
    work_dir = os.path.join(GENEPATH, "genomes")
    os.makedirs(work_dir)
    genomes_and_results = [
        # prokka results of 1st genome: tbl file not corresponding to the sequence
        ("B2_A3_5.fasta-changeName.fna",
         "B2_A3_5.fasta-changeName.fna-short-contig.fna-prokkaRes"),
        ("H299_H561.fasta", "H299_H561.fasta-prokkaRes"),
    ]
    for genome, prokka_source in genomes_and_results:
        source_genome = os.path.join(GEN_PATH, genome)
        local_genome = os.path.join(work_dir, genome)
        local_prokka = local_genome + "-prokkaRes"
        shutil.copyfile(source_genome, local_genome)
        shutil.copytree(os.path.join(EXP_DIR, prokka_source), local_prokka)
        # Sequence file is also added to the prokka folder
        shutil.copyfile(source_genome, os.path.join(local_prokka, genome))

    # Run annotation: only 1 genome is expected in the output
    expected_info = os.path.join(GENEPATH, "LSTINFO-list_genomes-func-test-exist_dir.lst")
    returned = annot.main("cmd", genomes_list, work_dir, GENEPATH, "ESCO", "0417", cutn=0,
                          res_annot_dir=work_dir)
    assert returned == (expected_info, 1)

    # Check messages of the detailed log file
    details_log = os.path.join(GENEPATH,
                               "PanACoTA-annotate_list_genomes-func-test-exist_dir.log.details")
    with open(details_log, "r") as log_handle:
        log_text = ' '.join(log_handle.readlines())
    expected_messages = [
        # No problem with prokka step for genome 1: existing results are used
        ("Prokka results folder test/data/annotate/generated_by_func-tests/genomes/"
         "B2_A3_5.fasta-changeName.fna-prokkaRes already exists."),
        ("Prokka did not run again, formatting step used already generated "
         "results of Prokka in test/data/annotate/generated_by_func-tests/genomes/"
         "B2_A3_5.fasta-changeName.fna-prokkaRes."),
        # But error while trying to format it
        ("'changesHead.0417.00010.0005' found in "
         "test/data/annotate/generated_by_func-tests/genomes/"
         "B2_A3_5.fasta-changeName.fna-prokkaRes/test.0417.00002.tbl "
         "does not exist in test/data/annotate/generated_by_func-tests/genomes/"
         "B2_A3_5.fasta-changeName.fna"),
        "Problems while generating LSTINFO file for ESCO.1116.00002",
    ]
    for message in expected_messages:
        assert message in log_text
예제 #13
0
def test_main_prodigal_small_ok(capsys):
    """
    Run the pipeline with prodigal only and ``small=True``, on an empty annotation
    folder and a list file written by the test itself.

    Expected, according to the log messages checked below:

    - both genomes are annotated by prodigal, with '-p meta' (and no training file)
    - genomes are formatted

    2 genomes in list file: A_H738.fasta and H299_H561.fasta (the latter with its
    own name, TOTO)
    """
    # Empty folder for annotation results
    prodigal_dir = os.path.join(GENEPATH, "results-prodigal")
    os.makedirs(prodigal_dir)

    # Write the list file
    genomes_list = os.path.join(GENEPATH, "list_genomes_prodigal_small.txt")
    with open(genomes_list, "w") as list_handle:
        list_handle.writelines(["A_H738.fasta \n", "H299_H561.fasta::TOTO"])

    # Run the pipeline
    generated_lst = os.path.join(GENEPATH, "LSTINFO-list_genomes_prodigal_small.lst")
    returned = annot.main("cmd", genomes_list, GEN_PATH, GENEPATH, "ESCO", "0417", cutn=0,
                          res_annot_dir=prodigal_dir, verbose=3, prodigal_only=True,
                          small=True)
    assert returned == (generated_lst, 2)
    capsys.readouterr()
    # LSTINFO file must exist
    assert os.path.isfile(generated_lst)

    # Check messages of the detailed log file
    details_log = os.path.join(GENEPATH,
                               "PanACoTA-annotate_list_genomes_prodigal_small.log.details")
    with open(details_log, "r") as log_handle:
        log_text = " ".join(log_handle.readlines())
    expected_messages = [
        ("Start annotating ESCO.0417.00001 (from test/data/annotate/genomes/"
         "A_H738.fasta sequence) with Prodigal"),
        ("Start annotating TOTO.0417.00001 (from test/data/annotate/genomes/"
         "H299_H561.fasta sequence) with Prodigal"),
        ("Prodigal command: "
         "prodigal -i test/data/annotate/genomes/A_H738.fasta "
         "-d test/data/annotate/generated_by_func-tests/results-prodigal/"
         "A_H738.fasta-prodigalRes/ESCO.0417.00001.ffn "
         "-a test/data/annotate/generated_by_func-tests/results-prodigal/"
         "A_H738.fasta-prodigalRes/ESCO.0417.00001.faa "
         "-f gff -o test/data/annotate/generated_by_func-tests/results-prodigal/"
         "A_H738.fasta-prodigalRes/ESCO.0417.00001.gff "
         "-p meta -q"),
        ("Prodigal command: "
         "prodigal -i test/data/annotate/genomes/H299_H561.fasta "
         "-d test/data/annotate/generated_by_func-tests/results-prodigal/"
         "H299_H561.fasta-prodigalRes/TOTO.0417.00001.ffn "
         "-a test/data/annotate/generated_by_func-tests/results-prodigal/"
         "H299_H561.fasta-prodigalRes/TOTO.0417.00001.faa "
         "-f gff -o test/data/annotate/generated_by_func-tests/results-prodigal/"
         "H299_H561.fasta-prodigalRes/TOTO.0417.00001.gff "
         "-p meta -q"),
        ("End annotating ESCO.0417.00001 (from test/data/annotate/genomes/"
         "A_H738.fasta)"),
        ("End annotating TOTO.0417.00001 (from test/data/annotate/genomes/"
         "H299_H561.fasta)"),
        "Formatting all genomes",
        "Annotation step done",
    ]
    for message in expected_messages:
        assert message in log_text
예제 #14
0
def test_main_prodigal_train_empty(capsys):
    """
    Run the pipeline (prodigal only) on an annotation folder which contains no prodigal
    result, but an empty training file: the program must exit.

    Expected, according to the log messages checked below:

    - the existing training file is used (no training)
    - prodigal is launched on both genomes with this training file, and fails
    - no genome is formatted

    2 genomes in list file: B2_A3_5.fasta-changeName.fna and H299_H561.fasta
    """
    # Annotation folder only containing an empty training file
    prodigal_dir = os.path.join(GENEPATH, "results-prodigal")
    os.makedirs(prodigal_dir)
    with open(os.path.join(prodigal_dir, "H299_H561.fasta.trn"), "w"):
        pass

    # Run the pipeline: it must stop the program
    genomes_list = os.path.join(TEST_DIR, "list_genomes-func-test-exist_dir.txt")
    generated_lst = os.path.join(GENEPATH, "LSTINFO-list_genomes-func-test-exist_dir.lst")
    reference_lst = os.path.join(EXP_DIR, "exp_LSTINFO-func-annot_exists-prokkadir.lst")
    with pytest.raises(SystemExit):
        annot.main("cmd", genomes_list, GEN_PATH, GENEPATH, "ESCO", "0417", cutn=0,
                   res_annot_dir=prodigal_dir, verbose=3, prodigal_only=True)
    capsys.readouterr()
    # LSTINFO file exists and has the expected content
    assert os.path.isfile(generated_lst)
    assert tutil.compare_order_content(generated_lst, reference_lst)

    # Check messages of the detailed log file
    details_log = os.path.join(GENEPATH,
                               "PanACoTA-annotate_list_genomes-func-test-exist_dir.log.details")
    with open(details_log, "r") as log_handle:
        log_text = " ".join(log_handle.readlines())
    expected_messages = [
        ("A training file already exists (test/data/annotate/generated_by_func-tests/"
         "results-prodigal/H299_H561.fasta.trn). It will be used to annotate "
         "all genomes."),
        ("Start annotating ESCO.1116.00002 (from test/data/annotate/genomes/"
         "B2_A3_5.fasta-changeName.fna sequence) with Prodigal"),
        ("Start annotating ESCO.1015.00001 (from test/data/annotate/genomes/"
         "H299_H561.fasta sequence) with Prodigal"),
        ("Prodigal command: "
         "prodigal -i test/data/annotate/genomes/B2_A3_5.fasta-changeName.fna "
         "-d test/data/annotate/generated_by_func-tests/results-prodigal/"
         "B2_A3_5.fasta-changeName.fna-prodigalRes/ESCO.1116.00002.ffn "
         "-a test/data/annotate/generated_by_func-tests/results-prodigal/"
         "B2_A3_5.fasta-changeName.fna-prodigalRes/ESCO.1116.00002.faa "
         "-f gff -o test/data/annotate/generated_by_func-tests/results-prodigal/"
         "B2_A3_5.fasta-changeName.fna-prodigalRes/ESCO.1116.00002.gff "
         "-t test/data/annotate/generated_by_func-tests/results-prodigal/H299_H561.fasta.trn "
         "-q"),
        ("Prodigal command: "
         "prodigal -i test/data/annotate/genomes/H299_H561.fasta "
         "-d test/data/annotate/generated_by_func-tests/results-prodigal/"
         "H299_H561.fasta-prodigalRes/ESCO.1015.00001.ffn "
         "-a test/data/annotate/generated_by_func-tests/results-prodigal/"
         "H299_H561.fasta-prodigalRes/ESCO.1015.00001.faa "
         "-f gff -o test/data/annotate/generated_by_func-tests/results-prodigal/"
         "H299_H561.fasta-prodigalRes/ESCO.1015.00001.gff "
         "-t test/data/annotate/generated_by_func-tests/results-prodigal/H299_H561.fasta.trn "
         "-q"),
        ("Error while trying to run prodigal. See test/data/annotate/generated_by_func-tests/"
         "results-prodigal/H299_H561.fasta-prodigal.log.err."),
        ("Error while trying to run prodigal. See test/data/annotate/generated_by_func-tests/"
         "results-prodigal/"
         "B2_A3_5.fasta-changeName.fna-prodigal.log.err"),
        "No genome was correctly annotated, no need to format them.",
    ]
    for message in expected_messages:
        assert message in log_text
예제 #15
0
def test_main_onexistingprodigaldir(capsys):
    """
    Test that, when the pipeline is run with a given prodigal dir, where prodigal results already
    exist, and are ok, all runs well, no re-annotation, just format

    - trains (the log must contain the training command and the training error message)
    - no re-annotation
    - format

    main function arguments:
    cmd, list_file, db_path, res_dir, name, date, l90=100, nbcont=999, cutn=5,
         threads=1, force=False, qc_only=False, from_info=None, tmp_dir=None, res_annot_dir=None,
         verbose=0, quiet=False, prodigal_only=False

    """
    # FOLDER with all results
    # Create result folder, with existing prodigal folders (which are OK)
    res_folder = os.path.join(GENEPATH, "results-prodigal")
    os.makedirs(res_folder)
    # Copy the expected prodigalRes folders of both genomes into the result folder
    for folder in ("B2_A3_5.fasta-changeName.fna-prodigalRes", "H299_H561.fasta-prodigalRes"):
        shutil.copytree(os.path.join(EXP_DIR, folder), os.path.join(res_folder, folder))

    list_file = os.path.join(TEST_DIR, "list_genomes-func-test-exist_dir.txt")
    name = "ESCO"
    date = "0417"
    lstout = os.path.join(GENEPATH, "LSTINFO-list_genomes-func-test-exist_dir.lst")
    lstexp = os.path.join(EXP_DIR, "exp_LSTINFO-func-annot_exists-prokkadir.lst")
    assert annot.main("cmd", list_file, GEN_PATH, GENEPATH, name, date, cutn=0,
                      res_annot_dir=res_folder, verbose=3, prodigal_only=True) == (lstout, 2)
    # Read the captured stdout/stderr; their content is not checked in this test
    capsys.readouterr()
    # Check that tmp files folder is empty (Prodigal results are in res_folder, not in tmp_files)
    assert len(os.listdir(os.path.join(GENEPATH, "tmp_files"))) == 0
    # Test that result files are in result dir
    assert os.path.isfile(lstout)
    assert tutil.compare_order_content(lstout, lstexp)
    logfile = os.path.join(GENEPATH,
                           "PanACoTA-annotate_list_genomes-func-test-exist_dir.log.details")
    # Join the log lines once: every message below is searched in this single string
    with open(logfile, "r") as lc:
        log_text = " ".join(lc.readlines())
    assert ("Prodigal will train using "
            "test/data/annotate/genomes/H299_H561.fasta") in log_text
    # This assertion used to lack its 'in ...' part, so it only tested that a non-empty
    # string literal is truthy and could never fail.
    assert ("prodigal command: prodigal -i test/data/annotate/genomes/H299_H561.fasta "
            "-t test/data/annotate/generated_by_func-tests/results-prodigal/"
            "H299_H561.fasta.trn") in log_text
    assert ("Error while trying to train prodigal on H299_H561.fasta. "
            "See test/data/annotate/generated_by_func-tests/results-prodigal/"
            "H299_H561.fasta.trn-prodigal-train.log.err") in log_text
    assert ("Prodigal results folder test/data/annotate/generated_by_func-tests/"
            "results-prodigal/H299_H561.fasta-prodigalRes "
            "already exists") in log_text
    assert ("Prodigal did not run again. Formatting step will use already generated results of "
            "Prodigal in test/data/annotate/generated_by_func-tests/results-prodigal/"
            "H299_H561.fasta-prodigalRes. "
            "If you want to re-run Prodigal, first remove this result folder, or use '-F' or "
            "'--force' option.") in log_text
    assert "Formatting all genomes" in log_text
    assert "Annotation step done" in log_text
예제 #16
0
def test_main_existresdirforce(capsys):
    """
    Run the pipeline on an already existing result directory with the force option on.

    The result folders must be removed and regenerated: they contain the expected files,
    and the file put there before the run is gone.
    4 genomes are given:
    - 4 are OK
    - trained on complete_genome_big.fna
    """
    tmp_dir = os.path.join(GENEPATH, "tmp_files")
    protdir = os.path.join(GENEPATH, "Proteins")
    stale_prt = os.path.join(protdir, "toto.prt")

    # Prepare an output directory already holding a prt file in its Proteins folder
    os.makedirs(protdir)
    with open(stale_prt, "w"):
        pass
    assert os.path.isfile(stale_prt)

    # Run the annotation with force=True
    list_file = os.path.join(TEST_DIR, "list_genomes-func-test-force.txt")
    info_file = os.path.join(GENEPATH, "LSTINFO-list_genomes-func-test-force.lst")
    result = annot.main("cmd", list_file, GEN_PATH, GENEPATH, "ESCO", "0417", force=True,
                        l90=5, prodigal_only=True, cutn=3)
    assert result == (info_file, 4)
    capsys.readouterr()

    # tmp_files must contain:
    # -> 2 fna files created by concatenation
    # -> + 4 fna files created by the split on 3N
    for concatenated in ("A_H738.fasta-all.fna", "H299_H561.fasta-all.fna"):
        assert os.path.isfile(os.path.join(tmp_dir, concatenated))
    assert len(glob.glob(os.path.join(tmp_dir, '*.fna'))) == 6
    assert len(glob.glob(os.path.join(tmp_dir, '*split3N.fna'))) == 4

    # Split contigs are compared with the expected files (headers renamed with a unique ID),
    # including the complete genome
    expected_vs_generated = (
        ("exp_A_H738.fasta-all.fna_prokka-split3N.fna",
         "A_H738.fasta-all.fna_prodigal-split3N.fna"),
        ("exp_complete_genome_big.fna-split3N.fna",
         "complete_genome_big.fna_prodigal-split3N.fna"),
    )
    for exp_name, res_name in expected_vs_generated:
        assert tutil.compare_order_content(os.path.join(EXP_DIR, exp_name),
                                           os.path.join(tmp_dir, res_name))

    # Training was done on the expected genome, and the training file is the expected one
    trn_file = os.path.join(tmp_dir, "complete_genome_big.fna_prodigal-split3N.fna.trn")
    exp_trn_file = os.path.join(EXP_DIR, "exp_complete_genome_big.fna.trn")
    assert tutil.compare_files_bin(trn_file, exp_trn_file)

    # All expected temporary files are in result/tmp_files
    expected_tmp_files = (
        "B2_A3_5.fasta-changeName.fna_prodigal-split3N.fna",
        "H299_H561.fasta-all.fna_prodigal-split3N.fna",
        "H299_H561.fasta-all.fna",
        "A_H738.fasta-all.fna_prodigal-split3N.fna",
        "A_H738.fasta-all.fna",
    )
    for tmp_name in expected_tmp_files:
        assert os.path.isfile(os.path.join(tmp_dir, tmp_name))

    # Proteins folder holds exactly the 4 newly generated prt files: the file created
    # before the run has been removed
    assert os.path.isdir(protdir)
    assert len(os.listdir(protdir)) == 4
    assert not os.path.isfile(stale_prt)
    for prt_name in ("ESCO.0417.00001.prt", "ESCO.1015.00002.prt",
                     "ESCO.1015.00003.prt", "ESCO.1116.00004.prt"):
        assert os.path.isfile(os.path.join(protdir, prt_name))
예제 #17
0
def main(cmd, args_all, args_prepare, args_annot, args_pan, args_corepers,
         args_align, args_tree):
    """
    Call all modules, one by one, using output of one as input for the next one

    Parameters
    ----------
    cmd : str
        command line used to launch the program
    args_all : tuple
        arguments common to all modules: output directory (str),
        threads (int), verbose (int), quiet (bool)
    args_prepare : tuple
        arguments for prepare module (see subcommands.prepare.py), in this order:
        NCBI_species_name (str), NCBI_species_taxid (int), NCBI_taxid (int),
        NCBI_strains (str), levels (str), NCBI_section (str),
        tmp_dir (str), norefseq (bool), db_dir (str),
        only_mash (bool), info_file (str), l90 (int), nbcont (int), cutn (int),
        min_dist (float), max_dist (float)
    args_annot : tuple
        arguments for annotate module (see subcommands/annotate.py): name (str), qc_only (bool),
        date (str), prodigal_only (bool), small (bool)
    args_pan : tuple
        arguments for pangenome module (see subcommands/pangenome.py): min_id (float),
        clust_mode (int), spe_dir (str), outfile (str)
    args_corepers : tuple
        arguments for corepers module (see subcommands.corepers.py): tol (float), mixed (bool),
        multi (bool), floor (bool)
    args_align : tuple
        arguments for align module (see subcommands.align.py): prot_ali (bool).
        A bare value (not wrapped in a tuple or list) is also accepted and used as prot_ali.
    args_tree : tuple
        arguments for tree module (see subcommands.tree.py): soft (str), model (str), boot (bool),
        write_boot (bool), memory (str), fast (bool)

    Returns
    -------
    str or int
        "QC_only done" if qc_only is set (the pipeline stops after the annotate module),
        0 once all modules have been run
    """
    outdir, threads, verbose, quiet = args_all
    os.makedirs(outdir, exist_ok=True)
    # Initialize logger
    import logging
    # set level of logger: level is the minimum level that will be considered.
    if verbose <= 1:
        level = logging.INFO
    # for 2 <= verbose < 15, ignore only debug
    elif verbose < 15:
        level = utils.detail_lvl()  # int corresponding to detail level
    # for verbose >= 15, write everything
    else:
        level = logging.DEBUG
    utils.init_logger(os.path.join(outdir, "PanACoTA-all_modules"),
                      level,
                      name='all_modules',
                      verbose=verbose,
                      quiet=quiet)
    logger = logging.getLogger('all_modules')
    logger.info(f'PanACoTA version {version}')
    logger.info("Command used\n \t > " + cmd)

    # Run prepare module
    outdir_prepare = os.path.join(outdir, "1-prepare_module")
    (NCBI_species_name, NCBI_species_taxid, NCBI_taxid, NCBI_strains, levels,
     NCBI_section, tmp_dir, norefseq, db_dir, only_mash, info_file, l90,
     nbcont, cutn, min_dist, max_dist) = args_prepare
    logger.info("prepare step")
    # info_file is rebound to the value returned by the prepare module, which is then
    # handed to the annotate module
    info_file = prepare.main("PanACoTA prepare", NCBI_species_name,
                             NCBI_species_taxid, NCBI_taxid, NCBI_strains,
                             levels, NCBI_section, outdir_prepare, tmp_dir,
                             threads, norefseq, db_dir, only_mash, info_file,
                             l90, nbcont, cutn, min_dist, max_dist, verbose,
                             quiet)

    # Run annotate module
    # No list file nor database path is given: the annotate module receives info_file instead.
    list_file = ""
    db_path = ""
    # The tmp_dir of the prepare module is not reused by the annotate module
    tmp_dir = ""
    # force stays False for every module using it (annotate and align)
    force = False
    outdir_annotate = os.path.join(outdir, "2-annotate_module")
    (name, qc_only, date, prodigal_only, small) = args_annot
    res_annot_dir = None

    logger.info("annotate step")
    lstinfo, nbgenomes = annotate.main("PanACoTA annotate",
                                       list_file,
                                       db_path,
                                       outdir_annotate,
                                       name,
                                       date,
                                       l90,
                                       nbcont,
                                       cutn,
                                       threads,
                                       force,
                                       qc_only,
                                       info_file,
                                       tmp_dir,
                                       res_annot_dir,
                                       verbose,
                                       quiet,
                                       prodigal_only=prodigal_only,
                                       small=small)
    if qc_only:
        return "QC_only done"

    # Pangenome step
    name_pan = f"{name}_{nbgenomes}"
    outdir_pan = os.path.join(outdir, "3-pangenome_module")
    dbpath = os.path.join(outdir_annotate, "Proteins")
    (min_id, clust_mode, spe_dir, outfile) = args_pan
    logger.info("pangenome step")
    panfile = pangenome.main("PanACoTA pangenome",
                             lstinfo,
                             name_pan,
                             dbpath,
                             min_id,
                             outdir_pan,
                             clust_mode,
                             spe_dir,
                             threads,
                             outfile,
                             verbose=verbose,
                             quiet=quiet)

    # Coregenome step
    outdir_corpers = os.path.join(outdir, "4-corepers_module")
    logger.info("corepers step")
    (tol, mixed, multi, floor) = args_corepers
    lstinfo_file = ""  # include all genomes in core
    corepers_file = corepers.main("PanACoTA corepers", panfile, tol, multi,
                                  mixed, outdir_corpers, lstinfo_file, floor,
                                  verbose, quiet)
    # Align step
    outdir_align = os.path.join(outdir, "5-align_module")
    logger.info("align step")
    # '(prot_ali) = args_align' is a plain assignment, not a tuple unpacking: with the
    # documented 1-element tuple, prot_ali would be the tuple itself, which is always truthy
    # (even for (False,)). Unpack explicitly, and keep accepting a bare value.
    if isinstance(args_align, (tuple, list)):
        (prot_ali,) = args_align
    else:
        prot_ali = args_align
    align_file = align.main("PanACoTA align",
                            corepers_file,
                            lstinfo,
                            name_pan,
                            outdir_annotate,
                            outdir_align,
                            prot_ali,
                            threads,
                            force,
                            verbose=verbose,
                            quiet=quiet)

    # Tree step
    (soft, model, boot, write_boot, memory, fast) = args_tree
    outdir_tree = os.path.join(outdir, "6-tree_module")
    logger.info("tree step")
    tree.main("PanACoTA tree",
              align_file,
              outdir_tree,
              soft,
              model,
              threads,
              boot,
              write_boot,
              memory,
              fast,
              verbose=verbose,
              quiet=quiet)
    logger.info("All modules of PanACOTA are finished.")
    return 0
예제 #18
0
def test_main_prodigal_train_ok(capsys):
    """
    Run the pipeline with a given prodigal dir which already contains a valid training file:

    - no train
    - reannotate
    - format

    2 genomes in list file: B2_A3_5.fasta-changeName.fna and H299_H561.fasta
    """
    # Result folder, holding only a valid training file copied from the expected data
    res_folder = os.path.join(GENEPATH, "results-prodigal")
    os.makedirs(res_folder)
    trn_file = os.path.join(res_folder, "H299_H561.fasta.trn")
    shutil.copyfile(os.path.join(EXP_DIR, "exp_complete_genome_big.fna.trn"), trn_file)

    # Function arguments
    name = "ESCO"
    date = "0417"
    list_file = os.path.join(TEST_DIR, "list_genomes-func-test-exist_dir.txt")
    lstexp = os.path.join(EXP_DIR, "exp_LSTINFO-func-annot_exists-prokkadir.lst")
    lstout = os.path.join(GENEPATH, "LSTINFO-list_genomes-func-test-exist_dir.lst")
    returned = annot.main("cmd", list_file, GEN_PATH, GENEPATH, name, date, cutn=0,
                          res_annot_dir=res_folder, verbose=3, prodigal_only=True)
    assert returned == (lstout, 2)
    out, err = capsys.readouterr()

    # Nothing must be left in the tmp files folder (annotation results are written elsewhere)
    assert len(os.listdir(os.path.join(GENEPATH, "tmp_files"))) == 0
    # The LSTINFO file is in the result dir, with the expected content
    assert os.path.isfile(lstout)
    assert tutil.compare_order_content(lstout, lstexp)

    # Read the details log and look for every expected message in it
    logfile = os.path.join(GENEPATH,
                           "PanACoTA-annotate_list_genomes-func-test-exist_dir.log.details")
    with open(logfile, "r") as lc:
        log_content = lc.readlines()
    log_text = " ".join(log_content)
    expected_messages = (
        # the existing training file is reused
        ("A training file already exists (test/data/annotate/generated_by_func-tests/"
         "results-prodigal/H299_H561.fasta.trn). It will be used to annotate "
         "all genomes."),
        # both genomes are annotated
        ("Start annotating ESCO.1116.00002 (from test/data/annotate/genomes/"
         "B2_A3_5.fasta-changeName.fna sequence) with Prodigal"),
        ("Start annotating ESCO.1015.00001 (from test/data/annotate/genomes/"
         "H299_H561.fasta sequence) with Prodigal"),
        # prodigal command lines, both using the existing training file
        ("Prodigal command: "
         "prodigal -i test/data/annotate/genomes/B2_A3_5.fasta-changeName.fna "
         "-d test/data/annotate/generated_by_func-tests/results-prodigal/"
         "B2_A3_5.fasta-changeName.fna-prodigalRes/ESCO.1116.00002.ffn "
         "-a test/data/annotate/generated_by_func-tests/results-prodigal/"
         "B2_A3_5.fasta-changeName.fna-prodigalRes/ESCO.1116.00002.faa "
         "-f gff -o test/data/annotate/generated_by_func-tests/results-prodigal/"
         "B2_A3_5.fasta-changeName.fna-prodigalRes/ESCO.1116.00002.gff "
         "-t test/data/annotate/generated_by_func-tests/results-prodigal/H299_H561.fasta.trn "
         "-q"),
        ("Prodigal command: "
         "prodigal -i test/data/annotate/genomes/H299_H561.fasta "
         "-d test/data/annotate/generated_by_func-tests/results-prodigal/"
         "H299_H561.fasta-prodigalRes/ESCO.1015.00001.ffn "
         "-a test/data/annotate/generated_by_func-tests/results-prodigal/"
         "H299_H561.fasta-prodigalRes/ESCO.1015.00001.faa "
         "-f gff -o test/data/annotate/generated_by_func-tests/results-prodigal/"
         "H299_H561.fasta-prodigalRes/ESCO.1015.00001.gff "
         "-t test/data/annotate/generated_by_func-tests/results-prodigal/H299_H561.fasta.trn "
         "-q"),
        ("End annotating ESCO.1015.00001 (from test/data/annotate/genomes/"
         "H299_H561.fasta)"),
        # formatting step and end of the annotation
        "Formatting all genomes",
        "Annotation step done",
    )
    for message in expected_messages:
        assert message in log_text