Ejemplo n.º 1
0
def test_main_default(capsys):
    """
    Run corepers with tol=1, no multi/mixed/floor option and no lstinfo file.

    Checks the path returned by ``corepers.main``, the files expected on disk
    (binary pangenome, core genome, log files) and the messages captured on
    stdout.
    """
    expected_out = os.path.join(GENEPATH, "PersGenome_pangenome.lst-all_1.lst")
    # Positional arguments, in order: cmd, pangenome, tol, multi, mixed,
    # outdir, lstinfo, floor, verbose, quiet
    returned = corepers.main("cmd", UPAN, 1, False, False, GENEPATH, "",
                             False, 0, False)
    assert returned == expected_out
    # The pangenome must have been saved as a binary file
    assert os.path.isfile(UPAN + ".bin")
    # The core genome file must exist and match the reference file
    reference = os.path.join(EXP_PATH, "exp_coregenome.txt")
    assert os.path.isfile(expected_out)
    assert tutil.compare_order_content(expected_out, reference)
    # Log files must be present in the output directory
    log_base = os.path.join(GENEPATH, "PanACoTA-corepers.log")
    for suffix in ("", ".err"):
        assert os.path.isfile(log_base + suffix)
    # Messages expected on stdout
    stdout, _ = capsys.readouterr()
    expected_messages = (
        "Will generate a CoreGenome.",
        "Saving all information to a binary file for later use",
        "Generating Persistent genome of a dataset containing 4 genomes",
        ("The core genome contains 2 families, each one having exactly "
         "4 members, from the 4 different genomes."),
    )
    for message in expected_messages:
        assert message in stdout
Ejemplo n.º 2
0
def test_main_pers_floor_multi(capsys):
    """
    Run corepers with tol=0.99, the floor and multi options enabled, a lstinfo
    file listing 2 genomes, and a dedicated output directory.

    Checks the path returned by ``corepers.main``, the files expected on disk
    (binary pangenome, persistent genome, log files) and the messages captured
    on stdout.
    """
    # Write the lstinfo file used to select a subset of genomes
    subset_file = os.path.join(GENEPATH, "lstinfo-ok.lst")
    with open(subset_file, "w") as handle:
        handle.writelines([
            "GEN4.1111.00001 toto we don't use other fields\n",
            "GENO.1216.00003\n",
        ])
    result_dir = os.path.join(GENEPATH, "outdir")
    expected_out = os.path.join(
        result_dir, "PersGenome_pangenome.lst-lstinfo-ok.lst_F0.99-multi.lst")
    # Positional arguments, in order: cmd, pangenome, tol, multi, mixed,
    # outdir, lstinfo, floor, verbose, quiet
    returned = corepers.main("cmd", UPAN, 0.99, True, False, result_dir,
                             subset_file, True, 15, False)
    assert returned == expected_out
    # The pangenome must have been saved as a binary file
    assert os.path.isfile(UPAN + ".bin")
    # The persistent genome file must exist and match the reference file
    reference = os.path.join(EXP_PATH, "exp_pers-floor-multi_subset.txt")
    assert os.path.isfile(expected_out)
    assert tutil.compare_order_content(expected_out, reference)
    # Log files must be present in the output directory
    log_base = os.path.join(result_dir, "PanACoTA-corepers.log")
    for suffix in ("", ".err"):
        assert os.path.isfile(log_base + suffix)
    # Messages expected on stdout
    stdout, _ = capsys.readouterr()
    expected_messages = (
        ("Will generate a Persistent genome with member(s) in at least 99.0% "
         "of all genomes in each family."),
        ("Multigenic families are allowed (several members in any genome of a family)."),
        "Saving all information to a binary file for later use",
        "Generating Persistent genome of a dataset containing 2 genomes",
        ("The persistent genome contains 9 families with members present in "
         "at least 1 different genomes (99.0% of the total number of genomes)."),
    )
    for message in expected_messages:
        assert message in stdout
Ejemplo n.º 3
0
def test_main_pers_floor_mixed_debug(capsys):
    """
    Run corepers with tol=0.99, the floor and mixed options enabled, no lstinfo
    file, and verbose=15.

    Checks the path returned by ``corepers.main``, the files expected on disk
    (binary pangenome, persistent genome, log files including the ``.details``
    and ``.debug`` ones) and the messages captured on stdout.
    """
    expected_out = os.path.join(GENEPATH,
                                "PersGenome_pangenome.lst-all_F0.99-mixed.lst")
    # Positional arguments, in order: cmd, pangenome, tol, multi, mixed,
    # outdir, lstinfo, floor, verbose, quiet
    returned = corepers.main("cmd", UPAN, 0.99, False, True, GENEPATH, "",
                             True, 15, False)
    assert returned == expected_out
    # The pangenome must have been saved as a binary file
    assert os.path.isfile(UPAN + ".bin")
    # The persistent genome file must exist and match the reference file
    reference = os.path.join(EXP_PATH, "exp_pers-floor-mixed.txt")
    assert os.path.isfile(expected_out)
    assert tutil.compare_order_content(expected_out, reference)
    # Log files must be present, including the details and debug ones
    log_base = os.path.join(GENEPATH, "PanACoTA-corepers.log")
    for suffix in ("", ".details", ".debug", ".err"):
        assert os.path.isfile(log_base + suffix)
    # Messages expected on stdout
    stdout, _ = capsys.readouterr()
    expected_messages = (
        ("Will generate a Persistent genome with member(s) in at least 99.0% "
         "of all genomes in each family."),
        ("Mixed families are allowed. To be considered as persistent, "
         "a family must have exactly 1 member in 99.0% of the genomes, but in the "
         "remaining 1.0% genomes, there can be 0, 1 or several members."),
        "Saving all information to a binary file for later use",
        "Generating Persistent genome of a dataset containing 4 genomes",
        ("The persistent genome contains 7 families, each one having exactly "
         "1 member from at least 99.0% of the genomes (3 genomes). In the "
         "remaining 1.0% genomes, there can be 0, 1 or several members."),
    )
    for message in expected_messages:
        assert message in stdout
Ejemplo n.º 4
0
def test_main_pers(capsys):
    """
    Run corepers with tol=0.99, no multi/mixed/floor option, and a lstinfo
    file taken from the test data directory.

    Checks the path returned by ``corepers.main``, the files expected on disk
    (binary pangenome, persistent genome, log files) and the messages captured
    on stdout.
    """
    subset_file = os.path.join(TEST_PATH, "test_lstinfo.txt")
    expected_out = os.path.join(
        GENEPATH, "PersGenome_pangenome.lst-test_lstinfo.txt_0.99.lst")
    # Positional arguments, in order: cmd, pangenome, tol, multi, mixed,
    # outdir, lstinfo, floor, verbose, quiet
    returned = corepers.main("cmd", UPAN, 0.99, False, False, GENEPATH,
                             subset_file, False, 0, False)
    assert returned == expected_out
    # The pangenome must have been saved as a binary file
    assert os.path.isfile(UPAN + ".bin")
    # The persistent genome file must exist and match the reference file
    reference = os.path.join(EXP_PATH, "exp_coregenome.txt")
    assert os.path.isfile(expected_out)
    assert tutil.compare_order_content(expected_out, reference)
    # Log files must be present in the output directory
    log_base = os.path.join(GENEPATH, "PanACoTA-corepers.log")
    for suffix in ("", ".err"):
        assert os.path.isfile(log_base + suffix)
    # Messages expected on stdout
    stdout, _ = capsys.readouterr()
    expected_messages = (
        ("Will generate a Persistent genome with member(s) in at least 99.0% "
         "of all genomes in each family."),
        ("To be considered as persistent, a family "
         "must contain exactly 1 member in at least 99.0% of all genomes. "
         "The other genomes are absent from the family."),
        ("Getting subset of pangenome for genomes in "
         "test/data/persgenome/test_files/test_lstinfo.txt"),
        "Saving all information to a binary file for later use",
        "Generating Persistent genome of a dataset containing 4 genomes",
        ("The persistent genome contains 2 families, each one having exactly "
         "1 member from at least 99.0% of the 4 different genomes "
         "(that is 4 genomes). The other genomes are absent from the family."),
    )
    for message in expected_messages:
        assert message in stdout
Ejemplo n.º 5
0
def test_main_core_subset(capsys):
    """
    Run corepers with tol=1, no multi/mixed/floor option, and a lstinfo file
    listing 2 genomes.

    Checks the path returned by ``corepers.main``, the files expected on disk
    (binary pangenome, core genome, log files) and the messages captured on
    stdout.
    """
    # Write the lstinfo file used to select a subset of genomes
    subset_file = os.path.join(GENEPATH, "lstinfo-ok.lst")
    with open(subset_file, "w") as handle:
        handle.writelines([
            "GEN4.1111.00001 toto we don't use other fields\n",
            "GENO.1216.00003\n",
        ])
    expected_out = os.path.join(GENEPATH,
                                "PersGenome_pangenome.lst-lstinfo-ok.lst_1.lst")
    # Positional arguments, in order: cmd, pangenome, tol, multi, mixed,
    # outdir, lstinfo, floor, verbose, quiet
    returned = corepers.main("cmd", UPAN, 1, False, False, GENEPATH,
                             subset_file, False, 0, False)
    assert returned == expected_out
    # The pangenome must have been saved as a binary file
    assert os.path.isfile(UPAN + ".bin")
    # The core genome file must exist and match the reference file
    reference = os.path.join(EXP_PATH, "exp_coregenome_subset.txt")
    assert os.path.isfile(expected_out)
    assert tutil.compare_order_content(expected_out, reference)
    # Log files must be present in the output directory
    log_base = os.path.join(GENEPATH, "PanACoTA-corepers.log")
    for suffix in ("", ".err"):
        assert os.path.isfile(log_base + suffix)
    # Messages expected on stdout
    stdout, _ = capsys.readouterr()
    expected_messages = (
        "Will generate a CoreGenome.",
        "Saving all information to a binary file for later use",
        "Generating Persistent genome of a dataset containing 2 genomes",
        ("The core genome contains 3 families, each one having exactly "
         "2 members, from the 2 different genomes."),
    )
    for message in expected_messages:
        assert message in stdout
Ejemplo n.º 6
0
def main(cmd, args_all, args_prepare, args_annot, args_pan, args_corepers,
         args_align, args_tree):
    """
    Call all modules, one by one, using output of one as input for the next one

    Parameters
    ----------
    cmd : str
        command line used to launch the program
    args_all : tuple
        arguments common to all modules: output directory (str),
        threads (int), verbose (int), quiet (bool)
    args_prepare : tuple
        arguments for prepare module (see subcommands.prepare.py), in this order:
        NCBI_species_name (str), NCBI_species_taxid (int), NCBI_taxid (int),
        NCBI_strains (str), levels (str), NCBI_section (str),
        tmp_dir (str), norefseq (bool), db_dir (str),
        only_mash (bool), info_file (str), l90 (int), nbcont (int), cutn (int),
        min_dist (float), max_dist (float)
    args_annot : tuple
        arguments for annotate module (see subcommands/annotate.py): name (str), qc_only (bool),
        date (str), prodigal_only (bool), small (bool)
    args_pan : tuple
        arguments for pangenome module (see subcommands/pangenome.py): min_id (float),
        clust_mode (int), spe_dir (str), outfile (str)
    args_corepers : tuple
        arguments for corepers module (see subcommands.corepers.py): tol (float), mixed (bool),
        multi (bool), floor (bool)
    args_align : tuple or bool
        arguments for align module (see subcommands.align.py): prot_ali (bool).
        Accepted either as a 1-element tuple/list ``(prot_ali,)`` or as the
        bare ``prot_ali`` value.
    args_tree : tuple
        arguments for tree module (see subcommands.tree.py): soft (str), model (str), boot (bool),
        write_boot (bool), memory (str), fast (bool)

    Returns
    -------
    str or int
        "QC_only done" when ``qc_only`` is set (the function returns right after
        the annotate step), 0 once all modules have been called.
    """
    outdir, threads, verbose, quiet = args_all
    os.makedirs(outdir, exist_ok=True)
    # Initialize logger
    import logging
    # set level of logger: level is the minimum level that will be considered.
    if verbose <= 1:
        level = logging.INFO
    # for 2 <= verbose < 15, ignore only debug
    elif verbose < 15:
        level = utils.detail_lvl()  # int corresponding to detail level
    # for verbose >= 15, write everything
    else:
        level = logging.DEBUG
    utils.init_logger(os.path.join(outdir, "PanACoTA-all_modules"),
                      level,
                      name='all_modules',
                      verbose=verbose,
                      quiet=quiet)
    logger = logging.getLogger('all_modules')
    logger.info(f'PanACoTA version {version}')
    logger.info("Command used\n \t > " + cmd)

    # Run prepare module
    outdir_prepare = os.path.join(outdir, "1-prepare_module")
    (NCBI_species_name, NCBI_species_taxid, NCBI_taxid, NCBI_strains, levels,
     NCBI_section, tmp_dir, norefseq, db_dir, only_mash, info_file, l90,
     nbcont, cutn, min_dist, max_dist) = args_prepare
    logger.info("prepare step")
    # The value returned by the prepare module replaces the info_file given
    # in args_prepare, and is the one handed to the annotate module.
    info_file = prepare.main("PanACoTA prepare", NCBI_species_name,
                             NCBI_species_taxid, NCBI_taxid, NCBI_strains,
                             levels, NCBI_section, outdir_prepare, tmp_dir,
                             threads, norefseq, db_dir, only_mash, info_file,
                             l90, nbcont, cutn, min_dist, max_dist, verbose,
                             quiet)

    # Run annotate module
    list_file = ""
    db_path = ""
    # The annotate module is called with an empty tmp_dir, not the one
    # given for the prepare module.
    tmp_dir = ""
    force = False
    outdir_annotate = os.path.join(outdir, "2-annotate_module")
    (name, qc_only, date, prodigal_only, small) = args_annot
    res_annot_dir = None

    logger.info("annotate step")
    lstinfo, nbgenomes = annotate.main("PanACoTA annotate",
                                       list_file,
                                       db_path,
                                       outdir_annotate,
                                       name,
                                       date,
                                       l90,
                                       nbcont,
                                       cutn,
                                       threads,
                                       force,
                                       qc_only,
                                       info_file,
                                       tmp_dir,
                                       res_annot_dir,
                                       verbose,
                                       quiet,
                                       prodigal_only=prodigal_only,
                                       small=small)
    # With qc_only, stop here: none of the following modules is called.
    if qc_only:
        return "QC_only done"

    # Pangenome step
    name_pan = f"{name}_{nbgenomes}"
    outdir_pan = os.path.join(outdir, "3-pangenome_module")
    dbpath = os.path.join(outdir_annotate, "Proteins")
    (min_id, clust_mode, spe_dir, outfile) = args_pan
    logger.info("pangenome step")
    panfile = pangenome.main("PanACoTA pangenome",
                             lstinfo,
                             name_pan,
                             dbpath,
                             min_id,
                             outdir_pan,
                             clust_mode,
                             spe_dir,
                             threads,
                             outfile,
                             verbose=verbose,
                             quiet=quiet)

    # Coregenome step
    outdir_corepers = os.path.join(outdir, "4-corepers_module")
    logger.info("corepers step")
    # args_corepers is ordered (tol, mixed, multi, floor) whereas
    # corepers.main takes multi before mixed.
    (tol, mixed, multi, floor) = args_corepers
    lstinfo_file = ""  # include all genomes in core
    corepers_file = corepers.main("PanACoTA corepers", panfile, tol, multi,
                                  mixed, outdir_corepers, lstinfo_file, floor,
                                  verbose, quiet)

    # Align step
    outdir_align = os.path.join(outdir, "5-align_module")
    logger.info("align step")
    # "(prot_ali) = args_align" is a plain assignment, not a tuple unpacking:
    # a tuple would be forwarded as is, and a non-empty tuple is always truthy.
    # Unpack a 1-element tuple/list, and keep accepting a bare value.
    if isinstance(args_align, (tuple, list)):
        (prot_ali,) = args_align
    else:
        prot_ali = args_align
    align_file = align.main("PanACoTA align",
                            corepers_file,
                            lstinfo,
                            name_pan,
                            outdir_annotate,
                            outdir_align,
                            prot_ali,
                            threads,
                            force,
                            verbose=verbose,
                            quiet=quiet)

    # Tree step
    (soft, model, boot, write_boot, memory, fast) = args_tree
    outdir_tree = os.path.join(outdir, "6-tree_module")
    logger.info("tree step")
    tree.main("PanACoTA tree",
              align_file,
              outdir_tree,
              soft,
              model,
              threads,
              boot,
              write_boot,
              memory,
              fast,
              verbose=verbose,
              quiet=quiet)
    logger.info("All modules of PanACOTA are finished.")
    return 0