def test_main_default(capsys):
    """
    Test that with default parameters (tol = 1, no multi, no mixed, no floor,
    no lstinfo file), it creates the expected core genome.
    """
    cmd = "cmd"
    lstinfo = ""
    tol = 1
    multi = mixed = floor = False
    verbose = 0
    quiet = False
    out_pers = os.path.join(GENEPATH, "PersGenome_pangenome.lst-all_1.lst")

    # main must return the path of the persistent genome file it wrote
    returned = corepers.main(cmd, UPAN, tol, multi, mixed, GENEPATH, lstinfo, floor,
                             verbose, quiet)
    assert returned == out_pers

    # A binary version of the pangenome is saved next to the pangenome file
    assert os.path.isfile(UPAN + ".bin")

    # The persistent genome file exists and has the expected content
    assert os.path.isfile(out_pers)
    exp_pers = os.path.join(EXP_PATH, "exp_coregenome.txt")
    assert tutil.compare_order_content(out_pers, exp_pers)

    # Log files are created in the output directory
    logfile = os.path.join(GENEPATH, "PanACoTA-corepers.log")
    for suffix in ("", ".err"):
        assert os.path.isfile(logfile + suffix)

    # Messages expected on stdout
    out, _ = capsys.readouterr()
    expected_messages = (
        "Will generate a CoreGenome.",
        "Saving all information to a binary file for later use",
        "Generating Persistent genome of a dataset containing 4 genomes",
        ("The core genome contains 2 families, each one having exactly "
         "4 members, from the 4 different genomes."),
    )
    for message in expected_messages:
        assert message in out
def test_main_pers_floor_multi(capsys):
    """
    Test the persistent genome with tol = 0.99, floor and multi enabled, on the
    genomes listed in a lstinfo file written by the test (2 genomes), with
    results written to a dedicated output directory.
    """
    cmd = "cmd"
    tol = 0.99
    multi, mixed, floor = True, False, True
    verbose = 15
    quiet = False

    # lstinfo file listing the genomes to keep; only the first field of each
    # line is meaningful (see the text written on the first line)
    lstinfo = os.path.join(GENEPATH, "lstinfo-ok.lst")
    with open(lstinfo, "w") as lst:
        lst.writelines([
            "GEN4.1111.00001 toto we don't use other fields\n",
            "GENO.1216.00003\n",
        ])

    outdir = os.path.join(GENEPATH, "outdir")
    out_pers = os.path.join(
        outdir, "PersGenome_pangenome.lst-lstinfo-ok.lst_F0.99-multi.lst")

    # main must return the path of the persistent genome file it wrote
    returned = corepers.main(cmd, UPAN, tol, multi, mixed, outdir, lstinfo, floor,
                             verbose, quiet)
    assert returned == out_pers

    # A binary version of the pangenome is saved next to the pangenome file
    assert os.path.isfile(UPAN + ".bin")

    # The persistent genome file exists and has the expected content
    assert os.path.isfile(out_pers)
    exp_pers = os.path.join(EXP_PATH, "exp_pers-floor-multi_subset.txt")
    assert tutil.compare_order_content(out_pers, exp_pers)

    # Log files are created in the requested output directory
    logfile = os.path.join(outdir, "PanACoTA-corepers.log")
    for suffix in ("", ".err"):
        assert os.path.isfile(logfile + suffix)

    # Messages expected on stdout
    out, _ = capsys.readouterr()
    expected_messages = (
        ("Will generate a Persistent genome with member(s) in at least 99.0% "
         "of all genomes in each family."),
        "Multigenic families are allowed (several members in any genome of a family).",
        "Saving all information to a binary file for later use",
        "Generating Persistent genome of a dataset containing 2 genomes",
        ("The persistent genome contains 9 families with members present in "
         "at least 1 different genomes (99.0% of the total number of genomes)."),
    )
    for message in expected_messages:
        assert message in out
def test_main_pers_floor_mixed_debug(capsys):
    """
    Test the persistent genome with tol = 0.99, floor and mixed enabled, no
    lstinfo file, and verbose = 15: the details and debug log files must be
    created in addition to the regular ones.
    """
    cmd = "cmd"
    lstinfo = ""
    tol = 0.99
    multi, mixed, floor = False, True, True
    verbose = 15
    quiet = False
    out_pers = os.path.join(GENEPATH, "PersGenome_pangenome.lst-all_F0.99-mixed.lst")

    # main must return the path of the persistent genome file it wrote
    returned = corepers.main(cmd, UPAN, tol, multi, mixed, GENEPATH, lstinfo, floor,
                             verbose, quiet)
    assert returned == out_pers

    # A binary version of the pangenome is saved next to the pangenome file
    assert os.path.isfile(UPAN + ".bin")

    # The persistent genome file exists and has the expected content
    assert os.path.isfile(out_pers)
    exp_pers = os.path.join(EXP_PATH, "exp_pers-floor-mixed.txt")
    assert tutil.compare_order_content(out_pers, exp_pers)

    # Regular, details, debug and error log files are all expected here
    logfile = os.path.join(GENEPATH, "PanACoTA-corepers.log")
    for suffix in ("", ".details", ".debug", ".err"):
        assert os.path.isfile(logfile + suffix)

    # Messages expected on stdout
    out, _ = capsys.readouterr()
    expected_messages = (
        ("Will generate a Persistent genome with member(s) in at least 99.0% "
         "of all genomes in each family."),
        ("Mixed families are allowed. To be considered as persistent, "
         "a family must have exactly 1 member in 99.0% of the genomes, but in the "
         "remaining 1.0% genomes, there can be 0, 1 or several members."),
        "Saving all information to a binary file for later use",
        "Generating Persistent genome of a dataset containing 4 genomes",
        ("The persistent genome contains 7 families, each one having exactly "
         "1 member from at least 99.0% of the genomes (3 genomes). In the "
         "remaining 1.0% genomes, there can be 0, 1 or several members."),
    )
    for message in expected_messages:
        assert message in out
def test_main_pers(capsys):
    """
    Test the strict persistent genome (tol = 0.99, no multi, no mixed, no floor)
    using an existing lstinfo file from the test data.
    """
    cmd = "cmd"
    tol = 0.99
    multi = mixed = floor = False
    verbose = 0
    quiet = False
    lstinfo = os.path.join(TEST_PATH, "test_lstinfo.txt")
    out_pers = os.path.join(
        GENEPATH, "PersGenome_pangenome.lst-test_lstinfo.txt_0.99.lst")

    # main must return the path of the persistent genome file it wrote
    returned = corepers.main(cmd, UPAN, tol, multi, mixed, GENEPATH, lstinfo, floor,
                             verbose, quiet)
    assert returned == out_pers

    # A binary version of the pangenome is saved next to the pangenome file
    assert os.path.isfile(UPAN + ".bin")

    # The persistent genome file exists and has the expected content
    assert os.path.isfile(out_pers)
    exp_pers = os.path.join(EXP_PATH, "exp_coregenome.txt")
    assert tutil.compare_order_content(out_pers, exp_pers)

    # Log files are created in the output directory
    logfile = os.path.join(GENEPATH, "PanACoTA-corepers.log")
    for suffix in ("", ".err"):
        assert os.path.isfile(logfile + suffix)

    # Messages expected on stdout
    out, _ = capsys.readouterr()
    expected_messages = (
        ("Will generate a Persistent genome with member(s) in at least 99.0% "
         "of all genomes in each family."),
        ("To be considered as persistent, a family "
         "must contain exactly 1 member in at least 99.0% of all genomes. "
         "The other genomes are absent from the family."),
        # NOTE(review): hard-coded relative path; presumably matches TEST_PATH - confirm
        ("Getting subset of pangenome for genomes in "
         "test/data/persgenome/test_files/test_lstinfo.txt"),
        "Saving all information to a binary file for later use",
        "Generating Persistent genome of a dataset containing 4 genomes",
        ("The persistent genome contains 2 families, each one having exactly "
         "1 member from at least 99.0% of the 4 different genomes "
         "(that is 4 genomes). The other genomes are absent from the family."),
    )
    for message in expected_messages:
        assert message in out
def test_main_core_subset(capsys):
    """
    Test the core genome (tol = 1, no multi, no mixed, no floor) restricted to
    the genomes listed in a lstinfo file written by the test (2 genomes).
    """
    cmd = "cmd"
    tol = 1
    multi = mixed = floor = False
    verbose = 0
    quiet = False

    # lstinfo file listing the genomes to keep; only the first field of each
    # line is meaningful (see the text written on the first line)
    lstinfo = os.path.join(GENEPATH, "lstinfo-ok.lst")
    with open(lstinfo, "w") as lst:
        lst.writelines([
            "GEN4.1111.00001 toto we don't use other fields\n",
            "GENO.1216.00003\n",
        ])

    out_pers = os.path.join(GENEPATH, "PersGenome_pangenome.lst-lstinfo-ok.lst_1.lst")

    # main must return the path of the persistent genome file it wrote
    returned = corepers.main(cmd, UPAN, tol, multi, mixed, GENEPATH, lstinfo, floor,
                             verbose, quiet)
    assert returned == out_pers

    # A binary version of the pangenome is saved next to the pangenome file
    assert os.path.isfile(UPAN + ".bin")

    # The persistent genome file exists and has the expected content
    assert os.path.isfile(out_pers)
    exp_pers = os.path.join(EXP_PATH, "exp_coregenome_subset.txt")
    assert tutil.compare_order_content(out_pers, exp_pers)

    # Log files are created in the output directory
    logfile = os.path.join(GENEPATH, "PanACoTA-corepers.log")
    for suffix in ("", ".err"):
        assert os.path.isfile(logfile + suffix)

    # Messages expected on stdout
    out, _ = capsys.readouterr()
    expected_messages = (
        "Will generate a CoreGenome.",
        "Saving all information to a binary file for later use",
        "Generating Persistent genome of a dataset containing 2 genomes",
        ("The core genome contains 3 families, each one having exactly "
         "2 members, from the 2 different genomes."),
    )
    for message in expected_messages:
        assert message in out
def main(cmd, args_all, args_prepare, args_annot, args_pan, args_corepers, args_align, args_tree):
    """
    Call all modules, one by one, using output of one as input for the next one

    Parameters
    ----------
    cmd : str
        command line used to launch the program
    args_all : tuple
        arguments common to all modules:
        output directory (str), threads (int), verbose (int), quiet (bool)
    args_prepare : tuple
        arguments for prepare module (see subcommands.prepare.py), in this order:
        NCBI_species_name (str), NCBI_species_taxid (int), NCBI_taxid (int),
        NCBI_strains (str), levels (str), NCBI_section (str), tmp_dir (str),
        norefseq (bool), db_dir (str), only_mash (bool), info_file (str),
        l90 (int), nbcont (int), cutn (int), min_dist (float), max_dist (float)
    args_annot : tuple
        arguments for annotate module (see subcommands/annotate.py):
        name (str), qc_only (bool), date (str), prodigal_only (bool), small (bool)
    args_pan : tuple
        arguments for pangenome module (see subcommands/pangenome.py):
        min_id (float), clust_mode (int), spe_dir (str), outfile (str)
    args_corepers : tuple
        arguments for corepers module (see subcommands.corepers.py):
        tol (float), mixed (bool), multi (bool), floor (bool)
    args_align : tuple or bool
        arguments for align module (see subcommands.align.py):
        prot_ali (bool), given either as a 1-element tuple/list or as the bare value
    args_tree : tuple
        arguments for tree module (see subcommands.tree.py):
        soft (str), model (str), boot (bool), write_boot (bool), memory (str), fast (bool)

    Returns
    -------
    str or int
        "QC_only done" when qc_only is set (the pipeline stops after the annotate
        step), 0 once all modules have been run
    """
    outdir, threads, verbose, quiet = args_all
    os.makedirs(outdir, exist_ok=True)

    # Initialize logger
    import logging
    # set level of logger: level is the minimum level that will be considered.
    if verbose >= 15:
        # for verbose >= 15, write everything
        level = logging.DEBUG
    elif verbose >= 2:
        # for 2 <= verbose < 15, ignore only debug
        level = utils.detail_lvl()  # int corresponding to detail level
    else:
        level = logging.INFO
    logfile_base = os.path.join(outdir, "PanACoTA-all_modules")
    utils.init_logger(logfile_base, level, name='all_modules',
                      verbose=verbose, quiet=quiet)
    logger = logging.getLogger('all_modules')
    logger.info(f'PanACoTA version {version}')
    logger.info("Command used\n \t > " + cmd)

    # Run prepare module
    outdir_prepare = os.path.join(outdir, "1-prepare_module")
    (NCBI_species_name, NCBI_species_taxid, NCBI_taxid, NCBI_strains, levels,
     NCBI_section, tmp_dir, norefseq, db_dir, only_mash, info_file, l90, nbcont,
     cutn, min_dist, max_dist) = args_prepare
    logger.info("prepare step")
    info_file = prepare.main("PanACoTA prepare", NCBI_species_name, NCBI_species_taxid,
                             NCBI_taxid, NCBI_strains, levels, NCBI_section,
                             outdir_prepare, tmp_dir, threads, norefseq, db_dir,
                             only_mash, info_file, l90, nbcont, cutn, min_dist,
                             max_dist, verbose, quiet)

    # Run annotate module
    # The annotate step is driven by the info file returned by the prepare step,
    # so list_file, db_path and tmp_dir are passed empty.
    list_file = ""
    db_path = ""
    tmp_dir = ""
    force = False
    outdir_annotate = os.path.join(outdir, "2-annotate_module")
    (name, qc_only, date, prodigal_only, small) = args_annot
    res_annot_dir = None
    logger.info("annotate step")
    lstinfo, nbgenomes = annotate.main("PanACoTA annotate", list_file, db_path,
                                       outdir_annotate, name, date, l90, nbcont, cutn,
                                       threads, force, qc_only, info_file, tmp_dir,
                                       res_annot_dir, verbose, quiet,
                                       prodigal_only=prodigal_only, small=small)
    if qc_only:
        return "QC_only done"

    # Pangenome step
    name_pan = f"{name}_{nbgenomes}"
    outdir_pan = os.path.join(outdir, "3-pangenome_module")
    dbpath = os.path.join(outdir_annotate, "Proteins")
    (min_id, clust_mode, spe_dir, outfile) = args_pan
    logger.info("pangenome step")
    panfile = pangenome.main("PanACoTA pangenome", lstinfo, name_pan, dbpath, min_id,
                             outdir_pan, clust_mode, spe_dir, threads, outfile,
                             verbose=verbose, quiet=quiet)

    # Coregenome step
    outdir_corpers = os.path.join(outdir, "4-corepers_module")
    logger.info("corepers step")
    (tol, mixed, multi, floor) = args_corepers
    lstinfo_file = ""  # include all genomes in core
    # corepers.main is called with multi before mixed, unlike the order in args_corepers
    corepers_file = corepers.main("PanACoTA corepers", panfile, tol, multi, mixed,
                                  outdir_corpers, lstinfo_file, floor, verbose, quiet)

    # Align step
    outdir_align = os.path.join(outdir, "5-align_module")
    force = False
    logger.info("align step")
    # Parentheses without a trailing comma do not unpack anything, so a 1-element
    # tuple must be unpacked explicitly. A bare value is still accepted, because a
    # caller writing ``(prot_ali)`` passes the value itself rather than a tuple.
    if isinstance(args_align, (tuple, list)):
        (prot_ali,) = args_align
    else:
        prot_ali = args_align
    align_file = align.main("PanACoTA align", corepers_file, lstinfo, name_pan,
                            outdir_annotate, outdir_align, prot_ali, threads, force,
                            verbose=verbose, quiet=quiet)

    # Tree step
    (soft, model, boot, write_boot, memory, fast) = args_tree
    outdir_tree = os.path.join(outdir, "6-tree_module")
    logger.info("tree step")
    tree.main("PanACoTA tree", align_file, outdir_tree, soft, model, threads, boot,
              write_boot, memory, fast, verbose=verbose, quiet=quiet)
    logger.info("All modules of PanACOTA are finished.")
    return 0