def setUp(self): self.identify_dir_reference = 'tests/data/identify_dir_reference/' self.align_dir_reference = 'tests/data/align_dir_reference/' self.genome_dir = 'gtdbtk/tests/data/genomes/' self.options = argparse.ArgumentParser() self.options.batchfile = None self.options.prefix = 'gtdbtk' self.options.cpus = 1 self.options.extension = 'fna' self.options.debug = False # align option self.options.skip_gtdb_refs = False self.options.taxa_filter = None self.options.custom_msa_filters = False self.options.min_consensus = None self.options.min_perc_taxa = None self.options.skip_gtdb_refs = False self.options.cols_per_gene = None self.options.max_consensus = None self.options.min_perc_aa = 50 # classify options self.options.scratch_dir = None # infer options self.options.prot_model = 'WAG' self.options.no_support = False self.options.no_gamma = True self.version = ' unittest' self.optionparser = OptionsParser(self.version) logger_setup(None, "gtdbtk.log", "GTDB-Tk", self.version, True) self.generic_out_path = 'tests/data/results'
def setUp(self): self.identify_dir_reference = os.path.join( os.path.dirname(__file__), 'data/identify_dir_reference/') self.align_dir_reference = 'tests/data/align_dir_reference/' self.genome_dir = 'gtdbtk/tests/data/genomes/' self.options = argparse.ArgumentParser() self.options.batchfile = None self.options.prefix = 'gtdbtk' self.options.cpus = 1 self.options.extension = 'fna' self.options.debug = False self.options.force = False self.options.genes = False self.options.write_single_copy_genes = False # align option self.options.skip_gtdb_refs = False self.options.taxa_filter = None self.options.custom_msa_filters = False self.options.skip_trimming = False self.options.min_consensus = None self.options.min_perc_taxa = None self.options.skip_gtdb_refs = False self.options.cols_per_gene = None self.options.max_consensus = None self.options.min_perc_aa = 50 self.options.rnd_seed = 42 self.options.outgroup_taxon = None # classify options self.options.scratch_dir = None self.options.keep_ref_red = None self.options.pplacer_cpus = None self.options.min_af = None # infer options self.options.prot_model = 'WAG' self.options.no_support = False self.options.no_gamma = True self.version = ' unittest' self.optionparser = OptionsParser(self.version) logger_setup(None, "gtdbtk.log", "GTDB-Tk", self.version, True) # self.generic_out_path = 'tests/data/results' self.generic_out_path = tempfile.mkdtemp(prefix='gtdbtk_tmp_')
def main(): # ------------------------------------------------- # get and check options args = None if len(sys.argv) == 1: print_help() sys.exit(0) elif sys.argv[1] in {'-v', '--v', '-version', '--version'}: print(f"gtdbtk: version {__version__} {__copyright__} {__author__}") # Warn the user they are not using the latest version (if possible) latest_ver = get_gtdbtk_latest_version() if latest_ver and latest_ver != __version__: print(f'Note: There is a newer version of GTDB-Tk available: v{latest_ver}') sys.exit(0) elif sys.argv[1] in {'-h', '--h', '-help', '--help'}: print_help() sys.exit(0) else: args = get_main_parser().parse_args() # setup logger logger_setup(args.out_dir if hasattr(args, 'out_dir') else None, "gtdbtk.log", "GTDB-Tk", __version__, False, hasattr(args, 'debug') and args.debug) logger = logging.getLogger('timestamp') # ------------------------------------------------- # do what we came here to do try: gt_parser = OptionsParser(__version__) gt_parser.parse_options(args) except SystemExit: logger.error('Controlled exit resulting from early termination.') sys.exit(1) except KeyboardInterrupt: logger.error('Controlled exit resulting from interrupt signal.') sys.exit(1) except GTDBTkExit as e: if len(str(e)) > 0: logger.error('{}'.format(e)) logger.error('Controlled exit resulting from an unrecoverable error or warning.') sys.exit(1) except (GTDBTkException, BioLibError) as e: msg = 'Controlled exit resulting from an unrecoverable error or warning.\n\n' msg += '=' * 80 + '\n' msg += 'EXCEPTION: {}\n'.format(type(e).__name__) msg += ' MESSAGE: {}\n'.format(e) msg += '_' * 80 + '\n\n' msg += traceback.format_exc() msg += '=' * 80 logger.error(msg) sys.exit(1) except Exception as e: msg = 'Uncontrolled exit resulting from an unexpected error.\n\n' msg += '=' * 80 + '\n' msg += 'EXCEPTION: {}\n'.format(type(e).__name__) msg += ' MESSAGE: {}\n'.format(e) msg += '_' * 80 + '\n\n' msg += traceback.format_exc() msg += '=' * 80 logger.error(msg) sys.exit(1)
default=0.95, help= 'maximum percentage of the same amino acid required to retain column') parser.add_argument( '--min_perc_taxa', type=float, default=0.50, help='minimum percentage of taxa required to retain column') parser.add_argument('--rnd_seed', type=int, default=None, help='random seed to use for selecting columns') parser.add_argument('--out_dir', help='output directory') args = parser.parse_args() logger_setup(args.out_dir, "trim_msa.log", "trim_msa", __version__, False) try: p = TrimMSA(args.cols_per_gene, args.min_perc_aa, args.min_consensus, args.max_consensus, args.min_perc_taxa, args.rnd_seed, args.out_dir) p.run(args.msa, args.marker_list) except SystemExit: print( "\nControlled exit resulting from an unrecoverable error or warning." ) except: print("\nUnexpected error:", sys.exc_info()[0]) raise
help= 'The prefix of the GTDB-Tk output files specified in --gtdbtk_output_dir.' ) # Parse the arguments if len(sys.argv) == 1: print_help() sys.exit(0) elif sys.argv[1] in {'-h', '--h', '-help', '--help'}: print_help() sys.exit(0) else: args = parser.parse_args() logger_setup(args.out_dir if hasattr(args, 'out_dir') else None, "gtdbtk.log", 'GTDB to NCBI majority vote', __version__, False, hasattr(args, 'debug') and args.debug) logger = logging.getLogger('timestamp') try: p = Translate() p.run(args.gtdbtk_output_dir, args.ar122_metadata_file, args.bac120_metadata_file, args.output_file, args.gtdbtk_prefix) logger.info('Done.') except SystemExit: logger.error('Controlled exit resulting from early termination.') sys.exit(1) except KeyboardInterrupt: logger.error('Controlled exit resulting from interrupt signal.') sys.exit(1) except GTDBTkExit as e:
def main(): parser = argparse.ArgumentParser(prog='gtdbtk', add_help=False, conflict_handler='resolve') parser.add_argument('-f', '--force', action="store_true", default=False, help="overwrite existing files without prompting.") subparsers = parser.add_subparsers(help="--", dest='subparser_name') # de novo workflow denovo_wf_parser = subparsers.add_parser( 'de_novo_wf', conflict_handler='resolve', formatter_class=CustomHelpFormatter, help='Infer de novo tree and decorate with GTDB taxonomy.') mutual_genome_denovo_wf = denovo_wf_parser.add_argument_group( 'mutually exclusive required arguments') mutex_group = mutual_genome_denovo_wf.add_mutually_exclusive_group( required=True) mutex_group.add_argument( '--genome_dir', help="directory containing genome files in FASTA format") mutex_group.add_argument( '--batchfile', help= "file describing genomes - tab separated in 2 columns (FASTA file, genome ID)" ) mutual_ms_denovo_wf = denovo_wf_parser.add_argument_group( 'mutually exclusive required arguments') mutex_group = mutual_ms_denovo_wf.add_mutually_exclusive_group( required=True) mutex_group.add_argument('--bacteria', action='store_true', help='process bacterial genomes') mutex_group.add_argument('--archaea', action='store_true', help='process archaeal genomes') required_denovo_wf = denovo_wf_parser.add_argument_group( 'required named arguments') required_denovo_wf.add_argument( '--outgroup_taxon', required=True, help= "taxon to use as outgroup (e.g., p__Patescibacteria or p__Altiarchaeota)" ) required_denovo_wf.add_argument('--out_dir', required=True, help="directory to output files") optional_denovo_wf = denovo_wf_parser.add_argument_group( 'optional arguments') optional_denovo_wf.add_argument( '-x', '--extension', default='fna', help='extension of files to process, gz = gzipped') optional_denovo_wf.add_argument( '--skip_gtdb_refs', action="store_true", help= 'do not include GTDB reference genomes in multiple sequence alignment') optional_denovo_wf.add_argument( '--taxa_filter', help=('filter GTDB genomes to taxa (comma separated) within ' + 'specific taxonomic groups (e.g., d__Bacteria ' + 'or p__Proteobacteria, p__Actinobacteria)')) optional_denovo_wf.add_argument( '--min_perc_aa', type=float, default=10, help= 'filter genomes with an insufficient percentage of AA in the MSA (inclusive bound)' ) optional_denovo_wf.add_argument( '--custom_msa_filters', action="store_true", help= ('perform custom filtering of MSA with cols_per_gene, min_consensus ' + 'max_consensus, and min_perc_taxa parameters instead of using canonical mask' )) optional_denovo_wf.add_argument( '--cols_per_gene', type=int, default=42, help='maximum number of columns to retain per gene') optional_denovo_wf.add_argument( '--min_consensus', type=float, default=25, help= 'minimum percentage of the same amino acid required to retain column (inclusive bound)' ) optional_denovo_wf.add_argument( '--max_consensus', type=float, default=95, help= 'maximum percentage of the same amino acid required to retain column (exclusive bound)' ) optional_denovo_wf.add_argument( '--min_perc_taxa', type=float, default=50, help= 'minimum percentage of taxa required to retain column (inclusive bound)' ) optional_denovo_wf.add_argument( '--rnd_seed', type=int, default=None, help='random seed to use for selecting columns') optional_denovo_wf.add_argument( '--prot_model', choices=['JTT', 'WAG', 'LG'], help='protein substitution model for tree inference', default='WAG') optional_denovo_wf.add_argument( '--no_support', action="store_true", help= "do not compute local support values using the Shimodaira-Hasegawa test" ) optional_denovo_wf.add_argument( '--gamma', action="store_true", help="rescale branch lengths to optimize the Gamma20 likelihood") optional_denovo_wf.add_argument( '--gtdbtk_classification_file', help= "file with GTDB-Tk classifications produced by the `classify` command") optional_denovo_wf.add_argument( '--custom_taxonomy_file', help= "file indicating custom taxonomy string for at least the genomes belonging to the outgroup" ) optional_denovo_wf.add_argument('--prefix', default='gtdbtk', help='desired prefix for output files') optional_denovo_wf.add_argument('--cpus', default=1, type=int, help='number of CPUs to use') optional_denovo_wf.add_argument( '--force', action='store_const', const=True, default=False, help='continue processing if an error occurs on a single genome') optional_denovo_wf.add_argument( '--debug', action="store_true", help='create intermediate files for debugging purposes') optional_denovo_wf.add_argument('-h', '--help', action="help", help="show help message") # classify workflow classify_wf_parser = subparsers.add_parser( 'classify_wf', conflict_handler='resolve', formatter_class=CustomHelpFormatter, help='Classify genomes by placement in GTDB reference tree.') mutual_genome_classify_wf = classify_wf_parser.add_argument_group( 'mutually exclusive required arguments') mutex_group = mutual_genome_classify_wf.add_mutually_exclusive_group( required=True) mutex_group.add_argument( '--genome_dir', help="directory containing genome files in FASTA format") mutex_group.add_argument( '--batchfile', help= "file describing genomes - tab separated in 3 columns (FASTA file, genome ID, translation table [optional])" ) required_classify_wf = classify_wf_parser.add_argument_group( 'required named arguments') required_classify_wf.add_argument('--out_dir', required=True, help="directory to output files") optional_classify_wf = classify_wf_parser.add_argument_group( 'optional arguments') optional_classify_wf.add_argument( '-x', '--extension', default='fna', help='extension of files to process, gz = gzipped') optional_classify_wf.add_argument( '--min_perc_aa', type=float, default=10, help='filter genomes with an insufficient percentage of AA in the MSA') optional_classify_wf.add_argument('--prefix', required=False, default='gtdbtk', help='desired prefix for output files') optional_classify_wf.add_argument('--cpus', default=1, type=int, help='number of CPUs to use') optional_classify_wf.add_argument( '--pplacer_cpus', type=int, default=None, help='use PPLACER_CPUS during placement (default: CPUS)') optional_classify_wf.add_argument( '--force', action='store_const', const=True, default=False, help='continue processing if an error occurs on a single genome') optional_classify_wf.add_argument( '--scratch_dir', help='Reduce memory usage by writing to disk (slower).') optional_classify_wf.add_argument( '-r', '--recalculate_red', action='store_true', help= 'recalculate RED values based on the reference tree and all added user genomes' ) # optional_classify_wf.add_argument('-s', '--split_tree', action='store_true', # help='Use shards of the reference tree (for Bacteria only). reduce memory usage (slower).') optional_classify_wf.add_argument( '-d', '--debug', action="store_true", help='create intermediate files for debugging purposes') optional_classify_wf.add_argument('-h', '--help', action="help", help="show help message") # identify marker genes in genomes identify_parser = subparsers.add_parser( 'identify', conflict_handler='resolve', formatter_class=CustomHelpFormatter, help='Identify marker genes in genome.') mutex_identify = identify_parser.add_argument_group( 'mutually exclusive required arguments') mutex_group = mutex_identify.add_mutually_exclusive_group(required=True) mutex_group.add_argument( '--genome_dir', help="directory containing genome files in FASTA format") mutex_group.add_argument( '--batchfile', help= "file describing genomes - tab separated in 3 columns (FASTA file, genome ID, translation table [optional])" ) required_identify = identify_parser.add_argument_group( 'required named arguments') required_identify.add_argument('--out_dir', required=True, help="directory to output files") optional_identify = identify_parser.add_argument_group( 'optional arguments') optional_identify.add_argument( '-x', '--extension', default='fna', help='extension of files to process, gz = gzipped') optional_identify.add_argument('--prefix', default='gtdbtk', help='desired prefix for output files') optional_identify.add_argument('--cpus', default=1, type=int, help='number of CPUs to use') optional_identify.add_argument( '--force', action='store_const', const=True, default=False, help='continue processing if an error occurs on a single genome') optional_identify.add_argument('-h', '--help', action="help", help="show help message") # create multiple sequence alignment align_parser = subparsers.add_parser( 'align', conflict_handler='resolve', formatter_class=CustomHelpFormatter, help='Create multiple sequence alignment.', ) required_align = align_parser.add_argument_group( 'required named arguments') required_align.add_argument('--identify_dir', required=True, help="output directory of 'identify' command") required_align.add_argument('--out_dir', required=True, help='directory to output files') optional_align = align_parser.add_argument_group('optional arguments') optional_align.add_argument( '--skip_gtdb_refs', action="store_true", help= 'do not include GTDB reference genomes in multiple sequence alignment') optional_align.add_argument( '--taxa_filter', help=('filter GTDB genomes to taxa (comma separated) within ' + 'specific taxonomic groups (e.g., d__Bacteria ' + 'or p__Proteobacteria, p__Actinobacteria)')) optional_align.add_argument( '--min_perc_aa', type=float, default=10, help= 'filter genomes with an insufficient percentage of AA in the MSA (inclusive bound)' ) mutual_genome_align = align_parser.add_argument_group( 'mutually exclusive optional arguments') mutex_align_group = mutual_genome_align.add_mutually_exclusive_group() mutex_align_group.add_argument( '--custom_msa_filters', action="store_true", help= ('perform custom filtering of MSA with cols_per_gene, min_consensus ' + 'max_consensus, and min_perc_taxa parameters instead of using canonical mask' )) mutex_align_group.add_argument( '--skip_trimming', action="store_true", default=False, help='skip trimming step and return the full MSAs') optional_align.add_argument( '--cols_per_gene', type=int, default=42, help='maximum number of columns to retain per gene') optional_align.add_argument( '--min_consensus', type=float, default=25, help= 'minimum percentage of the same amino acid required to retain column (inclusive bound)' ) optional_align.add_argument( '--max_consensus', type=float, default=95, help= 'maximum percentage of the same amino acid required to retain column (exclusive bound)' ) optional_align.add_argument( '--min_perc_taxa', type=float, default=50, help= 'minimum percentage of taxa required to retain column (inclusive bound)' ) optional_align.add_argument( '--rnd_seed', type=int, default=None, help='random seed to use for selecting columns') optional_align.add_argument('--prefix', required=False, default='gtdbtk', help='desired prefix for output files') optional_align.add_argument('--cpus', default=1, type=int, help='number of CPUs to use') optional_align.add_argument( '--debug', action="store_true", help='create intermediate files for debugging purposes') optional_align.add_argument('-h', '--help', action="help", help="show help message") # infer tree infer_parser = subparsers.add_parser( 'infer', conflict_handler='resolve', formatter_class=CustomHelpFormatter, help='Infer tree from multiple sequence alignment.', ) required_infer = infer_parser.add_argument_group( 'required named arguments') required_infer.add_argument( '--msa_file', required=True, help="multiple sequence alignment in FASTA format") required_infer.add_argument('--out_dir', required=True, help='directory to output files') optional_infer = infer_parser.add_argument_group('optional arguments') optional_infer.add_argument( '--prot_model', choices=['JTT', 'WAG', 'LG'], help='protein substitution model for tree inference', default='WAG') optional_infer.add_argument( '--no_support', action="store_true", help= "do not compute local support values using the Shimodaira-Hasegawa test" ) optional_infer.add_argument( '--gamma', action="store_true", help="rescale branch lengths to optimize the Gamma20 likelihood") optional_infer.add_argument('--prefix', required=False, default='gtdbtk', help='desired prefix for output files') optional_infer.add_argument('--cpus', default=1, type=int, help='number of CPUs to use') optional_infer.add_argument('-h', '--help', action="help", help="show help message") # classify genomes via placement with pplacer classify_parser = subparsers.add_parser( 'classify', conflict_handler='resolve', formatter_class=CustomHelpFormatter, help='Determine taxonomic classification of genomes.', ) mutual_genome_classify = classify_parser.add_argument_group( 'mutually exclusive required arguments') mutex_group = mutual_genome_classify.add_mutually_exclusive_group( required=True) mutex_group.add_argument( '--genome_dir', help="directory containing genome files in FASTA format") mutex_group.add_argument( '--batchfile', help= "file describing genomes - tab separated in 2 columns (FASTA file, genome ID)" ) required_classify = classify_parser.add_argument_group( 'required named arguments') required_classify.add_argument('--align_dir', required=True, help="output directory of 'align' command") required_classify.add_argument('--out_dir', required=True, help='directory to output files') optional_classify = classify_parser.add_argument_group( 'optional arguments') optional_classify.add_argument( '-x', '--extension', default='fna', help='extension of files to process, gz = gzipped') optional_classify.add_argument('--prefix', required=False, default='gtdbtk', help='desired prefix for output files') optional_classify.add_argument('--cpus', default=1, type=int, help='number of CPUs to use') optional_classify.add_argument( '--pplacer_cpus', type=int, default=None, help='use PPLACER_CPUS during placement (default: CPUS)') optional_classify.add_argument( '--scratch_dir', help='reduce memory usage by writing to disk (slower)') # optional_classify.add_argument('-s', '--split_tree', action='store_true', # help='Use shards of the reference tree (for Bacteria only). reduce memory usage (slower).') optional_classify.add_argument( '-r', '--recalculate_red', action='store_true', help= 'recalculate RED values based on the reference tree and all added user genomes' ) optional_classify.add_argument( '--debug', action="store_true", help='create intermediate files for debugging purposes') optional_classify.add_argument('-h', '--help', action="help", help="show help message") # root tree using outgroup root_parser = subparsers.add_parser( 'root', conflict_handler='resolve', formatter_class=CustomHelpFormatter, help='Root tree using an outgroup.', ) required_root = root_parser.add_argument_group('required named arguments') required_root.add_argument('--input_tree', required=True, help="tree to root in Newick format") required_root.add_argument( '--outgroup_taxon', required=True, help= "taxon to use as outgroup (e.g., p__Patescibacteria or p__Altiarchaeota)" ) required_root.add_argument('--output_tree', required=True, help='output tree') optional_root = root_parser.add_argument_group('optional arguments') optional_root.add_argument( '--gtdbtk_classification_file', help= "file with GTDB-Tk classifications produced by the `classify` command") optional_root.add_argument( '--custom_taxonomy_file', help= "file indicating custom taxonomy strings for user genomes, which should contain any genomes belonging to the outgroup" ) optional_root.add_argument('-h', '--help', action="help", help="show help message") # decorate tree decorate_parser = subparsers.add_parser( 'decorate', conflict_handler='resolve', formatter_class=CustomHelpFormatter, help='Decorate tree with GTDB taxonomy.', ) required_decorate = decorate_parser.add_argument_group( 'required named arguments') required_decorate.add_argument('--input_tree', required=True, help="tree to root in Newick format") required_decorate.add_argument('--output_tree', required=True, help='output tree') optional_decorate = decorate_parser.add_argument_group( 'optional arguments') optional_decorate.add_argument( '--gtdbtk_classification_file', help= "file with GTDB-Tk classifications produced by the `classify` command") optional_decorate.add_argument( '--custom_taxonomy_file', help="file indicating custom taxonomy strings for user genomes") optional_decorate.add_argument('-h', '--help', action="help", help="show help message") # establish taxonomic ranks of internal nodes using RED infer_ranks_parser = subparsers.add_parser( 'infer_ranks', conflict_handler='resolve', formatter_class=CustomHelpFormatter, help='Establish taxonomic ranks of internal nodes using RED.', ) infer_ranks_req = infer_ranks_parser.add_argument_group( 'required named arguments') infer_ranks_req.add_argument( '--input_tree', required=True, help="rooted input tree with labelled ingroup taxon") infer_ranks_req.add_argument( '--ingroup_taxon', required=True, help= "labelled ingroup taxon to use as root for establish RED values (e.g., c__Bacilli or f__Lactobacillaceae" ) infer_ranks_req.add_argument('--output_tree', required=True, help="output tree") infer_ranks_opt = infer_ranks_parser.add_argument_group( 'optional arguments') infer_ranks_opt.add_argument('-h', '--help', action="help", help="show help message") # ani_rep ani_rep_parser = subparsers.add_parser( 'ani_rep', conflict_handler='resolve', formatter_class=CustomHelpFormatter, help='Calculates ANI to GTDB representative genomes.', ) # ani_rep mutex required input genomes ani_rep_mutex_genome = ani_rep_parser.add_argument_group( 'mutually exclusive required arguments') ani_rep_mutex_in = ani_rep_mutex_genome.add_mutually_exclusive_group( required=True) ani_rep_mutex_in.add_argument( '--genome_dir', help="directory containing genome files in FASTA format") ani_rep_mutex_in.add_argument( '--batchfile', help= "file describing genomes - tab separated in 2 columns (FASTA file, genome ID)" ) # ani_rep required arguments ani_rep_req = ani_rep_parser.add_argument_group('required named arguments') ani_rep_req.add_argument('--out_dir', required=True, help="directory to output files") # ani_rep mash arguments ani_rep_mash = ani_rep_parser.add_argument_group('optional Mash arguments') ani_rep_mash.add_argument('--no_mash', action='store_const', const=True, default=False, help='skip pre-filtering using MASH') ani_rep_mash.add_argument('--mash_k', default=16, type=int, help='k-mer size [1-32]') ani_rep_mash.add_argument('--mash_s', default=5000, type=int, help='maximum number of non-redundant hashes') ani_rep_mash.add_argument('--mash_d', default=0.1, type=float, help='maximum distance to keep [0-1]') ani_rep_mash.add_argument('--mash_v', default=1.0, type=float, help='maximum p-value to keep [0-1]') ani_rep_fastani_opt = ani_rep_parser.add_argument_group( 'optional FastANI arguments') ani_rep_fastani_opt.add_argument( '--min_af', default=AF_THRESHOLD, type=float, help='alignment fraction to consider closest genome') # ani_rep optional arguments ani_rep_opt = ani_rep_parser.add_argument_group('optional arguments') ani_rep_opt.add_argument( '-x', '--extension', default='fna', help='extension of files to process, gz = gzipped') ani_rep_opt.add_argument('--prefix', default='gtdbtk', help='desired prefix for output files') ani_rep_opt.add_argument('--cpus', default=1, type=int, help='number of CPUs to use') ani_rep_opt.add_argument('-h', '--help', action="help", help="show help message") # test test_parser = subparsers.add_parser( 'test', conflict_handler='resolve', formatter_class=CustomHelpFormatter, help='Test the classify_wf pipeline with 3 archaeal genomes.') required_test = test_parser.add_argument_group('required named arguments') required_test.add_argument('--out_dir', required=True, help='directory to output files') optional_test = test_parser.add_argument_group('optional arguments') optional_test.add_argument('--cpus', default=1, type=int, help='number of CPUs to use') optional_test.add_argument('-h', '--help', action="help", help="show help message") # trim MSA msa_parser = subparsers.add_parser( 'trim_msa', conflict_handler='resolve', formatter_class=CustomHelpFormatter, help='Trim an untrimmed MSA file based on a mask.', ) required_msa = msa_parser.add_argument_group('required named arguments') required_msa.add_argument('--untrimmed_msa', required=True, help="untrimmed MSA file") required_msa.add_argument('--output', required=True, help='output file') mutual_trim_msa = msa_parser.add_argument_group( 'mutually exclusive required arguments') mutex_msa_group = mutual_trim_msa.add_mutually_exclusive_group( required=True) mutex_msa_group.add_argument('--mask_file', help="mask file to use for trimming the MSA") mutex_msa_group.add_argument( '--reference_mask', choices=['arc', 'bac'], help="reference mask already present in GTDB-Tk") optional_msa = msa_parser.add_argument_group('optional arguments') optional_msa.add_argument('-h', '--help', action="help", help="show help message") # export msa export_msa_parser = subparsers.add_parser( 'export_msa', conflict_handler='resolve', formatter_class=CustomHelpFormatter, help='Export the untrimmed archaeal or bacterial MSA file.', ) required_export_msa = export_msa_parser.add_argument_group( 'required named arguments') required_export_msa.add_argument('--domain', required=True, choices=['arc', 'bac'], help="select domain to download") required_export_msa.add_argument('--output', required=True, help='output file') optional_export_msa = export_msa_parser.add_argument_group( 'optional arguments') optional_export_msa.add_argument('-h', '--help', action="help", help="show help message") # verify install check_install_parser = subparsers.add_parser( 'check_install', conflict_handler='resolve', formatter_class=CustomHelpFormatter, help='Verify if all gtdb data files are present to run GTDB-Tk.', ) optional_check_install = check_install_parser.add_argument_group( 'optional arguments') optional_check_install.add_argument('-h', '--help', action="help", help="show help message") # ------------------------------------------------- # get and check options args = None if len(sys.argv) == 1: print_help() sys.exit(0) elif sys.argv[1] in {'-v', '--v', '-version', '--version'}: print("gtdbtk: version %s %s %s" % (__version__, __copyright__, __author__)) sys.exit(0) elif sys.argv[1] in {'-h', '--h', '-help', '--help'}: print_help() sys.exit(0) else: args = parser.parse_args() # setup logger logger_setup(args.out_dir if hasattr(args, 'out_dir') else None, "gtdbtk.log", "GTDB-Tk", __version__, False, hasattr(args, 'debug') and args.debug) logger = logging.getLogger('timestamp') # ------------------------------------------------- # do what we came here to do try: gt_parser = OptionsParser(__version__) if False: import cProfile cProfile.run('gt_parser.parseOptions(args)', 'prof') else: gt_parser.parse_options(args) except SystemExit: sys.stdout.write('\n') sys.stdout.flush() logger.error('Controlled exit resulting from early termination.') sys.exit(1) except KeyboardInterrupt: sys.stdout.write('\n') sys.stdout.flush() logger.error('Controlled exit resulting from interrupt signal.') sys.exit(1) except GTDBTkExit as e: sys.stdout.write('\n') sys.stdout.flush() if len(str(e)) > 0: logger.error('{}'.format(e)) logger.error( 'Controlled exit resulting from an unrecoverable error or warning.' ) sys.exit(1) except (GTDBTkException, BioLibError) as e: sys.stdout.write('\n') sys.stdout.flush() msg = 'Controlled exit resulting from an unrecoverable error or warning.\n\n' msg += '=' * 80 + '\n' msg += 'EXCEPTION: {}\n'.format(type(e).__name__) msg += ' MESSAGE: {}\n'.format(e) msg += '_' * 80 + '\n\n' msg += traceback.format_exc() msg += '=' * 80 logger.error(msg) sys.exit(1) except Exception as e: sys.stdout.write('\n') sys.stdout.flush() msg = 'Uncontrolled exit resulting from an unexpected error.\n\n' msg += '=' * 80 + '\n' msg += 'EXCEPTION: {}\n'.format(type(e).__name__) msg += ' MESSAGE: {}\n'.format(e) msg += '_' * 80 + '\n\n' msg += traceback.format_exc() msg += '=' * 80 logger.error(msg) sys.exit(1)
unroot_tree_parser.add_argument('output_tree', help='') unroot_tree_parser.add_argument( '--silent', help="suppress output", action='store_true') # get and check options args = None if len(sys.argv) == 1 or sys.argv[1] == '-h' or sys.argv == '--help': print_help() sys.exit(0) else: args = parser.parse_args() try: logger_setup(args.output_dir, 'gtdbtk_toolset.log', 'GTDB Tk converter', version(), args.silent) except: logger_setup(None, 'gtdbtk_toolset.log', 'GTDB Tk converter', __version__, args.silent) # do what we came here to do try: parser = OptionsParser() if False: # import pstats # p = pstats.Stats('prof')