def identify(self, genomes, out_dir, prefix, force):
    """Identify marker genes in genomes.

    Genes are called with Prodigal, the resulting protein files are
    searched against the TIGRFAM and Pfam HMMs, and the identified marker
    genes are then reported.

    Parameters
    ----------
    genomes
        Genomes to process. Only its length is read here; the object is
        handed unchanged to the Prodigal wrapper.
        (Presumably maps genome ids to FASTA files -- confirm with caller.)
    out_dir : str
        Output directory. Marker gene results are placed in its
        ``DIR_MARKER_GENE`` sub-directory.
    prefix
        Forwarded to the marker gene report.
    force
        Forwarded to the Prodigal wrapper.

    Notes
    -----
    An ``IOError`` raised while processing is logged and swallowed; any
    other exception is logged and re-raised. The dependency check runs
    outside of that error handling.
    """
    check_dependencies(['prodigal', 'hmmsearch'])

    try:
        genome_count = len(genomes)
        self.logger.info('Identifying markers in %d genomes with %d threads.' % (genome_count,
                                                                                self.cpus))

        # Gene calling.
        self.logger.info("Running Prodigal to identify genes.")
        self.marker_gene_dir = os.path.join(out_dir, DIR_MARKER_GENE)
        gene_caller = Prodigal(self.cpus,
                               False,
                               self.marker_gene_dir,
                               self.protein_file_suffix,
                               self.nt_gene_file_suffix,
                               self.gff_file_suffix,
                               force)
        called_genomes = gene_caller.run(genomes)

        # Both HMM searches below run over the same set of protein files.
        protein_files = [called_genomes[genome_id]['aa_gene_path']
                         for genome_id in called_genomes.keys()]

        # Annotate genes against the TIGRFAM database.
        self.logger.info("Identifying TIGRFAM protein families.")
        tigrfam_runner = TigrfamSearch(self.cpus,
                                       self.tigrfam_hmms,
                                       self.protein_file_suffix,
                                       self.tigrfam_suffix,
                                       self.tigrfam_top_hit_suffix,
                                       self.checksum_suffix,
                                       self.marker_gene_dir)
        tigrfam_runner.run(protein_files)

        # Annotate genes against the Pfam database.
        self.logger.info("Identifying Pfam protein families.")
        pfam_runner = PfamSearch(self.cpus,
                                 self.pfam_hmm_dir,
                                 self.protein_file_suffix,
                                 self.pfam_suffix,
                                 self.pfam_top_hit_suffix,
                                 self.checksum_suffix,
                                 self.marker_gene_dir)
        pfam_runner.run(protein_files)

        self._report_identified_marker_genes(called_genomes,
                                             out_dir,
                                             self.marker_gene_dir,
                                             prefix)

    except IOError as err:
        # I/O problems are reported but not propagated to the caller.
        self.logger.error(str(err))
        self.logger.error("GTDB-Tk has encountered an error.")

    except Exception as err:
        self.logger.error(str(err))
        raise
def infer(self, options):
    """Infer tree from MSA.

    Builds a FastTree (or FastTreeMP when more than one CPU is requested)
    command line from ``options`` and runs it through the shell. The tree
    is written to ``<prefix>[<suffix>].unrooted.tree`` in
    ``options.out_dir``, alongside ``.tree.log`` (FastTree ``-log``) and
    ``.fasttree.log`` (captured stderr).

    Parameters
    ----------
    options
        Object providing ``msa_file``, ``out_dir``, ``prefix``, ``cpus``,
        ``prot_model``, ``no_support`` and ``no_gamma``. If it also has a
        ``suffix`` attribute, that is appended to the prefix in the output
        file names.

    Raises
    ------
    ValueError
        If ``options.prot_model`` is not one of ``JTT``, ``WAG`` or ``LG``.
    """
    # Validate the model first: an unrecognised value used to surface only
    # as an UnboundLocalError while the command line was being assembled.
    model_flags = {'JTT': '', 'WAG': ' -wag', 'LG': ' -lg'}
    if options.prot_model not in model_flags:
        raise ValueError('Unsupported protein model "%s"; expected one of: %s.'
                         % (options.prot_model, ', '.join(sorted(model_flags))))
    model_str = model_flags[options.prot_model]

    check_file_exists(options.msa_file)
    make_sure_path_exists(options.out_dir)

    # The multi-threaded binary is only used when several CPUs are requested.
    fasttree_exe = 'FastTreeMP' if options.cpus > 1 else 'FastTree'
    check_dependencies([fasttree_exe])

    support_str = ' -nosupport' if options.no_support else ''

    if options.no_gamma:
        gamma_str = ''
        gamma_label = ''
    else:
        gamma_str = ' -gamma'
        gamma_label = '+GAMMA'

    # Only advertise +GAMMA when the -gamma flag is actually passed.
    self.logger.info('Inferring tree with FastTree using %s%s.'
                     % (options.prot_model, gamma_label))

    file_stem = options.prefix
    if hasattr(options, 'suffix'):
        file_stem = options.prefix + options.suffix

    output_tree = os.path.join(options.out_dir, file_stem + '.unrooted.tree')
    tree_log = os.path.join(options.out_dir, file_stem + '.tree.log')
    fasttree_log = os.path.join(options.out_dir, file_stem + '.fasttree.log')

    # NOTE(review): paths are interpolated unquoted into a shell command, so
    # paths containing spaces or shell metacharacters will break it.
    cmd = '%s -quiet%s%s%s -log %s %s > %s 2> %s' % (fasttree_exe,
                                                     support_str,
                                                     model_str,
                                                     gamma_str,
                                                     tree_log,
                                                     options.msa_file,
                                                     output_tree,
                                                     fasttree_log)
    self.logger.info('Running: %s' % cmd)

    exit_status = os.system(cmd)
    if exit_status != 0:
        # Report the failure instead of silently ignoring it; the caller's
        # control flow is left unchanged.
        self.logger.error('FastTree command failed (os.system returned %d); see %s.'
                          % (exit_status, fasttree_log))

    self.logger.info('Done.')
def parse_options(self, options):
    """Parse user options and call the correct pipeline(s).

    Dispatches on ``options.subparser_name``. The two workflow commands
    (``de_novo_wf`` and ``classify_wf``) chain several steps and fill in
    the intermediate attributes on ``options`` that later steps read; every
    other command maps onto a single method.

    Parameters
    ----------
    options
        Parsed command-line options. ``subparser_name`` selects the
        command; the remaining attributes are consumed by the invoked
        step(s).

    Returns
    -------
    int
        0 once the selected command has completed.

    Raises
    ------
    SystemExit
        With exit status 1 when ``subparser_name`` is not a known command.
    """
    command = options.subparser_name

    # Commands handled by the method of the same name, taking `options`.
    single_step_commands = ('identify', 'align', 'infer', 'classify',
                            'root', 'decorate', 'trim_msa')

    if command == 'de_novo_wf':
        check_dependencies(['prodigal', 'hmmalign'])
        if options.cpus > 1:
            check_dependencies(['FastTreeMP'])
        else:
            check_dependencies(['FastTree'])

        self.identify(options)

        # The align step reads the identify results from the same directory.
        options.identify_dir = options.out_dir
        self.align(options)

        options.suffix = ".bac120" if options.bac120_ms else ".ar122"
        file_stem = options.prefix + options.suffix

        if options.skip_gtdb_refs:
            msa_name = file_stem + ".user_msa.fasta"
        else:
            msa_name = file_stem + ".msa.fasta"
        options.msa_file = os.path.join(options.out_dir,
                                        Config.INTERMEDIATE_RESULTS,
                                        msa_name)
        self.infer(options)

        options.input_tree = os.path.join(options.out_dir,
                                          Config.INTERMEDIATE_RESULTS,
                                          file_stem + ".unrooted.tree")
        options.output_tree = os.path.join(options.out_dir,
                                           file_stem + ".rooted.tree")
        self.root(options)
        self.decorate(options)

    elif command == 'classify_wf':
        check_dependencies(
            ['prodigal', 'hmmalign', 'pplacer', 'guppy', 'fastANI'])

        self.identify(options)

        # Attributes the align step reads but which this workflow's own
        # command line does not supply.
        options.identify_dir = options.out_dir
        options.align_dir = options.out_dir
        options.taxa_filter = None
        options.custom_msa_filters = False
        options.min_consensus = None
        options.min_perc_taxa = None
        options.skip_gtdb_refs = False
        options.cols_per_gene = None
        options.max_consensus = None
        options.rnd_seed = None
        self.align(options)

        self.classify(options)

    elif command in single_step_commands:
        getattr(self, command)(options)

    elif command == 'test':
        self.run_test(options)

    elif command == 'check_install':
        self.check_install()

    else:
        # %-formatting (rather than concatenation) also copes with a
        # missing command, i.e. subparser_name being None.
        self.logger.error('Unknown GTDB-Tk command: "%s"\n' % (command,))
        # Exit with a failure status; a bare sys.exit() reports success.
        sys.exit(1)

    return 0
def parse_options(self, options):
    """Parse user options and call the correct pipeline(s).

    Dispatches on ``options.subparser_name``. The two workflow commands
    (``de_novo_wf`` and ``classify_wf``) chain several steps and fill in
    the intermediate attributes on ``options`` that later steps read; every
    other command maps onto a single method.

    Parameters
    ----------
    options
        Parsed command-line options. ``subparser_name`` selects the
        command; the remaining attributes are consumed by the invoked
        step(s).

    Returns
    -------
    int
        0 once the selected command has completed.

    Raises
    ------
    GenomeMarkerSetUnknown
        In ``de_novo_wf`` if ``options.suffix`` is neither ``bac120`` nor
        ``ar122`` when the MSA or tree paths are selected.
    SystemExit
        With exit status 1 when ``subparser_name`` is not a known command.
    """
    command = options.subparser_name

    # Commands handled by the method of the same name, taking `options`.
    single_step_commands = ('identify', 'align', 'infer', 'classify',
                            'root', 'decorate', 'trim_msa', 'export_msa')

    if command == 'de_novo_wf':
        check_dependencies(['prodigal', 'hmmalign'])
        if options.cpus > 1:
            check_dependencies(['FastTreeMP'])
        else:
            check_dependencies(['FastTree'])

        self.identify(options)

        # The align step reads the identify results from the same directory.
        options.identify_dir = options.out_dir
        self.align(options)

        options.suffix = "bac120" if options.bac120_ms else "ar122"

        # Select the MSA path template: user genomes only, or with the
        # GTDB reference genomes included.
        user_msa_only = options.skip_gtdb_refs
        if options.suffix == 'bac120':
            msa_template = PATH_BAC120_USER_MSA if user_msa_only else PATH_BAC120_MSA
        elif options.suffix == 'ar122':
            msa_template = PATH_AR122_USER_MSA if user_msa_only else PATH_AR122_MSA
        else:
            self.logger.error(
                'There was an error determining the marker set.')
            raise GenomeMarkerSetUnknown
        options.msa_file = os.path.join(
            options.out_dir, msa_template.format(prefix=options.prefix))

        self.infer(options)

        # The suffix is re-read after the infer step, as before.
        if options.suffix == 'bac120':
            unrooted_template = PATH_BAC120_UNROOTED_TREE
            rooted_template = PATH_BAC120_ROOTED_TREE
        elif options.suffix == 'ar122':
            unrooted_template = PATH_AR122_UNROOTED_TREE
            rooted_template = PATH_AR122_ROOTED_TREE
        else:
            self.logger.error(
                'There was an error determining the marker set.')
            raise GenomeMarkerSetUnknown
        options.input_tree = os.path.join(
            options.out_dir, unrooted_template.format(prefix=options.prefix))
        options.output_tree = os.path.join(
            options.out_dir, rooted_template.format(prefix=options.prefix))

        self.root(options)
        self.decorate(options)

    elif command == 'classify_wf':
        check_dependencies(
            ['prodigal', 'hmmalign', 'pplacer', 'guppy', 'fastANI'])

        self.identify(options)

        # Attributes the align step reads but which this workflow's own
        # command line does not supply.
        options.identify_dir = options.out_dir
        options.align_dir = options.out_dir
        options.taxa_filter = None
        options.custom_msa_filters = False
        # Set explicitly because its mutually exclusive counterpart is set
        # above (presumably custom_msa_filters -- confirm against the
        # argument parser).
        options.skip_trimming = False
        options.min_consensus = None
        options.min_perc_taxa = None
        options.skip_gtdb_refs = False
        options.cols_per_gene = None
        options.max_consensus = None
        options.rnd_seed = None
        self.align(options)

        self.classify(options)

    elif command in single_step_commands:
        getattr(self, command)(options)

    elif command == 'test':
        self.run_test(options)

    elif command == 'check_install':
        self.check_install()

    else:
        # %-formatting (rather than concatenation) also copes with a
        # missing command, i.e. subparser_name being None.
        self.logger.error('Unknown GTDB-Tk command: "%s"\n' % (command,))
        # Exit with a failure status; a bare sys.exit() reports success.
        sys.exit(1)

    return 0
def infer(self, options):
    """Infer tree from MSA.

    Builds a FastTree (or FastTreeMP when more than one CPU is requested)
    command line from ``options`` and runs it through the shell. The
    unrooted tree, the FastTree ``-log`` file and the captured stderr log
    are written to the locations given by the ``PATH_*`` templates,
    relative to ``options.out_dir``.

    Parameters
    ----------
    options
        Object providing ``msa_file``, ``out_dir``, ``prefix``, ``cpus``,
        ``prot_model``, ``no_support`` and ``no_gamma``. If it also has a
        ``suffix`` attribute, the marker-specific path templates are used
        with that value as the marker.

    Raises
    ------
    ValueError
        If ``options.prot_model`` is not one of ``JTT``, ``WAG`` or ``LG``.
    """
    # Validate the model first: an unrecognised value used to surface only
    # as an UnboundLocalError while the command line was being assembled.
    model_flags = {'JTT': '', 'WAG': ' -wag', 'LG': ' -lg'}
    if options.prot_model not in model_flags:
        raise ValueError('Unsupported protein model "{}"; expected one of: {}.'.format(
            options.prot_model, ', '.join(sorted(model_flags))))
    model_str = model_flags[options.prot_model]

    check_file_exists(options.msa_file)
    make_sure_path_exists(options.out_dir)

    if options.cpus > 1:
        fasttree_exe = 'FastTreeMP'
        check_dependencies([fasttree_exe])
        # Thread count for the multi-threaded binary is set via OpenMP's
        # environment variable.
        os.environ['OMP_NUM_THREADS'] = '%d' % options.cpus
    else:
        fasttree_exe = 'FastTree'
        check_dependencies([fasttree_exe])

    if hasattr(options, 'suffix'):
        output_tree = os.path.join(
            options.out_dir,
            PATH_MARKER_UNROOTED_TREE.format(prefix=options.prefix, marker=options.suffix))
        tree_log = os.path.join(
            options.out_dir,
            PATH_MARKER_TREE_LOG.format(prefix=options.prefix, marker=options.suffix))
        fasttree_log = os.path.join(
            options.out_dir,
            PATH_MARKER_FASTTREE_LOG.format(prefix=options.prefix, marker=options.suffix))
    else:
        output_tree = os.path.join(
            options.out_dir, PATH_UNROOTED_TREE.format(prefix=options.prefix))
        tree_log = os.path.join(
            options.out_dir, PATH_TREE_LOG.format(prefix=options.prefix))
        fasttree_log = os.path.join(
            options.out_dir, PATH_FASTTREE_LOG.format(prefix=options.prefix))

    # The shell redirections below need the parent directories of every
    # output file to exist.
    for out_path in (output_tree, tree_log, fasttree_log):
        make_sure_path_exists(os.path.dirname(out_path))

    support_str = ' -nosupport' if options.no_support else ''

    if options.no_gamma:
        gamma_str = ''
        gamma_str_info = ''
    else:
        gamma_str = ' -gamma'
        gamma_str_info = '+GAMMA'

    # Single, accurate announcement: the format string previously had one
    # placeholder for two arguments, so the gamma label was dropped, and an
    # earlier message claimed +GAMMA unconditionally.
    self.logger.info('Inferring tree with FastTree using {}{}.'.format(
        options.prot_model, gamma_str_info))

    # NOTE(review): paths are interpolated unquoted into a shell command, so
    # paths containing spaces or shell metacharacters will break it.
    cmd = '%s -quiet%s%s%s -log %s %s > %s 2> %s' % (fasttree_exe,
                                                     support_str,
                                                     model_str,
                                                     gamma_str,
                                                     tree_log,
                                                     options.msa_file,
                                                     output_tree,
                                                     fasttree_log)
    self.logger.info('Running: %s' % cmd)

    exit_status = os.system(cmd)
    if exit_status != 0:
        # Report the failure instead of silently ignoring it; the caller's
        # control flow is left unchanged.
        self.logger.error('FastTree command failed (os.system returned %d); see %s.'
                          % (exit_status, fasttree_log))

    self.logger.info('Done.')