def run_test(self, options):
    """Run test of classify workflow.

    Copies the bundled test genomes (``tests/data/genomes`` next to this
    module) into ``<options.out_dir>/genomes``, runs ``gtdbtk classify_wf``
    on them with results written to ``<options.out_dir>/output``, and then
    checks that the AR122 summary file was produced.

    Parameters
    ----------
    options : object
        Must provide the attributes ``out_dir`` (directory for the test)
        and ``cpus`` (value passed to ``--cpus``).

    The process exits with status -1 if the genome test directory already
    exists, if ``gtdbtk`` cannot be launched or returns a non-zero exit
    status, or if the expected summary file is missing afterwards.
    """
    # Local import so this method does not rely on a module-level import.
    import subprocess

    make_sure_path_exists(options.out_dir)

    output_dir = os.path.join(options.out_dir, 'output')
    genome_test_dir = os.path.join(options.out_dir, 'genomes')
    if os.path.exists(genome_test_dir):
        self.logger.error(
            'Test directory {} already exists. Test must be run with a new directory.'
            .format(genome_test_dir))
        sys.exit(-1)

    current_path = os.path.dirname(os.path.realpath(__file__))
    input_dir = os.path.join(current_path, 'tests', 'data', 'genomes')
    shutil.copytree(input_dir, genome_test_dir)

    # Pass the command as an argument list (no shell) so directories that
    # contain spaces or shell metacharacters reach gtdbtk unchanged.
    cmd_args = ['gtdbtk', 'classify_wf',
                '--genome_dir', genome_test_dir,
                '--out_dir', output_dir,
                '--cpus', str(options.cpus)]
    print("Command:")
    print(' '.join(cmd_args))

    try:
        exit_status = subprocess.call(cmd_args)
    except OSError as err:
        # Raised when the executable cannot be started (e.g. not on PATH).
        self.logger.error('Unable to execute gtdbtk: {}'.format(err))
        sys.exit(-1)

    if exit_status != 0:
        self.logger.error(
            'gtdbtk classify_wf returned non-zero exit status {}.\nTest has failed.'
            .format(exit_status))
        sys.exit(-1)

    summary_file = os.path.join(
        output_dir, PATH_AR122_SUMMARY_OUT.format(prefix='gtdbtk'))
    if not os.path.exists(summary_file):
        print("{} is missing.\nTest has failed.".format(summary_file))
        sys.exit(-1)

    self.logger.info('Test has successfully finished.')
def run_test(self, options):
    """Run test of classify workflow.

    Copies the bundled test genomes (``tests/data/genomes`` next to this
    module) into ``<options.out_dir>/genomes``, replacing any directory
    already present there, runs ``gtdbtk classify_wf`` on them with results
    written to ``<options.out_dir>/output``, and checks that
    ``gtdbtk.ar122.summary.tsv`` was produced.

    Parameters
    ----------
    options : object
        Must provide the attributes ``out_dir`` and ``cpus``.

    The process exits with status -1 if the summary file is missing after
    the command has run.
    """
    make_sure_path_exists(options.out_dir)

    genome_test_dir = os.path.join(options.out_dir, 'genomes')
    output_dir = os.path.join(options.out_dir, 'output')
    if os.path.isdir(genome_test_dir):
        # copytree requires that the destination does not exist yet
        shutil.rmtree(genome_test_dir)

    current_path = os.path.dirname(os.path.realpath(__file__))
    input_dir = os.path.join(current_path, 'tests', 'data', 'genomes')
    shutil.copytree(input_dir, genome_test_dir)

    # NOTE: the command is executed through the shell and the paths are
    # interpolated unquoted, so directories containing spaces will break it.
    cmd = 'gtdbtk classify_wf --genome_dir {} --out_dir {} --cpus {}'.format(
        genome_test_dir, output_dir, options.cpus)

    # Single-argument print(...) calls produce the same output on Python 2
    # and Python 3, unlike the print statement.
    print("Command:")
    print(cmd)
    os.system(cmd)

    summary_file = os.path.join(output_dir, 'gtdbtk.ar122.summary.tsv')
    if not os.path.exists(summary_file):
        print("{} is missing.\nTest has failed.".format(summary_file))
        sys.exit(-1)

    self.logger.info('Test has successfully finished.')
def export_msa(self, domain, output_file):
    """Copy the configured MSA file for a domain to output_file.

    The file at Config.CONCAT_AR122 is exported when domain is 'arc';
    any other domain value exports Config.CONCAT_BAC120. The parent
    directory of output_file is passed to make_sure_path_exists before
    the copy is made.
    """
    bacterial_msa = Config.CONCAT_BAC120
    source_msa = Config.CONCAT_AR122 if domain == 'arc' else bacterial_msa

    destination_dir = os.path.dirname(output_file)
    make_sure_path_exists(destination_dir)
    copyfile(source_msa, output_file)
def infer(self, options):
    """Infer tree from MSA.

    Runs FastTree (FastTreeMP when options.cpus > 1) on options.msa_file
    through the shell, writing the tree to
    ``<prefix>[<suffix>].unrooted.tree``, FastTree's ``-log`` output to
    ``<prefix>[<suffix>].tree.log`` and stderr to
    ``<prefix>[<suffix>].fasttree.log`` inside options.out_dir. The suffix
    is only used when ``options`` has a ``suffix`` attribute.

    Parameters
    ----------
    options : object
        Must provide ``msa_file``, ``out_dir``, ``cpus``, ``prefix``,
        ``prot_model`` ('JTT', 'WAG' or 'LG'), ``no_support`` and
        ``no_gamma``; ``suffix`` is optional.

    Raises
    ------
    ValueError
        If options.prot_model is not one of the supported models.
    """
    check_file_exists(options.msa_file)
    make_sure_path_exists(options.out_dir)

    fasttree_exe = 'FastTreeMP' if options.cpus > 1 else 'FastTree'
    check_dependencies([fasttree_exe])

    # Command-line flag for each supported protein model; JTT needs no
    # flag. Validating here avoids an unbound model string further down
    # when an unknown model is supplied.
    model_flags = {'JTT': '', 'WAG': ' -wag', 'LG': ' -lg'}
    if options.prot_model not in model_flags:
        raise ValueError(
            'Unsupported protein model: %s (expected one of JTT, WAG, LG).'
            % options.prot_model)
    model_str = model_flags[options.prot_model]

    support_str = ' -nosupport' if options.no_support else ''

    if options.no_gamma:
        gamma_str = ''
        gamma_info = ''
    else:
        gamma_str = ' -gamma'
        gamma_info = '+GAMMA'

    # Only mention GAMMA when it is actually requested.
    self.logger.info('Inferring tree with FastTree using %s%s.'
                     % (options.prot_model, gamma_info))

    file_stem = options.prefix
    if hasattr(options, 'suffix'):
        file_stem = options.prefix + options.suffix

    output_tree = os.path.join(options.out_dir, file_stem + '.unrooted.tree')
    tree_log = os.path.join(options.out_dir, file_stem + '.tree.log')
    fasttree_log = os.path.join(options.out_dir, file_stem + '.fasttree.log')

    # stdout carries the tree and stderr the progress log, hence the shell
    # redirections.
    cmd = '%s -quiet%s%s%s -log %s %s > %s 2> %s' % (
        fasttree_exe, support_str, model_str, gamma_str,
        tree_log, options.msa_file, output_tree, fasttree_log)

    self.logger.info('Running: %s' % cmd)
    os.system(cmd)

    self.logger.info('Done.')
def classify(self, options):
    """Determine taxonomic classification of genomes.

    Checks that options.align_dir exists, ensures options.out_dir (and
    options.scratch_dir, when one is given) exist, collects the genomes to
    process and hands them to Classify.run.
    """
    check_dir_exists(options.align_dir)
    make_sure_path_exists(options.out_dir)

    scratch_dir = options.scratch_dir
    if scratch_dir:
        make_sure_path_exists(scratch_dir)

    genomes_to_classify = self._genomes_to_process(
        options.genome_dir,
        options.batchfile,
        options.extension)

    classifier = Classify(options.cpus)
    classifier.run(
        genomes_to_classify,
        options.align_dir,
        options.out_dir,
        options.prefix,
        scratch_dir,
        options.debug)

    self.logger.info('Done.')
def align(self, options):
    """Create MSA from marker genes.

    Checks that options.identify_dir exists, ensures options.out_dir
    exists, and forwards the alignment settings held on ``options`` to
    Markers.align. If ``options`` has no ``outgroup_taxon`` attribute it
    is set to None on the object before the call.
    """
    check_dir_exists(options.identify_dir)
    make_sure_path_exists(options.out_dir)

    # Callers may not define an outgroup; default it on the options object.
    if not hasattr(options, 'outgroup_taxon'):
        setattr(options, 'outgroup_taxon', None)

    marker_runner = Markers(options.cpus)
    align_args = (
        options.identify_dir,
        options.skip_gtdb_refs,
        options.taxa_filter,
        options.min_perc_aa,
        options.custom_msa_filters,
        options.rnd_seed,
        options.cols_per_gene,
        options.min_consensus,
        options.max_consensus,
        options.min_perc_taxa,
        options.out_dir,
        options.prefix,
        options.outgroup_taxon,
    )
    marker_runner.align(*align_args)

    self.logger.info('Done.')
def identify(self, options):
    """Identify marker genes in genomes.

    Validates whichever of options.genome_dir and options.batchfile is
    set, ensures options.out_dir exists, collects the genomes to process
    and passes them to Markers.identify.
    """
    genome_dir = options.genome_dir
    if genome_dir:
        check_dir_exists(genome_dir)

    batchfile = options.batchfile
    if batchfile:
        check_file_exists(batchfile)

    make_sure_path_exists(options.out_dir)

    genomes_to_process = self._genomes_to_process(
        genome_dir,
        batchfile,
        options.extension)

    marker_runner = Markers(options.cpus)
    marker_runner.identify(genomes_to_process, options.out_dir, options.prefix)

    self.logger.info('Done.')
def infer(self, options):
    """Infer tree from MSA.

    Runs FastTree (FastTreeMP when options.cpus > 1) on options.msa_file
    through the shell. The tree, FastTree's ``-log`` output and stderr are
    written to the locations given by the PATH_* templates relative to
    options.out_dir; the PATH_MARKER_* templates are used when ``options``
    has a ``suffix`` attribute. Parent directories of all three output
    files are created as needed.

    Parameters
    ----------
    options : object
        Must provide ``msa_file``, ``out_dir``, ``cpus``, ``prefix``,
        ``prot_model`` ('JTT', 'WAG' or 'LG'), ``no_support`` and
        ``no_gamma``; ``suffix`` is optional.

    Raises
    ------
    ValueError
        If options.prot_model is not one of the supported models.
    """
    check_file_exists(options.msa_file)
    make_sure_path_exists(options.out_dir)

    if options.cpus > 1:
        fasttree_exe = 'FastTreeMP'
        check_dependencies([fasttree_exe])
        # Thread count for the OpenMP build of FastTree.
        os.environ['OMP_NUM_THREADS'] = '%d' % options.cpus
    else:
        fasttree_exe = 'FastTree'
        check_dependencies([fasttree_exe])

    # Command-line flag for each supported protein model; JTT needs no
    # flag. Validating here avoids an unbound model string further down
    # when an unknown model is supplied.
    model_flags = {'JTT': '', 'WAG': ' -wag', 'LG': ' -lg'}
    if options.prot_model not in model_flags:
        raise ValueError(
            'Unsupported protein model: {} (expected one of JTT, WAG, LG).'
            .format(options.prot_model))
    model_str = model_flags[options.prot_model]

    if hasattr(options, 'suffix'):
        output_tree = os.path.join(
            options.out_dir,
            PATH_MARKER_UNROOTED_TREE.format(prefix=options.prefix,
                                             marker=options.suffix))
        tree_log = os.path.join(
            options.out_dir,
            PATH_MARKER_TREE_LOG.format(prefix=options.prefix,
                                        marker=options.suffix))
        fasttree_log = os.path.join(
            options.out_dir,
            PATH_MARKER_FASTTREE_LOG.format(prefix=options.prefix,
                                            marker=options.suffix))
    else:
        output_tree = os.path.join(
            options.out_dir, PATH_UNROOTED_TREE.format(prefix=options.prefix))
        tree_log = os.path.join(
            options.out_dir, PATH_TREE_LOG.format(prefix=options.prefix))
        fasttree_log = os.path.join(
            options.out_dir, PATH_FASTTREE_LOG.format(prefix=options.prefix))

    # The path templates may place outputs in sub-directories of out_dir.
    for out_path in (output_tree, tree_log, fasttree_log):
        make_sure_path_exists(os.path.dirname(out_path))

    support_str = ' -nosupport' if options.no_support else ''

    if options.no_gamma:
        gamma_str = ''
        gamma_str_info = ''
    else:
        gamma_str = ' -gamma'
        gamma_str_info = '+GAMMA'

    # One accurate message: both the model and the GAMMA suffix need their
    # own placeholder, and GAMMA is only mentioned when it is in use.
    self.logger.info('Inferring tree with FastTree using {}{}.'.format(
        options.prot_model, gamma_str_info))

    # stdout carries the tree and stderr the progress log, hence the shell
    # redirections.
    cmd = '%s -quiet%s%s%s -log %s %s > %s 2> %s' % (
        fasttree_exe, support_str, model_str, gamma_str,
        tree_log, options.msa_file, output_tree, fasttree_log)

    self.logger.info('Running: %s' % cmd)
    os.system(cmd)

    self.logger.info('Done.')