Beispiel #1
0
    def run_test(self, options):
        """Run test of classify workflow.

        Copies the bundled test genomes (``tests/data/genomes`` next to this
        module) into ``<out_dir>/genomes``, runs ``gtdbtk classify_wf`` on
        them with output in ``<out_dir>/output``, and verifies that the
        AR122 summary file was produced.

        Parameters
        ----------
        options : object
            Parsed options providing ``out_dir`` and ``cpus``.

        Notes
        -----
        Terminates the process with exit status -1 if the test genome
        directory already exists, if ``gtdbtk`` cannot be launched, or if
        the expected summary file is missing after the run.
        """
        import subprocess

        make_sure_path_exists(options.out_dir)

        output_dir = os.path.join(options.out_dir, 'output')
        genome_test_dir = os.path.join(options.out_dir, 'genomes')
        if os.path.exists(genome_test_dir):
            self.logger.error(
                'Test directory {} already exists. Test must be run with a new directory.'
                .format(genome_test_dir))
            sys.exit(-1)

        current_path = os.path.dirname(os.path.realpath(__file__))
        input_dir = os.path.join(current_path, 'tests', 'data', 'genomes')

        shutil.copytree(input_dir, genome_test_dir)

        # Pass the command as an argument list (no shell) so that paths
        # containing spaces or shell metacharacters are handed over intact.
        cmd_args = ['gtdbtk', 'classify_wf',
                    '--genome_dir', genome_test_dir,
                    '--out_dir', output_dir,
                    '--cpus', '{}'.format(options.cpus)]
        print("Command:")
        print(' '.join(cmd_args))
        try:
            # The exit status is not inspected; success is judged by the
            # presence of the summary file below.
            subprocess.call(cmd_args)
        except OSError as err:
            # Raised when the executable cannot be started (e.g. not on PATH).
            self.logger.error('Unable to execute gtdbtk: {}'.format(err))
            sys.exit(-1)

        summary_file = os.path.join(
            output_dir, PATH_AR122_SUMMARY_OUT.format(prefix='gtdbtk'))

        if not os.path.exists(summary_file):
            # Report through the logger, like the other failure path above.
            self.logger.error(
                "{} is missing.\nTest has failed.".format(summary_file))
            sys.exit(-1)

        self.logger.info('Test has successfully finished.')
Beispiel #2
0
    def run_test(self, options):
        """Run test of classify workflow.

        Copies the bundled test genomes (``tests/data/genomes`` next to this
        module) into ``<out_dir>/genomes`` (replacing any existing copy),
        runs ``gtdbtk classify_wf`` on them with output in
        ``<out_dir>/output``, and verifies that
        ``gtdbtk.ar122.summary.tsv`` was produced.

        Parameters
        ----------
        options : object
            Parsed options providing ``out_dir`` and ``cpus``.

        Notes
        -----
        Terminates the process with exit status -1 if ``gtdbtk`` cannot be
        launched or if the expected summary file is missing after the run.
        """
        import subprocess

        make_sure_path_exists(options.out_dir)

        genome_test_dir = os.path.join(options.out_dir, 'genomes')
        output_dir = os.path.join(options.out_dir, 'output')

        # Start from a clean copy of the test genomes on every run.
        if os.path.isdir(genome_test_dir):
            shutil.rmtree(genome_test_dir)

        current_path = os.path.dirname(os.path.realpath(__file__))
        input_dir = os.path.join(current_path, 'tests', 'data', 'genomes')

        shutil.copytree(input_dir, genome_test_dir)

        # Pass the command as an argument list (no shell) so that paths
        # containing spaces or shell metacharacters are handed over intact.
        cmd_args = ['gtdbtk', 'classify_wf',
                    '--genome_dir', genome_test_dir,
                    '--out_dir', output_dir,
                    '--cpus', '{}'.format(options.cpus)]

        # Single-argument print(...) calls behave the same on Python 2 and 3.
        print("Command:")
        print(' '.join(cmd_args))
        try:
            # The exit status is not inspected; success is judged by the
            # presence of the summary file below.
            subprocess.call(cmd_args)
        except OSError as err:
            # Raised when the executable cannot be started (e.g. not on PATH).
            print("Unable to execute gtdbtk: {}".format(err))
            sys.exit(-1)

        summary_file = os.path.join(output_dir, 'gtdbtk.ar122.summary.tsv')

        if not os.path.exists(summary_file):
            print("{} is missing.\nTest has failed.".format(summary_file))
            sys.exit(-1)

        self.logger.info('Test has successfully finished.')
Beispiel #3
0
    def export_msa(self, domain, output_file):
        """Copy the concatenated reference MSA for a domain to a file.

        Parameters
        ----------
        domain : str
            'arc' selects ``Config.CONCAT_AR122``; any other value selects
            ``Config.CONCAT_BAC120``.
        output_file : str
            Destination path; its parent directory is created if needed.
        """
        if domain == 'arc':
            source_msa = Config.CONCAT_AR122
        else:
            source_msa = Config.CONCAT_BAC120

        destination_dir = os.path.dirname(output_file)
        make_sure_path_exists(destination_dir)
        copyfile(source_msa, output_file)
Beispiel #4
0
    def infer(self, options):
        """Infer tree from MSA.

        Runs FastTree (FastTreeMP when more than one CPU is requested) on
        ``options.msa_file`` through the shell. Inside ``options.out_dir``
        it writes ``<prefix>[<suffix>].unrooted.tree`` (the tree, from
        stdout), ``<prefix>[<suffix>].tree.log`` (FastTree's ``-log`` file)
        and ``<prefix>[<suffix>].fasttree.log`` (captured stderr).

        Parameters
        ----------
        options : object
            Parsed options providing ``msa_file``, ``out_dir``, ``cpus``,
            ``prot_model`` ('JTT', 'WAG' or 'LG'), ``prefix``,
            ``no_support``, ``no_gamma`` and, optionally, ``suffix``.

        Raises
        ------
        ValueError
            If ``options.prot_model`` is not a supported model.
        """

        check_file_exists(options.msa_file)
        make_sure_path_exists(options.out_dir)

        # FastTree flag for each supported protein model; JTT needs no flag.
        # Validating here avoids an unbound ``model_str`` further down.
        model_flags = {'JTT': '', 'WAG': ' -wag', 'LG': ' -lg'}
        if options.prot_model not in model_flags:
            raise ValueError(
                "Unsupported protein model '%s'; expected JTT, WAG or LG."
                % options.prot_model)
        model_str = model_flags[options.prot_model]

        if options.cpus > 1:
            program = 'FastTreeMP'
            check_dependencies([program])
            # FastTreeMP takes its thread count from OMP_NUM_THREADS, so
            # export the requested number of CPUs to it.
            os.environ['OMP_NUM_THREADS'] = '%d' % options.cpus
        else:
            program = 'FastTree'
            check_dependencies([program])

        # The optional suffix is appended directly to the prefix.
        base_name = options.prefix + getattr(options, 'suffix', '')
        output_tree = os.path.join(options.out_dir,
                                   base_name + '.unrooted.tree')
        tree_log = os.path.join(options.out_dir, base_name + '.tree.log')
        fasttree_log = os.path.join(options.out_dir,
                                    base_name + '.fasttree.log')

        support_str = ' -nosupport' if options.no_support else ''

        if options.no_gamma:
            gamma_str = ''
            gamma_str_info = ''
        else:
            gamma_str = ' -gamma'
            gamma_str_info = '+GAMMA'

        # Report the model actually used: +GAMMA only when it is enabled.
        self.logger.info('Inferring tree with FastTree using %s%s.' %
                         (options.prot_model, gamma_str_info))

        cmd = '%s -quiet%s%s%s -log %s %s > %s 2> %s' % (
            program, support_str, model_str, gamma_str, tree_log,
            options.msa_file, output_tree, fasttree_log)
        self.logger.info('Running: %s' % cmd)
        os.system(cmd)

        self.logger.info('Done.')
Beispiel #5
0
    def classify(self, options):
        """Determine taxonomic classification of genomes.

        Checks that the alignment directory exists, creates the output
        directory (and the scratch directory when one is given), resolves
        the genomes to process and hands them to ``Classify.run``.

        Parameters
        ----------
        options : object
            Parsed options providing ``align_dir``, ``out_dir``,
            ``scratch_dir``, ``genome_dir``, ``batchfile``, ``extension``,
            ``cpus``, ``prefix`` and ``debug``.
        """

        check_dir_exists(options.align_dir)
        make_sure_path_exists(options.out_dir)

        # The scratch directory is optional.
        if options.scratch_dir:
            make_sure_path_exists(options.scratch_dir)

        genomes_to_classify = self._genomes_to_process(
            options.genome_dir, options.batchfile, options.extension)

        classifier = Classify(options.cpus)
        classifier.run(genomes_to_classify,
                       options.align_dir,
                       options.out_dir,
                       options.prefix,
                       options.scratch_dir,
                       options.debug)

        self.logger.info('Done.')
Beispiel #6
0
    def align(self, options):
        """Create MSA from marker genes.

        Checks that the identify directory exists, creates the output
        directory and forwards the alignment settings to ``Markers.align``.

        Parameters
        ----------
        options : object
            Parsed options providing ``identify_dir``, ``out_dir``,
            ``cpus``, ``prefix`` and the filtering settings passed on
            below. ``outgroup_taxon`` is optional and is set to ``None``
            on ``options`` when absent.
        """

        check_dir_exists(options.identify_dir)
        make_sure_path_exists(options.out_dir)

        # Callers may omit the outgroup; give it a default on the options.
        if not hasattr(options, 'outgroup_taxon'):
            options.outgroup_taxon = None

        # Positional arguments in the order Markers.align is called with.
        align_args = (
            options.identify_dir,
            options.skip_gtdb_refs,
            options.taxa_filter,
            options.min_perc_aa,
            options.custom_msa_filters,
            options.rnd_seed,
            options.cols_per_gene,
            options.min_consensus,
            options.max_consensus,
            options.min_perc_taxa,
            options.out_dir,
            options.prefix,
            options.outgroup_taxon,
        )

        marker_runner = Markers(options.cpus)
        marker_runner.align(*align_args)

        self.logger.info('Done.')
Beispiel #7
0
    def identify(self, options):
        """Identify marker genes in genomes.

        Validates whichever of the genome directory and batch file were
        supplied, creates the output directory, resolves the genomes to
        process and passes them to ``Markers.identify``.

        Parameters
        ----------
        options : object
            Parsed options providing ``genome_dir``, ``batchfile``,
            ``extension``, ``out_dir``, ``cpus`` and ``prefix``.
        """

        genome_dir = options.genome_dir
        batchfile = options.batchfile

        # Either input source may be absent; validate only what was given.
        if genome_dir:
            check_dir_exists(genome_dir)
        if batchfile:
            check_file_exists(batchfile)

        make_sure_path_exists(options.out_dir)

        genomes_to_scan = self._genomes_to_process(
            options.genome_dir, options.batchfile, options.extension)

        marker_runner = Markers(options.cpus)
        marker_runner.identify(genomes_to_scan, options.out_dir,
                               options.prefix)

        self.logger.info('Done.')
Beispiel #8
0
    def infer(self, options):
        """Infer tree from MSA.

        Runs FastTree (FastTreeMP when more than one CPU is requested) on
        ``options.msa_file`` through the shell. The tree (stdout), the
        FastTree ``-log`` file and the captured stderr are written to the
        locations given by the ``PATH_*`` templates under
        ``options.out_dir``; the marker-specific templates are used when
        ``options`` has a ``suffix`` attribute.

        Parameters
        ----------
        options : object
            Parsed options providing ``msa_file``, ``out_dir``, ``cpus``,
            ``prot_model`` ('JTT', 'WAG' or 'LG'), ``prefix``,
            ``no_support``, ``no_gamma`` and, optionally, ``suffix``.

        Raises
        ------
        ValueError
            If ``options.prot_model`` is not a supported model.
        """

        check_file_exists(options.msa_file)
        make_sure_path_exists(options.out_dir)

        # FastTree flag for each supported protein model; JTT needs no flag.
        # Validating here avoids an unbound ``model_str`` further down.
        model_flags = {'JTT': '', 'WAG': ' -wag', 'LG': ' -lg'}
        if options.prot_model not in model_flags:
            raise ValueError(
                "Unsupported protein model '{}'; expected JTT, WAG or LG."
                .format(options.prot_model))
        model_str = model_flags[options.prot_model]

        if options.cpus > 1:
            check_dependencies(['FastTreeMP'])
            # Export the requested CPU count as the OpenMP thread limit.
            os.environ['OMP_NUM_THREADS'] = '%d' % options.cpus
        else:
            check_dependencies(['FastTree'])

        if hasattr(options, 'suffix'):
            output_tree = os.path.join(
                options.out_dir,
                PATH_MARKER_UNROOTED_TREE.format(prefix=options.prefix,
                                                 marker=options.suffix))
            tree_log = os.path.join(
                options.out_dir,
                PATH_MARKER_TREE_LOG.format(prefix=options.prefix,
                                            marker=options.suffix))
            fasttree_log = os.path.join(
                options.out_dir,
                PATH_MARKER_FASTTREE_LOG.format(prefix=options.prefix,
                                                marker=options.suffix))
        else:
            output_tree = os.path.join(
                options.out_dir,
                PATH_UNROOTED_TREE.format(prefix=options.prefix))
            tree_log = os.path.join(
                options.out_dir, PATH_TREE_LOG.format(prefix=options.prefix))
            fasttree_log = os.path.join(
                options.out_dir,
                PATH_FASTTREE_LOG.format(prefix=options.prefix))

        # Each output's parent directory is created before the shell
        # redirections below write into it.
        make_sure_path_exists(os.path.dirname(output_tree))
        make_sure_path_exists(os.path.dirname(tree_log))
        make_sure_path_exists(os.path.dirname(fasttree_log))

        support_str = ''
        if options.no_support:
            support_str = ' -nosupport'

        gamma_str = ' -gamma'
        gamma_str_info = '+GAMMA'
        if options.no_gamma:
            gamma_str = ''
            gamma_str_info = ''

        # One placeholder per argument so the +GAMMA marker is not dropped,
        # and a single message that reflects the no_gamma setting.
        self.logger.info('Inferring tree with FastTree using {}{}.'.format(
            options.prot_model, gamma_str_info))

        cmd = '-quiet%s%s%s -log %s %s > %s 2> %s' % (
            support_str, model_str, gamma_str, tree_log, options.msa_file,
            output_tree, fasttree_log)
        if options.cpus > 1:
            cmd = 'FastTreeMP ' + cmd
        else:
            cmd = 'FastTree ' + cmd
        self.logger.info('Running: %s' % cmd)
        os.system(cmd)

        self.logger.info('Done.')