Code example #1
    def generate_index_page(self):
        """Generates the index page for help where all programs and artifacts are listed"""

        # let's add the 'path' for each artifact to simplify
        # access from the template:
        for artifact in self.artifacts_info:
            self.artifacts_info[artifact]['path'] = f"artifacts/{artifact}"

        # NOTE: artifacts get a full dictionary with everything, while programs
        # get a plain list of tuples. if we need to improve the functionality
        # of the help index page, we may need to update programs to a full
        # dictionary, too.
        d = {
            'programs': [(p, 'programs/%s' % p,
                          self.programs[p].meta_info['description']['value'])
                         for p in self.programs],
            'artifacts': self.artifacts_info,
            'artifact_types': self.artifact_types,
            'meta': {
                'summary_type': 'programs_and_artifacts_index',
                'version': '%s (%s)' % (anvio.anvio_version, anvio.anvio_codename),
                'date': utils.get_date()
            }
        }

        d['program_provides_requires'] = self.get_program_requires_provides_dict(prefix='')

        output_file_path = os.path.join(self.output_directory_path, 'index.md')
        # context manager so the handle is closed even if render() raises
        # (the original `open(...).write(...)` leaked the file object)
        with open(output_file_path, 'w') as output_file:
            output_file.write(SummaryHTMLOutput(d, r=run, p=progress).render())
Code example #2
    def generate_pages_for_artifacts(self):
        """Generates static pages for artifacts in the output directory"""

        self.progress.new("Rendering artifact pages",
                          progress_total_items=len(ANVIO_ARTIFACTS))
        self.progress.update('...')

        for artifact in ANVIO_ARTIFACTS:
            self.progress.update(f"'{artifact}' ...", increment=True)

            # template context: the artifact entry itself plus rendering metadata
            d = {
                'artifact': ANVIO_ARTIFACTS[artifact],
                'meta': {
                    'summary_type': 'artifact',
                    'version': '\n'.join(['|%s|%s|' % (t[0], t[1])
                                          for t in anvio.get_version_tuples()]),
                    'date': utils.get_date(),
                    'version_short_identifier': self.version_short_identifier
                }
            }

            # enrich the artifact entry with cross-links to the programs that
            # require/provide it, its description, and its type icon
            d['artifact']['name'] = artifact
            d['artifact']['required_by'] = [(r, '../../programs/%s' % r)
                                            for r in self.artifacts_info[artifact]['required_by']]
            d['artifact']['provided_by'] = [(r, '../../programs/%s' % r)
                                            for r in self.artifacts_info[artifact]['provided_by']]
            d['artifact']['description'] = self.artifacts_info[artifact]['description']
            d['artifact']['icon'] = '../../images/icons/%s.png' % ANVIO_ARTIFACTS[artifact]['type']

            if anvio.DEBUG:
                self.progress.reset()
                run.warning(None, 'THE OUTPUT DICT')
                import json
                print(json.dumps(d, indent=2))

            self.progress.update(f"'{artifact}' ... rendering ...",
                                 increment=False)
            artifact_output_dir = filesnpaths.gen_output_directory(
                os.path.join(self.artifacts_output_dir, artifact))
            output_file_path = os.path.join(artifact_output_dir, 'index.md')
            # context manager so the handle is closed even if render() raises
            # (the original `open(...).write(...)` leaked the file object)
            with open(output_file_path, 'w') as output_file:
                output_file.write(SummaryHTMLOutput(d, r=run, p=progress).render())

        self.progress.end()
Code example #3
File: programs.py — Project: mschecht/anvio
    def generate(self):
        """Collects `--help` output from every anvi'o program and renders a
        single vignette HTML file at `self.output_file_path`.

        Programs listed in `self.programs_to_skip` are skipped entirely.
        Programs whose help output cannot be parsed are skipped with a
        warning (unless `anvio.DEBUG` is set, in which case the parse error
        propagates for inspection).
        """
        d = {}

        log_file = filesnpaths.get_temp_file_path()
        num_all_programs = len(self.all_programs)
        for i, program_path in enumerate(self.all_programs):
            program_name = os.path.basename(program_path)

            if program_name in self.programs_to_skip:
                run.warning("Someone doesn't want %s to be in the output :/ Fine. Skipping." % (program_name))
                # bug fix: the warning said "Skipping", but the program was
                # processed anyway. `continue` makes the code match the message.
                continue

            progress.new('Bleep bloop')
            progress.update('%s (%d of %d)' % (program_name, i+1, num_all_programs))

            output = utils.run_command_STDIN('%s --help' % (program_path), log_file, '').split('\n')

            if anvio.DEBUG:
                # in debug mode let parse errors propagate for inspection
                usage, description, params, output = parse_help_output(output)
            else:
                try:
                    usage, description, params, output = parse_help_output(output)
                except Exception as e:
                    progress.end()
                    run.warning("The program '%s' does not seem to have the expected help menu output. Skipping to the next.\
                                 For the curious, this was the error message: '%s'" % (program_name, str(e).strip()))
                    continue

            d[program_name] = {'usage': usage,
                               'description': description,
                               'params': params,
                               'tags': get_meta_information_from_file(program_path, '__tags__'),
                               'resources': get_meta_information_from_file(program_path, '__resources__')}

            progress.end()

        os.remove(log_file)

        # generate output: programs first, then scripts, each sorted by name
        program_names = sorted([p for p in d if not p.startswith('anvi-script-')])
        script_names = sorted([p for p in d if p.startswith('anvi-script-')])
        vignette = {'vignette': d,
                    'program_names': program_names,
                    'script_names': script_names,
                    'all_names': program_names + script_names,
                    'meta': {'summary_type': 'vignette',
                             'version': '\n'.join(['|%s|%s|' % (t[0], t[1]) for t in anvio.get_version_tuples()]),
                             'date': utils.get_date()}}

        if anvio.DEBUG:
            run.warning(None, 'THE OUTPUT DICT')
            import json
            print(json.dumps(d, indent=2))

        # context manager so the handle is closed even if render() raises
        # (the original `open(...).write(...)` leaked the file object)
        with open(self.output_file_path, 'w') as output_file:
            output_file.write(SummaryHTMLOutput(vignette, r=run, p=progress).render())

        run.info('Output file', os.path.abspath(self.output_file_path))
Code example #4
    def generate_pages_for_programs(self):
        """Generates static pages for programs in the output directory"""

        program_provides_requires_dict = self.get_program_requires_provides_dict()

        for program_name in self.programs:
            program = self.programs[program_name]
            # template context: program details plus rendering metadata
            d = {
                'program': {},
                'meta': {
                    'summary_type': 'program',
                    'version': '\n'.join(['|%s|%s|' % (t[0], t[1])
                                          for t in anvio.get_version_tuples()]),
                    'date': utils.get_date()
                }
            }

            d['program']['name'] = program_name
            d['program']['usage'] = program.usage
            d['program']['description'] = program.meta_info['description']['value']
            d['program']['resources'] = program.meta_info['resources']['value']
            d['program']['requires'] = program_provides_requires_dict[program_name]['requires']
            d['program']['provides'] = program_provides_requires_dict[program_name]['provides']
            d['program']['icon'] = '../../images/icons/%s.png' % 'PROGRAM'
            d['artifacts'] = self.artifacts_info

            if anvio.DEBUG:
                run.warning(None, 'THE OUTPUT DICT')
                import json
                print(json.dumps(d, indent=2))

            program_output_dir = filesnpaths.gen_output_directory(
                os.path.join(self.programs_output_dir, program_name))
            output_file_path = os.path.join(program_output_dir, 'index.md')
            # context manager so the handle is closed even if render() raises
            # (the original `open(...).write(...)` leaked the file object)
            with open(output_file_path, 'w') as output_file:
                output_file.write(SummaryHTMLOutput(d, r=run, p=progress).render())

            # create the program network, too
            program_network = ProgramsNetwork(argparse.Namespace(
                output_file=os.path.join(program_output_dir, "network.json"),
                program_names_to_focus=program_name),
                                              r=terminal.Run(verbose=False))
            program_network.generate()
Code example #5
    def generate_pages_for_artifacts(self):
        """Generates static pages for artifacts in the output directory"""

        for artifact in ANVIO_ARTIFACTS:
            # template context: the artifact entry itself plus rendering metadata
            d = {
                'artifact': ANVIO_ARTIFACTS[artifact],
                'meta': {
                    'summary_type': 'artifact',
                    'version': '\n'.join(['|%s|%s|' % (t[0], t[1])
                                          for t in anvio.get_version_tuples()]),
                    'date': utils.get_date()
                }
            }

            # enrich the artifact entry with cross-links to the programs that
            # require/provide it, its description, and its type icon
            d['artifact']['name'] = artifact
            d['artifact']['required_by'] = [(r, '../../programs/%s' % r)
                                            for r in self.artifacts_info[artifact]['required_by']]
            d['artifact']['provided_by'] = [(r, '../../programs/%s' % r)
                                            for r in self.artifacts_info[artifact]['provided_by']]
            d['artifact']['description'] = self.artifacts_info[artifact]['description']
            d['artifact']['icon'] = '../../images/icons/%s.png' % ANVIO_ARTIFACTS[artifact]['type']

            if anvio.DEBUG:
                run.warning(None, 'THE OUTPUT DICT')
                import json
                print(json.dumps(d, indent=2))

            artifact_output_dir = filesnpaths.gen_output_directory(
                os.path.join(self.artifacts_output_dir, artifact))
            output_file_path = os.path.join(artifact_output_dir, 'index.md')
            # context manager so the handle is closed even if render() raises
            # (the original `open(...).write(...)` leaked the file object)
            with open(output_file_path, 'w') as output_file:
                output_file.write(SummaryHTMLOutput(d, r=run, p=progress).render())
Code example #6
    def generate_index_page(self):
        """Generates the index page where all programs and artifacts are listed."""

        d = {
            'programs': [(p, 'programs/%s' % p,
                          self.programs[p].meta_info['description']['value'])
                         for p in self.programs],
            'artifacts': [(a, 'artifacts/%s' % a) for a in self.artifacts_info],
            'meta': {
                'summary_type': 'programs_and_artifacts_index',
                'version': '%s (%s)' % (anvio.anvio_version, anvio.anvio_codename),
                'date': utils.get_date()
            }
        }

        d['program_provides_requires'] = self.get_program_requires_provides_dict(prefix='')

        output_file_path = os.path.join(self.output_directory_path, 'index.md')
        # context manager so the handle is closed even if render() raises
        # (the original `open(...).write(...)` leaked the file object)
        with open(output_file_path, 'w') as output_file:
            output_file.write(SummaryHTMLOutput(d, r=run, p=progress).render())
Code example #7
    def process(self):
        """Summarizes a collection end to end.

        Builds the `self.summary` dictionary (run metadata, per-bin stats,
        HMM hit counts), writes TAB-delimited matrices under the output
        directory (unless `self.quick`), and finally renders the HTML index.

        NOTE(review): this block contained Python 2 constructs (`print`
        statement, subscripting `dict.values()`); they were updated to their
        Python 3 equivalents without changing behavior.
        """

        # learn who you are:
        collection_dict = self.collections.get_collection_dict(
            self.collection_name)
        bins_info_dict = self.collections.get_bins_info_dict(
            self.collection_name)

        # init profile data for collection.
        self.init_collection_profile(collection_dict)

        # load completeness information if available
        self.completeness = completeness.Completeness(self.contigs_db_path)
        if len(self.completeness.sources):
            self.completeness_data_available = True

        # load HMM sources for non-single-copy genes if available
        if self.non_singlecopy_gene_hmm_sources and not self.quick:
            self.init_non_singlecopy_gene_hmm_sources()
            self.non_single_copy_gene_hmm_data_available = True

        # load gene functions from contigs db superclass
        self.init_functions()

        # set up the initial summary dictionary; the two percent_*_collection
        # entries start at zero and are filled in after bins are summarized
        self.summary['meta'] = {
            'quick': self.quick,
            'output_directory': self.output_directory,
            'collection': collection_dict.keys(),
            'num_bins': len(collection_dict.keys()),
            'collection_name': self.collection_name,
            'total_nts_in_collection': 0,
            'num_contigs_in_collection': 0,
            'anvio_version': __version__,
            'profile': self.p_meta,
            'contigs': self.a_meta,
            'gene_coverages_data_available': self.gene_coverages_data_available,
            'completeness_data_available': self.completeness_data_available,
            'non_single_copy_gene_hmm_data_available': self.non_single_copy_gene_hmm_data_available,
            'percent_contigs_nts_described_by_collection': 0.0,
            'percent_profile_nts_described_by_collection': 0.0,
            'percent_contigs_nts_described_by_profile':
                P(self.p_meta['total_length'], self.a_meta['total_length']),
            'percent_contigs_contigs_described_by_profile':
                P(self.p_meta['num_contigs'], self.a_meta['num_contigs']),
            'percent_contigs_splits_described_by_profile':
                P(self.p_meta['num_splits'], self.a_meta['num_splits']),
        }

        # I am not sure whether this is the best place to do this,
        self.summary['basics_pretty'] = {
            'profile': [
                ('Created on', self.p_meta['creation_date']),
                ('Version', self.p_meta['version']),
                # typo fix in the displayed label (was 'Minimum conting length')
                ('Minimum contig length',
                 pretty(self.p_meta['min_contig_length'])),
                ('Number of contigs', pretty(int(self.p_meta['num_contigs']))),
                ('Number of splits', pretty(int(self.p_meta['num_splits']))),
                ('Total nucleotides',
                 humanize_n(int(self.p_meta['total_length']))),
            ],
            'contigs': [
                # NOTE(review): this reads the *profile* creation date while the
                # rest of this section reads a_meta -- possibly a copy-paste
                # slip; kept as-is to preserve behavior. TODO confirm.
                ('Created on', self.p_meta['creation_date']),
                ('Version', self.a_meta['version']),
                ('Split length', pretty(int(self.a_meta['split_length']))),
                ('Number of contigs', pretty(int(self.a_meta['num_contigs']))),
                ('Number of splits', pretty(int(self.a_meta['num_splits']))),
                ('Total nucleotides',
                 humanize_n(int(self.a_meta['total_length']))),
                ('K-mer size', self.a_meta['kmer_size']),
            ],
        }

        # limits for how many header items the HTML output shows per table
        self.summary['max_shown_header_items'] = 10
        self.summary['slice_header_items_tmpl'] = '0:%d' % self.summary['max_shown_header_items']
        self.summary['num_not_shown_samples'] = len(
            self.p_meta['samples']) - self.summary['max_shown_header_items']
        self.summary['num_not_shown_hmm_items'] = dict([
            (hmm_search_source,
             len(self.hmm_sources_info[hmm_search_source]['genes']) -
             self.summary['max_shown_header_items'])
            for hmm_search_type, hmm_search_source in self.hmm_searches_header
        ])

        self.summary['files'] = {}
        self.summary['collection'] = {}
        self.summary['collection_profile'] = self.collection_profile  # reminder; collection_profile comes from ProfileSuperclass!
        # Python 3 fix: dict views are not subscriptable, hence the list() calls
        self.summary['collection_profile_items'] = list(
            list(self.collection_profile.values())[0].keys())

        # add hmm items for each search type:
        if self.non_single_copy_gene_hmm_data_available:
            self.summary['meta']['hmm_items'] = dict([
                (hmm_search_source,
                 self.hmm_sources_info[hmm_search_source]['genes']) for
                hmm_search_type, hmm_search_source in self.hmm_searches_header
            ])

        # summarize bins:
        for bin_id in collection_dict:
            # renamed from `bin`, which shadowed the builtin
            bin_obj = Bin(self, bin_id, collection_dict[bin_id], self.run,
                          self.progress)
            bin_obj.output_directory = os.path.join(self.output_directory,
                                                    'bin_by_bin', bin_id)
            bin_obj.bin_profile = self.collection_profile[bin_id]

            self.summary['collection'][bin_id] = bin_obj.create()
            self.summary['collection'][bin_id]['color'] = bins_info_dict[bin_id]['html_color'] or '#212121'
            self.summary['collection'][bin_id]['source'] = bins_info_dict[bin_id]['source'] or 'unknown_source'
            self.summary['meta']['total_nts_in_collection'] += self.summary['collection'][bin_id]['total_length']
            self.summary['meta']['num_contigs_in_collection'] += self.summary['collection'][bin_id]['num_contigs']

        # bins are computed, add some relevant meta info:
        self.summary['meta']['percent_contigs_nts_described_by_collection'] = '%.2f' % (
            self.summary['meta']['total_nts_in_collection'] * 100.0 /
            int(self.a_meta['total_length']))
        self.summary['meta']['percent_profile_nts_described_by_collection'] = '%.2f' % (
            self.summary['meta']['total_nts_in_collection'] * 100.0 /
            int(self.p_meta['total_length']))
        self.summary['meta']['bins'] = self.get_bins_ordered_by_completeness_and_size()

        if not self.quick:
            # generate a TAB-delimited text output file for bin summaries
            summary_of_bins_matrix_output = {}
            properties = [
                'taxon', 'total_length', 'num_contigs', 'N50', 'GC_content',
                'percent_complete', 'percent_redundancy'
            ]

            for bin_name in self.summary['collection']:
                summary_of_bins_matrix_output[bin_name] = dict([
                    (prop, self.summary['collection'][bin_name][prop])
                    for prop in properties
                ])

            output_file_obj = self.get_output_file_handle(
                prefix='general_bins_summary.txt')
            utils.store_dict_as_TAB_delimited_file(
                summary_of_bins_matrix_output,
                None,
                headers=['bins'] + properties,
                file_obj=output_file_obj)

            # save merged matrices for bins x samples
            # (Python 3 fix: list() because dict views are not subscriptable)
            for table_name in list(self.collection_profile.values())[0]:
                d = {}
                for bin_id in self.collection_profile:
                    d[bin_id] = self.collection_profile[bin_id][table_name]

                output_file_obj = self.get_output_file_handle(
                    sub_directory='bins_across_samples',
                    prefix='%s.txt' % table_name)
                utils.store_dict_as_TAB_delimited_file(
                    d,
                    None,
                    headers=['bins'] + sorted(self.p_meta['samples']),
                    file_obj=output_file_obj)

            # merge and store matrices for hmm hits
            if self.non_single_copy_gene_hmm_data_available:
                for hmm_search_source in self.summary['meta']['hmm_items']:
                    # this is to keep numbers per hmm item:
                    d = {}

                    for bin_id in self.summary['meta']['bins']:
                        d[bin_id] = self.summary['collection'][bin_id]['hmms'][hmm_search_source]

                    output_file_obj = self.get_output_file_handle(
                        sub_directory='bins_across_samples',
                        prefix='%s.txt' % hmm_search_source,
                        within='hmms')
                    utils.store_dict_as_TAB_delimited_file(
                        d,
                        None,
                        headers=['bins'] +
                        sorted(self.summary['meta']['hmm_items'][hmm_search_source]),
                        file_obj=output_file_obj)

                # this is to keep number of hmm hits per bin:
                n = dict([(bin_id, {})
                          for bin_id in self.summary['meta']['bins']])
                for hmm_search_source in self.summary['meta']['hmm_items']:
                    for bin_id in self.summary['meta']['bins']:
                        n[bin_id][hmm_search_source] = sum(
                            self.summary['collection'][bin_id]['hmms'][hmm_search_source].values())

                output_file_obj = self.get_output_file_handle(
                    sub_directory='bins_across_samples',
                    prefix='hmm_hit_totals.txt')
                utils.store_dict_as_TAB_delimited_file(
                    n,
                    None,
                    headers=['bins'] +
                    sorted(self.summary['meta']['hmm_items']),
                    file_obj=output_file_obj)

            # store percent abundance of each bin
            self.summary['bin_percent_recruitment'] = self.bin_percent_recruitment_per_sample
            # Python 3 fix: list() because dict views are not subscriptable
            self.summary['bin_percent_abundance_items'] = sorted(
                list(self.bin_percent_recruitment_per_sample.values())[0].keys())
            output_file_obj = self.get_output_file_handle(
                sub_directory='bins_across_samples',
                prefix='bins_percent_recruitment.txt')
            utils.store_dict_as_TAB_delimited_file(
                self.bin_percent_recruitment_per_sample,
                None,
                headers=['samples'] + sorted(self.collection_profile.keys()) +
                ['__splits_not_binned__'],
                file_obj=output_file_obj)

        if self.debug:
            import json
            # Python 3 fix: print is a function (was a py2 print statement)
            print(json.dumps(self.summary, sort_keys=True, indent=4))

        self.index_html = SummaryHTMLOutput(
            self.summary, r=self.run,
            p=self.progress).generate(quick=self.quick)