def generate_index_page(self):
    """Generates the index page for help where all programs and artifacts are listed"""

    # let's add the 'path' for each artifact to simplify
    # access from the template:
    for artifact in self.artifacts_info:
        self.artifacts_info[artifact]['path'] = f"artifacts/{artifact}"

    # please note that artifacts get a fancy dictionary with everything, while
    # programs get a crappy tuples list. if we need to improve the functionality
    # of the help index page, we may need to update programs to a fancy
    # dictionary, too.
    d = {'programs': [(p, 'programs/%s' % p, self.programs[p].meta_info['description']['value']) for p in self.programs],
         'artifacts': self.artifacts_info,
         'artifact_types': self.artifact_types,
         'meta': {'summary_type': 'programs_and_artifacts_index',
                  'version': '%s (%s)' % (anvio.anvio_version, anvio.anvio_codename),
                  'date': utils.get_date()}}

    d['program_provides_requires'] = self.get_program_requires_provides_dict(prefix='')

    output_file_path = os.path.join(self.output_directory_path, 'index.md')
    # FIX: use a context manager so the handle is closed deterministically
    # (the previous `open(...).write(...)` left closing to the GC)
    with open(output_file_path, 'w') as output_file:
        output_file.write(SummaryHTMLOutput(d, r=run, p=progress).render())
def generate_pages_for_artifacts(self):
    """Generates static pages for artifacts in the output directory"""

    self.progress.new("Rendering artifact pages", progress_total_items=len(ANVIO_ARTIFACTS))
    self.progress.update('...')

    for artifact in ANVIO_ARTIFACTS:
        self.progress.update(f"'{artifact}' ...", increment=True)

        d = {'artifact': ANVIO_ARTIFACTS[artifact],
             'meta': {'summary_type': 'artifact',
                      'version': '\n'.join(['|%s|%s|' % (t[0], t[1]) for t in anvio.get_version_tuples()]),
                      'date': utils.get_date(),
                      'version_short_identifier': self.version_short_identifier}}

        # enrich the artifact entry with name, cross-links, description, and icon
        d['artifact']['name'] = artifact
        d['artifact']['required_by'] = [(r, '../../programs/%s' % r) for r in self.artifacts_info[artifact]['required_by']]
        d['artifact']['provided_by'] = [(r, '../../programs/%s' % r) for r in self.artifacts_info[artifact]['provided_by']]
        d['artifact']['description'] = self.artifacts_info[artifact]['description']
        d['artifact']['icon'] = '../../images/icons/%s.png' % ANVIO_ARTIFACTS[artifact]['type']

        if anvio.DEBUG:
            self.progress.reset()
            run.warning(None, 'THE OUTPUT DICT')
            import json
            print(json.dumps(d, indent=2))

        self.progress.update(f"'{artifact}' ... rendering ...", increment=False)

        artifact_output_dir = filesnpaths.gen_output_directory(os.path.join(self.artifacts_output_dir, artifact))
        output_file_path = os.path.join(artifact_output_dir, 'index.md')
        # FIX: context manager instead of `open(...).write(...)` — otherwise one
        # handle per artifact is left for the GC to close
        with open(output_file_path, 'w') as output_file:
            output_file.write(SummaryHTMLOutput(d, r=run, p=progress).render())

    self.progress.end()
def generate(self):
    """Runs `--help` on every program, parses the output, and renders the vignette HTML.

    Programs listed in `self.programs_to_skip` are skipped; programs whose help
    output cannot be parsed are warned about and skipped as well.
    """
    d = {}

    log_file = filesnpaths.get_temp_file_path()
    num_all_programs = len(self.all_programs)
    for i in range(0, num_all_programs):
        program_path = self.all_programs[i]
        program_name = os.path.basename(program_path)

        if program_name in self.programs_to_skip:
            run.warning("Someone doesn't want %s to be in the output :/ Fine. Skipping." % (program_name))
            # FIX: the message said "Skipping" but the loop fell through and
            # processed the program anyway — actually skip it:
            continue

        progress.new('Bleep bloop')
        progress.update('%s (%d of %d)' % (program_name, i+1, num_all_programs))

        output = utils.run_command_STDIN('%s --help' % (program_path), log_file, '').split('\n')

        if anvio.DEBUG:
            # in debug mode, let parsing errors propagate with a full traceback
            usage, description, params, output = parse_help_output(output)
        else:
            try:
                usage, description, params, output = parse_help_output(output)
            except Exception as e:
                progress.end()
                run.warning("The program '%s' does not seem to have the expected help menu output. Skipping to the next.\
For the curious, this was the error message: '%s'" % (program_name, str(e).strip()))
                continue

        d[program_name] = {'usage': usage,
                           'description': description,
                           'params': params,
                           'tags': get_meta_information_from_file(program_path, '__tags__'),
                           'resources': get_meta_information_from_file(program_path, '__resources__')}

        progress.end()

    os.remove(log_file)

    # generate output
    program_names = sorted([p for p in d if not p.startswith('anvi-script-')])
    script_names = sorted([p for p in d if p.startswith('anvi-script-')])
    vignette = {'vignette': d,
                'program_names': program_names,
                'script_names': script_names,
                'all_names': program_names + script_names,
                'meta': {'summary_type': 'vignette',
                         'version': '\n'.join(['|%s|%s|' % (t[0], t[1]) for t in anvio.get_version_tuples()]),
                         'date': utils.get_date()}}

    if anvio.DEBUG:
        run.warning(None, 'THE OUTPUT DICT')
        import json
        print(json.dumps(d, indent=2))

    # FIX: context manager so the output handle is closed deterministically
    with open(self.output_file_path, 'w') as output_file:
        output_file.write(SummaryHTMLOutput(vignette, r=run, p=progress).render())

    run.info('Output file', os.path.abspath(self.output_file_path))
def generate_pages_for_programs(self):
    """Generates static pages for programs in the output directory"""

    program_provides_requires_dict = self.get_program_requires_provides_dict()

    for program_name in self.programs:
        program = self.programs[program_name]
        d = {'program': {},
             'meta': {'summary_type': 'program',
                      'version': '\n'.join(['|%s|%s|' % (t[0], t[1]) for t in anvio.get_version_tuples()]),
                      'date': utils.get_date()}}

        d['program']['name'] = program_name
        d['program']['usage'] = program.usage
        d['program']['description'] = program.meta_info['description']['value']
        d['program']['resources'] = program.meta_info['resources']['value']
        d['program']['requires'] = program_provides_requires_dict[program_name]['requires']
        d['program']['provides'] = program_provides_requires_dict[program_name]['provides']
        d['program']['icon'] = '../../images/icons/%s.png' % 'PROGRAM'
        d['artifacts'] = self.artifacts_info

        if anvio.DEBUG:
            run.warning(None, 'THE OUTPUT DICT')
            import json
            print(json.dumps(d, indent=2))

        program_output_dir = filesnpaths.gen_output_directory(os.path.join(self.programs_output_dir, program_name))
        output_file_path = os.path.join(program_output_dir, 'index.md')
        # FIX: context manager instead of `open(...).write(...)` — one handle per
        # program was otherwise left for the GC to close
        with open(output_file_path, 'w') as output_file:
            output_file.write(SummaryHTMLOutput(d, r=run, p=progress).render())

        # create the program network, too
        program_network = ProgramsNetwork(argparse.Namespace(output_file=os.path.join(program_output_dir, "network.json"),
                                                             program_names_to_focus=program_name),
                                          r=terminal.Run(verbose=False))
        program_network.generate()
def generate_pages_for_artifacts(self):
    """Generates static pages for artifacts in the output directory"""

    for artifact in ANVIO_ARTIFACTS:
        d = {'artifact': ANVIO_ARTIFACTS[artifact],
             'meta': {'summary_type': 'artifact',
                      'version': '\n'.join(['|%s|%s|' % (t[0], t[1]) for t in anvio.get_version_tuples()]),
                      'date': utils.get_date()}}

        # enrich the artifact entry with name, cross-links, description, and icon
        d['artifact']['name'] = artifact
        d['artifact']['required_by'] = [(r, '../../programs/%s' % r) for r in self.artifacts_info[artifact]['required_by']]
        d['artifact']['provided_by'] = [(r, '../../programs/%s' % r) for r in self.artifacts_info[artifact]['provided_by']]
        d['artifact']['description'] = self.artifacts_info[artifact]['description']
        d['artifact']['icon'] = '../../images/icons/%s.png' % ANVIO_ARTIFACTS[artifact]['type']

        if anvio.DEBUG:
            run.warning(None, 'THE OUTPUT DICT')
            import json
            print(json.dumps(d, indent=2))

        artifact_output_dir = filesnpaths.gen_output_directory(os.path.join(self.artifacts_output_dir, artifact))
        output_file_path = os.path.join(artifact_output_dir, 'index.md')
        # FIX: context manager instead of `open(...).write(...)` — one handle per
        # artifact was otherwise left for the GC to close
        with open(output_file_path, 'w') as output_file:
            output_file.write(SummaryHTMLOutput(d, r=run, p=progress).render())
def generate_index_page(self):
    """Generates the index page listing all programs and artifacts with links."""

    d = {'programs': [(p, 'programs/%s' % p, self.programs[p].meta_info['description']['value']) for p in self.programs],
         'artifacts': [(a, 'artifacts/%s' % a) for a in self.artifacts_info],
         'meta': {'summary_type': 'programs_and_artifacts_index',
                  'version': '%s (%s)' % (anvio.anvio_version, anvio.anvio_codename),
                  'date': utils.get_date()}}

    d['program_provides_requires'] = self.get_program_requires_provides_dict(prefix='')

    output_file_path = os.path.join(self.output_directory_path, 'index.md')
    # FIX: use a context manager so the handle is closed deterministically
    # (the previous `open(...).write(...)` left closing to the GC)
    with open(output_file_path, 'w') as output_file:
        output_file.write(SummaryHTMLOutput(d, r=run, p=progress).render())
def process(self):
    """Summarizes the collection bin by bin and renders the static HTML output.

    Populates `self.summary` (meta info, per-bin stats, collection profiles),
    writes the TAB-delimited matrix files (unless `self.quick`), and finally
    sets `self.index_html` via SummaryHTMLOutput.
    """

    # learn who you are:
    collection_dict = self.collections.get_collection_dict(self.collection_name)
    bins_info_dict = self.collections.get_bins_info_dict(self.collection_name)

    # init profile data for collection.
    self.init_collection_profile(collection_dict)

    # load completeness information if available
    self.completeness = completeness.Completeness(self.contigs_db_path)
    if len(self.completeness.sources):
        self.completeness_data_available = True

    # load HMM sources for non-single-copy genes if available
    if self.non_singlecopy_gene_hmm_sources and not self.quick:
        self.init_non_singlecopy_gene_hmm_sources()
        self.non_single_copy_gene_hmm_data_available = True

    # load gene functions from contigs db superclass
    self.init_functions()

    # set up the initial summary dictionary
    self.summary['meta'] = {'quick': self.quick,
                            'output_directory': self.output_directory,
                            'collection': collection_dict.keys(),
                            'num_bins': len(collection_dict.keys()),
                            'collection_name': self.collection_name,
                            'total_nts_in_collection': 0,
                            'num_contigs_in_collection': 0,
                            'anvio_version': __version__,
                            'profile': self.p_meta,
                            'contigs': self.a_meta,
                            'gene_coverages_data_available': self.gene_coverages_data_available,
                            'completeness_data_available': self.completeness_data_available,
                            'non_single_copy_gene_hmm_data_available': self.non_single_copy_gene_hmm_data_available,
                            'percent_contigs_nts_described_by_collection': 0.0,
                            'percent_profile_nts_described_by_collection': 0.0,
                            'percent_contigs_nts_described_by_profile': P(self.p_meta['total_length'], self.a_meta['total_length']),
                            'percent_contigs_contigs_described_by_profile': P(self.p_meta['num_contigs'], self.a_meta['num_contigs']),
                            'percent_contigs_splits_described_by_profile': P(self.p_meta['num_splits'], self.a_meta['num_splits'])}

    # I am not sure whether this is the best place to do this,
    self.summary['basics_pretty'] = {'profile': [
                                        ('Created on', self.p_meta['creation_date']),
                                        ('Version', self.p_meta['version']),
                                        # FIX: user-facing label typo ("conting" -> "contig")
                                        ('Minimum contig length', pretty(self.p_meta['min_contig_length'])),
                                        ('Number of contigs', pretty(int(self.p_meta['num_contigs']))),
                                        ('Number of splits', pretty(int(self.p_meta['num_splits']))),
                                        ('Total nucleotides', humanize_n(int(self.p_meta['total_length']))),
                                     ],
                                     'contigs': [
                                        # NOTE(review): this reads p_meta['creation_date'] in the *contigs*
                                        # section while every other entry here uses a_meta — looks like a
                                        # copy-paste from the profile block above; confirm whether it should
                                        # be a_meta['creation_date'] (left unchanged to preserve behavior)
                                        ('Created on', self.p_meta['creation_date']),
                                        ('Version', self.a_meta['version']),
                                        ('Split length', pretty(int(self.a_meta['split_length']))),
                                        ('Number of contigs', pretty(int(self.a_meta['num_contigs']))),
                                        ('Number of splits', pretty(int(self.a_meta['num_splits']))),
                                        ('Total nucleotides', humanize_n(int(self.a_meta['total_length']))),
                                        ('K-mer size', self.a_meta['kmer_size']),
                                     ]}

    self.summary['max_shown_header_items'] = 10
    self.summary['slice_header_items_tmpl'] = '0:%d' % self.summary['max_shown_header_items']
    self.summary['num_not_shown_samples'] = len(self.p_meta['samples']) - self.summary['max_shown_header_items']
    self.summary['num_not_shown_hmm_items'] = dict([(hmm_search_source, len(self.hmm_sources_info[hmm_search_source]['genes']) - self.summary['max_shown_header_items'])
                                                        for hmm_search_type, hmm_search_source in self.hmm_searches_header])

    self.summary['files'] = {}
    self.summary['collection'] = {}
    self.summary['collection_profile'] = self.collection_profile # reminder; collection_profile comes from ProfileSuperclass!
    # FIX: dict views are not subscriptable in Python 3 — materialize before indexing
    self.summary['collection_profile_items'] = list(self.collection_profile.values())[0].keys()

    # add hmm items for each search type:
    if self.non_single_copy_gene_hmm_data_available:
        self.summary['meta']['hmm_items'] = dict([(hmm_search_source, self.hmm_sources_info[hmm_search_source]['genes'])
                                                      for hmm_search_type, hmm_search_source in self.hmm_searches_header])

    # summarize bins:
    for bin_id in collection_dict:
        # renamed local from `bin` to `bin_obj` to avoid shadowing the builtin
        bin_obj = Bin(self, bin_id, collection_dict[bin_id], self.run, self.progress)
        bin_obj.output_directory = os.path.join(self.output_directory, 'bin_by_bin', bin_id)
        bin_obj.bin_profile = self.collection_profile[bin_id]

        self.summary['collection'][bin_id] = bin_obj.create()
        self.summary['collection'][bin_id]['color'] = bins_info_dict[bin_id]['html_color'] or '#212121'
        self.summary['collection'][bin_id]['source'] = bins_info_dict[bin_id]['source'] or 'unknown_source'
        self.summary['meta']['total_nts_in_collection'] += self.summary['collection'][bin_id]['total_length']
        self.summary['meta']['num_contigs_in_collection'] += self.summary['collection'][bin_id]['num_contigs']

    # bins are computed, add some relevant meta info:
    self.summary['meta']['percent_contigs_nts_described_by_collection'] = '%.2f' % (self.summary['meta']['total_nts_in_collection'] * 100.0 / int(self.a_meta['total_length']))
    self.summary['meta']['percent_profile_nts_described_by_collection'] = '%.2f' % (self.summary['meta']['total_nts_in_collection'] * 100.0 / int(self.p_meta['total_length']))
    self.summary['meta']['bins'] = self.get_bins_ordered_by_completeness_and_size()

    if not self.quick:
        # generate a TAB-delimited text output file for bin summaries
        summary_of_bins_matrix_output = {}
        properties = ['taxon', 'total_length', 'num_contigs', 'N50', 'GC_content', 'percent_complete', 'percent_redundancy']

        for bin_name in self.summary['collection']:
            summary_of_bins_matrix_output[bin_name] = dict([(prop, self.summary['collection'][bin_name][prop]) for prop in properties])

        output_file_obj = self.get_output_file_handle(prefix='general_bins_summary.txt')
        utils.store_dict_as_TAB_delimited_file(summary_of_bins_matrix_output, None, headers=['bins'] + properties, file_obj=output_file_obj)

        # save merged matrices for bins x samples
        # FIX: `.values()[0]` is Python-2-only; wrap in list() for Python 3
        for table_name in list(self.collection_profile.values())[0].keys():
            d = {}
            for bin_id in self.collection_profile:
                d[bin_id] = self.collection_profile[bin_id][table_name]

            output_file_obj = self.get_output_file_handle(sub_directory='bins_across_samples', prefix='%s.txt' % table_name)
            utils.store_dict_as_TAB_delimited_file(d, None, headers=['bins'] + sorted(self.p_meta['samples']), file_obj=output_file_obj)

        # merge and store matrices for hmm hits
        if self.non_single_copy_gene_hmm_data_available:
            for hmm_search_source in self.summary['meta']['hmm_items']:
                # this is to keep numbers per hmm item:
                d = {}
                for bin_id in self.summary['meta']['bins']:
                    d[bin_id] = self.summary['collection'][bin_id]['hmms'][hmm_search_source]

                output_file_obj = self.get_output_file_handle(sub_directory='bins_across_samples', prefix='%s.txt' % hmm_search_source, within='hmms')
                utils.store_dict_as_TAB_delimited_file(d, None, headers=['bins'] + sorted(self.summary['meta']['hmm_items'][hmm_search_source]), file_obj=output_file_obj)

            # this is to keep number of hmm hits per bin:
            n = dict([(bin_id, {}) for bin_id in self.summary['meta']['bins']])
            for hmm_search_source in self.summary['meta']['hmm_items']:
                for bin_id in self.summary['meta']['bins']:
                    n[bin_id][hmm_search_source] = sum(self.summary['collection'][bin_id]['hmms'][hmm_search_source].values())

            output_file_obj = self.get_output_file_handle(sub_directory='bins_across_samples', prefix='hmm_hit_totals.txt')
            utils.store_dict_as_TAB_delimited_file(n, None, headers=['bins'] + sorted(self.summary['meta']['hmm_items']), file_obj=output_file_obj)

        # store percent abundance of each bin
        self.summary['bin_percent_recruitment'] = self.bin_percent_recruitment_per_sample
        # FIX: `.values()[0]` is Python-2-only; wrap in list() for Python 3
        self.summary['bin_percent_abundance_items'] = sorted(list(self.bin_percent_recruitment_per_sample.values())[0].keys())
        output_file_obj = self.get_output_file_handle(sub_directory='bins_across_samples', prefix='bins_percent_recruitment.txt')
        utils.store_dict_as_TAB_delimited_file(self.bin_percent_recruitment_per_sample,
                                               None,
                                               headers=['samples'] + sorted(self.collection_profile.keys()) + ['__splits_not_binned__'],
                                               file_obj=output_file_obj)

    if self.debug:
        import json
        # FIX: Python-2 print statement -> Python 3 print() call
        print(json.dumps(self.summary, sort_keys=True, indent=4))

    self.index_html = SummaryHTMLOutput(self.summary, r=self.run, p=self.progress).generate(quick=self.quick)