def main(environment_file, sample_mapping_file=None, unit_mapping_file=None,
         min_abundance=0, min_sum_normalized_percent=1):
    """Generate a GEXF network description for an environment file.

    The output path is `environment_file` with its extension replaced by
    `.gexf`; it is handed to `utils.generate_gexf_network_file` together
    with the units, the samples and the unit percentages.

    Args:
        environment_file: path of the environment file to read.
        sample_mapping_file: optional path of a sample mapping file.
        unit_mapping_file: optional path of a unit mapping file. It is loaded
            with the same `utils.get_sample_mapping_dict` helper as the
            sample mapping.
        min_abundance: forwarded to `utils.get_oligos_sorted_by_abundance`.
        min_sum_normalized_percent: accepted for interface compatibility;
            this function does not use it.

    Returns:
        None.
    """
    # Local import: the module-level imports of this file are not visible here.
    import os

    samples_dict = utils.get_samples_dict_from_environment_file(environment_file)
    oligos = utils.get_oligos_sorted_by_abundance(samples_dict, min_abundance=min_abundance)

    # Only the percentages are needed downstream; the counts are discarded.
    _, unit_percents = utils.get_unit_counts_and_percents(oligos, samples_dict)

    sample_mapping = None
    if sample_mapping_file:
        sample_mapping = utils.get_sample_mapping_dict(sample_mapping_file)

    unit_mapping = None
    if unit_mapping_file:
        unit_mapping = utils.get_sample_mapping_dict(unit_mapping_file)

    # splitext only strips an extension of the last path component, so a file
    # without an extension keeps its name (instead of collapsing to '.gexf')
    # and dots in parent directories are left alone.
    output_file = os.path.splitext(environment_file)[0] + '.gexf'

    utils.generate_gexf_network_file(oligos,
                                     samples_dict,
                                     unit_percents,
                                     output_file,
                                     sample_mapping_dict=sample_mapping,
                                     unit_mapping_dict=unit_mapping)
def main(
    environment_file, sample_mapping_file=None, unit_mapping_file=None, min_abundance=0, min_sum_normalized_percent=1
):
    """Generate a GEXF network description for an environment file.

    The output path is `environment_file` with its extension replaced by
    `.gexf`; it is handed to `utils.generate_gexf_network_file` together
    with the units, the samples and the unit percentages.

    Args:
        environment_file: path of the environment file to read.
        sample_mapping_file: optional path of a sample mapping file.
        unit_mapping_file: optional path of a unit mapping file. It is loaded
            with the same `utils.get_sample_mapping_dict` helper as the
            sample mapping.
        min_abundance: forwarded to `utils.get_oligos_sorted_by_abundance`.
        min_sum_normalized_percent: accepted for interface compatibility;
            this function does not use it.

    Returns:
        None.
    """
    # Local import: the module-level imports of this file are not visible here.
    import os

    samples_dict = utils.get_samples_dict_from_environment_file(environment_file)
    oligos = utils.get_oligos_sorted_by_abundance(samples_dict, min_abundance=min_abundance)

    # Only the percentages are needed downstream; the counts are discarded.
    _, unit_percents = utils.get_unit_counts_and_percents(oligos, samples_dict)

    sample_mapping = None
    if sample_mapping_file:
        sample_mapping = utils.get_sample_mapping_dict(sample_mapping_file)

    unit_mapping = None
    if unit_mapping_file:
        unit_mapping = utils.get_sample_mapping_dict(unit_mapping_file)

    # splitext only strips an extension of the last path component, so a file
    # without an extension keeps its name (instead of collapsing to ".gexf")
    # and dots in parent directories are left alone.
    output_file = os.path.splitext(environment_file)[0] + ".gexf"

    utils.generate_gexf_network_file(
        oligos,
        samples_dict,
        unit_percents,
        output_file,
        sample_mapping_dict=sample_mapping,
        unit_mapping_dict=unit_mapping,
    )
def generate_html_output(run_info_dict, html_output_directory=None):
    """Assemble the static HTML report for a decomposition run.

    Copies the static assets and the run's output files into the HTML output
    directory, builds the template context from a deep copy of
    `run_info_dict` and renders `index.html`.

    Args:
        run_info_dict: mapping with the run's settings and output paths. It is
            only read here; the template context is a deep copy of it.
        html_output_directory: destination directory. Defaults to
            `<run_info_dict['output_directory']>/HTML-OUTPUT` and is created
            when it does not exist.

    Returns:
        Path of the rendered `index.html`.

    Raises:
        KeyError: when a required key is missing from `run_info_dict`.
        EnvironmentError: when a static asset cannot be copied or the index
            page cannot be written.
    """
    if not html_output_directory:
        html_output_directory = os.path.join(run_info_dict['output_directory'], 'HTML-OUTPUT')

    if not os.path.exists(html_output_directory):
        os.makedirs(html_output_directory)

    html_dict = copy.deepcopy(run_info_dict)

    # Static assets every report needs: (path below `absolute`, name in report).
    static_assets = (('static/style.css', 'style.css'),
                     ('static/header_2.png', 'header.png'),
                     ('static/missing_image.png', 'missing.png'),
                     ('static/colorbar.png', 'colorbar.png'))
    for asset, asset_dest_name in static_assets:
        shutil.copy2(os.path.join(absolute, asset),
                     os.path.join(html_output_directory, asset_dest_name))

    def copy_as(source, dest_name):
        """Copy `source` into the report as `dest_name`; return `dest_name`.

        Copy failures are tolerated: a PNG that cannot be copied is replaced
        by the "missing image" placeholder, any other file is skipped.
        """
        dest = os.path.join(html_output_directory, dest_name)
        try:
            shutil.copy2(source, dest)
        except EnvironmentError:
            # Only I/O failures are swallowed (IOError, OSError, shutil.Error);
            # KeyboardInterrupt and programming errors now propagate.
            if source.endswith('png'):
                shutil.copy2(os.path.join(absolute, 'static/missing_image.png'), dest)
        return os.path.basename(dest)

    html_dict['matrix_count_file_path'] = copy_as(run_info_dict['matrix_count_file_path'], 'matrix_counts.txt')
    html_dict['matrix_percent_file_path'] = copy_as(run_info_dict['matrix_percent_file_path'], 'matrix_percents.txt')
    html_dict['environment_file_path'] = copy_as(run_info_dict['environment_file_path'], 'environment.txt')
    html_dict['read_distribution_table_path'] = copy_as(run_info_dict['read_distribution_table_path'], 'read_distribution.txt')

    def get_figures_dict(html_dict_prefix):
        """Load a pickled figures dict and copy its figures into the report.

        Every leaf (a figure path without extension) is replaced by the
        extension-less name of its copy in the report, or by None when
        neither the PDF nor the PNG exists. Returns None when the context
        has no `<prefix>_file_path` entry.
        """
        html_dict_key = '%s_file_path' % html_dict_prefix
        if html_dict_key not in html_dict:
            return None

        # NOTE: unpickling executes arbitrary code; this file must come from
        # a trusted run directory.
        with open(html_dict[html_dict_key]) as figures_file:
            figures_dict = cPickle.load(figures_file)

        for _map in figures_dict:
            for _func in figures_dict[_map]:
                for _op in figures_dict[_map][_func]:
                    figure_prefix = figures_dict[_map][_func][_op]
                    if os.path.exists(figure_prefix + '.pdf') or os.path.exists(figure_prefix + '.png'):
                        output_prefix = '-'.join([_map, _func, _op])
                        copy_as(figure_prefix + '.png', '%s.png' % output_prefix)
                        prefix = copy_as(figure_prefix + '.pdf', '%s.pdf' % output_prefix)
                        figures_dict[_map][_func][_op] = '.'.join(prefix.split('.')[:-1])
                    else:
                        figures_dict[_map][_func][_op] = None

        return figures_dict

    html_dict['figures_dict'] = get_figures_dict('figures_dict')
    html_dict['exclusive_figures_dict'] = get_figures_dict('exclusive_figures_dict')

    if 'node_representatives_file_path' in html_dict:
        html_dict['node_representatives_file_path'] = copy_as(run_info_dict['node_representatives_file_path'], 'node-representatives.fa.txt')
    else:
        html_dict['node_representatives_file_path'] = None

    if 'blast_ref_db' in run_info_dict and os.path.exists(run_info_dict['blast_ref_db']):
        html_dict['blast_ref_db_path'] = copy_as(run_info_dict['blast_ref_db'], 'reference_db.fa')

    if run_info_dict['sample_mapping']:
        html_dict['sample_mapping'] = copy_as(run_info_dict['sample_mapping'], 'sample_mapping.txt')
    else:
        html_dict['sample_mapping'] = None

    if run_info_dict['gexf_network_file_path']:
        html_dict['gexf_network_file_path'] = copy_as(run_info_dict['gexf_network_file_path'], 'network.gexf')

    if run_info_dict['topology_gexf']:
        html_dict['topology_gexf'] = copy_as(run_info_dict['topology_gexf'], 'topology.gexf')

    html_dict['samples_dict'] = get_samples_dict_from_environment_file(run_info_dict['environment_file_path'])
    html_dict['samples'] = sorted(html_dict['samples_dict'].keys())
    html_dict['blast_results_found'] = False

    # include pretty names
    html_dict['pretty_names'] = pretty_names

    # get javascript code for sample pie-charts
    html_dict['pie_charts_js'] = render_to_string('pie_charts_js.tmpl', html_dict)

    # generate index
    index_page = os.path.join(html_output_directory, 'index.html')
    rendered = render_to_string('index_for_decomposition.tmpl', html_dict)

    with open(index_page, 'w') as index_file:
        index_file.write(rendered.encode("utf-8"))

    return index_page
# any later version.
#
# Please read the COPYING file.

import os
import sys
import cPickle

from Oligotyping.lib.decomposer import Decomposer
from Oligotyping.utils.utils import get_samples_dict_from_environment_file

# Rebuilds a Decomposer from a pickled run info dict and a sample mapping
# file, then asks it to generate the exclusive figures.

if len(sys.argv) < 3:
    # Fail with a usage line instead of a bare IndexError traceback.
    sys.exit('Usage: %s RUNINFO_PICKLE SAMPLE_MAPPING' % sys.argv[0])

# NOTE: unpickling executes arbitrary code; the run info file must come from
# a trusted run directory.
with open(sys.argv[1]) as runinfo_file:
    runinfo = cPickle.load(runinfo_file)

sample_mapping = sys.argv[2]

decomposer = Decomposer()
decomposer.matrix_percent_file_path = runinfo['matrix_percent_file_path']
decomposer.matrix_count_file_path = runinfo['matrix_count_file_path']
decomposer.tmp_directory = runinfo['tmp_directory']
decomposer.output_directory = runinfo['output_directory']
# The figures directory is always named 'FIGURES' and sits next to the
# directory recorded in the run info.
decomposer.figures_directory = os.path.join(os.path.dirname(runinfo['figures_directory']), 'FIGURES')

if not os.path.exists(decomposer.tmp_directory):
    os.makedirs(decomposer.tmp_directory)

decomposer.sample_mapping = sample_mapping
decomposer._init_logger('exclusive_figures.log')
decomposer.samples_dict = get_samples_dict_from_environment_file(runinfo['environment_file_path'])
decomposer.samples = sorted(decomposer.samples_dict.keys())
decomposer._generate_exclusive_figures()
def generate_html_output(run_info_dict, html_output_directory=None):
    """Assemble the static HTML report for a decomposition run.

    Copies the static assets and the run's output files into the HTML output
    directory, builds the template context from a deep copy of
    `run_info_dict` and renders `index.html`.

    Args:
        run_info_dict: mapping with the run's settings and output paths. It is
            only read here; the template context is a deep copy of it.
        html_output_directory: destination directory. Defaults to
            `<run_info_dict["output_directory"]>/HTML-OUTPUT` and is created
            when it does not exist.

    Returns:
        Path of the rendered `index.html`.

    Raises:
        KeyError: when a required key is missing from `run_info_dict`.
        EnvironmentError: when a static asset cannot be copied or the index
            page cannot be written.
    """
    if not html_output_directory:
        html_output_directory = os.path.join(run_info_dict["output_directory"], "HTML-OUTPUT")

    if not os.path.exists(html_output_directory):
        os.makedirs(html_output_directory)

    html_dict = copy.deepcopy(run_info_dict)

    # Static assets every report needs: (path below `absolute`, name in report).
    static_assets = (
        ("static/style.css", "style.css"),
        ("static/header_2.png", "header.png"),
        ("static/missing_image.png", "missing.png"),
        ("static/colorbar.png", "colorbar.png"),
    )
    for asset, asset_dest_name in static_assets:
        shutil.copy2(os.path.join(absolute, asset), os.path.join(html_output_directory, asset_dest_name))

    def copy_as(source, dest_name):
        """Copy `source` into the report as `dest_name`; return `dest_name`.

        Copy failures are tolerated: a PNG that cannot be copied is replaced
        by the "missing image" placeholder, any other file is skipped.
        """
        dest = os.path.join(html_output_directory, dest_name)
        try:
            shutil.copy2(source, dest)
        except EnvironmentError:
            # Only I/O failures are swallowed (IOError, OSError, shutil.Error);
            # KeyboardInterrupt and programming errors now propagate.
            if source.endswith("png"):
                shutil.copy2(os.path.join(absolute, "static/missing_image.png"), dest)
        return os.path.basename(dest)

    html_dict["matrix_count_file_path"] = copy_as(run_info_dict["matrix_count_file_path"], "matrix_counts.txt")
    html_dict["matrix_percent_file_path"] = copy_as(run_info_dict["matrix_percent_file_path"], "matrix_percents.txt")
    html_dict["environment_file_path"] = copy_as(run_info_dict["environment_file_path"], "environment.txt")
    html_dict["read_distribution_table_path"] = copy_as(
        run_info_dict["read_distribution_table_path"], "read_distribution.txt"
    )

    def get_figures_dict(html_dict_prefix):
        """Load a pickled figures dict and copy its figures into the report.

        Every leaf (a figure path without extension) is replaced by the
        extension-less name of its copy in the report, or by None when
        neither the PDF nor the PNG exists. Returns None when the context
        has no `<prefix>_file_path` entry.
        """
        html_dict_key = "%s_file_path" % html_dict_prefix
        if html_dict_key not in html_dict:
            return None

        # NOTE: unpickling executes arbitrary code; this file must come from
        # a trusted run directory.
        with open(html_dict[html_dict_key]) as figures_file:
            figures_dict = cPickle.load(figures_file)

        for _map in figures_dict:
            for _func in figures_dict[_map]:
                for _op in figures_dict[_map][_func]:
                    figure_prefix = figures_dict[_map][_func][_op]
                    if os.path.exists(figure_prefix + ".pdf") or os.path.exists(figure_prefix + ".png"):
                        output_prefix = "-".join([_map, _func, _op])
                        copy_as(figure_prefix + ".png", "%s.png" % output_prefix)
                        prefix = copy_as(figure_prefix + ".pdf", "%s.pdf" % output_prefix)
                        figures_dict[_map][_func][_op] = ".".join(prefix.split(".")[:-1])
                    else:
                        figures_dict[_map][_func][_op] = None

        return figures_dict

    html_dict["figures_dict"] = get_figures_dict("figures_dict")
    html_dict["exclusive_figures_dict"] = get_figures_dict("exclusive_figures_dict")

    if "node_representatives_file_path" in html_dict:
        html_dict["node_representatives_file_path"] = copy_as(
            run_info_dict["node_representatives_file_path"], "node-representatives.fa.txt"
        )
    else:
        html_dict["node_representatives_file_path"] = None

    if "blast_ref_db" in run_info_dict and os.path.exists(run_info_dict["blast_ref_db"]):
        html_dict["blast_ref_db_path"] = copy_as(run_info_dict["blast_ref_db"], "reference_db.fa")

    if run_info_dict["sample_mapping"]:
        html_dict["sample_mapping"] = copy_as(run_info_dict["sample_mapping"], "sample_mapping.txt")
    else:
        html_dict["sample_mapping"] = None

    if run_info_dict["gexf_network_file_path"]:
        html_dict["gexf_network_file_path"] = copy_as(run_info_dict["gexf_network_file_path"], "network.gexf")

    if run_info_dict["topology_gexf"]:
        html_dict["topology_gexf"] = copy_as(run_info_dict["topology_gexf"], "topology.gexf")

    html_dict["samples_dict"] = get_samples_dict_from_environment_file(run_info_dict["environment_file_path"])
    html_dict["samples"] = sorted(html_dict["samples_dict"].keys())
    html_dict["blast_results_found"] = False

    # include pretty names
    html_dict["pretty_names"] = pretty_names

    # get javascript code for sample pie-charts
    html_dict["pie_charts_js"] = render_to_string("pie_charts_js.tmpl", html_dict)

    # generate index
    index_page = os.path.join(html_output_directory, "index.html")
    rendered = render_to_string("index_for_decomposition.tmpl", html_dict)

    with open(index_page, "w") as index_file:
        index_file.write(rendered.encode("utf-8"))

    return index_page
def generate_html_output(run_info_dict, html_output_directory = None, entropy_figure = None):
    """Assemble the static HTML report for an oligotyping run.

    Copies static assets and the run's output files into the HTML output
    directory, builds the template context from a deep copy of
    `run_info_dict`, renders one page per oligo (only when the context has
    an 'output_directory_for_reps' entry) and finally `index.html`.

    Args:
        run_info_dict: mapping with the run's settings and output paths. It is
            only read here; the template context is a deep copy of it.
        html_output_directory: destination directory. Defaults to
            `<run_info_dict['output_directory']>/HTML-OUTPUT` and is created
            when it does not exist.
        entropy_figure: optional path of the entropy figure, with or without
            a '.png' / '.pdf' suffix. When falsy, `run_info_dict['entropy']`
            is used instead.

    Returns:
        Path of the rendered `index.html`.

    Raises:
        KeyError: when a required key is missing from `run_info_dict`.
        EnvironmentError: when an essential file cannot be copied or a page
            cannot be written.
    """
    if not html_output_directory:
        html_output_directory = os.path.join(run_info_dict['output_directory'], 'HTML-OUTPUT')

    if not os.path.exists(html_output_directory):
        os.makedirs(html_output_directory)

    html_dict = copy.deepcopy(run_info_dict)

    # Static assets every report needs: (path below `absolute`, name in report).
    static_assets = (('static/style.css', 'style.css'),
                     ('static/header_1.png', 'header.png'),
                     ('static/missing_image.png', 'missing.png'),
                     ('scripts/jquery-1.7.1.js', 'jquery-1.7.1.js'),
                     ('scripts/popup.js', 'popup.js'),
                     ('scripts/g.pie.js', 'g.pie.js'),
                     ('scripts/g.raphael.js', 'g.raphael.js'),
                     ('scripts/raphael.js', 'raphael.js'),
                     ('scripts/morris.js', 'morris.js'))
    for asset, asset_dest_name in static_assets:
        shutil.copy2(os.path.join(absolute, asset),
                     os.path.join(html_output_directory, asset_dest_name))

    def copy_as(source, dest_name, essential = True):
        """Copy `source` into the report as `dest_name`; return `dest_name`.

        A failed copy raises for essential files and only prints a warning
        for non-essential ones.
        """
        dest = os.path.join(html_output_directory, dest_name)

        if essential:
            shutil.copy2(source, dest)
        else:
            # it is ok if you fail to copy files that are not
            # essential..
            try:
                shutil.copy2(source, dest)
            except EnvironmentError:
                # Only I/O failures are tolerated (IOError, OSError,
                # shutil.Error); anything else is a real bug.
                sys.stderr.write('\n\n[HTML] Warning: Source file not found\n\tSource: "%s"\n\tDest: "%s"\n\n' % (source, dest))

        return os.path.basename(dest)

    def copy_entropy_figure(figure_prefix, ext):
        """Copy `<figure_prefix>.<ext>` as `entropy.<ext>`; only the PNG is essential."""
        return copy_as(figure_prefix + '.%s' % ext, 'entropy.%s' % ext, essential = (ext == 'png'))

    # embarrassingly ad-hoc:
    if entropy_figure:
        if entropy_figure.endswith('.pdf') or entropy_figure.endswith('.png'):
            entropy_figure = entropy_figure[:-4]

    for ext in ['png', 'pdf']:
        if entropy_figure:
            html_dict['entropy_figure_%s' % ext] = copy_entropy_figure(entropy_figure, ext)
        else:
            try:
                html_dict['entropy_figure_%s' % ext] = copy_entropy_figure(run_info_dict['entropy'], ext)
            except EnvironmentError:
                # The recorded path may already carry a 4-character
                # extension; retry without it.
                html_dict['entropy_figure_%s' % ext] = copy_entropy_figure(run_info_dict['entropy'][:-4], ext)

    if run_info_dict['gexf_network_file_path']:
        html_dict['gexf_network_file_path'] = copy_as(run_info_dict['gexf_network_file_path'], 'network.gexf')

    if run_info_dict['sample_mapping']:
        html_dict['sample_mapping'] = copy_as(run_info_dict['sample_mapping'], 'sample_mapping.txt')
    else:
        html_dict['sample_mapping'] = None

    html_dict['matrix_count_file_path'] = copy_as(run_info_dict['matrix_count_file_path'], 'matrix_counts.txt')
    html_dict['matrix_percent_file_path'] = copy_as(run_info_dict['matrix_percent_file_path'], 'matrix_percents.txt')
    html_dict['read_distribution_table_path'] = copy_as(run_info_dict['read_distribution_table_path'], 'read_distribution.txt')
    html_dict['environment_file_path'] = copy_as(run_info_dict['environment_file_path'], 'environment.txt')
    html_dict['oligos_fasta_file_path'] = copy_as(run_info_dict['oligos_fasta_file_path'], 'oligos.fa.txt')
    html_dict['oligos_nexus_file_path'] = copy_as(run_info_dict['oligos_nexus_file_path'], 'oligos.nex.txt')

    def get_figures_dict(html_dict_prefix):
        """Load a pickled figures dict and copy its figures into the report.

        Every leaf (a figure path without extension) is replaced by the
        extension-less name of its copy in the report, or by None unless
        both the PDF and the PNG exist. Returns None when the context has
        no `<prefix>_file_path` entry.
        """
        html_dict_key = '%s_file_path' % html_dict_prefix
        if html_dict_key not in html_dict:
            return None

        # NOTE: unpickling executes arbitrary code; this file must come from
        # a trusted run directory.
        with open(html_dict[html_dict_key]) as figures_file:
            figures_dict = cPickle.load(figures_file)

        for _map in figures_dict:
            for _func in figures_dict[_map]:
                for _op in figures_dict[_map][_func]:
                    figure_prefix = figures_dict[_map][_func][_op]
                    if os.path.exists(figure_prefix + '.pdf') and os.path.exists(figure_prefix + '.png'):
                        output_prefix = '-'.join([_map, _func, _op])
                        copy_as(figure_prefix + '.pdf', '%s.pdf' % output_prefix)
                        prefix = copy_as(figure_prefix + '.png', '%s.png' % output_prefix)
                        figures_dict[_map][_func][_op] = '.'.join(prefix.split('.')[:-1])
                    else:
                        figures_dict[_map][_func][_op] = None

        return figures_dict

    html_dict['figures_dict'] = get_figures_dict('figures_dict')
    html_dict['exclusive_figures_dict'] = get_figures_dict('exclusive_figures_dict')

    if html_dict['generate_sets']:
        html_dict['across_samples_MN_file_path'] = copy_as(run_info_dict['across_samples_MN_file_path'], 'across_samples_max_normalized.txt')
        html_dict['across_samples_SN_file_path'] = copy_as(run_info_dict['across_samples_SN_file_path'], 'across_samples_sum_normalized.txt')
        html_dict['oligo_sets_stackbar_figure'] = copy_as(run_info_dict['stack_bar_with_agglomerated_oligos_file_path'], 'stackbar_with_oligo_sets.png')
        html_dict['oligos_across_samples_figure'] = copy_as(run_info_dict['oligos_across_samples_file_path'], 'oligos_across_samples.png')
        html_dict['oligotype_sets_figure'] = copy_as(run_info_dict['oligotype_sets_across_samples_figure_path'], 'oligotype_sets.png')
        html_dict['matrix_count_oligo_sets_file_path'] = copy_as(run_info_dict['matrix_count_oligo_sets_file_path'], 'matrix_counts_oligo_sets.txt')
        html_dict['matrix_percent_oligo_sets_file_path'] = copy_as(run_info_dict['matrix_percent_oligo_sets_file_path'], 'matrix_percents_oligo_sets.txt')
        html_dict['oligotype_sets_file'] = copy_as(run_info_dict['oligotype_sets_file_path'], 'oligotype_sets.txt')
        # Second tab-separated column of every line, split on commas.
        with open(run_info_dict['oligotype_sets_file_path']) as sets_file:
            html_dict['oligotype_sets'] = [l.strip().split('\t')[1].split(',') for l in sets_file]

    if 'representative_seqs_fasta_file_path' in html_dict:
        html_dict['representative_seqs_fasta_file_path'] = copy_as(run_info_dict['representative_seqs_fasta_file_path'], 'oligo-representatives.fa.txt')
    else:
        html_dict['representative_seqs_fasta_file_path'] = None

    if 'blast_ref_db' in run_info_dict and os.path.exists(run_info_dict['blast_ref_db']):
        html_dict['blast_ref_db_path'] = copy_as(run_info_dict['blast_ref_db'], 'reference_db.fa')

    html_dict['entropy_components'] = [int(x) for x in html_dict['bases_of_interest_locs'].split(',')]
    html_dict['samples_dict'] = get_samples_dict_from_environment_file(run_info_dict['environment_file_path'])
    html_dict['samples'] = sorted(html_dict['samples_dict'].keys())
    html_dict['blast_results_found'] = False

    # get alignment length
    html_dict['alignment_length'] = get_alignment_length(run_info_dict['alignment'])
    # include pretty names
    html_dict['pretty_names'] = pretty_names
    # get colors dict
    html_dict['color_dict'] = get_colors_dict(run_info_dict['colors_file_path'])
    # get abundant oligos list
    html_dict['oligos'] = get_oligos_list(run_info_dict['oligos_fasta_file_path'])
    # get oligo frequencies
    html_dict['frequency'] = {}
    for oligo in html_dict['oligos']:
        html_dict['frequency'][oligo] = pretty_print(sum(d[oligo] for d in html_dict['samples_dict'].values() if oligo in d))

    # get unique sequence dict (which will contain the most frequent unique sequence for given oligotype)
    # NOTE(review): the three assignments below are assumed to belong to this
    # `if`; the original indentation was lost -- confirm against upstream.
    if 'output_directory_for_reps' in html_dict:
        html_dict['rep_oligo_seqs_clean_dict'], html_dict['rep_oligo_seqs_fancy_dict'] = get_unique_sequences_dict(html_dict)
        html_dict['oligo_reps_dict'] = get_oligo_reps_dict(html_dict, html_output_directory)
        html_dict['component_reference'] = ''.join(['<a onmouseover="popup(\'\#%d\', 50)" href="">|</a>' % i for i in range(0, html_dict['alignment_length'])])

    # get javascript code for sample pie-charts
    html_dict['pie_charts_js'] = render_to_string('pie_charts_js.tmpl', html_dict)

    # generate individual oligotype pages
    if 'output_directory_for_reps' in html_dict:
        oligos = html_dict['oligos']
        total = len(oligos)
        for i, oligo in enumerate(oligos):
            # A shallow copy is enough: only top-level keys are added per
            # page, so the large nested values can be shared between pages
            # instead of being deep-copied once per oligo.
            tmp_dict = copy.copy(html_dict)
            tmp_dict['oligo'] = oligo
            tmp_dict['distribution'] = get_oligo_distribution_dict(oligo, html_dict)
            oligo_page = os.path.join(html_output_directory, 'oligo_%s.html' % oligo)

            tmp_dict['index'] = i + 1
            tmp_dict['total'] = total
            tmp_dict['prev'] = 'oligo_%s.html' % oligos[i - 1] if i > 0 else None
            tmp_dict['next'] = 'oligo_%s.html' % oligos[i + 1] if i < total - 1 else None

            rendered = render_to_string('single_oligo.tmpl', tmp_dict)

            with open(oligo_page, 'w') as page_file:
                page_file.write(rendered.encode("utf-8"))

    # generate index
    index_page = os.path.join(html_output_directory, 'index.html')
    rendered = render_to_string('index_for_oligo.tmpl', html_dict)

    with open(index_page, 'w') as index_file:
        index_file.write(rendered.encode("utf-8"))

    return index_page
def generate_html_output(run_info_dict, html_output_directory=None, entropy_figure=None):
    """Assemble the static HTML report for an oligotyping run with purity scores.

    Copies static assets and the run's output files into the HTML output
    directory, builds the template context from a deep copy of
    `run_info_dict` (including purity-score colours), renders one page per
    oligo (only when the context has an 'output_directory_for_reps' entry)
    and finally `index.html`.

    Args:
        run_info_dict: mapping with the run's settings and output paths. It is
            only read here; the template context is a deep copy of it.
        html_output_directory: destination directory. Defaults to
            `<run_info_dict['output_directory']>/HTML-OUTPUT` and is created
            when it does not exist.
        entropy_figure: optional path of the entropy figure, with or without
            a '.png' / '.pdf' suffix. When falsy, `run_info_dict['entropy']`
            is used instead.

    Returns:
        Path of the rendered `index.html`.

    Raises:
        KeyError: when a required key is missing from `run_info_dict`.
        EnvironmentError: when an essential file cannot be copied or a page
            cannot be written.
    """
    if not html_output_directory:
        html_output_directory = os.path.join(run_info_dict['output_directory'], 'HTML-OUTPUT')

    if not os.path.exists(html_output_directory):
        os.makedirs(html_output_directory)

    html_dict = copy.deepcopy(run_info_dict)

    # Static assets every report needs: (path below `absolute`, name in report).
    static_assets = (('static/style.css', 'style.css'),
                     ('static/header_1.png', 'header.png'),
                     ('static/missing_image.png', 'missing.png'),
                     ('static/colorbar.png', 'colorbar.png'),
                     ('scripts/jquery-1.7.1.js', 'jquery-1.7.1.js'),
                     ('scripts/popup.js', 'popup.js'),
                     ('scripts/g.pie.js', 'g.pie.js'),
                     ('scripts/g.raphael.js', 'g.raphael.js'),
                     ('scripts/raphael.js', 'raphael.js'),
                     ('scripts/morris.js', 'morris.js'))
    for asset, asset_dest_name in static_assets:
        shutil.copy2(os.path.join(absolute, asset),
                     os.path.join(html_output_directory, asset_dest_name))

    def copy_as(source, dest_name, essential=True):
        """Copy `source` into the report as `dest_name`; return `dest_name`.

        A failed copy raises for essential files and only prints a warning
        for non-essential ones.
        """
        dest = os.path.join(html_output_directory, dest_name)

        if essential:
            shutil.copy2(source, dest)
        else:
            # it is ok if you fail to copy files that are not
            # essential..
            try:
                shutil.copy2(source, dest)
            except EnvironmentError:
                # Only I/O failures are tolerated (IOError, OSError,
                # shutil.Error); anything else is a real bug.
                sys.stderr.write(
                    '\n\n[HTML] Warning: Source file not found\n\tSource: "%s"\n\tDest: "%s"\n\n'
                    % (source, dest))

        return os.path.basename(dest)

    def copy_entropy_figure(figure_prefix, ext):
        """Copy `<figure_prefix>.<ext>` as `entropy.<ext>`; only the PNG is essential."""
        return copy_as(figure_prefix + '.%s' % ext, 'entropy.%s' % ext, essential=(ext == 'png'))

    # embarrassingly ad-hoc:
    if entropy_figure:
        if entropy_figure.endswith('.pdf') or entropy_figure.endswith('.png'):
            entropy_figure = entropy_figure[:-4]

    for ext in ['png', 'pdf']:
        if entropy_figure:
            html_dict['entropy_figure_%s' % ext] = copy_entropy_figure(entropy_figure, ext)
        else:
            try:
                html_dict['entropy_figure_%s' % ext] = copy_entropy_figure(run_info_dict['entropy'], ext)
            except EnvironmentError:
                # The recorded path may already carry a 4-character
                # extension; retry without it.
                html_dict['entropy_figure_%s' % ext] = copy_entropy_figure(run_info_dict['entropy'][:-4], ext)

    if run_info_dict['gexf_network_file_path']:
        html_dict['gexf_network_file_path'] = copy_as(
            run_info_dict['gexf_network_file_path'], 'network.gexf')

    if run_info_dict['sample_mapping']:
        html_dict['sample_mapping'] = copy_as(run_info_dict['sample_mapping'], 'sample_mapping.txt')
    else:
        html_dict['sample_mapping'] = None

    html_dict['matrix_count_file_path'] = copy_as(
        run_info_dict['matrix_count_file_path'], 'matrix_counts.txt')
    html_dict['matrix_percent_file_path'] = copy_as(
        run_info_dict['matrix_percent_file_path'], 'matrix_percents.txt')
    html_dict['read_distribution_table_path'] = copy_as(
        run_info_dict['read_distribution_table_path'], 'read_distribution.txt')
    html_dict['environment_file_path'] = copy_as(
        run_info_dict['environment_file_path'], 'environment.txt')
    html_dict['oligos_fasta_file_path'] = copy_as(
        run_info_dict['oligos_fasta_file_path'], 'oligos.fa.txt')
    html_dict['oligos_nexus_file_path'] = copy_as(
        run_info_dict['oligos_nexus_file_path'], 'oligos.nex.txt')

    def get_figures_dict(html_dict_prefix):
        """Load a pickled figures dict and copy its figures into the report.

        Every leaf (a figure path without extension) is replaced by the
        extension-less name of its copy in the report, or by None unless
        both the PDF and the PNG exist. Returns None when the context has
        no `<prefix>_file_path` entry.
        """
        html_dict_key = '%s_file_path' % html_dict_prefix
        if html_dict_key not in html_dict:
            return None

        # NOTE: unpickling executes arbitrary code; this file must come from
        # a trusted run directory.
        with open(html_dict[html_dict_key]) as figures_file:
            figures_dict = cPickle.load(figures_file)

        for _map in figures_dict:
            for _func in figures_dict[_map]:
                for _op in figures_dict[_map][_func]:
                    figure_prefix = figures_dict[_map][_func][_op]
                    if os.path.exists(figure_prefix + '.pdf') and os.path.exists(figure_prefix + '.png'):
                        output_prefix = '-'.join([_map, _func, _op])
                        copy_as(figure_prefix + '.pdf', '%s.pdf' % output_prefix)
                        prefix = copy_as(figure_prefix + '.png', '%s.png' % output_prefix)
                        figures_dict[_map][_func][_op] = '.'.join(prefix.split('.')[:-1])
                    else:
                        figures_dict[_map][_func][_op] = None

        return figures_dict

    html_dict['figures_dict'] = get_figures_dict('figures_dict')
    html_dict['exclusive_figures_dict'] = get_figures_dict('exclusive_figures_dict')

    if html_dict['generate_sets']:
        html_dict['across_samples_MN_file_path'] = copy_as(
            run_info_dict['across_samples_MN_file_path'], 'across_samples_max_normalized.txt')
        html_dict['across_samples_SN_file_path'] = copy_as(
            run_info_dict['across_samples_SN_file_path'], 'across_samples_sum_normalized.txt')
        html_dict['oligo_sets_stackbar_figure'] = copy_as(
            run_info_dict['stack_bar_with_agglomerated_oligos_file_path'], 'stackbar_with_oligo_sets.png')
        html_dict['oligos_across_samples_figure'] = copy_as(
            run_info_dict['oligos_across_samples_file_path'], 'oligos_across_samples.png')
        html_dict['oligotype_sets_figure'] = copy_as(
            run_info_dict['oligotype_sets_across_samples_figure_path'], 'oligotype_sets.png')
        html_dict['matrix_count_oligo_sets_file_path'] = copy_as(
            run_info_dict['matrix_count_oligo_sets_file_path'], 'matrix_counts_oligo_sets.txt')
        html_dict['matrix_percent_oligo_sets_file_path'] = copy_as(
            run_info_dict['matrix_percent_oligo_sets_file_path'], 'matrix_percents_oligo_sets.txt')
        html_dict['oligotype_sets_file'] = copy_as(
            run_info_dict['oligotype_sets_file_path'], 'oligotype_sets.txt')
        # Second tab-separated column of every line, split on commas.
        with open(run_info_dict['oligotype_sets_file_path']) as sets_file:
            html_dict['oligotype_sets'] = [l.strip().split('\t')[1].split(',') for l in sets_file]

    if 'representative_seqs_fasta_file_path' in html_dict:
        html_dict['representative_seqs_fasta_file_path'] = copy_as(
            run_info_dict['representative_seqs_fasta_file_path'], 'oligo-representatives.fa.txt')
    else:
        html_dict['representative_seqs_fasta_file_path'] = None

    if 'blast_ref_db' in run_info_dict and os.path.exists(run_info_dict['blast_ref_db']):
        html_dict['blast_ref_db_path'] = copy_as(run_info_dict['blast_ref_db'], 'reference_db.fa')

    html_dict['entropy_components'] = [int(x) for x in html_dict['bases_of_interest_locs'].split(',')]
    html_dict['samples_dict'] = get_samples_dict_from_environment_file(
        run_info_dict['environment_file_path'])
    html_dict['samples'] = sorted(html_dict['samples_dict'].keys())
    html_dict['blast_results_found'] = False

    # get alignment length
    html_dict['alignment_length'] = get_alignment_length(run_info_dict['alignment'])
    # include pretty names
    html_dict['pretty_names'] = pretty_names

    # get purity score colors dict
    # A score s selects gradient[int(s * 25)] out of 26 colours.
    final_purity_scores = run_info_dict['final_purity_score_dict']
    gradient = get_list_of_colors(26, colormap='RdYlGn')
    html_dict['score_color_dict'] = {}
    for oligo in final_purity_scores:
        html_dict['score_color_dict'][oligo] = gradient[int(final_purity_scores[oligo] * 25)]

    # get total purity score color dict
    html_dict['total_score_color'] = gradient[int(float(run_info_dict['total_purity_score_dict']) * 25)]

    # get colors dict
    html_dict['color_dict'] = get_colors_dict(run_info_dict['colors_file_path'])
    # get abundant oligos list
    html_dict['oligos'] = get_oligos_list(run_info_dict['oligos_fasta_file_path'])
    # get oligo frequencies
    html_dict['frequency'] = {}
    for oligo in html_dict['oligos']:
        html_dict['frequency'][oligo] = pretty_print(
            sum(d[oligo] for d in html_dict['samples_dict'].values() if oligo in d))

    # get purity score
    html_dict['purity_score'] = run_info_dict['final_purity_score_dict']
    # get total purity score
    html_dict['total_purity_score'] = run_info_dict['total_purity_score_dict']

    # get unique sequence dict (which will contain the most frequent unique sequence for given oligotype)
    # NOTE(review): the three assignments below are assumed to belong to this
    # `if`; the original indentation was lost -- confirm against upstream.
    if 'output_directory_for_reps' in html_dict:
        html_dict['rep_oligo_seqs_clean_dict'], html_dict['rep_oligo_seqs_fancy_dict'] = get_unique_sequences_dict(html_dict)
        html_dict['oligo_reps_dict'] = get_oligo_reps_dict(html_dict, html_output_directory)
        html_dict['component_reference'] = ''.join([
            '<a onmouseover="popup(\'\#%d\', 50)" href="">|</a>' % i
            for i in range(0, html_dict['alignment_length'])
        ])

    # get javascript code for sample pie-charts
    html_dict['pie_charts_js'] = render_to_string('pie_charts_js.tmpl', html_dict)

    # generate individual oligotype pages
    if 'output_directory_for_reps' in html_dict:
        oligos = html_dict['oligos']
        total = len(oligos)
        for i, oligo in enumerate(oligos):
            # A shallow copy is enough: only top-level keys are added per
            # page, so the large nested values can be shared between pages
            # instead of being deep-copied once per oligo.
            tmp_dict = copy.copy(html_dict)
            tmp_dict['oligo'] = oligo
            tmp_dict['distribution'] = get_oligo_distribution_dict(oligo, html_dict)
            oligo_page = os.path.join(html_output_directory, 'oligo_%s.html' % oligo)

            tmp_dict['index'] = i + 1
            tmp_dict['total'] = total
            tmp_dict['prev'] = 'oligo_%s.html' % oligos[i - 1] if i > 0 else None
            tmp_dict['next'] = 'oligo_%s.html' % oligos[i + 1] if i < total - 1 else None

            rendered = render_to_string('single_oligo.tmpl', tmp_dict)

            with open(oligo_page, 'w') as page_file:
                page_file.write(rendered.encode("utf-8"))

    # generate index
    index_page = os.path.join(html_output_directory, 'index.html')
    rendered = render_to_string('index_for_oligo.tmpl', html_dict)

    with open(index_page, 'w') as index_file:
        index_file.write(rendered.encode("utf-8"))

    return index_page
import sys

from Oligotyping.utils.utils import get_samples_dict_from_environment_file
from Oligotyping.utils.utils import get_oligos_sorted_by_abundance
from Oligotyping.utils.utils import get_units_across_samples_dicts
from Oligotyping.utils.utils import get_unit_counts_and_percents
from Oligotyping.utils.cosine_similarity import get_oligotype_sets
from Oligotyping.utils.cosine_similarity import get_oligotype_sets_greedy
from Oligotyping.visualization.oligotype_distribution_stack_bar import oligotype_distribution_stack_bar
from Oligotyping.utils.utils import generate_ENVIRONMENT_file

# Command line: argv[1] is the environment file, argv[2] the cosine
# similarity value (parsed as a float).
input_file_path = sys.argv[1]
cosine_similarity_value = float(sys.argv[2])

# Both output names are derived from the input path and the similarity value.
sets_output_file_name = input_file_path + '-cos-%s-SETS' % cosine_similarity_value
environ_output_file_name = input_file_path + '-cos-%s-SETS-ENVIRON' % cosine_similarity_value

samples_dict = get_samples_dict_from_environment_file(input_file_path)
oligos = get_oligos_sorted_by_abundance(samples_dict)
unit_counts, unit_percents = get_unit_counts_and_percents(oligos, samples_dict)
samples = samples_dict.keys()

across_samples_sum_normalized, across_samples_max_normalized = get_units_across_samples_dicts(
    oligos, samples_dict.keys(), unit_percents)

# Only the greedy variant is called in this part of the script; it receives
# the sum-normalized values and the name of the sets output file.
oligotype_sets = get_oligotype_sets_greedy(oligos, across_samples_sum_normalized,
                                           cosine_similarity_value, sets_output_file_name)

print '%d sets from %d units' % (len(oligotype_sets), len(oligos))

# Starts empty here; nothing in the visible part of the script fills it.
samples_dict_with_agglomerated_oligos = {}
# Remaining command line options; `parser` is created earlier in the script.
# NOTE(review): the help string below continues across a backslash-newline
# inside the literal, so the leading whitespace of the continuation line is
# part of the string.
parser.add_argument('--output-file', default = None, metavar = 'OUTPUT_FILE',\
                    help = 'File name for the figure to be stored. File name\
 must end with "png", "jpg", or "tiff".' )
parser.add_argument('--legend',
                    action='store_true',
                    default=False,
                    help='Turn on legend')
parser.add_argument('--colors-export',
                    metavar='COLORS_LIST_FILE',
                    help='Store the color list into a file')
parser.add_argument('--project-title', default = None, metavar = 'PROJECT_TITLE',\
                    help = 'Project name for the samples.')

args = parser.parse_args()

samples_dict = get_samples_dict_from_environment_file(
    args.environment_file)

if args.colors_file:
    # Two tab-separated columns per line: oligotype, then its color.
    colors_dict = {}
    for oligotype, color in [
            line.strip().split('\t')
            for line in open(args.colors_file).readlines()
    ]:
        colors_dict[oligotype] = color
elif args.color_list_file:
    # One color per line; the oligo list is built here and then reversed.
    colors_dict = {}
    colors = [
        line.strip() for line in open(args.color_list_file).readlines()
    ]
    oligos = get_oligos_sorted_by_abundance(samples_dict, None)
    oligos.reverse()
# -*- coding: utf-8 -*- # takes an environment file and a generates matching percent and count matrices. import sys from Oligotyping.utils.utils import get_samples_dict_from_environment_file from Oligotyping.utils.utils import get_oligos_sorted_by_abundance from Oligotyping.utils.utils import get_units_across_samples_dicts from Oligotyping.utils.utils import get_unit_counts_and_percents from Oligotyping.utils.utils import generate_MATRIX_files samples_dict = get_samples_dict_from_environment_file(sys.argv[1]) oligos = get_oligos_sorted_by_abundance(samples_dict) oligos.reverse() unit_counts, unit_percents = get_unit_counts_and_percents(oligos, samples_dict) samples = sorted(samples_dict.keys()) generate_MATRIX_files(oligos, samples, unit_counts, unit_percents, sys.argv[1] + '-MATRIX-COUNT', sys.argv[1] + '-MATRIX-PERCENT')
# -*- coding: utf-8 -*- # takes an environment file and a generates matching percent and count matrices. import sys from Oligotyping.utils.utils import get_samples_dict_from_environment_file from Oligotyping.utils.utils import get_oligos_sorted_by_abundance from Oligotyping.utils.utils import get_units_across_samples_dicts from Oligotyping.utils.utils import get_unit_counts_and_percents from Oligotyping.utils.utils import generate_MATRIX_files samples_dict = get_samples_dict_from_environment_file(sys.argv[1]) oligos = get_oligos_sorted_by_abundance(samples_dict) oligos.reverse() unit_counts, unit_percents = get_unit_counts_and_percents(oligos, samples_dict) samples = sorted(samples_dict.keys()) generate_MATRIX_files(oligos, samples, unit_counts, unit_percents, sys.argv[1] + '-MATRIX-COUNT', sys.argv[1] + '-MATRIX-PERCENT')
# Please read the COPYING file.

import os
import sys
import cPickle

from Oligotyping.lib.decomposer import Decomposer
from Oligotyping.utils.utils import get_samples_dict_from_environment_file

# Rebuilds a Decomposer from a pickled run info dict and a sample mapping
# file, then asks it to generate the exclusive figures.

if len(sys.argv) < 3:
    # Fail with a usage line instead of a bare IndexError traceback.
    sys.exit('Usage: %s RUNINFO_PICKLE SAMPLE_MAPPING' % sys.argv[0])

# NOTE: unpickling executes arbitrary code; the run info file must come from
# a trusted run directory.
with open(sys.argv[1]) as runinfo_file:
    runinfo = cPickle.load(runinfo_file)

sample_mapping = sys.argv[2]

decomposer = Decomposer()
decomposer.matrix_percent_file_path = runinfo['matrix_percent_file_path']
decomposer.matrix_count_file_path = runinfo['matrix_count_file_path']
decomposer.tmp_directory = runinfo['tmp_directory']
decomposer.output_directory = runinfo['output_directory']
# The figures directory is always named 'FIGURES' and sits next to the
# directory recorded in the run info.
decomposer.figures_directory = os.path.join(
    os.path.dirname(runinfo['figures_directory']), 'FIGURES')

if not os.path.exists(decomposer.tmp_directory):
    os.makedirs(decomposer.tmp_directory)

decomposer.sample_mapping = sample_mapping
decomposer._init_logger('exclusive_figures.log')
decomposer.samples_dict = get_samples_dict_from_environment_file(
    runinfo['environment_file_path'])
decomposer.samples = sorted(decomposer.samples_dict.keys())
decomposer._generate_exclusive_figures()
# Remaining command line options; `parser` is created earlier in the script.
parser.add_argument('--color-list-file', metavar = 'COLORS_FILE', default = None,
                    help = 'Single column file that contains a list of colors')
# Adjacent literals replace the backslash continuation inside the string,
# which used to embed the continuation line's indentation in the help text.
parser.add_argument('--output-file', default = None, metavar = 'OUTPUT_FILE',
                    help = 'File name for the figure to be stored. File name '
                           'must end with "png", "jpg", or "tiff".')
parser.add_argument('--legend', action = 'store_true', default = False,
                    help = 'Turn on legend')
parser.add_argument('--colors-export', metavar = 'COLORS_LIST_FILE',
                    help = 'Store the color list into a file')
parser.add_argument('--project-title', default = None, metavar = 'PROJECT_TITLE',
                    help = 'Project name for the samples.')

args = parser.parse_args()

samples_dict = get_samples_dict_from_environment_file(args.environment_file)

if args.colors_file:
    # Two tab-separated columns per line: oligotype, then its color.
    colors_dict = {}
    with open(args.colors_file) as colors_file:
        for line in colors_file:
            oligotype, color = line.strip().split('\t')
            colors_dict[oligotype] = color
elif args.color_list_file:
    # One color per line, assigned to the oligos in reversed abundance order.
    colors_dict = {}
    with open(args.color_list_file) as color_list_file:
        colors = [line.strip() for line in color_list_file]

    oligos = get_oligos_sorted_by_abundance(samples_dict, None)
    oligos.reverse()

    if len(oligos) > len(colors):
        sys.stderr.write('Error: number of colors in file is less than number of oligos. Quiting.\n')
        # Running out of colors is an error, so exit with a failure status.
        sys.exit(1)

    # Pair oligos with colors positionally instead of calling
    # oligos.index() per oligo (quadratic). setdefault keeps the color of
    # the first occurrence, exactly like the index() lookup did.
    for oligo, color in zip(oligos, colors):
        colors_dict.setdefault(oligo, color)
from Oligotyping.utils.utils import get_samples_dict_from_environment_file
from Oligotyping.utils.utils import get_oligos_sorted_by_abundance
from Oligotyping.utils.utils import get_units_across_samples_dicts
from Oligotyping.utils.utils import get_unit_counts_and_percents
from Oligotyping.utils.cosine_similarity import get_oligotype_sets
from Oligotyping.utils.cosine_similarity import get_oligotype_sets_greedy
from Oligotyping.visualization.oligotype_distribution_stack_bar import oligotype_distribution_stack_bar
from Oligotyping.utils.utils import generate_ENVIRONMENT_file

# Command line: argv[1] is the input (environment) file path, argv[2] is the
# cosine similarity value, parsed as a float.
input_file_path = sys.argv[1]
cosine_similarity_value = float(sys.argv[2])

# Both output names are the input path plus a suffix that embeds the
# cosine similarity value.
sets_output_file_name = input_file_path + '-cos-%s-SETS' % cosine_similarity_value
environ_output_file_name = input_file_path + '-cos-%s-SETS-ENVIRON' % cosine_similarity_value

samples_dict = get_samples_dict_from_environment_file(input_file_path)
oligos = get_oligos_sorted_by_abundance(samples_dict)
unit_counts, unit_percents = get_unit_counts_and_percents(oligos, samples_dict)
samples = samples_dict.keys()

# Only the sum-normalized result is passed on to the greedy set builder below.
across_samples_sum_normalized, across_samples_max_normalized = get_units_across_samples_dicts(oligos, samples_dict.keys(), unit_percents)
# NOTE(review): sets_output_file_name is handed to the helper, presumably so
# it can write the sets to that file -- confirm against get_oligotype_sets_greedy.
oligotype_sets = get_oligotype_sets_greedy(oligos, across_samples_sum_normalized, cosine_similarity_value, sets_output_file_name)

print '%d sets from %d units' % (len(oligotype_sets), len(oligos))

samples_dict_with_agglomerated_oligos = {}

for sample in samples:
def generate_html_output(run_info_dict, html_output_directory=None):
    """Render the HTML report for a decomposition run and return its index page.

    The run's result files and figures are copied into the HTML output
    directory, a deep copy of ``run_info_dict`` is updated so its file
    entries hold the copied base names, and ``index.html`` is rendered from
    the ``index_for_decomposition.tmpl`` template.

    Parameters:
        run_info_dict: run info dictionary. Must contain
            'matrix_count_file_path', 'matrix_percent_file_path',
            'environment_file_path' and 'read_distribution_table_path', and
            'output_directory' when ``html_output_directory`` is not given.
            The following keys are optional: 'figures_dict_file_path',
            'exclusive_figures_dict_file_path',
            'node_representatives_file_path', 'blast_ref_db',
            'sample_mapping', 'gexf_network_file_path' and 'topology_gexf'.
            The dictionary itself is not modified.
        html_output_directory: destination directory; defaults to
            ``<output_directory>/HTML-OUTPUT`` and is created when missing.

    Returns:
        Path of the generated ``index.html``.

    Raises:
        KeyError: if a required key is missing from ``run_info_dict``.
        IOError / OSError: if a bundled static asset cannot be copied or the
            index page cannot be written.
    """
    if not html_output_directory:
        html_output_directory = os.path.join(run_info_dict['output_directory'], 'HTML-OUTPUT')

    if not os.path.exists(html_output_directory):
        os.makedirs(html_output_directory)

    # Work on a copy so the caller's run info keeps its original paths.
    html_dict = copy.deepcopy(run_info_dict)

    missing_image_path = os.path.join(absolute, 'static/missing_image.png')

    # Bundled static assets: (path relative to `absolute`, name in the output).
    static_assets = (('static/style.css', 'style.css'),
                     ('static/header_2.png', 'header.png'),
                     ('static/missing_image.png', 'missing.png'),
                     ('static/colorbar.png', 'colorbar.png'))
    for asset_path, asset_name in static_assets:
        shutil.copy2(os.path.join(absolute, asset_path),
                     os.path.join(html_output_directory, asset_name))

    def copy_as(source, dest_name):
        """Copy `source` into the output directory as `dest_name`; return `dest_name`'s base name.

        Copying is best effort: when it fails, a PNG source is replaced by
        the bundled placeholder image and any other source is left out.
        """
        dest = os.path.join(html_output_directory, dest_name)

        try:
            shutil.copy2(source, dest)
        except EnvironmentError:
            # Only file system failures are tolerated; anything else (for
            # example KeyboardInterrupt) propagates.
            if source.endswith('png'):
                shutil.copy2(missing_image_path, dest)

        return os.path.basename(dest)

    html_dict['matrix_count_file_path'] = copy_as(run_info_dict['matrix_count_file_path'], 'matrix_counts.txt')
    html_dict['matrix_percent_file_path'] = copy_as(run_info_dict['matrix_percent_file_path'], 'matrix_percents.txt')
    html_dict['environment_file_path'] = copy_as(run_info_dict['environment_file_path'], 'environment.txt')
    html_dict['read_distribution_table_path'] = copy_as(run_info_dict['read_distribution_table_path'], 'read_distribution.txt')

    def get_figures_dict(html_dict_prefix):
        """Load the pickled figures dictionary stored under '<prefix>_file_path'.

        Every leaf is a figure path without extension. Figures that exist as
        PDF or PNG are copied into the output directory and the leaf becomes
        the copied base name without extension; all other leaves become
        None. Returns None when the key is absent from the run info.
        """
        html_dict_key = '%s_file_path' % html_dict_prefix

        if html_dict_key not in html_dict:
            return None

        # NOTE: unpickling can execute arbitrary code; the pickle is expected
        # to have been written by the pipeline itself.
        with open(html_dict[html_dict_key]) as figures_file:
            figures_dict = cPickle.load(figures_file)

        for _map in figures_dict:
            for _func in figures_dict[_map]:
                for _op in figures_dict[_map][_func]:
                    figure_path = figures_dict[_map][_func][_op]

                    if os.path.exists(figure_path + '.pdf') or os.path.exists(figure_path + '.png'):
                        dest_prefix = '-'.join([_map, _func, _op])
                        # A missing PNG falls back to the placeholder image;
                        # a missing PDF is simply not copied.
                        copy_as(figure_path + '.png', '%s.png' % dest_prefix)
                        copied_name = copy_as(figure_path + '.pdf', '%s.pdf' % dest_prefix)
                        figures_dict[_map][_func][_op] = '.'.join(copied_name.split('.')[:-1])
                    else:
                        figures_dict[_map][_func][_op] = None

        return figures_dict

    html_dict['figures_dict'] = get_figures_dict('figures_dict')
    html_dict['exclusive_figures_dict'] = get_figures_dict('exclusive_figures_dict')

    if 'node_representatives_file_path' in html_dict:
        html_dict['node_representatives_file_path'] = copy_as(run_info_dict['node_representatives_file_path'], 'node-representatives.fa.txt')
    else:
        html_dict['node_representatives_file_path'] = None

    if 'blast_ref_db' in run_info_dict and os.path.exists(run_info_dict['blast_ref_db']):
        html_dict['blast_ref_db_path'] = copy_as(run_info_dict['blast_ref_db'], 'reference_db.fa')

    # The remaining artifacts are optional; a missing key is treated like an
    # empty value instead of raising KeyError.
    if run_info_dict.get('sample_mapping'):
        html_dict['sample_mapping'] = copy_as(run_info_dict['sample_mapping'], 'sample_mapping.txt')
    else:
        html_dict['sample_mapping'] = None

    if run_info_dict.get('gexf_network_file_path'):
        html_dict['gexf_network_file_path'] = copy_as(run_info_dict['gexf_network_file_path'], 'network.gexf')

    if run_info_dict.get('topology_gexf'):
        html_dict['topology_gexf'] = copy_as(run_info_dict['topology_gexf'], 'topology.gexf')

    html_dict['samples_dict'] = get_samples_dict_from_environment_file(run_info_dict['environment_file_path'])
    html_dict['samples'] = sorted(html_dict['samples_dict'].keys())
    html_dict['blast_results_found'] = False

    # include pretty names
    html_dict['pretty_names'] = pretty_names

    # get javascript code for sample pie-charts
    html_dict['pie_charts_js'] = render_to_string('pie_charts_js.tmpl', html_dict)

    # generate index
    index_page = os.path.join(html_output_directory, 'index.html')
    rendered = render_to_string('index_for_decomposition.tmpl', html_dict)

    with open(index_page, 'w') as index_file:
        index_file.write(rendered.encode("utf-8"))

    return index_page