def get_colors(number_of_colors):
    """Return `number_of_colors` colors taken from the "Dark2" colormap.

    Thin convenience wrapper that delegates to `get_list_of_colors`.
    """
    palette = get_list_of_colors(number_of_colors, colormap="Dark2")
    return palette
def entropy_distribution_bar(alignment, entropy_values, output_file, quick=False, no_display=False, qual_stats_dict=None, weighted=False, verbose=False):
    """Draw the per-position entropy bar chart and save it as PNG and PDF.

    Unless running in quick mode, the most abundant unique sequences of the
    alignment are printed behind the bars (one colored character per
    position), and, when `qual_stats_dict` is given, each column is shaded
    according to its mean quality.

    Parameters:
        alignment: handed to `get_unique_sequences_from_FASTA`; `None`
            forces quick mode.
        entropy_values: sequence of numbers, one bar per item.
        output_file: path prefix; '.png' and '.pdf' are appended.
        quick: when true, skip the unique sequences and quality background.
        no_display: when true, do not call `plt.show()`.
        qual_stats_dict: optional mapping indexed by alignment position whose
            entries are either falsy or provide 'mean' and 'count'.
        weighted: selects the y axis label ('Weighted Shannon Entropy').
        verbose: copied to `progress.verbose`; also reports the output path.
    """
    progress.verbose = verbose
    progress.new('Entropy Distribution Figure')
    progress.update('Computing ')

    highest_entropy = max(entropy_values)
    y_maximum = highest_entropy + (highest_entropy / 10.0)
    y_maximum = 1 if y_maximum < 1 else y_maximum

    number_of_uniques_to_show = int(y_maximum * 100)

    if alignment is None:
        quick = True

    colors_dict = {}
    if not quick:
        unique_sequences = get_unique_sequences_from_FASTA(alignment, limit=number_of_uniques_to_show)

        # sequences are upper-cased before they are drawn, so colors have to
        # be keyed on the upper-cased characters as well.
        chars = set()
        for seq in unique_sequences:
            chars.update(seq[0].upper())

        # work on a copy: adding colors for unexpected characters must not
        # leak into the module-level NUCL_COLORS table.
        colors_dict = dict(NUCL_COLORS)

        missing_chars = [char for char in chars if char not in NUCL_COLORS]

        if missing_chars:
            colors_for_missing_chars = get_list_of_colors(len(missing_chars), colormap="RdYlGn")
            for i, char in enumerate(missing_chars):
                colors_dict[char] = colors_for_missing_chars[i]
    else:
        unique_sequences = None

    fig = plt.figure(figsize=(len(entropy_values) / 20, 10))

    plt.rcParams.update({'axes.linewidth': 0.1})
    plt.rc('grid', color='0.70', linestyle='-', linewidth=0.1)
    plt.grid(True)

    plt.subplots_adjust(hspace=0, wspace=0, right=0.995, left=0.050, top=0.92, bottom=0.10)

    ax = fig.add_subplot(111)

    if not quick:
        # one text row every 0.03 entropy units, from the top down; zip()
        # stops at whichever runs out first (rows or unique sequences), which
        # also covers an empty list of unique sequences.
        rows = range(number_of_uniques_to_show - 1, 0, -3)
        for y, entry in zip(rows, unique_sequences):
            progress.append('.')
            unique_sequence = entry[0].upper()
            count = entry[1]
            frequency = entry[2]

            for i, char in enumerate(unique_sequence):
                plt.text(i, y / 100.0, char, fontsize=5, color=colors_dict[char])

            # green band whose length is proportional to the frequency
            # (at least one column wide).
            percent = int(round(frequency * len(unique_sequence))) or 1
            plt.fill_between(range(0, percent), (y + 1.15) / 100.0, (y - 0.85) / 100.0, color="green", alpha=0.2)
            plt.text(percent + 0.8, (y - 1.2) / 100.0, count, fontsize=5, color='gray')

    if not quick and qual_stats_dict:
        # add mean quality values in the background of the figure.
        colors = get_list_of_colors(21, colormap="RdYlGn")
        colors = [colors[0] for _ in range(0, 20)] + colors

        counts = [qual_stats_dict[q]['count'] for q in qual_stats_dict if qual_stats_dict[q]]
        max_count = max(counts) if counts else 0
        # log(1) == 0 would make the alpha computation below divide by zero.
        log_max_count = log(max_count) if max_count > 1 else 0

        for pos in range(0, len(entropy_values)):
            if not qual_stats_dict[pos]:
                continue

            mean = int(round(qual_stats_dict[pos]['mean']))
            count = qual_stats_dict[pos]['count']
            if log_max_count:
                alpha = (log(count) / log_max_count) / 5
            else:
                # every position has the same (single) count: full weight.
                alpha = 0.2
            plt.fill_between([pos, pos + 1], y1=0, y2=y_maximum, color=colors[mean], alpha=alpha)

    ind = np.arange(len(entropy_values))
    ax.bar(ind, entropy_values, color='black', lw=0.5)
    ax.set_xlim([0, len(entropy_values)])
    ax.set_ylim([0, y_maximum])
    plt.xlabel('Position in the Alignment')
    if weighted:
        plt.ylabel('Weighted Shannon Entropy')
    else:
        plt.ylabel('Shannon Entropy')

    progress.update('Saving into "%s"' % output_file)
    plt.savefig(output_file + '.png')
    plt.savefig(output_file + '.pdf')

    if verbose:
        progress.reset()
        # NOTE(review): assumed to belong under `if verbose` together with
        # the progress call above -- confirm.
        run.info('Entropy figure output path', output_file + '.{png, pdf}')

    if not no_display:
        try:
            progress.update('Entropy figure is being shown (you do not have display? you can avoid this step by using --no-display))')
            plt.show()
        except Exception:
            # showing the figure is best-effort (e.g. no display available);
            # the files have already been written.
            pass

    progress.end()
def generate_html_output(run_info_dict, html_output_directory=None, entropy_figure=None):
    """Build the static HTML report for a run and return the index page path.

    Static assets and the run's output files are copied into the HTML output
    directory, a template context (`html_dict`) is assembled from a deep copy
    of `run_info_dict`, one page per oligotype is rendered when the run
    provides 'output_directory_for_reps', and finally the index page is
    rendered.

    Parameters:
        run_info_dict: mapping describing the run (file paths, purity scores,
            'generate_sets', 'bases_of_interest_locs', ...).
        html_output_directory: destination directory; defaults to
            'HTML-OUTPUT' under run_info_dict['output_directory'] and is
            created when missing.
        entropy_figure: optional path of the entropy figure, with or without
            a '.png' / '.pdf' extension; when not given,
            run_info_dict['entropy'] is used.

    Returns:
        Path of the generated 'index.html'.
    """
    if not html_output_directory:
        html_output_directory = os.path.join(run_info_dict['output_directory'], 'HTML-OUTPUT')

    if not os.path.exists(html_output_directory):
        os.makedirs(html_output_directory)

    html_dict = copy.deepcopy(run_info_dict)

    # (path relative to `absolute`, name in the output directory)
    static_assets = [('static/style.css', 'style.css'),
                     ('static/header_1.png', 'header.png'),
                     ('static/missing_image.png', 'missing.png'),
                     ('static/colorbar.png', 'colorbar.png'),
                     ('scripts/jquery-1.7.1.js', 'jquery-1.7.1.js'),
                     ('scripts/popup.js', 'popup.js'),
                     ('scripts/g.pie.js', 'g.pie.js'),
                     ('scripts/g.raphael.js', 'g.raphael.js'),
                     ('scripts/raphael.js', 'raphael.js'),
                     ('scripts/morris.js', 'morris.js')]
    for asset, asset_dest_name in static_assets:
        shutil.copy2(os.path.join(absolute, asset), os.path.join(html_output_directory, asset_dest_name))

    def copy_as(source, dest_name, essential=True):
        """Copy `source` into the output directory; return the new base name."""
        dest = os.path.join(html_output_directory, dest_name)

        if essential:
            shutil.copy2(source, dest)
        else:
            # it is ok if you fail to copy files that are not
            # essential..
            try:
                shutil.copy2(source, dest)
            except Exception:
                sys.stderr.write('\n\n[HTML] Warning: Source file not found\n\tSource: "%s"\n\tDest: "%s"\n\n' % (source, dest))

        return os.path.basename(dest)

    def write_page(path, rendered):
        """Write a rendered template to `path` as UTF-8, closing the file."""
        with open(path, 'w') as page:
            page.write(rendered.encode("utf-8"))

    # embarrassingly ad-hoc:
    if entropy_figure:
        if entropy_figure.endswith('.pdf') or entropy_figure.endswith('.png'):
            entropy_figure = entropy_figure[:-4]

    for ext in ['png', 'pdf']:
        output_file = 'entropy.%s' % ext
        html_key = 'entropy_figure_%s' % ext
        # only the png is required by the report; a missing pdf is tolerated.
        essential = ext == 'png'

        if entropy_figure:
            html_dict[html_key] = copy_as('%s.%s' % (entropy_figure, ext), output_file, essential=essential)
        else:
            try:
                html_dict[html_key] = copy_as('%s.%s' % (run_info_dict['entropy'], ext), output_file, essential=essential)
            except Exception:
                # retry with the last four characters (presumably a file
                # extension) stripped from the stored path.
                html_dict[html_key] = copy_as('%s.%s' % (run_info_dict['entropy'][:-4], ext), output_file, essential=essential)

    if run_info_dict['gexf_network_file_path']:
        html_dict['gexf_network_file_path'] = copy_as(run_info_dict['gexf_network_file_path'], 'network.gexf')

    if run_info_dict['sample_mapping']:
        html_dict['sample_mapping'] = copy_as(run_info_dict['sample_mapping'], 'sample_mapping.txt')
    else:
        html_dict['sample_mapping'] = None

    html_dict['matrix_count_file_path'] = copy_as(run_info_dict['matrix_count_file_path'], 'matrix_counts.txt')
    html_dict['matrix_percent_file_path'] = copy_as(run_info_dict['matrix_percent_file_path'], 'matrix_percents.txt')
    html_dict['read_distribution_table_path'] = copy_as(run_info_dict['read_distribution_table_path'], 'read_distribution.txt')
    html_dict['environment_file_path'] = copy_as(run_info_dict['environment_file_path'], 'environment.txt')
    html_dict['oligos_fasta_file_path'] = copy_as(run_info_dict['oligos_fasta_file_path'], 'oligos.fa.txt')
    html_dict['oligos_nexus_file_path'] = copy_as(run_info_dict['oligos_nexus_file_path'], 'oligos.nex.txt')

    def get_figures_dict(html_dict_prefix):
        """Load a pickled figures dict and copy the figures it points to.

        Every leaf that has both a '.pdf' and a '.png' on disk is replaced by
        the extension-less name of the copies; every other leaf becomes None.
        Returns None when html_dict has no '<prefix>_file_path' entry.
        """
        html_dict_key = '%s_file_path' % html_dict_prefix
        if html_dict_key not in html_dict:
            return None

        # NOTE: unpickling executes arbitrary code; this file is expected to
        # be one written by the pipeline itself, never untrusted input.
        with open(html_dict[html_dict_key]) as pickled:
            figures_dict = cPickle.load(pickled)

        for _map in figures_dict:
            for _func in figures_dict[_map]:
                for _op in figures_dict[_map][_func]:
                    figure_path = figures_dict[_map][_func][_op]
                    if os.path.exists(figure_path + '.pdf') and os.path.exists(figure_path + '.png'):
                        figure_name = '-'.join([_map, _func, _op])
                        copy_as(figure_path + '.pdf', '%s.pdf' % figure_name)
                        prefix = copy_as(figure_path + '.png', '%s.png' % figure_name)
                        figures_dict[_map][_func][_op] = '.'.join(prefix.split('.')[:-1])
                    else:
                        figures_dict[_map][_func][_op] = None

        return figures_dict

    html_dict['figures_dict'] = get_figures_dict('figures_dict')
    html_dict['exclusive_figures_dict'] = get_figures_dict('exclusive_figures_dict')

    # NOTE(review): all oligotype-set entries are assumed to be conditional
    # on 'generate_sets' -- confirm.
    if html_dict['generate_sets']:
        html_dict['across_samples_MN_file_path'] = copy_as(run_info_dict['across_samples_MN_file_path'], 'across_samples_max_normalized.txt')
        html_dict['across_samples_SN_file_path'] = copy_as(run_info_dict['across_samples_SN_file_path'], 'across_samples_sum_normalized.txt')
        html_dict['oligo_sets_stackbar_figure'] = copy_as(run_info_dict['stack_bar_with_agglomerated_oligos_file_path'], 'stackbar_with_oligo_sets.png')
        html_dict['oligos_across_samples_figure'] = copy_as(run_info_dict['oligos_across_samples_file_path'], 'oligos_across_samples.png')
        html_dict['oligotype_sets_figure'] = copy_as(run_info_dict['oligotype_sets_across_samples_figure_path'], 'oligotype_sets.png')
        html_dict['matrix_count_oligo_sets_file_path'] = copy_as(run_info_dict['matrix_count_oligo_sets_file_path'], 'matrix_counts_oligo_sets.txt')
        html_dict['matrix_percent_oligo_sets_file_path'] = copy_as(run_info_dict['matrix_percent_oligo_sets_file_path'], 'matrix_percents_oligo_sets.txt')
        html_dict['oligotype_sets_file'] = copy_as(run_info_dict['oligotype_sets_file_path'], 'oligotype_sets.txt')
        # second tab-separated column of every line holds a comma-separated
        # list of oligotypes.
        with open(run_info_dict['oligotype_sets_file_path']) as sets_file:
            html_dict['oligotype_sets'] = [l.strip().split('\t')[1].split(',') for l in sets_file]

    if 'representative_seqs_fasta_file_path' in html_dict:
        html_dict['representative_seqs_fasta_file_path'] = copy_as(run_info_dict['representative_seqs_fasta_file_path'], 'oligo-representatives.fa.txt')
    else:
        html_dict['representative_seqs_fasta_file_path'] = None

    if 'blast_ref_db' in run_info_dict and os.path.exists(run_info_dict['blast_ref_db']):
        html_dict['blast_ref_db_path'] = copy_as(run_info_dict['blast_ref_db'], 'reference_db.fa')

    html_dict['entropy_components'] = [int(x) for x in html_dict['bases_of_interest_locs'].split(',')]
    html_dict['samples_dict'] = get_samples_dict_from_environment_file(run_info_dict['environment_file_path'])
    html_dict['samples'] = sorted(html_dict['samples_dict'].keys())
    html_dict['blast_results_found'] = False

    # get alignment length
    html_dict['alignment_length'] = get_alignment_length(run_info_dict['alignment'])
    # include pretty names
    html_dict['pretty_names'] = pretty_names
    # get purity score colors dict
    html_dict['score_color_dict'] = {}
    gradient = get_list_of_colors(26, colormap='RdYlGn')
    for oligo in run_info_dict['final_purity_score_dict']:
        html_dict['score_color_dict'][oligo] = gradient[int(run_info_dict['final_purity_score_dict'][oligo] * 25)]
    # get total purity score color dict
    html_dict['total_score_color'] = gradient[int(float(run_info_dict['total_purity_score_dict']) * 25)]
    # get colors dict
    html_dict['color_dict'] = get_colors_dict(run_info_dict['colors_file_path'])
    # get abundant oligos list
    html_dict['oligos'] = get_oligos_list(run_info_dict['oligos_fasta_file_path'])
    # get oligo frequencies
    html_dict['frequency'] = {}
    for oligo in html_dict['oligos']:
        html_dict['frequency'][oligo] = pretty_print(sum([d[oligo] for d in html_dict['samples_dict'].values() if oligo in d]))
    # get purity score
    html_dict['purity_score'] = run_info_dict['final_purity_score_dict']
    # get total purity score
    html_dict['total_purity_score'] = run_info_dict['total_purity_score_dict']
    # get unique sequence dict (which will contain the most frequent unique sequence for given oligotype)
    # NOTE(review): 'component_reference' is assumed to be built only when
    # representative sequences are available -- confirm.
    if 'output_directory_for_reps' in html_dict:
        html_dict['rep_oligo_seqs_clean_dict'], html_dict['rep_oligo_seqs_fancy_dict'] = get_unique_sequences_dict(html_dict)
        html_dict['oligo_reps_dict'] = get_oligo_reps_dict(html_dict, html_output_directory)
        html_dict['component_reference'] = ''.join(['<a onmouseover="popup(\'\#%d\', 50)" href="">|</a>' % i for i in range(0, html_dict['alignment_length'])])

    # get javascript code for sample pie-charts
    html_dict['pie_charts_js'] = render_to_string('pie_charts_js.tmpl', html_dict)

    # generate individual oligotype pages
    if 'output_directory_for_reps' in html_dict:
        oligos = html_dict['oligos']
        total = len(oligos)
        for i, oligo in enumerate(oligos):
            # a shallow copy is enough: only top-level keys are added below,
            # and deep-copying the whole context per page was a huge memory
            # cost. Assumes template rendering does not mutate the nested
            # values of the context -- TODO confirm.
            tmp_dict = copy.copy(html_dict)
            tmp_dict['oligo'] = oligo
            tmp_dict['distribution'] = get_oligo_distribution_dict(oligo, html_dict)
            oligo_page = os.path.join(html_output_directory, 'oligo_%s.html' % oligo)

            tmp_dict['index'] = i + 1
            tmp_dict['total'] = total
            tmp_dict['prev'] = None
            tmp_dict['next'] = None
            if i > 0:
                tmp_dict['prev'] = 'oligo_%s.html' % oligos[i - 1]
            if i < (total - 1):
                tmp_dict['next'] = 'oligo_%s.html' % oligos[i + 1]

            write_page(oligo_page, render_to_string('single_oligo.tmpl', tmp_dict))

    # generate index
    index_page = os.path.join(html_output_directory, 'index.html')
    write_page(index_page, render_to_string('index_for_oligo.tmpl', html_dict))

    return index_page
def entropy_distribution_bar(alignment, entropy_values, output_file, quick=False, no_display=False, qual_stats_dict=None, weighted=False, verbose=False):
    """Draw the per-position entropy bar chart and save it as PNG and PDF.

    Unless running in quick mode, the most abundant unique sequences of the
    alignment are printed behind the bars (one colored character per
    position), and, when `qual_stats_dict` is given, each column is shaded
    according to its mean quality.

    Parameters:
        alignment: handed to `get_unique_sequences_from_FASTA`; `None`
            forces quick mode.
        entropy_values: sequence of numbers, one bar per item.
        output_file: path prefix; '.png' and '.pdf' are appended.
        quick: when true, skip the unique sequences and quality background.
        no_display: when true, do not call `plt.show()`.
        qual_stats_dict: optional mapping indexed by alignment position whose
            entries are either falsy or provide 'mean' and 'count'.
        weighted: selects the y axis label ('Weighted Shannon Entropy').
        verbose: copied to `progress.verbose`; also reports the output path.
    """
    progress.verbose = verbose
    progress.new('Entropy Distribution Figure')
    progress.update('Computing ')

    highest_entropy = max(entropy_values)
    y_maximum = highest_entropy + (highest_entropy / 10.0)
    y_maximum = 1 if y_maximum < 1 else y_maximum

    number_of_uniques_to_show = int(y_maximum * 100)

    if alignment is None:
        quick = True

    colors_dict = {}
    if not quick:
        unique_sequences = get_unique_sequences_from_FASTA(alignment, limit=number_of_uniques_to_show)

        # sequences are upper-cased before they are drawn, so colors have to
        # be keyed on the upper-cased characters as well.
        chars = set()
        for seq in unique_sequences:
            chars.update(seq[0].upper())

        # work on a copy: adding colors for unexpected characters must not
        # leak into the module-level NUCL_COLORS table.
        colors_dict = dict(NUCL_COLORS)

        missing_chars = [char for char in chars if char not in NUCL_COLORS]

        if missing_chars:
            colors_for_missing_chars = get_list_of_colors(len(missing_chars), colormap="RdYlGn")
            for i, char in enumerate(missing_chars):
                colors_dict[char] = colors_for_missing_chars[i]
    else:
        unique_sequences = None

    fig = plt.figure(figsize=(len(entropy_values) / 20, 10))

    plt.rcParams.update({'axes.linewidth': 0.1})
    plt.rc('grid', color='0.70', linestyle='-', linewidth=0.1)
    plt.grid(True)

    plt.subplots_adjust(hspace=0, wspace=0, right=0.995, left=0.050, top=0.92, bottom=0.10)

    ax = fig.add_subplot(111)

    if not quick:
        # one text row every 0.03 entropy units, from the top down; zip()
        # stops at whichever runs out first (rows or unique sequences), which
        # also covers an empty list of unique sequences.
        rows = range(number_of_uniques_to_show - 1, 0, -3)
        for y, entry in zip(rows, unique_sequences):
            progress.append('.')
            unique_sequence = entry[0].upper()
            count = entry[1]
            frequency = entry[2]

            for i, char in enumerate(unique_sequence):
                plt.text(i, y / 100.0, char, fontsize=5, color=colors_dict[char])

            # green band whose length is proportional to the frequency
            # (at least one column wide).
            percent = int(round(frequency * len(unique_sequence))) or 1
            plt.fill_between(range(0, percent), (y + 1.15) / 100.0, (y - 0.85) / 100.0, color="green", alpha=0.2)
            plt.text(percent + 0.8, (y - 1.2) / 100.0, count, fontsize=5, color='gray')

    if not quick and qual_stats_dict:
        # add mean quality values in the background of the figure.
        colors = get_list_of_colors(21, colormap="RdYlGn")
        colors = [colors[0] for _ in range(0, 20)] + colors

        counts = [qual_stats_dict[q]['count'] for q in qual_stats_dict if qual_stats_dict[q]]
        max_count = max(counts) if counts else 0
        # log(1) == 0 would make the alpha computation below divide by zero.
        log_max_count = log(max_count) if max_count > 1 else 0

        for pos in range(0, len(entropy_values)):
            if not qual_stats_dict[pos]:
                continue

            mean = int(round(qual_stats_dict[pos]['mean']))
            count = qual_stats_dict[pos]['count']
            if log_max_count:
                alpha = (log(count) / log_max_count) / 5
            else:
                # every position has the same (single) count: full weight.
                alpha = 0.2
            plt.fill_between([pos, pos + 1], y1=0, y2=y_maximum, color=colors[mean], alpha=alpha)

    ind = np.arange(len(entropy_values))
    ax.bar(ind, entropy_values, color='black', lw=0.5)
    ax.set_xlim([0, len(entropy_values)])
    ax.set_ylim([0, y_maximum])
    plt.xlabel('Position in the Alignment')
    if weighted:
        plt.ylabel('Weighted Shannon Entropy')
    else:
        plt.ylabel('Shannon Entropy')

    progress.update('Saving into "%s"' % output_file)
    plt.savefig(output_file + '.png')
    plt.savefig(output_file + '.pdf')

    if verbose:
        progress.clear()
        # NOTE(review): assumed to belong under `if verbose` together with
        # the progress call above -- confirm.
        run.info('Entropy figure output path', output_file + '.{png, pdf}')

    if not no_display:
        try:
            progress.update('Entropy figure is being shown (you do not have display? you can avoid this step by using --no-display))')
            plt.show()
        except Exception:
            # showing the figure is best-effort (e.g. no display available);
            # the files have already been written.
            pass

    progress.end()
def generate_html_output(run_info_dict, html_output_directory=None, entropy_figure=None):
    """Build the static HTML report for a run and return the index page path.

    Static assets and the run's output files are copied into the HTML output
    directory, a template context (`html_dict`) is assembled from a deep copy
    of `run_info_dict`, one page per oligotype is rendered when the run
    provides 'output_directory_for_reps', and finally the index page is
    rendered.

    Parameters:
        run_info_dict: mapping describing the run (file paths, purity scores,
            'generate_sets', 'bases_of_interest_locs', ...).
        html_output_directory: destination directory; defaults to
            'HTML-OUTPUT' under run_info_dict['output_directory'] and is
            created when missing.
        entropy_figure: optional path of the entropy figure, with or without
            a '.png' / '.pdf' extension; when not given,
            run_info_dict['entropy'] is used.

    Returns:
        Path of the generated 'index.html'.
    """
    if not html_output_directory:
        html_output_directory = os.path.join(run_info_dict['output_directory'], 'HTML-OUTPUT')

    if not os.path.exists(html_output_directory):
        os.makedirs(html_output_directory)

    html_dict = copy.deepcopy(run_info_dict)

    # (path relative to `absolute`, name in the output directory)
    static_assets = [('static/style.css', 'style.css'),
                     ('static/header_1.png', 'header.png'),
                     ('static/missing_image.png', 'missing.png'),
                     ('static/colorbar.png', 'colorbar.png'),
                     ('scripts/jquery-1.7.1.js', 'jquery-1.7.1.js'),
                     ('scripts/popup.js', 'popup.js'),
                     ('scripts/g.pie.js', 'g.pie.js'),
                     ('scripts/g.raphael.js', 'g.raphael.js'),
                     ('scripts/raphael.js', 'raphael.js'),
                     ('scripts/morris.js', 'morris.js')]
    for asset, asset_dest_name in static_assets:
        shutil.copy2(os.path.join(absolute, asset), os.path.join(html_output_directory, asset_dest_name))

    def copy_as(source, dest_name, essential=True):
        """Copy `source` into the output directory; return the new base name."""
        dest = os.path.join(html_output_directory, dest_name)

        if essential:
            shutil.copy2(source, dest)
        else:
            # it is ok if you fail to copy files that are not
            # essential..
            try:
                shutil.copy2(source, dest)
            except Exception:
                sys.stderr.write('\n\n[HTML] Warning: Source file not found\n\tSource: "%s"\n\tDest: "%s"\n\n' % (source, dest))

        return os.path.basename(dest)

    def write_page(path, rendered):
        """Write a rendered template to `path` as UTF-8, closing the file."""
        with open(path, 'w') as page:
            page.write(rendered.encode("utf-8"))

    # embarrassingly ad-hoc:
    if entropy_figure:
        if entropy_figure.endswith('.pdf') or entropy_figure.endswith('.png'):
            entropy_figure = entropy_figure[:-4]

    for ext in ['png', 'pdf']:
        output_file = 'entropy.%s' % ext
        html_key = 'entropy_figure_%s' % ext
        # only the png is required by the report; a missing pdf is tolerated.
        essential = ext == 'png'

        if entropy_figure:
            html_dict[html_key] = copy_as('%s.%s' % (entropy_figure, ext), output_file, essential=essential)
        else:
            try:
                html_dict[html_key] = copy_as('%s.%s' % (run_info_dict['entropy'], ext), output_file, essential=essential)
            except Exception:
                # retry with the last four characters (presumably a file
                # extension) stripped from the stored path.
                html_dict[html_key] = copy_as('%s.%s' % (run_info_dict['entropy'][:-4], ext), output_file, essential=essential)

    if run_info_dict['gexf_network_file_path']:
        html_dict['gexf_network_file_path'] = copy_as(run_info_dict['gexf_network_file_path'], 'network.gexf')

    if run_info_dict['sample_mapping']:
        html_dict['sample_mapping'] = copy_as(run_info_dict['sample_mapping'], 'sample_mapping.txt')
    else:
        html_dict['sample_mapping'] = None

    html_dict['matrix_count_file_path'] = copy_as(run_info_dict['matrix_count_file_path'], 'matrix_counts.txt')
    html_dict['matrix_percent_file_path'] = copy_as(run_info_dict['matrix_percent_file_path'], 'matrix_percents.txt')
    html_dict['read_distribution_table_path'] = copy_as(run_info_dict['read_distribution_table_path'], 'read_distribution.txt')
    html_dict['environment_file_path'] = copy_as(run_info_dict['environment_file_path'], 'environment.txt')
    html_dict['oligos_fasta_file_path'] = copy_as(run_info_dict['oligos_fasta_file_path'], 'oligos.fa.txt')
    html_dict['oligos_nexus_file_path'] = copy_as(run_info_dict['oligos_nexus_file_path'], 'oligos.nex.txt')

    def get_figures_dict(html_dict_prefix):
        """Load a pickled figures dict and copy the figures it points to.

        Every leaf that has both a '.pdf' and a '.png' on disk is replaced by
        the extension-less name of the copies; every other leaf becomes None.
        Returns None when html_dict has no '<prefix>_file_path' entry.
        """
        html_dict_key = '%s_file_path' % html_dict_prefix
        if html_dict_key not in html_dict:
            return None

        # NOTE: unpickling executes arbitrary code; this file is expected to
        # be one written by the pipeline itself, never untrusted input.
        with open(html_dict[html_dict_key]) as pickled:
            figures_dict = cPickle.load(pickled)

        for _map in figures_dict:
            for _func in figures_dict[_map]:
                for _op in figures_dict[_map][_func]:
                    figure_path = figures_dict[_map][_func][_op]
                    if os.path.exists(figure_path + '.pdf') and os.path.exists(figure_path + '.png'):
                        figure_name = '-'.join([_map, _func, _op])
                        copy_as(figure_path + '.pdf', '%s.pdf' % figure_name)
                        prefix = copy_as(figure_path + '.png', '%s.png' % figure_name)
                        figures_dict[_map][_func][_op] = '.'.join(prefix.split('.')[:-1])
                    else:
                        figures_dict[_map][_func][_op] = None

        return figures_dict

    html_dict['figures_dict'] = get_figures_dict('figures_dict')
    html_dict['exclusive_figures_dict'] = get_figures_dict('exclusive_figures_dict')

    # NOTE(review): all oligotype-set entries are assumed to be conditional
    # on 'generate_sets' -- confirm.
    if html_dict['generate_sets']:
        html_dict['across_samples_MN_file_path'] = copy_as(run_info_dict['across_samples_MN_file_path'], 'across_samples_max_normalized.txt')
        html_dict['across_samples_SN_file_path'] = copy_as(run_info_dict['across_samples_SN_file_path'], 'across_samples_sum_normalized.txt')
        html_dict['oligo_sets_stackbar_figure'] = copy_as(run_info_dict['stack_bar_with_agglomerated_oligos_file_path'], 'stackbar_with_oligo_sets.png')
        html_dict['oligos_across_samples_figure'] = copy_as(run_info_dict['oligos_across_samples_file_path'], 'oligos_across_samples.png')
        html_dict['oligotype_sets_figure'] = copy_as(run_info_dict['oligotype_sets_across_samples_figure_path'], 'oligotype_sets.png')
        html_dict['matrix_count_oligo_sets_file_path'] = copy_as(run_info_dict['matrix_count_oligo_sets_file_path'], 'matrix_counts_oligo_sets.txt')
        html_dict['matrix_percent_oligo_sets_file_path'] = copy_as(run_info_dict['matrix_percent_oligo_sets_file_path'], 'matrix_percents_oligo_sets.txt')
        html_dict['oligotype_sets_file'] = copy_as(run_info_dict['oligotype_sets_file_path'], 'oligotype_sets.txt')
        # second tab-separated column of every line holds a comma-separated
        # list of oligotypes.
        with open(run_info_dict['oligotype_sets_file_path']) as sets_file:
            html_dict['oligotype_sets'] = [l.strip().split('\t')[1].split(',') for l in sets_file]

    if 'representative_seqs_fasta_file_path' in html_dict:
        html_dict['representative_seqs_fasta_file_path'] = copy_as(run_info_dict['representative_seqs_fasta_file_path'], 'oligo-representatives.fa.txt')
    else:
        html_dict['representative_seqs_fasta_file_path'] = None

    if 'blast_ref_db' in run_info_dict and os.path.exists(run_info_dict['blast_ref_db']):
        html_dict['blast_ref_db_path'] = copy_as(run_info_dict['blast_ref_db'], 'reference_db.fa')

    html_dict['entropy_components'] = [int(x) for x in html_dict['bases_of_interest_locs'].split(',')]
    html_dict['samples_dict'] = get_samples_dict_from_environment_file(run_info_dict['environment_file_path'])
    html_dict['samples'] = sorted(html_dict['samples_dict'].keys())
    html_dict['blast_results_found'] = False

    # get alignment length
    html_dict['alignment_length'] = get_alignment_length(run_info_dict['alignment'])
    # include pretty names
    html_dict['pretty_names'] = pretty_names
    # get purity score colors dict
    html_dict['score_color_dict'] = {}
    gradient = get_list_of_colors(26, colormap='RdYlGn')
    for oligo in run_info_dict['final_purity_score_dict']:
        html_dict['score_color_dict'][oligo] = gradient[int(run_info_dict['final_purity_score_dict'][oligo] * 25)]
    # get total purity score color dict
    html_dict['total_score_color'] = gradient[int(float(run_info_dict['total_purity_score_dict']) * 25)]
    # get colors dict
    html_dict['color_dict'] = get_colors_dict(run_info_dict['colors_file_path'])
    # get abundant oligos list
    html_dict['oligos'] = get_oligos_list(run_info_dict['oligos_fasta_file_path'])
    # get oligo frequencies
    html_dict['frequency'] = {}
    for oligo in html_dict['oligos']:
        html_dict['frequency'][oligo] = pretty_print(sum([d[oligo] for d in html_dict['samples_dict'].values() if oligo in d]))
    # get purity score
    html_dict['purity_score'] = run_info_dict['final_purity_score_dict']
    # get total purity score
    html_dict['total_purity_score'] = run_info_dict['total_purity_score_dict']
    # get unique sequence dict (which will contain the most frequent unique sequence for given oligotype)
    # NOTE(review): 'component_reference' is assumed to be built only when
    # representative sequences are available -- confirm.
    if 'output_directory_for_reps' in html_dict:
        html_dict['rep_oligo_seqs_clean_dict'], html_dict['rep_oligo_seqs_fancy_dict'] = get_unique_sequences_dict(html_dict)
        html_dict['oligo_reps_dict'] = get_oligo_reps_dict(html_dict, html_output_directory)
        html_dict['component_reference'] = ''.join(['<a onmouseover="popup(\'\#%d\', 50)" href="">|</a>' % i for i in range(0, html_dict['alignment_length'])])

    # get javascript code for sample pie-charts
    html_dict['pie_charts_js'] = render_to_string('pie_charts_js.tmpl', html_dict)

    # generate individual oligotype pages
    if 'output_directory_for_reps' in html_dict:
        oligos = html_dict['oligos']
        total = len(oligos)
        for i, oligo in enumerate(oligos):
            # a shallow copy is enough: only top-level keys are added below,
            # and deep-copying the whole context per page was a huge memory
            # cost. Assumes template rendering does not mutate the nested
            # values of the context -- TODO confirm.
            tmp_dict = copy.copy(html_dict)
            tmp_dict['oligo'] = oligo
            tmp_dict['distribution'] = get_oligo_distribution_dict(oligo, html_dict)
            oligo_page = os.path.join(html_output_directory, 'oligo_%s.html' % oligo)

            tmp_dict['index'] = i + 1
            tmp_dict['total'] = total
            tmp_dict['prev'] = None
            tmp_dict['next'] = None
            if i > 0:
                tmp_dict['prev'] = 'oligo_%s.html' % oligos[i - 1]
            if i < (total - 1):
                tmp_dict['next'] = 'oligo_%s.html' % oligos[i + 1]

            write_page(oligo_page, render_to_string('single_oligo.tmpl', tmp_dict))

    # generate index
    index_page = os.path.join(html_output_directory, 'index.html')
    write_page(index_page, render_to_string('index_for_oligo.tmpl', html_dict))

    return index_page
quals_dict_filtered = {} ids_in_alignment_file = [] while alignment.next(): ids_in_alignment_file.append(alignment.id) ids_in_alignment_file = set(ids_in_alignment_file) for read_id in quals_dict: if read_id in ids_in_alignment_file: quals_dict_filtered[read_id] = quals_dict[read_id] ids_in_alignment_file.remove(read_id) qual_stats_dict = get_qual_stats_dict(quals_dict_filtered) colors = get_list_of_colors(21, colormap="RdYlGn") colors = [colors[0] for _ in range(0, 20)] + colors max_count = max([ qual_stats_dict[q]['count'] for q in qual_stats_dict if qual_stats_dict[q] ]) alignment_length = len(quals_dict.values()[0]) fig = plt.figure(figsize=(25, 8)) plt.rc('grid', color='0.50', linestyle='-', linewidth=0.1) plt.grid(True) plt.subplots_adjust(left=0.02, bottom=0.09, top=0.95, right=0.98) for position in range(0, alignment_length): print position
def vis_oligotype_sets_distribution(partitions, vectors, samples, colors_dict=None, output_file=None, legend=False, project_title=None, display=True):
    """Plot the abundance of every oligotype set across samples.

    For each set, the mean of its members' vectors is drawn as a line and the
    range between the members' minimum and maximum is shaded.

    Parameters:
        partitions: list of oligotype sets; each set is a sequence of oligo
            names that are keys of `vectors`.
        vectors: mapping from oligo name to a sequence of values (all
            sequences are read up to the length of the first one).
        samples: sample names, used as x tick labels.
        colors_dict: optional mapping from the first oligo of each set to an
            HTML color; generated from the 'Accent' colormap when None.
        output_file: when given, the figure is saved to this path.
        legend: when true, a legend is drawn to the right of the plot.
        project_title: figure title; a generic title is used when empty.
        display: when true, `plt.show()` is attempted.
    """
    if colors_dict is None:
        colors_dict = {}
        list_of_colors = get_list_of_colors(len(partitions), colormap='Accent')
        for i, group in enumerate(partitions):
            colors_dict[group[0]] = list_of_colors[i]

    # figure..
    plt.figure(figsize=(20, 7))

    # leave room on the right hand side for the legend box.
    plt.subplots_adjust(left=0.03, bottom=0.25, top=0.95, right=0.87 if legend else 0.99)

    plt.rcParams.update({'axes.linewidth': 0.1})
    plt.rc('grid', color='0.70', linestyle='-', linewidth=0.1)
    plt.grid(True)

    N = len(samples)
    ind = np.arange(N)
    width = 0.75

    # every vector is assumed to be as long as the first one; nothing is
    # plotted per dimension when there are no vectors at all.
    number_of_dimensions = len(next(iter(vectors.values()))) if vectors else 0

    for i, group in enumerate(partitions):
        vector = []
        mins = []
        maxs = []
        for d in range(0, number_of_dimensions):
            column = [vectors[oligo][d] for oligo in group]
            vector.append(np.mean(column))
            mins.append(np.min(column))
            maxs.append(np.max(column))

        try:
            color = HTMLColorToRGB(colors_dict[group[0]])
        except Exception:
            # unknown set or unparsable color: fall back to black.
            color = 'black'

        plt.fill_between(range(0, len(vector)), maxs, mins, color=color, alpha=0.1)
        plt.plot(vector, color=color, linewidth=1, alpha=0.95, label='Set #%d' % i)
        # NOTE(review): both wider strokes are assumed to be drawn only for
        # short vectors -- confirm.
        if len(vector) < 50:
            plt.plot(vector, color=color, linewidth=3, alpha=0.7, label='_nolegend_')
            plt.plot(vector, color=color, linewidth=7, alpha=0.6, zorder=i, label='_nolegend_')

    plt.ylabel('Oligotype Set Abundance', size='large')
    plt.title(project_title if project_title else 'Oligotype Sets Across Samples')

    plt.xticks(ind, samples, rotation=90, size='small')
    plt.yticks([])
    plt.ylim(ymax=100)
    plt.xlim(xmin=-(width) / 2, xmax=len(samples) - 0.5)

    if legend:
        plt.legend(bbox_to_anchor=(1.01, 1), loc=2, borderaxespad=0.0, shadow=True, fancybox=True)

        leg = plt.gca().get_legend()
        ltext = leg.get_texts()
        llines = leg.get_lines()
        frame = leg.get_frame()

        frame.set_facecolor('0.80')
        plt.setp(ltext, fontsize='small', fontname='arial', family='monospace')
        plt.setp(llines, linewidth=1.5)

    if output_file:
        plt.savefig(output_file)

    if display:
        try:
            plt.show()
        except Exception:
            # showing the figure is best-effort (e.g. no display available).
            pass
def vis_oligotype_sets_distribution(partitions, vectors, samples, colors_dict=None, output_file=None, legend=False, project_title=None, display=True):
    """Plot the abundance of every oligotype set across samples.

    For each set, the mean of its members' vectors is drawn as a line and the
    range between the members' minimum and maximum is shaded.

    Parameters:
        partitions: list of oligotype sets; each set is a sequence of oligo
            names that are keys of `vectors`.
        vectors: mapping from oligo name to a sequence of values (all
            sequences are read up to the length of the first one).
        samples: sample names, used as x tick labels.
        colors_dict: optional mapping from the first oligo of each set to an
            HTML color; generated from the 'Accent' colormap when None.
        output_file: when given, the figure is saved to this path.
        legend: when true, a legend is drawn to the right of the plot.
        project_title: figure title; a generic title is used when empty.
        display: when true, `plt.show()` is attempted.
    """
    if colors_dict is None:
        colors_dict = {}
        list_of_colors = get_list_of_colors(len(partitions), colormap='Accent')
        for i, group in enumerate(partitions):
            colors_dict[group[0]] = list_of_colors[i]

    # figure..
    plt.figure(figsize=(20, 7))

    # leave room on the right hand side for the legend box.
    plt.subplots_adjust(left=0.03, bottom=0.25, top=0.95, right=0.87 if legend else 0.99)

    plt.rcParams.update({'axes.linewidth': 0.1})
    plt.rc('grid', color='0.70', linestyle='-', linewidth=0.1)
    plt.grid(True)

    N = len(samples)
    ind = np.arange(N)
    width = 0.75

    # every vector is assumed to be as long as the first one; nothing is
    # plotted per dimension when there are no vectors at all.
    number_of_dimensions = len(next(iter(vectors.values()))) if vectors else 0

    for i, group in enumerate(partitions):
        vector = []
        mins = []
        maxs = []
        for d in range(0, number_of_dimensions):
            column = [vectors[oligo][d] for oligo in group]
            vector.append(np.mean(column))
            mins.append(np.min(column))
            maxs.append(np.max(column))

        try:
            color = HTMLColorToRGB(colors_dict[group[0]])
        except Exception:
            # unknown set or unparsable color: fall back to black.
            color = 'black'

        plt.fill_between(range(0, len(vector)), maxs, mins, color=color, alpha=0.1)
        plt.plot(vector, color=color, linewidth=1, alpha=0.95, label='Set #%d' % i)
        # NOTE(review): both wider strokes are assumed to be drawn only for
        # short vectors -- confirm.
        if len(vector) < 50:
            plt.plot(vector, color=color, linewidth=3, alpha=0.7, label='_nolegend_')
            plt.plot(vector, color=color, linewidth=7, alpha=0.6, zorder=i, label='_nolegend_')

    plt.ylabel('Oligotype Set Abundance', size='large')
    plt.title(project_title if project_title else 'Oligotype Sets Across Samples')

    plt.xticks(ind, samples, rotation=90, size='small')
    plt.yticks([])
    plt.ylim(ymax=100)
    plt.xlim(xmin=-(width) / 2, xmax=len(samples) - 0.5)

    if legend:
        plt.legend(bbox_to_anchor=(1.01, 1), loc=2, borderaxespad=0.0, shadow=True, fancybox=True)

        leg = plt.gca().get_legend()
        ltext = leg.get_texts()
        llines = leg.get_lines()
        frame = leg.get_frame()

        frame.set_facecolor('0.80')
        plt.setp(ltext, fontsize='small', fontname='arial', family='monospace')
        plt.setp(llines, linewidth=1.5)

    if output_file:
        plt.savefig(output_file)

    if display:
        try:
            plt.show()
        except Exception:
            # showing the figure is best-effort (e.g. no display available).
            pass
quals_dict_filtered = {} ids_in_alignment_file = [] while alignment.next(): ids_in_alignment_file.append(alignment.id) ids_in_alignment_file = set(ids_in_alignment_file) for read_id in quals_dict: if read_id in ids_in_alignment_file: quals_dict_filtered[read_id] = quals_dict[read_id] ids_in_alignment_file.remove(read_id) qual_stats_dict = get_qual_stats_dict(quals_dict_filtered) colors = get_list_of_colors(21, colormap="RdYlGn") colors = [colors[0] for _ in range(0, 20)] + colors max_count = max([qual_stats_dict[q]['count'] for q in qual_stats_dict if qual_stats_dict[q]]) alignment_length = len(quals_dict.values()[0]) fig = plt.figure(figsize = (25, 8)) plt.rc('grid', color='0.50', linestyle='-', linewidth=0.1) plt.grid(True) plt.subplots_adjust(left=0.02, bottom = 0.09, top = 0.95, right = 0.98) for position in range(0, alignment_length): print position if not qual_stats_dict[position]: