예제 #1
0
def get_colors(number_of_colors):
    """Return `number_of_colors` colors taken from the "Dark2" colormap.

    Thin convenience wrapper around get_list_of_colors().
    """
    palette = get_list_of_colors(number_of_colors, colormap="Dark2")
    return palette
def entropy_distribution_bar(alignment, entropy_values, output_file, quick = False, no_display = False, qual_stats_dict = None, weighted = False, verbose = False):
    """Draw the entropy bar chart and save it as PNG and PDF.

    One black bar is drawn per item of `entropy_values`. The figure is saved
    to `output_file + '.png'` and `output_file + '.pdf'`. Unless `quick` is
    in effect, the most frequent unique sequences of the alignment are
    printed behind the bars, and, when `qual_stats_dict` is given, every
    position gets a background band colored by its mean quality.

    Parameters:
        alignment: handed to get_unique_sequences_from_FASTA(); when it is
            None the function behaves as if `quick` were True.
        entropy_values: non-empty sequence of numbers (max() is taken of it).
        output_file: path prefix; the '.png' / '.pdf' suffixes are appended.
        quick: when True, skip the unique sequences and quality background.
        no_display: when True, plt.show() is not called.
        qual_stats_dict: optional mapping of position -> dict with 'mean'
            and 'count' keys; falsy or missing entries are skipped.
        weighted: selects the 'Weighted Shannon Entropy' y-axis label.
        verbose: assigned to progress.verbose; when True the output path is
            also reported through run.info().
    """
    progress.verbose = verbose
    progress.new('Entropy Distribution Figure')
    progress.update('Computing ')

    # leave 10% head-room above the tallest bar, but never go below 1
    y_maximum = max(entropy_values) + (max(entropy_values) / 10.0)
    y_maximum = 1 if y_maximum < 1 else y_maximum

    number_of_uniques_to_show = int(y_maximum * 100)

    if alignment is None:
        quick = True

    colors_dict = {}
    unique_sequences = None
    if not quick:
        unique_sequences = get_unique_sequences_from_FASTA(alignment, limit = number_of_uniques_to_show)

        # sequences are drawn upper-cased below, so the characters that need
        # a color have to be collected upper-cased as well
        chars = set()
        for seq in unique_sequences:
            chars.update(seq[0].upper())

        # work on a copy: adding colors for unknown characters must not
        # modify the shared NUCL_COLORS mapping
        colors_dict = dict(NUCL_COLORS)

        # sorted, so the same character always gets the same extra color
        missing_chars = sorted(char for char in chars if char not in colors_dict)

        if missing_chars:
            colors_for_missing_chars = get_list_of_colors(len(missing_chars), colormap="RdYlGn")
            for char, color in zip(missing_chars, colors_for_missing_chars):
                colors_dict[char] = color

    fig = plt.figure(figsize = (len(entropy_values) / 20, 10))

    plt.rcParams.update({'axes.linewidth' : 0.1})
    plt.rc('grid', color='0.70', linestyle='-', linewidth=0.1)
    plt.grid(True)

    plt.subplots_adjust(hspace = 0, wspace = 0, right = 0.995, left = 0.050, top = 0.92, bottom = 0.10)

    ax = fig.add_subplot(111)

    if not quick:
        # one text row every 0.03 units on the y axis, from the top down;
        # zip() stops at whichever runs out first (rows or sequences), which
        # also copes with an empty list of unique sequences
        rows = range(number_of_uniques_to_show - 1, 0, -3)
        for y, entry in zip(rows, unique_sequences):
            progress.append('.')
            unique_sequence = entry[0].upper()
            count = entry[1]
            frequency = entry[2]
            for i, char in enumerate(unique_sequence):
                plt.text(i, y / 100.0, char, fontsize = 5, color = colors_dict[char])

            # green bar whose length is proportional to the frequency of the
            # sequence (at least one column wide), followed by its count
            percent = int(round(frequency * len(unique_sequence))) or 1
            plt.fill_between(range(0, percent), (y + 1.15) / 100.0, (y - 0.85) / 100.0, color="green", alpha = 0.2)
            plt.text(percent + 0.8, (y - 1.2) / 100.0, count, fontsize = 5, color = 'gray')

    if not quick and qual_stats_dict:
        # add mean quality values in the background of the figure.
        colors = get_list_of_colors(21, colormap="RdYlGn")
        colors = [colors[0] for _ in range(0, 20)] + colors

        counts = [stats['count'] for stats in qual_stats_dict.values() if stats]
        max_count = max(counts) if counts else 0
        # log(1) == 0 would make the ratio below divide by zero
        log_max_count = log(max_count) if max_count > 1 else 0

        for pos in range(0, len(entropy_values)):
            stats = qual_stats_dict.get(pos)
            if not stats or stats['count'] < 1:
                continue

            mean = int(round(stats['mean']))
            count = stats['count']
            if log_max_count > 0:
                intensity = log(count) / log_max_count
            else:
                # every counted position has a count of 1: full intensity
                intensity = 1.0
            plt.fill_between([pos, pos + 1], y1 = 0, y2 = y_maximum, color = colors[mean], alpha = intensity / 5)

    ind = np.arange(len(entropy_values))
    ax.bar(ind, entropy_values, color = 'black', lw = 0.5)
    ax.set_xlim([0, len(entropy_values)])
    ax.set_ylim([0, y_maximum])
    plt.xlabel('Position in the Alignment')
    if weighted:
        plt.ylabel('Weighted Shannon Entropy')
    else:
        plt.ylabel('Shannon Entropy')

    progress.update('Saving into "%s"' % output_file)
    plt.savefig(output_file + '.png')
    plt.savefig(output_file + '.pdf')

    if verbose:
        progress.reset()
        run.info('Entropy figure output path', output_file + '.{png, pdf}')

    if not no_display:
        progress.update('Entropy figure is being shown (you do not have display? you can avoid this step by using --no-display))')
        try:
            plt.show()
        except Exception:
            # showing the figure is best effort (e.g. no display available);
            # the files have already been saved at this point
            pass

    progress.end()
def get_colors(number_of_colors):
    """Return `number_of_colors` colors taken from the "Dark2" colormap.

    Thin convenience wrapper around get_list_of_colors().
    """
    palette = get_list_of_colors(number_of_colors, colormap="Dark2")
    return palette
예제 #4
0
def generate_html_output(run_info_dict,
                         html_output_directory=None,
                         entropy_figure=None):
    """Build the HTML report for a run and return the path of its index page.

    The function copies the static assets and the run's output files into
    the HTML output directory, collects everything the templates need in a
    deep copy of `run_info_dict`, renders one page per oligo (only when
    'output_directory_for_reps' is present) and finally renders index.html.

    Parameters:
        run_info_dict: dict describing the run; the keys read here
            ('output_directory', 'entropy', 'matrix_count_file_path', ...)
            must be present, a missing one raises KeyError.
        html_output_directory: destination directory; defaults to
            'HTML-OUTPUT' inside run_info_dict['output_directory']. It is
            created when it does not exist.
        entropy_figure: optional path of the entropy figure, with or without
            a '.png' / '.pdf' extension; when not given,
            run_info_dict['entropy'] is used.

    Returns:
        Path of the generated index.html.

    Raises:
        IOError / OSError when an essential file cannot be copied.
    """
    if not html_output_directory:
        html_output_directory = os.path.join(run_info_dict['output_directory'],
                                             'HTML-OUTPUT')

    if not os.path.exists(html_output_directory):
        os.makedirs(html_output_directory)

    html_dict = copy.deepcopy(run_info_dict)

    # (source path relative to `absolute`, file name in the output directory)
    static_assets = [
        ('static/style.css', 'style.css'),
        ('static/header_1.png', 'header.png'),
        ('static/missing_image.png', 'missing.png'),
        ('static/colorbar.png', 'colorbar.png'),
        ('scripts/jquery-1.7.1.js', 'jquery-1.7.1.js'),
        ('scripts/popup.js', 'popup.js'),
        ('scripts/g.pie.js', 'g.pie.js'),
        ('scripts/g.raphael.js', 'g.raphael.js'),
        ('scripts/raphael.js', 'raphael.js'),
        ('scripts/morris.js', 'morris.js'),
    ]
    for asset_source, asset_name in static_assets:
        shutil.copy2(os.path.join(absolute, asset_source),
                     os.path.join(html_output_directory, asset_name))

    def copy_as(source, dest_name, essential=True):
        """Copy `source` into the output directory as `dest_name`.

        Returns the base name of the destination. A failed copy raises for
        essential files and only prints a warning for the others.
        """
        dest = os.path.join(html_output_directory, dest_name)

        if essential:
            shutil.copy2(source, dest)
        else:
            # it is ok if you fail to copy files that are not
            # essential..
            try:
                shutil.copy2(source, dest)
            except EnvironmentError:
                sys.stderr.write(
                    '\n\n[HTML] Warning: Source file not found\n\tSource: "%s"\n\tDest: "%s"\n\n'
                    % (source, dest))

        return os.path.basename(dest)

    def copy_entropy_figure(prefix, ext):
        """Copy `prefix`.`ext` as entropy.`ext`; only the png is essential."""
        return copy_as(prefix + ('.%s' % ext),
                       'entropy.%s' % ext,
                       essential=(ext == 'png'))

    # embarrassingly ad-hoc:
    if entropy_figure and entropy_figure.endswith(('.pdf', '.png')):
        entropy_figure = entropy_figure[:-4]

    for ext in ['png', 'pdf']:
        figure_key = 'entropy_figure_%s' % ext
        if entropy_figure:
            html_dict[figure_key] = copy_entropy_figure(entropy_figure, ext)
        else:
            try:
                html_dict[figure_key] = copy_entropy_figure(
                    run_info_dict['entropy'], ext)
            except EnvironmentError:
                # the stored path may carry a three-letter extension already;
                # retry without it
                html_dict[figure_key] = copy_entropy_figure(
                    run_info_dict['entropy'][:-4], ext)

    if run_info_dict['gexf_network_file_path']:
        html_dict['gexf_network_file_path'] = copy_as(
            run_info_dict['gexf_network_file_path'], 'network.gexf')

    if run_info_dict['sample_mapping']:
        html_dict['sample_mapping'] = copy_as(run_info_dict['sample_mapping'],
                                              'sample_mapping.txt')
    else:
        html_dict['sample_mapping'] = None

    html_dict['matrix_count_file_path'] = copy_as(
        run_info_dict['matrix_count_file_path'], 'matrix_counts.txt')
    html_dict['matrix_percent_file_path'] = copy_as(
        run_info_dict['matrix_percent_file_path'], 'matrix_percents.txt')
    html_dict['read_distribution_table_path'] = copy_as(
        run_info_dict['read_distribution_table_path'], 'read_distribution.txt')
    html_dict['environment_file_path'] = copy_as(
        run_info_dict['environment_file_path'], 'environment.txt')
    html_dict['oligos_fasta_file_path'] = copy_as(
        run_info_dict['oligos_fasta_file_path'], 'oligos.fa.txt')
    html_dict['oligos_nexus_file_path'] = copy_as(
        run_info_dict['oligos_nexus_file_path'], 'oligos.nex.txt')

    def get_figures_dict(html_dict_prefix):
        """Load the pickled figures dict and copy the figures it refers to.

        Every leaf that has both a '.pdf' and a '.png' file on disk is
        replaced by the extension-less name of the copies; every other leaf
        becomes None. Returns None when html_dict has no
        '<prefix>_file_path' key.
        """
        html_dict_key = '%s_file_path' % html_dict_prefix
        if html_dict_key not in html_dict:
            return None

        # NOTE: unpickling can execute arbitrary code; this is only safe as
        # long as the pickle was written by a trusted run.
        with open(html_dict[html_dict_key]) as pickled_figures:
            figures_dict = cPickle.load(pickled_figures)

        for _map in figures_dict:
            for _func in figures_dict[_map]:
                for _op in figures_dict[_map][_func]:
                    figure_prefix = figures_dict[_map][_func][_op]
                    if os.path.exists(figure_prefix + '.pdf') and \
                       os.path.exists(figure_prefix + '.png'):
                        dest_prefix = '-'.join([_map, _func, _op])
                        copy_as(figure_prefix + '.pdf', '%s.pdf' % dest_prefix)
                        copied = copy_as(figure_prefix + '.png',
                                         '%s.png' % dest_prefix)
                        figures_dict[_map][_func][_op] = '.'.join(
                            copied.split('.')[:-1])
                    else:
                        figures_dict[_map][_func][_op] = None
        return figures_dict

    html_dict['figures_dict'] = get_figures_dict('figures_dict')
    html_dict['exclusive_figures_dict'] = get_figures_dict(
        'exclusive_figures_dict')

    if html_dict['generate_sets']:
        html_dict['across_samples_MN_file_path'] = copy_as(
            run_info_dict['across_samples_MN_file_path'],
            'across_samples_max_normalized.txt')
        html_dict['across_samples_SN_file_path'] = copy_as(
            run_info_dict['across_samples_SN_file_path'],
            'across_samples_sum_normalized.txt')
        html_dict['oligo_sets_stackbar_figure'] = copy_as(
            run_info_dict['stack_bar_with_agglomerated_oligos_file_path'],
            'stackbar_with_oligo_sets.png')
        html_dict['oligos_across_samples_figure'] = copy_as(
            run_info_dict['oligos_across_samples_file_path'],
            'oligos_across_samples.png')
        html_dict['oligotype_sets_figure'] = copy_as(
            run_info_dict['oligotype_sets_across_samples_figure_path'],
            'oligotype_sets.png')
        html_dict['matrix_count_oligo_sets_file_path'] = copy_as(
            run_info_dict['matrix_count_oligo_sets_file_path'],
            'matrix_counts_oligo_sets.txt')
        html_dict['matrix_percent_oligo_sets_file_path'] = copy_as(
            run_info_dict['matrix_percent_oligo_sets_file_path'],
            'matrix_percents_oligo_sets.txt')
        html_dict['oligotype_sets_file'] = copy_as(
            run_info_dict['oligotype_sets_file_path'], 'oligotype_sets.txt')
        # second tab-separated column of every line: comma-separated members
        with open(run_info_dict['oligotype_sets_file_path']) as sets_file:
            html_dict['oligotype_sets'] = [
                l.strip().split('\t')[1].split(',') for l in sets_file
            ]

    if 'representative_seqs_fasta_file_path' in html_dict:
        html_dict['representative_seqs_fasta_file_path'] = copy_as(
            run_info_dict['representative_seqs_fasta_file_path'],
            'oligo-representatives.fa.txt')
    else:
        html_dict['representative_seqs_fasta_file_path'] = None
    if 'blast_ref_db' in run_info_dict and os.path.exists(
            run_info_dict['blast_ref_db']):
        html_dict['blast_ref_db_path'] = copy_as(run_info_dict['blast_ref_db'],
                                                 'reference_db.fa')
    html_dict['entropy_components'] = [
        int(x) for x in html_dict['bases_of_interest_locs'].split(',')
    ]
    html_dict['samples_dict'] = get_samples_dict_from_environment_file(
        run_info_dict['environment_file_path'])
    html_dict['samples'] = sorted(html_dict['samples_dict'].keys())
    html_dict['blast_results_found'] = False

    # get alignment length
    html_dict['alignment_length'] = get_alignment_length(
        run_info_dict['alignment'])
    # include pretty names
    html_dict['pretty_names'] = pretty_names
    # get purity score colors dict
    html_dict['score_color_dict'] = {}
    gradient = get_list_of_colors(26, colormap='RdYlGn')
    for oligo in run_info_dict['final_purity_score_dict']:
        html_dict['score_color_dict'][oligo] = gradient[int(
            run_info_dict['final_purity_score_dict'][oligo] * 25)]
    # get total purity score color dict
    html_dict['total_score_color'] = gradient[int(
        float(run_info_dict['total_purity_score_dict']) * 25)]
    # get colors dict
    html_dict['color_dict'] = get_colors_dict(
        run_info_dict['colors_file_path'])
    # get abundant oligos list
    html_dict['oligos'] = get_oligos_list(
        run_info_dict['oligos_fasta_file_path'])
    # get oligo frequencies
    html_dict['frequency'] = {}
    for oligo in html_dict['oligos']:
        html_dict['frequency'][oligo] = pretty_print(
            sum([
                d[oligo] for d in html_dict['samples_dict'].values()
                if oligo in d
            ]))
    # get purity score
    html_dict['purity_score'] = run_info_dict['final_purity_score_dict']
    # get total purity score
    html_dict['total_purity_score'] = run_info_dict['total_purity_score_dict']
    # get unique sequence dict (which will contain the most frequent unique sequence for given oligotype)
    if 'output_directory_for_reps' in html_dict:
        html_dict['rep_oligo_seqs_clean_dict'], html_dict[
            'rep_oligo_seqs_fancy_dict'] = get_unique_sequences_dict(html_dict)
        html_dict['oligo_reps_dict'] = get_oligo_reps_dict(
            html_dict, html_output_directory)
        html_dict['component_reference'] = ''.join([
            '<a onmouseover="popup(\'\#%d\', 50)" href="">|</a>' % i
            for i in range(0, html_dict['alignment_length'])
        ])

    # get javascript code for sample pie-charts
    html_dict['pie_charts_js'] = render_to_string('pie_charts_js.tmpl',
                                                  html_dict)

    # FIXME: code below is very inefficient and causes a huge
    # memory issue. fix it by not using deepcopy.
    # generate individual oligotype pages
    if 'output_directory_for_reps' in html_dict:
        oligos = html_dict['oligos']
        number_of_oligos = len(oligos)
        for i, oligo in enumerate(oligos):
            tmp_dict = copy.deepcopy(html_dict)
            tmp_dict['oligo'] = oligo
            tmp_dict['distribution'] = get_oligo_distribution_dict(
                oligo, html_dict)
            oligo_page = os.path.join(html_output_directory,
                                      'oligo_%s.html' % oligo)

            # 1-based position of this page, plus links to its neighbours
            tmp_dict['index'] = i + 1
            tmp_dict['total'] = number_of_oligos
            tmp_dict['prev'] = None
            tmp_dict['next'] = None
            if i > 0:
                tmp_dict['prev'] = 'oligo_%s.html' % oligos[i - 1]
            if i < (number_of_oligos - 1):
                tmp_dict['next'] = 'oligo_%s.html' % oligos[i + 1]

            rendered = render_to_string('single_oligo.tmpl', tmp_dict)

            with open(oligo_page, 'w') as page:
                page.write(rendered.encode("utf-8"))

    # generate index
    index_page = os.path.join(html_output_directory, 'index.html')
    rendered = render_to_string('index_for_oligo.tmpl', html_dict)

    with open(index_page, 'w') as page:
        page.write(rendered.encode("utf-8"))

    return index_page
def entropy_distribution_bar(alignment, entropy_values, output_file, quick = False, no_display = False, qual_stats_dict = None, weighted = False, verbose = False):
    """Draw the entropy bar chart and save it as PNG and PDF.

    One black bar is drawn per item of `entropy_values`. The figure is saved
    to `output_file + '.png'` and `output_file + '.pdf'`. Unless `quick` is
    in effect, the most frequent unique sequences of the alignment are
    printed behind the bars, and, when `qual_stats_dict` is given, every
    position gets a background band colored by its mean quality.

    Parameters:
        alignment: handed to get_unique_sequences_from_FASTA(); when it is
            None the function behaves as if `quick` were True.
        entropy_values: non-empty sequence of numbers (max() is taken of it).
        output_file: path prefix; the '.png' / '.pdf' suffixes are appended.
        quick: when True, skip the unique sequences and quality background.
        no_display: when True, plt.show() is not called.
        qual_stats_dict: optional mapping of position -> dict with 'mean'
            and 'count' keys; falsy or missing entries are skipped.
        weighted: selects the 'Weighted Shannon Entropy' y-axis label.
        verbose: assigned to progress.verbose; when True the output path is
            also reported through run.info().
    """
    progress.verbose = verbose
    progress.new('Entropy Distribution Figure')
    progress.update('Computing ')

    # leave 10% head-room above the tallest bar, but never go below 1
    y_maximum = max(entropy_values) + (max(entropy_values) / 10.0)
    y_maximum = 1 if y_maximum < 1 else y_maximum

    number_of_uniques_to_show = int(y_maximum * 100)

    if alignment is None:
        quick = True

    colors_dict = {}
    unique_sequences = None
    if not quick:
        unique_sequences = get_unique_sequences_from_FASTA(alignment, limit = number_of_uniques_to_show)

        # sequences are drawn upper-cased below, so the characters that need
        # a color have to be collected upper-cased as well
        chars = set()
        for seq in unique_sequences:
            chars.update(seq[0].upper())

        # work on a copy: adding colors for unknown characters must not
        # modify the shared NUCL_COLORS mapping
        colors_dict = dict(NUCL_COLORS)

        # sorted, so the same character always gets the same extra color
        missing_chars = sorted(char for char in chars if char not in colors_dict)

        if missing_chars:
            colors_for_missing_chars = get_list_of_colors(len(missing_chars), colormap="RdYlGn")
            for char, color in zip(missing_chars, colors_for_missing_chars):
                colors_dict[char] = color

    fig = plt.figure(figsize = (len(entropy_values) / 20, 10))

    plt.rcParams.update({'axes.linewidth' : 0.1})
    plt.rc('grid', color='0.70', linestyle='-', linewidth=0.1)
    plt.grid(True)

    plt.subplots_adjust(hspace = 0, wspace = 0, right = 0.995, left = 0.050, top = 0.92, bottom = 0.10)

    ax = fig.add_subplot(111)

    if not quick:
        # one text row every 0.03 units on the y axis, from the top down;
        # zip() stops at whichever runs out first (rows or sequences), which
        # also copes with an empty list of unique sequences
        rows = range(number_of_uniques_to_show - 1, 0, -3)
        for y, entry in zip(rows, unique_sequences):
            progress.append('.')
            unique_sequence = entry[0].upper()
            count = entry[1]
            frequency = entry[2]
            for i, char in enumerate(unique_sequence):
                plt.text(i, y / 100.0, char, fontsize = 5, color = colors_dict[char])

            # green bar whose length is proportional to the frequency of the
            # sequence (at least one column wide), followed by its count
            percent = int(round(frequency * len(unique_sequence))) or 1
            plt.fill_between(range(0, percent), (y + 1.15) / 100.0, (y - 0.85) / 100.0, color="green", alpha = 0.2)
            plt.text(percent + 0.8, (y - 1.2) / 100.0, count, fontsize = 5, color = 'gray')

    if not quick and qual_stats_dict:
        # add mean quality values in the background of the figure.
        colors = get_list_of_colors(21, colormap="RdYlGn")
        colors = [colors[0] for _ in range(0, 20)] + colors

        counts = [stats['count'] for stats in qual_stats_dict.values() if stats]
        max_count = max(counts) if counts else 0
        # log(1) == 0 would make the ratio below divide by zero
        log_max_count = log(max_count) if max_count > 1 else 0

        for pos in range(0, len(entropy_values)):
            stats = qual_stats_dict.get(pos)
            if not stats or stats['count'] < 1:
                continue

            mean = int(round(stats['mean']))
            count = stats['count']
            if log_max_count > 0:
                intensity = log(count) / log_max_count
            else:
                # every counted position has a count of 1: full intensity
                intensity = 1.0
            plt.fill_between([pos, pos + 1], y1 = 0, y2 = y_maximum, color = colors[mean], alpha = intensity / 5)

    ind = np.arange(len(entropy_values))
    ax.bar(ind, entropy_values, color = 'black', lw = 0.5)
    ax.set_xlim([0, len(entropy_values)])
    ax.set_ylim([0, y_maximum])
    plt.xlabel('Position in the Alignment')
    if weighted:
        plt.ylabel('Weighted Shannon Entropy')
    else:
        plt.ylabel('Shannon Entropy')

    progress.update('Saving into "%s"' % output_file)
    plt.savefig(output_file + '.png')
    plt.savefig(output_file + '.pdf')

    if verbose:
        progress.clear()
        run.info('Entropy figure output path', output_file + '.{png, pdf}')

    if not no_display:
        progress.update('Entropy figure is being shown (you do not have display? you can avoid this step by using --no-display))')
        try:
            plt.show()
        except Exception:
            # showing the figure is best effort (e.g. no display available);
            # the files have already been saved at this point
            pass

    progress.end()
예제 #6
0
def generate_html_output(run_info_dict, html_output_directory = None, entropy_figure = None):
    """Build the HTML report for a run and return the path of its index page.

    The function copies the static assets and the run's output files into
    the HTML output directory, collects everything the templates need in a
    deep copy of `run_info_dict`, renders one page per oligo (only when
    'output_directory_for_reps' is present) and finally renders index.html.

    Parameters:
        run_info_dict: dict describing the run; the keys read here
            ('output_directory', 'entropy', 'matrix_count_file_path', ...)
            must be present, a missing one raises KeyError.
        html_output_directory: destination directory; defaults to
            'HTML-OUTPUT' inside run_info_dict['output_directory']. It is
            created when it does not exist.
        entropy_figure: optional path of the entropy figure, with or without
            a '.png' / '.pdf' extension; when not given,
            run_info_dict['entropy'] is used.

    Returns:
        Path of the generated index.html.

    Raises:
        IOError / OSError when an essential file cannot be copied.
    """
    if not html_output_directory:
        html_output_directory = os.path.join(run_info_dict['output_directory'], 'HTML-OUTPUT')

    if not os.path.exists(html_output_directory):
        os.makedirs(html_output_directory)

    html_dict = copy.deepcopy(run_info_dict)

    # (source path relative to `absolute`, file name in the output directory)
    static_assets = [
        ('static/style.css', 'style.css'),
        ('static/header_1.png', 'header.png'),
        ('static/missing_image.png', 'missing.png'),
        ('static/colorbar.png', 'colorbar.png'),
        ('scripts/jquery-1.7.1.js', 'jquery-1.7.1.js'),
        ('scripts/popup.js', 'popup.js'),
        ('scripts/g.pie.js', 'g.pie.js'),
        ('scripts/g.raphael.js', 'g.raphael.js'),
        ('scripts/raphael.js', 'raphael.js'),
        ('scripts/morris.js', 'morris.js'),
    ]
    for asset_source, asset_name in static_assets:
        shutil.copy2(os.path.join(absolute, asset_source), os.path.join(html_output_directory, asset_name))

    def copy_as(source, dest_name, essential = True):
        """Copy `source` into the output directory as `dest_name`.

        Returns the base name of the destination. A failed copy raises for
        essential files and only prints a warning for the others.
        """
        dest = os.path.join(html_output_directory, dest_name)

        if essential:
            shutil.copy2(source, dest)
        else:
            # it is ok if you fail to copy files that are not
            # essential..
            try:
                shutil.copy2(source, dest)
            except EnvironmentError:
                sys.stderr.write('\n\n[HTML] Warning: Source file not found\n\tSource: "%s"\n\tDest: "%s"\n\n' % (source, dest))

        return os.path.basename(dest)

    def copy_entropy_figure(prefix, ext):
        """Copy `prefix`.`ext` as entropy.`ext`; only the png is essential."""
        return copy_as(prefix + ('.%s' % ext), 'entropy.%s' % ext, essential = (ext == 'png'))

    # embarrassingly ad-hoc:
    if entropy_figure and entropy_figure.endswith(('.pdf', '.png')):
        entropy_figure = entropy_figure[:-4]

    for ext in ['png', 'pdf']:
        figure_key = 'entropy_figure_%s' % ext
        if entropy_figure:
            html_dict[figure_key] = copy_entropy_figure(entropy_figure, ext)
        else:
            try:
                html_dict[figure_key] = copy_entropy_figure(run_info_dict['entropy'], ext)
            except EnvironmentError:
                # the stored path may carry a three-letter extension already;
                # retry without it
                html_dict[figure_key] = copy_entropy_figure(run_info_dict['entropy'][:-4], ext)

    if run_info_dict['gexf_network_file_path']:
        html_dict['gexf_network_file_path'] = copy_as(run_info_dict['gexf_network_file_path'], 'network.gexf')

    if run_info_dict['sample_mapping']:
        html_dict['sample_mapping'] = copy_as(run_info_dict['sample_mapping'], 'sample_mapping.txt')
    else:
        html_dict['sample_mapping'] = None

    html_dict['matrix_count_file_path'] = copy_as(run_info_dict['matrix_count_file_path'], 'matrix_counts.txt')
    html_dict['matrix_percent_file_path'] = copy_as(run_info_dict['matrix_percent_file_path'], 'matrix_percents.txt')
    html_dict['read_distribution_table_path'] = copy_as(run_info_dict['read_distribution_table_path'], 'read_distribution.txt')
    html_dict['environment_file_path'] = copy_as(run_info_dict['environment_file_path'], 'environment.txt')
    html_dict['oligos_fasta_file_path'] = copy_as(run_info_dict['oligos_fasta_file_path'], 'oligos.fa.txt')
    html_dict['oligos_nexus_file_path'] = copy_as(run_info_dict['oligos_nexus_file_path'], 'oligos.nex.txt')

    def get_figures_dict(html_dict_prefix):
        """Load the pickled figures dict and copy the figures it refers to.

        Every leaf that has both a '.pdf' and a '.png' file on disk is
        replaced by the extension-less name of the copies; every other leaf
        becomes None. Returns None when html_dict has no
        '<prefix>_file_path' key.
        """
        html_dict_key = '%s_file_path' % html_dict_prefix
        if html_dict_key not in html_dict:
            return None

        # NOTE: unpickling can execute arbitrary code; this is only safe as
        # long as the pickle was written by a trusted run.
        with open(html_dict[html_dict_key]) as pickled_figures:
            figures_dict = cPickle.load(pickled_figures)

        for _map in figures_dict:
            for _func in figures_dict[_map]:
                for _op in figures_dict[_map][_func]:
                    figure_prefix = figures_dict[_map][_func][_op]
                    if os.path.exists(figure_prefix + '.pdf') and os.path.exists(figure_prefix + '.png'):
                        dest_prefix = '-'.join([_map, _func, _op])
                        copy_as(figure_prefix + '.pdf', '%s.pdf' % dest_prefix)
                        copied = copy_as(figure_prefix + '.png', '%s.png' % dest_prefix)
                        figures_dict[_map][_func][_op] = '.'.join(copied.split('.')[:-1])
                    else:
                        figures_dict[_map][_func][_op] = None
        return figures_dict

    html_dict['figures_dict'] = get_figures_dict('figures_dict')
    html_dict['exclusive_figures_dict'] = get_figures_dict('exclusive_figures_dict')

    if html_dict['generate_sets']:
        html_dict['across_samples_MN_file_path'] = copy_as(run_info_dict['across_samples_MN_file_path'], 'across_samples_max_normalized.txt')
        html_dict['across_samples_SN_file_path'] = copy_as(run_info_dict['across_samples_SN_file_path'], 'across_samples_sum_normalized.txt')
        html_dict['oligo_sets_stackbar_figure'] = copy_as(run_info_dict['stack_bar_with_agglomerated_oligos_file_path'], 'stackbar_with_oligo_sets.png')
        html_dict['oligos_across_samples_figure'] = copy_as(run_info_dict['oligos_across_samples_file_path'], 'oligos_across_samples.png')
        html_dict['oligotype_sets_figure'] = copy_as(run_info_dict['oligotype_sets_across_samples_figure_path'], 'oligotype_sets.png')
        html_dict['matrix_count_oligo_sets_file_path'] = copy_as(run_info_dict['matrix_count_oligo_sets_file_path'], 'matrix_counts_oligo_sets.txt')
        html_dict['matrix_percent_oligo_sets_file_path'] = copy_as(run_info_dict['matrix_percent_oligo_sets_file_path'], 'matrix_percents_oligo_sets.txt')
        html_dict['oligotype_sets_file'] = copy_as(run_info_dict['oligotype_sets_file_path'], 'oligotype_sets.txt')
        # second tab-separated column of every line: comma-separated members
        with open(run_info_dict['oligotype_sets_file_path']) as sets_file:
            html_dict['oligotype_sets'] = [l.strip().split('\t')[1].split(',') for l in sets_file]

    if 'representative_seqs_fasta_file_path' in html_dict:
        html_dict['representative_seqs_fasta_file_path'] = copy_as(run_info_dict['representative_seqs_fasta_file_path'], 'oligo-representatives.fa.txt')
    else:
        html_dict['representative_seqs_fasta_file_path'] = None
    if 'blast_ref_db' in run_info_dict and os.path.exists(run_info_dict['blast_ref_db']):
        html_dict['blast_ref_db_path'] = copy_as(run_info_dict['blast_ref_db'], 'reference_db.fa')
    html_dict['entropy_components'] = [int(x) for x in html_dict['bases_of_interest_locs'].split(',')]
    html_dict['samples_dict'] = get_samples_dict_from_environment_file(run_info_dict['environment_file_path'])
    html_dict['samples'] = sorted(html_dict['samples_dict'].keys())
    html_dict['blast_results_found'] = False

    # get alignment length
    html_dict['alignment_length'] = get_alignment_length(run_info_dict['alignment'])
    # include pretty names
    html_dict['pretty_names'] = pretty_names
    # get purity score colors dict
    html_dict['score_color_dict'] = {}
    gradient = get_list_of_colors(26, colormap = 'RdYlGn')
    for oligo in run_info_dict['final_purity_score_dict']:
        html_dict['score_color_dict'][oligo] = gradient[int(run_info_dict['final_purity_score_dict'][oligo] * 25)]
    # get total purity score color dict
    html_dict['total_score_color'] = gradient[int(float(run_info_dict['total_purity_score_dict']) * 25)]
    # get colors dict
    html_dict['color_dict'] = get_colors_dict(run_info_dict['colors_file_path'])
    # get abundant oligos list
    html_dict['oligos'] = get_oligos_list(run_info_dict['oligos_fasta_file_path'])
    # get oligo frequencies
    html_dict['frequency'] = {}
    for oligo in html_dict['oligos']:
        html_dict['frequency'][oligo] = pretty_print(sum([d[oligo] for d in html_dict['samples_dict'].values() if oligo in d]))
    # get purity score
    html_dict['purity_score'] = run_info_dict['final_purity_score_dict']
    # get total purity score
    html_dict['total_purity_score'] = run_info_dict['total_purity_score_dict']
    # get unique sequence dict (which will contain the most frequent unique sequence for given oligotype)
    if 'output_directory_for_reps' in html_dict:
        html_dict['rep_oligo_seqs_clean_dict'], html_dict['rep_oligo_seqs_fancy_dict'] = get_unique_sequences_dict(html_dict)
        html_dict['oligo_reps_dict'] = get_oligo_reps_dict(html_dict, html_output_directory)
        html_dict['component_reference'] = ''.join(['<a onmouseover="popup(\'\#%d\', 50)" href="">|</a>' % i for i in range(0, html_dict['alignment_length'])])

    # get javascript code for sample pie-charts
    html_dict['pie_charts_js'] = render_to_string('pie_charts_js.tmpl', html_dict)

    # FIXME: code below is very inefficient and causes a huge
    # memory issue. fix it by not using deepcopy.
    # generate individual oligotype pages
    if 'output_directory_for_reps' in html_dict:
        oligos = html_dict['oligos']
        number_of_oligos = len(oligos)
        for i, oligo in enumerate(oligos):
            tmp_dict = copy.deepcopy(html_dict)
            tmp_dict['oligo'] = oligo
            tmp_dict['distribution'] = get_oligo_distribution_dict(oligo, html_dict)
            oligo_page = os.path.join(html_output_directory, 'oligo_%s.html' % oligo)

            # 1-based position of this page, plus links to its neighbours
            tmp_dict['index'] = i + 1
            tmp_dict['total'] = number_of_oligos
            tmp_dict['prev'] = None
            tmp_dict['next'] = None
            if i > 0:
                tmp_dict['prev'] = 'oligo_%s.html' % oligos[i - 1]
            if i < (number_of_oligos - 1):
                tmp_dict['next'] = 'oligo_%s.html' % oligos[i + 1]

            rendered = render_to_string('single_oligo.tmpl', tmp_dict)

            with open(oligo_page, 'w') as page:
                page.write(rendered.encode("utf-8"))

    # generate index
    index_page = os.path.join(html_output_directory, 'index.html')
    rendered = render_to_string('index_for_oligo.tmpl', html_dict)

    with open(index_page, 'w') as page:
        page.write(rendered.encode("utf-8"))

    return index_page
예제 #7
0
# Gather the ids of all the reads found in the alignment file.
ids_in_alignment_file = set()
while alignment.next():
    ids_in_alignment_file.add(alignment.id)

# Keep the quality entries of the reads that are in the alignment. Matched
# ids are dropped from the set, so it ends up holding only the ids that
# have no quality entry.
quals_dict_filtered = {}
for read_id in quals_dict:
    if read_id not in ids_in_alignment_file:
        continue
    quals_dict_filtered[read_id] = quals_dict[read_id]
    ids_in_alignment_file.remove(read_id)

qual_stats_dict = get_qual_stats_dict(quals_dict_filtered)

# 21 colors from the RdYlGn colormap, with the first one repeated 20 times
# in front of them (41 entries in total).
colors = get_list_of_colors(21, colormap="RdYlGn")
colors = [colors[0]] * 20 + colors

# Largest 'count' among the non-empty entries of the quality statistics.
max_count = max([
    qual_stats_dict[q]['count'] for q in qual_stats_dict if qual_stats_dict[q]
])

alignment_length = len(quals_dict.values()[0])

fig = plt.figure(figsize=(25, 8))
plt.rc('grid', color='0.50', linestyle='-', linewidth=0.1)
plt.grid(True)

plt.subplots_adjust(left=0.02, bottom=0.09, top=0.95, right=0.98)

for position in range(alignment_length):
    print(position)
def vis_oligotype_sets_distribution(partitions, vectors, samples, colors_dict = None, output_file = None, legend = False, project_title = None, display = True):
    """Draw one abundance line per oligotype set across all samples.

    For every group in `partitions` the per-dimension mean of its members'
    vectors is plotted as a line, and the span between the per-dimension
    minimum and maximum is shaded behind it.

    Args:
        partitions: sequence of groups; each group is a non-empty sequence of
            keys into `vectors`. The first key of a group is also the key used
            to look its color up in `colors_dict`.
        vectors: mapping from oligo key to a sequence of numeric values. The
            length of the first stored vector decides how many dimensions are
            plotted.
        samples: sequence of labels used as x tick labels.
        colors_dict: optional mapping from the first key of each group to a
            color accepted by `HTMLColorToRGB`. When None, one color per group
            is taken from `get_list_of_colors` with the 'Accent' colormap.
        output_file: when truthy, the figure is written there with
            `plt.savefig`.
        legend: when truthy, a legend is placed to the right of the axes.
        project_title: plot title; a generic title is used when falsy.
        display: when truthy, `plt.show()` is attempted and failures are
            ignored.

    Returns:
        None.

    Note:
        This updates the global matplotlib rcParams ('axes.linewidth' and the
        grid settings) and does not close the figure it creates.
    """
    if colors_dict is None:
        colors_dict = {}
        list_of_colors = get_list_of_colors(len(partitions), colormap = 'Accent')
        for i, group in enumerate(partitions):
            colors_dict[group[0]] = list_of_colors[i]

    # figure..
    plt.figure(figsize=(20, 7))

    # keep free space on the right side only when a legend will be drawn there
    right_margin = 0.87 if legend else 0.99
    plt.subplots_adjust(left=0.03, bottom = 0.25, top = 0.95, right = right_margin)

    plt.rcParams.update({'axes.linewidth' : 0.1})
    plt.rc('grid', color='0.70', linestyle='-', linewidth=0.1)
    plt.grid(True)

    ind = np.arange(len(samples))
    width = 0.75

    # list() keeps the lookup working when values() returns a view, not a list
    number_of_dimensions = len(list(vectors.values())[0])

    for i, group in enumerate(partitions):
        vector = []
        mins = []
        maxs = []

        for d in range(number_of_dimensions):
            # collect the column once and derive all three statistics from it
            values = [vectors[oligo][d] for oligo in group]
            vector.append(np.mean(values))
            mins.append(np.min(values))
            maxs.append(np.max(values))

        try:
            color = HTMLColorToRGB(colors_dict[group[0]])
        except Exception:
            # no usable color for this group: fall back to black
            color = 'black'

        plt.fill_between(range(len(vector)), maxs, mins, color=color, alpha = 0.1)
        plt.plot(vector, color=color, linewidth = 1, alpha = 0.95, label = 'Set #%d' % i)
        if len(vector) < 50:
            # with few points, thicken the line using two wider translucent passes
            plt.plot(vector, color=color, linewidth = 3, alpha = 0.7, label = '_nolegend_')
            plt.plot(vector, color=color, linewidth = 7, alpha = 0.6, zorder = i, label = '_nolegend_')

    plt.ylabel('Oligotype Set Abundance', size='large')
    plt.title(project_title if project_title else 'Oligotype Sets Across Samples')

    plt.xticks(ind, samples, rotation=90, size='small')
    plt.yticks([])
    plt.ylim(ymax = 100)
    plt.xlim(xmin = -(width) / 2, xmax = len(samples) - 0.5)

    if legend:
        plt.legend(bbox_to_anchor=(1.01, 1), loc=2, borderaxespad=0.0, shadow=True, fancybox=True)

        leg = plt.gca().get_legend()
        ltext  = leg.get_texts()
        llines = leg.get_lines()
        frame  = leg.get_frame()

        frame.set_facecolor('0.80')
        plt.setp(ltext, fontsize='small', fontname='arial', family='monospace')
        plt.setp(llines, linewidth=1.5)

    if output_file:
        plt.savefig(output_file)
    if display:
        try:
            plt.show()
        except Exception:
            # showing the figure is best-effort; a failure here must not
            # undo a figure that has already been saved
            pass
예제 #9
0
def vis_oligotype_sets_distribution(partitions,
                                    vectors,
                                    samples,
                                    colors_dict=None,
                                    output_file=None,
                                    legend=False,
                                    project_title=None,
                                    display=True):
    """Plot the abundance profile of every oligotype set across samples.

    Each group in `partitions` contributes one line (the per-dimension mean
    of its members' vectors) drawn over a shaded band that spans the
    per-dimension minimum and maximum of the same members.

    Args:
        partitions: sequence of groups. A group is a non-empty sequence of
            keys into `vectors`; its first key is also used to look the
            group's color up in `colors_dict`.
        vectors: mapping from oligo key to a sequence of numeric values.
            The number of plotted dimensions is the length of the first
            vector stored in the mapping.
        samples: sequence of labels shown as x tick labels.
        colors_dict: optional mapping from a group's first key to a color
            accepted by `HTMLColorToRGB`. If None, colors come from
            `get_list_of_colors` using the 'Accent' colormap.
        output_file: if truthy, the figure is saved there with
            `plt.savefig`.
        legend: if truthy, a legend is drawn to the right of the axes.
        project_title: title of the plot; a generic one is used if falsy.
        display: if truthy, `plt.show()` is attempted; errors it raises
            are ignored.

    Returns:
        None.

    Note:
        Global matplotlib rcParams ('axes.linewidth' and the grid settings)
        are modified, and the created figure is not closed.
    """
    if colors_dict is None:
        colors_dict = {}
        list_of_colors = get_list_of_colors(len(partitions), colormap='Accent')
        for i, group in enumerate(partitions):
            colors_dict[group[0]] = list_of_colors[i]

    # figure..
    plt.figure(figsize=(20, 7))

    # a legend, when requested, needs spare room on the right-hand side
    plt.subplots_adjust(left=0.03,
                        bottom=0.25,
                        top=0.95,
                        right=0.87 if legend else 0.99)

    plt.rcParams.update({'axes.linewidth': 0.1})
    plt.rc('grid', color='0.70', linestyle='-', linewidth=0.1)
    plt.grid(True)

    ind = np.arange(len(samples))
    width = 0.75

    # wrap in list() so this also works when values() returns a view
    number_of_dimensions = len(list(vectors.values())[0])

    for i, group in enumerate(partitions):
        vector = []
        mins = []
        maxs = []

        for d in range(number_of_dimensions):
            # one pass over the group per dimension feeds all three statistics
            column = [vectors[oligo][d] for oligo in group]
            vector.append(np.mean(column))
            mins.append(np.min(column))
            maxs.append(np.max(column))

        try:
            color = HTMLColorToRGB(colors_dict[group[0]])
        except Exception:
            # the group has no usable color entry: draw it in black
            color = 'black'

        plt.fill_between(range(len(vector)),
                         maxs,
                         mins,
                         color=color,
                         alpha=0.1)
        plt.plot(vector,
                 color=color,
                 linewidth=1,
                 alpha=0.95,
                 label='Set #%d' % i)
        if len(vector) < 50:
            # short series get two wider translucent strokes on top
            plt.plot(vector,
                     color=color,
                     linewidth=3,
                     alpha=0.7,
                     label='_nolegend_')
            plt.plot(vector,
                     color=color,
                     linewidth=7,
                     alpha=0.6,
                     zorder=i,
                     label='_nolegend_')

    plt.ylabel('Oligotype Set Abundance', size='large')
    plt.title(
        project_title if project_title else 'Oligotype Sets Across Samples')

    plt.xticks(ind, samples, rotation=90, size='small')
    plt.yticks([])
    plt.ylim(ymax=100)
    plt.xlim(xmin=-(width) / 2, xmax=len(samples) - 0.5)

    if legend:
        plt.legend(bbox_to_anchor=(1.01, 1),
                   loc=2,
                   borderaxespad=0.0,
                   shadow=True,
                   fancybox=True)

        leg = plt.gca().get_legend()
        ltext = leg.get_texts()
        llines = leg.get_lines()
        frame = leg.get_frame()

        frame.set_facecolor('0.80')
        plt.setp(ltext, fontsize='small', fontname='arial', family='monospace')
        plt.setp(llines, linewidth=1.5)

    if output_file:
        plt.savefig(output_file)
    if display:
        try:
            plt.show()
        except Exception:
            # displaying is best-effort; an error here is deliberately
            # ignored so a saved figure still counts as success
            pass
# Quality scores limited to the reads that the alignment contains.
quals_dict_filtered = {}

# Gather the id of every entry the alignment iterates over.
ids_in_alignment_file = set()
while alignment.next():
    ids_in_alignment_file.add(alignment.id)

for read_id in quals_dict:
    if read_id not in ids_in_alignment_file:
        continue
    quals_dict_filtered[read_id] = quals_dict[read_id]
    # Whatever stays in the set after the loop had no entry in quals_dict.
    ids_in_alignment_file.remove(read_id)

qual_stats_dict = get_qual_stats_dict(quals_dict_filtered)

# Put 20 copies of the first colormap color in front of the color list.
colors = get_list_of_colors(21, colormap="RdYlGn")
colors = [colors[0]] * 20 + colors

# Highest 'count' found among the non-empty entries of qual_stats_dict.
max_count = max(
    qual_stats_dict[q]['count'] for q in qual_stats_dict if qual_stats_dict[q]
)

# Taken from the first entry of the unfiltered quals_dict.
alignment_length = len(list(quals_dict.values())[0])

fig = plt.figure(figsize=(25, 8))
plt.rc('grid', linewidth=0.1, linestyle='-', color='0.50')
plt.grid(True)

plt.subplots_adjust(top=0.95, bottom=0.09, left=0.02, right=0.98)

for position in range(0, alignment_length):
    print position
    
    if not qual_stats_dict[position]: