Example #1
0
def make_rseqc_summary_plots(rd_file, gc_file, do_qc=True, min_exonmap=60.0, max_three_prime_map=10.0):
    """Make rseqc summary plots.

    Reads two CSV summaries, derives two per-sample percentages and
    draws each of them as a scatter plot (one point per sample):

    * ``ExonMap``: 100 * (CDS_Exons + 3'UTR_Exons + 5'UTR_Exons)
      / Total_Assigned_Tags, computed on the ``Tag_count`` values.
    * ``three_prime_map``: 100 * (sum of columns "91".."100")
      / (sum of columns "1".."100").

    Args:
        rd_file: CSV file (first column used as index) that provides
            "sample", "Group" and "Tag_count"; "Group" must contain the
            four categories named in the ExonMap formula above.
        gc_file: CSV file (first column used as index) with columns
            labelled "1" through "100". Its index supplies the sample
            labels shown in the hover tooltips.
        do_qc: if true, draw a dashed threshold line in each plot and
            colour each point by whether it violates the threshold.
        min_exonmap: samples with ExonMap below this value are flagged.
        max_three_prime_map: samples with three_prime_map above this
            value are flagged.

    Returns:
        dict with keys 'fig' (grid with both plots), 'uri' (data URIs of
        the two input files) and 'file' (the two input file names).
    """
    df_rd = pd.read_csv(rd_file, index_col=0)
    df_gc = pd.read_csv(gc_file, index_col=0)
    samples = list(df_gc.index)

    # Use tags for formula
    df = df_rd.pivot_table(columns=["Group"], values=["Tag_count"], index="sample")
    exon_tags = (df['Tag_count', "CDS_Exons"]
                 + df['Tag_count', "3'UTR_Exons"]
                 + df['Tag_count', "5'UTR_Exons"])
    df['Tag_count', "ExonMap"] = 100.0 * exon_tags / df['Tag_count', "Total_Assigned_Tags"]

    # Drop the outer 'Tag_count' level so columns are addressed by group name
    df.columns = df.columns.droplevel()
    # Numeric x coordinate; sample names are only exposed through tooltips
    df['i'] = list(range(len(df.index)))
    # NOTE(review): the labels come from gc_file and are assigned by
    # position, so this presumably relies on both files listing the
    # samples in the same order -- confirm.
    df['samples'] = samples
    df_gc["three_prime_map"] = (100.0 * df_gc.loc[:, "91":"100"].sum(axis=1)
                                / df_gc.loc[:, "1":"100"].sum(axis=1))
    df = pd.concat([df, df_gc], axis=1)

    colors = brewer["PiYG"][3]
    # Keyed by the string form of the QC test outcome
    colormap = {'False' : colors[0], 'True' : colors[2]}
    source = ColumnDataSource(df)

    # Default tools, plot_config and tooltips
    TOOLS = "pan,box_zoom,box_select,lasso_select,reset,save,hover"
    plot_config = dict(plot_width=300, plot_height=300,
                       tools=TOOLS, title_text_font_size='12pt',
                       x_range=[0, len(samples)], y_range=[0, 105],
                       x_axis_type=None, y_axis_type="linear",
                       xaxis={'axis_label' : "sample", 'major_label_orientation' : np.pi/3, 'axis_label_text_font_size' : '10pt'},
                       yaxis={'axis_label' : "percent (%)", 'major_label_orientation' : 1, 'axis_label_text_font_size' : '10pt'})

    # Exonmap plot
    qc = QCArgs(x=[0, len(samples)],
                y=[min_exonmap, min_exonmap],
                line_dash=[2, 4]) if do_qc else None
    if do_qc:
        c1 = [colormap[str(failed)] for failed in df['ExonMap'] < min_exonmap]
    else:
        c1 = colors[0]
    p1 = scatterplot(x='i', y='ExonMap',
                     source=source, color=c1, qc=qc,
                     tooltips=[{'type': HoverTool, 'tips': [
                         ('Sample', '@samples'), ('ExonMap', '@ExonMap'), ]}],
                     title="Tags mapping to exons", **plot_config)

    # Fraction reads mapping to the 10% right-most end
    qc = QCArgs(x=[0, len(samples)],
                y=[max_three_prime_map, max_three_prime_map],
                line_dash=[2, 4]) if do_qc else None
    if do_qc:
        c2 = [colormap[str(failed)]
              for failed in df['three_prime_map'] > max_three_prime_map]
    else:
        c2 = colors[0]
    # The tooltip reports the value that is actually plotted on this
    # figure (three_prime_map), not the ExonMap value of the first plot.
    p2 = scatterplot(x='i', y='three_prime_map',
                     color=c2, source=source,
                     qc=qc,
                     tooltips=[{'type': HoverTool, 'tips': [
                         ('Sample', '@samples'), ('three_prime_map', '@three_prime_map'), ]}],
                     title="Reads mapping to 3' end", **plot_config)

    return {'fig' : gridplot([[p1, p2]]),
            'uri' : [data_uri(rd_file), data_uri(gc_file)],
            'file' : [rd_file, gc_file]}
Example #2
0
def make_star_alignment_plots(df, samples, do_qc=False, min_reads=200000, min_map=40, max_unmap=20, max_mismatch_sum=1.0):
    """Make star alignment plots.

    Draws five scatter plots (one point per sample) plus a data table
    from a data frame of alignment statistics.

    Args:
        df: data frame with one row per sample. It must contain the
            columns 'Number_of_input_reads', 'Uniquely_mapped_reads_PCT',
            'Mismatch_rate_per_base__PCT', 'Deletion_rate_per_base',
            'Insertion_rate_per_base', 'PCT_of_reads_unmapped:_other',
            'PCT_of_reads_unmapped:_too_many_mismatches' and
            'PCT_of_reads_unmapped:_too_short'. The frame is modified in
            place: the columns 'i', 'samples', 'mismatch_sum' and
            'PCT_of_reads_unmapped' are added.
        samples: sample labels, one per row of ``df``, in row order.
        do_qc: if true, colour points by whether they violate the
            thresholds, draw dashed threshold lines and build a QC
            summary table.
        min_reads: flag samples with fewer input reads than this.
        min_map: flag samples whose uniquely mapped percentage is below
            this.
        max_unmap: flag samples whose unmapped percentage exceeds this.
            Only affects the "Unmapped reads" plot; it is not part of
            the QC summary table.
        max_mismatch_sum: flag samples whose summed mismatch, insertion
            and deletion rate exceeds this.

    Returns:
        dict with keys 'plots' (layout holding the five plots), 'table'
        (the data table) and 'qctable' (a data frame of boolean filters
        per sample, or None when ``do_qc`` is false).
    """
    # Currently hover tool and categorical variables don't play
    # nicely together in bokeh: see
    # https://github.com/bokeh/bokeh/issues/624

    # Workaround as long as categorical variables don't work with HoverTool
    df['i'] = list(range(len(df.index)))
    df['samples'] = samples
    df['mismatch_sum'] = df['Mismatch_rate_per_base__PCT'] + df['Deletion_rate_per_base'] + df['Insertion_rate_per_base']
    df['PCT_of_reads_unmapped'] = df['PCT_of_reads_unmapped:_other'] + df['PCT_of_reads_unmapped:_too_many_mismatches'] + df['PCT_of_reads_unmapped:_too_short']

    colors = brewer["PiYG"][3]
    # NOTE(review): failing samples get the middle colour of the 3-class
    # palette here, whereas make_rseqc_summary_plots uses colors[2] --
    # confirm which one is intended.
    colormap = {'False' : colors[0], 'True' : colors[1]}

    def _qc_colors(failed):
        """Per-sample colours for a boolean QC mask, or plain blue without QC."""
        if not do_qc:
            return "blue"
        return [colormap[str(flag)] for flag in failed]

    def _qc_line(threshold):
        """Dashed horizontal line at the threshold, or None without QC."""
        if not do_qc:
            return None
        return QCArgs(x=[0, len(samples)], y=[threshold, threshold], line_dash=[2, 4])

    # Boolean masks, True where a sample violates the threshold
    low_reads = df['Number_of_input_reads'] < min_reads
    low_map = df['Uniquely_mapped_reads_PCT'] < min_map
    high_unmap = df['PCT_of_reads_unmapped'] > max_unmap
    high_mismatch = df['mismatch_sum'] > max_mismatch_sum

    columns = [
        TableColumn(field="samples", title="Sample"),
        TableColumn(field="Number_of_input_reads", title="Number of input reads"),
        TableColumn(field="Uniquely_mapped_reads_PCT", title="Uniquely mapped reads (%)"),
        TableColumn(field="Mismatch_rate_per_base__PCT", title="Mismatch rate per base (%)"),
        TableColumn(field="Insertion_rate_per_base", title="Insertion rate per base (%)"),
        TableColumn(field="Deletion_rate_per_base", title="Deletion rate per base (%)"),
        TableColumn(field="PCT_of_reads_unmapped", title="Unmapped reads (%)"),
    ]

    source = ColumnDataSource(df)
    # Generate the table
    table = DataTable(source=source, columns=columns, editable=False, width=1000)

    # Default tools, plot_config and tooltips
    TOOLS = "pan,box_zoom,box_select,lasso_select,reset,save,hover"
    plot_config = dict(plot_width=300, plot_height=300, tools=TOOLS, title_text_font_size='12pt',
                       x_axis_type=None, y_axis_type="log",
                       xaxis={'axis_label' : 'sample', 'axis_label_text_font_size' : '10pt', 'major_label_orientation' : np.pi/3},
                       yaxis={'axis_label' : 'reads', 'axis_label_text_font_size' : '10pt', 'major_label_orientation' : np.pi/3},
                       x_range=[0, len(samples)]
                       )

    # Number of input reads (log scale)
    p1 = scatterplot(x='i', y='Number_of_input_reads', source=source,
                     color=_qc_colors(low_reads), qc=_qc_line(min_reads),
                     title="Number of input reads",
                     tooltips=[{'type': HoverTool, 'tips': [('Sample', '@samples'), ('Reads', '@Number_of_input_reads'), ]}],
                     **plot_config)

    # Uniquely mapped reads. All remaining plots show percentages/rates,
    # so switch to a linear y axis using the same keys the config was
    # built with ('y_axis_type' and the nested 'yaxis' label).
    plot_config['y_axis_type'] = 'linear'
    plot_config['yaxis'].update({'axis_label' : 'percent (%)'})
    p2 = scatterplot(x='i', y='Uniquely_mapped_reads_PCT', source=source,
                     color=_qc_colors(low_map), qc=_qc_line(min_map),
                     title="Uniquely mapping reads",
                     tooltips=[{'type': HoverTool, 'tips': [('Sample', '@samples'), ('Pct_mapped', '@Uniquely_mapped_reads_PCT'), ]}],
                     **plot_config)

    # Mapping reads in general
    p3 = scatterplot(x='i', y='PCT_of_reads_unmapped', source=source,
                     color=_qc_colors(high_unmap), qc=_qc_line(max_unmap),
                     title="Unmapped reads",
                     tooltips=[{'type': HoverTool, 'tips': [('Sample', '@samples'), ('Pct_unmapped', '@PCT_of_reads_unmapped'), ]}],
                     **plot_config)

    # Mismatch/indel rate
    plot_config['tools'] = TOOLS.replace("lasso_select,", "")
    plot_config['yaxis'].update({'axis_label' : 'Rate per base'})
    p4 = scatterplot(x='i', y=['Mismatch_rate_per_base__PCT', 'Insertion_rate_per_base', 'Deletion_rate_per_base'],
                     color=["blue", "red", "green"], source=source,
                     title="Mismatch and indel rates",
                     tooltips=[{'type': HoverTool, 'tips': [
                         ('Sample', '@samples'),
                         ('Mismatch rate per base', '@Mismatch_rate_per_base__PCT'),
                         ('Insertion rate per base', '@Insertion_rate_per_base'),
                         ('Deletion rate per base', '@Deletion_rate_per_base'),
                     ]}],
                     **plot_config)
    # Restrict box selection to the x direction
    select_tool = p4.select(dict(type=BoxSelectTool))
    select_tool.dimensions = ['width']

    # Plot sum
    plot_config['yaxis'].update({'axis_label' : 'Mismatch/indel sum'})
    p5 = scatterplot(x='i', y='mismatch_sum', source=source,
                     color=_qc_colors(high_mismatch), qc=_qc_line(max_mismatch_sum),
                     title="Mismatch / indel sum",
                     tooltips=[{'type': HoverTool, 'tips': [('Sample', '@samples'), ('Mismatch/indel rate per base', '@mismatch_sum'), ]}],
                     **plot_config)
    select_tool = p5.select(dict(type=BoxSelectTool))
    select_tool.dimensions = ['width']

    df_qc = None
    if do_qc:
        # QC summary table
        d = {'samples' : samples,
             'read_filter' : low_reads,
             'map_filter' : low_map,
             'mismatch_filter' : high_mismatch,
             }
        d['filter'] = d['read_filter'] | d['map_filter'] | d['mismatch_filter']
        # NOTE(review): the filter Series carry df.index while the frame
        # is indexed by df.samples; presumably df is already indexed by
        # sample name so the two agree -- confirm.
        df_qc = pd.DataFrame(data=d, index=df.samples)

    top_row = gridplot([[p1, p2, p3]])
    bottom_row = HBox(children=[gridplot([[p4, p5]])])
    return {'plots' : VBox(children=[top_row, bottom_row]),
            'table' : table,
            'qctable' : df_qc}