Python scatterplot Examples

Programming Language: Python

Namespace/Package Name: snakemakelib.bokeh.plot

Method/Function: scatterplot

Examples at hotexamples.com: 2

Python scatterplot - 2 examples found. These are the top-rated real-world Python examples of snakemakelib.bokeh.plot.scatterplot extracted from open source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

File: rseqc.py Project: elijahc/snakemakelib

def make_rseqc_summary_plots(rd_file, gc_file, do_qc=True, min_exonmap=60.0, max_three_prime_map=10.0):
    """Make rseqc summary plots.

    Reads two CSV summaries, derives one percentage per sample from each
    and draws them as two side-by-side scatter plots.

    Args:
        rd_file: CSV file, read with its first column as index, that can be
            pivoted on "sample" / "Group" with "Tag_count" as values. The
            groups "CDS_Exons", "3'UTR_Exons", "5'UTR_Exons" and
            "Total_Assigned_Tags" must be present.
        gc_file: CSV file, read with its first column as index, whose
            columns include the labels "1" through "100". Its index
            supplies the sample names.
        do_qc: if true, points are coloured by whether they breach the
            threshold and a dashed threshold line is passed to
            ``scatterplot``; otherwise all points share one colour.
        min_exonmap: samples with ExonMap below this value are flagged.
        max_three_prime_map: samples with three_prime_map above this
            value are flagged.

    Returns:
        dict with keys 'fig' (grid of the two plots), 'uri' (data URIs of
        both input files) and 'file' (the two input paths).
    """
    df_rd = pd.read_csv(rd_file, index_col=0)
    df_gc = pd.read_csv(gc_file, index_col=0)
    samples = list(df_gc.index)
    n_samples = len(samples)

    # Use tags for formula: share of assigned tags that fall in exons
    df = df_rd.pivot_table(columns=["Group"], values=["Tag_count"], index="sample")
    exon_tags = (df['Tag_count', "CDS_Exons"]
                 + df['Tag_count', "3'UTR_Exons"]
                 + df['Tag_count', "5'UTR_Exons"])
    df['Tag_count', "ExonMap"] = 100.0 * exon_tags / df['Tag_count', "Total_Assigned_Tags"]

    # Drop the outer "Tag_count" level so columns are plain group names
    df.columns = df.columns.droplevel()
    df['i'] = list(range(0, len(df.index)))
    # NOTE(review): this assigns names positionally, so it assumes the
    # pivoted rows are in the same order as df_gc.index -- confirm.
    df['samples'] = samples
    df_gc["three_prime_map"] = (100.0 * df_gc.loc[:, "91":"100"].sum(axis=1)
                                / df_gc.loc[:, "1":"100"].sum(axis=1))
    df = pd.concat([df, df_gc], axis=1)

    colors = brewer["PiYG"][3]
    # Keys are str(bool) of the "threshold breached" flag
    colormap = {'False' : colors[0], 'True' : colors[2]}
    source = ColumnDataSource(df)

    # Default tools, plot_config and tooltips
    TOOLS = "pan,box_zoom,box_select,lasso_select,reset,save,hover"
    plot_config = dict(
        plot_width=300, plot_height=300,
        tools=TOOLS, title_text_font_size='12pt',
        x_range=[0, n_samples], y_range=[0, 105],
        x_axis_type=None, y_axis_type="linear",
        xaxis={'axis_label' : "sample", 'major_label_orientation' : np.pi/3, 'axis_label_text_font_size' : '10pt'},
        yaxis={'axis_label' : "percent (%)", 'major_label_orientation' : 1, 'axis_label_text_font_size' : '10pt'},
    )

    def qc_style(failed, threshold):
        """Return (point colours, threshold line) for one plot."""
        if not do_qc:
            return colors[0], None
        point_colors = [colormap[str(flag)] for flag in failed(df)]
        line = QCArgs(x=[0, n_samples], y=[threshold, threshold], line_dash=[2, 4])
        return point_colors, line

    # Exonmap plot
    c1, qc = qc_style(lambda d: d['ExonMap'] < min_exonmap, min_exonmap)
    p1 = scatterplot(x='i', y='ExonMap',
                     source=source, color=c1, qc=qc,
                     tooltips=[{'type': HoverTool, 'tips': [
                         ('Sample', '@samples'), ('ExonMap', '@ExonMap'),
                     ]}],
                     title="Tags mapping to exons", **plot_config)

    # Fraction reads mapping to the 10% right-most end
    c2, qc = qc_style(lambda d: d['three_prime_map'] > max_three_prime_map,
                      max_three_prime_map)
    p2 = scatterplot(x='i', y='three_prime_map',
                     color=c2, source=source,
                     qc=qc,
                     # Report the plotted column; this tooltip previously
                     # repeated the ExonMap value from the first plot.
                     tooltips=[{'type': HoverTool, 'tips': [
                         ('Sample', '@samples'), ("3' map", '@three_prime_map'),
                     ]}],
                     title="Reads mapping to 3' end", **plot_config)

    return {'fig' : gridplot([[p1, p2]]),
            'uri' : [data_uri(rd_file), data_uri(gc_file)],
            'file' : [rd_file, gc_file]}

Example #2

Show file

File: star.py Project: b97jre/snakemakelib

def make_star_alignment_plots(df, samples, do_qc=False, min_reads=200000, min_map=40, max_unmap=20, max_mismatch_sum=1.0):
    """Make star alignment plots.

    Draws five scatter plots (input reads, uniquely mapped %, unmapped %,
    mismatch/indel rates, mismatch/indel sum), a data table, and
    optionally a per-sample QC summary.

    Note that ``df`` is modified in place: the columns 'i', 'samples',
    'mismatch_sum' and 'PCT_of_reads_unmapped' are added to it.

    Args:
        df: data frame holding the columns referenced below
            ('Number_of_input_reads', 'Uniquely_mapped_reads_PCT',
            'Mismatch_rate_per_base__PCT', 'Insertion_rate_per_base',
            'Deletion_rate_per_base' and the three
            'PCT_of_reads_unmapped:_*' columns), one row per sample.
        samples: sample names, one per row of ``df`` in row order.
        do_qc: if true, colour points by threshold breach, pass a dashed
            threshold line to ``scatterplot`` and build the QC table.
        min_reads: flag samples with fewer input reads than this.
        min_map: flag samples with a lower uniquely mapped % than this.
        max_unmap: flag samples with a higher unmapped % than this.
        max_mismatch_sum: flag samples whose mismatch + insertion +
            deletion rate exceeds this (default 1.0).

    Returns:
        dict with keys 'plots' (layout of the five plots), 'table'
        (DataTable) and 'qctable' (DataFrame of QC flags, or None when
        ``do_qc`` is false).
    """
    # Currently hover tool and categorical variables don't play
    # nicely together in bokeh: see
    # https://github.com/bokeh/bokeh/issues/624

    # Workaround as long as categorical variables don't work with HoverTool:
    # plot against a numeric position and show the name in the tooltip.
    df['i'] = list(range(0, len(df.index)))
    df['samples'] = samples
    df['mismatch_sum'] = (df['Mismatch_rate_per_base__PCT']
                          + df['Deletion_rate_per_base']
                          + df['Insertion_rate_per_base'])
    df['PCT_of_reads_unmapped'] = (df['PCT_of_reads_unmapped:_other']
                                   + df['PCT_of_reads_unmapped:_too_many_mismatches']
                                   + df['PCT_of_reads_unmapped:_too_short'])
    n_samples = len(samples)

    colors = brewer["PiYG"][3]
    # Use the two end colours of the 3-class diverging palette; the middle
    # class (colors[1]) is the palette's midpoint and was previously used
    # for failing samples.
    colormap = {'False' : colors[0], 'True' : colors[2]}

    columns = [
        TableColumn(field="samples", title="Sample"),
        TableColumn(field="Number_of_input_reads", title="Number of input reads"),
        TableColumn(field="Uniquely_mapped_reads_PCT", title="Uniquely mapped reads (%)"),
        TableColumn(field="Mismatch_rate_per_base__PCT", title="Mismatch rate per base (%)"),
        TableColumn(field="Insertion_rate_per_base", title="Insertion rate per base (%)"),
        TableColumn(field="Deletion_rate_per_base", title="Deletion rate per base (%)"),
        TableColumn(field="PCT_of_reads_unmapped", title="Unmapped reads (%)"),
    ]

    source = ColumnDataSource(df)
    # Generate the table
    table = DataTable(source=source, columns=columns, editable=False, width=1000)

    # Threshold flags are only evaluated when QC is requested
    failed = {}
    if do_qc:
        failed = {
            'read_filter': df['Number_of_input_reads'] < min_reads,
            'map_filter': df['Uniquely_mapped_reads_PCT'] < min_map,
            'unmap_filter': df['PCT_of_reads_unmapped'] > max_unmap,
            'mismatch_filter': df['mismatch_sum'] > max_mismatch_sum,
        }

    def qc_style(name, threshold):
        """Return (point colours, threshold line) for the named filter."""
        if not do_qc:
            return "blue", None
        point_colors = [colormap[str(flag)] for flag in failed[name]]
        line = QCArgs(x=[0, n_samples], y=[threshold, threshold], line_dash=[2, 4])
        return point_colors, line

    def hover(*tips):
        """Wrap (label, field) pairs in the tooltip spec used by scatterplot."""
        return [{'type': HoverTool, 'tips': list(tips)}]

    # Default tools, plot_config and tooltips
    TOOLS = "pan,box_zoom,box_select,lasso_select,reset,save,hover"
    plot_config = dict(
        plot_width=300, plot_height=300, tools=TOOLS, title_text_font_size='12pt',
        x_axis_type=None, y_axis_type="log",
        xaxis={'axis_label' : 'sample', 'axis_label_text_font_size' : '10pt', 'major_label_orientation' : np.pi/3},
        yaxis={'axis_label' : 'reads', 'axis_label_text_font_size' : '10pt', 'major_label_orientation' : np.pi/3},
        x_range=[0, n_samples],
    )

    # Number of input reads
    c1, qc = qc_style('read_filter', min_reads)
    p1 = scatterplot(x='i', y='Number_of_input_reads', source=source, color=c1, qc=qc,
                     title="Number of input reads",
                     tooltips=hover(('Sample', '@samples'), ('Reads', '@Number_of_input_reads')),
                     **plot_config)

    # Uniquely mapped reads
    # The remaining plots show percentages/rates: switch the existing
    # 'y_axis_type' key to linear and relabel the y axis. (This used to set
    # the unrelated keys 'yaxis_type' and a top-level 'axis_label', which
    # left the axis on log scale labelled "reads".) The yaxis dict is
    # rebuilt rather than mutated so earlier plots never see the change.
    plot_config['y_axis_type'] = 'linear'
    plot_config['yaxis'] = dict(plot_config['yaxis'], axis_label='percent (%)')
    c2, qc = qc_style('map_filter', min_map)
    p2 = scatterplot(x='i', y='Uniquely_mapped_reads_PCT', source=source, color=c2, qc=qc,
                     title="Uniquely mapping reads",
                     tooltips=hover(('Sample', '@samples'), ('Pct_mapped', '@Uniquely_mapped_reads_PCT')),
                     **plot_config)

    # Mapping reads in general
    c3, qc = qc_style('unmap_filter', max_unmap)
    p3 = scatterplot(x='i', y='PCT_of_reads_unmapped',
                     source=source, color=c3, qc=qc, title="Unmapped reads",
                     tooltips=hover(('Sample', '@samples'), ('Pct_unmapped', '@PCT_of_reads_unmapped')),
                     **plot_config)

    # Mismatch/indel rate
    plot_config['tools'] = TOOLS.replace("lasso_select,", "")
    plot_config['yaxis'] = dict(plot_config['yaxis'], axis_label='Rate per base')
    p4 = scatterplot(x='i', y=['Mismatch_rate_per_base__PCT', 'Insertion_rate_per_base', 'Deletion_rate_per_base'],
                     color=["blue", "red", "green"], source=source,
                     title="Mismatch and indel rates",
                     tooltips=hover(('Sample', '@samples'),
                                    ('Mismatch rate per base', '@Mismatch_rate_per_base__PCT'),
                                    ('Insertion rate per base', '@Insertion_rate_per_base'),
                                    ('Deletion rate per base', '@Deletion_rate_per_base')),
                     **plot_config)
    select_tool = p4.select(dict(type=BoxSelectTool))
    select_tool.dimensions = ['width']

    # Plot sum
    plot_config['yaxis'] = dict(plot_config['yaxis'], axis_label='Mismatch/indel sum')
    c5, qc = qc_style('mismatch_filter', max_mismatch_sum)
    p5 = scatterplot(x='i', y='mismatch_sum',
                     source=source, color=c5, qc=qc, title="Mismatch / indel sum",
                     tooltips=hover(('Sample', '@samples'), ('Mismatch/indel rate per base', '@mismatch_sum')),
                     **plot_config)
    select_tool = p5.select(dict(type=BoxSelectTool))
    select_tool.dimensions = ['width']

    df_qc = None
    if do_qc:
        # QC summary table. Flags are passed as plain arrays: boolean Series
        # would be re-aligned by label to the sample-name index and come out
        # as NaN whenever df.index is not the sample names.
        # NOTE(review): the unmapped-reads flag colours its plot but is not
        # part of this summary -- confirm that is intended.
        d = {'samples': samples}
        for name in ('read_filter', 'map_filter', 'mismatch_filter'):
            d[name] = failed[name].values
        d['filter'] = d['read_filter'] | d['map_filter'] | d['mismatch_filter']
        df_qc = pd.DataFrame(data=d, index=df.samples)

    layout = VBox(children=[gridplot([[p1, p2, p3]]),
                            HBox(children=[gridplot([[p4, p5]])])])
    return {'plots' : layout, 'table' : table, 'qctable' : df_qc}