Exemple #1
0
 def test_dotplot(self):
     # BUG: must currently set x_range to categorical axis,
     # otherwise we would need to recreate the figure in the
     # dotplot function
     f = figure(x_range=list(self._data["treatment"]), plot_height=200,
                plot_width=200)
     dotplot(f, "treatment", "y", self._data, line_color='gray',
             size=20, color="red", legend="y")
 def test_dotplot_fig(self):
     dotplot(self._fig,
             "treatment",
             "y",
             self._data,
             line_color='gray',
             size=20,
             color="red")
Exemple #3
0
def mdotplot(fig, x, y, df=None, source=None,
             color=False, legend=False, binaxis="x", **kwargs):
    """mdotplot: make a mdotplot.

    In this implementation, the explanatory variable is treated as a
    factor.

    Args:
      fig (:py:class:`~bokeh.plotting.Plot`): bokeh Plot object
      x (str): string for x component
      y (str): string for y component
      df (:py:class:`~pandas.DataFrame`): pandas DataFram
      source (:py:class:`~bokeh.models.sources.ColumnDataSource`): bokeh sources.ColumnDataSource object
      color (bool): set color
      legend (bool): set legend
      binaxis (str): axis to bin dots on
      kwargs: keyword arguments to pass to glyph drawing function

    Example:

      .. bokeh-plot::
          :source-position: above

          import pandas as pd
          from bokeh.plotting import figure, show
          from bokehutils.mgeom import mdotplot

          df = pd.DataFrame([[1,2,"A"], [2,5,"B"], [3,9,"A"]],
                            columns=["x", "y", "foo"])
          # NB: currently *must* set the range here
          f = figure(title="Dotplot", plot_width=400, plot_height=400,
                     x_range=list(set(df["foo"])))
          mdotplot(f, "foo", ["y", "x"], df)

          show(f)

    Note that we in the example we must set the range in the call to
    figure, otherwise figure will use linear axis by default. It is
    currently cumbersome to change axes types in an existing figure.
    See `categorical axes
    <http://bokeh.pydata.org/en/latest/docs/user_guide/plotting.html#categorical-axes>`_
    for more information.
    """
    logger.debug("Adding dotplot to figure {}".format(fig))
    if com.is_numeric_dtype(source.to_df()[x]) == True:
        raise TypeError("{}: dependant variable must not be numerical type".format(__name__))
    for i in range(len(y)):
        dotplot(fig=fig, x=x, y=y[i], source=source, **kwargs)
        if color:
            # Add color here
            # color = brewer["PiYG"][min(max(3, len(y)), 10)]
            pass
        if legend:
            # Add legend here via legend function
            pass
 def test_dotplot(self):
     # BUG: must currently set x_range to categorical axis,
     # otherwise we would need to recreate the figure in the
     # dotplot function
     f = figure(x_range=list(self._data["treatment"]),
                plot_height=200,
                plot_width=200)
     dotplot(f,
             "treatment",
             "y",
             self._data,
             line_color='gray',
             size=20,
             color="red",
             legend="y")
Exemple #5
0
    def plot_metrics(self, **kwargs):
        """Plot metrics wrapper

        Returns:
          plist (list): list of bokeh plot objects
        """
        plist = []
        for kw in self.plots:
            kwargs.update(kw['figure'])
            fig = figure(**kwargs)
            dotplot(fig, df=self, **kw['renderer'])
            xaxis(fig, **kw['xaxis'])
            yaxis(fig, **kw['yaxis'])
            plist.append(fig)
        return plist
Exemple #6
0
 def test_dotplot_fig(self):
     dotplot(self._fig, "treatment", "y", self._data, line_color='gray', size=20, color="red")
Exemple #7
0
 def test_dotplot_int(self):
     dotplot(self._fig, "x", "y", self._data)
 def test_dotplot_int(self):
     dotplot(self._fig, "x", "y", self._data)
Exemple #9
0
def mdotplot(fig,
             x,
             y,
             df=None,
             source=None,
             color=False,
             legend=False,
             binaxis="x",
             **kwargs):
    """mdotplot: make a mdotplot.

    In this implementation, the explanatory variable is treated as a
    factor.

    Args:
      fig (:py:class:`~bokeh.plotting.Plot`): bokeh Plot object
      x (str): string for x component
      y (str): string for y component
      df (:py:class:`~pandas.DataFrame`): pandas DataFram
      source (:py:class:`~bokeh.models.sources.ColumnDataSource`): bokeh sources.ColumnDataSource object
      color (bool): set color
      legend (bool): set legend
      binaxis (str): axis to bin dots on
      kwargs: keyword arguments to pass to glyph drawing function

    Example:

      .. bokeh-plot::
          :source-position: above

          import pandas as pd
          from bokeh.plotting import figure, show
          from bokehutils.mgeom import mdotplot

          df = pd.DataFrame([[1,2,"A"], [2,5,"B"], [3,9,"A"]],
                            columns=["x", "y", "foo"])
          # NB: currently *must* set the range here
          f = figure(title="Dotplot", plot_width=400, plot_height=400,
                     x_range=list(set(df["foo"])))
          mdotplot(f, "foo", ["y", "x"], df)

          show(f)

    Note that we in the example we must set the range in the call to
    figure, otherwise figure will use linear axis by default. It is
    currently cumbersome to change axes types in an existing figure.
    See `categorical axes
    <http://bokeh.pydata.org/en/latest/docs/user_guide/plotting.html#categorical-axes>`_
    for more information.
    """
    logger.debug("Adding dotplot to figure {}".format(fig))
    if com.is_numeric_dtype(source.to_df()[x]) == True:
        raise TypeError(
            "{}: dependant variable must not be numerical type".format(
                __name__))
    for i in range(len(y)):
        dotplot(fig=fig, x=x, y=y[i], source=source, **kwargs)
        if color:
            # Add color here
            # color = brewer["PiYG"][min(max(3, len(y)), 10)]
            pass
        if legend:
            # Add legend here via legend function
            pass
Exemple #10
0
def scrnaseq_pca_plots(pca_results_file=None, metadata=None, pcacomp=(1,2), pcaobjfile=None):
    """Make PCA QC plots for scrnaseq workflow

    Args:
      pca_results_file (str): pca results file
      metadata (str): metadata file name
      pcacomp (int): tuple of ints corresponding to components to draw
      pcaobjfile (str): file name containing pickled pca object

    Returns: 
      dict: dictionary with keys 'fig' pointing to a (:py:class:`~bokeh.models.GridPlot`) Bokeh GridPlot object and key 'table' pointing to a (:py:class:`~bokeh.widgets.DataTable`) DataTable

    """
    if not metadata is None:
        md = pd.read_csv(metadata, index_col=0)
    if not pcaobjfile is None:
        with open(pcaobjfile, 'rb') as fh:
            pcaobj = pickle.load(fh)
    df_pca = pd.read_csv(pca_results_file, index_col="sample")
    df_pca['color'] = ['red'] * df_pca.shape[0]
    df_pca['x'] = df_pca['0']
    df_pca['y'] = df_pca['1']

    source = ColumnDataSource(df_pca)
    TOOLS = "pan,wheel_zoom,box_zoom,box_select,resize,reset,save,hover"

    # Add radio button group
    cmap = colorbrewer(datalen = df_pca.shape[0], palette="RdYlBu")
    callback_rbg = CustomJS(args=dict(source=source), code="""
        var data = source.get('data');
        var active = cb_obj.get('active')
        var label = cb_obj.get('label')[active]
        var RdYlBu = {
    3: ["#fc8d59","#ffffbf","#91bfdb"],
    4: ["#d7191c","#fdae61","#abd9e9","#2c7bb6"],
    5: ["#d7191c","#fdae61","#ffffbf","#abd9e9","#2c7bb6"],
    6: ["#d73027","#fc8d59","#fee090","#e0f3f8","#91bfdb","#4575b4"],
    7: ["#d73027","#fc8d59","#fee090","#ffffbf","#e0f3f8","#91bfdb","#4575b4"],
    8: ["#d73027","#f46d43","#fdae61","#fee090","#e0f3f8","#abd9e9","#74add1","#4575b4"],
    9: ["#d73027","#f46d43","#fdae61","#fee090","#ffffbf","#e0f3f8","#abd9e9","#74add1","#4575b4"],
    10: ["#a50026","#d73027","#f46d43","#fdae61","#fee090","#e0f3f8","#abd9e9","#74add1","#4575b4","#313695"],
    11: ["#a50026","#d73027","#f46d43","#fdae61","#fee090","#ffffbf","#e0f3f8","#abd9e9","#74add1","#4575b4","#313695"]};
        var colormap = {};

        var j = 0;
        for (i = 0; i < data['sample'].length; i++) {
            if (data[label][i] in colormap) {
            } else {
                colormap[data[label][i]] = j;
                j++;
            }
        }
        var nfac = Object.keys(colormap).length;
        if (nfac > 11) {
            nfac = 11;
        } 
        if (nfac < 3) {
           nfac = 3;
        }
        var colors = RdYlBu[nfac];
        for (i = 0; i < data[label].length; i++) {
              data['color'][i] = colors[colormap[data[label][i]]]
        }
        source.trigger('change');
    """)
    callback  = CustomJS(args=dict(source=source), code="""
        var data = source.get('data');
        var active = cb_obj.get('active');
        var label = cb_obj.get('label');
        var RdYlBu = {
    3: ["#fc8d59","#ffffbf","#91bfdb"],
    4: ["#d7191c","#fdae61","#abd9e9","#2c7bb6"],
    5: ["#d7191c","#fdae61","#ffffbf","#abd9e9","#2c7bb6"],
    6: ["#d73027","#fc8d59","#fee090","#e0f3f8","#91bfdb","#4575b4"],
    7: ["#d73027","#fc8d59","#fee090","#ffffbf","#e0f3f8","#91bfdb","#4575b4"],
    8: ["#d73027","#f46d43","#fdae61","#fee090","#e0f3f8","#abd9e9","#74add1","#4575b4"],
    9: ["#d73027","#f46d43","#fdae61","#fee090","#ffffbf","#e0f3f8","#abd9e9","#74add1","#4575b4"],
    10: ["#a50026","#d73027","#f46d43","#fdae61","#fee090","#e0f3f8","#abd9e9","#74add1","#4575b4","#313695"],
    11: ["#a50026","#d73027","#f46d43","#fdae61","#fee090","#ffffbf","#e0f3f8","#abd9e9","#74add1","#4575b4","#313695"]};
        var colormap = {};
    if (!active) {
        var j = 0;
        for (i = 0; i < data['sample'].length; i++) {
            if (data[label][i] in colormap) {
            } else {
                colormap[data[label][i]] = j;
                j++;
            }
        }
        var nfac = Object.keys(colormap).length;
        if (nfac > 11) {
            nfac = 11;
        } 
        if (nfac < 3) {
           nfac = 3;
        }
        var colors = RdYlBu[nfac];
        for (i = 0; i < data[label].length; i++) {
              data['color'][i] = colors[colormap[data[label][i]]]
        }
        source.trigger('change');
    }
    """)
    if not md is None:
        # Waiting for callbacks to be implemented upstream in bokeh
        # rbg = RadioButtonGroup(labels=list(md.columns),
        #                        callback=callback)
        toggle_buttons = [Toggle(label=x, callback=callback) for x in list(md.columns)]
    else:
        toggle_buttons = []
        # rbg = RadioButtonGroup()
    # PC components
    xcallback = CustomJS(args=dict(source=source), code="""
        var data = source.get('data');
        var active = cb_obj.get('active')
        var value = cb_obj.get('value')
        x = data['x']
        for (i = 0; i < x.length; i++) {
              x[i] = data[value][i]
              data['sample'][i] = value
              data['FileID'][i] = active
        }

        source.trigger('change');
    """)
    ycallback = CustomJS(args=dict(source=source), code="""
        var data = source.get('data');
        var value = cb_obj.get('value')
        y = data['y']
        for (i = 0; i < y.length; i++) {
             y[i] = data[value][i]
        }
        source.trigger('change');
    """)

    pca_components = sorted([int(x) + 1 for x in source.column_names if re.match("\d+", x)])
    menulist = [(str(x), str(x)) for x in pca_components]
    component_x = Dropdown(label = "PCA component x", menu = menulist, default_value="1",
                           callback=xcallback)
    component_y = Dropdown(label = "PCA component y", menu = menulist, default_value="2",
                           callback=ycallback)

    # Make the pca plot
    kwfig = {'plot_width': 400, 'plot_height': 400,
             'title_text_font_size': "12pt"}


    p1 = figure(title="Principal component analysis",
                tools=TOOLS, **kwfig)

    points(p1, 'x', 'y', source=source, color='color', size=10,
           alpha=.7)
    kwxaxis = {'axis_label': "Component {} ({:.2f}%)".format(
        pcacomp[0], 100.0 * pcaobj.explained_variance_ratio_[pcacomp[0] - 1]),
               'axis_label_text_font_size': '10pt',
               'major_label_orientation': np.pi/3}
    kwyaxis = {'axis_label': "Component {} ({:.2f}%)".format(
        pcacomp[1], 100.0 * pcaobj.explained_variance_ratio_[pcacomp[1] - 1]),
               'axis_label_text_font_size': '10pt',
               'major_label_orientation': np.pi/3}
    xaxis(p1, **kwxaxis)
    yaxis(p1, **kwyaxis)
    tooltiplist = [("sample", "@sample")] if "sample" in source.column_names else []
    if not md is None:
        tooltiplist = tooltiplist + [(str(x), "@{}".format(x)) for x
                                     in md.columns]
    tooltips(p1, HoverTool, tooltiplist)

    # Detected genes, FPKM and TPM
    p2 = figure(title="Number of detected genes",
                x_range=list(df_pca.index), tools=TOOLS,
                **kwfig)
    kwxaxis.update({'axis_label': "Sample"})
    kwyaxis.update({'axis_label': "Detected genes"})
    dotplot(p2, "sample", "FPKM", source=source)
    xaxis(p2, **kwxaxis)
    yaxis(p2, **kwyaxis)
    tooltips(p2, HoverTool, [('sample', '@sample'),
                             ('# genes (FPKM)', '@FPKM')])
    return {'fig':vform(*(toggle_buttons + [gridplot([[p1, p2]])]))}
Exemple #11
0
def scrnaseq_alignment_qc_plots(rseqc_read_distribution=None, rseqc_gene_coverage=None,
                      star_results=None):
    """Make alignment QC plots for scrnaseq workflow

    Args:
      rseqc_read_distribution (str): RSeQC read distribution results csv file
      rseqc_gene_coverage (str): RSeQC gene coverage results csv file
      star_results (str): star alignment results csv file

    Returns: 
      dict: dictionary with keys 'fig' pointing to a (:py:class:`~bokeh.models.GridPlot`) Bokeh GridPlot object and key 'table' pointing to a (:py:class:`~bokeh.widgets.DataTable`) DataTable

    """
    df_star = pd.read_csv(star_results, index_col="Sample")
    df_rseqc_rd = pd.read_csv(rseqc_read_distribution, index_col="Sample").reset_index().pivot_table(columns=["Group"], values=["Tag_count"], index=["Sample"])
    df_rseqc_rd.columns = ["_".join(x) if isinstance(x, tuple) else x for x in df_rseqc_rd.columns]
    df_rseqc_gc = pd.read_csv(rseqc_gene_coverage, index_col="Sample")
    df_all = df_star.join(df_rseqc_rd)
    df_all = df_all.join(df_rseqc_gc['three_prime_map'])
    source = ColumnDataSource(df_all)
    columns = [
        TableColumn(field="Sample", title="Sample"),
        TableColumn(field="Number_of_input_reads",
                    title="Number of input reads"),
        TableColumn(field="Uniquely_mapped_reads_PCT",
                    title="Uniquely mapped reads (%)"),
        TableColumn(field="Mismatch_rate_per_base__PCT",
                    title="Mismatch rate per base (%)"),
        TableColumn(field="Insertion_rate_per_base",
                    title="Insertion rate per base (%)"),
        TableColumn(field="Deletion_rate_per_base",
                    title="Deletion rate per base (%)"),
        TableColumn(field="PCT_of_reads_unmapped",
                    title="Unmapped reads (%)"),
    ]
    table = DataTable(source=source, columns=columns,
                      editable=False, width=1000)
    TOOLS = "pan,wheel_zoom,box_zoom,box_select,lasso_select,resize,reset,save,hover"
    kwfig = {'plot_width': 400, 'plot_height': 400, 
             'title_text_font_size': "12pt"}
    kwxaxis = {'axis_label': 'Sample',
               'major_label_orientation': np.pi/3}
    kwyaxis = {'axis_label_text_font_size': '10pt',
               'major_label_orientation': np.pi/3}

    # Input reads
    p1 = figure(title="Number of input reads",
                x_range=list(df_all.index), tools=TOOLS,
                y_axis_type="log", **kwfig)
    dotplot(p1, "Sample", "Number_of_input_reads", source=source)
    xaxis(p1, **kwxaxis)
    yaxis(p1, axis_label="Reads", **kwyaxis)
    tooltips(p1, HoverTool, [('Sample', '@Sample'),
                             ('Reads', '@Number_of_input_reads')])

    # Uniquely mapping
    p2 = figure(title="Uniquely mapping reads",
                x_range=p1.x_range,
                y_range=[0, 100],
                tools=TOOLS,
                **kwfig)
    dotplot(p2, "Sample", "Uniquely_mapped_reads_PCT", source=source)
    xaxis(p2, **kwxaxis)
    yaxis(p2, axis_label="Percent", **kwyaxis)
    tooltips(p2, HoverTool, [('Sample', '@Sample'),
                             ('Pct_mapped', '@Uniquely_mapped_reads_PCT')])

    # Unmapped
    p3 = figure(title="Unmapped reads",
                x_range=p1.x_range,
                y_range=[0, 100],
                tools=TOOLS,
                **kwfig)
    dotplot(p3, "Sample", "PCT_of_reads_unmapped", source=source)
    xaxis(p3, **kwxaxis)
    yaxis(p3, axis_label="Percent", **kwyaxis)
    tooltips(p3, HoverTool, [('Sample', '@Sample'),
                             ('Pct_unmapped', '@PCT_of_reads_unmapped')])

    # Mismatch/indel rate
    p4 = figure(title="Mismatch and indel rates",
                x_range=p1.x_range,
                tools=TOOLS,
                **kwfig)
    dotplot(p4, "Sample", "Mismatch_rate_per_base__PCT", source=source, legend="Mismatch")
    dotplot(p4, "Sample", "Insertion_rate_per_base", source=source, legend="Insertion", color="red")
    dotplot(p4, "Sample", "Deletion_rate_per_base", source=source, legend="Deletion", color="green")
    xaxis(p4, **kwxaxis)
    yaxis(p4, axis_label="Percent", **kwyaxis)
    tooltips(p4, HoverTool,  [('Sample', '@samples'),
                                              ('Mismatch rate per base',
                                               '@Mismatch_rate_per_base__PCT'),
                                              ('Insertion rate per base',
                                               '@Insertion_rate_per_base'),
                                              ('Deletion rate per base',
                                               '@Deletion_rate_per_base'), ])
    select_tool = p4.select(dict(type=BoxSelectTool))
    select_tool.dimensions = ['width']

    

             
    # Unmapped
    p5 = figure(title="Mismatch/indel sum",
                x_range=p1.x_range,
                tools=TOOLS,
                **kwfig)
    dotplot(p5, "Sample", "mismatch_sum", source=source)
    xaxis(p5, **kwxaxis)
    yaxis(p5, axis_label="Percent", **kwyaxis)
    tooltips(p5, HoverTool, [('Sample', '@Sample'),
                             ('Mismatch/indel rate per base',
                              '@mismatch_sum'), ])
    select_tool = p5.select(dict(type=BoxSelectTool))
    select_tool.dimensions = ['width']

    # Fraction reads mapping to 10% right-most end
    p6 = figure(title="Tags mapping to exons",
                x_range=p1.x_range,
                tools=TOOLS,
                **kwfig)
    dotplot(p6, "Sample", "Tag_count_ExonMap", source=source)
    xaxis(p6, **kwxaxis)
    yaxis(p6, axis_label="Percent", **kwyaxis)
    tooltips(p6, HoverTool, [('Sample', '@Sample'),
                             ('ExonMap', '@Tag_count_ExonMap'), ])

    # Fraction reads mapping to 10% right-most end
    p7 = figure(title="Reads mapping to 3' end",
                x_range=p1.x_range,
                tools=TOOLS,
                **kwfig)
    dotplot(p7, "Sample", "three_prime_map", source=source)
    xaxis(p7, **kwxaxis)
    yaxis(p7, axis_label="Percent", **kwyaxis)
    tooltips(p7, HoverTool, [('Sample', '@Sample'),
                             ("3' map", '@three_prime_map'), ])


    return {'fig': gridplot([[p1, p2, p3], [p4, p5, p6], [p7, None, None]]),
            'table': table}