def test_dotplot(self):
    """Draw ``y`` against ``treatment`` on a freshly created figure."""
    # BUG: the categorical x_range currently has to be supplied when
    # the figure is created; otherwise the dotplot function would
    # need to recreate the figure.
    categories = list(self._data["treatment"])
    canvas = figure(x_range=categories, plot_height=200, plot_width=200)
    glyph_options = dict(line_color='gray', size=20, color="red",
                         legend="y")
    dotplot(canvas, "treatment", "y", self._data, **glyph_options)
def test_dotplot_fig(self):
    """Add a dotplot to the figure stored on ``self._fig``."""
    glyph_options = {"line_color": 'gray', "size": 20, "color": "red"}
    dotplot(self._fig, "treatment", "y", self._data, **glyph_options)
def mdotplot(fig, x, y, df=None, source=None, color=False, legend=False,
             binaxis="x", **kwargs):
    """mdotplot: make a mdotplot.

    Draw one dotplot per response column in ``y`` on the same figure.
    In this implementation, the explanatory variable is treated as a
    factor.

    Args:
      fig (:py:class:`~bokeh.plotting.Plot`): bokeh Plot object
      x (str): string for x component
      y (str or list): column name, or list of column names, for the y
        components; one dotplot is drawn per name
      df (:py:class:`~pandas.DataFrame`): pandas DataFrame; used when
        no source is given
      source (:py:class:`~bokeh.models.sources.ColumnDataSource`): bokeh
        sources.ColumnDataSource object; takes precedence over df
      color (bool): set color (not implemented yet)
      legend (bool): set legend (not implemented yet)
      binaxis (str): axis to bin dots on (currently unused)
      kwargs: keyword arguments to pass to glyph drawing function

    Raises:
      ValueError: if neither df nor source is supplied
      TypeError: if column x has a numerical dtype

    Example:

      .. bokeh-plot::
          :source-position: above

          import pandas as pd
          from bokeh.plotting import figure, show
          from bokehutils.mgeom import mdotplot

          df = pd.DataFrame([[1,2,"A"], [2,5,"B"], [3,9,"A"]],
                            columns=["x", "y", "foo"])
          # NB: currently *must* set the range here
          f = figure(title="Dotplot", plot_width=400, plot_height=400,
                     x_range=list(set(df["foo"])))
          mdotplot(f, "foo", ["y", "x"], df)
          show(f)

    Note that in the example we must set the range in the call to
    figure, otherwise figure will use a linear axis by default. It is
    currently cumbersome to change axes types in an existing figure.
    See `categorical axes
    <http://bokeh.pydata.org/en/latest/docs/user_guide/plotting.html#categorical-axes>`_
    for more information.
    """
    logger.debug("Adding dotplot to figure {}".format(fig))
    # Resolve the data container once. A ColumnDataSource keeps its
    # precedence; the data frame is used when no source was given
    # (previously df was ignored and a missing source raised on None).
    if source is not None:
        data = source.to_df()
        data_kwargs = {"source": source}
    elif df is not None:
        data = df
        data_kwargs = {"df": df}
    else:
        raise ValueError(
            "{}: either df or source must be provided".format(__name__))
    if com.is_numeric_dtype(data[x]):
        raise TypeError(
            "{}: dependant variable must not be numerical type".format(
                __name__))
    # A single column name is treated as a one-element list instead of
    # being iterated character by character.
    columns = [y] if isinstance(y, str) else y
    call_kwargs = dict(kwargs)
    call_kwargs.update(data_kwargs)
    for column in columns:
        dotplot(fig=fig, x=x, y=column, **call_kwargs)
    if color:
        # TODO: add color here, e.g.
        # color = brewer["PiYG"][min(max(3, len(y)), 10)]
        pass
    if legend:
        # TODO: add legend here via legend function
        pass
def plot_metrics(self, **kwargs):
    """Plot metrics wrapper

    Creates one figure per entry in ``self.plots``. Each entry is
    indexed with the keys 'figure', 'renderer', 'xaxis' and 'yaxis',
    whose values are passed as keyword arguments to ``figure``,
    ``dotplot``, ``xaxis`` and ``yaxis`` respectively.

    Args:
      kwargs: keyword arguments applied to every ``figure`` call;
        values in an entry's 'figure' mapping take precedence

    Returns:
      plist (list): list of bokeh plot objects
    """
    plist = []
    for kw in self.plots:
        # Merge into a fresh dict for every plot so that the figure
        # options of one entry do not leak into the figures created
        # after it (updating kwargs in place accumulated them).
        figure_kwargs = dict(kwargs)
        figure_kwargs.update(kw['figure'])
        fig = figure(**figure_kwargs)
        dotplot(fig, df=self, **kw['renderer'])
        xaxis(fig, **kw['xaxis'])
        yaxis(fig, **kw['yaxis'])
        plist.append(fig)
    return plist
def test_dotplot_int(self):
    """Call dotplot with the ``x`` column as the explanatory variable."""
    # NOTE(review): judging by the test name, "x" is presumably an
    # integer column -- confirm against the fixture.
    target, frame = self._fig, self._data
    dotplot(target, "x", "y", frame)
def mdotplot(fig, x, y, df=None, source=None, color=False, legend=False,
             binaxis="x", **kwargs):
    """mdotplot: make a mdotplot.

    Draw one dotplot per response column in ``y`` on the same figure.
    In this implementation, the explanatory variable is treated as a
    factor.

    Args:
      fig (:py:class:`~bokeh.plotting.Plot`): bokeh Plot object
      x (str): string for x component
      y (str or list): column name, or list of column names, for the y
        components; one dotplot is drawn per name
      df (:py:class:`~pandas.DataFrame`): pandas DataFrame; used when
        no source is given
      source (:py:class:`~bokeh.models.sources.ColumnDataSource`): bokeh
        sources.ColumnDataSource object; takes precedence over df
      color (bool): set color (not implemented yet)
      legend (bool): set legend (not implemented yet)
      binaxis (str): axis to bin dots on (currently unused)
      kwargs: keyword arguments to pass to glyph drawing function

    Raises:
      ValueError: if neither df nor source is supplied
      TypeError: if column x has a numerical dtype

    Example:

      .. bokeh-plot::
          :source-position: above

          import pandas as pd
          from bokeh.plotting import figure, show
          from bokehutils.mgeom import mdotplot

          df = pd.DataFrame([[1,2,"A"], [2,5,"B"], [3,9,"A"]],
                            columns=["x", "y", "foo"])
          # NB: currently *must* set the range here
          f = figure(title="Dotplot", plot_width=400, plot_height=400,
                     x_range=list(set(df["foo"])))
          mdotplot(f, "foo", ["y", "x"], df)
          show(f)

    Note that in the example we must set the range in the call to
    figure, otherwise figure will use a linear axis by default. It is
    currently cumbersome to change axes types in an existing figure.
    See `categorical axes
    <http://bokeh.pydata.org/en/latest/docs/user_guide/plotting.html#categorical-axes>`_
    for more information.
    """
    logger.debug("Adding dotplot to figure {}".format(fig))
    # Resolve the data container once. A ColumnDataSource keeps its
    # precedence; the data frame is used when no source was given
    # (previously df was ignored and a missing source raised on None).
    if source is not None:
        data = source.to_df()
        data_kwargs = {"source": source}
    elif df is not None:
        data = df
        data_kwargs = {"df": df}
    else:
        raise ValueError(
            "{}: either df or source must be provided".format(__name__))
    if com.is_numeric_dtype(data[x]):
        raise TypeError(
            "{}: dependant variable must not be numerical type".format(
                __name__))
    # A single column name is treated as a one-element list instead of
    # being iterated character by character.
    columns = [y] if isinstance(y, str) else y
    call_kwargs = dict(kwargs)
    call_kwargs.update(data_kwargs)
    for column in columns:
        dotplot(fig=fig, x=x, y=column, **call_kwargs)
    if color:
        # TODO: add color here, e.g.
        # color = brewer["PiYG"][min(max(3, len(y)), 10)]
        pass
    if legend:
        # TODO: add legend here via legend function
        pass
def scrnaseq_pca_plots(pca_results_file=None, metadata=None, pcacomp=(1,2),
                       pcaobjfile=None):
    """Make PCA QC plots for scrnaseq workflow

    Args:
      pca_results_file (str): pca results file
      metadata (str): metadata file name
      pcacomp (tuple): pair of 1-based component numbers to draw on the
        x and y axes
      pcaobjfile (str): file name containing pickled pca object; when
        omitted the axis labels carry no explained-variance percentage

    Returns:
      dict: dictionary with key 'fig' pointing to the layout returned
        by ``vform``, holding the metadata toggle buttons followed by
        the grid plot
    """
    # Both inputs are optional: bind the names up front so the later
    # `is None` checks work when a file is not supplied.
    md = None
    if metadata is not None:
        md = pd.read_csv(metadata, index_col=0)
    pcaobj = None
    if pcaobjfile is not None:
        # NOTE(review): pickle.load can execute arbitrary code; only
        # use this with trusted, workflow-generated files.
        with open(pcaobjfile, 'rb') as fh:
            pcaobj = pickle.load(fh)
    df_pca = pd.read_csv(pca_results_file, index_col="sample")
    df_pca['color'] = ['red'] * df_pca.shape[0]
    # Component columns are labelled '0', '1', ... whereas pcacomp is
    # 1-based; plot the components that the axis labels announce.
    df_pca['x'] = df_pca[str(pcacomp[0] - 1)]
    df_pca['y'] = df_pca[str(pcacomp[1] - 1)]
    source = ColumnDataSource(df_pca)
    TOOLS = "pan,wheel_zoom,box_zoom,box_select,resize,reset,save,hover"

    # NOTE(review): cmap is currently unused.
    cmap = colorbrewer(datalen = df_pca.shape[0], palette="RdYlBu")

    # JavaScript shared by the colouring callbacks: the RdYlBu palette
    # table, and the code that maps each distinct value of data[label]
    # to a colour and writes it to data['color'].
    js_palette = """
    var RdYlBu = {
    3: ["#fc8d59","#ffffbf","#91bfdb"],
    4: ["#d7191c","#fdae61","#abd9e9","#2c7bb6"],
    5: ["#d7191c","#fdae61","#ffffbf","#abd9e9","#2c7bb6"],
    6: ["#d73027","#fc8d59","#fee090","#e0f3f8","#91bfdb","#4575b4"],
    7: ["#d73027","#fc8d59","#fee090","#ffffbf","#e0f3f8","#91bfdb","#4575b4"],
    8: ["#d73027","#f46d43","#fdae61","#fee090","#e0f3f8","#abd9e9","#74add1","#4575b4"],
    9: ["#d73027","#f46d43","#fdae61","#fee090","#ffffbf","#e0f3f8","#abd9e9","#74add1","#4575b4"],
    10: ["#a50026","#d73027","#f46d43","#fdae61","#fee090","#e0f3f8","#abd9e9","#74add1","#4575b4","#313695"],
    11: ["#a50026","#d73027","#f46d43","#fdae61","#fee090","#ffffbf","#e0f3f8","#abd9e9","#74add1","#4575b4","#313695"]};
    var colormap = {};
    """
    js_recolor = """
    var j = 0;
    for (i = 0; i < data['sample'].length; i++) {
        if (data[label][i] in colormap) {
        } else {
            colormap[data[label][i]] = j;
            j++;
        }
    }
    var nfac = Object.keys(colormap).length;
    if (nfac > 11) {
        nfac = 11;
    }
    if (nfac < 3) {
        nfac = 3;
    }
    var colors = RdYlBu[nfac];
    for (i = 0; i < data[label].length; i++) {
        data['color'][i] = colors[colormap[data[label][i]]];
    }
    source.trigger('change');
    """

    # Radio button group callback. NOTE(review): currently unused; see
    # the RadioButtonGroup comment below.
    callback_rbg = CustomJS(args=dict(source=source), code="""
    var data = source.get('data');
    var active = cb_obj.get('active');
    var label = cb_obj.get('label')[active];
    """ + js_palette + js_recolor)

    # Toggle callback: recolours the points by the toggle's label
    # column whenever the toggle's 'active' value is falsy.
    callback = CustomJS(args=dict(source=source), code="""
    var data = source.get('data');
    var active = cb_obj.get('active');
    var label = cb_obj.get('label');
    """ + js_palette + """
    if (!active) {
    """ + js_recolor + """
    }
    """)

    if md is not None:
        # Waiting for callbacks to be implemented upstream in bokeh
        # rbg = RadioButtonGroup(labels=list(md.columns),
        #                        callback=callback)
        toggle_buttons = [Toggle(label=x, callback=callback)
                          for x in list(md.columns)]
    else:
        toggle_buttons = []
        # rbg = RadioButtonGroup()

    # PC components
    # NOTE(review): the two dropdowns below are created but never added
    # to the returned layout, so their callbacks are left untouched.
    # Before wiring them in, check that xcallback overwrites the
    # 'sample' and 'FileID' columns and that the menu values are
    # 1-based while the component columns are 0-based.
    xcallback = CustomJS(args=dict(source=source), code="""
    var data = source.get('data');
    var active = cb_obj.get('active')
    var value = cb_obj.get('value')
    x = data['x']
    for (i = 0; i < x.length; i++) {
        x[i] = data[value][i]
        data['sample'][i] = value
        data['FileID'][i] = active
    }
    source.trigger('change');
    """)
    ycallback = CustomJS(args=dict(source=source), code="""
    var data = source.get('data');
    var value = cb_obj.get('value')
    y = data['y']
    for (i = 0; i < y.length; i++) {
        y[i] = data[value][i]
    }
    source.trigger('change');
    """)

    # Only purely numeric column names are component columns; the
    # anchored raw-string pattern keeps names such as "3prime" away
    # from int().
    pca_components = sorted([int(x) + 1 for x in source.column_names
                             if re.match(r"\d+$", x)])
    menulist = [(str(x), str(x)) for x in pca_components]
    component_x = Dropdown(label = "PCA component x", menu = menulist,
                           default_value="1", callback=xcallback)
    component_y = Dropdown(label = "PCA component y", menu = menulist,
                           default_value="2", callback=ycallback)

    def _component_label(component):
        """Axis label for a 1-based component, with variance if known."""
        if pcaobj is None:
            return "Component {}".format(component)
        return "Component {} ({:.2f}%)".format(
            component,
            100.0 * pcaobj.explained_variance_ratio_[component - 1])

    # Make the pca plot
    kwfig = {'plot_width': 400, 'plot_height': 400,
             'title_text_font_size': "12pt"}
    p1 = figure(title="Principal component analysis", tools=TOOLS, **kwfig)
    points(p1, 'x', 'y', source=source, color='color', size=10, alpha=.7)
    kwxaxis = {'axis_label': _component_label(pcacomp[0]),
               'axis_label_text_font_size': '10pt',
               'major_label_orientation': np.pi/3}
    kwyaxis = {'axis_label': _component_label(pcacomp[1]),
               'axis_label_text_font_size': '10pt',
               'major_label_orientation': np.pi/3}
    xaxis(p1, **kwxaxis)
    yaxis(p1, **kwyaxis)
    tooltiplist = [("sample", "@sample")] if "sample" in source.column_names else []
    if md is not None:
        tooltiplist = tooltiplist + [(str(x), "@{}".format(x))
                                     for x in md.columns]
    tooltips(p1, HoverTool, tooltiplist)

    # Detected genes, FPKM and TPM
    p2 = figure(title="Number of detected genes",
                x_range=list(df_pca.index), tools=TOOLS, **kwfig)
    kwxaxis.update({'axis_label': "Sample"})
    kwyaxis.update({'axis_label': "Detected genes"})
    dotplot(p2, "sample", "FPKM", source=source)
    xaxis(p2, **kwxaxis)
    yaxis(p2, **kwyaxis)
    tooltips(p2, HoverTool, [('sample', '@sample'),
                             ('# genes (FPKM)', '@FPKM')])
    return {'fig': vform(*(toggle_buttons + [gridplot([[p1, p2]])]))}
def scrnaseq_alignment_qc_plots(rseqc_read_distribution=None,
                                rseqc_gene_coverage=None,
                                star_results=None):
    """Make alignment QC plots for scrnaseq workflow

    The three csv files are joined on their "Sample" column into one
    data source that backs both the summary table and every plot.

    Args:
      rseqc_read_distribution (str): RSeQC read distribution results csv file
      rseqc_gene_coverage (str): RSeQC gene coverage results csv file
      star_results (str): star alignment results csv file

    Returns:
      dict: dictionary with keys 'fig' pointing to a
        (:py:class:`~bokeh.models.GridPlot`) Bokeh GridPlot object and
        key 'table' pointing to a
        (:py:class:`~bokeh.widgets.DataTable`) DataTable
    """
    df_star = pd.read_csv(star_results, index_col="Sample")
    df_rseqc_rd = pd.read_csv(
        rseqc_read_distribution, index_col="Sample"
    ).reset_index().pivot_table(
        columns=["Group"], values=["Tag_count"], index=["Sample"])
    # Flatten the (value, group) column tuples produced by the pivot
    # into names such as "Tag_count_ExonMap".
    df_rseqc_rd.columns = ["_".join(x) if isinstance(x, tuple) else x
                           for x in df_rseqc_rd.columns]
    df_rseqc_gc = pd.read_csv(rseqc_gene_coverage, index_col="Sample")
    df_all = df_star.join(df_rseqc_rd)
    df_all = df_all.join(df_rseqc_gc['three_prime_map'])
    source = ColumnDataSource(df_all)

    columns = [
        TableColumn(field="Sample", title="Sample"),
        TableColumn(field="Number_of_input_reads",
                    title="Number of input reads"),
        TableColumn(field="Uniquely_mapped_reads_PCT",
                    title="Uniquely mapped reads (%)"),
        TableColumn(field="Mismatch_rate_per_base__PCT",
                    title="Mismatch rate per base (%)"),
        TableColumn(field="Insertion_rate_per_base",
                    title="Insertion rate per base (%)"),
        TableColumn(field="Deletion_rate_per_base",
                    title="Deletion rate per base (%)"),
        TableColumn(field="PCT_of_reads_unmapped",
                    title="Unmapped reads (%)"),
    ]
    table = DataTable(source=source, columns=columns,
                      editable=False, width=1000)

    TOOLS = "pan,wheel_zoom,box_zoom,box_select,lasso_select,resize,reset,save,hover"
    kwfig = {'plot_width': 400, 'plot_height': 400,
             'title_text_font_size': "12pt"}
    kwxaxis = {'axis_label': 'Sample',
               'major_label_orientation': np.pi/3}
    kwyaxis = {'axis_label_text_font_size': '10pt',
               'major_label_orientation': np.pi/3}

    def _sample_panel(title, series, tips, ylabel, **figure_kwargs):
        """Build one per-sample dotplot figure with the shared styling.

        ``series`` is a list of (column, glyph keyword dict) pairs and
        ``tips`` the hover entries shown after the sample name.
        """
        fig = figure(title=title, tools=TOOLS,
                     **dict(kwfig, **figure_kwargs))
        for column, glyph_kwargs in series:
            dotplot(fig, "Sample", column, source=source, **glyph_kwargs)
        xaxis(fig, **kwxaxis)
        yaxis(fig, axis_label=ylabel, **kwyaxis)
        # Every panel leads with the sample name; building the entry
        # here keeps the field name consistent (one panel used the
        # non-existent '@samples').
        tooltips(fig, HoverTool, [('Sample', '@Sample')] + tips)
        return fig

    # Input reads
    p1 = _sample_panel(
        "Number of input reads",
        [("Number_of_input_reads", {})],
        [('Reads', '@Number_of_input_reads')],
        "Reads",
        x_range=list(df_all.index), y_axis_type="log")

    # Uniquely mapping
    p2 = _sample_panel(
        "Uniquely mapping reads",
        [("Uniquely_mapped_reads_PCT", {})],
        [('Pct_mapped', '@Uniquely_mapped_reads_PCT')],
        "Percent",
        x_range=p1.x_range, y_range=[0, 100])

    # Unmapped
    p3 = _sample_panel(
        "Unmapped reads",
        [("PCT_of_reads_unmapped", {})],
        [('Pct_unmapped', '@PCT_of_reads_unmapped')],
        "Percent",
        x_range=p1.x_range, y_range=[0, 100])

    # Mismatch/indel rate
    p4 = _sample_panel(
        "Mismatch and indel rates",
        [("Mismatch_rate_per_base__PCT", {'legend': "Mismatch"}),
         ("Insertion_rate_per_base", {'legend': "Insertion",
                                      'color': "red"}),
         ("Deletion_rate_per_base", {'legend': "Deletion",
                                     'color': "green"})],
        [('Mismatch rate per base', '@Mismatch_rate_per_base__PCT'),
         ('Insertion rate per base', '@Insertion_rate_per_base'),
         ('Deletion rate per base', '@Deletion_rate_per_base')],
        "Percent",
        x_range=p1.x_range)
    select_tool = p4.select(dict(type=BoxSelectTool))
    select_tool.dimensions = ['width']

    # Mismatch/indel sum
    # NOTE(review): 'mismatch_sum' is not computed in this function;
    # presumably it is already a column of star_results -- confirm.
    p5 = _sample_panel(
        "Mismatch/indel sum",
        [("mismatch_sum", {})],
        [('Mismatch/indel rate per base', '@mismatch_sum')],
        "Percent",
        x_range=p1.x_range)
    select_tool = p5.select(dict(type=BoxSelectTool))
    select_tool.dimensions = ['width']

    # Tags mapping to exons
    p6 = _sample_panel(
        "Tags mapping to exons",
        [("Tag_count_ExonMap", {})],
        [('ExonMap', '@Tag_count_ExonMap')],
        "Percent",
        x_range=p1.x_range)

    # Fraction reads mapping to 10% right-most end
    p7 = _sample_panel(
        "Reads mapping to 3' end",
        [("three_prime_map", {})],
        [("3' map", '@three_prime_map')],
        "Percent",
        x_range=p1.x_range)

    return {'fig': gridplot([[p1, p2, p3], [p4, p5, p6], [p7, None, None]]),
            'table': table}