def scrnaseq_results_plot_alignrseqc(df, **kwargs):
    """Plot scRNASeq QC summary.

    Builds a data table and seven dot plots (input reads, uniquely
    mapped reads, unmapped reads, mismatch/indel rates, mismatch/indel
    sum, exon mapping and 3' end mapping) that all share one
    ColumnDataSource and the x range of the first plot.

    Args:
      df (DataFrame): data frame consisting of summary data from
        alignments and RSeQC. It must have a column "SM" (used as
        index) and the columns referenced below:
        "Number_of_input_reads", "Uniquely_mapped_reads_PCT",
        "PCT_of_reads_unmapped", "Mismatch_rate_per_base,_PCT",
        "Insertion_rate_per_base", "Deletion_rate_per_base",
        "mismatch_sum", "ExonMap_PCT" and "three_prime_map".
      kwargs: keyword arguments to bokeh plot functions. They are
        merged over the built-in defaults and passed to every dotplot
        call. The per-plot settings made here ('x_range', 'y_range',
        'y_axis_type', 'color', and 'y_axis_label' for every plot but
        the first) still take precedence.

    Returns:
      (dict): dictionary with a bokeh gridplot (key 'fig') and a
        table (key 'table')
    """
    df = df.set_index("SM")
    source = ColumnDataSource(df)
    columns = [
        TableColumn(field="SM", title="Sample"),
        TableColumn(field="Number_of_input_reads",
                    title="Number of input reads"),
        TableColumn(field="Uniquely_mapped_reads_PCT",
                    title="Uniquely mapped reads (%)"),
        TableColumn(field="Mismatch_rate_per_base,_PCT",
                    title="Mismatch rate per base (%)"),
        TableColumn(field="Insertion_rate_per_base",
                    title="Insertion rate per base (%)"),
        TableColumn(field="Deletion_rate_per_base",
                    title="Deletion rate per base (%)"),
        TableColumn(field="PCT_of_reads_unmapped",
                    title="Unmapped reads (%)"),
    ]
    table = DataTable(source=source, columns=columns,
                      editable=False, width=1000)

    defaults = {
        'plot_width': 400,
        'plot_height': 400,
        'title_text_font_size': "16pt",
        'x_axis_label': 'Sample',
        'x_axis_label_text_font_size': '12pt',
        'x_major_label_orientation': np.pi/3,
        'y_axis_label': 'Reads',
        'y_axis_label_text_font_size': '12pt',
        'y_major_label_orientation': np.pi/3,
    }
    # Caller-supplied options override the defaults. Previously the
    # kwargs argument was overwritten by the defaults and thus ignored.
    count_kwargs = dict(defaults, **kwargs)
    # Every plot after the first is labelled 'Percent' on the y axis.
    pct_kwargs = dict(count_kwargs, y_axis_label='Percent')

    # Input reads
    p1 = dotplot(
        df=source,
        title="Number of input reads",
        tools=DEFAULT_TOOLS,
        x="SM",
        y="Number_of_input_reads",
        **dict(count_kwargs, x_range=list(df.index), y_axis_type="log")
    )
    tooltips(p1, HoverTool, [('Sample', '@SM'),
                             ('Reads', '@Number_of_input_reads')])
    # All remaining plots reuse p1's x range.
    shared_x = p1.x_range

    # Uniquely mapping
    p2 = dotplot(
        df=source,
        tools=DEFAULT_TOOLS,
        title="Uniquely mapping reads",
        x="SM",
        y="Uniquely_mapped_reads_PCT",
        **dict(pct_kwargs, x_range=shared_x, y_range=[0, 100])
    )
    tooltips(p2, HoverTool, [('Sample', '@SM'),
                             ('Pct_mapped', '@Uniquely_mapped_reads_PCT')])

    # Unmapped
    p3 = dotplot(
        df=source,
        tools=DEFAULT_TOOLS,
        title="Unmapped reads",
        x="SM",
        y="PCT_of_reads_unmapped",
        **dict(pct_kwargs, x_range=shared_x, y_range=[0, 100])
    )
    tooltips(p3, HoverTool, [('Sample', '@SM'),
                             ('Pct_unmapped', '@PCT_of_reads_unmapped')])

    # Mismatch/indel rate
    p4 = dotplot(
        df=source,
        tools=DEFAULT_TOOLS,
        title="Mismatch/indel rate",
        x="SM",
        y=[
            "Mismatch_rate_per_base,_PCT",
            "Insertion_rate_per_base",
            "Deletion_rate_per_base",
        ],
        **dict(pct_kwargs, x_range=shared_x, y_range=[0, 1],
               color=["blue", "red", "green"])
    )
    tooltips(p4, HoverTool, [
        ('Sample', '@SM'),
        ('Mismatch rate per base', '@Mismatch_rate_per_base,_PCT'),
        ('Insertion rate per base', '@Insertion_rate_per_base'),
        ('Deletion rate per base', '@Deletion_rate_per_base'),
    ])
    select_tool = p4.select(dict(type=BoxSelectTool))
    select_tool.dimensions = ['width']

    # Mismatch/indel sum
    p5 = dotplot(
        tools=DEFAULT_TOOLS,
        title="Mismatch/indel sum",
        x="SM",
        y="mismatch_sum",
        df=source,
        **dict(pct_kwargs, x_range=shared_x)
    )
    tooltips(p5, HoverTool, [
        ('Sample', '@SM'),
        ('Mismatch/indel rate per base', '@mismatch_sum'),
    ])
    select_tool = p5.select(dict(type=BoxSelectTool))
    select_tool.dimensions = ['width']

    # Tags mapping to exons
    p6 = dotplot(
        tools=DEFAULT_TOOLS,
        title="Tags mapping to exons",
        x="SM",
        y="ExonMap_PCT",
        df=source,
        **dict(pct_kwargs, x_range=shared_x)
    )
    tooltips(p6, HoverTool, [
        ('Sample', '@SM'),
        ('ExonMap (%)', '@ExonMap_PCT'),
    ])

    # Fraction reads mapping to 10% right-most end
    p7 = dotplot(
        tools=DEFAULT_TOOLS,
        title="Reads mapping to 3' end",
        x="SM",
        y="three_prime_map",
        df=source,
        **dict(pct_kwargs, x_range=shared_x)
    )
    tooltips(p7, HoverTool, [
        ('Sample', '@SM'),
        ("3' map", '@three_prime_map'),
    ])

    return {'fig': gridplot([[p1, p2, p3], [p4, p5, p6], [p7]]),
            'table': table}
def plot_pca(pcaobjfile, pca_results_file=None, metadata=None,
             taptool_url=None, **kwargs):
    """Make PCA plot

    Builds three linked figures (PCA scores, PCA loadings and the
    number of detected genes per sample) together with the widgets
    (toggle buttons and component selectors) that drive them through
    JavaScript callbacks.

    Args:
      pcaobjfile (str): file name containing pickled pca object. The
        object must provide ``explained_variance_ratio_``,
        ``components_``, ``features`` and ``labels``.
      pca_results_file (str): pca results file. It is read
        unconditionally with pandas.read_csv, so a file name has to be
        supplied even though the parameter defaults to None.
      metadata (str): metadata file name
      taptool_url (str): url prefix that is attached to taptool;
        typically a link to ensembl
      kwargs: optional settings read by this function: 'index_col'
        (index column of the pca results file, default "SM"), 'color'
        (point color, default 'red') and 'size' (point size, default
        10). They are not forwarded to the plotting functions.

    Returns:
      dict: dictionary with the single key 'pca' pointing to the vform
        layout that holds the widgets followed by a gridplot of the
        three figures

    Raises:
      Exception: whatever is raised while attaching ``gene_id`` or
        ``gene_name`` to the loadings is logged and re-raised.
    """
    # NOTE(review): pickle.load can execute arbitrary code; pcaobjfile
    # must come from a trusted source.
    with open(pcaobjfile, 'rb') as fh:
        pcaobj = pickle.load(fh)
    md = None
    if metadata is not None:
        md = pd.read_csv(metadata, index_col=0)

    df_pca = pd.read_csv(pca_results_file,
                         index_col=kwargs.get('index_col', "SM"))
    n_samples = df_pca.shape[0]
    df_pca['color'] = [kwargs.get('color', 'red')] * n_samples
    # Columns '0' and '1' are displayed initially.
    df_pca['x'] = df_pca['0']
    df_pca['y'] = df_pca['1']
    df_pca['size'] = [kwargs.get('size', 10)] * n_samples
    pca_source = ColumnDataSource(df_pca)
    # NOTE(review): cmap is not used below; the call is kept as is
    # because colorbrewer is defined outside this block.
    cmap = colorbrewer(datalen=n_samples)

    callback = CustomJS(args=dict(source=pca_source),
                        code="""pca_callback(source, cb_obj, "SM");""")
    xcallback = CustomJS(args=dict(source=pca_source),
                         code="""pca_component(source, cb_obj, "x");""")
    ycallback = CustomJS(args=dict(source=pca_source),
                         code="""pca_component(source, cb_obj, "y");""")

    if md is not None:
        # Waiting for callbacks to be implemented upstream in bokeh
        # for RadioButtonGroup; use one Toggle per column until then.
        toggle_buttons = [Toggle(label=x, callback=callback)
                          for x in list(md.columns) + ["TPM", "FPKM"]]
    else:
        toggle_buttons = []

    # Column names that start with digits are the component columns;
    # they are presented 1-based in the menus.
    pca_components = sorted(int(x) + 1 for x in pca_source.column_names
                            if re.match(r"\d+", x))
    menulist = ["{} ({:.2f}%)".format(x, 100.0 * p)
                for x, p in zip(pca_components,
                                pcaobj.explained_variance_ratio_)]
    component_x = Select(title="PCA component x", options=menulist,
                         value=menulist[0], callback=xcallback)
    component_y = Select(title="PCA component y", options=menulist,
                         value=menulist[1], callback=ycallback)

    # Make the pca plot. The options live in their own dict so the
    # caller's kwargs are not shadowed.
    plot_kwargs = {
        'plot_width': 400,
        'plot_height': 400,
        'title_text_font_size': "12pt",
        'title': "Principal component analysis",
        'tools': TOOLS,
        'x_axis_label_text_font_size': '10pt',
        'x_major_label_orientation': np.pi/3,
        'y_axis_label_text_font_size': '10pt',
        'y_major_label_orientation': np.pi/3,
    }
    fig = points('x', 'y', df=pca_source, color='color', size='size',
                 alpha=0.7, **plot_kwargs)
    tooltiplist = ([("sample", "@SM")]
                   if "SM" in pca_source.column_names else [])
    if md is not None:
        tooltiplist = (tooltiplist
                       + [(str(x), "@{}".format(x)) for x in md.columns]
                       + [("Detected genes (TPM)", "@TPM"),
                          ("Detected genes (FPKM)", "@FPKM")])
    tooltips(fig, HoverTool, tooltiplist)

    # Loadings
    loadings = pd.DataFrame(pcaobj.components_).T
    loadings.columns = [str(x) for x in loadings.columns]
    loadings['x'] = loadings['0']
    loadings['y'] = loadings['1']
    try:
        loadings["gene_id"] = pcaobj.features
    except Exception:
        smllogger.warn("failed to set gene_id")
        raise
    try:
        loadings["gene_name"] = [pcaobj.labels[x]
                                 for x in loadings["gene_id"]]
    except Exception:
        smllogger.warn("failed to set gene_name")
        raise
    loadings_source = ColumnDataSource(loadings)
    plot_kwargs.update({'title': "Loadings"})
    loadings_fig = points(x='x', y='y', df=loadings_source, **plot_kwargs)
    tooltips(loadings_fig, HoverTool, [('gene_id', '@gene_id'),
                                       ('gene_name', '@gene_name')])
    x_loadings_callback = CustomJS(
        args=dict(source=loadings_source),
        code="""pca_loadings(source, cb_obj, "x");""")
    y_loadings_callback = CustomJS(
        args=dict(source=loadings_source),
        code="""pca_loadings(source, cb_obj, "y");""")
    # The loadings selectors offer the same component menu as above.
    loadings_component_x = Select(title="PCA loading x", options=menulist,
                                  value=menulist[0],
                                  callback=x_loadings_callback)
    loadings_component_y = Select(title="PCA loading y", options=menulist,
                                  value=menulist[1],
                                  callback=y_loadings_callback)

    # Add taptool url if present
    if taptool_url:
        loadings_fig.add_tools(TapTool(callback=OpenURL(url=taptool_url)))

    # Detected genes, FPKM and TPM
    plot_kwargs.update({'title': 'Number of detected genes',
                        'xlabel': "Sample",
                        'ylabel': "Detected genes",
                        'x_range': list(pca_source.data["SM"])})
    n_genes_fig = dotplot(df=pca_source, x="SM", y="TPM", **plot_kwargs)
    tooltips(n_genes_fig, HoverTool, [('sample', '@SM'),
                                      ('# genes (TPM)', '@TPM'),
                                      ('# genes (FPKM)', '@FPKM')])

    buttons = (toggle_buttons
               + [component_x, component_y]
               + [loadings_component_x, loadings_component_y])
    return {'pca': vform(*(buttons
                           + [gridplot([[fig, loadings_fig,
                                         n_genes_fig]])]))}