def plot_volcano(self, padj=0.05):
    """Draw a volcano plot: log2 fold change versus -log10(adjusted p-value).

    Rows of ``self.df`` with an adjusted p-value above *padj* are drawn in
    red, the others in black. Rows whose ``padj`` is NaN satisfy neither
    comparison and are therefore not drawn. A dashed horizontal line marks
    the threshold.

    :param float padj: adjusted p-value threshold used both to split the
        rows into two groups and to position the horizontal line.

    .. plot::
        :include-source:

        from sequana.rnadiff import RNADiffResults
        from sequana import sequana_data

        r = RNADiffResults(sequana_data("rnadiff/rnadiff_onecond_1"))
        r.plot_volcano()

    """
    above = self.df.query("padj>@padj")
    below = self.df.query("padj<=@padj")

    pylab.figure()
    for subset, color in ((above, "r"), (below, "k")):
        pylab.plot(
            subset.log2FoldChange,
            -np.log10(subset.padj),
            marker="o",
            alpha=0.5,
            color=color,
            lw=0,
        )
    pylab.grid(True)
    pylab.xlabel("fold change")
    pylab.ylabel("log10 adjusted p-value")

    # Symmetric x-axis around zero. Series.abs().max() skips NaN, whereas the
    # builtin min()/max() give order-dependent results when NaN is present.
    limit = self.df.log2FoldChange.abs().max()
    pylab.xlim([-limit, limit])

    # Anchor the y-axis at zero and keep the automatically chosen upper bound.
    _, ymax = pylab.ylim()
    pylab.ylim([0, ymax])

    pylab.axhline(
        -np.log10(padj),
        lw=2,
        ls="--",
        color="r",
        label=f"pvalue threshold ({padj})",
    )
def onpick(event):
    """Pick-event callback: show the picked gene and its coordinates as title.

    The callback relies on names that must exist in the enclosing scope:
    ``A`` and ``B`` (data frames with ``log2FoldChange`` and ``padj``
    columns), ``cond1`` (label of the line drawn from ``A``), ``ax`` (the
    axes whose title is updated) and ``self`` (the event is stored on it).

    The title contains the coordinates of the picked point, followed by the
    coordinates of the same gene in the other data frame when it is present
    there.
    """

    def _position(frame, gene):
        # (log2 fold change, -log10 adjusted p-value), rounded to 1 decimal
        row = frame.loc[gene]
        return round(row.log2FoldChange, 1), round(-np.log10(row.padj), 1)

    self.event = event

    # The label of the picked line tells which data frame the point is from.
    if event.artist.get_label() == cond1:
        picked, other = A, B
    else:
        picked, other = B, A

    gene_name = picked.index[event.ind[0]]
    x1, y1 = _position(picked, gene_name)
    try:
        x2, y2 = _position(other, gene_name)
    except KeyError:
        # Gene is absent from the other data frame: report one position only.
        title = "{} at pos [{},{}]".format(gene_name, x1, y1)
    else:
        title = "{} at pos [{},{}] and [{},{}]".format(gene_name, x1, y1, x2, y2)

    ax.title.set_text(title)
    pylab.draw()
def plot_volcano_differences(self, mode="all"):
    """Overlay the volcano plots of two analyses and link matching genes.

    The genes listed in ``gene_lists[mode]`` of ``self.r1`` and ``self.r2``
    are split into those present in both lists and those present in only
    one. Genes found in both lists are drawn as circles joined by a black
    segment. Genes found in one list only are drawn as stars and joined by
    a red segment to their position in the full table of the other
    analysis.

    :param str mode: key used to look up ``gene_lists`` on both analyses.
    :return: tuple ``(Aonly, Bonly, Acommon, Bcommon)`` of data frames.
    """
    A = self.r1.df.loc[self.r1.gene_lists[mode]]
    B = self.r2.df.loc[self.r2.gene_lists[mode]]

    # Index set operations replace the former ``.loc[set(...)]`` calls:
    # pandas >= 2.0 refuses a set as indexer.
    common = A.index.intersection(B.index)
    Aonly = A.loc[A.index.difference(B.index)]
    Bonly = B.loc[B.index.difference(A.index)]
    Acommon = A.loc[common]
    Bcommon = B.loc[common]

    pylab.clf()
    pylab.plot(
        Acommon.log2FoldChange,
        -np.log10(Acommon.padj),
        marker="o",
        alpha=0.5,
        color="r",
        lw=0,
        label="Common in experiment 1",
        pickradius=4,
        picker=True,
    )
    pylab.plot(
        Bcommon.log2FoldChange,
        -np.log10(Bcommon.padj),
        marker="o",
        alpha=0.5,
        color="orange",
        lw=0,
        label="Common in experiment 2",
        pickradius=4,
        picker=True,
    )

    # Join the two positions of every gene found in both lists.
    for gene in common:
        row_a = A.loc[gene]
        row_b = B.loc[gene]
        pylab.plot(
            [row_a.log2FoldChange, row_b.log2FoldChange],
            [-np.log10(row_a.padj), -np.log10(row_b.padj)],
            "k",
            alpha=0.5,
        )

    pylab.plot(
        Bonly.log2FoldChange,
        -np.log10(Bonly.padj),
        marker="*",
        alpha=0.5,
        color="blue",
        lw=0,
        label="In experiment 2 only",
        pickradius=4,
        picker=True,
    )
    pylab.plot(
        Aonly.log2FoldChange,
        -np.log10(Aonly.padj),
        marker="*",
        alpha=0.5,
        color="cyan",
        lw=0,
        label="In experiment 1 only",
        pickradius=4,
        picker=True,
    )

    # Join each exclusive gene to its position in the *full* table of the
    # other analysis (dashed for experiment 2 only, solid for experiment 1).
    for exclusive, other_table, line_style in (
        (Bonly, self.r1.df, "--"),
        (Aonly, self.r2.df, "-"),
    ):
        for name, row in exclusive.iterrows():
            other = other_table.loc[name]
            pylab.plot(
                [row.log2FoldChange, other.log2FoldChange],
                [-np.log10(row.padj), -np.log10(other.padj)],
                ls=line_style,
                color="r",
            )

    # Threshold line at an adjusted p-value of 0.05; the former literal 1.33
    # was slightly off (-log10(0.05) is about 1.30).
    pylab.axhline(-np.log10(0.05), alpha=0.5, ls="--", color="r")

    pylab.xlabel("log2 fold Change")
    pylab.ylabel("log10 adjusted p-values")
    pylab.legend()
    pylab.grid(True)

    return Aonly, Bonly, Acommon, Bcommon
def imshow_anova_pairs(self, log=True, **kargs):
    """Show the pairwise one-way ANOVA p-values between columns of ``self.df``.

    ``scipy.stats.f_oneway`` is evaluated for every ordered pair of columns
    and the resulting p-values are gathered in a square data frame indexed
    by the column names. Missing p-values are replaced by 1.

    :param log: when equal to ``True`` the image shows ``-log10`` of the
        p-values, otherwise the p-values themselves.
    :param kargs: forwarded to the ``plot`` method of ``Imshow``.
    :return: the data frame of p-values (never log-transformed).
    """
    import scipy.stats

    names = self.df.columns
    size = len(names)

    pvalues = np.ones((size, size))
    for row, first in enumerate(names):
        for col, second in enumerate(names):
            _, pvalue = scipy.stats.f_oneway(self.df[first], self.df[second])
            pvalues[row, col] = pvalue

    # FIXME: may have na, which are set to 1
    df = pd.DataFrame(pvalues, index=names, columns=names).fillna(1)

    # Equality with True (not mere truthiness) is kept on purpose so that
    # non-boolean values of ``log`` behave exactly as before.
    image = -np.log10(df) if log == True else df  # noqa: E712
    Imshow(image).plot(**kargs)
    return df
def plot_volcano(
    self,
    padj=0.05,
    add_broken_axes=False,
    markersize=4,
    limit_broken_line=(20, 40),
    plotly=False,
    annotations=None,
):
    """Volcano plot of log2 fold change versus -log10(adjusted p-value).

    :param float padj: adjusted p-value threshold used to colour the points
        and to position the dashed threshold line.
    :param bool add_broken_axes: if set, and if the largest -log10(padj)
        exceeds the first value of *limit_broken_line*, the figure is drawn
        with the ``brokenaxes`` package (imported only in that case).
    :param markersize: marker size of the matplotlib points.
    :param limit_broken_line: pair ``(br1, br2)``. ``br1`` is the upper
        bound of the lower panel and the level above which the axis is
        broken; ``br2`` is the lower bound applied to the first sub-axes.
    :param bool plotly: if set, build and return a plotly figure instead of
        drawing with matplotlib.
    :param annotations: optional object whose ``annotation`` attribute is
        concatenated column-wise to the table (plotly mode only). A failure
        to merge is logged and otherwise ignored.
    :return: the plotly figure in plotly mode, otherwise the object used
        for drawing (``pylab`` or the broken-axes instance).

    .. plot::
        :include-source:

        from sequana.rnadiff import RNADiffResults
        from sequana import sequana_data

        r = RNADiffResults(sequana_data("rnadiff/rnadiff_onecond_1"))
        r.plot_volcano()

    """
    if plotly:
        from plotly import express as px

        df = self.df.copy()

        if annotations is not None:
            try:
                df = pd.concat([df, annotations.annotation], axis=1)
            except Exception as err:
                # Best effort: the plot is still useful without annotations.
                logger.warning(
                    f"Could not merge rnadiff table with annotation. Full error is: {err}"
                )

        df["log_adj_pvalue"] = -pylab.log10(df.padj)
        df["significance"] = [
            f"<{padj}" if significant else f">={padj}"
            for significant in df.padj < padj
        ]

        # First identifier column available, in order of preference.
        hover_name = next(
            (
                column
                for column in ("Name", "gene_id", "locus_tag", "ID")
                if column in df.columns
            ),
            None,
        )

        fig = px.scatter(
            df,
            x="log2FoldChange",
            y="log_adj_pvalue",
            hover_name=hover_name,
            hover_data=["baseMean"],
            log_y=False,
            opacity=0.5,
            color="significance",
            height=600,
            labels={"log_adj_pvalue": "log adjusted p-value"},
        )

        # Horizontal threshold line drawn as a layout shape, which does not
        # depend on the add_hline helper of more recent plotly releases.
        threshold = -pylab.log10(padj)
        fig.update_layout(
            shapes=[
                dict(
                    type="line",
                    xref="x",
                    x0=df.log2FoldChange.min(),
                    x1=df.log2FoldChange.max(),
                    yref="y",
                    y0=threshold,
                    y1=threshold,
                    line=dict(color="black", width=1, dash="dash"),
                )
            ]
        )
        return fig

    # Series.max() (rather than the builtin) so that a table without any
    # valid padj gives NaN instead of raising on an empty sequence.
    max_log_padj = (-pylab.log10(self.df.padj.dropna())).max()
    br1, br2 = limit_broken_line

    use_broken_axes = bool(add_broken_axes and max_log_padj > br1)
    if use_broken_axes:
        from brokenaxes import brokenaxes

        bax = brokenaxes(
            ylims=((0, br1), (max_log_padj - 10, max_log_padj)), xlims=None
        )
    else:
        bax = pylab

    above = self.df.query("padj>@padj")
    below = self.df.query("padj<=@padj")
    for subset, color in ((above, "k"), (below, "r")):
        bax.plot(
            subset.log2FoldChange,
            -np.log10(subset.padj),
            marker="o",
            alpha=0.5,
            color=color,
            lw=0,
            markersize=markersize,
        )
    bax.grid(True)

    # Symmetric x-axis around zero; Series.abs().max() skips NaN.
    limit = self.df.log2FoldChange.abs().max()

    # pylab and the broken-axes object expose different method names; branch
    # explicitly instead of probing with bare try/except.
    # NOTE(review): the broken-axes calls are the ones the previous code
    # attempted first - confirm against the installed brokenaxes version.
    if use_broken_axes:
        bax.set_xlabel("fold change")
        bax.set_ylabel("log10 adjusted p-value")
        bax.set_xlim([-limit, limit])
        first_ylim, _ = bax.get_ylim()
        bax.axs[0].set_ylim([br2, first_ylim[1] * 1.1])
    else:
        bax.xlabel("fold change")
        bax.ylabel("log10 adjusted p-value")
        bax.xlim([-limit, limit])
        _, ymax = bax.ylim()
        bax.ylim([0, ymax])

    # The threshold line follows *padj* (it used to be fixed at 0.05).
    bax.axhline(
        -np.log10(padj),
        lw=2,
        ls="--",
        color="r",
        label=f"pvalue threshold ({padj})",
    )
    return bax

    # NOTE(review): everything below is unreachable. It is the tail of a
    # different function (it reads ``p``, ``n_components``, ``colors`` and
    # ``max_features``, none of which is defined here) whose header is
    # missing from this part of the file. It is kept untouched until it can
    # be moved back to where it belongs.
    if colors is None:
        colors = {}
        for sample in self.sample_names:
            colors[sample] = self.colors[self.get_cond_from_sample(sample)]

    if plotly is True:
        assert n_components == 3
        variance = p.plot(
            n_components=n_components,
            colors=colors,
            show_plot=False,
            max_features=max_features,
        )
        from plotly import express as px

        df = pd.DataFrame(p.Xr)
        df.columns = ["PC1", "PC2", "PC3"]
        df["names"] = self.sample_names
        df["colors"] = [colors[x] for x in self.sample_names]
        df["size"] = [10] * len(df)
        df[self.condition] = [
            self.get_cond_from_sample(sample) for sample in self.sample_names
        ]
        fig = px.scatter_3d(
            df,
            x="PC1",
            y="PC2",
            z="PC3",
            color=self.condition,
            labels={
                "PC1": "PC1 ({}%)".format(round(100 * variance[0], 2)),
                "PC2": "PC2 ({}%)".format(round(100 * variance[1], 2)),
                "PC3": "PC3 ({}%)".format(round(100 * variance[2], 2)),
            },
            height=800,
            text="names",
        )
        return fig
    else:
        variance = p.plot(n_components=n_components, colors=colors, max_features=max_features)
        return variance
def plot_volcano(self, labels=None):
    """Volcano plot of log2 fold change versus log10 of adjusted p-value

    The genes of ``gene_lists["all"]`` are drawn for both analyses
    (``self.r1`` as red circles, ``self.r2`` as black crosses). Points are
    pickable: picking one sets the axes title to the gene identifier and
    its coordinates, followed by the coordinates of the same gene in the
    other analysis when it is present there.

    :param labels: pair of legend labels for the first and second analysis.
        Defaults to ``["cond1", "cond2"]``.

    .. plot::
        :include-source:

        from sequana import sequana_data
        from sequana.compare import RNADiffCompare

        c = RNADiffCompare(
            sequana_data("rnadiff/rnadiff_onecond_1"),
            sequana_data("rnadiff/rnadiff_onecond_2"))
        c.plot_volcano()

    """
    if labels is None:
        labels = ["cond1", "cond2"]

    A = self.r1.df.loc[self.r1.gene_lists["all"]]
    B = self.r2.df.loc[self.r2.gene_lists["all"]]

    pylab.clf()
    # Keep the line of the first analysis: the pick callback identifies the
    # picked series by artist identity, which stays correct whatever the
    # labels are (comparing against a fixed label broke custom labels).
    (line_a,) = pylab.plot(
        A.log2FoldChange,
        -np.log10(A.padj),
        marker="o",
        alpha=0.5,
        color="r",
        lw=0,
        label=labels[0],
        pickradius=4,
        picker=True,
    )
    pylab.plot(
        B.log2FoldChange,
        -np.log10(B.padj),
        marker="x",
        alpha=0.5,
        color="k",
        lw=0,
        label=labels[1],
        pickradius=4,
        picker=True,
    )

    pylab.grid(True)
    pylab.xlabel("fold change")
    pylab.ylabel("log10 adjusted p-value")
    pylab.legend(loc="lower right")
    ax = pylab.gca()

    def _position(frame, gene):
        # (log2 fold change, -log10 adjusted p-value), rounded to 1 decimal
        row = frame.loc[gene]
        return round(row.log2FoldChange, 1), round(-np.log10(row.padj), 1)

    def onpick(event):
        # Kept on the instance, as before, so the last event can be inspected.
        self.event = event

        if event.artist is line_a:
            picked, other = A, B
        else:
            picked, other = B, A

        gene_name = picked.index[event.ind[0]]
        x1, y1 = _position(picked, gene_name)
        try:
            x2, y2 = _position(other, gene_name)
        except KeyError:
            # Gene absent from the other analysis: report one position only.
            title = "{} at pos [{},{}]".format(gene_name, x1, y1)
        else:
            title = "{} at pos [{},{}] and [{},{}]".format(
                gene_name, x1, y1, x2, y2
            )
        ax.title.set_text(title)
        pylab.draw()

    fig = pylab.gcf()
    fig.canvas.mpl_connect("pick_event", onpick)