def plot_volcano(self):
    """Draw a volcano plot of the differential analysis results.

    Every row of ``self.df`` is drawn at (``log2FoldChange``,
    ``-log10(padj)``) in a new figure. Rows with ``padj > 0.05`` are drawn
    in red and rows with ``padj <= 0.05`` in black; rows whose ``padj`` is
    missing match neither query and are therefore not drawn. A dashed red
    horizontal line marks the 0.05 threshold and the x-axis is made
    symmetric around zero.

    .. plot::
        :include-source:

        from sequana.rnadiff import RNADiffResults
        from sequana import sequana_data

        r = RNADiffResults(sequana_data("rnadiff/rnadiff_onecond_1"))
        r.plot_volcano()

    """
    not_significant = self.df.query("padj>0.05")
    significant = self.df.query("padj<=0.05")

    pylab.figure()
    for subset, color in ((not_significant, "r"), (significant, "k")):
        pylab.plot(
            subset.log2FoldChange,
            -np.log10(subset.padj),
            marker="o",
            alpha=0.5,
            color=color,
            lw=0,
        )
    pylab.grid(True)
    pylab.xlabel("fold change")
    pylab.ylabel("log10 adjusted p-value")

    # Symmetric x-range around zero. Series.abs().max() skips missing values,
    # whereas the builtin min()/max() give order-dependent results as soon
    # as a NaN is present and raise on an empty table.
    limit = self.df.log2FoldChange.abs().max()
    if np.isfinite(limit):
        pylab.xlim([-limit, limit])

    # Anchor the y-axis at zero while keeping the automatic upper bound.
    _, top = pylab.ylim()
    pylab.ylim([0, top])

    pylab.axhline(
        -np.log10(0.05),
        lw=2,
        ls="--",
        color="r",
        label="pvalue threshold (0.05)",
    )
    # Without a legend the label given to the threshold line is never shown.
    pylab.legend()
def plot_idr_vs_peaks(self, filename=None, savefig=False):
    """Plot IDR values against the cumulative number of peaks.

    For 100 IDR values between 0 and ``self.threshold`` (red curve) and 100
    values between ``self.threshold`` and 1 (black curve), the number of
    rows of ``self.df`` whose ``idr`` is below that value is drawn on the
    x-axis and the IDR value itself on the y-axis. Dashed blue lines are
    drawn at IDR = 0.05 and at ``self.N_significant_peaks``.

    :param filename: where to save the figure; should be provided when
        *savefig* is True.
    :param savefig: if True, the figure is saved to *filename*.
    """
    # NOTE(review): the original comments state that "global_idr is actually
    # -log10(idr)"; the ``idr`` column used here is presumably already
    # converted to a probability in [0, 1] -- confirm against the loader.
    pylab.clf()
    X1 = pylab.linspace(0, self.threshold, 100)
    X2 = pylab.linspace(self.threshold, 1, 100)

    # Split the peaks on either side of the threshold.
    df1 = self.df.query("idr<@self.threshold")
    df2 = self.df.query("idr>=@self.threshold")

    pylab.plot([(df1["idr"] < x).sum() for x in X1], X1, "-", color="r", lw=2)

    # The second curve continues where the first one stops: every peak
    # below the threshold is already counted.
    shift = len(df1)
    pylab.plot(
        [shift + (df2["idr"] < x).sum() for x in X2],
        X2,
        "-",
        color="k",
        lw=2,
    )

    pylab.xlabel("Number of significant peaks")
    pylab.ylabel("IDR")
    pylab.axhline(0.05, color="b", ls="--")
    pylab.axvline(self.N_significant_peaks, color="b", ls="--")
    if savefig:
        pylab.savefig(filename)
def plot_polymerase_per_barcode(self, fontsize=12, unbarcoded=True):
    """Number Of Polymerase Reads Per Barcode

    The ``"Polymerase Reads"`` column of ``self.df_barcoded`` is sorted in
    decreasing order and plotted against its rank (starting at 1). A red
    horizontal line shows the mean of these values and the title reports
    their sum.

    :param fontsize: font size of the axis labels.
    :param unbarcoded: if true, also draw a dashed black line at the first
        ``"Polymerase Reads"`` value of ``self.df_not_barcoded``. The line
        is silently skipped when that value cannot be retrieved.
    """
    reads = self.df_barcoded["Polymerase Reads"]
    total = reads.sum()
    data = reads.sort_values(ascending=False).values

    pylab.plot(list(range(1, len(data) + 1)), data, label="barcodes")
    pylab.axhline(data.mean(), color="r", label="average")

    if unbarcoded:
        # Best effort: the non-barcoded table may be missing, empty or lack
        # the column. Only the lookup is guarded so that unrelated errors
        # (and KeyboardInterrupt/SystemExit) are no longer swallowed.
        try:
            unbar = self.df_not_barcoded["Polymerase Reads"].iloc[0]
        except (AttributeError, KeyError, IndexError, TypeError):
            pass
        else:
            pylab.axhline(unbar, color="k", ls="--", label="not barcoded")

    pylab.xlabel("Barcode Rank Order", fontsize=fontsize)
    pylab.ylabel("Counts of Reads", fontsize=fontsize)
    pylab.title("Total Polymerase count: {}".format(total))
    pylab.legend()
    pylab.ylim(ymin=0)

    # The layout adjustment is purely cosmetic; a failure here must not
    # prevent the figure from being produced.
    try:
        pylab.tight_layout()
    except Exception:
        pass
def plot_percentage_null_read_counts(self):
    """Bar plot of the percentage of null counts per sample.

    One bar per entry of ``self.sample_names`` gives the percentage of rows
    of ``self.df`` whose count is zero in that sample. The dashed
    horizontal line gives the percentage of rows whose counts sum to zero
    over all samples.

    .. plot::
        :include-source:

        from sequana.rnadiff import RNADiffResults
        from sequana import sequana_data

        r = RNADiffResults(sequana_data("rnadiff/rnadiff_onecond_1"))
        r.plot_percentage_null_read_counts()

    """
    samples = self.sample_names
    positions = range(len(samples))
    n_features = len(self.df)

    # Per-sample percentage of features with a zero count.
    null_percent = (self.df[samples] == 0).sum() / n_features * 100
    # Number of features with no count in any sample.
    n_all_null = (self.df[samples].sum(axis=1) == 0).sum()

    pylab.clf()
    pylab.bar(positions, null_percent)
    pylab.axhline(n_all_null / n_features * 100, lw=2, ls="--", color="k")
    pylab.xticks(positions, samples)
    pylab.xlabel("Sample")
def scatter_length_cov_gc(self, min_length=200, min_cov=10):
    """Scatter plot of contig coverage versus contig length, colored by GC.

    The ``length``, ``cov`` and ``GC`` columns of ``self.df`` are used; both
    axes are logarithmic.

    :param min_length: position of the dashed red vertical line.
    :param min_cov: position of the dashed red horizontal line.
    """
    contigs = self.df
    threshold_style = dict(lw=2, c="r", ls="--")

    pylab.clf()
    pylab.scatter(contigs.length, contigs["cov"], c=contigs.GC)
    pylab.loglog()
    pylab.axvline(min_length, **threshold_style)
    pylab.axhline(min_cov, **threshold_style)
    pylab.xlabel("contig length")
    pylab.ylabel("contig coverage")
    pylab.colorbar(label="GC")
    pylab.grid(True)
def plot_volcano_differences(self, mode="all"):
    """Overlay the volcano plots of two experiments and link shared features.

    The features listed in ``gene_lists[mode]`` of ``self.r1`` and
    ``self.r2`` are drawn at (``log2FoldChange``, ``-log10(padj)``):

    * features present in both lists are drawn as circles (red for
      experiment 1, orange for experiment 2) joined by a black segment;
    * features present in only one list are drawn as stars (cyan for
      experiment 1, blue for experiment 2) and joined by a red segment to
      the position of the same feature in the other experiment's full
      table (dashed for experiment 2 only, solid for experiment 1 only).

    :param mode: key used to select the gene list in both experiments.
    :return: tuple of dataframes ``(Aonly, Bonly, Acommon, Bcommon)``.
    """
    A = self.r1.df.loc[self.r1.gene_lists[mode]]
    B = self.r2.df.loc[self.r2.gene_lists[mode]]

    # ``.loc`` requires a list-like indexer: sets are rejected by
    # pandas >= 2.0, hence the explicit conversions to list.
    ids_a = set(A.index)
    ids_b = set(B.index)
    AB = list(ids_a & ids_b)

    Aonly = A.loc[list(ids_a - ids_b)]
    Bonly = B.loc[list(ids_b - ids_a)]
    Acommon = A.loc[AB]
    Bcommon = B.loc[AB]

    pylab.clf()
    pylab.plot(
        Acommon.log2FoldChange,
        -np.log10(Acommon.padj),
        marker="o",
        alpha=0.5,
        color="r",
        lw=0,
        label="Common in experiment 1",
        pickradius=4,
        picker=True,
    )
    pylab.plot(
        Bcommon.log2FoldChange,
        -np.log10(Bcommon.padj),
        marker="o",
        alpha=0.5,
        color="orange",
        lw=0,
        label="Common in experiment 2",
        pickradius=4,
        picker=True,
    )

    # Join the two positions of every feature found in both lists.
    for x in AB:
        a_l = A.loc[x].log2FoldChange
        a_p = -np.log10(A.loc[x].padj)
        b_l = B.loc[x].log2FoldChange
        b_p = -np.log10(B.loc[x].padj)
        pylab.plot([a_l, b_l], [a_p, b_p], "k", alpha=0.5)

    pylab.plot(
        Bonly.log2FoldChange,
        -np.log10(Bonly.padj),
        marker="*",
        alpha=0.5,
        color="blue",
        lw=0,
        label="In experiment 2 only",
        pickradius=4,
        picker=True,
    )
    pylab.plot(
        Aonly.log2FoldChange,
        -np.log10(Aonly.padj),
        marker="*",
        alpha=0.5,
        color="cyan",
        lw=0,
        label="In experiment 1 only",
        pickradius=4,
        picker=True,
    )

    # Features selected in experiment 2 only: link them to their position
    # in the complete table of experiment 1.
    for name, x in Bonly.iterrows():
        x1 = x.log2FoldChange
        y1 = -np.log10(x.padj)
        x2 = self.r1.df.loc[name].log2FoldChange
        y2 = -np.log10(self.r1.df.loc[name].padj)
        pylab.plot([x1, x2], [y1, y2], ls="--", color="r")

    # Features selected in experiment 1 only: link them to their position
    # in the complete table of experiment 2.
    for name, x in Aonly.iterrows():
        x1 = x.log2FoldChange
        y1 = -np.log10(x.padj)
        x2 = self.r2.df.loc[name].log2FoldChange
        y2 = -np.log10(self.r2.df.loc[name].padj)
        pylab.plot([x1, x2], [y1, y2], ls="-", color="r")

    # NOTE(review): 1.33 is close to -log10(0.05) (about 1.30); presumably
    # the adjusted p-value threshold -- confirm the intended value.
    pylab.axhline(1.33, alpha=0.5, ls="--", color="r")
    pylab.xlabel("log2 fold Change")
    pylab.ylabel("log10 adjusted p-values")
    pylab.legend()
    pylab.grid(True)

    return Aonly, Bonly, Acommon, Bcommon
def plot(self, clf=True):
    """Plot the ``shustring_length`` column of ``self.df_shustring``.

    Dashed grey horizontal lines are drawn every 1000 units, from 0 up to
    the largest multiple of 1000 that does not exceed the maximum value.
    The maximum value is also printed.

    :param clf: if true, the current figure is cleared first.
    """
    if clf:
        pylab.clf()

    lengths = self.df_shustring.shustring_length
    longest = lengths.max()
    print(longest)

    # One guide line per started block of 1000.
    n_guides = int(longest / 1000) + 1
    for level in range(0, n_guides * 1000, 1000):
        pylab.axhline(level, ls="--", color="grey")

    pylab.plot(self.df_shustring.shustring_length)
    pylab.xlabel("position (bp)")
    pylab.ylabel("Length of repeats")
    pylab.ylim(bottom=0)
def plot(self, X=[0, 0.1, 0.2, 0.3, .4, .5, .6, .7, .8, .9, .95, .99, .999, 1], fontsize=16, label=None):
    """Plot the percentage of genes whose coverage exceeds each threshold.

    For every value ``x`` in *X*, the percentage of rows of ``self.df``
    whose ``self.coverage_column`` value is strictly greater than ``x`` is
    drawn (y-axis) against ``x * 100`` (x-axis). Dashed red guide lines are
    added at 25, 50 and 75 on both axes.

    :param X: coverage thresholds, given as fractions.
    :param fontsize: font size of the axis labels.
    :param label: optional label attached to the curve.
    """
    coverage = self.df[self.coverage_column]
    n_genes = float(len(self.df))
    X = np.array(X)

    percentages = []
    for threshold in X:
        percentages.append(sum(coverage > threshold) / n_genes * 100)
    Y = np.array(percentages)

    # Only forward the label when the caller provided one.
    curve_options = {} if label is None else {"label": label}
    pylab.plot(X * 100, Y, "o-", **curve_options)

    pylab.xlabel("Gene coverage (%)", fontsize=fontsize)
    pylab.ylabel("Percentage of genes covered", fontsize=fontsize)

    guide_style = dict(color="r", alpha=0.5, ls="--")
    for position in (25, 50, 75):
        pylab.axhline(position, **guide_style)
        pylab.axvline(position, **guide_style)
def plot_percentage_null_read_counts(self):
    """Bar plot of the percentage of null counts per sample.

    One bar per entry of ``self.sample_names`` gives the percentage of rows
    of ``self.df`` whose count is zero in that sample; each bar takes the
    color registered in ``self.colors`` for the condition returned by
    ``self.get_cond_from_sample``. The dashed horizontal line gives the
    percentage of rows whose counts sum to zero over all samples.

    .. plot::
        :include-source:

        from sequana.rnadiff import RNADiffResults
        from sequana import sequana_data

        r = RNADiffResults(sequana_data("rnadiff/rnadiff_onecond_1"))
        r.plot_percentage_null_read_counts()

    """
    samples = self.sample_names
    positions = range(len(samples))
    n_features = len(self.df)

    # Per-sample percentage of features with a zero count.
    null_percent = (self.df[samples] == 0).sum() / n_features * 100
    # Number of features with no count in any sample.
    n_all_null = (self.df[samples].sum(axis=1) == 0).sum()

    bar_colors = [
        self.colors[self.get_cond_from_sample(sample)] for sample in samples
    ]

    pylab.clf()
    pylab.bar(
        positions,
        null_percent,
        color=bar_colors,
        alpha=1,
        zorder=10,
        lw=1,
        ec="k",
        width=0.9,
    )
    pylab.axhline(
        n_all_null / n_features * 100, lw=2, ls="--", color="k", zorder=20
    )
    pylab.xticks(positions, samples)
    pylab.xlabel("Sample")
    pylab.ylabel("Proportion of null counts (%)")
    pylab.grid(True, zorder=0)
def plot_percentage_null_read_counts(self):
    """Bars represent the percentage of null counts in each samples.

    The dashed horizontal line represents the percentage of feature counts
    being equal to zero across all samples.

    The percentages are computed from ``self.counts_raw`` and joined
    column-wise with ``self.design_df``, whose ``group_color`` column
    provides the color of each bar.

    .. plot::
        :include-source:

        from sequana.rnadiff import RNADiffResults
        from sequana import sequana_data

        r = RNADiffResults(sequana_data("rnadiff/rnadiff_onecond_1"))
        r.plot_percentage_null_read_counts()

    """
    pylab.clf()

    is_null = self.counts_raw == 0
    n_features = self.counts_raw.shape[0]

    # Percentage of null counts in each column of the raw count table.
    percent_null = (is_null.sum() / n_features * 100).rename("percent_null")
    df = pd.concat([self.design_df, percent_null], axis=1)

    pylab.bar(
        df.index,
        df.percent_null,
        color=df.group_color,
        ec="k",
        lw=1,
        zorder=10,
    )

    # Expressed in percent, like the bars and the y-axis; the fraction alone
    # would put the line at most at y=1 on a 0-100 scale.
    all_null = is_null.all(axis=1).sum() / n_features * 100
    pylab.axhline(all_null, ls="--", color="black", alpha=0.5)

    pylab.xticks(rotation=45, ha="right")
    pylab.ylabel("Proportion of null counts (%)")
    pylab.grid(True, zorder=0)
    pylab.tight_layout()
def plot_RSC(self):
    """Bar plot of the ``RSC`` column of ``self.df``.

    A dashed red horizontal line is drawn at 0.8.
    """
    rsc = self.df.RSC
    rsc.plot(kind="bar")
    pylab.axhline(0.8, color="r", ls="--", lw=2)
def plot_common_major_counts(self, mode, labels=None, switch_up_down_cond2=False, add_venn=True, xmax=None, title="", fontsize=12, sortby="log2FoldChange"):
    """Plot the percentage of features shared by the top-ranked lists.

    The features of ``gene_lists[mode]`` are taken from ``self.r1`` and
    ``self.r2`` and sorted by *sortby* (ascending for ``"down"``, descending
    otherwise). For each rank ``i`` the percentage of features common to
    the first ``i`` features of both lists is plotted. The *sortby* values
    of the first experiment are shown on a secondary y-axis.

    :param mode: down, up or all
    :param labels: two labels forwarded to the Venn diagram; defaults to
        ``['r1', 'r2']``.
    :param switch_up_down_cond2: currently not used by this method.
    :param add_venn: if true, add an inset Venn diagram of the two lists.
    :param xmax: upper limit of the x-axis; defaults to the size of the
        largest list.
    :param title: title of the figure.
    :param fontsize: font size of the title and axis labels.
    :param sortby: column used to rank the features.

    .. plot::
        :include-source:

        from sequana import sequana_data
        from sequana.compare import RNADiffCompare

        c = RNADiffCompare(
            sequana_data("rnadiff/rnadiff_onecond_1"),
            sequana_data("rnadiff/rnadiff_onecond_2"))
        c.plot_common_major_counts("down")

    """
    if labels is None:
        labels = ["r1", "r2"]

    # "down" features have negative values: the strongest ones come first
    # when sorting in ascending order; other modes are sorted descending.
    ascending = mode == "down"

    # ``.loc`` needs a list-like indexer (sets are rejected by
    # pandas >= 2.0); dict.fromkeys removes duplicates and keeps the order.
    gl1 = list(dict.fromkeys(self.r1.gene_lists[mode]))
    gl2 = list(dict.fromkeys(self.r2.gene_lists[mode]))

    # Each experiment is restricted to its OWN gene list. Selecting both
    # with gl1 would give two tables of identical content and size, making
    # the "minor set" logic below meaningless.
    A = self.r1.df.loc[gl1].sort_values(by=sortby, ascending=ascending)
    B = self.r2.df.loc[gl2].sort_values(by=sortby, ascending=ascending)

    # sometimes, up and down may be inverted as compared to the other
    # conditions
    n_max = max(len(A), len(B))
    N = []
    for i in range(1, n_max):
        top_a = set(A.index[:i])
        top_b = set(B.index[:i])
        N.append(len(top_a & top_b) / i * 100)

    max_common = len(set(A.index).intersection(B.index))

    pylab.clf()
    if len(A) > len(B):
        major, minor, hline_label = A, B, "min set intersection"
    else:
        major, minor, hline_label = B, A, "min set intersect"
    # Guard against empty gene lists (division by zero).
    if len(major):
        pylab.axhline(
            max_common / len(major) * 100,
            color="r",
            ls="--",
            label=hline_label,
        )
    pylab.axvline(len(minor), ls="--", color="k", label="rank of minor set")

    pylab.plot(N)
    pylab.xlabel("rank", fontsize=fontsize)
    pylab.ylabel("% common features", fontsize=fontsize)
    pylab.grid(True)
    pylab.ylim([0, 100])
    if xmax:
        pylab.xlim([0, xmax])
    else:
        pylab.xlim([0, n_max])
    pylab.title(title, fontsize=fontsize)

    # Secondary axis: values of the sorting column for the first experiment.
    ax = pylab.gca()
    ax2 = ax.twinx()
    ax2.plot(A[sortby].values, "orange", label=sortby)
    ax2.set_ylabel(sortby)
    pylab.legend(loc="lower left")
    ax.legend(loc="lower right")

    if add_venn:
        f = pylab.gcf()
        ax = f.add_axes([0.5, 0.5, 0.35, 0.35], facecolor="grey")
        if mode == "down":
            self.plot_venn_down(ax=ax, title=None, labels=labels, mode="two_only")
        elif mode == "up":
            self.plot_venn_up(ax=ax, title=None, labels=labels, mode="two_only")
        elif mode == "all":
            self.plot_venn_all(ax=ax, title=None, labels=labels, mode="two_only")