def plot(self, color_line='r', bgcolor='grey', color='yellow', lw=4, hold=False, ax=None):
    """Plot the mean of ``self.df`` with a +/- one standard deviation band.

    Three horizontal background bands are drawn first (red 0-20, orange
    20-30, green 30-41), then the ``mean - std`` .. ``mean + std`` band of
    ``self.df`` and finally the mean curve on top of it.

    :param color_line: colour of the mean curve.
    :param bgcolor: unused; kept so existing callers keep working.
    :param color: fill colour of the standard deviation band.
    :param lw: line width of the mean curve.
    :param hold: unused; kept so existing callers keep working.
    :param ax: optional matplotlib axes; when given it is made the current
        axes before anything is drawn.
    """
    xmax = self.xmax + 1
    if ax is not None:
        pylab.sca(ax)

    # Background bands delimiting the three score ranges.
    pylab.fill_between([0, xmax], [0, 0], [20, 20], color='red', alpha=0.3)
    pylab.fill_between([0, xmax], [20, 20], [30, 30], color='orange', alpha=0.3)
    pylab.fill_between([0, xmax], [30, 30], [41, 41], color='green', alpha=0.3)

    # Previously X was only bound when self.X was None, so a populated
    # self.X ended in an UnboundLocalError on the first use of X below.
    # NOTE(review): presumably self.X holds the x coordinates matching the
    # columns of self.df -- confirm against where self.X is set.
    if self.X is None:
        X = range(1, self.xmax + 1)
    else:
        X = self.X

    mean = self.df.mean()
    std = self.df.std()
    pylab.fill_between(X, mean + std, mean - std, color=color, interpolate=False)
    pylab.plot(X, mean, color=color_line, lw=lw)

    pylab.ylim([0, 41])
    pylab.xlim([0, self.xmax + 1])
    pylab.title("Quality scores across all bases")
    pylab.xlabel("Position in read (bp)")
    pylab.ylabel("Quality")
    pylab.grid(axis='x')
def boxplot_quality(self, color_line='r', bgcolor='grey', color='yellow', lw=4, hold=False, ax=None):
    """Plot a per-position mean quality curve with a +/- one std band.

    The columns ``"0"`` .. ``"41"`` of ``self.df`` are read for the rows
    labelled 1 .. 150 and divided by ``self.metadata['ReadNum']`` to obtain
    per-row weights. A weighted mean and standard deviation are computed
    for each row and drawn over three background bands (red 0-20, orange
    20-30, green 30-41).

    Side effect: ``self.xmax`` is set to 150.

    :param color_line: colour of the mean curve.
    :param bgcolor: unused; kept so existing callers keep working.
    :param color: fill colour of the standard deviation band.
    :param lw: line width of the mean curve.
    :param hold: unused; kept so existing callers keep working.
    :param ax: optional matplotlib axes; when given it is made the current
        axes before anything is drawn.
    """
    scores = range(42)
    # not sure why we have phred score from 0 to 41
    quality = self.df[[str(score) for score in scores]]
    N = self.metadata['ReadNum']

    self.xmax = 150
    xmax = self.xmax + 1
    if ax is not None:
        pylab.sca(ax)  # pragma no cover

    # Background bands delimiting the three score ranges.
    pylab.fill_between([0, xmax], [0, 0], [20, 20], color='red', alpha=0.3)
    pylab.fill_between([0, xmax], [20, 20], [30, 30], color='orange', alpha=0.3)
    pylab.fill_between([0, xmax], [30, 30], [41, 41], color='green', alpha=0.3)

    positions = range(1, self.xmax + 1)
    means = []
    stds = []
    for pos in positions:
        counts = quality.loc[pos]  # looked up once per position
        # NOTE(review): the mean weights each column by (label + 1) whereas
        # the variance below uses the labels 0..41 unshifted. One of the two
        # is probably off by one; left unchanged until the intended
        # convention is confirmed.
        mean_quality = sum((int(k) + 1) * v for k, v in counts.items()) / N
        proba = counts / N
        variance = sum((x - mean_quality) ** 2 * p for x, p in zip(scores, proba))
        means.append(mean_quality)
        stds.append(pylab.sqrt(variance))

    X = np.array(positions)
    Q = np.array(means)
    S = np.array(stds)

    pylab.fill_between(X, Q + S, Q - S, color=color, interpolate=False)
    pylab.plot(X, Q, color=color_line, lw=lw)

    pylab.ylim([0, 41])
    pylab.xlim([0, self.xmax + 1])
    pylab.title("Quality scores across all bases")
    pylab.xlabel("Position in read (bp)")
    pylab.ylabel("Quality")
    pylab.grid(axis='x')
def plot_sequence_quality(self, max_score=40, ax=None):
    """Overlay the per-base mean quality curve of every sample.

    For each entry of ``self.fastqc_data`` that provides a
    ``'per_base_sequence_quality'`` section, a series mapping
    ``self._avg_bp_from_range(d['base'])`` to ``d['mean']`` is plotted as a
    semi-transparent black line. Background bands (red 0-20, orange 20-30,
    green 30-ymax) are then drawn up to the largest x value encountered.

    :param max_score: the y axis extends to at least ``max_score + 1``; it
        grows if a plotted series has a larger maximum.
    :param ax: optional matplotlib axes; when given it is made the current
        axes before anything is drawn.
    """
    # Select the target axes before plotting the samples. This used to
    # happen after the loop, so the curves were drawn on whatever axes was
    # current and only the background bands reached ``ax``.
    if ax is not None:
        pylab.sca(ax)

    ymax = max_score + 1
    xmax = 0
    for sample_data in self.fastqc_data.values():
        # Guard on the section that is actually read below. The previous
        # guard tested "per_sequence_quality_scores", a different key.
        if "per_base_sequence_quality" not in sample_data:
            continue
        data = {
            self._avg_bp_from_range(d['base']): d['mean']
            for d in sample_data['per_base_sequence_quality']
        }
        series = pd.Series(data)
        series.plot(color="k", alpha=0.5)
        if series.max() > ymax:
            ymax = series.max()
        if series.index.max() > xmax:
            xmax = series.index.max()

    pylab.fill_between([0, xmax], [0, 0], [20, 20], color='red', alpha=0.4)
    pylab.fill_between([0, xmax], [20, 20], [30, 30], color='orange', alpha=0.4)
    pylab.fill_between([0, xmax], [30, 30], [ymax, ymax], color='green', alpha=0.4)

    pylab.ylim([0, ymax])
    # xmax stays 0 when no sample was plotted; skip the degenerate x range.
    if xmax != 0:
        pylab.xlim([0, xmax])
    pylab.title("Quality scores across all bases")
    pylab.xlabel("Position in read (bp)")
    pylab.ylabel("Phred Score", fontsize=12)
    pylab.grid(axis='x')