Ejemplo n.º 1
0
    def plot(self,
             color_line='r',
             bgcolor='grey',
             color='yellow',
             lw=4,
             hold=False,
             ax=None):
        """Plot the mean of ``self.df`` with a band of +/- one standard deviation.

        Three translucent background bands are drawn first (red for 0-20,
        orange for 20-30, green for 30-41). The area between
        ``self.df.mean() - self.df.std()`` and ``self.df.mean() + self.df.std()``
        is then filled, and the mean is drawn on top as a line.

        The x coordinates are taken from ``self.X`` when it is set; otherwise
        they default to ``1 .. self.xmax``.

        :param color_line: colour of the mean line.
        :param bgcolor: unused; kept for backward compatibility.
        :param color: fill colour of the mean +/- std band.
        :param lw: line width of the mean line.
        :param hold: unused; kept for backward compatibility.
        :param ax: optional matplotlib axes; made current before drawing.
        """
        xmax = self.xmax + 1
        if ax:
            pylab.sca(ax)

        # Background bands marking the 0-20 / 20-30 / 30-41 score ranges.
        for low, high, band_color in ((0, 20, 'red'),
                                      (20, 30, 'orange'),
                                      (30, 41, 'green')):
            pylab.fill_between([0, xmax], [low, low], [high, high],
                               color=band_color,
                               alpha=0.3)

        # Previously X was only bound when self.X was None, so a user-provided
        # self.X led to an UnboundLocalError further down.
        if self.X is None:
            X = range(1, self.xmax + 1)
        else:
            X = self.X

        # Compute the statistics once; they are used for both band and line.
        mean = self.df.mean()
        std = self.df.std()

        pylab.fill_between(X,
                           mean + std,
                           mean - std,
                           color=color,
                           interpolate=False)

        pylab.plot(X, mean, color=color_line, lw=lw)
        pylab.ylim([0, 41])
        pylab.xlim([0, xmax])
        pylab.title("Quality scores across all bases")
        pylab.xlabel("Position in read (bp)")
        pylab.ylabel("Quality")
        pylab.grid(axis='x')
Ejemplo n.º 2
0
    def boxplot_quality(self, color_line='r', bgcolor='grey', color='yellow', lw=4,
            hold=False, ax=None):
        """Plot the mean quality per read position with a +/- one std band.

        The columns ``'0'`` .. ``'41'`` of ``self.df`` are read as counts per
        quality value, one row per position, and normalised by
        ``self.metadata['ReadNum']``. For each position from 1 to
        ``self.xmax`` the weighted mean and standard deviation of the score
        are computed, then drawn over red/orange/green background bands.

        Side effect: ``self.xmax`` is set to 150.

        :param color_line: colour of the mean line.
        :param bgcolor: unused; kept for backward compatibility.
        :param color: fill colour of the mean +/- std band.
        :param lw: line width of the mean line.
        :param hold: unused; kept for backward compatibility.
        :param ax: optional matplotlib axes; made current before drawing.
        """
        # Original author's note: not sure why we have phred score from 0 to 41
        quality = self.df[[str(x) for x in range(42)]]
        N = self.metadata['ReadNum']

        # NOTE(review): the number of positions is hard-coded to 150 here;
        # confirm whether it should be derived from the data instead.
        self.xmax = 150
        xmax = self.xmax + 1
        if ax:
            pylab.sca(ax) # pragma no cover
        pylab.fill_between([0, xmax], [0, 0], [20, 20], color='red', alpha=0.3)
        pylab.fill_between([0, xmax], [20, 20], [30, 30], color='orange', alpha=0.3)
        pylab.fill_between([0, xmax], [30, 30], [41, 41], color='green', alpha=0.3)

        # Score attached to each column: column label + 1, as used for the
        # mean. The same values are now used for the deviation as well; the
        # previous code measured deviations of 0..41 around a mean of 1..42,
        # which inflated the standard deviation.
        scores = np.arange(42) + 1

        positions = range(1, self.xmax + 1)
        means = []
        stds = []
        for pos in positions:
            # Fraction of reads carrying each score at this position.
            proba = np.asarray(quality.loc[pos], dtype=float) / N
            mean_quality = (scores * proba).sum()
            variance = ((scores - mean_quality) ** 2 * proba).sum()
            means.append(mean_quality)
            stds.append(np.sqrt(variance))

        X = np.array(positions)
        Q = np.array(means)
        S = np.array(stds)
        pylab.fill_between(X, Q + S, Q - S,
            color=color, interpolate=False)

        pylab.plot(X, Q, color=color_line, lw=lw)
        pylab.ylim([0, 41])
        pylab.xlim([0, xmax])
        pylab.title("Quality scores across all bases")
        pylab.xlabel("Position in read (bp)")
        pylab.ylabel("Quality")
        pylab.grid(axis='x')
Ejemplo n.º 3
0
    def plot_sequence_quality(self, max_score=40, ax=None):
        """Plot the per-base mean quality curve of every sample.

        For each entry of ``self.fastqc_data`` that has a
        ``'per_base_sequence_quality'`` section, one curve is drawn. Its x
        values come from ``self._avg_bp_from_range(d['base'])`` and its y
        values from ``d['mean']``. Red/orange/green background bands are
        then drawn and the axis limits are widened to fit the largest x and
        y values seen.

        :param max_score: the y axis reaches at least ``max_score + 1``.
        :param ax: optional matplotlib axes; made current before drawing.
        """
        # Select the target axes before any curve is drawn, so that curves
        # and background bands end up on the same axes.
        if ax:
            pylab.sca(ax)
        target = pylab.gca()

        ymax = max_score + 1
        xmax = 0
        for sample_data in self.fastqc_data.values():
            # Guard on the section that is actually read below; the previous
            # guard tested a different key ("per_sequence_quality_scores").
            if "per_base_sequence_quality" not in sample_data:
                continue

            data = {
                self._avg_bp_from_range(d['base']): d['mean']
                for d in sample_data['per_base_sequence_quality']
            }
            df = pd.Series(data)
            df.plot(color="k", alpha=0.5, ax=target)

            ymax = max(ymax, df.max())
            xmax = max(xmax, df.index.max())

        pylab.fill_between([0, xmax], [0, 0], [20, 20], color='red', alpha=0.4)
        pylab.fill_between([0, xmax], [20, 20], [30, 30],
                           color='orange',
                           alpha=0.4)
        pylab.fill_between([0, xmax], [30, 30], [ymax, ymax],
                           color='green',
                           alpha=0.4)

        pylab.ylim([0, ymax])
        # Leave the x limits untouched when no sample contributed data.
        if xmax != 0:
            pylab.xlim([0, xmax])
        pylab.title("Quality scores across all bases")
        pylab.xlabel("Position in read (bp)")
        pylab.ylabel("Phred Score", fontsize=12)
        pylab.grid(axis='x')