コード例 #1
0
    def plot_percentage_null_read_counts(self):
        """Bar plot of the percentage of null counts in each sample.

        Each bar is the percentage of features whose count equals zero in
        that sample. The dashed horizontal line is the percentage of
        features whose counts sum to zero across all samples.

        .. plot::
            :include-source:

            from sequana.rnadiff import RNADiffResults
            from sequana import sequana_data

            r = RNADiffResults(sequana_data("rnadiff/rnadiff_onecond_1"))
            r.plot_percentage_null_read_counts()

        """
        counts = self.df[self.sample_names]
        nb_features = len(self.df)
        positions = range(len(self.sample_names))

        # per-sample percentage of features having a zero count
        percent_null = (counts == 0).sum() / nb_features * 100
        # number of features whose counts add up to zero over all samples
        nb_all_null = (counts.sum(axis=1) == 0).sum()

        pylab.clf()
        pylab.bar(positions, percent_null)
        pylab.axhline(nb_all_null / nb_features * 100,
                      lw=2, ls="--", color="k")
        pylab.xticks(positions, self.sample_names)
        pylab.xlabel("Sample")
コード例 #2
0
    def plot_count_per_sample(self, fontsize=12, sample_list=None):
        """Number of mapped reads per sample. Each color for each replicate

        :param int fontsize: font size of the axis labels and of the title
        :param list sample_list: names of the samples to plot, in plotting
            order. Defaults to None, in which case all the samples in
            ``self.sample_names`` are plotted.

        .. plot::
            :include-source:

            from sequana.rnadiff import RNADiffResults
            from sequana import sequana_data

            r = RNADiffResults(sequana_data("rnadiff/rnadiff_onecond_1"))
            r.plot_count_per_sample()
        """
        # sample_list used to be accepted but ignored; None keeps the
        # historical behaviour (all samples).
        if sample_list is None:
            sample_names = self.sample_names
        else:
            sample_names = list(sample_list)
        positions = range(len(sample_names))
        totals = self.df[sample_names].sum()
        pylab.clf()

        # one color per sample, looked up from the sample's condition
        colors = [self.colors[self.get_cond_from_sample(name)]
                  for name in sample_names]

        pylab.bar(positions, (totals / 1000000).values,
                  color=colors, alpha=1,
                  zorder=10, lw=1, ec="k", width=0.9)
        pylab.xlabel("Samples", fontsize=fontsize)
        pylab.ylabel("Total read count (millions)", fontsize=fontsize)
        pylab.grid(True, zorder=0)
        pylab.title("Total read count per sample", fontsize=fontsize)
        pylab.xticks(positions, sample_names)
コード例 #3
0
 def plot_corr(self):
     """Show the correlation matrix of the spikes found as an image.

     A ``lengths`` column (looked up from the SIRV lengths dictionary using
     the index of the spikes table) is added to the table returned by
     :meth:`spikes_found` before the correlation is computed. The color
     scale is limited to [0, 1].
     """
     length_by_name = self.SIRV_data.SIRV.get_lengths_as_dict()
     found = self.spikes_found()
     found["lengths"] = [length_by_name[name] for name in found.index]

     pylab.imshow(found.corr())

     # same tick labels (the column names) on both axes
     labels = found.columns
     positions = range(len(labels))
     pylab.xticks(positions, labels, rotation=90)
     pylab.yticks(positions, labels)
     pylab.clim(0, 1)
     pylab.colorbar()
コード例 #4
0
ファイル: isoseq.py プロジェクト: sequana/sequana
 def plot_corr(self):
     """Plot the correlation between the columns of the spikes table.

     The table returned by :meth:`spikes_found` is extended with a
     ``lengths`` column taken from the SIRV lengths dictionary (keyed by
     the table index); its correlation matrix is then drawn with
     ``imshow`` using a color scale clipped to [0, 1].
     """
     sirv_lengths = self.SIRV_data.SIRV.get_lengths_as_dict()
     table = self.spikes_found()
     table["lengths"] = [sirv_lengths[key] for key in table.index]
     correlation = table.corr()

     pylab.imshow(correlation)
     ticks = range(len(table.columns))
     pylab.xticks(ticks, table.columns, rotation=90)
     pylab.yticks(ticks, table.columns)
     pylab.clim(0, 1)
     pylab.colorbar()
コード例 #5
0
ファイル: pacbio.py プロジェクト: sequana/sequana
 def boxplot_mapq_concordance(self):
     """Boxplot of the concordance for each mapq value between 1 and 60.

     Only ``self.method == "bwa"`` is supported for now (checked with an
     assertion).
     """
     # method can only be bwa for now
     assert self.method == "bwa"
     records = pd.DataFrame(self._get_data(),
                            columns=["mapq", "length", "concordance"])
     pylab.clf()
     # one box per mapq value
     per_mapq = [records.loc[records.mapq == mapq, "concordance"]
                 for mapq in range(1, 61)]
     pylab.boxplot(per_mapq)
     pylab.xlabel("mapq")
     pylab.ylabel("concordance")
     pylab.grid()
     ticks = list(range(10, 61, 10))
     pylab.xticks(ticks, ticks)
コード例 #6
0
 def boxplot_mapq_concordance(self, method):
     """Boxplot of the concordance for each mapq value between 1 and 60.

     :param str method: must be "bwa" for now (checked with an assertion);
         it is forwarded to ``self._get_data``.
     """
     # method can only be bwa for now
     assert method == "bwa"
     records = pd.DataFrame(self._get_data(method),
                            columns=["mapq", "length", "concordance"])
     pylab.clf()
     # one box per mapq value
     per_mapq = [records.loc[records.mapq == mapq, "concordance"]
                 for mapq in range(1, 61)]
     pylab.boxplot(per_mapq)
     pylab.xlabel("mapq")
     pylab.ylabel("concordance")
     pylab.grid()
     ticks = list(range(10, 61, 10))
     pylab.xticks(ticks, ticks)
コード例 #7
0
ファイル: pacbio.py プロジェクト: wenliangz/sequana
    def hist_nb_passes(self,
                       bins=None,
                       alpha=0.5,
                       hold=False,
                       fontsize=12,
                       grid=True,
                       xlabel="Number of ZMW passes",
                       logy=True,
                       ylabel="#",
                       label="",
                       title="Number of ZMW passes"):
        """Plot histogram of number of reads per ZMW (number of passes)

        :param bins: forwarded as is to ``pylab.hist`` (None by default)
        :param float alpha: transparency of the histograms
        :param bool hold: if False (default), the current figure is cleared
            before plotting
        :param int fontsize: font size of the axis labels and of the title
        :param bool grid: add a grid (default to True)
        :param str xlabel:
        :param str ylabel:
        :param bool logy: use log scale on the y axis (default to True)
        :param str label: label of the histogram (for the legend)
        :param str title:

        .. plot::
            :include-source:

            from sequana.pacbio import PacbioSubreads
            from sequana import sequana_data
            b = PacbioSubreads(sequana_data("test_pacbio_subreads.bam"))
            b.hist_nb_passes()
        """
        # Always computed: it used to be bound only when *bins* was None, so
        # calling the method with explicit bins raised UnboundLocalError on
        # the len() test below.
        max_nb_pass = self.df.nb_passes.max()
        pass_range = range(1, max_nb_pass + 1)

        # histogram nb passes
        if hold is False:
            pylab.clf()
        pylab.hist(self.df.nb_passes,
                   bins=bins,
                   alpha=alpha,
                   label=label,
                   log=logy,
                   width=1)
        # with fewer than 5 distinct pass numbers, force integer ticks 0..5
        if len(pass_range) < 5:
            pylab.xticks(range(6), range(6))

        pylab.xlabel(xlabel, fontsize=fontsize)
        pylab.ylabel(ylabel, fontsize=fontsize)
        pylab.title(title, fontsize=fontsize)
        if grid is True:
            pylab.grid(True)
コード例 #8
0
    def hist_ZMW_subreads(self,
                          alpha=0.5,
                          hold=False,
                          fontsize=12,
                          grid=True,
                          xlabel="Number of ZMW passes",
                          logy=True,
                          ylabel="#",
                          label="",
                          title="Number of ZMW passes"):
        """Bar plot of the number of reads per ZMW (number of passes)

        The values come from ``self._nb_pass``, which is filled by calling
        ``self._get_ZMW_passes()`` when it is still None.

        :param float alpha: transparency of the bars
        :param bool hold: if False (default), the current figure is cleared
            before plotting
        :param int fontsize: font size of the axis labels and of the title
        :param bool grid: add a grid (default to True)
        :param str xlabel:
        :param str ylabel:
        :param bool logy: use log scale on the y axis (default to True)
        :param str label: label of the bars (for the legend)
        :param str title:

        .. plot::
            :include-source:

            from sequana.pacbio import BAMPacbio
            from sequana import sequana_data
            b = BAMPacbio(sequana_data("test_pacbio_subreads.bam"))
            b.hist_ZMW_subreads()
        """
        if self._nb_pass is None:
            self._get_ZMW_passes()

        # every pass number from 1 up to the largest one observed
        passes = range(1, max(self._nb_pass.keys()) + 1)
        heights = [self._nb_pass[nb_pass] for nb_pass in passes]

        # bar plot of the nb passes
        if hold is False:
            pylab.clf()
        pylab.bar(passes, heights, alpha=alpha, label=label, log=logy)
        # with fewer than 5 pass numbers, force integer ticks 0..5
        if len(passes) < 5:
            pylab.xticks(range(6), range(6))

        pylab.xlabel(xlabel, fontsize=fontsize)
        pylab.ylabel(ylabel, fontsize=fontsize)
        pylab.title(title, fontsize=fontsize)
        if grid is True:
            pylab.grid(True)
コード例 #9
0
ファイル: rnadiff.py プロジェクト: sequana/sequana
    def plot_boxplot_normeddata(self,
                                fliersize=2,
                                linewidth=2,
                                rotation=0,
                                **kwargs):
        """Boxplot of the normalised counts (``self.counts_norm``), log y scale.

        Counts are clipped to a minimum of 1 before plotting so that they
        can be shown on a log scale.

        :param fliersize: forwarded to ``seaborn.boxplot``
        :param linewidth: forwarded to ``seaborn.boxplot``
        :param rotation: rotation of the x tick labels
        :param kwargs: any other keyword argument is forwarded to
            ``seaborn.boxplot``
        """
        import seaborn as sbn

        ax = sbn.boxplot(
            data=self.counts_norm.clip(1),
            linewidth=linewidth,
            fliersize=fliersize,
            palette=self.design_df.group_color,
            **kwargs,
        )
        pos, labs = pylab.xticks()
        pylab.xticks(pos, labs, rotation=rotation)
        ax.set(yscale="log")
        # _format_plot re-applies a rotation on the x ticks (0 by default):
        # forward the requested one, otherwise it is silently reset.
        self._format_plot(ylabel="Normalised count distribution",
                          rotation=rotation)
        pylab.tight_layout()
コード例 #10
0
    def plot(self, interpolation='None', aspect='auto', cmap='hot', tight_layout=True,
        colorbar=True, fontsize_x=None, fontsize_y=None, rotation_x=90,
        xticks_on=True, yticks_on=True, **kargs):
        """wrapper around imshow to plot a dataframe

        The dataframe ``self.df`` is shown as an image; its index labels the
        y axis and its columns label the x axis. The current figure is
        cleared first.

        :param interpolation: forwarded to ``pylab.imshow`` (default 'None')
        :param aspect: forwarded to ``pylab.imshow`` (default 'auto')
        :param cmap: colormap to be used.
        :param tight_layout: call ``pylab.tight_layout`` at the end
        :param colorbar: add a colorbar (default to True)
        :param fontsize_x: fontsize on xlabels (16 if None)
        :param fontsize_y: fontsize on ylabels (16 if None)
        :param rotation_x: rotate labels on xaxis
        :param xticks_on: switch off the xticks and labels
        :param yticks_on: switch off the yticks and labels
        :param kargs: any other keyword argument is forwarded to
            ``pylab.imshow``

        """

        data = self.df
        pylab.clf()
        pylab.imshow(data, interpolation=interpolation, aspect=aspect, cmap=cmap, **kargs)

        if fontsize_x is None:
            fontsize_x = 16 #FIXME use default values
        if fontsize_y is None:
            fontsize_y = 16 #FIXME use default values

        if yticks_on is True:
            pylab.yticks(range(len(data.index)), data.index,
                fontsize=fontsize_y)
        else:
            pylab.yticks([])
        if xticks_on is True:
            pylab.xticks(range(len(data.columns)), data.columns,
                fontsize=fontsize_x, rotation=rotation_x)
        else:
            pylab.xticks([])

        if colorbar is True:
            pylab.colorbar()

        if tight_layout:
            pylab.tight_layout()
コード例 #11
0
ファイル: rnadiff.py プロジェクト: sequana/sequana
    def plot_boxplot_rawdata(self,
                             fliersize=2,
                             linewidth=2,
                             rotation=0,
                             **kwargs):
        """Boxplot of the raw counts (``self.counts_raw``), log y scale.

        Counts are clipped to a minimum of 1 before plotting so that they
        can be shown on a log scale.

        :param fliersize: forwarded to ``seaborn.boxplot``
        :param linewidth: forwarded to ``seaborn.boxplot``
        :param rotation: rotation of the x tick labels
        :param kwargs: any other keyword argument is forwarded to
            ``seaborn.boxplot``
        """
        import seaborn as sbn

        ax = sbn.boxplot(
            data=self.counts_raw.clip(1),
            linewidth=linewidth,
            fliersize=fliersize,
            palette=self.design_df.group_color,
            **kwargs,
        )
        pos, labs = pylab.xticks()
        pylab.xticks(pos, labs, rotation=rotation)
        # NOTE: when ax is the current axes, this label is replaced by the
        # one set in _format_plot below.
        ax.set_ylabel("Counts (raw) in log10 scale")
        ax.set_yscale("log")
        # _format_plot re-applies a rotation on the x ticks (0 by default):
        # forward the requested one, otherwise it is silently reset.
        self._format_plot(ylabel="Raw count distribution", rotation=rotation)
        pylab.tight_layout()
コード例 #12
0
ファイル: pacbio.py プロジェクト: sequana/sequana
    def hist_nb_passes(self, bins=None, alpha=0.5, hold=False, fontsize=12,
                          grid=True, xlabel="Number of ZMW passes", logy=True,
                          ylabel="#", label="", title="Number of ZMW passes"):
        """Plot histogram of number of reads per ZMW (number of passes)

        :param bins: forwarded as is to ``pylab.hist`` (None by default)
        :param float alpha: transparency of the histograms
        :param bool hold: if False (default), the current figure is cleared
            before plotting
        :param int fontsize: font size of the axis labels and of the title
        :param bool grid: add a grid (default to True)
        :param str xlabel:
        :param str ylabel:
        :param bool logy: use log scale on the y axis (default to True)
        :param str label: label of the histogram (for the legend)
        :param str title:

        .. plot::
            :include-source:

            from sequana.pacbio import PacbioSubreads
            from sequana import sequana_data
            b = PacbioSubreads(sequana_data("test_pacbio_subreads.bam"))
            b.hist_nb_passes()
        """
        # Always computed: it used to be bound only when *bins* was None, so
        # calling the method with explicit bins raised UnboundLocalError on
        # the len() test below.
        max_nb_pass = self.df.nb_passes.max()
        pass_range = range(1, max_nb_pass + 1)

        # histogram nb passes
        if hold is False:
            pylab.clf()
        pylab.hist(self.df.nb_passes, bins=bins, alpha=alpha,
                   label=label, log=logy, width=1)
        # with fewer than 5 distinct pass numbers, force integer ticks 0..5
        if len(pass_range) < 5:
            pylab.xticks(range(6), range(6))

        pylab.xlabel(xlabel, fontsize=fontsize)
        pylab.ylabel(ylabel, fontsize=fontsize)
        pylab.title(title, fontsize=fontsize)
        if grid is True:
            pylab.grid(True)
コード例 #13
0
    def barplot(self, filename="lane{}_status.png", lanes=None):
        """Save one bar plot of the read counts per lane.

        For each lane, the counts of every entry whose name is not
        'Undetermined' are drawn as blue bars, followed by one bar for the
        summed 'Undetermined' counts. That last bar is green when the
        undetermined counts are below 20% of the other counts, orange below
        50%, and red otherwise (or when the other counts sum to zero).

        :param str filename: output file name pattern; ``{}`` is replaced
            by the lane identifier
        :param lanes: lanes to plot. Defaults to all the unique values of
            the ``lane`` column returned by ``self.get_data_reads()``
        """
        df = self.get_data_reads()
        if lanes is None:
            lanes = df.lane.unique()

        for lane in lanes:
            pylab.clf()
            # NOTE: the queries reference the local variable ``lane``
            # through the @lane syntax.
            counts = df.query("lane==@lane and name!='Undetermined'")['count']
            total = counts.sum()
            L = len(counts)

            under = df.query("lane==@lane and name=='Undetermined'")['count'].sum()
            if total > 0:
                pylab.bar(range(L), counts, color="b", label="reads")

            if total == 0:
                color = "red"
            else:
                percent_under = 100 * under / total
                if percent_under < 20:
                    color = "green"
                elif percent_under < 50:
                    color = "orange"
                else:
                    color = "red"

            pylab.bar(range(L, L + 1),
                      under,
                      color=color,
                      label="undetermined")
            pylab.xticks([])
            pylab.ylabel("Number of reads")
            try:
                pylab.legend(loc="lower left")
            except Exception:
                # the legend is best effort only; a failure here must not
                # prevent the figure from being saved
                pass
            pylab.title("Lane {}".format(lane))
            pylab.savefig(filename.format(lane), dpi=200)
コード例 #14
0
    def plot_percentage_null_read_counts(self):
        """Bar plot of the percentage of null counts in each sample.

        Each bar is the percentage of features whose count equals zero in
        that sample; bars are colored according to the condition of the
        sample. The dashed horizontal line is the percentage of features
        whose counts sum to zero across all samples.

        .. plot::
            :include-source:

            from sequana.rnadiff import RNADiffResults
            from sequana import sequana_data

            r = RNADiffResults(sequana_data("rnadiff/rnadiff_onecond_1"))
            r.plot_percentage_null_read_counts()

        """
        counts = self.df[self.sample_names]
        nb_features = len(self.df)
        positions = range(len(self.sample_names))

        # per-sample percentage of features having a zero count
        percent_null = (counts == 0).sum() / nb_features * 100
        # number of features whose counts add up to zero over all samples
        nb_all_null = (counts.sum(axis=1) == 0).sum()

        # one color per sample, looked up from the sample's condition
        bar_colors = [self.colors[self.get_cond_from_sample(name)]
                      for name in self.sample_names]

        pylab.clf()
        pylab.bar(positions, percent_null,
                  color=bar_colors, alpha=1,
                  zorder=10, lw=1, ec="k", width=0.9)
        pylab.axhline(nb_all_null / nb_features * 100,
                      lw=2, ls="--", color="k", zorder=20)
        pylab.xticks(positions, self.sample_names)
        pylab.xlabel("Sample")
        pylab.ylabel("Proportion of null counts (%)")
        pylab.grid(True, zorder=0)
コード例 #15
0
ファイル: rnadiff.py プロジェクト: sequana/sequana
    def plot_count_per_sample(self, fontsize=12, rotation=45):
        """Number of mapped and annotated reads (i.e. counts) per sample. Each color
        for each replicate

        The bars show the column sums of ``self.counts_raw`` in millions,
        colored with the ``group_color`` column of ``self.design_df``.

        :param int fontsize: font size of the axis labels and of the title
        :param rotation: rotation of the x tick labels

        .. plot::
            :include-source:

            from sequana.rnadiff import RNADiffResults
            from sequana import sequana_data

            r = RNADiffResults(sequana_data("rnadiff/rnadiff_onecond_1"))
            r.plot_count_per_sample()

        """
        pylab.clf()
        df = self.counts_raw.sum().rename("total_counts")
        df = pd.concat([self.design_df, df], axis=1)

        pylab.bar(
            df.index,
            df.total_counts / 1000000,
            color=df.group_color,
            lw=1,
            zorder=10,
            ec="k",
            width=0.9,
        )

        pylab.xlabel("Samples", fontsize=fontsize)
        pylab.ylabel("reads (M)", fontsize=fontsize)
        pylab.grid(True, zorder=0)
        pylab.title("Total read count per sample", fontsize=fontsize)
        pylab.xticks(rotation=rotation, ha="right")
        try:
            pylab.tight_layout()
        except Exception:
            # layout adjustment is best effort only; keep the figure as is
            pass
コード例 #16
0
ファイル: rnadiff.py プロジェクト: sequana/sequana
    def plot_percentage_null_read_counts(self):
        """Bars represent the percentage of null counts in each samples.  The dashed
        horizontal line represents the percentage of feature counts being equal
        to zero across all samples

        .. plot::
            :include-source:

            from sequana.rnadiff import RNADiffResults
            from sequana import sequana_data

            r = RNADiffResults(sequana_data("rnadiff/rnadiff_onecond_1"))
            r.plot_percentage_null_read_counts()

        """
        pylab.clf()
        nb_features = self.counts_raw.shape[0]
        is_null = self.counts_raw == 0

        # how many null counts ? (percentage per sample)
        df = (is_null.sum() / nb_features * 100).rename("percent_null")
        df = pd.concat([self.design_df, df], axis=1)

        pylab.bar(df.index,
                  df.percent_null,
                  color=df.group_color,
                  ec="k",
                  lw=1,
                  zorder=10)

        # Percentage of features that are null in every sample. The factor
        # 100 was missing, so the line was drawn as a fraction (0-1) on an
        # axis expressed in percent.
        all_null = is_null.all(axis=1).sum() / nb_features * 100

        pylab.axhline(all_null, ls="--", color="black", alpha=0.5)

        pylab.xticks(rotation=45, ha="right")
        pylab.ylabel("Proportion of null counts (%)")
        pylab.grid(True, zorder=0)
        pylab.tight_layout()
コード例 #17
0
ファイル: rnadiff.py プロジェクト: sequana/sequana
 def _format_plot(self, title="", xlabel="", ylabel="", rotation=0):
     """Set the title, the axis labels and the x tick rotation of the current plot.

     :param str title: title of the plot
     :param str xlabel: label of the x axis
     :param str ylabel: label of the y axis
     :param rotation: rotation of the x tick labels, which are
         right-aligned
     """
     pylab.xlabel(xlabel)
     pylab.ylabel(ylabel)
     pylab.title(title)
     pylab.xticks(ha="right", rotation=rotation)
コード例 #18
0
ファイル: scatter.py プロジェクト: sequana/sequana
    def plot(self,
             kargs_scatter={
                 's': 20,
                 'c': 'b'
             },
             kargs_grids={},
             kargs_histx={},
             kargs_histy={},
             scatter_position='bottom left',
             width=.5,
             height=.5,
             offset_x=.10,
             offset_y=.10,
             gap=0.06,
             facecolor='lightgrey',
             grid=True,
             show_labels=True,
             **kargs):
        """Scatter plot of set of 2 vectors and their histograms.

        The data are read from ``self.df`` (columns **x** and **y**). If
        **size** and/or **color** are found in the columns of the dataframe,
        those columns will be used in the scatter plot; kargs_scatter keys
        **s** and **c** will then be ignored.

        :param kargs_scatter: a dictionary with pairs of key/value accepted by
            matplotlib.scatter function. Examples is a list of colors or a list
            of sizes as shown in the examples below. The dictionary is not
            modified.
        :param kargs_grids: a dictionary with pairs of key/value accepted by
            the matplotlib.grid (applied on histogram and axis at the same time)
        :param kargs_histx: a dictionary with pairs of key/value accepted by the
            matplotlib.histogram
        :param kargs_histy: a dictionary with pairs of key/value accepted by the
            matplotlib.histogram. The **orientation** is always set to
            'horizontal'. The dictionary is not modified.
        :param scatter_position: can be 'bottom right/bottom left/top left/top right'
        :param width: width of the scatter plot (value between 0 and 1)
        :param height: height of the scatter plot (value between 0 and 1)
        :param offset_x: horizontal offset of the axes in the figure
        :param offset_y: vertical offset of the axes in the figure
        :param gap: gap between the scatter and histogram plots.
        :param facecolor: background color of the three axes
        :param grid: defaults to True
        :param show_labels: if True, label the scatter axes with
            ``self.xy_names``
        :param kargs: the only optional parameter used is **hold**; unless
            it is set to True, the current figure is cleared first.

        :return: the scatter, histogram1 and histogram2 axes.
        :raises ValueError: if scatter_position is not one of the four
            supported values.

        .. plot::
            :include-source:
            :width: 80%

            import pylab
            import pandas as pd
            X = pylab.randn(1000)
            Y = pylab.randn(1000)
            df = pd.DataFrame({'X':X, 'Y':Y})

            from sequana.viz import ScatterHist
            ScatterHist(df).plot()


        .. plot::
            :include-source:
            :width: 80%

            from sequana.viz import ScatterHist
            ScatterHist(x=[1,2,3,4], y=[3,5,6,4]).plot(
                kargs_scatter={
                    's':[200,400,600,800],
                    'c': ['red', 'green', 'blue', 'yellow'],
                    'alpha':0.5},
                kargs_histx={'color': 'red'},
                kargs_histy={'color': 'green'})


        .. seealso:: `notebook <http://nbviewer.ipython.org/github/sequana/sequana/blob/master/notebooks/viz/scatter.ipynb>`__
        """
        df = self.df

        # Work on copies: the default dictionaries are shared between calls
        # (and instances), so updating them in place leaked the size/color
        # columns and the orientation from one call into the next ones.
        kargs_scatter = dict(kargs_scatter)
        kargs_histy = dict(kargs_histy)

        # optional columns of the dataframe override the scatter size/color
        for column, key in (('size', 's'), ('color', 'c')):
            try:
                kargs_scatter[key] = df[column]
            except KeyError:
                pass

        if kargs.get("hold", False) is False:
            pylab.clf()

        W = width
        H = height
        # width/height left for the histograms once the scatter is placed
        Wh = 1 - offset_x * 2 - W - gap
        Hh = 1 - offset_y * 2 - H - gap
        if scatter_position == 'bottom left':
            X0 = offset_x
            Y0 = offset_y
            Xoff = X0 + W + gap
            Yoff = Y0 + H + gap
        elif scatter_position == 'bottom right':
            X0 = offset_x + Wh + gap
            Y0 = offset_y
            Xoff = offset_x
            Yoff = Y0 + H + gap
        elif scatter_position == 'top right':
            X0 = offset_x + Wh + gap
            Y0 = offset_y + Hh + gap
            Xoff = offset_x
            Yoff = offset_y
        elif scatter_position == 'top left':
            X0 = offset_x
            Y0 = offset_y + Hh + gap
            Xoff = offset_x + W + gap
            Yoff = offset_y
        else:  #pragma: no cover
            raise ValueError(
                "scatter_position must be 'top left', 'top right', 'bottom left', 'bottom right'"
            )

        # NOTE: the facecolor argument is used as is. It used to be
        # overwritten by kargs.get('facecolor', 'lightgrey'), which can never
        # find the key (facecolor is a named parameter) and therefore always
        # fell back to 'lightgrey'.
        ax_scatter = pylab.axes(
            (X0, Y0, W, H),
            facecolor=facecolor,
            xscale='linear',
            yscale='linear')

        if show_labels:
            ax_scatter.set_xlabel(self.xy_names[0])
            ax_scatter.set_ylabel(self.xy_names[1])
        ax_hist_x = pylab.axes(
            (X0, Yoff, W, Hh),
            facecolor=facecolor,
            xscale='linear',
            yscale='linear')
        ax_hist_y = pylab.axes(
            (Xoff, Y0, Wh, H),
            facecolor=facecolor,
            xscale='linear',
            yscale='linear')

        # move ticks on axis if needed (scatter_position was validated above)
        ax_hist_x.xaxis.set_ticks_position('top')
        if scatter_position == 'bottom left':
            ax_scatter.yaxis.set_ticks_position('left')
            ax_hist_x.yaxis.set_ticks_position('right')
        elif scatter_position == 'bottom right':
            ax_hist_y.yaxis.set_ticks_position('left')
        elif scatter_position == 'top right':
            ax_scatter.xaxis.set_ticks_position('top')
            ax_scatter.yaxis.set_ticks_position('right')
            ax_hist_y.yaxis.set_ticks_position('left')
            ax_hist_x.xaxis.set_ticks_position('bottom')
        elif scatter_position == 'top left':
            ax_scatter.xaxis.set_ticks_position('top')
            ax_hist_y.yaxis.set_ticks_position('right')
            ax_hist_x.xaxis.set_ticks_position('bottom')

        ax_scatter.scatter(df.x, df.y, **kargs_scatter)
        ax_hist_x.hist(df.x, **kargs_histx)
        # fixme: user may not want that ?
        kargs_histy['orientation'] = 'horizontal'
        ax_hist_y.hist(df.y, **kargs_histy)
        # I tried c.set_xticks but rotation could not be found
        # (ax_hist_y is the last axes created, hence the current one)
        pylab.xticks(ax_hist_y.get_xticks(), rotation=90)

        # grid: the on/off flag is passed positionally because its keyword
        # name differs between matplotlib versions (b, then visible)
        if grid is True:
            for ax in (ax_scatter, ax_hist_x, ax_hist_y):
                ax.grid(True, which='major', axis='both', **kargs_grids)

        return (ax_scatter, ax_hist_x, ax_hist_y)