Example #1
0
    def plot(self, experiment, **kwargs):
        """Plot a faceted histogram view of a channel"""
        
        if not experiment:
            raise util.CytoflowViewError("No experiment specified")
        
        if not self.channel:
            raise util.CytoflowViewError("Must specify a channel")
        
        if self.channel not in experiment.data:
            raise util.CytoflowViewError("Channel {0} not in the experiment"
                                    .format(self.channel))
        
        if self.xfacet and self.xfacet not in experiment.conditions:
            raise util.CytoflowViewError("X facet {0} not in the experiment"
                                    .format(self.xfacet))
        
        if self.yfacet and self.yfacet not in experiment.conditions:
            raise util.CytoflowViewError("Y facet {0} not in the experiment"
                                    .format(self.yfacet))
        
        if self.huefacet and self.huefacet not in experiment.conditions:
            raise util.CytoflowViewError("Hue facet {0} not in the experiment"
                                    .format(self.huefacet))

        if self.subset:
            try:
                data = experiment.query(self.subset).data.reset_index()
            except:
                raise util.CytoflowViewError("Subset string '{0}' isn't valid"
                                        .format(self.subset))
                
            if len(experiment.data) == 0:
                raise util.CytoflowViewError("Subset string '{0}' returned no events"
                                        .format(self.subset))
        else:
            data = experiment.data
        
        # get the scale
        scale = util.scale_factory(self.scale, experiment, self.channel)
        scaled_data = scale(data[self.channel])
        
        #print scaled_data
        
        kwargs.setdefault('histtype', 'stepfilled')
        kwargs.setdefault('alpha', 0.5)
        kwargs.setdefault('antialiased', True)

        # estimate a "good" number of bins; see cytoflow.utility.num_hist_bins
        # for a reference.
        
        num_bins = util.num_hist_bins(scaled_data)
        
        # clip num_bins to (50, 1000)
        num_bins = max(min(num_bins, 1000), 50)
        
        xmin = bottleneck.nanmin(scaled_data)
        xmax = bottleneck.nanmax(scaled_data)
                    
        if (self.huefacet 
            and "bins" in experiment.metadata[self.huefacet]
            and experiment.metadata[self.huefacet]["bin_scale"] == self.scale):
            # if we color facet by the result of a BinningOp and we don't
            # match the BinningOp bins with the histogram bins, we get
            # gnarly aliasing.
            
            # each color gets at least one bin.  however, if the estimated
            # number of bins for the histogram is much larger than the
            # number of colors, sub-divide each color into multiple bins.
            bins = experiment.metadata[self.huefacet]["bins"]
            bins = np.append(bins, xmax)
            
            num_hues = len(data[self.huefacet].unique())
            bins_per_hue = math.ceil(num_bins / num_hues)
            
            new_bins = [xmin]
            for end in [b for b in bins if (b > xmin and b <= xmax)]:
                new_bins = np.append(new_bins,
                                     np.linspace(new_bins[-1],
                                                 end,
                                                 bins_per_hue + 1,
                                                 endpoint = True)[1:])

            bins = scale.inverse(new_bins)
        else:
            bin_width = (xmax - xmin) / num_bins
            bins = scale.inverse(np.arange(xmin, xmax, bin_width))
            bins = np.append(bins, scale.inverse(xmax))
            
        # take care of a rare rounding error, where the last observation is
        # a liiiitle bit more than the last bin, which makes plt.hist() puke
        bins[-1] += 1
                    
        kwargs.setdefault('bins', bins) 
        
        # mask out the data that's not in the scale domain
        data = data[~np.isnan(scaled_data)]

        g = sns.FacetGrid(data, 
                          size = 6,
                          aspect = 1.5,
                          col = (self.xfacet if self.xfacet else None),
                          row = (self.yfacet if self.yfacet else None),
                          hue = (self.huefacet if self.huefacet else None),
                          col_order = (np.sort(data[self.xfacet].unique()) if self.xfacet else None),
                          row_order = (np.sort(data[self.yfacet].unique()) if self.yfacet else None),
                          hue_order = (np.sort(data[self.huefacet].unique()) if self.huefacet else None),
                          legend_out = False,
                          sharex = False,
                          sharey = False)
        
        # set the scale for each set of axes; can't just call plt.xscale() 
        for ax in g.axes.flatten():
            ax.set_xscale(self.scale, **scale.mpl_params)  
                  
        g.map(plt.hist, self.channel, **kwargs)
        
        # if we have a hue facet and a lot of hues, make a color bar instead
        # of a super-long legend.
        
        if self.huefacet:
            current_palette = mpl.rcParams['axes.color_cycle']
            if len(g.hue_names) > len(current_palette):
                plot_ax = plt.gca()
                cmap = mpl.colors.ListedColormap(sns.color_palette("husl", 
                                                                   n_colors = len(g.hue_names)))
                cax, _ = mpl.colorbar.make_axes(plt.gca())
                norm = mpl.colors.Normalize(vmin = np.min(g.hue_names), 
                                            vmax = np.max(g.hue_names), 
                                            clip = False)
                mpl.colorbar.ColorbarBase(cax, 
                                          cmap = cmap, 
                                          norm = norm, 
                                          label = self.huefacet)
                plt.sca(plot_ax)
            else:
                g.add_legend(title = self.huefacet)
Example #2
0
    def estimate(self, experiment, subset = None):
        """
        Estimate the Gaussian mixture model parameters
        """
        
        if not experiment:
            raise util.CytoflowOpError("No experiment specified")

        if self.xchannel not in experiment.data:
            raise util.CytoflowOpError("Column {0} not found in the experiment"
                                  .format(self.xchannel))
            
        if self.ychannel not in experiment.data:
            raise util.CytoflowOpError("Column {0} not found in the experiment"
                                  .format(self.ychannel))
       
        for b in self.by:
            if b not in experiment.data:
                raise util.CytoflowOpError("Aggregation metadata {0} not found"
                                      " in the experiment"
                                      .format(b))
            if len(experiment.data[b].unique()) > 100: #WARNING - magic number
                raise util.CytoflowOpError("More than 100 unique values found for"
                                      " aggregation metadata {0}.  Did you"
                                      " accidentally specify a data channel?"
                                      .format(b))
                
        if self.by:
            groupby = experiment.data.groupby(self.by)
        else:
            # use a lambda expression to return a group that contains
            # all the events
            groupby = experiment.data.groupby(lambda x: True)
            
        # get the scale. estimate the scale params for the ENTIRE data set,
        # not subsets we get from groupby().  And we need to save it so that
        # the data is transformed the same way when we apply()
        self._xscale = util.scale_factory(self.xscale, experiment, self.xchannel)
        self._yscale = util.scale_factory(self.yscale, experiment, self.ychannel)
            
        for group, data_subset in groupby:
            x = data_subset.loc[:, [self.xchannel, self.ychannel]]
            x[self.xchannel] = self._xscale(x[self.xchannel])
            x[self.ychannel] = self._yscale(x[self.ychannel])
            
            # drop data that isn't in the scale range
            x = x[~(np.isnan(x[self.xchannel]) | np.isnan(x[self.ychannel]))]
            x = x.values
            
            gmm = mixture.GMM(n_components = self.num_components,
                              covariance_type = "full",
                              random_state = 1)
            gmm.fit(x)
            
            if not gmm.converged_:
                raise util.CytoflowOpError("Estimator didn't converge"
                                      " for group {0}"
                                      .format(group))
                
            # in the 1D version, we sort the components by the means -- so
            # the first component has the lowest mean, the second component
            # has the next-lowest mean, etc.  that doesn't work in a 2D area,
            # obviously.
            
            # instead, we assume that the clusters are likely (?) to be
            # arranged along *one* of the axes, so we take the |norm| of the
            # x,y mean of each cluster and sort that way.
            
            norms = (gmm.means_[:, 0] ** 2 + gmm.means_[:, 1] ** 2) ** 0.5
            sort_idx = np.argsort(norms)
            gmm.means_ = gmm.means_[sort_idx]
            gmm.weights_ = gmm.weights_[sort_idx]
            gmm.covars_ = gmm.covars_[sort_idx]
            
            self._gmms[group] = gmm
Example #3
0
    def plot(self, experiment, **kwargs):
        """Plot a bar chart"""
        
        if not experiment:
            raise util.CytoflowViewError("No experiment specified")
        
        if not self.variable:
            raise util.CytoflowViewError("variable not set")
            
        if self.variable not in experiment.conditions:
            raise util.CytoflowViewError("variable {0} not in the experiment"
                                    .format(self.variable))
        
        if not (experiment.conditions[self.variable] == "float" or
                experiment.conditions[self.variable] == "int"):
            raise util.CytoflowViewError("variable {0} isn't numeric"
                                    .format(self.variable)) 
            
        if not self.xchannel:
            raise util.CytoflowViewError("X channel isn't set.")
        
        if self.xchannel not in experiment.data:
            raise util.CytoflowViewError("X channel {0} isn't in the experiment"
                                    .format(self.xchannel))
        
        if not self.xfunction:
            raise util.CytoflowViewError("X summary function isn't set")
                
        if not self.ychannel:
            raise util.CytoflowViewError("Y channel isn't set.")
        
        if self.ychannel not in experiment.data:
            raise util.CytoflowViewError("Y channel {0} isn't in the experiment"
                                    .format(self.ychannel))
        
        if not self.yfunction:
            raise util.CytoflowViewError("Y summary function isn't set")
        
        if self.xfacet and self.xfacet not in experiment.conditions:
            raise util.CytoflowViewError("X facet {0} not in the experiment")
        
        if self.yfacet and self.yfacet not in experiment.conditions:
            raise util.CytoflowViewError("Y facet {0} not in the experiment")
        
        if self.huefacet and self.huefacet not in experiment.metadata:
            raise util.CytoflowViewError("Hue facet {0} not in the experiment")        

        if self.x_error_bars and self.x_error_bars != 'data' \
                             and self.x_error_bars not in experiment.conditions:
            raise util.CytoflowViewError("x_error_bars must be either 'data' or "
                                         "a condition in the experiment") 
            
        if self.x_error_bars and not self.x_error_function:
            raise util.CytoflowViewError("didn't set an x error function")

        if self.y_error_bars and self.y_error_bars != 'data' \
                             and self.y_error_bars not in experiment.conditions:
            raise util.CytoflowViewError("y_error_bars must be either 'data' or "
                                         "a condition in the experiment") 
            
        if self.y_error_bars and not self.y_error_function:
            raise util.CytoflowViewError("didn't set an error function")
                
        kwargs.setdefault('antialiased', True)
        
        if self.subset:
            try:
                data = experiment.query(self.subset).data.reset_index()
            except:
                raise util.CytoflowViewError("Subset string '{0}' isn't valid"
                                        .format(self.subset))
                
            if len(data.index) == 0:
                raise util.CytoflowViewError("Subset string '{0}' returned no events"
                                        .format(self.subset))
        else:
            data = experiment.data
            
        group_vars = [self.variable]
        if self.xfacet: group_vars.append(self.xfacet)
        if self.yfacet: group_vars.append(self.yfacet)
        if self.huefacet: group_vars.append(self.huefacet)
            
        g = data.groupby(by = group_vars)
        
        plot_data = pd.DataFrame(
                {self.xchannel : g[self.xchannel].aggregate(self.xfunction), 
                 self.ychannel : g[self.ychannel].aggregate(self.yfunction)}) \
                      .reset_index()
                      
        # compute the x error statistic
        if self.x_error_bars:
            if self.x_error_bars == 'data':
                # compute the error statistic on the same subsets as the summary
                # statistic
                error_stat = g[self.xchannel].aggregate(self.x_error_function).reset_index()
            else:
                # subdivide the data set further by the error_bars condition
                err_vars = list(group_vars)
                err_vars.append(self.x_error_bars)
                
                # apply the summary statistic to each subgroup
                data_g = data.groupby(by = err_vars)
                data_stat = data_g[self.xchannel].aggregate(self.xfunction).reset_index()
                
                # apply the error function to the summary statistics
                err_g = data_stat.groupby(by = group_vars)
                error_stat = err_g[self.xchannel].aggregate(self.x_error_function).reset_index()
        
            x_err_name = util.random_string(6)
            plot_data[x_err_name] = error_stat[self.xchannel]
            
        # compute the y error statistic
        if self.y_error_bars:
            if self.y_error_bars == 'data':
                # compute the error statistic on the same subsets as the summary
                # statistic
                error_stat = g[self.ychannel].aggregate(self.y_error_function).reset_index()
            else:
                # subdivide the data set further by the error_bars condition
                err_vars = list(group_vars)
                err_vars.append(self.y_error_bars)
                
                # apply the summary statistic to each subgroup
                data_g = data.groupby(by = err_vars)
                data_stat = data_g[self.ychannel].aggregate(self.yfunction).reset_index()
                
                # apply the error function to the summary statistics
                err_g = data_stat.groupby(by = group_vars)
                error_stat = err_g[self.ychannel].aggregate(self.y_error_function).reset_index()
        
            y_err_name = util.random_string(6)
            plot_data[y_err_name] = error_stat[self.ychannel]
        
 
        grid = sns.FacetGrid(plot_data,
                             size = 6,
                             aspect = 1.5,
                             col = (self.xfacet if self.xfacet else None),
                             row = (self.yfacet if self.yfacet else None),
                             hue = (self.huefacet if self.huefacet else None),
                             col_order = (np.sort(data[self.xfacet].unique()) if self.xfacet else None),
                             row_order = (np.sort(data[self.yfacet].unique()) if self.yfacet else None),
                             hue_order = (np.sort(data[self.huefacet].unique()) if self.huefacet else None),
                             legend_out = False,
                             sharex = False,
                             sharey = False)
        
        xscale = util.scale_factory(self.xscale, experiment, self.xchannel)
        yscale = util.scale_factory(self.yscale, experiment, self.ychannel)
        
        for ax in grid.axes.flatten():
            ax.set_xscale(self.xscale, **xscale.mpl_params)
            ax.set_yscale(self.yscale, **yscale.mpl_params)
        
        # plot the error bars first so the axis labels don't get overwritten
        if self.x_error_bars:
            grid.map(_x_error_bars, self.xchannel, x_err_name, self.ychannel)
            
        if self.y_error_bars:
            grid.map(_y_error_bars, self.xchannel, self.ychannel, y_err_name)

        grid.map(plt.plot, self.xchannel, self.ychannel, **kwargs)


        # if we have a hue facet and a lot of hues, make a color bar instead
        # of a super-long legend.
        
        if self.huefacet:
            current_palette = mpl.rcParams['axes.color_cycle']
            if len(grid.hue_names) > len(current_palette):
                plot_ax = plt.gca()
                cmap = mpl.colors.ListedColormap(sns.color_palette("husl", 
                                                                   n_colors = len(grid.hue_names)))
                cax, _ = mpl.colorbar.make_axes(plt.gca())
                norm = mpl.colors.Normalize(vmin = np.min(grid.hue_names), 
                                            vmax = np.max(grid.hue_names), 
                                            clip = False)
                mpl.colorbar.ColorbarBase(cax, 
                                          cmap = cmap, 
                                          norm = norm,
                                          label = self.huefacet)
                plt.sca(plot_ax)
            else:
                grid.add_legend(title = self.huefacet)
Example #4
0
    def plot(self, experiment, **kwargs):
        """Plot a faceted scatter plot view of a channel"""

        if not experiment:
            raise util.CytoflowViewError("No experiment specified")

        if not self.xchannel:
            raise util.CytoflowViewError("X channel not specified")

        if self.xchannel not in experiment.data:
            raise util.CytoflowViewError("X channel {0} not in the experiment".format(self.xchannel))

        if not self.ychannel:
            raise util.CytoflowViewError("Y channel not specified")

        if self.ychannel not in experiment.data:
            raise util.CytoflowViewError("Y channel {0} not in the experiment".format(self.ychannel))

        if self.xfacet and self.xfacet not in experiment.conditions:
            raise util.CytoflowViewError("X facet {0} not in the experiment".format(self.xfacet))

        if self.yfacet and self.yfacet not in experiment.conditions:
            raise util.CytoflowViewError("Y facet {0} not in the experiment".format(self.yfacet))

        if self.huefacet and self.huefacet not in experiment.metadata:
            raise util.CytoflowViewError("Hue facet {0} not in the experiment".format(self.huefacet))

        if self.subset:
            try:
                data = experiment.query(self.subset)
            except:
                raise util.CytoflowViewError("Subset string '{0}' isn't valid".format(self.subset))

            if len(data.index) == 0:
                raise util.CytoflowViewError("Subset string '{0}' returned no events".format(self.subset))
        else:
            data = experiment.data

        kwargs.setdefault("alpha", 0.25)
        kwargs.setdefault("s", 2)
        kwargs.setdefault("marker", "o")
        kwargs.setdefault("antialiased", True)

        g = sns.FacetGrid(
            data,
            size=6,
            aspect=1.5,
            col=(self.xfacet if self.xfacet else None),
            row=(self.yfacet if self.yfacet else None),
            hue=(self.huefacet if self.huefacet else None),
            col_order=(np.sort(data[self.xfacet].unique()) if self.xfacet else None),
            row_order=(np.sort(data[self.yfacet].unique()) if self.yfacet else None),
            hue_order=(np.sort(data[self.huefacet].unique()) if self.huefacet else None),
            legend_out=False,
            sharex=False,
            sharey=False,
        )

        xscale = util.scale_factory(self.xscale, experiment, self.xchannel)
        yscale = util.scale_factory(self.yscale, experiment, self.ychannel)

        for ax in g.axes.flatten():
            ax.set_xscale(self.xscale, **xscale.mpl_params)
            ax.set_yscale(self.yscale, **yscale.mpl_params)

        g.map(plt.scatter, self.xchannel, self.ychannel, **kwargs)

        # if we have a hue facet and a lot of hues, make a color bar instead
        # of a super-long legend.

        if self.huefacet:
            current_palette = mpl.rcParams["axes.color_cycle"]
            if len(g.hue_names) > len(current_palette):
                plot_ax = plt.gca()
                cmap = mpl.colors.ListedColormap(sns.color_palette("husl", n_colors=len(g.hue_names)))
                cax, _ = mpl.colorbar.make_axes(plt.gca())
                norm = mpl.colors.Normalize(vmin=np.min(g.hue_names), vmax=np.max(g.hue_names), clip=False)
                mpl.colorbar.ColorbarBase(cax, cmap=cmap, norm=norm)
                plt.sca(plot_ax)
            else:
                g.add_legend()
Example #5
0
    def plot(self, experiment, **kwargs):
        """Plot a faceted histogram view of a channel"""
        
        if not experiment:
            raise util.CytoflowViewError("No experiment specified")
        
        if not self.xchannel:
            raise util.CytoflowViewError("X channel not specified")
        
        if self.xchannel not in experiment.data:
            raise util.CytoflowViewError("X channel {0} not in the experiment"
                                    .format(self.xchannel))
            
        if not self.ychannel:
            raise util.CytoflowViewError("Y channel not specified")
        
        if self.ychannel not in experiment.data:
            raise util.CytoflowViewError("Y channel {0} not in the experiment")
        
        if self.xfacet and self.xfacet not in experiment.conditions:
            raise util.CytoflowViewError("X facet {0} not in the experiment")
        
        if self.yfacet and self.yfacet not in experiment.conditions:
            raise util.CytoflowViewError("Y facet {0} not in the experiment")
        
        if self.huefacet and self.huefacet not in experiment.metadata:
            raise util.CytoflowViewError("Hue facet {0} not in the experiment")

        if self.subset:
            try: 
                data = experiment.query(self.subset).data.reset_index()
            except:
                raise util.CytoflowViewError("Subset string \'{0}\' not valid")
                            
            if len(data.index) == 0:
                raise util.CytoflowViewError("Subset string '{0}' returned no events"
                                        .format(self.subset))
        else:
            data = experiment.data
            
        xscale = util.scale_factory(self.xscale, experiment, self.xchannel)
        yscale = util.scale_factory(self.yscale, experiment, self.ychannel)

        kwargs['xscale'] = xscale
        kwargs['yscale'] = yscale
        
        scaled_xdata = xscale(data[self.xchannel])
        data = data[~np.isnan(scaled_xdata)]
        scaled_xdata = scaled_xdata[~np.isnan(scaled_xdata)]

        scaled_ydata = yscale(data[self.ychannel])
        data = data[~np.isnan(scaled_ydata)]
        scaled_ydata = scaled_ydata[~np.isnan(scaled_ydata)]
        

        # find good bin counts
        num_xbins = util.num_hist_bins(scaled_xdata)
        num_ybins = util.num_hist_bins(scaled_ydata)
        
        # there are situations where this produces an unreasonable estimate.
        if num_xbins > self._max_bins:
            warnings.warn("Capping X bins to {}! To increase this limit, "
                          "change _max_bins"
                          .format(self._max_bins))
            num_xbins = self._max_bins
            
        if num_ybins > self._max_bins:
            warnings.warn("Capping Y bins to {}! To increase this limit, "
                          "change _max_bins"
                          .format(self._max_bins))
            num_ybins = self._max_bins
      
        kwargs.setdefault('smoothed', False)
        if kwargs['smoothed']:
            num_xbins /= 2
            num_ybins /= 2
                
        _, xedges, yedges = np.histogram2d(scaled_xdata, 
                                           scaled_ydata, 
                                           bins = (num_xbins, num_ybins))

        kwargs['xedges'] = xscale.inverse(xedges)
        kwargs['yedges'] = yscale.inverse(yedges)
        
        kwargs.setdefault('antialiased', True)

        g = sns.FacetGrid(data,
                          size = 6,
                          aspect = 1.5, 
                          col = (self.xfacet if self.xfacet else None),
                          row = (self.yfacet if self.yfacet else None),
                          hue = (self.huefacet if self.huefacet else None),
                          col_order = (np.sort(data[self.xfacet].unique()) if self.xfacet else None),
                          row_order = (np.sort(data[self.yfacet].unique()) if self.yfacet else None),
                          hue_order = (np.sort(data[self.huefacet].unique()) if self.huefacet else None),
                          sharex = False,
                          sharey = False)
         
        for ax in g.axes.flatten():
            ax.set_xscale(self.xscale, **xscale.mpl_params)
            ax.set_yscale(self.yscale, **yscale.mpl_params)
        
        g.map(_hist2d, self.xchannel, self.ychannel, **kwargs)
        # if we have a hue facet and a lot of hues, make a color bar instead
        # of a super-long legend.
        
        if self.huefacet:
            current_palette = mpl.rcParams['axes.color_cycle']
            if len(g.hue_names) > len(current_palette):
                plot_ax = plt.gca()
                cmap = mpl.colors.ListedColormap(sns.color_palette("husl", 
                                                                   n_colors = len(g.hue_names)))
                cax, _ = mpl.colorbar.make_axes(plt.gca())
                norm = mpl.colors.Normalize(vmin = np.min(g.hue_names), 
                                            vmax = np.max(g.hue_names), 
                                            clip = False)
                mpl.colorbar.ColorbarBase(cax, 
                                          cmap = cmap, 
                                          norm = norm,
                                          label = self.huefacet)
                plt.sca(plot_ax)
            else:
                g.add_legend(title = self.huefacet)
Example #6
0
    def apply(self, experiment):
        """Applies the binning to an experiment.
        
        Parameters
        ----------
        experiment : Experiment
            the old_experiment to which this op is applied
            
        Returns
        -------
            a new experiment, the same as old_experiment but with a new
            column the same as the operation name.  The bool is True if the
            event's measurement in self.channel is greater than self.low and
            less than self.high; it is False otherwise.
        """
        if not experiment:
            raise util.CytoflowOpError("no experiment specified")
        
        if not self.name:
            raise util.CytoflowOpError("name is not set")
        
        if self.name in experiment.data.columns:
            raise util.CytoflowOpError("name {0} is in the experiment already"
                                  .format(self.name))
            
        if self.bin_count_name and self.bin_count_name in experiment.data.columns:
            raise util.CytoflowOpError("bin_count_name {0} is in the experiment already"
                                  .format(self.bin_count_name))
        
        if not self.channel:
            raise util.CytoflowOpError("channel is not set")
        
        if self.channel not in experiment.data.columns:
            raise util.CytoflowOpError("channel {0} isn't in the experiment"
                                  .format(self.channel))
              
        if self.num_bins is Undefined and self.bin_width is Undefined:
            raise util.CytoflowOpError("must set either bin number or width")
        
        if self.num_bins is Undefined \
           and not (self.scale == "linear" or self.scale == "log"):
            raise util.CytoflowOpError("Can only use bin_width with linear or log scale") 
        
        scale = util.scale_factory(self.scale, experiment, self.channel)
        scaled_data = scale(experiment.data[self.channel])
            
        channel_min = bn.nanmin(scaled_data)
        channel_max = bn.nanmax(scaled_data)
        
        num_bins = self.num_bins if self.num_bins is not Undefined else \
                   (channel_max - channel_min) / self.bin_width

        bins = np.linspace(start = channel_min, stop = channel_max,
                           num = num_bins)
            
        # bins need to be internal; drop the first and last one
        bins = bins[1:-1]
            
        new_experiment = experiment.clone()
        new_experiment.add_condition(self.name,
                                     "int",
                                     np.digitize(scaled_data, bins))
        
        # if we're log-scaled (for example), don't label data that isn't
        # showable on a log scale!
        new_experiment.data.ix[np.isnan(scaled_data), self.name] = np.NaN
        
        # keep track of the bins we used, for pretty plotting later.
        new_experiment.metadata[self.name]["bin_scale"] = self.scale
        new_experiment.metadata[self.name]["bins"] = bins
        
        if self.bin_count_name:
            # TODO - this is a HUGE memory hog?!
            agg_count = new_experiment.data.groupby(self.name).count()
            agg_count = agg_count[agg_count.columns[0]]
            
            # have to make the condition a float64, because if we're in log
            # space there may be events that have NaN as the bin number.
            
            new_experiment.add_condition(
                self.bin_count_name,
                "float64",
                new_experiment[self.name].map(agg_count))
        
        new_experiment.history.append(self.clone_traits())
        return new_experiment
Example #7
0
    def plot(self, experiment, **kwargs):
        """Plot a faceted 2d kernel density estimate"""
        
        if not experiment:
            raise util.CytoflowViewError("No experiment specified")

        if not self.xchannel:
            raise util.CytoflowViewError("X channel not specified")
        
        if self.xchannel not in experiment.data:
            raise util.CytoflowViewError("X channel {0} not in the experiment"
                                    .format(self.xchannel))
            
        if not self.ychannel:
            raise util.CytoflowViewError("Y channel not specified")
        
        if self.ychannel not in experiment.data:
            raise util.CytoflowViewError("Y channel {0} not in the experiment"
                                         .format(self.ychannel))
        
        if self.xfacet and self.xfacet not in experiment.conditions:
            raise util.CytoflowViewError("X facet {0} not in the experiment"
                                         .format(self.xfacet))
        
        if self.yfacet and self.yfacet not in experiment.conditions:
            raise util.CytoflowViewError("Y facet {0} not in the experiment"
                                         .format(self.yfacet))
        
        if self.huefacet and self.huefacet not in experiment.metadata:
            raise util.CytoflowViewError("Hue facet {0} not in the experiment"
                                         .format(self.huefacet))
        
        if self.subset:
            try:
                data = experiment.query(self.subset).data.reset_index()
            except:
                raise util.CytoflowViewError("Subset string '{0}' isn't valid"
                                        .format(self.subset))
                            
            if len(data.index) == 0:
                raise util.CytoflowViewError("Subset string '{0}' returned no events"
                                        .format(self.subset))
        else:
            data = experiment.data
        
        kwargs.setdefault('shade', False)
        kwargs.setdefault('min_alpha', 0.2)
        kwargs.setdefault('max_alpha', 0.9)
        kwargs.setdefault('n_levels', 10)

        g = sns.FacetGrid(data, 
                          size = 6,
                          aspect = 1.5,
                          col = (self.xfacet if self.xfacet else None),
                          row = (self.yfacet if self.yfacet else None),
                          hue = (self.huefacet if self.huefacet else None),
                          col_order = (np.sort(data[self.xfacet].unique()) if self.xfacet else None),
                          row_order = (np.sort(data[self.yfacet].unique()) if self.yfacet else None),
                          hue_order = (np.sort(data[self.huefacet].unique()) if self.huefacet else None),
                          legend_out = False,
                          sharex = False,
                          sharey = False)
        
        xscale = util.scale_factory(self.xscale, experiment, self.xchannel)
        yscale = util.scale_factory(self.yscale, experiment, self.ychannel)
        
        for ax in g.axes.flatten():
            ax.set_xscale(self.xscale, **xscale.mpl_params)
            ax.set_yscale(self.yscale, **yscale.mpl_params)
            
        kwargs['xscale'] = xscale
        kwargs['yscale'] = yscale

        g.map(_bivariate_kdeplot, self.xchannel, self.ychannel, **kwargs)
        
        if self.huefacet:
            g.add_legend(title = self.huefacet)
Example #8
0
    def plot(self, experiment, **kwargs):
        """Plot a faceted histogram view of a channel"""
        
        if not experiment:
            raise util.CytoflowViewError("No experiment specified")
        
        if not self.channel:
            raise util.CytoflowViewError("Must specify a channel")
        
        if self.channel not in experiment.data:
            raise util.CytoflowViewError("Channel {0} not in the experiment"
                                    .format(self.channel))
        
        if self.xfacet and self.xfacet not in experiment.conditions:
            raise util.CytoflowViewError("X facet {0} not in the experiment"
                                    .format(self.xfacet))
        
        if self.yfacet and self.yfacet not in experiment.conditions:
            raise util.CytoflowViewError("Y facet {0} not in the experiment"
                                    .format(self.yfacet))
        
        if self.huefacet and self.huefacet not in experiment.conditions:
            raise util.CytoflowViewError("Hue facet {0} not in the experiment"
                                    .format(self.huefacet))

        if self.subset:
            try:
                data = experiment.query(self.subset).data.reset_index()
            except:
                raise util.CytoflowViewError("Subset string '{0}' isn't valid"
                                        .format(self.subset))
                
            if len(data.index) == 0:
                raise util.CytoflowViewError("Subset string '{0}' returned no events"
                                        .format(self.subset))
        else:
            data = experiment.data
            

        #print scaled_data
        
        kwargs.setdefault('shade', True)
        kwargs['label'] = self.name
        
        g = sns.FacetGrid(data, 
                          size = 6,
                          aspect = 1.5,
                          col = (self.xfacet if self.xfacet else None),
                          row = (self.yfacet if self.yfacet else None),
                          hue = (self.huefacet if self.huefacet else None),
                          col_order = (np.sort(data[self.xfacet].unique()) if self.xfacet else None),
                          row_order = (np.sort(data[self.yfacet].unique()) if self.yfacet else None),
                          hue_order = (np.sort(data[self.huefacet].unique()) if self.huefacet else None),
                          legend_out = False,
                          sharex = False,
                          sharey = False)
        
        # get the scale     
        kwargs['scale'] = scale = util.scale_factory(self.scale, experiment, self.channel)
        
        # set the scale for each set of axes; can't just call plt.xscale() 
        for ax in g.axes.flatten():
            ax.set_xscale(self.scale, **scale.mpl_params)  
                  
        g.map(_univariate_kdeplot, self.channel, **kwargs)
        if self.huefacet:
            g.add_legend(title = self.huefacet)
Example #9
0
    def plot(self, experiment, **kwargs):
        """Plot a faceted histogram view of a channel"""
        
        if not experiment:
            raise util.CytoflowViewError("No experiment specified")
        
        if not self.xchannel:
            raise util.CytoflowViewError("X channel not specified")
        
        if self.xchannel not in experiment.data:
            raise util.CytoflowViewError("X channel {0} not in the experiment"
                                    .format(self.xchannel))
            
        if not self.ychannel:
            raise util.CytoflowViewError("Y channel not specified")
        
        if self.ychannel not in experiment.data:
            raise util.CytoflowViewError("Y channel {0} not in the experiment")
        
        if self.xfacet and self.xfacet not in experiment.conditions:
            raise util.CytoflowViewError("X facet {0} not in the experiment")
        
        if self.yfacet and self.yfacet not in experiment.conditions:
            raise util.CytoflowViewError("Y facet {0} not in the experiment")
        
        if self.huefacet and self.huefacet not in experiment.metadata:
            raise util.CytoflowViewError("Hue facet {0} not in the experiment")

        if self.subset:
            try: 
                data = experiment.query(self.subset)
            except:
                raise util.CytoflowViewError("Subset string \'{0}\' not valid")
                            
            if len(data.index) == 0:
                raise util.CytoflowViewError("Subset string '{0}' returned no events"
                                        .format(self.subset))
        else:
            data = experiment.data
        
        #kwargs.setdefault('histtype', 'stepfilled')
        #kwargs.setdefault('alpha', 0.5)
        kwargs.setdefault('edgecolor', 'none')
        #kwargs.setdefault('mincnt', 1)
        #kwargs.setdefault('bins', 'log')
        kwargs.setdefault('antialiased', True)
            
        xmin, xmax = (np.amin(data[self.xchannel]), np.amax(data[self.xchannel]))
        ymin, ymax = (np.amin(data[self.ychannel]), np.amax(data[self.ychannel]))
        # to avoid issues with singular data, expand the min/max pairs
        xmin, xmax = mtrans.nonsingular(xmin, xmax, expander=0.1)
        ymin, ymax = mtrans.nonsingular(ymin, ymax, expander=0.1)
        
        extent = (xmin, xmax, ymin, ymax)
        kwargs.setdefault('extent', extent)
        
        xbins = util.num_hist_bins(experiment[self.xchannel])
        ybins = util.num_hist_bins(experiment[self.ychannel])
        bins = np.mean([xbins, ybins])
        
        kwargs.setdefault('bins', bins) # Do not move above.  don't ask.

        g = sns.FacetGrid(data,
                          size = 6,
                          aspect = 1.5, 
                          col = (self.xfacet if self.xfacet else None),
                          row = (self.yfacet if self.yfacet else None),
                          hue = (self.huefacet if self.huefacet else None),
                          col_order = (np.sort(data[self.xfacet].unique()) if self.xfacet else None),
                          row_order = (np.sort(data[self.yfacet].unique()) if self.yfacet else None),
                          hue_order = (np.sort(data[self.huefacet].unique()) if self.huefacet else None),
                          sharex = False,
                          sharey = False)
        
        if(self.xscale != "linear" or self.yscale != "linear"):
            warnings.warn("hexbin is broken with scales other than \"linear\"",
                          util.CytoflowViewWarning)

        xscale = util.scale_factory(self.xscale, experiment, self.xchannel)
        yscale = util.scale_factory(self.yscale, experiment, self.ychannel)
        
        for ax in g.axes.flatten():
            ax.set_xscale(self.xscale, **xscale.mpl_params)
            ax.set_yscale(self.yscale, **yscale.mpl_params)
        
        g.map(plt.hexbin, self.xchannel, self.ychannel, **kwargs)
Example #10
0
    def plot(self, experiment, **kwargs):
        """Plot a bar chart"""
        
        if not experiment:
            raise util.CytoflowViewError("No experiment specified")
        
        if not self.by:
            raise util.CytoflowViewError("Independent variable 'by' not set")
            
        if self.by not in experiment.conditions:
            raise util.CytoflowViewError("Independent variable {0} not in the experiment"
                                    .format(self.by))
        
        if not (experiment.conditions[self.by] == "float" or
                experiment.conditions[self.by] == "int"):
            raise util.CytoflowViewError("by variable {0} isn't numeric"
                                    .format(self.by)) 
            
        if not self.xchannel:
            raise util.CytoflowViewError("X channel isn't set.")
        
        if self.xchannel not in experiment.data:
            raise util.CytoflowViewError("X channel {0} isn't in the experiment"
                                    .format(self.xchannel))
        
        if not self.xfunction:
            raise util.CytoflowViewError("X summary function isn't set")
                
        if not self.ychannel:
            raise util.CytoflowViewError("Y channel isn't set.")
        
        if self.ychannel not in experiment.data:
            raise util.CytoflowViewError("Y channel {0} isn't in the experiment"
                                    .format(self.ychannel))
        
        if not self.yfunction:
            raise util.CytoflowViewError("Y summary function isn't set")
        
        if self.xfacet and self.xfacet not in experiment.conditions:
            raise util.CytoflowViewError("X facet {0} not in the experiment")
        
        if self.yfacet and self.yfacet not in experiment.conditions:
            raise util.CytoflowViewError("Y facet {0} not in the experiment")
        
        if self.huefacet and self.huefacet not in experiment.metadata:
            raise util.CytoflowViewError("Hue facet {0} not in the experiment")        

        kwargs.setdefault('antialiased', True)
        
        if self.subset:
            try:
                data = experiment.query(self.subset)
            except:
                raise util.CytoflowViewError("Subset string '{0}' isn't valid"
                                        .format(self.subset))
                
            if len(data.index) == 0:
                raise util.CytoflowViewError("Subset string '{0}' returned no events"
                                        .format(self.subset))
        else:
            data = experiment.data
            
        group_vars = [self.by]
        if self.xfacet:
            group_vars.append(self.xfacet)
        if self.yfacet:
            group_vars.append(self.yfacet)
        if self.huefacet:
            group_vars.append(self.huefacet)
            
        g = data.groupby(by = group_vars)
        
        plot_data = pd.DataFrame(
                {self.xchannel : g[self.xchannel].aggregate(self.xfunction), 
                 self.ychannel : g[self.ychannel].aggregate(self.yfunction)}) \
                      .reset_index()
 
        grid = sns.FacetGrid(plot_data,
                             size = 6,
                             aspect = 1.5,
                             col = (self.xfacet if self.xfacet else None),
                             row = (self.yfacet if self.yfacet else None),
                             hue = (self.huefacet if self.huefacet else None),
                             col_order = (np.sort(data[self.xfacet].unique()) if self.xfacet else None),
                             row_order = (np.sort(data[self.yfacet].unique()) if self.yfacet else None),
                             hue_order = (np.sort(data[self.huefacet].unique()) if self.huefacet else None),
                             legend_out = False,
                             sharex = False,
                             sharey = False)
        
        xscale = util.scale_factory(self.xscale, experiment, self.xchannel)
        yscale = util.scale_factory(self.yscale, experiment, self.ychannel)
        
        for ax in grid.axes.flatten():
            ax.set_xscale(self.xscale, **xscale.mpl_params)
            ax.set_yscale(self.yscale, **yscale.mpl_params)

        grid.map(plt.plot, self.xchannel, self.ychannel, **kwargs)

        # if we have a hue facet and a lot of hues, make a color bar instead
        # of a super-long legend.
        
        if self.huefacet:
            current_palette = mpl.rcParams['axes.color_cycle']
            if len(grid.hue_names) > len(current_palette):
                plot_ax = plt.gca()
                cmap = mpl.colors.ListedColormap(sns.color_palette("husl", 
                                                                   n_colors = len(grid.hue_names)))
                cax, _ = mpl.colorbar.make_axes(plt.gca())
                norm = mpl.colors.Normalize(vmin = np.min(grid.hue_names), 
                                            vmax = np.max(grid.hue_names), 
                                            clip = False)
                mpl.colorbar.ColorbarBase(cax, cmap = cmap, norm = norm)
                plt.sca(plot_ax)
            else:
                grid.add_legend()
Example #11
0
    def estimate(self, experiment, subset = None):
        """
        Estimate the Gaussian mixture model parameters
        """
        
        if not experiment:
            raise util.CytoflowOpError("No experiment specified")

        if self.channel not in experiment.data:
            raise util.CytoflowOpError("Column {0} not found in the experiment"
                                  .format(self.channel))
       
        for b in self.by:
            if b not in experiment.data:
                raise util.CytoflowOpError("Aggregation metadata {0} not found"
                                      " in the experiment"
                                      .format(b))
            if len(experiment.data[b].unique()) > 100: #WARNING - magic number
                raise util.CytoflowOpError("More than 100 unique values found for"
                                      " aggregation metadata {0}.  Did you"
                                      " accidentally specify a data channel?"
                                      .format(b))
                
        if self.by:
            groupby = experiment.data.groupby(self.by)
        else:
            # use a lambda expression to return a group that contains
            # all the events
            groupby = experiment.data.groupby(lambda x: True)
            
        # get the scale. estimate the scale params for the ENTIRE data set,
        # not subsets we get from groupby().  And we need to save it so that
        # the data is transformed the same way when we apply()
        self._scale = util.scale_factory(self.scale, experiment, self.channel)
            
        for group, data_subset in groupby:
            x = data_subset[self.channel].reset_index(drop = True)
            x = self._scale(x)
            
            # drop data that isn't in the scale range
            #x = pd.Series(self._scale(x)).dropna()
            x = x[~np.isnan(x)]
            
            gmm = mixture.GMM(n_components = self.num_components,
                              random_state = 1)
            gmm.fit(x[:, np.newaxis])
            
            if not gmm.converged_:
                raise util.CytoflowOpError("Estimator didn't converge"
                                      " for group {0}"
                                      .format(group))
                
            # to make sure we have a stable ordering, sort the components
            # by the means (so the first component has the lowest mean, 
            # the next component has the next-lowest, etc.)
            
            sort_idx = np.argsort(gmm.means_[:, 0])
            gmm.means_ = gmm.means_[sort_idx]
            gmm.weights_ = gmm.weights_[sort_idx]
            gmm.covars_ = gmm.covars_[sort_idx]
           
            self._gmms[group] = gmm
Example #12
0
    def plot(self, experiment, **kwargs):
        """Plot a faceted histogram view of a channel"""
        
        if not experiment:
            raise util.CytoflowViewError("No experiment specified")
        
        if not self.channel:
            raise util.CytoflowViewError("Must specify a channel")
        
        if self.channel not in experiment.data:
            raise util.CytoflowViewError("Channel {0} not in the experiment"
                                    .format(self.channel))
        
        if not self.variable:
            raise util.CytoflowViewError("Variable not specified")
        
        if not self.variable in experiment.conditions:
            raise util.CytoflowViewError("Variable {0} isn't in the experiment")
        
        if self.xfacet and self.xfacet not in experiment.conditions:
            raise util.CytoflowViewError("X facet {0} not in the experiment"
                                    .format(self.xfacet))
        
        if self.yfacet and self.yfacet not in experiment.conditions:
            raise util.CytoflowViewError("Y facet {0} not in the experiment"
                                    .format(self.yfacet))
        
        if self.huefacet and self.huefacet not in experiment.conditions:
            raise util.CytoflowViewError("Hue facet {0} not in the experiment"
                                    .format(self.huefacet))

        if self.subset:
            try:
                data = experiment.query(self.subset).data.reset_index()
            except:
                raise util.CytoflowViewError("Subset string '{0}' isn't valid"
                                        .format(self.subset))
                
            if len(data.index) == 0:
                raise util.CytoflowViewError("Subset string '{0}' returned no events"
                                        .format(self.subset))
        else:
            data = experiment.data.copy()
                    
        # get the scale
        scale = util.scale_factory(self.scale, experiment, self.channel)
        kwargs['data_scale'] = scale
        
        kwargs.setdefault('orient', 'v')
                
        g = sns.FacetGrid(data, 
                          size = 6,
                          aspect = 1.5,
                          col = (self.xfacet if self.xfacet else None),
                          row = (self.yfacet if self.yfacet else None),
                          col_order = (np.sort(data[self.xfacet].unique()) if self.xfacet else None),
                          row_order = (np.sort(data[self.yfacet].unique()) if self.yfacet else None),
                          legend_out = False,
                          sharex = False,
                          sharey = False)
                
        # set the scale for each set of axes; can't just call plt.xscale() 
        for ax in g.axes.flatten():
            if kwargs['orient'] == 'h':
                ax.set_xscale(self.scale, **scale.mpl_params)  
            else:
                ax.set_yscale(self.scale, **scale.mpl_params)  
            
        # this order-dependent thing weirds me out.      
        if kwargs['orient'] == 'h':
            violin_args = [self.channel, self.variable]
        else:
            violin_args = [self.variable, self.channel]
            
        if self.huefacet:
            violin_args.append(self.huefacet)
            
        g.map(_violinplot,   
              *violin_args,      
              order = np.sort(data[self.variable].unique()),
              hue_order = (np.sort(data[self.huefacet].unique()) if self.huefacet else None),
              **kwargs)
        
        if self.huefacet:
            g.add_legend(title = self.huefacet)
Example #13
0
    def plot(self, experiment, **kwargs):
        """Plot a bar chart"""
        
        if not experiment:
            raise util.CytoflowViewError("No experiment specified")

        if not self.channel:
            raise util.CytoflowViewError("Channel not specified")
        
        if self.channel not in experiment.data:
            raise util.CytoflowViewError("Channel {0} isn't in the experiment"
                                    .format(self.channel))
        
        if not self.by:
            raise util.CytoflowViewError("Variable not specified")
        
        if not self.by in experiment.conditions:
            raise util.CytoflowViewError("Variable {0} isn't in the experiment")
        
        if not self.function:
            raise util.CytoflowViewError("Function not specified")
        
        if self.xfacet and self.xfacet not in experiment.conditions:
            raise util.CytoflowViewError("X facet {0} isn't in the experiment"
                                    .format(self.xfacet))
        
        if self.yfacet and self.yfacet not in experiment.metadata:
            raise util.CytoflowViewError("Y facet {0} isn't in the experiment"
                                    .format(self.yfacet))

        if self.huefacet and self.huefacet not in experiment.metadata:
            raise util.CytoflowViewError("Hue facet {0} isn't in the experiment"
                                    .format(self.huefacet))
        
#         if self.error_bars == 'data' and self.error_function is None:
#             return False
#         
#         if self.error_bars == 'summary' \
#             and (self.error_function is None 
#                  or not self.error_var in experiment.metadata):
#             return False
        
        if self.subset:
            try:
                data = experiment.query(self.subset)
            except:
                raise util.CytoflowViewError("Subset string {0} isn't valid"
                                        .format(self.subset))
                            
            if len(data.index) == 0:
                raise util.CytoflowViewError("Subset string '{0}' returned no events"
                                        .format(self.subset))
        else:
            data = experiment.data
            
        sns.factorplot(x = self.by,
                       y = self.channel,
                       data = data,
                       size = 6,
                       aspect = 1.5,
                       row = (self.yfacet if self.yfacet else None),
                       col = (self.xfacet if self.xfacet else None),
                       hue = (self.huefacet if self.huefacet else None),
                       col_order = (np.sort(data[self.xfacet].unique()) if self.xfacet else None),
                       row_order = (np.sort(data[self.yfacet].unique()) if self.yfacet else None),
                       hue_order = (np.sort(data[self.huefacet].unique()) if self.huefacet else None),
                       # something buggy here.
                       #orient = ("h" if self.orientation == "horizontal" else "v"),
                       estimator = self.function,
                       ci = None,
                       kind = "bar")
        
        scale = util.scale_factory(self.scale, experiment, self.channel)
        
        # because the bottom of a bar chart is "0", masking out bad
        # values on a log scale doesn't work.  we must clip instead.
        if self.scale == "log":
            scale.mode = "clip"

        plt.yscale(self.scale, **scale.mpl_params)