Exemple #1
0
    def plot(self, experiment, **kwargs):
        """
        Plot the plots.
        
        Parameters
        ----------
        """

        if experiment is None:
            raise util.CytoflowViewError('experiment', "No experiment specified")
                
        view, trait_name = self._strip_trait(self.op.name)
        
        if self.xchannel in self.op._scale:
            xscale = self.op._scale[self.xchannel]
        else:
            xscale = util.scale_factory(self.xscale, experiment, channel = self.xchannel)

        if self.ychannel in self.op._scale:
            yscale = self.op._scale[self.ychannel]
        else:
            yscale = util.scale_factory(self.yscale, experiment, channel = self.ychannel)
    
        super(KMeans2DView, view).plot(experiment,
                                       annotation_facet = self.op.name,
                                       annotation_trait = trait_name,
                                       annotations = self.op._kmeans,
                                       xscale = xscale,
                                       yscale = yscale,
                                       **kwargs)
Exemple #2
0
    def plot(self, experiment, plot_name=None, **kwargs):
        """
        Parameters
        ----------
        xlim, ylim : (float, float)
            Set the range of the plot's axis.
            
        """
        if experiment is None:
            raise util.CytoflowViewError('experiment',
                                         "No experiment specified")

        data = self._make_data(experiment)

        xscale = util.scale_factory(self.xscale,
                                    experiment,
                                    statistic=self.xstatistic,
                                    error_statistic=self.x_error_statistic)

        yscale = util.scale_factory(self.yscale,
                                    experiment,
                                    statistic=self.ystatistic,
                                    error_statistic=self.y_error_statistic)

        super().plot(experiment,
                     data,
                     plot_name,
                     xscale=xscale,
                     yscale=yscale,
                     **kwargs)
Exemple #3
0
    def plot(self, experiment, **kwargs):
        """
        Plot the plots.
        
        Parameters
        ----------
        """

        view, trait_name = self._strip_trait(self.op.name)

        if self.xchannel in self.op._scale:
            xscale = self.op._scale[self.xchannel]
        else:
            xscale = util.scale_factory(self.xscale,
                                        experiment,
                                        channel=self.xchannel)

        if self.ychannel in self.op._scale:
            yscale = self.op._scale[self.ychannel]
        else:
            yscale = util.scale_factory(self.yscale,
                                        experiment,
                                        channel=self.ychannel)

        super(GaussianMixture2DView, view).plot(experiment,
                                                annotation_facet=self.op.name,
                                                annotation_trait=trait_name,
                                                annotations=self.op._gmms,
                                                xscale=xscale,
                                                yscale=yscale,
                                                **kwargs)
Exemple #4
0
    def plot(self, experiment, **kwargs):
        """
        Plot the plots.
        
        Parameters
        ----------
        """

        if experiment is None:
            raise util.CytoflowViewError('experiment', "No experiment specified")
        
        if self.op.num_components == 1:
            annotation_facet = self.op.name + "_1"
        else:
            annotation_facet = self.op.name
        
        view, trait_name = self._strip_trait(annotation_facet)
        
        if self.xchannel in self.op._scale:
            xscale = self.op._scale[self.xchannel]
        else:
            xscale = util.scale_factory(self.xscale, experiment, channel = self.xchannel)

        if self.ychannel in self.op._scale:
            yscale = self.op._scale[self.ychannel]
        else:
            yscale = util.scale_factory(self.yscale, experiment, channel = self.ychannel)
    
        super(GaussianMixture2DView, view).plot(experiment,
                                                annotation_facet = annotation_facet,
                                                annotation_trait = trait_name,
                                                annotations = self.op._gmms,
                                                xscale = xscale,
                                                yscale = yscale,
                                                **kwargs)
Exemple #5
0
 def plot(self, experiment, plot_name = None, **kwargs):       
     data = self._make_data(experiment)
     
     if not self.variable:
         raise util.CytoflowViewError('variable',
                                      "variable not set")
         
     if self.variable not in experiment.conditions:
         raise util.CytoflowViewError('variable',
                                      "variable {0} not in the experiment"
                                 .format(self.variable))
         
     if util.is_numeric(experiment[self.variable]):
         xscale = util.scale_factory(self.xscale, experiment, condition = self.variable)
     else:
         xscale = None 
     
     yscale = util.scale_factory(self.yscale, 
                                 experiment, 
                                 statistic = self.statistic, 
                                 error_statistic = self.error_statistic)
         
     super().plot(experiment, 
                  data, 
                  plot_name, 
                  xscale = xscale, 
                  yscale = yscale, 
                  **kwargs)
Exemple #6
0
    def plot(self, experiment, **kwargs):
        """
        Parameters
        ----------  
        xlim, ylim : (float, float)
            Set the range of the plot's axis.
        """

        if experiment is None:
            raise util.CytoflowViewError('experiment',
                                         "No experiment specified")

        if not self.xchannel:
            raise util.CytoflowViewError('xchannel',
                                         "Must specify an xchannel")

        if self.xchannel not in experiment.data:
            raise util.CytoflowViewError(
                'xchannel',
                "Channel {} not in the experiment".format(self.xchannel))

        if not self.ychannel:
            raise util.CytoflowViewError('ychannel', "Must specify a ychannel")

        if self.ychannel not in experiment.data:
            raise util.CytoflowViewError(
                'ychannel',
                "Channel {} not in the experiment".format(self.ychannel))

        # get the scale
        xscale = kwargs.pop('xscale', None)
        if xscale is None:
            xscale = util.scale_factory(self.xscale,
                                        experiment,
                                        channel=self.xchannel)

        yscale = kwargs.pop('yscale', None)
        if yscale is None:
            yscale = util.scale_factory(self.yscale,
                                        experiment,
                                        channel=self.ychannel)

        xlim = kwargs.pop('xlim', None)
        ylim = kwargs.pop('ylim', None)

        super().plot(experiment,
                     lim={
                         self.xchannel: xlim,
                         self.ychannel: ylim
                     },
                     scale={
                         self.xchannel: xscale,
                         self.ychannel: yscale
                     },
                     **kwargs)
Exemple #7
0
    def plot(self, experiment, **kwargs):
        """
        Parameters
        ----------
        lim : Dict(Str : (float, float))
            Set the range of each channel's axis.  If unspecified, assume
            that the limits are the minimum and maximum of the clipped data
        """

        if experiment is None:
            raise util.CytoflowViewError('experiment',
                                         "No experiment specified")

        if len(self.channels) == 0:
            raise util.CytoflowOpError('channels',
                                       "Must set at least one channel")

        if len(self.channels) != len(set(self.channels)):
            raise util.CytoflowOpError('channels',
                                       "Must not duplicate channels")

        for c in self.channels:
            if c not in experiment.data:
                raise util.CytoflowOpError(
                    'channels',
                    "Channel {0} not found in the experiment".format(c))

        for c in self.scale:
            if c not in self.channels:
                raise util.CytoflowOpError(
                    'scale', "Scale set for channel {0}, but it isn't "
                    "in 'channels'".format(c))

        # get the scale
        scale = {}
        for c in self.channels:
            if c in self.scale:
                scale[c] = util.scale_factory(self.scale[c],
                                              experiment,
                                              channel=c)
            else:
                scale[c] = util.scale_factory(util.get_default_scale(),
                                              experiment,
                                              channel=c)

        lim = kwargs.pop("lim", {})
        for c in self.channels:
            if c not in lim:
                lim[c] = None

        super().plot(experiment, lim=lim, scale=scale, **kwargs)
Exemple #8
0
    def plot(self, experiment, **kwargs):
        """
        Parameters
        ----------
        lim : (float, float)
            Set the range of the plot's data axis.
            
        orientation : {'vertical', 'horizontal'}
        """

        if experiment is None:
            raise util.CytoflowViewError('experiment',
                                         "No experiment specified")

        if not self.channel:
            raise util.CytoflowViewError('channel', "Must specify a channel")

        if self.channel not in experiment.data:
            raise util.CytoflowViewError(
                'channel',
                "Channel {0} not in the experiment".format(self.channel))

        # get the scale
        scale = kwargs.pop('scale', None)
        if scale is None:
            scale = util.scale_factory(self.scale,
                                       experiment,
                                       channel=self.channel)

        lim = kwargs.pop("lim", None)

        super().plot(experiment,
                     lim={self.channel: lim},
                     scale={self.channel: scale},
                     **kwargs)
Exemple #9
0
    def plot(self, experiment, plot_name=None, **kwargs):
        """
        Parameters
        ----------
        orientation : {'vertical', 'horizontal'}
        
        lim : (float, float)
            Set the range of the plot's axis.
        """

        if experiment is None:
            raise util.CytoflowViewError('experiment',
                                         "No experiment specified")

        data = self._make_data(experiment)

        if not self.variable:
            raise util.CytoflowViewError('variable', "variable not set")

        if self.variable not in experiment.conditions:
            raise util.CytoflowViewError(
                'variable',
                "variable {0} not in the experiment".format(self.variable))

        scale = util.scale_factory(self.scale,
                                   experiment,
                                   statistic=self.statistic,
                                   error_statistic=self.error_statistic)

        super().plot(experiment,
                     data,
                     plot_name=plot_name,
                     scale=scale,
                     **kwargs)
Exemple #10
0
    def plot(self, experiment, **kwargs):
        """
        Plot the plots.
        
        Parameters
        ----------
        """

        if experiment is None:
            raise util.CytoflowViewError('experiment',
                                         "No experiment specified")

        annotations = {}
        for k in self.op._kmeans:
            annotations[k] = (self.op._kmeans[k], self.op._peaks[k],
                              self.op._cluster_peak[k])

        if self.xchannel in self.op._scale:
            xscale = self.op._scale[self.xchannel]
        else:
            xscale = util.scale_factory(self.xscale,
                                        experiment,
                                        channel=self.xchannel)

        if self.ychannel in self.op._scale:
            yscale = self.op._scale[self.ychannel]
        else:
            yscale = util.scale_factory(self.yscale,
                                        experiment,
                                        channel=self.ychannel)

        if not self.op._kmeans:
            raise util.CytoflowViewError(
                None, "Must estimate a model before plotting "
                "the density plot.")

        for k in self.op._kmeans:
            annotations[k] = (self.op._kmeans[k], self.op._peaks[k],
                              self.op._cluster_peak[k], self.op._density[k])

        super().plot(experiment,
                     annotations=annotations,
                     xscale=xscale,
                     yscale=yscale,
                     **kwargs)
Exemple #11
0
    def plot(self, experiment, plot_name=None, **kwargs):
        data = self._make_data(experiment)

        xscale = util.scale_factory(self.xscale,
                                    experiment,
                                    condition=self.variable)

        yscale = util.scale_factory(self.yscale,
                                    experiment,
                                    statistic=self.statistic,
                                    error_statistic=self.error_statistic)

        super().plot(experiment,
                     data,
                     plot_name,
                     xscale=xscale,
                     yscale=yscale,
                     **kwargs)
Exemple #12
0
    def plot(self, experiment, **kwargs):
        """
        Parameters
        ----------
        min_quantile : float (>0.0 and <1.0, default = 0.001)
            Clip data that is less than this quantile.
            
        max_quantile : float (>0.0 and <1.0, default = 1.00)
            Clip data that is greater than this quantile.
            
        xlim : (float, float)
            Set the range of the plot's x axis.
        """
        
        if experiment is None:
            raise util.CytoflowViewError('experiment',
                                         "No experiment specified")
        
        if not self.channel:
            raise util.CytoflowViewError('channel',
                                         "Must specify a channel")
        
        if self.channel not in experiment.data:
            raise util.CytoflowViewError('channel',
                                         "Channel {0} not in the experiment"
                                         .format(self.channel))
        
        # get the scale
        scale = kwargs.pop('scale', None)
        if scale is None:
            scale = util.scale_factory(self.scale, experiment, channel = self.channel)
        
        # adjust the limits to clip extreme values
        min_quantile = kwargs.pop("min_quantile", 0.001)
        max_quantile = kwargs.pop("max_quantile", 1.0) 
        
        if min_quantile < 0.0 or min_quantile > 1:
            raise util.CytoflowViewError('min_quantile',
                                         "min_quantile must be between 0 and 1")

        if max_quantile < 0.0 or max_quantile > 1:
            raise util.CytoflowViewError('max_quantile',
                                         "max_quantile must be between 0 and 1")     
        
        if min_quantile >= max_quantile:
            raise util.CytoflowViewError('min_quantile',
                                         "min_quantile must be less than max_quantile")   
                
        xlim = kwargs.pop("xlim", None)
        if xlim is None:
            xlim = (experiment[self.channel].quantile(min_quantile),
                    experiment[self.channel].quantile(max_quantile))
            
        xlim = [scale.clip(x) for x in xlim]
        
        super().plot(experiment, xlim = xlim, xscale = scale, **kwargs)
Exemple #13
0
    def test_logicle_estimate(self):
        """
        Test the parameter estimator against the R implementation
        """
        
        scale = util.scale_factory("logicle", self.ex, channel = "Y2-A")

        # these are the values the R implementation gives
        self.assertAlmostEqual(scale.A, 0.0)
        self.assertAlmostEqual(scale.W, 0.533191950161284)
Exemple #14
0
    def test_run(self):
        scale = util.scale_factory("log", self.ex, channel="Pacific Blue-A")
        x = scale(20.0)
        self.assertTrue(isinstance(x, float))

        x = scale([20])
        self.assertTrue(isinstance(x, list))

        x = scale(pd.Series([20]))
        self.assertTrue(isinstance(x, pd.Series))
Exemple #15
0
 def test_logicle_apply(self):
     """
     Make sure the function applies without segfaulting
     """
     
     scale = util.scale_factory("logicle", self.ex, channel = "Y2-A")
     
     x = scale(20.0)
     self.assertTrue(isinstance(x, float))
     
     x = scale([20])
     self.assertTrue(isinstance(x, list))
     
     x = scale(pd.Series([20]))
     self.assertTrue(isinstance(x, pd.Series))
Exemple #16
0
    def _grid_plot(self, experiment, grid, xlim, ylim, xscale, yscale,
                   **kwargs):

        kwargs.setdefault('antialiased', False)
        kwargs.setdefault('linewidth', 0)
        kwargs.setdefault('edgecolors', 'face')
        kwargs.setdefault('cmap', plt.get_cmap('viridis'))

        under_color = kwargs.pop('under_color', None)
        if under_color is not None:
            kwargs['cmap'].set_under(color=under_color)
        else:
            kwargs['cmap'].set_under(color=kwargs['cmap'](0.0))

        bad_color = kwargs.pop('bad_color', None)
        if bad_color is not None:
            kwargs['cmap'].set_bad(color=kwargs['cmap'](0.0))

        gridsize = kwargs.pop('gridsize', 50)
        xbins = xscale.inverse(
            np.linspace(xscale(xlim[0]), xscale(xlim[1]), gridsize))
        ybins = yscale.inverse(
            np.linspace(yscale(ylim[0]), yscale(ylim[1]), gridsize))

        # set up the range of the color map
        if 'norm' not in kwargs:
            data_max = 0
            for _, data_ijk in grid.facet_data():
                x = data_ijk[self.xchannel]
                y = data_ijk[self.ychannel]
                h, _, _ = np.histogram2d(x, y, bins=[xbins, ybins])
                data_max = max(data_max, h.max())

            hue_scale = util.scale_factory(self.huescale,
                                           experiment,
                                           data=np.array([1, data_max]))
            kwargs['norm'] = hue_scale.color_norm()

        grid.map(_densityplot,
                 self.xchannel,
                 self.ychannel,
                 xbins=xbins,
                 ybins=ybins,
                 **kwargs)

        return {'cmap': kwargs['cmap'], 'norm': kwargs['norm']}
Exemple #17
0
    def plot(self, experiment, **kwargs):

        if experiment is None:
            raise util.CytoflowViewError("No experiment specified")

        if not self.channel:
            raise util.CytoflowViewError("Must specify a channel")

        if self.channel not in experiment.data:
            raise util.CytoflowViewError(
                "Channel {0} not in the experiment".format(self.channel))

        # get the scale
        scale = kwargs.pop('scale', None)
        if scale is None:
            scale = util.scale_factory(self.scale,
                                       experiment,
                                       channel=self.channel)

        # adjust the limits to clip extreme values
        min_quantile = kwargs.pop("min_quantile", 0.001)
        max_quantile = kwargs.pop("max_quantile", 1.0)

        if min_quantile < 0.0 or min_quantile > 1:
            raise util.CytoflowViewError(
                "min_quantile must be between 0 and 1")

        if max_quantile < 0.0 or max_quantile > 1:
            raise util.CytoflowViewError(
                "max_quantile must be between 0 and 1")

        if min_quantile >= max_quantile:
            raise util.CytoflowViewError(
                "min_quantile must be less than max_quantile")

        xlim = kwargs.pop("xlim", None)
        if xlim is None:
            xlim = (experiment[self.channel].quantile(min_quantile),
                    experiment[self.channel].quantile(max_quantile))

        xlim = [scale.clip(x) for x in xlim]

        super().plot(experiment, xlim=xlim, xscale=scale, **kwargs)
Exemple #18
0
    def estimate(self, experiment, subset = None):
        """
        Estimate the Gaussian mixture model parameters
        
        Parameters
        ----------
        experiment : Experiment
            The data to use to estimate the mixture parameters
            
        subset : str (default = None)
            If set, a Python expression to determine the subset of the data
            to use to in the estimation.
        """

        if experiment is None:
            raise util.CytoflowOpError('experiment',
                                       "No experiment specified")
        
        if len(self.channels) == 0:
            raise util.CytoflowOpError('channels',
                                       "Must set at least one channel")

        if len(self.channels) != len(set(self.channels)):
            raise util.CytoflowOpError('channels', 
                                       "Must not duplicate channels")

        for c in self.channels:
            if c not in experiment.data:
                raise util.CytoflowOpError('channels',
                                           "Channel {0} not found in the experiment"
                                      .format(c))
                
        for c in self.scale:
            if c not in self.channels:
                raise util.CytoflowOpError('channels',
                                           "Scale set for channel {0}, but it isn't "
                                           "in the experiment"
                                           .format(c))
       
        for b in self.by:
            if b not in experiment.data:
                raise util.CytoflowOpError('by',
                                           "Aggregation metadata {} not found, "
                                           "must be one of {}"
                                           .format(b, experiment.conditions))
                
        if subset:
            try:
                experiment = experiment.query(subset)
            except:
                raise util.CytoflowViewError('subset',
                                             "Subset string '{0}' isn't valid"
                                             .format(subset))
                
            if len(experiment) == 0:
                raise util.CytoflowViewError('subset',
                                             "Subset string '{0}' returned no events"
                                             .format(subset))
                
        if self.by:
            groupby = experiment.data.groupby(self.by)
        else:
            # use a lambda expression to return a group that contains
            # all the events
            groupby = experiment.data.groupby(lambda _: True)
            
        # get the scale. estimate the scale params for the ENTIRE data set,
        # not subsets we get from groupby().  And we need to save it so that
        # the data is transformed the same way when we apply()
        for c in self.channels:
            if c in self.scale:
                self._scale[c] = util.scale_factory(self.scale[c], experiment, channel = c)
            else:
                self._scale[c] = util.scale_factory(util.get_default_scale(), experiment, channel = c)
        
        gmms = {}
            
        for group, data_subset in groupby:
            if len(data_subset) == 0:
                raise util.CytoflowOpError(None,
                                           "Group {} had no data"
                                           .format(group))
            x = data_subset.loc[:, self.channels[:]]
            for c in self.channels:
                x[c] = self._scale[c](x[c])
            
            # drop data that isn't in the scale range
            for c in self.channels:
                x = x[~(np.isnan(x[c]))]
            x = x.values
            
            gmm = sklearn.mixture.GaussianMixture(n_components = self.num_components,
                                                  covariance_type = "full",
                                                  random_state = 1)
            gmm.fit(x)
            
            if not gmm.converged_:
                raise util.CytoflowOpError(None,
                                           "Estimator didn't converge"
                                           " for group {0}"
                                           .format(group))
                
            # in the 1D version, we sorted the components by the means -- so
            # the first component has the lowest mean, the second component
            # has the next-lowest mean, etc.
            
            # that doesn't work in the general case.  instead, we assume that 
            # the clusters are likely (?) to be arranged along *one* of the 
            # axes, so we take the |norm| of the mean of each cluster and 
            # sort that way.
            
            norms = np.sum(gmm.means_ ** 2, axis = 1) ** 0.5
            sort_idx = np.argsort(norms)
            gmm.means_ = gmm.means_[sort_idx]
            gmm.weights_ = gmm.weights_[sort_idx]
            gmm.covariances_ = gmm.covariances_[sort_idx]
            gmm.precisions_ = gmm.precisions_[sort_idx]
            gmm.precisions_cholesky_ = gmm.precisions_cholesky_[sort_idx]

            
            gmms[group] = gmm
            
        self._gmms = gmms
Exemple #19
0
    def estimate(self, experiment, subset = None):
        """
        Estimate the Gaussian mixture model parameters
        """
        
        if not experiment:
            raise util.CytoflowOpError("No experiment specified")

        if self.channel not in experiment.data:
            raise util.CytoflowOpError("Column {0} not found in the experiment"
                                  .format(self.channel))
       
        for b in self.by:
            if b not in experiment.data:
                raise util.CytoflowOpError("Aggregation metadata {0} not found"
                                      " in the experiment"
                                      .format(b))
            if len(experiment.data[b].unique()) > 100: #WARNING - magic number
                raise util.CytoflowOpError("More than 100 unique values found for"
                                      " aggregation metadata {0}.  Did you"
                                      " accidentally specify a data channel?"
                                      .format(b))
                
        if self.by:
            groupby = experiment.data.groupby(self.by)
        else:
            # use a lambda expression to return a group that contains
            # all the events
            groupby = experiment.data.groupby(lambda x: True)
            
        # get the scale. estimate the scale params for the ENTIRE data set,
        # not subsets we get from groupby().  And we need to save it so that
        # the data is transformed the same way when we apply()
        self._scale = util.scale_factory(self.scale, experiment, self.channel)
            
        for group, data_subset in groupby:
            x = data_subset[self.channel].reset_index(drop = True)
            x = self._scale(x)
            
            # drop data that isn't in the scale range
            #x = pd.Series(self._scale(x)).dropna()
            x = x[~np.isnan(x)]
            
            gmm = mixture.GMM(n_components = self.num_components,
                              random_state = 1)
            gmm.fit(x[:, np.newaxis])
            
            if not gmm.converged_:
                raise util.CytoflowOpError("Estimator didn't converge"
                                      " for group {0}"
                                      .format(group))
                
            # to make sure we have a stable ordering, sort the components
            # by the means (so the first component has the lowest mean, 
            # the next component has the next-lowest, etc.)
            
            sort_idx = np.argsort(gmm.means_[:, 0])
            gmm.means_ = gmm.means_[sort_idx]
            gmm.weights_ = gmm.weights_[sort_idx]
            gmm.covars_ = gmm.covars_[sort_idx]
           
            self._gmms[group] = gmm
Exemple #20
0
    def apply(self, experiment):
        """
        Applies the binning to an experiment.
        
        Parameters
        ----------
        experiment : Experiment
            the old_experiment to which this op is applied
            
        Returns
        -------
        Experiment
            A new experiment with a condition column named :attr:`name`, which
            contains the location of the left-most edge of the bin that the
            event is in.  If :attr:`bin_count_name` is set, another column
            is added with that name as well, containing the number of events
            in the same bin as the event.

        """
        if experiment is None:
            raise util.CytoflowOpError('experiment', "no experiment specified")

        if not self.name:
            raise util.CytoflowOpError('name', "Name is not set")

        if self.name != util.sanitize_identifier(self.name):
            raise util.CytoflowOpError(
                'name',
                "Name can only contain letters, numbers and underscores.".
                format(self.name))

        if self.name in experiment.data.columns:
            raise util.CytoflowOpError(
                'name',
                "Name {} is in the experiment already".format(self.name))

        if self.bin_count_name and self.bin_count_name in experiment.data.columns:
            raise util.CytoflowOpError(
                'bin_count_name',
                "bin_count_name {} is in the experiment already".format(
                    self.bin_count_name))

        if not self.channel:
            raise util.CytoflowOpError('channel', "channel is not set")

        if self.channel not in experiment.data.columns:
            raise util.CytoflowOpError(
                'channel',
                "channel {} isn't in the experiment".format(self.channel))

        if not self.bin_width:
            raise util.CytoflowOpError('bin_width', "must set bin width")

        if not (self.scale == "linear" or self.scale == "log"):
            raise util.CytoflowOpError(
                'scale', "Can only use binning op with linear or log scale")

        scale = util.scale_factory(self.scale,
                                   experiment,
                                   channel=self.channel)

        scaled_min = scale(scale.clip(experiment.data[self.channel]).min())
        scaled_max = scale(scale.clip(experiment.data[self.channel]).max())

        if self.scale == 'linear':
            start = 0
        else:
            start = 1

        scaled_bins_left = np.arange(start=-1.0 * start,
                                     stop=(-1.0 * scaled_min) + self.bin_width,
                                     step=self.bin_width) * -1.0
        scaled_bins_left = scaled_bins_left[::-1][:-1]

        scaled_bins_right = np.arange(start=start,
                                      stop=scaled_max + self.bin_width,
                                      step=self.bin_width)
        scaled_bins = np.append(scaled_bins_left, scaled_bins_right)

        if len(scaled_bins) > self._max_num_bins:
            raise util.CytoflowOpError(
                None, "Too many bins! To increase this limit, "
                "change _max_num_bins (currently {})".format(
                    self._max_num_bins))

        if len(scaled_bins) < 2:
            raise util.CytoflowOpError('bin_width',
                                       "Must have more than one bin")

        # now, back into data space
        bins = scale.inverse(scaled_bins)

        # reduce to 4 sig figs
        bins = ['%.4g' % x for x in bins]
        bins = [float(x) for x in bins]
        bins = np.array(bins)

        # put the data in bins
        bin_idx = np.digitize(experiment.data[self.channel], bins[1:-1])

        new_experiment = experiment.clone()
        new_experiment.add_condition(self.name, "float64", bins[bin_idx])

        # keep track of the bins we used, for prettier plotting later.
        new_experiment.metadata[self.name]["bin_scale"] = self.scale
        new_experiment.metadata[self.name]["bins"] = bins

        if self.bin_count_name:
            # TODO - this is a HUGE memory hog?!
            # TODO - fix this, then turn it on by default
            agg_count = new_experiment.data.groupby(self.name).count()
            agg_count = agg_count[agg_count.columns[0]]

            # have to make the condition a float64, because if we're in log
            # space there may be events that have NaN as the bin number.

            new_experiment.add_condition(
                self.bin_count_name, "float64",
                new_experiment[self.name].map(agg_count))

        new_experiment.history.append(
            self.clone_traits(transient=lambda _: True))
        return new_experiment
Exemple #21
0
    def plot(self, experiment, **kwargs):
        """Plot a faceted histogram view of a channel"""

        if not experiment:
            raise util.CytoflowViewError("No experiment specified")

        if not self.channel:
            raise util.CytoflowViewError("Must specify a channel")

        if self.channel not in experiment.data:
            raise util.CytoflowViewError(
                "Channel {0} not in the experiment".format(self.channel))

        if self.xfacet and self.xfacet not in experiment.conditions:
            raise util.CytoflowViewError(
                "X facet {0} not in the experiment".format(self.xfacet))

        if self.yfacet and self.yfacet not in experiment.conditions:
            raise util.CytoflowViewError(
                "Y facet {0} not in the experiment".format(self.yfacet))

        if self.huefacet and self.huefacet not in experiment.conditions:
            raise util.CytoflowViewError(
                "Hue facet {0} not in the experiment".format(self.huefacet))

        facets = filter(lambda x: x, [self.xfacet, self.yfacet, self.huefacet])
        if len(facets) != len(set(facets)):
            raise util.CytoflowViewError("Can't reuse facets")

        col_wrap = kwargs.pop('col_wrap', None)

        if col_wrap and self.yfacet:
            raise util.CytoflowViewError(
                "Can't set yfacet and col_wrap at the same time.")

        if col_wrap and not self.xfacet:
            raise util.CytoflowViewError("Must set xfacet to use col_wrap.")

        if self.subset:
            try:
                data = experiment.query(self.subset).data.reset_index()
            except util.CytoflowError as e:
                raise util.CytoflowViewError(str(e))
            except Exception as e:
                raise util.CytoflowViewError(
                    "Subset string '{0}' isn't valid".format(self.subset))

            if len(data) == 0:
                raise util.CytoflowViewError(
                    "Subset string '{0}' returned no events".format(
                        self.subset))
        else:
            data = experiment.data

        # get the scale
        scale = kwargs.pop('scale', None)
        if scale is None:
            scale = util.scale_factory(self.scale,
                                       experiment,
                                       channel=self.channel)

        scaled_data = scale(data[self.channel])

        kwargs.setdefault('histtype', 'stepfilled')
        kwargs.setdefault('alpha', 0.5)
        kwargs.setdefault('antialiased', True)

        # estimate a "good" number of bins; see cytoflow.utility.num_hist_bins
        # for a reference.

        num_bins = util.num_hist_bins(scaled_data)

        # clip num_bins to (50, 1000)
        num_bins = max(min(num_bins, 1000), 50)

        xmin = bottleneck.nanmin(scaled_data)
        xmax = bottleneck.nanmax(scaled_data)

        if (self.huefacet and "bins" in experiment.metadata[self.huefacet] and
                experiment.metadata[self.huefacet]["bin_scale"] == self.scale):
            # if we color facet by the result of a BinningOp and we don't
            # match the BinningOp bins with the histogram bins, we get
            # gnarly aliasing.

            # each color gets at least one bin.  however, if the estimated
            # number of bins for the histogram is much larger than the
            # number of colors, sub-divide each color into multiple bins.
            bins = experiment.metadata[self.huefacet]["bins"]
            bins = np.append(bins, xmax)

            num_hues = len(data[self.huefacet].unique())
            bins_per_hue = math.ceil(num_bins / num_hues)

            new_bins = [xmin]
            for end in [b for b in bins if (b > xmin and b <= xmax)]:
                new_bins = np.append(
                    new_bins,
                    np.linspace(new_bins[-1],
                                end,
                                bins_per_hue + 1,
                                endpoint=True)[1:])

            bins = scale.inverse(new_bins)
        else:
            bin_width = (xmax - xmin) / num_bins
            bins = scale.inverse(np.arange(xmin, xmax, bin_width))
            bins = np.append(bins, scale.inverse(xmax))

        # take care of a rare rounding error, where the first observation is
        # less than the first bin or the last observation is more than the last
        # bin, which makes plt.hist() puke
        bins[-1] += 1
        bins[0] -= 1

        kwargs.setdefault('bins', bins)

        # mask out the data that's not in the scale domain
        data = data[~np.isnan(scaled_data)]

        # adjust the limits to clip extreme values
        min_quantile = kwargs.pop("min_quantile", 0.001)
        max_quantile = kwargs.pop("max_quantile", 0.999)

        xlim = kwargs.pop("xlim", None)
        if xlim is None:
            xlim = (data[self.channel].quantile(min_quantile),
                    data[self.channel].quantile(max_quantile))

        sharex = kwargs.pop("sharex", True)
        sharey = kwargs.pop("sharey", True)

        cols = col_wrap if col_wrap else \
               len(data[self.xfacet].unique()) if self.xfacet else 1

        g = sns.FacetGrid(data,
                          size=6 / cols,
                          aspect=1.5,
                          col=(self.xfacet if self.xfacet else None),
                          row=(self.yfacet if self.yfacet else None),
                          hue=(self.huefacet if self.huefacet else None),
                          col_order=(np.sort(data[self.xfacet].unique())
                                     if self.xfacet else None),
                          row_order=(np.sort(data[self.yfacet].unique())
                                     if self.yfacet else None),
                          hue_order=(np.sort(data[self.huefacet].unique())
                                     if self.huefacet else None),
                          col_wrap=col_wrap,
                          legend_out=False,
                          sharex=sharex,
                          sharey=sharey,
                          xlim=xlim)

        # set the scale for each set of axes; can't just call plt.xscale()
        for ax in g.axes.flatten():
            ax.set_xscale(self.scale, **scale.mpl_params)

        legend = kwargs.pop('legend', True)
        g.map(plt.hist, self.channel, **kwargs)

        # if we are sharing y axes, make sure the y scale is the same for each
        if sharey:
            fig = plt.gcf()
            fig_y_max = float("-inf")

            for ax in fig.get_axes():
                _, ax_y_max = ax.get_ylim()
                if ax_y_max > fig_y_max:
                    fig_y_max = ax_y_max

            for ax in fig.get_axes():
                ax.set_ylim(None, fig_y_max)

        # if we are sharing x axes, make sure the x scale is the same for each
        if sharex:
            fig = plt.gcf()
            fig_x_min = float("inf")
            fig_x_max = float("-inf")

            for ax in fig.get_axes():
                ax_x_min, ax_x_max = ax.get_xlim()
                if ax_x_min < fig_x_min:
                    fig_x_min = ax_x_min
                if ax_x_max > fig_x_max:
                    fig_x_max = ax_x_max

            for ax in fig.get_axes():
                ax.set_xlim(fig_x_min, fig_x_max)

        # if we have a hue facet, the y scaling is frequently wrong.
        if self.huefacet:
            h = np.histogram(data[self.channel], bins=bins)
            ymax = np.max(h[0])
            plt.ylim(0, 1.1 * ymax)

        # if we have a hue facet and a lot of hues, make a color bar instead
        # of a super-long legend.

        if self.huefacet and legend:
            current_palette = mpl.rcParams['axes.color_cycle']

            if util.is_numeric(experiment.data[self.huefacet]) and \
               len(g.hue_names) > len(current_palette):

                plot_ax = plt.gca()
                cmap = mpl.colors.ListedColormap(
                    sns.color_palette("husl", n_colors=len(g.hue_names)))
                cax, _ = mpl.colorbar.make_axes(plt.gca())
                norm = mpl.colors.Normalize(vmin=np.min(g.hue_names),
                                            vmax=np.max(g.hue_names),
                                            clip=False)
                mpl.colorbar.ColorbarBase(cax,
                                          cmap=cmap,
                                          norm=norm,
                                          label=self.huefacet)
                plt.sca(plot_ax)
            else:
                g.add_legend(title=self.huefacet)

        return g
Exemple #22
0
    def apply(self, experiment):
        """Applies the threshold to an experiment.
        
        Parameters
        ----------
        experiment : Experiment
            the old :class:`Experiment` to which this op is applied
            
        Returns
        -------
        Experiment
            a new :class:'Experiment`, the same as ``old_experiment`` but with 
            a new column of type `bool` with the same as the operation name.  
            The bool is ``True`` if the event's measurement is within the 
            polygon, and ``False`` otherwise.
            
        Raises
        ------
        util.CytoflowOpError
            if for some reason the operation can't be applied to this
            experiment. The reason is in :attr:`.CytoflowOpError.args`
        """

        if experiment is None:
            raise util.CytoflowOpError('experiment', "No experiment specified")

        if self.name in experiment.data.columns:
            raise util.CytoflowOpError(
                'name', "{} is in the experiment already!".format(self.name))

        if not self.xchannel:
            raise util.CytoflowOpError('xchannel', "Must specify an x channel")

        if not self.ychannel:
            raise util.CytoflowOpError('ychannel', "Must specify a y channel")

        if not self.xchannel in experiment.channels:
            raise util.CytoflowOpError(
                'xchannel',
                "xchannel {0} is not in the experiment".format(self.xchannel))

        if not self.ychannel in experiment.channels:
            raise util.CytoflowOpError(
                'ychannel',
                "ychannel {0} is not in the experiment".format(self.ychannel))

        if len(self.vertices) < 3:
            raise util.CytoflowOpError('vertices',
                                       "Must have at least 3 vertices")

        if any([len(x) != 2 for x in self.vertices]):
            return util.CytoflowOpError(
                'vertices', "All vertices must be lists or tuples "
                "of length = 2")

        # make sure name got set!
        if not self.name:
            raise util.CytoflowOpError(
                'name', "You have to set the Polygon gate's name "
                "before applying it!")

        # make sure old_experiment doesn't already have a column named self.name
        if (self.name in experiment.data.columns):
            raise util.CytoflowOpError(
                'name',
                "Experiment already contains a column {0}".format(self.name))

        # there's a bit of a subtlety here: if the vertices were
        # selected with an interactive plot, and that plot had scaled
        # axes, we need to apply that scale function to both the
        # vertices and the data before looking for path membership
        xscale = util.scale_factory(self.xscale,
                                    experiment,
                                    channel=self.xchannel)
        yscale = util.scale_factory(self.yscale,
                                    experiment,
                                    channel=self.ychannel)

        vertices = [(xscale(x), yscale(y)) for (x, y) in self.vertices]
        data = experiment.data[[self.xchannel, self.ychannel]].copy()
        data[self.xchannel] = xscale(data[self.xchannel])
        data[self.ychannel] = yscale(data[self.ychannel])

        # use a matplotlib Path because testing for membership is a fast C fn.
        path = mpl.path.Path(np.array(vertices))
        xy_data = data[[self.xchannel, self.ychannel]].values

        new_experiment = experiment.clone()
        new_experiment.add_condition(self.name, "bool",
                                     path.contains_points(xy_data))
        new_experiment.history.append(
            self.clone_traits(transient=lambda _: True))

        return new_experiment
    def plot(self, experiment, **kwargs):
        """Plot a faceted histogram view of a channel"""
        
        if not experiment:
            raise util.CytoflowViewError("No experiment specified")
        
        if not self.xchannel:
            raise util.CytoflowViewError("X channel not specified")
        
        if self.xchannel not in experiment.data:
            raise util.CytoflowViewError("X channel {0} not in the experiment"
                                    .format(self.xchannel))
            
        if not self.ychannel:
            raise util.CytoflowViewError("Y channel not specified")
        
        if self.ychannel not in experiment.data:
            raise util.CytoflowViewError("Y channel {0} not in the experiment")
        
        if self.xfacet and self.xfacet not in experiment.conditions:
            raise util.CytoflowViewError("X facet {0} not in the experiment")
        
        if self.yfacet and self.yfacet not in experiment.conditions:
            raise util.CytoflowViewError("Y facet {0} not in the experiment")
        
        if self.huefacet and self.huefacet not in experiment.metadata:
            raise util.CytoflowViewError("Hue facet {0} not in the experiment")

        if self.subset:
            try: 
                data = experiment.query(self.subset).data.reset_index()
            except:
                raise util.CytoflowViewError("Subset string \'{0}\' not valid")
                            
            if len(data.index) == 0:
                raise util.CytoflowViewError("Subset string '{0}' returned no events"
                                        .format(self.subset))
        else:
            data = experiment.data
            
        xscale = util.scale_factory(self.xscale, experiment, self.xchannel)
        yscale = util.scale_factory(self.yscale, experiment, self.ychannel)

        kwargs['xscale'] = xscale
        kwargs['yscale'] = yscale
        
        scaled_xdata = xscale(data[self.xchannel])
        data = data[~np.isnan(scaled_xdata)]
        scaled_xdata = scaled_xdata[~np.isnan(scaled_xdata)]

        scaled_ydata = yscale(data[self.ychannel])
        data = data[~np.isnan(scaled_ydata)]
        scaled_ydata = scaled_ydata[~np.isnan(scaled_ydata)]
        

        # find good bin counts
        num_xbins = util.num_hist_bins(scaled_xdata)
        num_ybins = util.num_hist_bins(scaled_ydata)
        
        # there are situations where this produces an unreasonable estimate.
        if num_xbins > self._max_bins:
            warnings.warn("Capping X bins to {}! To increase this limit, "
                          "change _max_bins"
                          .format(self._max_bins))
            num_xbins = self._max_bins
            
        if num_ybins > self._max_bins:
            warnings.warn("Capping Y bins to {}! To increase this limit, "
                          "change _max_bins"
                          .format(self._max_bins))
            num_ybins = self._max_bins
      
        kwargs.setdefault('smoothed', False)
        if kwargs['smoothed']:
            num_xbins /= 2
            num_ybins /= 2
                
        _, xedges, yedges = np.histogram2d(scaled_xdata, 
                                           scaled_ydata, 
                                           bins = (num_xbins, num_ybins))

        kwargs['xedges'] = xscale.inverse(xedges)
        kwargs['yedges'] = yscale.inverse(yedges)
        
        kwargs.setdefault('antialiased', True)

        g = sns.FacetGrid(data,
                          size = 6,
                          aspect = 1.5, 
                          col = (self.xfacet if self.xfacet else None),
                          row = (self.yfacet if self.yfacet else None),
                          hue = (self.huefacet if self.huefacet else None),
                          col_order = (np.sort(data[self.xfacet].unique()) if self.xfacet else None),
                          row_order = (np.sort(data[self.yfacet].unique()) if self.yfacet else None),
                          hue_order = (np.sort(data[self.huefacet].unique()) if self.huefacet else None),
                          sharex = False,
                          sharey = False)
         
        for ax in g.axes.flatten():
            ax.set_xscale(self.xscale, **xscale.mpl_params)
            ax.set_yscale(self.yscale, **yscale.mpl_params)
        
        g.map(_hist2d, self.xchannel, self.ychannel, **kwargs)
        # if we have a hue facet and a lot of hues, make a color bar instead
        # of a super-long legend.
        
        if self.huefacet:
            current_palette = mpl.rcParams['axes.color_cycle']
            if len(g.hue_names) > len(current_palette):
                plot_ax = plt.gca()
                cmap = mpl.colors.ListedColormap(sns.color_palette("husl", 
                                                                   n_colors = len(g.hue_names)))
                cax, _ = mpl.colorbar.make_axes(plt.gca())
                norm = mpl.colors.Normalize(vmin = np.min(g.hue_names), 
                                            vmax = np.max(g.hue_names), 
                                            clip = False)
                mpl.colorbar.ColorbarBase(cax, 
                                          cmap = cmap, 
                                          norm = norm,
                                          label = self.huefacet)
                plt.sca(plot_ax)
            else:
                g.add_legend(title = self.huefacet)
Exemple #24
0
    def apply(self, experiment):
        """
        Applies the binning to an experiment.
        
        Parameters
        ----------
        experiment : Experiment
            the old_experiment to which this op is applied
            
        Returns
        -------
        Experiment
            A new experiment with a condition column named :attr:`name`, which
            contains the location of the left-most edge of the bin that the
            event is in.  If :attr:`bin_count_name` is set, another column
            is added with that name as well, containing the number of events
            in the same bin as the event.

        """
        if experiment is None:
            raise util.CytoflowOpError('experiment', "no experiment specified")

        if not self.name:
            raise util.CytoflowOpError('name', "Name is not set")

        if self.name in experiment.data.columns:
            raise util.CytoflowOpError(
                'name',
                "Name {} is in the experiment already".format(self.name))

        if self.bin_count_name and self.bin_count_name in experiment.data.columns:
            raise util.CytoflowOpError(
                'bin_count_name',
                "bin_count_name {} is in the experiment already".format(
                    self.bin_count_name))

        if not self.channel:
            raise util.CytoflowOpError('channel', "channel is not set")

        if self.channel not in experiment.data.columns:
            raise util.CytoflowOpError(
                'channel',
                "channel {} isn't in the experiment".format(self.channel))

        if not self.num_bins and not self.bin_width:
            raise util.CytoflowOpError('num_bins',
                                       "must set either bin number or width")

        if self.bin_width \
           and not (self.scale == "linear" or self.scale == "log"):
            raise util.CytoflowOpError(
                'scale', "Can only use bin_width with linear or log scale")

        scale = util.scale_factory(self.scale,
                                   experiment,
                                   channel=self.channel)
        scaled_data = scale(experiment.data[self.channel])

        scaled_min = bn.nanmin(scaled_data)
        scaled_max = bn.nanmax(scaled_data)

        num_bins = self.num_bins if self.num_bins else \
                   (scaled_max - scaled_min) / self.bin_width

        if num_bins > self._max_num_bins:
            raise util.CytoflowOpError(
                None, "Too many bins! To increase this limit, "
                "change _max_num_bins (currently {})".format(
                    self._max_num_bins))

        scaled_bins = np.linspace(start=scaled_min,
                                  stop=scaled_max,
                                  num=num_bins)

        if len(scaled_bins) < 2:
            raise util.CytoflowOpError('num_bins',
                                       "Must have more than one bin")

        # put the data in bins
        bin_idx = np.digitize(scaled_data, scaled_bins[1:-1])

        # now, back into data space
        bins = scale.inverse(scaled_bins)

        new_experiment = experiment.clone()
        new_experiment.add_condition(self.name, "float", bins[bin_idx])

        # keep track of the bins we used, for prettier plotting later.
        new_experiment.metadata[self.name]["bin_scale"] = self.scale
        new_experiment.metadata[self.name]["bins"] = bins

        if self.bin_count_name:
            # TODO - this is a HUGE memory hog?!
            # TODO - fix this, then turn it on by default
            agg_count = new_experiment.data.groupby(self.name).count()
            agg_count = agg_count[agg_count.columns[0]]

            # have to make the condition a float64, because if we're in log
            # space there may be events that have NaN as the bin number.

            new_experiment.add_condition(
                self.bin_count_name, "float64",
                new_experiment[self.name].map(agg_count))

        new_experiment.history.append(
            self.clone_traits(transient=lambda _: True))
        return new_experiment
Exemple #25
0
    def plot(self, experiment, **kwargs):
        """Plot a bar chart"""
        
        if not experiment:
            raise util.CytoflowViewError("No experiment specified")
        
        if not self.variable:
            raise util.CytoflowViewError("variable not set")
            
        if self.variable not in experiment.conditions:
            raise util.CytoflowViewError("variable {0} not in the experiment"
                                    .format(self.variable))
        
        if not (experiment.conditions[self.variable] == "float" or
                experiment.conditions[self.variable] == "int"):
            raise util.CytoflowViewError("variable {0} isn't numeric"
                                    .format(self.variable)) 
            
        if not self.xchannel:
            raise util.CytoflowViewError("X channel isn't set.")
        
        if self.xchannel not in experiment.data:
            raise util.CytoflowViewError("X channel {0} isn't in the experiment"
                                    .format(self.xchannel))
        
        if not self.xfunction:
            raise util.CytoflowViewError("X summary function isn't set")
                
        if not self.ychannel:
            raise util.CytoflowViewError("Y channel isn't set.")
        
        if self.ychannel not in experiment.data:
            raise util.CytoflowViewError("Y channel {0} isn't in the experiment"
                                    .format(self.ychannel))
        
        if not self.yfunction:
            raise util.CytoflowViewError("Y summary function isn't set")
        
        if self.xfacet and self.xfacet not in experiment.conditions:
            raise util.CytoflowViewError("X facet {0} not in the experiment")
        
        if self.yfacet and self.yfacet not in experiment.conditions:
            raise util.CytoflowViewError("Y facet {0} not in the experiment")
        
        if self.huefacet and self.huefacet not in experiment.metadata:
            raise util.CytoflowViewError("Hue facet {0} not in the experiment")        

        if self.x_error_bars and self.x_error_bars != 'data' \
                             and self.x_error_bars not in experiment.conditions:
            raise util.CytoflowViewError("x_error_bars must be either 'data' or "
                                         "a condition in the experiment") 
            
        if self.x_error_bars and not self.x_error_function:
            raise util.CytoflowViewError("didn't set an x error function")

        if self.y_error_bars and self.y_error_bars != 'data' \
                             and self.y_error_bars not in experiment.conditions:
            raise util.CytoflowViewError("y_error_bars must be either 'data' or "
                                         "a condition in the experiment") 
            
        if self.y_error_bars and not self.y_error_function:
            raise util.CytoflowViewError("didn't set an error function")
                
        kwargs.setdefault('antialiased', True)
        
        if self.subset:
            try:
                data = experiment.query(self.subset).data.reset_index()
            except:
                raise util.CytoflowViewError("Subset string '{0}' isn't valid"
                                        .format(self.subset))
                
            if len(data.index) == 0:
                raise util.CytoflowViewError("Subset string '{0}' returned no events"
                                        .format(self.subset))
        else:
            data = experiment.data
            
        group_vars = [self.variable]
        if self.xfacet: group_vars.append(self.xfacet)
        if self.yfacet: group_vars.append(self.yfacet)
        if self.huefacet: group_vars.append(self.huefacet)
            
        g = data.groupby(by = group_vars)
        
        plot_data = pd.DataFrame(
                {self.xchannel : g[self.xchannel].aggregate(self.xfunction), 
                 self.ychannel : g[self.ychannel].aggregate(self.yfunction)}) \
                      .reset_index()
                      
        # compute the x error statistic
        if self.x_error_bars:
            if self.x_error_bars == 'data':
                # compute the error statistic on the same subsets as the summary
                # statistic
                error_stat = g[self.xchannel].aggregate(self.x_error_function).reset_index()
            else:
                # subdivide the data set further by the error_bars condition
                err_vars = list(group_vars)
                err_vars.append(self.x_error_bars)
                
                # apply the summary statistic to each subgroup
                data_g = data.groupby(by = err_vars)
                data_stat = data_g[self.xchannel].aggregate(self.xfunction).reset_index()
                
                # apply the error function to the summary statistics
                err_g = data_stat.groupby(by = group_vars)
                error_stat = err_g[self.xchannel].aggregate(self.x_error_function).reset_index()
        
            x_err_name = util.random_string(6)
            plot_data[x_err_name] = error_stat[self.xchannel]
            
        # compute the y error statistic
        if self.y_error_bars:
            if self.y_error_bars == 'data':
                # compute the error statistic on the same subsets as the summary
                # statistic
                error_stat = g[self.ychannel].aggregate(self.y_error_function).reset_index()
            else:
                # subdivide the data set further by the error_bars condition
                err_vars = list(group_vars)
                err_vars.append(self.y_error_bars)
                
                # apply the summary statistic to each subgroup
                data_g = data.groupby(by = err_vars)
                data_stat = data_g[self.ychannel].aggregate(self.yfunction).reset_index()
                
                # apply the error function to the summary statistics
                err_g = data_stat.groupby(by = group_vars)
                error_stat = err_g[self.ychannel].aggregate(self.y_error_function).reset_index()
        
            y_err_name = util.random_string(6)
            plot_data[y_err_name] = error_stat[self.ychannel]
        
 
        grid = sns.FacetGrid(plot_data,
                             size = 6,
                             aspect = 1.5,
                             col = (self.xfacet if self.xfacet else None),
                             row = (self.yfacet if self.yfacet else None),
                             hue = (self.huefacet if self.huefacet else None),
                             col_order = (np.sort(data[self.xfacet].unique()) if self.xfacet else None),
                             row_order = (np.sort(data[self.yfacet].unique()) if self.yfacet else None),
                             hue_order = (np.sort(data[self.huefacet].unique()) if self.huefacet else None),
                             legend_out = False,
                             sharex = False,
                             sharey = False)
        
        xscale = util.scale_factory(self.xscale, experiment, self.xchannel)
        yscale = util.scale_factory(self.yscale, experiment, self.ychannel)
        
        for ax in grid.axes.flatten():
            ax.set_xscale(self.xscale, **xscale.mpl_params)
            ax.set_yscale(self.yscale, **yscale.mpl_params)
        
        # plot the error bars first so the axis labels don't get overwritten
        if self.x_error_bars:
            grid.map(_x_error_bars, self.xchannel, x_err_name, self.ychannel)
            
        if self.y_error_bars:
            grid.map(_y_error_bars, self.xchannel, self.ychannel, y_err_name)

        grid.map(plt.plot, self.xchannel, self.ychannel, **kwargs)


        # if we have a hue facet and a lot of hues, make a color bar instead
        # of a super-long legend.
        
        if self.huefacet:
            current_palette = mpl.rcParams['axes.color_cycle']
            if len(grid.hue_names) > len(current_palette):
                plot_ax = plt.gca()
                cmap = mpl.colors.ListedColormap(sns.color_palette("husl", 
                                                                   n_colors = len(grid.hue_names)))
                cax, _ = mpl.colorbar.make_axes(plt.gca())
                norm = mpl.colors.Normalize(vmin = np.min(grid.hue_names), 
                                            vmax = np.max(grid.hue_names), 
                                            clip = False)
                mpl.colorbar.ColorbarBase(cax, 
                                          cmap = cmap, 
                                          norm = norm,
                                          label = self.huefacet)
                plt.sca(plot_ax)
            else:
                grid.add_legend(title = self.huefacet)
Exemple #26
0
    def plot(self, experiment = None, **kwargs):
        """Plot a faceted histogram view of a channel"""
        
        if experiment is None:
            raise util.CytoflowViewError("No experiment specified")

        if not self.op.controls:
            raise util.CytoflowViewError("No controls specified")
        
        if not self.op.spillover:
            raise util.CytoflowViewError("No spillover matrix specified")
        
        kwargs.setdefault('histtype', 'stepfilled')
        kwargs.setdefault('alpha', 0.5)
        kwargs.setdefault('antialiased', True)
         
        plt.figure()
        
        # the completely arbitrary ordering of the channels
        channels = list(set([x for (x, _) in list(self.op.spillover.keys())]))
        num_channels = len(channels)
        
        for from_idx, from_channel in enumerate(channels):
            for to_idx, to_channel in enumerate(channels):
                if from_idx == to_idx:
                    continue
                
                check_tube(self.op.controls[from_channel], experiment)
                tube_exp = ImportOp(tubes = [Tube(file = self.op.controls[from_channel])],
                                    channels = {experiment.metadata[c]["fcs_name"] : c for c in experiment.channels},
                                    name_metadata = experiment.metadata['name_metadata']).apply()
                
                # apply previous operations
                for op in experiment.history:
                    tube_exp = op.apply(tube_exp)
                    
                # subset it
                if self.subset:
                    try:
                        tube_exp = tube_exp.query(self.subset)
                    except Exception as e:
                        raise util.CytoflowOpError("Subset string '{0}' isn't valid"
                                              .format(self.subset)) from e
                                    
                    if len(tube_exp.data) == 0:
                        raise util.CytoflowOpError("Subset string '{0}' returned no events"
                                              .format(self.subset))
                    
                tube_data = tube_exp.data
                
                xscale = util.scale_factory("logicle", tube_exp, channel = from_channel)
                yscale = util.scale_factory("logicle", tube_exp, channel = to_channel)

                plt.subplot(num_channels, 
                            num_channels, 
                            from_idx + (to_idx * num_channels) + 1)
                plt.xscale('logicle', **xscale.mpl_params)
                plt.yscale('logicle', **yscale.mpl_params)
                plt.xlabel(from_channel)
                plt.ylabel(to_channel)
                plt.scatter(tube_data[from_channel],
                            tube_data[to_channel],
                            alpha = 0.1,
                            s = 1,
                            marker = 'o')
                
                xs = np.logspace(-1, math.log(tube_data[from_channel].max(), 10))
                ys = xs * self.op.spillover[(from_channel, to_channel)]
          
                plt.plot(xs, ys, 'g-', lw=3)
                
        plt.tight_layout(pad = 0.8)
Exemple #27
0
    def plot(self, experiment, **kwargs):
        """Plot a faceted 2d kernel density estimate"""
        
        if not experiment:
            raise util.CytoflowViewError("No experiment specified")

        if not self.xchannel:
            raise util.CytoflowViewError("X channel not specified")
        
        if self.xchannel not in experiment.data:
            raise util.CytoflowViewError("X channel {0} not in the experiment"
                                    .format(self.xchannel))
            
        if not self.ychannel:
            raise util.CytoflowViewError("Y channel not specified")
        
        if self.ychannel not in experiment.data:
            raise util.CytoflowViewError("Y channel {0} not in the experiment"
                                         .format(self.ychannel))
        
        if self.xfacet and self.xfacet not in experiment.conditions:
            raise util.CytoflowViewError("X facet {0} not in the experiment"
                                         .format(self.xfacet))
        
        if self.yfacet and self.yfacet not in experiment.conditions:
            raise util.CytoflowViewError("Y facet {0} not in the experiment"
                                         .format(self.yfacet))
        
        if self.huefacet and self.huefacet not in experiment.metadata:
            raise util.CytoflowViewError("Hue facet {0} not in the experiment"
                                         .format(self.huefacet))
        
        if self.subset:
            try:
                data = experiment.query(self.subset).data.reset_index()
            except:
                raise util.CytoflowViewError("Subset string '{0}' isn't valid"
                                        .format(self.subset))
                            
            if len(data.index) == 0:
                raise util.CytoflowViewError("Subset string '{0}' returned no events"
                                        .format(self.subset))
        else:
            data = experiment.data
        
        kwargs.setdefault('shade', False)
        kwargs.setdefault('min_alpha', 0.2)
        kwargs.setdefault('max_alpha', 0.9)
        kwargs.setdefault('n_levels', 10)

        g = sns.FacetGrid(data, 
                          size = 6,
                          aspect = 1.5,
                          col = (self.xfacet if self.xfacet else None),
                          row = (self.yfacet if self.yfacet else None),
                          hue = (self.huefacet if self.huefacet else None),
                          col_order = (np.sort(data[self.xfacet].unique()) if self.xfacet else None),
                          row_order = (np.sort(data[self.yfacet].unique()) if self.yfacet else None),
                          hue_order = (np.sort(data[self.huefacet].unique()) if self.huefacet else None),
                          legend_out = False,
                          sharex = False,
                          sharey = False)
        
        xscale = util.scale_factory(self.xscale, experiment, self.xchannel)
        yscale = util.scale_factory(self.yscale, experiment, self.ychannel)
        
        for ax in g.axes.flatten():
            ax.set_xscale(self.xscale, **xscale.mpl_params)
            ax.set_yscale(self.yscale, **yscale.mpl_params)
            
        kwargs['xscale'] = xscale
        kwargs['yscale'] = yscale

        g.map(_bivariate_kdeplot, self.xchannel, self.ychannel, **kwargs)
        
        if self.huefacet:
            g.add_legend(title = self.huefacet)
Exemple #28
0
    def plot(self, experiment, **kwargs):
        """Plot a faceted histogram view of a channel"""
        
        if not experiment:
            raise util.CytoflowViewError("No experiment specified")
        
        if not self.channel:
            raise util.CytoflowViewError("Must specify a channel")
        
        if self.channel not in experiment.data:
            raise util.CytoflowViewError("Channel {0} not in the experiment"
                                    .format(self.channel))
        
        if not self.variable:
            raise util.CytoflowViewError("Variable not specified")
        
        if not self.variable in experiment.conditions:
            raise util.CytoflowViewError("Variable {0} isn't in the experiment")
        
        if self.xfacet and self.xfacet not in experiment.conditions:
            raise util.CytoflowViewError("X facet {0} not in the experiment"
                                    .format(self.xfacet))
        
        if self.yfacet and self.yfacet not in experiment.conditions:
            raise util.CytoflowViewError("Y facet {0} not in the experiment"
                                    .format(self.yfacet))
        
        if self.huefacet and self.huefacet not in experiment.conditions:
            raise util.CytoflowViewError("Hue facet {0} not in the experiment"
                                    .format(self.huefacet))

        if self.subset:
            try:
                data = experiment.query(self.subset).data.reset_index()
            except:
                raise util.CytoflowViewError("Subset string '{0}' isn't valid"
                                        .format(self.subset))
                
            if len(data.index) == 0:
                raise util.CytoflowViewError("Subset string '{0}' returned no events"
                                        .format(self.subset))
        else:
            data = experiment.data.copy()
                    
        # get the scale
        scale = util.scale_factory(self.scale, experiment, self.channel)
        kwargs['data_scale'] = scale
        
        kwargs.setdefault('orient', 'v')
                
        g = sns.FacetGrid(data, 
                          size = 6,
                          aspect = 1.5,
                          col = (self.xfacet if self.xfacet else None),
                          row = (self.yfacet if self.yfacet else None),
                          col_order = (np.sort(data[self.xfacet].unique()) if self.xfacet else None),
                          row_order = (np.sort(data[self.yfacet].unique()) if self.yfacet else None),
                          legend_out = False,
                          sharex = False,
                          sharey = False)
                
        # set the scale for each set of axes; can't just call plt.xscale() 
        for ax in g.axes.flatten():
            if kwargs['orient'] == 'h':
                ax.set_xscale(self.scale, **scale.mpl_params)  
            else:
                ax.set_yscale(self.scale, **scale.mpl_params)  
            
        # this order-dependent thing weirds me out.      
        if kwargs['orient'] == 'h':
            violin_args = [self.channel, self.variable]
        else:
            violin_args = [self.variable, self.channel]
            
        if self.huefacet:
            violin_args.append(self.huefacet)
            
        g.map(_violinplot,   
              *violin_args,      
              order = np.sort(data[self.variable].unique()),
              hue_order = (np.sort(data[self.huefacet].unique()) if self.huefacet else None),
              **kwargs)
        
        if self.huefacet:
            g.add_legend(title = self.huefacet)
Exemple #29
0
    def estimate(self, experiment, subset = None):
        """
        Split the data set into bins and determine which ones to keep.
        
        Parameters
        ----------
        experiment : Experiment
            The :class:`.Experiment` to use to estimate the gate parameters.
            
        subset : Str (default = None)
            If set, determine the gate parameters on only a subset of the
            ``experiment`` parameter.
        """
        
        if experiment is None:
            raise util.CytoflowOpError('experiment',
                                       "No experiment specified")

        if self.xchannel not in experiment.data:
            raise util.CytoflowOpError('xchannel',
                                       "Column {0} not found in the experiment"
                                       .format(self.xchannel))
            
        if self.ychannel not in experiment.data:
            raise util.CytoflowOpError('ychannel',
                                       "Column {0} not found in the experiment"
                                       .format(self.ychannel))

        if self.min_quantile > 1.0:
            raise util.CytoflowOpError('min_quantile',
                                       "min_quantile must be <= 1.0")
            
        if self.max_quantile > 1.0:
            raise util.CytoflowOpError('max_quantile',
                                       "max_quantile must be <= 1.0")
               
        if not (self.max_quantile > self.min_quantile):
            raise util.CytoflowOpError('max_quantile',
                                       "max_quantile must be > min_quantile")
        
        if self.keep > 1.0:
            raise util.CytoflowOpError('keep',
                                       "keep must be <= 1.0")

        for b in self.by:
            if b not in experiment.conditions:
                raise util.CytoflowOpError('by',
                                           "Aggregation metadata {} not found, "
                                           "must be one of {}"
                                           .format(b, experiment.conditions))
                
        if subset:
            try:
                experiment = experiment.query(subset)
            except:
                raise util.CytoflowOpError('subset',
                                            "Subset string '{0}' isn't valid"
                                            .format(subset))
                
            if len(experiment) == 0:
                raise util.CytoflowOpError('subset',
                                           "Subset string '{0}' returned no events"
                                           .format(subset))
                
        if self.by:
            groupby = experiment.data.groupby(self.by)
        else:
            # use a lambda expression to return a group that contains
            # all the events
            groupby = experiment.data.groupby(lambda _: True)
            
        # get the scale. estimate the scale params for the ENTIRE data set,
        # not subsets we get from groupby().  And we need to save it so that
        # the data is transformed the same way when we apply()
        self._xscale = xscale = util.scale_factory(self.xscale, experiment, channel = self.xchannel)
        self._yscale = yscale = util.scale_factory(self.yscale, experiment, channel = self.ychannel)
        

        xlim = (xscale.clip(experiment[self.xchannel].quantile(self.min_quantile)),
                xscale.clip(experiment[self.xchannel].quantile(self.max_quantile)))
                  
        ylim = (yscale.clip(experiment[self.ychannel].quantile(self.min_quantile)),
                yscale.clip(experiment[self.ychannel].quantile(self.max_quantile)))
        
        self._xbins = xbins = xscale.inverse(np.linspace(xscale(xlim[0]), 
                                                         xscale(xlim[1]), 
                                                         self.bins))
        self._ybins = ybins = yscale.inverse(np.linspace(yscale(ylim[0]), 
                                                         yscale(ylim[1]), 
                                                         self.bins))
                    
        for group, group_data in groupby:
            if len(group_data) == 0:
                raise util.CytoflowOpError('by',
                                           "Group {} had no data"
                                           .format(group))

            h, _, _ = np.histogram2d(group_data[self.xchannel], 
                                     group_data[self.ychannel], 
                                     bins=[xbins, ybins])
            
            h = scipy.ndimage.filters.gaussian_filter(h, sigma = self.sigma)
            
            i = scipy.stats.rankdata(h, method = "ordinal") - 1
            i = np.unravel_index(np.argsort(-i), h.shape)
            
            goal_count = self.keep * len(group_data)
            curr_count = 0
            num_bins = 0

            while(curr_count < goal_count and num_bins < i[0].size):
                curr_count += h[i[0][num_bins], i[1][num_bins]]
                num_bins += 1
                
            self._keep_xbins[group] = i[0][0:num_bins]
            self._keep_ybins[group] = i[1][0:num_bins]
            self._histogram[group] = h
Exemple #30
0
    def plot(self, experiment, **kwargs):
        """Plot a faceted histogram view of a channel"""
        
        if not experiment:
            raise util.CytoflowViewError("No experiment specified")
        
        if not self.channel:
            raise util.CytoflowViewError("Must specify a channel")
        
        if self.channel not in experiment.data:
            raise util.CytoflowViewError("Channel {0} not in the experiment"
                                    .format(self.channel))
        
        if self.xfacet and self.xfacet not in experiment.conditions:
            raise util.CytoflowViewError("X facet {0} not in the experiment"
                                    .format(self.xfacet))
        
        if self.yfacet and self.yfacet not in experiment.conditions:
            raise util.CytoflowViewError("Y facet {0} not in the experiment"
                                    .format(self.yfacet))
        
        if self.huefacet and self.huefacet not in experiment.conditions:
            raise util.CytoflowViewError("Hue facet {0} not in the experiment"
                                    .format(self.huefacet))

        if self.subset:
            try:
                data = experiment.query(self.subset).data.reset_index()
            except:
                raise util.CytoflowViewError("Subset string '{0}' isn't valid"
                                        .format(self.subset))
                
            if len(data.index) == 0:
                raise util.CytoflowViewError("Subset string '{0}' returned no events"
                                        .format(self.subset))
        else:
            data = experiment.data
            

        #print scaled_data
        
        kwargs.setdefault('shade', True)
        kwargs['label'] = self.name
        
        g = sns.FacetGrid(data, 
                          size = 6,
                          aspect = 1.5,
                          col = (self.xfacet if self.xfacet else None),
                          row = (self.yfacet if self.yfacet else None),
                          hue = (self.huefacet if self.huefacet else None),
                          col_order = (np.sort(data[self.xfacet].unique()) if self.xfacet else None),
                          row_order = (np.sort(data[self.yfacet].unique()) if self.yfacet else None),
                          hue_order = (np.sort(data[self.huefacet].unique()) if self.huefacet else None),
                          legend_out = False,
                          sharex = False,
                          sharey = False)
        
        # get the scale     
        kwargs['scale'] = scale = util.scale_factory(self.scale, experiment, self.channel)
        
        # set the scale for each set of axes; can't just call plt.xscale() 
        for ax in g.axes.flatten():
            ax.set_xscale(self.scale, **scale.mpl_params)  
                  
        g.map(_univariate_kdeplot, self.channel, **kwargs)
        if self.huefacet:
            g.add_legend(title = self.huefacet)
Exemple #31
0
    def plot(self, experiment, **kwargs):
        """Plot a faceted scatter plot view of a channel"""

        if not experiment:
            raise util.CytoflowViewError("No experiment specified")

        if not self.xchannel:
            raise util.CytoflowViewError("X channel not specified")

        if self.xchannel not in experiment.data:
            raise util.CytoflowViewError("X channel {0} not in the experiment".format(self.xchannel))

        if not self.ychannel:
            raise util.CytoflowViewError("Y channel not specified")

        if self.ychannel not in experiment.data:
            raise util.CytoflowViewError("Y channel {0} not in the experiment".format(self.ychannel))

        if self.xfacet and self.xfacet not in experiment.conditions:
            raise util.CytoflowViewError("X facet {0} not in the experiment".format(self.xfacet))

        if self.yfacet and self.yfacet not in experiment.conditions:
            raise util.CytoflowViewError("Y facet {0} not in the experiment".format(self.yfacet))

        if self.huefacet and self.huefacet not in experiment.metadata:
            raise util.CytoflowViewError("Hue facet {0} not in the experiment".format(self.huefacet))

        if self.subset:
            try:
                data = experiment.query(self.subset)
            except:
                raise util.CytoflowViewError("Subset string '{0}' isn't valid".format(self.subset))

            if len(data.index) == 0:
                raise util.CytoflowViewError("Subset string '{0}' returned no events".format(self.subset))
        else:
            data = experiment.data

        kwargs.setdefault("alpha", 0.25)
        kwargs.setdefault("s", 2)
        kwargs.setdefault("marker", "o")
        kwargs.setdefault("antialiased", True)

        g = sns.FacetGrid(
            data,
            size=6,
            aspect=1.5,
            col=(self.xfacet if self.xfacet else None),
            row=(self.yfacet if self.yfacet else None),
            hue=(self.huefacet if self.huefacet else None),
            col_order=(np.sort(data[self.xfacet].unique()) if self.xfacet else None),
            row_order=(np.sort(data[self.yfacet].unique()) if self.yfacet else None),
            hue_order=(np.sort(data[self.huefacet].unique()) if self.huefacet else None),
            legend_out=False,
            sharex=False,
            sharey=False,
        )

        xscale = util.scale_factory(self.xscale, experiment, self.xchannel)
        yscale = util.scale_factory(self.yscale, experiment, self.ychannel)

        for ax in g.axes.flatten():
            ax.set_xscale(self.xscale, **xscale.mpl_params)
            ax.set_yscale(self.yscale, **yscale.mpl_params)

        g.map(plt.scatter, self.xchannel, self.ychannel, **kwargs)

        # if we have a hue facet and a lot of hues, make a color bar instead
        # of a super-long legend.

        if self.huefacet:
            current_palette = mpl.rcParams["axes.color_cycle"]
            if len(g.hue_names) > len(current_palette):
                plot_ax = plt.gca()
                cmap = mpl.colors.ListedColormap(sns.color_palette("husl", n_colors=len(g.hue_names)))
                cax, _ = mpl.colorbar.make_axes(plt.gca())
                norm = mpl.colors.Normalize(vmin=np.min(g.hue_names), vmax=np.max(g.hue_names), clip=False)
                mpl.colorbar.ColorbarBase(cax, cmap=cmap, norm=norm)
                plt.sca(plot_ax)
            else:
                g.add_legend()
Exemple #32
0
    def plot(self, experiment, **kwargs):
        """Plot a faceted histogram view of a channel"""
        
        if not experiment:
            raise util.CytoflowViewError("No experiment specified")
        
        if not self.channel:
            raise util.CytoflowViewError("Must specify a channel")
        
        if self.channel not in experiment.data:
            raise util.CytoflowViewError("Channel {0} not in the experiment"
                                    .format(self.channel))
        
        if self.xfacet and self.xfacet not in experiment.conditions:
            raise util.CytoflowViewError("X facet {0} not in the experiment"
                                    .format(self.xfacet))
        
        if self.yfacet and self.yfacet not in experiment.conditions:
            raise util.CytoflowViewError("Y facet {0} not in the experiment"
                                    .format(self.yfacet))
        
        if self.huefacet and self.huefacet not in experiment.conditions:
            raise util.CytoflowViewError("Hue facet {0} not in the experiment"
                                    .format(self.huefacet))

        if self.subset:
            try:
                data = experiment.query(self.subset).data.reset_index()
            except:
                raise util.CytoflowViewError("Subset string '{0}' isn't valid"
                                        .format(self.subset))
                
            if len(experiment.data) == 0:
                raise util.CytoflowViewError("Subset string '{0}' returned no events"
                                        .format(self.subset))
        else:
            data = experiment.data
        
        # get the scale
        scale = util.scale_factory(self.scale, experiment, self.channel)
        scaled_data = scale(data[self.channel])
        
        #print scaled_data
        
        kwargs.setdefault('histtype', 'stepfilled')
        kwargs.setdefault('alpha', 0.5)
        kwargs.setdefault('antialiased', True)

        # estimate a "good" number of bins; see cytoflow.utility.num_hist_bins
        # for a reference.
        
        num_bins = util.num_hist_bins(scaled_data)
        
        # clip num_bins to (50, 1000)
        num_bins = max(min(num_bins, 1000), 50)
        
        xmin = bottleneck.nanmin(scaled_data)
        xmax = bottleneck.nanmax(scaled_data)
                    
        if (self.huefacet 
            and "bins" in experiment.metadata[self.huefacet]
            and experiment.metadata[self.huefacet]["bin_scale"] == self.scale):
            # if we color facet by the result of a BinningOp and we don't
            # match the BinningOp bins with the histogram bins, we get
            # gnarly aliasing.
            
            # each color gets at least one bin.  however, if the estimated
            # number of bins for the histogram is much larger than the
            # number of colors, sub-divide each color into multiple bins.
            bins = experiment.metadata[self.huefacet]["bins"]
            bins = np.append(bins, xmax)
            
            num_hues = len(data[self.huefacet].unique())
            bins_per_hue = math.ceil(num_bins / num_hues)
            
            new_bins = [xmin]
            for end in [b for b in bins if (b > xmin and b <= xmax)]:
                new_bins = np.append(new_bins,
                                     np.linspace(new_bins[-1],
                                                 end,
                                                 bins_per_hue + 1,
                                                 endpoint = True)[1:])

            bins = scale.inverse(new_bins)
        else:
            bin_width = (xmax - xmin) / num_bins
            bins = scale.inverse(np.arange(xmin, xmax, bin_width))
            bins = np.append(bins, scale.inverse(xmax))
            
        # take care of a rare rounding error, where the last observation is
        # a liiiitle bit more than the last bin, which makes plt.hist() puke
        bins[-1] += 1
                    
        kwargs.setdefault('bins', bins) 
        
        # mask out the data that's not in the scale domain
        data = data[~np.isnan(scaled_data)]

        g = sns.FacetGrid(data, 
                          size = 6,
                          aspect = 1.5,
                          col = (self.xfacet if self.xfacet else None),
                          row = (self.yfacet if self.yfacet else None),
                          hue = (self.huefacet if self.huefacet else None),
                          col_order = (np.sort(data[self.xfacet].unique()) if self.xfacet else None),
                          row_order = (np.sort(data[self.yfacet].unique()) if self.yfacet else None),
                          hue_order = (np.sort(data[self.huefacet].unique()) if self.huefacet else None),
                          legend_out = False,
                          sharex = False,
                          sharey = False)
        
        # set the scale for each set of axes; can't just call plt.xscale() 
        for ax in g.axes.flatten():
            ax.set_xscale(self.scale, **scale.mpl_params)  
                  
        g.map(plt.hist, self.channel, **kwargs)
        
        # if we have a hue facet and a lot of hues, make a color bar instead
        # of a super-long legend.
        
        if self.huefacet:
            current_palette = mpl.rcParams['axes.color_cycle']
            if len(g.hue_names) > len(current_palette):
                plot_ax = plt.gca()
                cmap = mpl.colors.ListedColormap(sns.color_palette("husl", 
                                                                   n_colors = len(g.hue_names)))
                cax, _ = mpl.colorbar.make_axes(plt.gca())
                norm = mpl.colors.Normalize(vmin = np.min(g.hue_names), 
                                            vmax = np.max(g.hue_names), 
                                            clip = False)
                mpl.colorbar.ColorbarBase(cax, 
                                          cmap = cmap, 
                                          norm = norm, 
                                          label = self.huefacet)
                plt.sca(plot_ax)
            else:
                g.add_legend(title = self.huefacet)
    def plot(self, experiment = None, **kwargs):
        """
        Plot a diagnostic of the bleedthrough model computation.
        """
        
        if experiment is None:
            raise util.CytoflowViewError('experiment',
                                         "No experiment specified")

        if not self.op.controls:
            raise util.CytoflowViewError('op',
                                         "No controls specified")
        
        if not self.op.spillover:
            raise util.CytoflowViewError('op',
                                         "No spillover matrix specified")
        
        kwargs.setdefault('histtype', 'stepfilled')
        kwargs.setdefault('alpha', 0.5)
        kwargs.setdefault('antialiased', True)
         
        plt.figure()
        
        # the completely arbitrary ordering of the channels
        channels = list(set([x for (x, _) in list(self.op.spillover.keys())]))
        num_channels = len(channels)
        
        for from_idx, from_channel in enumerate(channels):
            for to_idx, to_channel in enumerate(channels):
                if from_idx == to_idx:
                    continue
                
                tube_data = self.op._sample[from_channel]
                
                # for ReadTheDocs, which doesn't have swig
                import sys
                if sys.modules['cytoflow.utility.logicle_ext.Logicle'].__name__ != 'cytoflow.utility.logicle_ext.Logicle':
                    scale_name = 'log'
                else:
                    scale_name = 'logicle'
                
                xscale = util.scale_factory(scale_name, experiment, channel = from_channel)
                yscale = util.scale_factory(scale_name, experiment, channel = to_channel)

                plt.subplot(num_channels, 
                            num_channels, 
                            from_idx + (to_idx * num_channels) + 1)
                plt.xscale(scale_name, **xscale.mpl_params)
                plt.yscale(scale_name, **yscale.mpl_params)
                plt.xlabel(from_channel)
                plt.ylabel(to_channel)
                plt.scatter(tube_data[from_channel],
                            tube_data[to_channel],
                            alpha = 1,
                            s = 1,
                            marker = 'o')
                
                xs = np.logspace(-1, math.log(tube_data[from_channel].max(), 10))
                ys = xs * self.op.spillover[(from_channel, to_channel)]
          
                plt.plot(xs, ys, 'g-', lw=3)
                
        plt.tight_layout(pad = 0.8)
Exemple #34
0
    def apply(self, experiment):
        """Applies the binning to an experiment.
        
        Parameters
        ----------
        experiment : Experiment
            the old_experiment to which this op is applied
            
        Returns
        -------
            a new experiment, the same as old_experiment but with a new
            column the same as the operation name.  The bool is True if the
            event's measurement in self.channel is greater than self.low and
            less than self.high; it is False otherwise.
        """
        if not experiment:
            raise util.CytoflowOpError("no experiment specified")
        
        if not self.name:
            raise util.CytoflowOpError("name is not set")
        
        if self.name in experiment.data.columns:
            raise util.CytoflowOpError("name {0} is in the experiment already"
                                  .format(self.name))
            
        if self.bin_count_name and self.bin_count_name in experiment.data.columns:
            raise util.CytoflowOpError("bin_count_name {0} is in the experiment already"
                                  .format(self.bin_count_name))
        
        if not self.channel:
            raise util.CytoflowOpError("channel is not set")
        
        if self.channel not in experiment.data.columns:
            raise util.CytoflowOpError("channel {0} isn't in the experiment"
                                  .format(self.channel))
              
        if self.num_bins is Undefined and self.bin_width is Undefined:
            raise util.CytoflowOpError("must set either bin number or width")
        
        if self.num_bins is Undefined \
           and not (self.scale == "linear" or self.scale == "log"):
            raise util.CytoflowOpError("Can only use bin_width with linear or log scale") 
        
        scale = util.scale_factory(self.scale, experiment, self.channel)
        scaled_data = scale(experiment.data[self.channel])
            
        channel_min = bn.nanmin(scaled_data)
        channel_max = bn.nanmax(scaled_data)
        
        num_bins = self.num_bins if self.num_bins is not Undefined else \
                   (channel_max - channel_min) / self.bin_width

        bins = np.linspace(start = channel_min, stop = channel_max,
                           num = num_bins)
            
        # bins need to be internal; drop the first and last one
        bins = bins[1:-1]
            
        new_experiment = experiment.clone()
        new_experiment.add_condition(self.name,
                                     "int",
                                     np.digitize(scaled_data, bins))
        
        # if we're log-scaled (for example), don't label data that isn't
        # showable on a log scale!
        new_experiment.data.ix[np.isnan(scaled_data), self.name] = np.NaN
        
        # keep track of the bins we used, for pretty plotting later.
        new_experiment.metadata[self.name]["bin_scale"] = self.scale
        new_experiment.metadata[self.name]["bins"] = bins
        
        if self.bin_count_name:
            # TODO - this is a HUGE memory hog?!
            agg_count = new_experiment.data.groupby(self.name).count()
            agg_count = agg_count[agg_count.columns[0]]
            
            # have to make the condition a float64, because if we're in log
            # space there may be events that have NaN as the bin number.
            
            new_experiment.add_condition(
                self.bin_count_name,
                "float64",
                new_experiment[self.name].map(agg_count))
        
        new_experiment.history.append(self.clone_traits())
        return new_experiment
Exemple #35
0
    def plot(self, experiment, **kwargs):
        """Plot a faceted 2d kernel density estimate"""

        if not experiment:
            raise util.CytoflowViewError("No experiment specified")

        if not self.xchannel:
            raise util.CytoflowViewError("X channel not specified")

        if self.xchannel not in experiment.data:
            raise util.CytoflowViewError(
                "X channel {0} not in the experiment".format(self.xchannel))

        if not self.ychannel:
            raise util.CytoflowViewError("Y channel not specified")

        if self.ychannel not in experiment.data:
            raise util.CytoflowViewError(
                "Y channel {0} not in the experiment".format(self.ychannel))

        if self.xfacet and self.xfacet not in experiment.conditions:
            raise util.CytoflowViewError(
                "X facet {0} not in the experiment".format(self.xfacet))

        if self.yfacet and self.yfacet not in experiment.conditions:
            raise util.CytoflowViewError(
                "Y facet {0} not in the experiment".format(self.yfacet))

        if self.huefacet and self.huefacet not in experiment.metadata:
            raise util.CytoflowViewError(
                "Hue facet {0} not in the experiment".format(self.huefacet))

        facets = filter(lambda x: x, [self.xfacet, self.yfacet, self.huefacet])
        if len(facets) != len(set(facets)):
            raise util.CytoflowViewError("Can't reuse facets")

        col_wrap = kwargs.pop('col_wrap', None)

        if col_wrap and self.yfacet:
            raise util.CytoflowViewError(
                "Can't set yfacet and col_wrap at the same time.")

        if col_wrap and not self.xfacet:
            raise util.CytoflowViewError("Must set xfacet to use col_wrap.")

        if self.subset:
            try:
                data = experiment.query(self.subset).data.reset_index()
            except:
                raise util.CytoflowViewError(
                    "Subset string '{0}' isn't valid".format(self.subset))

            if len(data) == 0:
                raise util.CytoflowViewError(
                    "Subset string '{0}' returned no events".format(
                        self.subset))
        else:
            data = experiment.data

        kwargs.setdefault('shade', False)
        kwargs.setdefault('min_alpha', 0.2)
        kwargs.setdefault('max_alpha', 0.9)
        kwargs.setdefault('n_levels', 10)

        xscale = util.scale_factory(self.xscale,
                                    experiment,
                                    channel=self.xchannel)
        yscale = util.scale_factory(self.yscale,
                                    experiment,
                                    channel=self.ychannel)

        # adjust the limits to clip extreme values
        min_quantile = kwargs.pop("min_quantile", 0.001)
        max_quantile = kwargs.pop("max_quantile", 0.999)

        xlim = kwargs.pop("xlim", None)
        if xlim is None:
            xlim = (xscale.clip(data[self.xchannel].quantile(min_quantile)),
                    xscale.clip(data[self.xchannel].quantile(max_quantile)))

        ylim = kwargs.pop("ylim", None)
        if ylim is None:
            ylim = (yscale.clip(data[self.ychannel].quantile(min_quantile)),
                    yscale.clip(data[self.ychannel].quantile(max_quantile)))

        sharex = kwargs.pop('sharex', True)
        sharey = kwargs.pop('sharey', True)

        cols = col_wrap if col_wrap else \
               len(data[self.xfacet].unique()) if self.xfacet else 1

        g = sns.FacetGrid(data,
                          size=(6 / cols),
                          aspect=1.5,
                          col=(self.xfacet if self.xfacet else None),
                          row=(self.yfacet if self.yfacet else None),
                          hue=(self.huefacet if self.huefacet else None),
                          col_order=(np.sort(data[self.xfacet].unique())
                                     if self.xfacet else None),
                          row_order=(np.sort(data[self.yfacet].unique())
                                     if self.yfacet else None),
                          hue_order=(np.sort(data[self.huefacet].unique())
                                     if self.huefacet else None),
                          col_wrap=col_wrap,
                          legend_out=False,
                          sharex=sharex,
                          sharey=sharey,
                          xlim=xlim,
                          ylim=ylim)

        for ax in g.axes.flatten():
            ax.set_xscale(self.xscale, **xscale.mpl_params)
            ax.set_yscale(self.yscale, **yscale.mpl_params)

        kwargs['xscale'] = xscale
        kwargs['yscale'] = yscale

        g.map(_bivariate_kdeplot, self.xchannel, self.ychannel, **kwargs)

        # if we are sharing y axes, make sure the y scale is the same for each
        if sharey:
            fig = plt.gcf()
            fig_y_min = float("inf")
            fig_y_max = float("-inf")

            for ax in fig.get_axes():
                ax_y_min, ax_y_max = ax.get_ylim()
                if ax_y_min < fig_y_min:
                    fig_y_min = ax_y_min
                if ax_y_max > fig_y_max:
                    fig_y_max = ax_y_max

            for ax in fig.get_axes():
                ax.set_ylim(fig_y_min, fig_y_max)

        # if we have are sharing x axes, make sure the x scale is the same for each
        if sharex:
            fig = plt.gcf()
            fig_x_min = float("inf")
            fig_x_max = float("-inf")

            for ax in fig.get_axes():
                ax_x_min, ax_x_max = ax.get_xlim()
                if ax_x_min < fig_x_min:
                    fig_x_min = ax_x_min
                if ax_x_max > fig_x_max:
                    fig_x_max = ax_x_max

            for ax in fig.get_axes():
                ax.set_xlim(fig_x_min, fig_x_max)

        if self.huefacet:
            current_palette = mpl.rcParams['axes.color_cycle']
            if util.is_numeric(experiment.data[self.huefacet]) and \
               len(g.hue_names) > len(current_palette):

                plot_ax = plt.gca()
                cmap = mpl.colors.ListedColormap(
                    sns.color_palette("husl", n_colors=len(g.hue_names)))
                cax, _ = mpl.colorbar.make_axes(plt.gca())
                hue_scale = util.scale_factory(self.huescale,
                                               experiment,
                                               condition=self.huefacet)
                mpl.colorbar.ColorbarBase(cax,
                                          cmap=cmap,
                                          norm=hue_scale.color_norm(),
                                          label=self.huefacet)
                plt.sca(plot_ax)
            else:
                g.add_legend(title=self.huefacet)
Exemple #36
0
    def plot(self, experiment, data, **kwargs):
        """
        Base function for facetted plotting
        
        Parameters
        ----------
        experiment: Experiment
            The :class:`.Experiment` to plot using this view.
            
        title : str
            Set the plot title
            
        xlabel, ylabel : str
            Set the X and Y axis labels
            
        huelabel : str
            Set the label for the hue facet (in the legend)
            
        legend : bool
            Plot a legend for the color or hue facet?  Defaults to `True`.
            
        sharex, sharey : bool
            If there are multiple subplots, should they share axes?  Defaults
            to `True`.

        col_wrap : int
            If `xfacet` is set and `yfacet` is not set, you can "wrap" the
            subplots around so that they form a multi-row grid by setting
            `col_wrap` to the number of columns you want. 
            
        sns_style : {"darkgrid", "whitegrid", "dark", "white", "ticks"}
            Which `seaborn` style to apply to the plot?  Default is `whitegrid`.
            
        sns_context : {"paper", "notebook", "talk", "poster"}
            Which `seaborn` context to use?  Controls the scaling of plot 
            elements such as tick labels and the legend.  Default is `talk`.
            
        despine : Bool
            Remove the top and right axes from the plot?  Default is `True`.

        Other Parameters
        ----------------
        cmap : matplotlib colormap
            If plotting a huefacet with many values, use this color map instead
            of the default.
            
        norm : matplotlib.colors.Normalize
            If plotting a huefacet with many values, use this object for color
            scale normalization.

        """

        if experiment is None:
            raise util.CytoflowViewError('experiment',
                                         "No experiment specified")

        col_wrap = kwargs.pop('col_wrap', None)

        if col_wrap is not None and self.yfacet:
            raise util.CytoflowViewError(
                'yfacet', "Can't set yfacet and col_wrap at the same time.")

        if col_wrap is not None and not self.xfacet:
            raise util.CytoflowViewError('xfacet',
                                         "Must set xfacet to use col_wrap.")

        if col_wrap is not None and col_wrap < 2:
            raise util.CytoflowViewError(None, "col_wrap must be None or > 1")

        title = kwargs.pop("title", None)
        xlabel = kwargs.pop("xlabel", None)
        ylabel = kwargs.pop("ylabel", None)
        huelabel = kwargs.pop("huelabel", self.huefacet)

        sharex = kwargs.pop("sharex", True)
        sharey = kwargs.pop("sharey", True)

        legend = kwargs.pop('legend', True)

        sns_style = kwargs.pop('sns_style', 'whitegrid')
        sns_context = kwargs.pop('sns_context', 'talk')
        despine = kwargs.pop('despine', False)

        cols = col_wrap if col_wrap else \
               len(data[self.xfacet].unique()) if self.xfacet else 1

        sns.set_style(sns_style)
        sns.set_context(sns_context)

        g = sns.FacetGrid(data,
                          size=6 / cols,
                          aspect=1.5,
                          col=(self.xfacet if self.xfacet else None),
                          row=(self.yfacet if self.yfacet else None),
                          hue=(self.huefacet if self.huefacet else None),
                          col_order=(np.sort(data[self.xfacet].unique())
                                     if self.xfacet else None),
                          row_order=(np.sort(data[self.yfacet].unique())
                                     if self.yfacet else None),
                          hue_order=(np.sort(data[self.huefacet].unique())
                                     if self.huefacet else None),
                          col_wrap=col_wrap,
                          legend_out=False,
                          sharex=sharex,
                          sharey=sharey)

        plot_ret = self._grid_plot(experiment=experiment, grid=g, **kwargs)

        kwargs.update(plot_ret)

        xscale = kwargs.pop("xscale", None)
        yscale = kwargs.pop("yscale", None)
        xlim = kwargs.pop("xlim", None)
        ylim = kwargs.pop("ylim", None)

        for ax in g.axes.flatten():
            if xscale:
                ax.set_xscale(xscale.name, **xscale.mpl_params)
            if yscale:
                ax.set_yscale(yscale.name, **yscale.mpl_params)
            if xlim:
                ax.set_xlim(xlim)
            if ylim:
                ax.set_ylim(ylim)

        # if we are sharing x axes, make sure the x limits are the same for each
        if sharex:
            fig = plt.gcf()
            fig_x_min = float("inf")
            fig_x_max = float("-inf")

            for ax in fig.get_axes():
                ax_x_min, ax_x_max = ax.get_xlim()
                if ax_x_min < fig_x_min:
                    fig_x_min = ax_x_min
                if ax_x_max > fig_x_max:
                    fig_x_max = ax_x_max

            for ax in fig.get_axes():
                ax.set_xlim(fig_x_min, fig_x_max)

        # if we are sharing y axes, make sure the y limits are the same for each
        if sharey:
            fig = plt.gcf()
            fig_y_max = float("-inf")

            for ax in fig.get_axes():
                _, ax_y_max = ax.get_ylim()
                if ax_y_max > fig_y_max:
                    fig_y_max = ax_y_max

            for ax in fig.get_axes():
                ax.set_ylim(None, fig_y_max)

        # if we have a hue facet and a lot of hues, make a color bar instead
        # of a super-long legend.

        cmap = kwargs.pop('cmap', None)
        norm = kwargs.pop('norm', None)
        legend_data = kwargs.pop('legend_data', None)

        if legend:
            if cmap and norm:
                plot_ax = plt.gca()
                cax, _ = mpl.colorbar.make_axes(plt.gcf().get_axes())
                mpl.colorbar.ColorbarBase(cax, cmap, norm)
                plt.sca(plot_ax)
            elif self.huefacet:

                current_palette = mpl.rcParams['axes.prop_cycle']

                if util.is_numeric(data[self.huefacet]) and \
                   len(g.hue_names) > len(current_palette):

                    cmap = mpl.colors.ListedColormap(
                        sns.color_palette("husl", n_colors=len(g.hue_names)))
                    hue_scale = util.scale_factory(
                        self.huescale,
                        experiment,
                        data=data[self.huefacet].values)

                    plot_ax = plt.gca()

                    cax, _ = mpl.colorbar.make_axes(plt.gcf().get_axes())

                    mpl.colorbar.ColorbarBase(cax,
                                              cmap=cmap,
                                              norm=hue_scale.norm(),
                                              label=huelabel)
                    plt.sca(plot_ax)
                else:
                    g.add_legend(title=huelabel, legend_data=legend_data)
                    ax = g.axes.flat[0]
                    legend = ax.legend_
                    for lh in legend.legendHandles:
                        lh.set_alpha(1.0)

        if title:
            plt.title(title)

        if xlabel == "":
            xlabel = None

        if ylabel == "":
            ylabel = None

        g.set_axis_labels(xlabel, ylabel)

        sns.despine(top=despine, right=despine, bottom=False, left=False)
Exemple #37
0
    def estimate(self, experiment, subset=None):
        """
        Estimate the Gaussian mixture model parameters
        """

        if experiment is None:
            raise util.CytoflowOpError("No experiment specified")

        if len(self.channels) == 0:
            raise util.CytoflowOpError("Must set at least one channel")

        for c in self.channels:
            if c not in experiment.data:
                raise util.CytoflowOpError(
                    "Channel {0} not found in the experiment".format(c))

        for c in self.scale:
            if c not in self.channels:
                raise util.CytoflowOpError(
                    "Scale set for channel {0}, but it isn't "
                    "in the experiment".format(c))

        for b in self.by:
            if b not in experiment.data:
                raise util.CytoflowOpError("Aggregation metadata {0} not found"
                                           " in the experiment".format(b))
            if len(experiment.data[b].unique()) > 100:  #WARNING - magic number
                raise util.CytoflowOpError(
                    "More than 100 unique values found for"
                    " aggregation metadata {0}.  Did you"
                    " accidentally specify a data channel?".format(b))

        if subset:
            try:
                experiment = experiment.query(subset)
            except:
                raise util.CytoflowViewError(
                    "Subset string '{0}' isn't valid".format(subset))

            if len(experiment) == 0:
                raise util.CytoflowViewError(
                    "Subset string '{0}' returned no events".format(subset))

        if self.by:
            groupby = experiment.data.groupby(self.by)
        else:
            # use a lambda expression to return a group that contains
            # all the events
            groupby = experiment.data.groupby(lambda _: True)

        # get the scale. estimate the scale params for the ENTIRE data set,
        # not subsets we get from groupby().  And we need to save it so that
        # the data is transformed the same way when we apply()
        for c in self.channels:
            if c in self.scale:
                self._scale[c] = util.scale_factory(self.scale[c],
                                                    experiment,
                                                    channel=c)
#                 if self.scale[c] == 'log':
#                     self._scale[c].mode = 'mask'
            else:
                self._scale[c] = util.scale_factory(util.get_default_scale(),
                                                    experiment,
                                                    channel=c)

        for data_group, data_subset in groupby:
            if len(data_subset) == 0:
                raise util.CytoflowOpError(
                    "Group {} had no data".format(data_group))
            x = data_subset.loc[:, self.channels[:]]
            for c in self.channels:
                x[c] = self._scale[c](x[c])

            # drop data that isn't in the scale range
            for c in self.channels:
                x = x[~(np.isnan(x[c]))]
            x = x.values

            #### choose the number of clusters and fit the kmeans
            num_clusters = [
                util.num_hist_bins(x[:, c]) for c in range(len(self.channels))
            ]
            num_clusters = np.ceil(np.median(num_clusters))
            num_clusters = int(num_clusters)

            self._kmeans[data_group] = kmeans = \
                sklearn.cluster.MiniBatchKMeans(n_clusters = num_clusters)

            kmeans.fit(x)
            x_labels = kmeans.predict(x)
            d = len(self.channels)

            #### use the kmeans centroids to parameterize a finite gaussian
            #### mixture model which estimates the density function

            d = len(self.channels)
            s0 = np.zeros([d, d])
            for j in range(d):
                r = x[d].max() - x[d].min()
                s0[j, j] = (r / (num_clusters**(1. / d)))**0.5

            means = []
            weights = []
            normals = []
            beta_max = []

            for k in range(num_clusters):
                xk = x[x_labels == k]
                num_k = np.sum(x_labels == k)
                weight_k = num_k / len(x_labels)
                mu = xk.mean(axis=0)
                means.append(mu)
                s = np.cov(xk, rowvar=False)

                el = num_k / (num_clusters + num_k)
                s_smooth = el * self.h * s + (1.0 - el) * self.h0 * s0

                n = scipy.stats.multivariate_normal(mean=mu, cov=s_smooth)
                weights.append(weight_k)
                normals.append(lambda x, n=n: n.pdf(x))

                # get appropriate step size for peak finding
                min_b = np.inf
                for b in np.diagonal(s_smooth):
                    if np.sqrt(b) < min_b:
                        min_b = np.sqrt(b)
                beta_max.append(b)

            self._normals[data_group] = normals
            self._density[
                data_group] = density = lambda x, weights=weights, normals=normals: np.sum(
                    [w * n(x) for w, n in zip(weights, normals)], axis=0)

            ### use optimization on the finite gmm to find the local peak for
            ### each kmeans cluster
            peaks = []
            peak_clusters = []  # peak idx --> list of clusters

            min_mu = [np.inf] * len(self.channels)
            max_mu = [-1.0 * np.inf] * len(self.channels)

            for k in range(num_clusters):
                mu = means[k]
                for ci in range(len(self.channels)):
                    if mu[ci] < min_mu[ci]:
                        min_mu[ci] = mu[ci]
                    if mu[ci] > max_mu[ci]:
                        max_mu[ci] = mu[ci]

            constraints = []
            for ci, c in enumerate(self.channels):
                constraints.append({
                    'type':
                    'ineq',
                    'fun':
                    lambda x, min_mu=min_mu[ci]: x - min_mu
                })
                constraints.append({
                    'type':
                    'ineq',
                    'fun':
                    lambda x, max_mu=max_mu[ci]: max_mu - x
                })

            for k in range(num_clusters):
                mu = means[k]
                f = lambda x: -1.0 * density(x)

                res = scipy.optimize.minimize(f,
                                              mu,
                                              method='COBYLA',
                                              constraints=constraints,
                                              options={
                                                  'rhobeg': beta_max[k],
                                                  'maxiter': 5000
                                              })
                if not res.success:
                    raise util.CytoflowOpError(
                        "Peak finding failed for cluster {}: {}".format(
                            k, res.message))


#                 ### The peak-searching algorithm from the paper.  works fine,
#                 ### but slow!  we get similar results with the COBYLA
#                 ### optimization method from scipy, using an appropriate rho
#                 x0 = x = means[k]
#                 k0 = k
#                 b = beta_max[k] / 10.0
#                 Nsuc = 0
#                 n = 0
#
#                 while(n < 1000):
# #                     df = scipy.misc.derivative(density, x, 1e-6)
#                     df = statsmodels.tools.numdiff.approx_fprime(x, density)
#                     if np.linalg.norm(df) < 1e-3:
#                         break
#
#                     y = x + b * df / np.linalg.norm(df)
#                     if density(y) <= density(x):
#                         Nsuc = 0
#                         b = b / 2.0
#                         continue
#
#                     Nsuc += 1
#                     if Nsuc >= 2:
#                         b = min(2*b, beta_max[k])
#
#                     ky = kmeans.predict(y[np.newaxis, :])[0]
#                     if ky == k:
#                         x = y
#                     else:
#                         k = ky
#                         b = beta_max[k] / 10.0
#                         mu = means[k]
#                         if density(mu) > density(y):
#                             x = mu
#                         else:
#                             x = y
#
#                     n += 1
#
#
#
#                 print("{} --> {}, {}".format(x0, x, n))

                merged = False
                for pi, p in enumerate(peaks):
                    if np.linalg.norm(p - res.x) < (1e-2):
                        peak_clusters[pi].append(k)
                        merged = True
                        break

                if not merged:
                    peak_clusters.append([k])
                    peaks.append(res.x)

            self._peaks[data_group] = peaks

            ### merge peaks that are sufficiently close

            groups = [[x] for x in range(len(peaks))]
            peak_groups = [x for x in range(len(peaks))
                           ]  # peak idx --> group idx

            def max_tol(x, y):
                f = lambda a: density(a[np.newaxis, :])
                #                 lx = kmeans.predict(x[np.newaxis, :])[0]
                #                 ly = kmeans.predict(y[np.newaxis, :])[0]
                n = len(x)
                n_scale = 1

                #                 n_scale = np.sqrt(((nx + ny) / 2.0) / (n / num_clusters))

                def tol(t):
                    zt = x + t * (y - x)
                    fhat_zt = f(x) + t * (f(y) - f(x))
                    return -1.0 * abs((f(zt) - fhat_zt) / fhat_zt) * n_scale

                res = scipy.optimize.minimize_scalar(tol,
                                                     bounds=[0, 1],
                                                     method='Bounded')

                if res.status != 0:
                    raise util.CytoflowOpError(
                        "tol optimization failed for {}, {}".format(x, y))
                return -1.0 * res.fun

            def nearest_neighbor_dist(k):
                min_dist = np.inf

                for i in range(num_clusters):
                    if i == k:
                        continue
                    dist = np.linalg.norm(means[k] - means[i])
                    if dist < min_dist:
                        min_dist = dist

                return min_dist

            sk = [nearest_neighbor_dist(x) for x in range(num_clusters)]

            def s(x):
                k = kmeans.predict(x[np.newaxis, :])[0]
                return sk[k]

            def can_merge(g, h):
                for pg in g:
                    for ph in h:
                        vg = peaks[pg]
                        vh = peaks[ph]
                        dist_gh = np.linalg.norm(vg - vh)

                        if max_tol(vg, vh) < self.tol and dist_gh / (
                                s(vg) + s(vh)) <= self.merge_dist:
                            return True

                return False

            while True:
                if len(groups) == 1:
                    break

                # find closest mergable groups
                min_dist = np.inf
                for gi in range(len(groups)):
                    g = groups[gi]

                    for hi in range(gi + 1, len(groups)):
                        h = groups[hi]

                        if can_merge(g, h):
                            dist_gh = np.inf
                            for pg in g:
                                vg = peaks[pg]
                                for ph in h:
                                    vh = peaks[ph]
                                    #                                     print("vg {} vh {}".format(vg, vh))
                                    dist_gh = min(dist_gh,
                                                  np.linalg.norm(vg - vh))

                            if dist_gh < min_dist:
                                min_gi = gi
                                min_hi = hi
                                min_dist = dist_gh

                if min_dist == np.inf:
                    break

                # merge the groups
                groups[min_gi].extend(groups[min_hi])
                for g in groups[min_hi]:
                    peak_groups[g] = min_gi
                del groups[min_hi]

        cluster_group = [0] * num_clusters
        cluster_peaks = [0] * num_clusters

        for gi, g in enumerate(groups):
            for p in g:
                for cluster in peak_clusters[p]:
                    cluster_group[cluster] = gi
                    cluster_peaks[cluster] = p

        self._peaks[data_group] = peaks
        self._cluster_peak[data_group] = cluster_peaks
        self._cluster_group[data_group] = cluster_group
Exemple #38
0
    def plot(self, experiment, **kwargs):
        """Plot a faceted histogram view of a channel"""
        
        if not experiment:
            raise util.CytoflowViewError("No experiment specified")
        
        if not self.xchannel:
            raise util.CytoflowViewError("X channel not specified")
        
        if self.xchannel not in experiment.data:
            raise util.CytoflowViewError("X channel {0} not in the experiment"
                                    .format(self.xchannel))
            
        if not self.ychannel:
            raise util.CytoflowViewError("Y channel not specified")
        
        if self.ychannel not in experiment.data:
            raise util.CytoflowViewError("Y channel {0} not in the experiment")
        
        if self.xfacet and self.xfacet not in experiment.conditions:
            raise util.CytoflowViewError("X facet {0} not in the experiment")
        
        if self.yfacet and self.yfacet not in experiment.conditions:
            raise util.CytoflowViewError("Y facet {0} not in the experiment")
        
        if self.huefacet and self.huefacet not in experiment.metadata:
            raise util.CytoflowViewError("Hue facet {0} not in the experiment")

        if self.subset:
            try: 
                data = experiment.query(self.subset)
            except:
                raise util.CytoflowViewError("Subset string \'{0}\' not valid")
                            
            if len(data.index) == 0:
                raise util.CytoflowViewError("Subset string '{0}' returned no events"
                                        .format(self.subset))
        else:
            data = experiment.data
        
        #kwargs.setdefault('histtype', 'stepfilled')
        #kwargs.setdefault('alpha', 0.5)
        kwargs.setdefault('edgecolor', 'none')
        #kwargs.setdefault('mincnt', 1)
        #kwargs.setdefault('bins', 'log')
        kwargs.setdefault('antialiased', True)
            
        xmin, xmax = (np.amin(data[self.xchannel]), np.amax(data[self.xchannel]))
        ymin, ymax = (np.amin(data[self.ychannel]), np.amax(data[self.ychannel]))
        # to avoid issues with singular data, expand the min/max pairs
        xmin, xmax = mtrans.nonsingular(xmin, xmax, expander=0.1)
        ymin, ymax = mtrans.nonsingular(ymin, ymax, expander=0.1)
        
        extent = (xmin, xmax, ymin, ymax)
        kwargs.setdefault('extent', extent)
        
        xbins = util.num_hist_bins(experiment[self.xchannel])
        ybins = util.num_hist_bins(experiment[self.ychannel])
        bins = np.mean([xbins, ybins])
        
        kwargs.setdefault('bins', bins) # Do not move above.  don't ask.

        g = sns.FacetGrid(data,
                          size = 6,
                          aspect = 1.5, 
                          col = (self.xfacet if self.xfacet else None),
                          row = (self.yfacet if self.yfacet else None),
                          hue = (self.huefacet if self.huefacet else None),
                          col_order = (np.sort(data[self.xfacet].unique()) if self.xfacet else None),
                          row_order = (np.sort(data[self.yfacet].unique()) if self.yfacet else None),
                          hue_order = (np.sort(data[self.huefacet].unique()) if self.huefacet else None),
                          sharex = False,
                          sharey = False)
        
        if(self.xscale != "linear" or self.yscale != "linear"):
            warnings.warn("hexbin is broken with scales other than \"linear\"",
                          util.CytoflowViewWarning)

        xscale = util.scale_factory(self.xscale, experiment, self.xchannel)
        yscale = util.scale_factory(self.yscale, experiment, self.ychannel)
        
        for ax in g.axes.flatten():
            ax.set_xscale(self.xscale, **xscale.mpl_params)
            ax.set_yscale(self.yscale, **yscale.mpl_params)
        
        g.map(plt.hexbin, self.xchannel, self.ychannel, **kwargs)
Exemple #39
0
    def estimate(self, experiment, subset = None):
        """
        Estimate the Gaussian mixture model parameters.
        
        Parameters
        ----------
        experiment : Experiment
            The data to use to estimate the mixture parameters
            
        subset : str (default = None)
            If set, a Python expression to determine the subset of the data
            to use to in the estimation.
        """
        
        warn("GaussianMixture1DOp is DEPRECATED.  Please use GaussianMixtureOp.",
             util.CytoflowOpWarning)
        
        if experiment is None:
            raise util.CytoflowOpError('experiment', "No experiment specified")

        if self.channel not in experiment.data:
            raise util.CytoflowOpError('channel',
                                       "Column {0} not found in the experiment"
                                       .format(self.channel))
       
        for b in self.by:
            if b not in experiment.data:
                raise util.CytoflowOpError('by',
                                           "Aggregation metadata {} not found, "
                                           "must be one of {}"
                                           .format(b, experiment.conditions))
            
        if self.num_components == 1 and self.posteriors:
            raise util.CytoflowOpError('num_components',
                                       "If num_components == 1, all posteriors are 1.")
        
        if subset:
            try:
                experiment = experiment.query(subset)
            except Exception as e:
                raise util.CytoflowOpError('subset',
                                           "Subset string '{0}' isn't valid"
                                           .format(subset)) from e
                
            if len(experiment) == 0:
                raise util.CytoflowOpError('subset',
                                           "Subset string '{0}' returned no events"
                                           .format(subset))
                
        if self.by:
            by = sorted(self.by)
            groupby = experiment.data.groupby(by)
        else:
            # use a lambda expression to return a group that contains
            # all the events
            groupby = experiment.data.groupby(lambda _: True)
            
        # get the scale. estimate the scale params for the ENTIRE data set,
        # not subsets we get from groupby().  And we need to save it so that
        # the data is transformed the same way when we apply()
        self._scale = util.scale_factory(self.scale, experiment, channel = self.channel)
        
        gmms = {}
            
        for group, data_subset in groupby:
            if len(data_subset) == 0:
                raise util.CytoflowOpError(None, 
                                           "Group {} had no data".format(group))
            x = data_subset[self.channel].reset_index(drop = True)
            x = self._scale(x)
            
            # drop data that isn't in the scale range
            #x = pd.Series(self._scale(x)).dropna()
            x = x[~np.isnan(x)]
            
            gmm = mixture.GaussianMixture(n_components = self.num_components,
                                          random_state = 1)
            gmm.fit(x[:, np.newaxis])
            
            if not gmm.converged_:
                raise util.CytoflowOpError(None,
                                           "Estimator didn't converge"
                                           " for group {0}"
                                           .format(group))
                
            # to make sure we have a stable ordering, sort the components
            # by the means (so the first component has the lowest mean, 
            # the next component has the next-lowest, etc.)
            
            sort_idx = np.argsort(gmm.means_[:, 0])
            gmm.means_ = gmm.means_[sort_idx]
            gmm.weights_ = gmm.weights_[sort_idx]
            gmm.covariances_ = gmm.covariances_[sort_idx]
           
            gmms[group] = gmm
            
        self._gmms = gmms
Exemple #40
0
    def plot(self, experiment, **kwargs):
        """Plot a bar chart"""
        
        if not experiment:
            raise util.CytoflowViewError("No experiment specified")
        
        if not self.by:
            raise util.CytoflowViewError("Independent variable 'by' not set")
            
        if self.by not in experiment.conditions:
            raise util.CytoflowViewError("Independent variable {0} not in the experiment"
                                    .format(self.by))
        
        if not (experiment.conditions[self.by] == "float" or
                experiment.conditions[self.by] == "int"):
            raise util.CytoflowViewError("by variable {0} isn't numeric"
                                    .format(self.by)) 
            
        if not self.xchannel:
            raise util.CytoflowViewError("X channel isn't set.")
        
        if self.xchannel not in experiment.data:
            raise util.CytoflowViewError("X channel {0} isn't in the experiment"
                                    .format(self.xchannel))
        
        if not self.xfunction:
            raise util.CytoflowViewError("X summary function isn't set")
                
        if not self.ychannel:
            raise util.CytoflowViewError("Y channel isn't set.")
        
        if self.ychannel not in experiment.data:
            raise util.CytoflowViewError("Y channel {0} isn't in the experiment"
                                    .format(self.ychannel))
        
        if not self.yfunction:
            raise util.CytoflowViewError("Y summary function isn't set")
        
        if self.xfacet and self.xfacet not in experiment.conditions:
            raise util.CytoflowViewError("X facet {0} not in the experiment")
        
        if self.yfacet and self.yfacet not in experiment.conditions:
            raise util.CytoflowViewError("Y facet {0} not in the experiment")
        
        if self.huefacet and self.huefacet not in experiment.metadata:
            raise util.CytoflowViewError("Hue facet {0} not in the experiment")        

        kwargs.setdefault('antialiased', True)
        
        if self.subset:
            try:
                data = experiment.query(self.subset)
            except:
                raise util.CytoflowViewError("Subset string '{0}' isn't valid"
                                        .format(self.subset))
                
            if len(data.index) == 0:
                raise util.CytoflowViewError("Subset string '{0}' returned no events"
                                        .format(self.subset))
        else:
            data = experiment.data
            
        group_vars = [self.by]
        if self.xfacet:
            group_vars.append(self.xfacet)
        if self.yfacet:
            group_vars.append(self.yfacet)
        if self.huefacet:
            group_vars.append(self.huefacet)
            
        g = data.groupby(by = group_vars)
        
        plot_data = pd.DataFrame(
                {self.xchannel : g[self.xchannel].aggregate(self.xfunction), 
                 self.ychannel : g[self.ychannel].aggregate(self.yfunction)}) \
                      .reset_index()
 
        grid = sns.FacetGrid(plot_data,
                             size = 6,
                             aspect = 1.5,
                             col = (self.xfacet if self.xfacet else None),
                             row = (self.yfacet if self.yfacet else None),
                             hue = (self.huefacet if self.huefacet else None),
                             col_order = (np.sort(data[self.xfacet].unique()) if self.xfacet else None),
                             row_order = (np.sort(data[self.yfacet].unique()) if self.yfacet else None),
                             hue_order = (np.sort(data[self.huefacet].unique()) if self.huefacet else None),
                             legend_out = False,
                             sharex = False,
                             sharey = False)
        
        xscale = util.scale_factory(self.xscale, experiment, self.xchannel)
        yscale = util.scale_factory(self.yscale, experiment, self.ychannel)
        
        for ax in grid.axes.flatten():
            ax.set_xscale(self.xscale, **xscale.mpl_params)
            ax.set_yscale(self.yscale, **yscale.mpl_params)

        grid.map(plt.plot, self.xchannel, self.ychannel, **kwargs)

        # if we have a hue facet and a lot of hues, make a color bar instead
        # of a super-long legend.
        
        if self.huefacet:
            current_palette = mpl.rcParams['axes.color_cycle']
            if len(grid.hue_names) > len(current_palette):
                plot_ax = plt.gca()
                cmap = mpl.colors.ListedColormap(sns.color_palette("husl", 
                                                                   n_colors = len(grid.hue_names)))
                cax, _ = mpl.colorbar.make_axes(plt.gca())
                norm = mpl.colors.Normalize(vmin = np.min(grid.hue_names), 
                                            vmax = np.max(grid.hue_names), 
                                            clip = False)
                mpl.colorbar.ColorbarBase(cax, cmap = cmap, norm = norm)
                plt.sca(plot_ax)
            else:
                grid.add_legend()
Exemple #41
0
    def estimate(self, experiment, subset = None):
        """
        Estimate the Gaussian mixture model parameters
        """
        
        if not experiment:
            raise util.CytoflowOpError("No experiment specified")

        if self.xchannel not in experiment.data:
            raise util.CytoflowOpError("Column {0} not found in the experiment"
                                  .format(self.xchannel))
            
        if self.ychannel not in experiment.data:
            raise util.CytoflowOpError("Column {0} not found in the experiment"
                                  .format(self.ychannel))
       
        for b in self.by:
            if b not in experiment.data:
                raise util.CytoflowOpError("Aggregation metadata {0} not found"
                                      " in the experiment"
                                      .format(b))
            if len(experiment.data[b].unique()) > 100: #WARNING - magic number
                raise util.CytoflowOpError("More than 100 unique values found for"
                                      " aggregation metadata {0}.  Did you"
                                      " accidentally specify a data channel?"
                                      .format(b))
                
        if self.by:
            groupby = experiment.data.groupby(self.by)
        else:
            # use a lambda expression to return a group that contains
            # all the events
            groupby = experiment.data.groupby(lambda x: True)
            
        # get the scale. estimate the scale params for the ENTIRE data set,
        # not subsets we get from groupby().  And we need to save it so that
        # the data is transformed the same way when we apply()
        self._xscale = util.scale_factory(self.xscale, experiment, self.xchannel)
        self._yscale = util.scale_factory(self.yscale, experiment, self.ychannel)
            
        for group, data_subset in groupby:
            x = data_subset.loc[:, [self.xchannel, self.ychannel]]
            x[self.xchannel] = self._xscale(x[self.xchannel])
            x[self.ychannel] = self._yscale(x[self.ychannel])
            
            # drop data that isn't in the scale range
            x = x[~(np.isnan(x[self.xchannel]) | np.isnan(x[self.ychannel]))]
            x = x.values
            
            gmm = mixture.GMM(n_components = self.num_components,
                              covariance_type = "full",
                              random_state = 1)
            gmm.fit(x)
            
            if not gmm.converged_:
                raise util.CytoflowOpError("Estimator didn't converge"
                                      " for group {0}"
                                      .format(group))
                
            # in the 1D version, we sort the components by the means -- so
            # the first component has the lowest mean, the second component
            # has the next-lowest mean, etc.  that doesn't work in a 2D area,
            # obviously.
            
            # instead, we assume that the clusters are likely (?) to be
            # arranged along *one* of the axes, so we take the |norm| of the
            # x,y mean of each cluster and sort that way.
            
            norms = (gmm.means_[:, 0] ** 2 + gmm.means_[:, 1] ** 2) ** 0.5
            sort_idx = np.argsort(norms)
            gmm.means_ = gmm.means_[sort_idx]
            gmm.weights_ = gmm.weights_[sort_idx]
            gmm.covars_ = gmm.covars_[sort_idx]
            
            self._gmms[group] = gmm
Exemple #42
0
    def plot(self, experiment, **kwargs):
        """Plot a bar chart"""
        
        if not experiment:
            raise util.CytoflowViewError("No experiment specified")

        if not self.channel:
            raise util.CytoflowViewError("Channel not specified")
        
        if self.channel not in experiment.data:
            raise util.CytoflowViewError("Channel {0} isn't in the experiment"
                                    .format(self.channel))
        
        if not self.by:
            raise util.CytoflowViewError("Variable not specified")
        
        if not self.by in experiment.conditions:
            raise util.CytoflowViewError("Variable {0} isn't in the experiment")
        
        if not self.function:
            raise util.CytoflowViewError("Function not specified")
        
        if self.xfacet and self.xfacet not in experiment.conditions:
            raise util.CytoflowViewError("X facet {0} isn't in the experiment"
                                    .format(self.xfacet))
        
        if self.yfacet and self.yfacet not in experiment.metadata:
            raise util.CytoflowViewError("Y facet {0} isn't in the experiment"
                                    .format(self.yfacet))

        if self.huefacet and self.huefacet not in experiment.metadata:
            raise util.CytoflowViewError("Hue facet {0} isn't in the experiment"
                                    .format(self.huefacet))
        
#         if self.error_bars == 'data' and self.error_function is None:
#             return False
#         
#         if self.error_bars == 'summary' \
#             and (self.error_function is None 
#                  or not self.error_var in experiment.metadata):
#             return False
        
        if self.subset:
            try:
                data = experiment.query(self.subset)
            except:
                raise util.CytoflowViewError("Subset string {0} isn't valid"
                                        .format(self.subset))
                            
            if len(data.index) == 0:
                raise util.CytoflowViewError("Subset string '{0}' returned no events"
                                        .format(self.subset))
        else:
            data = experiment.data
            
        sns.factorplot(x = self.by,
                       y = self.channel,
                       data = data,
                       size = 6,
                       aspect = 1.5,
                       row = (self.yfacet if self.yfacet else None),
                       col = (self.xfacet if self.xfacet else None),
                       hue = (self.huefacet if self.huefacet else None),
                       col_order = (np.sort(data[self.xfacet].unique()) if self.xfacet else None),
                       row_order = (np.sort(data[self.yfacet].unique()) if self.yfacet else None),
                       hue_order = (np.sort(data[self.huefacet].unique()) if self.huefacet else None),
                       # something buggy here.
                       #orient = ("h" if self.orientation == "horizontal" else "v"),
                       estimator = self.function,
                       ci = None,
                       kind = "bar")
        
        scale = util.scale_factory(self.scale, experiment, self.channel)
        
        # because the bottom of a bar chart is "0", masking out bad
        # values on a log scale doesn't work.  we must clip instead.
        if self.scale == "log":
            scale.mode = "clip"

        plt.yscale(self.scale, **scale.mpl_params)