def plot(self, experiment, **kwargs):
    """
    Plot the plots.

    Parameters
    ----------
    """
    if experiment is None:
        raise util.CytoflowViewError('experiment', "No experiment specified")

    view, trait_name = self._strip_trait(self.op.name)

    if self.xchannel in self.op._scale:
        xscale = self.op._scale[self.xchannel]
    else:
        xscale = util.scale_factory(self.xscale, experiment, channel = self.xchannel)

    if self.ychannel in self.op._scale:
        yscale = self.op._scale[self.ychannel]
    else:
        yscale = util.scale_factory(self.yscale, experiment, channel = self.ychannel)

    super(KMeans2DView, view).plot(experiment,
                                   annotation_facet = self.op.name,
                                   annotation_trait = trait_name,
                                   annotations = self.op._kmeans,
                                   xscale = xscale,
                                   yscale = yscale,
                                   **kwargs)
def plot(self, experiment, plot_name=None, **kwargs):
    """
    Parameters
    ----------
    xlim, ylim : (float, float)
        Set the range of the plot's axis.
    """
    if experiment is None:
        raise util.CytoflowViewError('experiment', "No experiment specified")

    data = self._make_data(experiment)

    xscale = util.scale_factory(self.xscale, experiment,
                                statistic=self.xstatistic,
                                error_statistic=self.x_error_statistic)
    yscale = util.scale_factory(self.yscale, experiment,
                                statistic=self.ystatistic,
                                error_statistic=self.y_error_statistic)

    super().plot(experiment, data, plot_name,
                 xscale=xscale, yscale=yscale, **kwargs)
def plot(self, experiment, **kwargs):
    """
    Plot the plots.

    Parameters
    ----------
    """
    view, trait_name = self._strip_trait(self.op.name)

    if self.xchannel in self.op._scale:
        xscale = self.op._scale[self.xchannel]
    else:
        xscale = util.scale_factory(self.xscale, experiment, channel=self.xchannel)

    if self.ychannel in self.op._scale:
        yscale = self.op._scale[self.ychannel]
    else:
        yscale = util.scale_factory(self.yscale, experiment, channel=self.ychannel)

    super(GaussianMixture2DView, view).plot(experiment,
                                            annotation_facet=self.op.name,
                                            annotation_trait=trait_name,
                                            annotations=self.op._gmms,
                                            xscale=xscale,
                                            yscale=yscale,
                                            **kwargs)
def plot(self, experiment, **kwargs):
    """
    Plot the plots.

    Parameters
    ----------
    """
    if experiment is None:
        raise util.CytoflowViewError('experiment', "No experiment specified")

    if self.op.num_components == 1:
        annotation_facet = self.op.name + "_1"
    else:
        annotation_facet = self.op.name

    view, trait_name = self._strip_trait(annotation_facet)

    if self.xchannel in self.op._scale:
        xscale = self.op._scale[self.xchannel]
    else:
        xscale = util.scale_factory(self.xscale, experiment, channel = self.xchannel)

    if self.ychannel in self.op._scale:
        yscale = self.op._scale[self.ychannel]
    else:
        yscale = util.scale_factory(self.yscale, experiment, channel = self.ychannel)

    super(GaussianMixture2DView, view).plot(experiment,
                                            annotation_facet = annotation_facet,
                                            annotation_trait = trait_name,
                                            annotations = self.op._gmms,
                                            xscale = xscale,
                                            yscale = yscale,
                                            **kwargs)
def plot(self, experiment, plot_name = None, **kwargs):
    data = self._make_data(experiment)

    if not self.variable:
        raise util.CytoflowViewError('variable', "variable not set")

    if self.variable not in experiment.conditions:
        raise util.CytoflowViewError('variable',
                                     "variable {0} not in the experiment"
                                     .format(self.variable))

    if util.is_numeric(experiment[self.variable]):
        xscale = util.scale_factory(self.xscale, experiment, condition = self.variable)
    else:
        xscale = None

    yscale = util.scale_factory(self.yscale, experiment,
                                statistic = self.statistic,
                                error_statistic = self.error_statistic)

    super().plot(experiment, data, plot_name,
                 xscale = xscale, yscale = yscale, **kwargs)
def plot(self, experiment, **kwargs):
    """
    Parameters
    ----------
    xlim, ylim : (float, float)
        Set the range of the plot's axis.
    """
    if experiment is None:
        raise util.CytoflowViewError('experiment', "No experiment specified")

    if not self.xchannel:
        raise util.CytoflowViewError('xchannel', "Must specify an xchannel")

    if self.xchannel not in experiment.data:
        raise util.CytoflowViewError('xchannel',
                                     "Channel {} not in the experiment"
                                     .format(self.xchannel))

    if not self.ychannel:
        raise util.CytoflowViewError('ychannel', "Must specify a ychannel")

    if self.ychannel not in experiment.data:
        raise util.CytoflowViewError('ychannel',
                                     "Channel {} not in the experiment"
                                     .format(self.ychannel))

    # get the scale
    xscale = kwargs.pop('xscale', None)
    if xscale is None:
        xscale = util.scale_factory(self.xscale, experiment, channel=self.xchannel)

    yscale = kwargs.pop('yscale', None)
    if yscale is None:
        yscale = util.scale_factory(self.yscale, experiment, channel=self.ychannel)

    xlim = kwargs.pop('xlim', None)
    ylim = kwargs.pop('ylim', None)

    super().plot(experiment,
                 lim={self.xchannel: xlim,
                      self.ychannel: ylim},
                 scale={self.xchannel: xscale,
                        self.ychannel: yscale},
                 **kwargs)
def plot(self, experiment, **kwargs):
    """
    Parameters
    ----------
    lim : Dict(Str : (float, float))
        Set the range of each channel's axis.  If unspecified, assume
        that the limits are the minimum and maximum of the clipped data
    """
    if experiment is None:
        raise util.CytoflowViewError('experiment', "No experiment specified")

    if len(self.channels) == 0:
        raise util.CytoflowOpError('channels', "Must set at least one channel")

    if len(self.channels) != len(set(self.channels)):
        raise util.CytoflowOpError('channels', "Must not duplicate channels")

    for c in self.channels:
        if c not in experiment.data:
            raise util.CytoflowOpError('channels',
                                       "Channel {0} not found in the experiment"
                                       .format(c))

    for c in self.scale:
        if c not in self.channels:
            raise util.CytoflowOpError('scale',
                                       "Scale set for channel {0}, but it isn't "
                                       "in 'channels'".format(c))

    # get the scale
    scale = {}
    for c in self.channels:
        if c in self.scale:
            scale[c] = util.scale_factory(self.scale[c], experiment, channel=c)
        else:
            scale[c] = util.scale_factory(util.get_default_scale(), experiment, channel=c)

    lim = kwargs.pop("lim", {})
    for c in self.channels:
        if c not in lim:
            lim[c] = None

    super().plot(experiment, lim=lim, scale=scale, **kwargs)
def plot(self, experiment, **kwargs):
    """
    Parameters
    ----------
    lim : (float, float)
        Set the range of the plot's data axis.
    orientation : {'vertical', 'horizontal'}
    """
    if experiment is None:
        raise util.CytoflowViewError('experiment', "No experiment specified")

    if not self.channel:
        raise util.CytoflowViewError('channel', "Must specify a channel")

    if self.channel not in experiment.data:
        raise util.CytoflowViewError('channel',
                                     "Channel {0} not in the experiment"
                                     .format(self.channel))

    # get the scale
    scale = kwargs.pop('scale', None)
    if scale is None:
        scale = util.scale_factory(self.scale, experiment, channel=self.channel)

    lim = kwargs.pop("lim", None)

    super().plot(experiment,
                 lim={self.channel: lim},
                 scale={self.channel: scale},
                 **kwargs)
def plot(self, experiment, plot_name=None, **kwargs):
    """
    Parameters
    ----------
    orientation : {'vertical', 'horizontal'}
    lim : (float, float)
        Set the range of the plot's axis.
    """
    if experiment is None:
        raise util.CytoflowViewError('experiment', "No experiment specified")

    data = self._make_data(experiment)

    if not self.variable:
        raise util.CytoflowViewError('variable', "variable not set")

    if self.variable not in experiment.conditions:
        raise util.CytoflowViewError('variable',
                                     "variable {0} not in the experiment"
                                     .format(self.variable))

    scale = util.scale_factory(self.scale, experiment,
                               statistic=self.statistic,
                               error_statistic=self.error_statistic)

    super().plot(experiment, data, plot_name=plot_name, scale=scale, **kwargs)
def plot(self, experiment, **kwargs):
    """
    Plot the plots.

    Parameters
    ----------
    """
    if experiment is None:
        raise util.CytoflowViewError('experiment', "No experiment specified")

    if not self.op._kmeans:
        raise util.CytoflowViewError(None,
                                     "Must estimate a model before plotting "
                                     "the density plot.")

    annotations = {}
    for k in self.op._kmeans:
        annotations[k] = (self.op._kmeans[k],
                          self.op._peaks[k],
                          self.op._cluster_peak[k],
                          self.op._density[k])

    if self.xchannel in self.op._scale:
        xscale = self.op._scale[self.xchannel]
    else:
        xscale = util.scale_factory(self.xscale, experiment, channel=self.xchannel)

    if self.ychannel in self.op._scale:
        yscale = self.op._scale[self.ychannel]
    else:
        yscale = util.scale_factory(self.yscale, experiment, channel=self.ychannel)

    super().plot(experiment,
                 annotations=annotations,
                 xscale=xscale,
                 yscale=yscale,
                 **kwargs)
def plot(self, experiment, plot_name=None, **kwargs):
    data = self._make_data(experiment)

    xscale = util.scale_factory(self.xscale, experiment, condition=self.variable)
    yscale = util.scale_factory(self.yscale, experiment,
                                statistic=self.statistic,
                                error_statistic=self.error_statistic)

    super().plot(experiment, data, plot_name,
                 xscale=xscale, yscale=yscale, **kwargs)
def plot(self, experiment, **kwargs):
    """
    Parameters
    ----------
    min_quantile : float (>0.0 and <1.0, default = 0.001)
        Clip data that is less than this quantile.

    max_quantile : float (>0.0 and <1.0, default = 1.00)
        Clip data that is greater than this quantile.

    xlim : (float, float)
        Set the range of the plot's x axis.
    """
    if experiment is None:
        raise util.CytoflowViewError('experiment', "No experiment specified")

    if not self.channel:
        raise util.CytoflowViewError('channel', "Must specify a channel")

    if self.channel not in experiment.data:
        raise util.CytoflowViewError('channel',
                                     "Channel {0} not in the experiment"
                                     .format(self.channel))

    # get the scale
    scale = kwargs.pop('scale', None)
    if scale is None:
        scale = util.scale_factory(self.scale, experiment, channel = self.channel)

    # adjust the limits to clip extreme values
    min_quantile = kwargs.pop("min_quantile", 0.001)
    max_quantile = kwargs.pop("max_quantile", 1.0)

    if min_quantile < 0.0 or min_quantile > 1:
        raise util.CytoflowViewError('min_quantile',
                                     "min_quantile must be between 0 and 1")

    if max_quantile < 0.0 or max_quantile > 1:
        raise util.CytoflowViewError('max_quantile',
                                     "max_quantile must be between 0 and 1")

    if min_quantile >= max_quantile:
        raise util.CytoflowViewError('min_quantile',
                                     "min_quantile must be less than max_quantile")

    xlim = kwargs.pop("xlim", None)
    if xlim is None:
        xlim = (experiment[self.channel].quantile(min_quantile),
                experiment[self.channel].quantile(max_quantile))
        xlim = [scale.clip(x) for x in xlim]

    super().plot(experiment, xlim = xlim, xscale = scale, **kwargs)
def test_logicle_estimate(self):
    """
    Test the parameter estimator against the R implementation
    """
    scale = util.scale_factory("logicle", self.ex, channel = "Y2-A")

    # these are the values the R implementation gives
    self.assertAlmostEqual(scale.A, 0.0)
    self.assertAlmostEqual(scale.W, 0.533191950161284)
def test_run(self):
    scale = util.scale_factory("log", self.ex, channel="Pacific Blue-A")

    x = scale(20.0)
    self.assertTrue(isinstance(x, float))

    x = scale([20])
    self.assertTrue(isinstance(x, list))

    x = scale(pd.Series([20]))
    self.assertTrue(isinstance(x, pd.Series))
def test_logicle_apply(self):
    """
    Make sure the function applies without segfaulting
    """
    scale = util.scale_factory("logicle", self.ex, channel = "Y2-A")

    x = scale(20.0)
    self.assertTrue(isinstance(x, float))

    x = scale([20])
    self.assertTrue(isinstance(x, list))

    x = scale(pd.Series([20]))
    self.assertTrue(isinstance(x, pd.Series))
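# A minimal sketch of round-tripping data through a scale returned by
# util.scale_factory(), mirroring the tests above.  It assumes `ex` is an
# already-imported Experiment with a "Y2-A" channel; the methods used
# (scale(), scale.inverse(), scale.clip()) are the ones exercised in the
# surrounding snippets, not a complete description of the scale API.
import pandas as pd
import cytoflow.utility as util

def scale_round_trip(ex):
    scale = util.scale_factory("logicle", ex, channel = "Y2-A")

    raw = pd.Series([20.0, 500.0, 10000.0])
    clipped = scale.clip(raw)               # limit values to the scale's domain
    transformed = scale(clipped)            # forward transform into scale space
    recovered = scale.inverse(transformed)  # back into data space

    return raw, transformed, recovered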
def _grid_plot(self, experiment, grid, xlim, ylim, xscale, yscale, **kwargs):
    kwargs.setdefault('antialiased', False)
    kwargs.setdefault('linewidth', 0)
    kwargs.setdefault('edgecolors', 'face')
    kwargs.setdefault('cmap', plt.get_cmap('viridis'))

    under_color = kwargs.pop('under_color', None)
    if under_color is not None:
        kwargs['cmap'].set_under(color=under_color)
    else:
        kwargs['cmap'].set_under(color=kwargs['cmap'](0.0))

    bad_color = kwargs.pop('bad_color', None)
    if bad_color is not None:
        kwargs['cmap'].set_bad(color=bad_color)

    gridsize = kwargs.pop('gridsize', 50)
    xbins = xscale.inverse(np.linspace(xscale(xlim[0]), xscale(xlim[1]), gridsize))
    ybins = yscale.inverse(np.linspace(yscale(ylim[0]), yscale(ylim[1]), gridsize))

    # set up the range of the color map
    if 'norm' not in kwargs:
        data_max = 0
        for _, data_ijk in grid.facet_data():
            x = data_ijk[self.xchannel]
            y = data_ijk[self.ychannel]
            h, _, _ = np.histogram2d(x, y, bins=[xbins, ybins])
            data_max = max(data_max, h.max())

        hue_scale = util.scale_factory(self.huescale, experiment,
                                       data=np.array([1, data_max]))
        kwargs['norm'] = hue_scale.color_norm()

    grid.map(_densityplot, self.xchannel, self.ychannel,
             xbins=xbins, ybins=ybins, **kwargs)

    return {'cmap': kwargs['cmap'], 'norm': kwargs['norm']}
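# A small self-contained matplotlib sketch of the colormap setup used above:
# reserving an "under" color and a "bad" color, then normalizing bin counts
# with a log norm.  The LogNorm stands in for the cytoflow-specific
# hue_scale.color_norm() call; the data and color choices are illustrative.
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors

def density_sketch():
    rng = np.random.default_rng(0)
    x, y = rng.normal(size=(2, 10000))

    h, xedges, yedges = np.histogram2d(x, y, bins=50)
    h = np.ma.masked_invalid(h)               # mask NaNs so the 'bad' color applies

    cmap = plt.get_cmap('viridis').copy()
    cmap.set_under(cmap(0.0))                  # values below vmin would get the lowest color
    cmap.set_bad('white')                      # masked bins (zeros under LogNorm count as masked)

    norm = mcolors.LogNorm(vmin=1, vmax=h.max())

    plt.pcolormesh(xedges, yedges, h.T, cmap=cmap, norm=norm)
    plt.colorbar()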
def plot(self, experiment, **kwargs):
    if experiment is None:
        raise util.CytoflowViewError("No experiment specified")

    if not self.channel:
        raise util.CytoflowViewError("Must specify a channel")

    if self.channel not in experiment.data:
        raise util.CytoflowViewError("Channel {0} not in the experiment"
                                     .format(self.channel))

    # get the scale
    scale = kwargs.pop('scale', None)
    if scale is None:
        scale = util.scale_factory(self.scale, experiment, channel=self.channel)

    # adjust the limits to clip extreme values
    min_quantile = kwargs.pop("min_quantile", 0.001)
    max_quantile = kwargs.pop("max_quantile", 1.0)

    if min_quantile < 0.0 or min_quantile > 1:
        raise util.CytoflowViewError("min_quantile must be between 0 and 1")

    if max_quantile < 0.0 or max_quantile > 1:
        raise util.CytoflowViewError("max_quantile must be between 0 and 1")

    if min_quantile >= max_quantile:
        raise util.CytoflowViewError("min_quantile must be less than max_quantile")

    xlim = kwargs.pop("xlim", None)
    if xlim is None:
        xlim = (experiment[self.channel].quantile(min_quantile),
                experiment[self.channel].quantile(max_quantile))
        xlim = [scale.clip(x) for x in xlim]

    super().plot(experiment, xlim=xlim, xscale=scale, **kwargs)
def estimate(self, experiment, subset = None): """ Estimate the Gaussian mixture model parameters Parameters ---------- experiment : Experiment The data to use to estimate the mixture parameters subset : str (default = None) If set, a Python expression to determine the subset of the data to use to in the estimation. """ if experiment is None: raise util.CytoflowOpError('experiment', "No experiment specified") if len(self.channels) == 0: raise util.CytoflowOpError('channels', "Must set at least one channel") if len(self.channels) != len(set(self.channels)): raise util.CytoflowOpError('channels', "Must not duplicate channels") for c in self.channels: if c not in experiment.data: raise util.CytoflowOpError('channels', "Channel {0} not found in the experiment" .format(c)) for c in self.scale: if c not in self.channels: raise util.CytoflowOpError('channels', "Scale set for channel {0}, but it isn't " "in the experiment" .format(c)) for b in self.by: if b not in experiment.data: raise util.CytoflowOpError('by', "Aggregation metadata {} not found, " "must be one of {}" .format(b, experiment.conditions)) if subset: try: experiment = experiment.query(subset) except: raise util.CytoflowViewError('subset', "Subset string '{0}' isn't valid" .format(subset)) if len(experiment) == 0: raise util.CytoflowViewError('subset', "Subset string '{0}' returned no events" .format(subset)) if self.by: groupby = experiment.data.groupby(self.by) else: # use a lambda expression to return a group that contains # all the events groupby = experiment.data.groupby(lambda _: True) # get the scale. estimate the scale params for the ENTIRE data set, # not subsets we get from groupby(). And we need to save it so that # the data is transformed the same way when we apply() for c in self.channels: if c in self.scale: self._scale[c] = util.scale_factory(self.scale[c], experiment, channel = c) else: self._scale[c] = util.scale_factory(util.get_default_scale(), experiment, channel = c) gmms = {} for group, data_subset in groupby: if len(data_subset) == 0: raise util.CytoflowOpError(None, "Group {} had no data" .format(group)) x = data_subset.loc[:, self.channels[:]] for c in self.channels: x[c] = self._scale[c](x[c]) # drop data that isn't in the scale range for c in self.channels: x = x[~(np.isnan(x[c]))] x = x.values gmm = sklearn.mixture.GaussianMixture(n_components = self.num_components, covariance_type = "full", random_state = 1) gmm.fit(x) if not gmm.converged_: raise util.CytoflowOpError(None, "Estimator didn't converge" " for group {0}" .format(group)) # in the 1D version, we sorted the components by the means -- so # the first component has the lowest mean, the second component # has the next-lowest mean, etc. # that doesn't work in the general case. instead, we assume that # the clusters are likely (?) to be arranged along *one* of the # axes, so we take the |norm| of the mean of each cluster and # sort that way. norms = np.sum(gmm.means_ ** 2, axis = 1) ** 0.5 sort_idx = np.argsort(norms) gmm.means_ = gmm.means_[sort_idx] gmm.weights_ = gmm.weights_[sort_idx] gmm.covariances_ = gmm.covariances_[sort_idx] gmm.precisions_ = gmm.precisions_[sort_idx] gmm.precisions_cholesky_ = gmm.precisions_cholesky_[sort_idx] gmms[group] = gmm self._gmms = gmms
def estimate(self, experiment, subset = None): """ Estimate the Gaussian mixture model parameters """ if not experiment: raise util.CytoflowOpError("No experiment specified") if self.channel not in experiment.data: raise util.CytoflowOpError("Column {0} not found in the experiment" .format(self.channel)) for b in self.by: if b not in experiment.data: raise util.CytoflowOpError("Aggregation metadata {0} not found" " in the experiment" .format(b)) if len(experiment.data[b].unique()) > 100: #WARNING - magic number raise util.CytoflowOpError("More than 100 unique values found for" " aggregation metadata {0}. Did you" " accidentally specify a data channel?" .format(b)) if self.by: groupby = experiment.data.groupby(self.by) else: # use a lambda expression to return a group that contains # all the events groupby = experiment.data.groupby(lambda x: True) # get the scale. estimate the scale params for the ENTIRE data set, # not subsets we get from groupby(). And we need to save it so that # the data is transformed the same way when we apply() self._scale = util.scale_factory(self.scale, experiment, self.channel) for group, data_subset in groupby: x = data_subset[self.channel].reset_index(drop = True) x = self._scale(x) # drop data that isn't in the scale range #x = pd.Series(self._scale(x)).dropna() x = x[~np.isnan(x)] gmm = mixture.GMM(n_components = self.num_components, random_state = 1) gmm.fit(x[:, np.newaxis]) if not gmm.converged_: raise util.CytoflowOpError("Estimator didn't converge" " for group {0}" .format(group)) # to make sure we have a stable ordering, sort the components # by the means (so the first component has the lowest mean, # the next component has the next-lowest, etc.) sort_idx = np.argsort(gmm.means_[:, 0]) gmm.means_ = gmm.means_[sort_idx] gmm.weights_ = gmm.weights_[sort_idx] gmm.covars_ = gmm.covars_[sort_idx] self._gmms[group] = gmm
def apply(self, experiment): """ Applies the binning to an experiment. Parameters ---------- experiment : Experiment the old_experiment to which this op is applied Returns ------- Experiment A new experiment with a condition column named :attr:`name`, which contains the location of the left-most edge of the bin that the event is in. If :attr:`bin_count_name` is set, another column is added with that name as well, containing the number of events in the same bin as the event. """ if experiment is None: raise util.CytoflowOpError('experiment', "no experiment specified") if not self.name: raise util.CytoflowOpError('name', "Name is not set") if self.name != util.sanitize_identifier(self.name): raise util.CytoflowOpError( 'name', "Name can only contain letters, numbers and underscores.". format(self.name)) if self.name in experiment.data.columns: raise util.CytoflowOpError( 'name', "Name {} is in the experiment already".format(self.name)) if self.bin_count_name and self.bin_count_name in experiment.data.columns: raise util.CytoflowOpError( 'bin_count_name', "bin_count_name {} is in the experiment already".format( self.bin_count_name)) if not self.channel: raise util.CytoflowOpError('channel', "channel is not set") if self.channel not in experiment.data.columns: raise util.CytoflowOpError( 'channel', "channel {} isn't in the experiment".format(self.channel)) if not self.bin_width: raise util.CytoflowOpError('bin_width', "must set bin width") if not (self.scale == "linear" or self.scale == "log"): raise util.CytoflowOpError( 'scale', "Can only use binning op with linear or log scale") scale = util.scale_factory(self.scale, experiment, channel=self.channel) scaled_min = scale(scale.clip(experiment.data[self.channel]).min()) scaled_max = scale(scale.clip(experiment.data[self.channel]).max()) if self.scale == 'linear': start = 0 else: start = 1 scaled_bins_left = np.arange(start=-1.0 * start, stop=(-1.0 * scaled_min) + self.bin_width, step=self.bin_width) * -1.0 scaled_bins_left = scaled_bins_left[::-1][:-1] scaled_bins_right = np.arange(start=start, stop=scaled_max + self.bin_width, step=self.bin_width) scaled_bins = np.append(scaled_bins_left, scaled_bins_right) if len(scaled_bins) > self._max_num_bins: raise util.CytoflowOpError( None, "Too many bins! To increase this limit, " "change _max_num_bins (currently {})".format( self._max_num_bins)) if len(scaled_bins) < 2: raise util.CytoflowOpError('bin_width', "Must have more than one bin") # now, back into data space bins = scale.inverse(scaled_bins) # reduce to 4 sig figs bins = ['%.4g' % x for x in bins] bins = [float(x) for x in bins] bins = np.array(bins) # put the data in bins bin_idx = np.digitize(experiment.data[self.channel], bins[1:-1]) new_experiment = experiment.clone() new_experiment.add_condition(self.name, "float64", bins[bin_idx]) # keep track of the bins we used, for prettier plotting later. new_experiment.metadata[self.name]["bin_scale"] = self.scale new_experiment.metadata[self.name]["bins"] = bins if self.bin_count_name: # TODO - this is a HUGE memory hog?! # TODO - fix this, then turn it on by default agg_count = new_experiment.data.groupby(self.name).count() agg_count = agg_count[agg_count.columns[0]] # have to make the condition a float64, because if we're in log # space there may be events that have NaN as the bin number. new_experiment.add_condition( self.bin_count_name, "float64", new_experiment[self.name].map(agg_count)) new_experiment.history.append( self.clone_traits(transient=lambda _: True)) return new_experiment
def plot(self, experiment, **kwargs): """Plot a faceted histogram view of a channel""" if not experiment: raise util.CytoflowViewError("No experiment specified") if not self.channel: raise util.CytoflowViewError("Must specify a channel") if self.channel not in experiment.data: raise util.CytoflowViewError( "Channel {0} not in the experiment".format(self.channel)) if self.xfacet and self.xfacet not in experiment.conditions: raise util.CytoflowViewError( "X facet {0} not in the experiment".format(self.xfacet)) if self.yfacet and self.yfacet not in experiment.conditions: raise util.CytoflowViewError( "Y facet {0} not in the experiment".format(self.yfacet)) if self.huefacet and self.huefacet not in experiment.conditions: raise util.CytoflowViewError( "Hue facet {0} not in the experiment".format(self.huefacet)) facets = filter(lambda x: x, [self.xfacet, self.yfacet, self.huefacet]) if len(facets) != len(set(facets)): raise util.CytoflowViewError("Can't reuse facets") col_wrap = kwargs.pop('col_wrap', None) if col_wrap and self.yfacet: raise util.CytoflowViewError( "Can't set yfacet and col_wrap at the same time.") if col_wrap and not self.xfacet: raise util.CytoflowViewError("Must set xfacet to use col_wrap.") if self.subset: try: data = experiment.query(self.subset).data.reset_index() except util.CytoflowError as e: raise util.CytoflowViewError(str(e)) except Exception as e: raise util.CytoflowViewError( "Subset string '{0}' isn't valid".format(self.subset)) if len(data) == 0: raise util.CytoflowViewError( "Subset string '{0}' returned no events".format( self.subset)) else: data = experiment.data # get the scale scale = kwargs.pop('scale', None) if scale is None: scale = util.scale_factory(self.scale, experiment, channel=self.channel) scaled_data = scale(data[self.channel]) kwargs.setdefault('histtype', 'stepfilled') kwargs.setdefault('alpha', 0.5) kwargs.setdefault('antialiased', True) # estimate a "good" number of bins; see cytoflow.utility.num_hist_bins # for a reference. num_bins = util.num_hist_bins(scaled_data) # clip num_bins to (50, 1000) num_bins = max(min(num_bins, 1000), 50) xmin = bottleneck.nanmin(scaled_data) xmax = bottleneck.nanmax(scaled_data) if (self.huefacet and "bins" in experiment.metadata[self.huefacet] and experiment.metadata[self.huefacet]["bin_scale"] == self.scale): # if we color facet by the result of a BinningOp and we don't # match the BinningOp bins with the histogram bins, we get # gnarly aliasing. # each color gets at least one bin. however, if the estimated # number of bins for the histogram is much larger than the # number of colors, sub-divide each color into multiple bins. 
bins = experiment.metadata[self.huefacet]["bins"] bins = np.append(bins, xmax) num_hues = len(data[self.huefacet].unique()) bins_per_hue = math.ceil(num_bins / num_hues) new_bins = [xmin] for end in [b for b in bins if (b > xmin and b <= xmax)]: new_bins = np.append( new_bins, np.linspace(new_bins[-1], end, bins_per_hue + 1, endpoint=True)[1:]) bins = scale.inverse(new_bins) else: bin_width = (xmax - xmin) / num_bins bins = scale.inverse(np.arange(xmin, xmax, bin_width)) bins = np.append(bins, scale.inverse(xmax)) # take care of a rare rounding error, where the first observation is # less than the first bin or the last observation is more than the last # bin, which makes plt.hist() puke bins[-1] += 1 bins[0] -= 1 kwargs.setdefault('bins', bins) # mask out the data that's not in the scale domain data = data[~np.isnan(scaled_data)] # adjust the limits to clip extreme values min_quantile = kwargs.pop("min_quantile", 0.001) max_quantile = kwargs.pop("max_quantile", 0.999) xlim = kwargs.pop("xlim", None) if xlim is None: xlim = (data[self.channel].quantile(min_quantile), data[self.channel].quantile(max_quantile)) sharex = kwargs.pop("sharex", True) sharey = kwargs.pop("sharey", True) cols = col_wrap if col_wrap else \ len(data[self.xfacet].unique()) if self.xfacet else 1 g = sns.FacetGrid(data, size=6 / cols, aspect=1.5, col=(self.xfacet if self.xfacet else None), row=(self.yfacet if self.yfacet else None), hue=(self.huefacet if self.huefacet else None), col_order=(np.sort(data[self.xfacet].unique()) if self.xfacet else None), row_order=(np.sort(data[self.yfacet].unique()) if self.yfacet else None), hue_order=(np.sort(data[self.huefacet].unique()) if self.huefacet else None), col_wrap=col_wrap, legend_out=False, sharex=sharex, sharey=sharey, xlim=xlim) # set the scale for each set of axes; can't just call plt.xscale() for ax in g.axes.flatten(): ax.set_xscale(self.scale, **scale.mpl_params) legend = kwargs.pop('legend', True) g.map(plt.hist, self.channel, **kwargs) # if we are sharing y axes, make sure the y scale is the same for each if sharey: fig = plt.gcf() fig_y_max = float("-inf") for ax in fig.get_axes(): _, ax_y_max = ax.get_ylim() if ax_y_max > fig_y_max: fig_y_max = ax_y_max for ax in fig.get_axes(): ax.set_ylim(None, fig_y_max) # if we are sharing x axes, make sure the x scale is the same for each if sharex: fig = plt.gcf() fig_x_min = float("inf") fig_x_max = float("-inf") for ax in fig.get_axes(): ax_x_min, ax_x_max = ax.get_xlim() if ax_x_min < fig_x_min: fig_x_min = ax_x_min if ax_x_max > fig_x_max: fig_x_max = ax_x_max for ax in fig.get_axes(): ax.set_xlim(fig_x_min, fig_x_max) # if we have a hue facet, the y scaling is frequently wrong. if self.huefacet: h = np.histogram(data[self.channel], bins=bins) ymax = np.max(h[0]) plt.ylim(0, 1.1 * ymax) # if we have a hue facet and a lot of hues, make a color bar instead # of a super-long legend. if self.huefacet and legend: current_palette = mpl.rcParams['axes.color_cycle'] if util.is_numeric(experiment.data[self.huefacet]) and \ len(g.hue_names) > len(current_palette): plot_ax = plt.gca() cmap = mpl.colors.ListedColormap( sns.color_palette("husl", n_colors=len(g.hue_names))) cax, _ = mpl.colorbar.make_axes(plt.gca()) norm = mpl.colors.Normalize(vmin=np.min(g.hue_names), vmax=np.max(g.hue_names), clip=False) mpl.colorbar.ColorbarBase(cax, cmap=cmap, norm=norm, label=self.huefacet) plt.sca(plot_ax) else: g.add_legend(title=self.huefacet) return g
def apply(self, experiment):
    """Applies the polygon gate to an experiment.

    Parameters
    ----------
    experiment : Experiment
        the old :class:`Experiment` to which this op is applied

    Returns
    -------
    Experiment
        a new :class:`Experiment`, the same as ``old_experiment`` but with
        a new column of type ``bool`` with the same name as the operation.
        The bool is ``True`` if the event's measurement is within the
        polygon, and ``False`` otherwise.

    Raises
    ------
    util.CytoflowOpError
        if for some reason the operation can't be applied to this
        experiment. The reason is in :attr:`.CytoflowOpError.args`
    """
    if experiment is None:
        raise util.CytoflowOpError('experiment', "No experiment specified")

    # make sure name got set!
    if not self.name:
        raise util.CytoflowOpError('name',
                                   "You have to set the Polygon gate's name "
                                   "before applying it!")

    # make sure the experiment doesn't already have a column named self.name
    if self.name in experiment.data.columns:
        raise util.CytoflowOpError('name',
                                   "Experiment already contains a column {0}"
                                   .format(self.name))

    if not self.xchannel:
        raise util.CytoflowOpError('xchannel', "Must specify an x channel")

    if not self.ychannel:
        raise util.CytoflowOpError('ychannel', "Must specify a y channel")

    if self.xchannel not in experiment.channels:
        raise util.CytoflowOpError('xchannel',
                                   "xchannel {0} is not in the experiment"
                                   .format(self.xchannel))

    if self.ychannel not in experiment.channels:
        raise util.CytoflowOpError('ychannel',
                                   "ychannel {0} is not in the experiment"
                                   .format(self.ychannel))

    if len(self.vertices) < 3:
        raise util.CytoflowOpError('vertices', "Must have at least 3 vertices")

    if any([len(x) != 2 for x in self.vertices]):
        raise util.CytoflowOpError('vertices',
                                   "All vertices must be lists or tuples "
                                   "of length = 2")

    # there's a bit of a subtlety here: if the vertices were
    # selected with an interactive plot, and that plot had scaled
    # axes, we need to apply that scale function to both the
    # vertices and the data before looking for path membership
    xscale = util.scale_factory(self.xscale, experiment, channel=self.xchannel)
    yscale = util.scale_factory(self.yscale, experiment, channel=self.ychannel)

    vertices = [(xscale(x), yscale(y)) for (x, y) in self.vertices]
    data = experiment.data[[self.xchannel, self.ychannel]].copy()
    data[self.xchannel] = xscale(data[self.xchannel])
    data[self.ychannel] = yscale(data[self.ychannel])

    # use a matplotlib Path because testing for membership is a fast C fn.
    path = mpl.path.Path(np.array(vertices))
    xy_data = data[[self.xchannel, self.ychannel]].values

    new_experiment = experiment.clone()
    new_experiment.add_condition(self.name, "bool", path.contains_points(xy_data))
    new_experiment.history.append(self.clone_traits(transient=lambda _: True))

    return new_experiment
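# A minimal sketch of the polygon-membership test used above, with matplotlib's
# Path.contains_points doing the work.  Vertices and points are plain arrays
# here; in the operation itself both are pushed through the channel scales
# first so that gates drawn on scaled axes select the right events.
import numpy as np
import matplotlib.path as mpath

def points_in_polygon(vertices, xy_points):
    # vertices: list of (x, y) tuples; xy_points: array of shape (n_events, 2)
    path = mpath.Path(np.array(vertices))
    return path.contains_points(np.asarray(xy_points))

# e.g. points_in_polygon([(0, 0), (10, 0), (5, 8)], [[5, 3], [20, 20]])
# -> array([ True, False])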
def plot(self, experiment, **kwargs):
    """Plot a faceted 2D histogram view of two channels"""

    if not experiment:
        raise util.CytoflowViewError("No experiment specified")

    if not self.xchannel:
        raise util.CytoflowViewError("X channel not specified")

    if self.xchannel not in experiment.data:
        raise util.CytoflowViewError("X channel {0} not in the experiment"
                                     .format(self.xchannel))

    if not self.ychannel:
        raise util.CytoflowViewError("Y channel not specified")

    if self.ychannel not in experiment.data:
        raise util.CytoflowViewError("Y channel {0} not in the experiment"
                                     .format(self.ychannel))

    if self.xfacet and self.xfacet not in experiment.conditions:
        raise util.CytoflowViewError("X facet {0} not in the experiment"
                                     .format(self.xfacet))

    if self.yfacet and self.yfacet not in experiment.conditions:
        raise util.CytoflowViewError("Y facet {0} not in the experiment"
                                     .format(self.yfacet))

    if self.huefacet and self.huefacet not in experiment.metadata:
        raise util.CytoflowViewError("Hue facet {0} not in the experiment"
                                     .format(self.huefacet))

    if self.subset:
        try:
            data = experiment.query(self.subset).data.reset_index()
        except Exception:
            raise util.CytoflowViewError("Subset string '{0}' isn't valid"
                                         .format(self.subset))

        if len(data.index) == 0:
            raise util.CytoflowViewError("Subset string '{0}' returned no events"
                                         .format(self.subset))
    else:
        data = experiment.data

    xscale = util.scale_factory(self.xscale, experiment, self.xchannel)
    yscale = util.scale_factory(self.yscale, experiment, self.ychannel)

    kwargs['xscale'] = xscale
    kwargs['yscale'] = yscale

    scaled_xdata = xscale(data[self.xchannel])
    data = data[~np.isnan(scaled_xdata)]
    scaled_xdata = scaled_xdata[~np.isnan(scaled_xdata)]

    scaled_ydata = yscale(data[self.ychannel])
    data = data[~np.isnan(scaled_ydata)]
    scaled_ydata = scaled_ydata[~np.isnan(scaled_ydata)]

    # find good bin counts
    num_xbins = util.num_hist_bins(scaled_xdata)
    num_ybins = util.num_hist_bins(scaled_ydata)

    # there are situations where this produces an unreasonable estimate.
    if num_xbins > self._max_bins:
        warnings.warn("Capping X bins to {}! To increase this limit, "
                      "change _max_bins".format(self._max_bins))
        num_xbins = self._max_bins

    if num_ybins > self._max_bins:
        warnings.warn("Capping Y bins to {}! To increase this limit, "
                      "change _max_bins".format(self._max_bins))
        num_ybins = self._max_bins

    kwargs.setdefault('smoothed', False)
    if kwargs['smoothed']:
        num_xbins //= 2
        num_ybins //= 2

    _, xedges, yedges = np.histogram2d(scaled_xdata, scaled_ydata,
                                       bins=(num_xbins, num_ybins))

    kwargs['xedges'] = xscale.inverse(xedges)
    kwargs['yedges'] = yscale.inverse(yedges)

    kwargs.setdefault('antialiased', True)

    g = sns.FacetGrid(data,
                      size = 6,
                      aspect = 1.5,
                      col = (self.xfacet if self.xfacet else None),
                      row = (self.yfacet if self.yfacet else None),
                      hue = (self.huefacet if self.huefacet else None),
                      col_order = (np.sort(data[self.xfacet].unique()) if self.xfacet else None),
                      row_order = (np.sort(data[self.yfacet].unique()) if self.yfacet else None),
                      hue_order = (np.sort(data[self.huefacet].unique()) if self.huefacet else None),
                      sharex = False,
                      sharey = False)

    for ax in g.axes.flatten():
        ax.set_xscale(self.xscale, **xscale.mpl_params)
        ax.set_yscale(self.yscale, **yscale.mpl_params)

    g.map(_hist2d, self.xchannel, self.ychannel, **kwargs)

    # if we have a hue facet and a lot of hues, make a color bar instead
    # of a super-long legend.
    if self.huefacet:
        current_palette = mpl.rcParams['axes.color_cycle']
        if len(g.hue_names) > len(current_palette):
            plot_ax = plt.gca()
            cmap = mpl.colors.ListedColormap(sns.color_palette("husl",
                                                               n_colors = len(g.hue_names)))
            cax, _ = mpl.colorbar.make_axes(plt.gca())
            norm = mpl.colors.Normalize(vmin = np.min(g.hue_names),
                                        vmax = np.max(g.hue_names),
                                        clip = False)
            mpl.colorbar.ColorbarBase(cax,
                                      cmap = cmap,
                                      norm = norm,
                                      label = self.huefacet)
            plt.sca(plot_ax)
        else:
            g.add_legend(title = self.huefacet)
def apply(self, experiment): """ Applies the binning to an experiment. Parameters ---------- experiment : Experiment the old_experiment to which this op is applied Returns ------- Experiment A new experiment with a condition column named :attr:`name`, which contains the location of the left-most edge of the bin that the event is in. If :attr:`bin_count_name` is set, another column is added with that name as well, containing the number of events in the same bin as the event. """ if experiment is None: raise util.CytoflowOpError('experiment', "no experiment specified") if not self.name: raise util.CytoflowOpError('name', "Name is not set") if self.name in experiment.data.columns: raise util.CytoflowOpError( 'name', "Name {} is in the experiment already".format(self.name)) if self.bin_count_name and self.bin_count_name in experiment.data.columns: raise util.CytoflowOpError( 'bin_count_name', "bin_count_name {} is in the experiment already".format( self.bin_count_name)) if not self.channel: raise util.CytoflowOpError('channel', "channel is not set") if self.channel not in experiment.data.columns: raise util.CytoflowOpError( 'channel', "channel {} isn't in the experiment".format(self.channel)) if not self.num_bins and not self.bin_width: raise util.CytoflowOpError('num_bins', "must set either bin number or width") if self.bin_width \ and not (self.scale == "linear" or self.scale == "log"): raise util.CytoflowOpError( 'scale', "Can only use bin_width with linear or log scale") scale = util.scale_factory(self.scale, experiment, channel=self.channel) scaled_data = scale(experiment.data[self.channel]) scaled_min = bn.nanmin(scaled_data) scaled_max = bn.nanmax(scaled_data) num_bins = self.num_bins if self.num_bins else \ (scaled_max - scaled_min) / self.bin_width if num_bins > self._max_num_bins: raise util.CytoflowOpError( None, "Too many bins! To increase this limit, " "change _max_num_bins (currently {})".format( self._max_num_bins)) scaled_bins = np.linspace(start=scaled_min, stop=scaled_max, num=num_bins) if len(scaled_bins) < 2: raise util.CytoflowOpError('num_bins', "Must have more than one bin") # put the data in bins bin_idx = np.digitize(scaled_data, scaled_bins[1:-1]) # now, back into data space bins = scale.inverse(scaled_bins) new_experiment = experiment.clone() new_experiment.add_condition(self.name, "float", bins[bin_idx]) # keep track of the bins we used, for prettier plotting later. new_experiment.metadata[self.name]["bin_scale"] = self.scale new_experiment.metadata[self.name]["bins"] = bins if self.bin_count_name: # TODO - this is a HUGE memory hog?! # TODO - fix this, then turn it on by default agg_count = new_experiment.data.groupby(self.name).count() agg_count = agg_count[agg_count.columns[0]] # have to make the condition a float64, because if we're in log # space there may be events that have NaN as the bin number. new_experiment.add_condition( self.bin_count_name, "float64", new_experiment[self.name].map(agg_count)) new_experiment.history.append( self.clone_traits(transient=lambda _: True)) return new_experiment
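# A compact sketch of the count-based binning above: bin edges are computed in
# scale space with np.linspace, events are assigned with np.digitize, and the
# edges are mapped back to data space for labeling.  A log10 transform stands
# in for the cytoflow scale object here; everything else is plain numpy.
import numpy as np

def equal_width_bins_in_log_space(data, num_bins):
    scaled = np.log10(data[data > 0])                  # stand-in for scale(data)
    edges_scaled = np.linspace(scaled.min(), scaled.max(), num=num_bins)
    bin_idx = np.digitize(scaled, edges_scaled[1:-1])  # which bin each event is in
    edges = 10 ** edges_scaled                         # back to data space, like scale.inverse()
    return edges[bin_idx], edges                       # left edge per event, plus all edges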
def plot(self, experiment, **kwargs): """Plot a bar chart""" if not experiment: raise util.CytoflowViewError("No experiment specified") if not self.variable: raise util.CytoflowViewError("variable not set") if self.variable not in experiment.conditions: raise util.CytoflowViewError("variable {0} not in the experiment" .format(self.variable)) if not (experiment.conditions[self.variable] == "float" or experiment.conditions[self.variable] == "int"): raise util.CytoflowViewError("variable {0} isn't numeric" .format(self.variable)) if not self.xchannel: raise util.CytoflowViewError("X channel isn't set.") if self.xchannel not in experiment.data: raise util.CytoflowViewError("X channel {0} isn't in the experiment" .format(self.xchannel)) if not self.xfunction: raise util.CytoflowViewError("X summary function isn't set") if not self.ychannel: raise util.CytoflowViewError("Y channel isn't set.") if self.ychannel not in experiment.data: raise util.CytoflowViewError("Y channel {0} isn't in the experiment" .format(self.ychannel)) if not self.yfunction: raise util.CytoflowViewError("Y summary function isn't set") if self.xfacet and self.xfacet not in experiment.conditions: raise util.CytoflowViewError("X facet {0} not in the experiment") if self.yfacet and self.yfacet not in experiment.conditions: raise util.CytoflowViewError("Y facet {0} not in the experiment") if self.huefacet and self.huefacet not in experiment.metadata: raise util.CytoflowViewError("Hue facet {0} not in the experiment") if self.x_error_bars and self.x_error_bars != 'data' \ and self.x_error_bars not in experiment.conditions: raise util.CytoflowViewError("x_error_bars must be either 'data' or " "a condition in the experiment") if self.x_error_bars and not self.x_error_function: raise util.CytoflowViewError("didn't set an x error function") if self.y_error_bars and self.y_error_bars != 'data' \ and self.y_error_bars not in experiment.conditions: raise util.CytoflowViewError("y_error_bars must be either 'data' or " "a condition in the experiment") if self.y_error_bars and not self.y_error_function: raise util.CytoflowViewError("didn't set an error function") kwargs.setdefault('antialiased', True) if self.subset: try: data = experiment.query(self.subset).data.reset_index() except: raise util.CytoflowViewError("Subset string '{0}' isn't valid" .format(self.subset)) if len(data.index) == 0: raise util.CytoflowViewError("Subset string '{0}' returned no events" .format(self.subset)) else: data = experiment.data group_vars = [self.variable] if self.xfacet: group_vars.append(self.xfacet) if self.yfacet: group_vars.append(self.yfacet) if self.huefacet: group_vars.append(self.huefacet) g = data.groupby(by = group_vars) plot_data = pd.DataFrame( {self.xchannel : g[self.xchannel].aggregate(self.xfunction), self.ychannel : g[self.ychannel].aggregate(self.yfunction)}) \ .reset_index() # compute the x error statistic if self.x_error_bars: if self.x_error_bars == 'data': # compute the error statistic on the same subsets as the summary # statistic error_stat = g[self.xchannel].aggregate(self.x_error_function).reset_index() else: # subdivide the data set further by the error_bars condition err_vars = list(group_vars) err_vars.append(self.x_error_bars) # apply the summary statistic to each subgroup data_g = data.groupby(by = err_vars) data_stat = data_g[self.xchannel].aggregate(self.xfunction).reset_index() # apply the error function to the summary statistics err_g = data_stat.groupby(by = group_vars) error_stat = 
err_g[self.xchannel].aggregate(self.x_error_function).reset_index() x_err_name = util.random_string(6) plot_data[x_err_name] = error_stat[self.xchannel] # compute the y error statistic if self.y_error_bars: if self.y_error_bars == 'data': # compute the error statistic on the same subsets as the summary # statistic error_stat = g[self.ychannel].aggregate(self.y_error_function).reset_index() else: # subdivide the data set further by the error_bars condition err_vars = list(group_vars) err_vars.append(self.y_error_bars) # apply the summary statistic to each subgroup data_g = data.groupby(by = err_vars) data_stat = data_g[self.ychannel].aggregate(self.yfunction).reset_index() # apply the error function to the summary statistics err_g = data_stat.groupby(by = group_vars) error_stat = err_g[self.ychannel].aggregate(self.y_error_function).reset_index() y_err_name = util.random_string(6) plot_data[y_err_name] = error_stat[self.ychannel] grid = sns.FacetGrid(plot_data, size = 6, aspect = 1.5, col = (self.xfacet if self.xfacet else None), row = (self.yfacet if self.yfacet else None), hue = (self.huefacet if self.huefacet else None), col_order = (np.sort(data[self.xfacet].unique()) if self.xfacet else None), row_order = (np.sort(data[self.yfacet].unique()) if self.yfacet else None), hue_order = (np.sort(data[self.huefacet].unique()) if self.huefacet else None), legend_out = False, sharex = False, sharey = False) xscale = util.scale_factory(self.xscale, experiment, self.xchannel) yscale = util.scale_factory(self.yscale, experiment, self.ychannel) for ax in grid.axes.flatten(): ax.set_xscale(self.xscale, **xscale.mpl_params) ax.set_yscale(self.yscale, **yscale.mpl_params) # plot the error bars first so the axis labels don't get overwritten if self.x_error_bars: grid.map(_x_error_bars, self.xchannel, x_err_name, self.ychannel) if self.y_error_bars: grid.map(_y_error_bars, self.xchannel, self.ychannel, y_err_name) grid.map(plt.plot, self.xchannel, self.ychannel, **kwargs) # if we have a hue facet and a lot of hues, make a color bar instead # of a super-long legend. if self.huefacet: current_palette = mpl.rcParams['axes.color_cycle'] if len(grid.hue_names) > len(current_palette): plot_ax = plt.gca() cmap = mpl.colors.ListedColormap(sns.color_palette("husl", n_colors = len(grid.hue_names))) cax, _ = mpl.colorbar.make_axes(plt.gca()) norm = mpl.colors.Normalize(vmin = np.min(grid.hue_names), vmax = np.max(grid.hue_names), clip = False) mpl.colorbar.ColorbarBase(cax, cmap = cmap, norm = norm, label = self.huefacet) plt.sca(plot_ax) else: grid.add_legend(title = self.huefacet)
def plot(self, experiment = None, **kwargs): """Plot a faceted histogram view of a channel""" if experiment is None: raise util.CytoflowViewError("No experiment specified") if not self.op.controls: raise util.CytoflowViewError("No controls specified") if not self.op.spillover: raise util.CytoflowViewError("No spillover matrix specified") kwargs.setdefault('histtype', 'stepfilled') kwargs.setdefault('alpha', 0.5) kwargs.setdefault('antialiased', True) plt.figure() # the completely arbitrary ordering of the channels channels = list(set([x for (x, _) in list(self.op.spillover.keys())])) num_channels = len(channels) for from_idx, from_channel in enumerate(channels): for to_idx, to_channel in enumerate(channels): if from_idx == to_idx: continue check_tube(self.op.controls[from_channel], experiment) tube_exp = ImportOp(tubes = [Tube(file = self.op.controls[from_channel])], channels = {experiment.metadata[c]["fcs_name"] : c for c in experiment.channels}, name_metadata = experiment.metadata['name_metadata']).apply() # apply previous operations for op in experiment.history: tube_exp = op.apply(tube_exp) # subset it if self.subset: try: tube_exp = tube_exp.query(self.subset) except Exception as e: raise util.CytoflowOpError("Subset string '{0}' isn't valid" .format(self.subset)) from e if len(tube_exp.data) == 0: raise util.CytoflowOpError("Subset string '{0}' returned no events" .format(self.subset)) tube_data = tube_exp.data xscale = util.scale_factory("logicle", tube_exp, channel = from_channel) yscale = util.scale_factory("logicle", tube_exp, channel = to_channel) plt.subplot(num_channels, num_channels, from_idx + (to_idx * num_channels) + 1) plt.xscale('logicle', **xscale.mpl_params) plt.yscale('logicle', **yscale.mpl_params) plt.xlabel(from_channel) plt.ylabel(to_channel) plt.scatter(tube_data[from_channel], tube_data[to_channel], alpha = 0.1, s = 1, marker = 'o') xs = np.logspace(-1, math.log(tube_data[from_channel].max(), 10)) ys = xs * self.op.spillover[(from_channel, to_channel)] plt.plot(xs, ys, 'g-', lw=3) plt.tight_layout(pad = 0.8)
def plot(self, experiment, **kwargs): """Plot a faceted 2d kernel density estimate""" if not experiment: raise util.CytoflowViewError("No experiment specified") if not self.xchannel: raise util.CytoflowViewError("X channel not specified") if self.xchannel not in experiment.data: raise util.CytoflowViewError("X channel {0} not in the experiment" .format(self.xchannel)) if not self.ychannel: raise util.CytoflowViewError("Y channel not specified") if self.ychannel not in experiment.data: raise util.CytoflowViewError("Y channel {0} not in the experiment" .format(self.ychannel)) if self.xfacet and self.xfacet not in experiment.conditions: raise util.CytoflowViewError("X facet {0} not in the experiment" .format(self.xfacet)) if self.yfacet and self.yfacet not in experiment.conditions: raise util.CytoflowViewError("Y facet {0} not in the experiment" .format(self.yfacet)) if self.huefacet and self.huefacet not in experiment.metadata: raise util.CytoflowViewError("Hue facet {0} not in the experiment" .format(self.huefacet)) if self.subset: try: data = experiment.query(self.subset).data.reset_index() except: raise util.CytoflowViewError("Subset string '{0}' isn't valid" .format(self.subset)) if len(data.index) == 0: raise util.CytoflowViewError("Subset string '{0}' returned no events" .format(self.subset)) else: data = experiment.data kwargs.setdefault('shade', False) kwargs.setdefault('min_alpha', 0.2) kwargs.setdefault('max_alpha', 0.9) kwargs.setdefault('n_levels', 10) g = sns.FacetGrid(data, size = 6, aspect = 1.5, col = (self.xfacet if self.xfacet else None), row = (self.yfacet if self.yfacet else None), hue = (self.huefacet if self.huefacet else None), col_order = (np.sort(data[self.xfacet].unique()) if self.xfacet else None), row_order = (np.sort(data[self.yfacet].unique()) if self.yfacet else None), hue_order = (np.sort(data[self.huefacet].unique()) if self.huefacet else None), legend_out = False, sharex = False, sharey = False) xscale = util.scale_factory(self.xscale, experiment, self.xchannel) yscale = util.scale_factory(self.yscale, experiment, self.ychannel) for ax in g.axes.flatten(): ax.set_xscale(self.xscale, **xscale.mpl_params) ax.set_yscale(self.yscale, **yscale.mpl_params) kwargs['xscale'] = xscale kwargs['yscale'] = yscale g.map(_bivariate_kdeplot, self.xchannel, self.ychannel, **kwargs) if self.huefacet: g.add_legend(title = self.huefacet)
def plot(self, experiment, **kwargs): """Plot a faceted histogram view of a channel""" if not experiment: raise util.CytoflowViewError("No experiment specified") if not self.channel: raise util.CytoflowViewError("Must specify a channel") if self.channel not in experiment.data: raise util.CytoflowViewError("Channel {0} not in the experiment" .format(self.channel)) if not self.variable: raise util.CytoflowViewError("Variable not specified") if not self.variable in experiment.conditions: raise util.CytoflowViewError("Variable {0} isn't in the experiment") if self.xfacet and self.xfacet not in experiment.conditions: raise util.CytoflowViewError("X facet {0} not in the experiment" .format(self.xfacet)) if self.yfacet and self.yfacet not in experiment.conditions: raise util.CytoflowViewError("Y facet {0} not in the experiment" .format(self.yfacet)) if self.huefacet and self.huefacet not in experiment.conditions: raise util.CytoflowViewError("Hue facet {0} not in the experiment" .format(self.huefacet)) if self.subset: try: data = experiment.query(self.subset).data.reset_index() except: raise util.CytoflowViewError("Subset string '{0}' isn't valid" .format(self.subset)) if len(data.index) == 0: raise util.CytoflowViewError("Subset string '{0}' returned no events" .format(self.subset)) else: data = experiment.data.copy() # get the scale scale = util.scale_factory(self.scale, experiment, self.channel) kwargs['data_scale'] = scale kwargs.setdefault('orient', 'v') g = sns.FacetGrid(data, size = 6, aspect = 1.5, col = (self.xfacet if self.xfacet else None), row = (self.yfacet if self.yfacet else None), col_order = (np.sort(data[self.xfacet].unique()) if self.xfacet else None), row_order = (np.sort(data[self.yfacet].unique()) if self.yfacet else None), legend_out = False, sharex = False, sharey = False) # set the scale for each set of axes; can't just call plt.xscale() for ax in g.axes.flatten(): if kwargs['orient'] == 'h': ax.set_xscale(self.scale, **scale.mpl_params) else: ax.set_yscale(self.scale, **scale.mpl_params) # this order-dependent thing weirds me out. if kwargs['orient'] == 'h': violin_args = [self.channel, self.variable] else: violin_args = [self.variable, self.channel] if self.huefacet: violin_args.append(self.huefacet) g.map(_violinplot, *violin_args, order = np.sort(data[self.variable].unique()), hue_order = (np.sort(data[self.huefacet].unique()) if self.huefacet else None), **kwargs) if self.huefacet: g.add_legend(title = self.huefacet)
def estimate(self, experiment, subset = None): """ Split the data set into bins and determine which ones to keep. Parameters ---------- experiment : Experiment The :class:`.Experiment` to use to estimate the gate parameters. subset : Str (default = None) If set, determine the gate parameters on only a subset of the ``experiment`` parameter. """ if experiment is None: raise util.CytoflowOpError('experiment', "No experiment specified") if self.xchannel not in experiment.data: raise util.CytoflowOpError('xchannel', "Column {0} not found in the experiment" .format(self.xchannel)) if self.ychannel not in experiment.data: raise util.CytoflowOpError('ychannel', "Column {0} not found in the experiment" .format(self.ychannel)) if self.min_quantile > 1.0: raise util.CytoflowOpError('min_quantile', "min_quantile must be <= 1.0") if self.max_quantile > 1.0: raise util.CytoflowOpError('max_quantile', "max_quantile must be <= 1.0") if not (self.max_quantile > self.min_quantile): raise util.CytoflowOpError('max_quantile', "max_quantile must be > min_quantile") if self.keep > 1.0: raise util.CytoflowOpError('keep', "keep must be <= 1.0") for b in self.by: if b not in experiment.conditions: raise util.CytoflowOpError('by', "Aggregation metadata {} not found, " "must be one of {}" .format(b, experiment.conditions)) if subset: try: experiment = experiment.query(subset) except: raise util.CytoflowOpError('subset', "Subset string '{0}' isn't valid" .format(subset)) if len(experiment) == 0: raise util.CytoflowOpError('subset', "Subset string '{0}' returned no events" .format(subset)) if self.by: groupby = experiment.data.groupby(self.by) else: # use a lambda expression to return a group that contains # all the events groupby = experiment.data.groupby(lambda _: True) # get the scale. estimate the scale params for the ENTIRE data set, # not subsets we get from groupby(). And we need to save it so that # the data is transformed the same way when we apply() self._xscale = xscale = util.scale_factory(self.xscale, experiment, channel = self.xchannel) self._yscale = yscale = util.scale_factory(self.yscale, experiment, channel = self.ychannel) xlim = (xscale.clip(experiment[self.xchannel].quantile(self.min_quantile)), xscale.clip(experiment[self.xchannel].quantile(self.max_quantile))) ylim = (yscale.clip(experiment[self.ychannel].quantile(self.min_quantile)), yscale.clip(experiment[self.ychannel].quantile(self.max_quantile))) self._xbins = xbins = xscale.inverse(np.linspace(xscale(xlim[0]), xscale(xlim[1]), self.bins)) self._ybins = ybins = yscale.inverse(np.linspace(yscale(ylim[0]), yscale(ylim[1]), self.bins)) for group, group_data in groupby: if len(group_data) == 0: raise util.CytoflowOpError('by', "Group {} had no data" .format(group)) h, _, _ = np.histogram2d(group_data[self.xchannel], group_data[self.ychannel], bins=[xbins, ybins]) h = scipy.ndimage.filters.gaussian_filter(h, sigma = self.sigma) i = scipy.stats.rankdata(h, method = "ordinal") - 1 i = np.unravel_index(np.argsort(-i), h.shape) goal_count = self.keep * len(group_data) curr_count = 0 num_bins = 0 while(curr_count < goal_count and num_bins < i[0].size): curr_count += h[i[0][num_bins], i[1][num_bins]] num_bins += 1 self._keep_xbins[group] = i[0][0:num_bins] self._keep_ybins[group] = i[1][0:num_bins] self._histogram[group] = h
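# A small sketch of the "keep the densest bins" selection used in the density
# gate above: smooth a 2D histogram, rank the bins from densest to sparsest,
# and walk down the ranking until the kept bins cover the requested fraction
# of events.  The keep fraction and smoothing width are illustrative.
import numpy as np
import scipy.ndimage
import scipy.stats

def densest_bins(h, keep=0.9, sigma=1.0):
    h = scipy.ndimage.gaussian_filter(h, sigma=sigma)

    # rank bins so that the first entry of `order` indexes the densest bin
    ranks = scipy.stats.rankdata(h, method="ordinal") - 1
    order = np.unravel_index(np.argsort(-ranks), h.shape)

    goal = keep * h.sum()
    kept, num_bins = 0.0, 0
    while kept < goal and num_bins < order[0].size:
        kept += h[order[0][num_bins], order[1][num_bins]]
        num_bins += 1

    return order[0][:num_bins], order[1][:num_bins]   # x- and y-bin indices to keep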
def plot(self, experiment, **kwargs): """Plot a faceted histogram view of a channel""" if not experiment: raise util.CytoflowViewError("No experiment specified") if not self.channel: raise util.CytoflowViewError("Must specify a channel") if self.channel not in experiment.data: raise util.CytoflowViewError("Channel {0} not in the experiment" .format(self.channel)) if self.xfacet and self.xfacet not in experiment.conditions: raise util.CytoflowViewError("X facet {0} not in the experiment" .format(self.xfacet)) if self.yfacet and self.yfacet not in experiment.conditions: raise util.CytoflowViewError("Y facet {0} not in the experiment" .format(self.yfacet)) if self.huefacet and self.huefacet not in experiment.conditions: raise util.CytoflowViewError("Hue facet {0} not in the experiment" .format(self.huefacet)) if self.subset: try: data = experiment.query(self.subset).data.reset_index() except: raise util.CytoflowViewError("Subset string '{0}' isn't valid" .format(self.subset)) if len(data.index) == 0: raise util.CytoflowViewError("Subset string '{0}' returned no events" .format(self.subset)) else: data = experiment.data #print scaled_data kwargs.setdefault('shade', True) kwargs['label'] = self.name g = sns.FacetGrid(data, size = 6, aspect = 1.5, col = (self.xfacet if self.xfacet else None), row = (self.yfacet if self.yfacet else None), hue = (self.huefacet if self.huefacet else None), col_order = (np.sort(data[self.xfacet].unique()) if self.xfacet else None), row_order = (np.sort(data[self.yfacet].unique()) if self.yfacet else None), hue_order = (np.sort(data[self.huefacet].unique()) if self.huefacet else None), legend_out = False, sharex = False, sharey = False) # get the scale kwargs['scale'] = scale = util.scale_factory(self.scale, experiment, self.channel) # set the scale for each set of axes; can't just call plt.xscale() for ax in g.axes.flatten(): ax.set_xscale(self.scale, **scale.mpl_params) g.map(_univariate_kdeplot, self.channel, **kwargs) if self.huefacet: g.add_legend(title = self.huefacet)
def plot(self, experiment, **kwargs): """Plot a faceted scatter plot view of a channel""" if not experiment: raise util.CytoflowViewError("No experiment specified") if not self.xchannel: raise util.CytoflowViewError("X channel not specified") if self.xchannel not in experiment.data: raise util.CytoflowViewError("X channel {0} not in the experiment".format(self.xchannel)) if not self.ychannel: raise util.CytoflowViewError("Y channel not specified") if self.ychannel not in experiment.data: raise util.CytoflowViewError("Y channel {0} not in the experiment".format(self.ychannel)) if self.xfacet and self.xfacet not in experiment.conditions: raise util.CytoflowViewError("X facet {0} not in the experiment".format(self.xfacet)) if self.yfacet and self.yfacet not in experiment.conditions: raise util.CytoflowViewError("Y facet {0} not in the experiment".format(self.yfacet)) if self.huefacet and self.huefacet not in experiment.metadata: raise util.CytoflowViewError("Hue facet {0} not in the experiment".format(self.huefacet)) if self.subset: try: data = experiment.query(self.subset) except: raise util.CytoflowViewError("Subset string '{0}' isn't valid".format(self.subset)) if len(data.index) == 0: raise util.CytoflowViewError("Subset string '{0}' returned no events".format(self.subset)) else: data = experiment.data kwargs.setdefault("alpha", 0.25) kwargs.setdefault("s", 2) kwargs.setdefault("marker", "o") kwargs.setdefault("antialiased", True) g = sns.FacetGrid( data, size=6, aspect=1.5, col=(self.xfacet if self.xfacet else None), row=(self.yfacet if self.yfacet else None), hue=(self.huefacet if self.huefacet else None), col_order=(np.sort(data[self.xfacet].unique()) if self.xfacet else None), row_order=(np.sort(data[self.yfacet].unique()) if self.yfacet else None), hue_order=(np.sort(data[self.huefacet].unique()) if self.huefacet else None), legend_out=False, sharex=False, sharey=False, ) xscale = util.scale_factory(self.xscale, experiment, self.xchannel) yscale = util.scale_factory(self.yscale, experiment, self.ychannel) for ax in g.axes.flatten(): ax.set_xscale(self.xscale, **xscale.mpl_params) ax.set_yscale(self.yscale, **yscale.mpl_params) g.map(plt.scatter, self.xchannel, self.ychannel, **kwargs) # if we have a hue facet and a lot of hues, make a color bar instead # of a super-long legend. if self.huefacet: current_palette = mpl.rcParams["axes.color_cycle"] if len(g.hue_names) > len(current_palette): plot_ax = plt.gca() cmap = mpl.colors.ListedColormap(sns.color_palette("husl", n_colors=len(g.hue_names))) cax, _ = mpl.colorbar.make_axes(plt.gca()) norm = mpl.colors.Normalize(vmin=np.min(g.hue_names), vmax=np.max(g.hue_names), clip=False) mpl.colorbar.ColorbarBase(cax, cmap=cmap, norm=norm) plt.sca(plot_ax) else: g.add_legend()
def plot(self, experiment, **kwargs): """Plot a faceted histogram view of a channel""" if not experiment: raise util.CytoflowViewError("No experiment specified") if not self.channel: raise util.CytoflowViewError("Must specify a channel") if self.channel not in experiment.data: raise util.CytoflowViewError("Channel {0} not in the experiment" .format(self.channel)) if self.xfacet and self.xfacet not in experiment.conditions: raise util.CytoflowViewError("X facet {0} not in the experiment" .format(self.xfacet)) if self.yfacet and self.yfacet not in experiment.conditions: raise util.CytoflowViewError("Y facet {0} not in the experiment" .format(self.yfacet)) if self.huefacet and self.huefacet not in experiment.conditions: raise util.CytoflowViewError("Hue facet {0} not in the experiment" .format(self.huefacet)) if self.subset: try: data = experiment.query(self.subset).data.reset_index() except: raise util.CytoflowViewError("Subset string '{0}' isn't valid" .format(self.subset)) if len(experiment.data) == 0: raise util.CytoflowViewError("Subset string '{0}' returned no events" .format(self.subset)) else: data = experiment.data # get the scale scale = util.scale_factory(self.scale, experiment, self.channel) scaled_data = scale(data[self.channel]) #print scaled_data kwargs.setdefault('histtype', 'stepfilled') kwargs.setdefault('alpha', 0.5) kwargs.setdefault('antialiased', True) # estimate a "good" number of bins; see cytoflow.utility.num_hist_bins # for a reference. num_bins = util.num_hist_bins(scaled_data) # clip num_bins to (50, 1000) num_bins = max(min(num_bins, 1000), 50) xmin = bottleneck.nanmin(scaled_data) xmax = bottleneck.nanmax(scaled_data) if (self.huefacet and "bins" in experiment.metadata[self.huefacet] and experiment.metadata[self.huefacet]["bin_scale"] == self.scale): # if we color facet by the result of a BinningOp and we don't # match the BinningOp bins with the histogram bins, we get # gnarly aliasing. # each color gets at least one bin. however, if the estimated # number of bins for the histogram is much larger than the # number of colors, sub-divide each color into multiple bins. 
bins = experiment.metadata[self.huefacet]["bins"] bins = np.append(bins, xmax) num_hues = len(data[self.huefacet].unique()) bins_per_hue = math.ceil(num_bins / num_hues) new_bins = [xmin] for end in [b for b in bins if (b > xmin and b <= xmax)]: new_bins = np.append(new_bins, np.linspace(new_bins[-1], end, bins_per_hue + 1, endpoint = True)[1:]) bins = scale.inverse(new_bins) else: bin_width = (xmax - xmin) / num_bins bins = scale.inverse(np.arange(xmin, xmax, bin_width)) bins = np.append(bins, scale.inverse(xmax)) # take care of a rare rounding error, where the last observation is # a liiiitle bit more than the last bin, which makes plt.hist() puke bins[-1] += 1 kwargs.setdefault('bins', bins) # mask out the data that's not in the scale domain data = data[~np.isnan(scaled_data)] g = sns.FacetGrid(data, size = 6, aspect = 1.5, col = (self.xfacet if self.xfacet else None), row = (self.yfacet if self.yfacet else None), hue = (self.huefacet if self.huefacet else None), col_order = (np.sort(data[self.xfacet].unique()) if self.xfacet else None), row_order = (np.sort(data[self.yfacet].unique()) if self.yfacet else None), hue_order = (np.sort(data[self.huefacet].unique()) if self.huefacet else None), legend_out = False, sharex = False, sharey = False) # set the scale for each set of axes; can't just call plt.xscale() for ax in g.axes.flatten(): ax.set_xscale(self.scale, **scale.mpl_params) g.map(plt.hist, self.channel, **kwargs) # if we have a hue facet and a lot of hues, make a color bar instead # of a super-long legend. if self.huefacet: current_palette = mpl.rcParams['axes.color_cycle'] if len(g.hue_names) > len(current_palette): plot_ax = plt.gca() cmap = mpl.colors.ListedColormap(sns.color_palette("husl", n_colors = len(g.hue_names))) cax, _ = mpl.colorbar.make_axes(plt.gca()) norm = mpl.colors.Normalize(vmin = np.min(g.hue_names), vmax = np.max(g.hue_names), clip = False) mpl.colorbar.ColorbarBase(cax, cmap = cmap, norm = norm, label = self.huefacet) plt.sca(plot_ax) else: g.add_legend(title = self.huefacet)
def plot(self, experiment = None, **kwargs): """ Plot a diagnostic of the bleedthrough model computation. """ if experiment is None: raise util.CytoflowViewError('experiment', "No experiment specified") if not self.op.controls: raise util.CytoflowViewError('op', "No controls specified") if not self.op.spillover: raise util.CytoflowViewError('op', "No spillover matrix specified") kwargs.setdefault('histtype', 'stepfilled') kwargs.setdefault('alpha', 0.5) kwargs.setdefault('antialiased', True) plt.figure() # the completely arbitrary ordering of the channels channels = list(set([x for (x, _) in list(self.op.spillover.keys())])) num_channels = len(channels) for from_idx, from_channel in enumerate(channels): for to_idx, to_channel in enumerate(channels): if from_idx == to_idx: continue tube_data = self.op._sample[from_channel] # for ReadTheDocs, which doesn't have swig import sys if sys.modules['cytoflow.utility.logicle_ext.Logicle'].__name__ != 'cytoflow.utility.logicle_ext.Logicle': scale_name = 'log' else: scale_name = 'logicle' xscale = util.scale_factory(scale_name, experiment, channel = from_channel) yscale = util.scale_factory(scale_name, experiment, channel = to_channel) plt.subplot(num_channels, num_channels, from_idx + (to_idx * num_channels) + 1) plt.xscale(scale_name, **xscale.mpl_params) plt.yscale(scale_name, **yscale.mpl_params) plt.xlabel(from_channel) plt.ylabel(to_channel) plt.scatter(tube_data[from_channel], tube_data[to_channel], alpha = 1, s = 1, marker = 'o') xs = np.logspace(-1, math.log(tube_data[from_channel].max(), 10)) ys = xs * self.op.spillover[(from_channel, to_channel)] plt.plot(xs, ys, 'g-', lw=3) plt.tight_layout(pad = 0.8)
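# A standalone sketch of the diagnostic overlay above: each panel compares the
# measured bleedthrough (to_channel vs. from_channel) against the straight line
# y = spillover * x implied by the estimated coefficient.  Both the control data
# and the coefficient below are synthetic.
import numpy as np
import matplotlib.pyplot as plt

np.random.seed(2)
from_channel = np.random.lognormal(mean = 6, sigma = 1, size = 1000)
spillover = 0.12                                # hypothetical coefficient
to_channel = spillover * from_channel * np.random.normal(1.0, 0.1, size = 1000)

plt.xscale('log')
plt.yscale('log')
plt.scatter(from_channel, to_channel, alpha = 0.5, s = 1, marker = 'o')

xs = np.logspace(-1, np.log10(from_channel.max()))
plt.plot(xs, xs * spillover, 'g-', lw = 3)      # the estimated spillover line
plt.xlabel('from_channel')
plt.ylabel('to_channel')
plt.show()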
def apply(self, experiment): """Applies the binning to an experiment. Parameters ---------- experiment : Experiment the old_experiment to which this op is applied Returns ------- a new experiment, the same as old_experiment but with a new column the same as the operation name. The bool is True if the event's measurement in self.channel is greater than self.low and less than self.high; it is False otherwise. """ if not experiment: raise util.CytoflowOpError("no experiment specified") if not self.name: raise util.CytoflowOpError("name is not set") if self.name in experiment.data.columns: raise util.CytoflowOpError("name {0} is in the experiment already" .format(self.name)) if self.bin_count_name and self.bin_count_name in experiment.data.columns: raise util.CytoflowOpError("bin_count_name {0} is in the experiment already" .format(self.bin_count_name)) if not self.channel: raise util.CytoflowOpError("channel is not set") if self.channel not in experiment.data.columns: raise util.CytoflowOpError("channel {0} isn't in the experiment" .format(self.channel)) if self.num_bins is Undefined and self.bin_width is Undefined: raise util.CytoflowOpError("must set either bin number or width") if self.num_bins is Undefined \ and not (self.scale == "linear" or self.scale == "log"): raise util.CytoflowOpError("Can only use bin_width with linear or log scale") scale = util.scale_factory(self.scale, experiment, self.channel) scaled_data = scale(experiment.data[self.channel]) channel_min = bn.nanmin(scaled_data) channel_max = bn.nanmax(scaled_data) num_bins = self.num_bins if self.num_bins is not Undefined else \ (channel_max - channel_min) / self.bin_width bins = np.linspace(start = channel_min, stop = channel_max, num = num_bins) # bins need to be internal; drop the first and last one bins = bins[1:-1] new_experiment = experiment.clone() new_experiment.add_condition(self.name, "int", np.digitize(scaled_data, bins)) # if we're log-scaled (for example), don't label data that isn't # showable on a log scale! new_experiment.data.ix[np.isnan(scaled_data), self.name] = np.NaN # keep track of the bins we used, for pretty plotting later. new_experiment.metadata[self.name]["bin_scale"] = self.scale new_experiment.metadata[self.name]["bins"] = bins if self.bin_count_name: # TODO - this is a HUGE memory hog?! agg_count = new_experiment.data.groupby(self.name).count() agg_count = agg_count[agg_count.columns[0]] # have to make the condition a float64, because if we're in log # space there may be events that have NaN as the bin number. new_experiment.add_condition( self.bin_count_name, "float64", new_experiment[self.name].map(agg_count)) new_experiment.history.append(self.clone_traits()) return new_experiment
def plot(self, experiment, **kwargs): """Plot a faceted 2d kernel density estimate""" if not experiment: raise util.CytoflowViewError("No experiment specified") if not self.xchannel: raise util.CytoflowViewError("X channel not specified") if self.xchannel not in experiment.data: raise util.CytoflowViewError( "X channel {0} not in the experiment".format(self.xchannel)) if not self.ychannel: raise util.CytoflowViewError("Y channel not specified") if self.ychannel not in experiment.data: raise util.CytoflowViewError( "Y channel {0} not in the experiment".format(self.ychannel)) if self.xfacet and self.xfacet not in experiment.conditions: raise util.CytoflowViewError( "X facet {0} not in the experiment".format(self.xfacet)) if self.yfacet and self.yfacet not in experiment.conditions: raise util.CytoflowViewError( "Y facet {0} not in the experiment".format(self.yfacet)) if self.huefacet and self.huefacet not in experiment.metadata: raise util.CytoflowViewError( "Hue facet {0} not in the experiment".format(self.huefacet)) facets = filter(lambda x: x, [self.xfacet, self.yfacet, self.huefacet]) if len(facets) != len(set(facets)): raise util.CytoflowViewError("Can't reuse facets") col_wrap = kwargs.pop('col_wrap', None) if col_wrap and self.yfacet: raise util.CytoflowViewError( "Can't set yfacet and col_wrap at the same time.") if col_wrap and not self.xfacet: raise util.CytoflowViewError("Must set xfacet to use col_wrap.") if self.subset: try: data = experiment.query(self.subset).data.reset_index() except: raise util.CytoflowViewError( "Subset string '{0}' isn't valid".format(self.subset)) if len(data) == 0: raise util.CytoflowViewError( "Subset string '{0}' returned no events".format( self.subset)) else: data = experiment.data kwargs.setdefault('shade', False) kwargs.setdefault('min_alpha', 0.2) kwargs.setdefault('max_alpha', 0.9) kwargs.setdefault('n_levels', 10) xscale = util.scale_factory(self.xscale, experiment, channel=self.xchannel) yscale = util.scale_factory(self.yscale, experiment, channel=self.ychannel) # adjust the limits to clip extreme values min_quantile = kwargs.pop("min_quantile", 0.001) max_quantile = kwargs.pop("max_quantile", 0.999) xlim = kwargs.pop("xlim", None) if xlim is None: xlim = (xscale.clip(data[self.xchannel].quantile(min_quantile)), xscale.clip(data[self.xchannel].quantile(max_quantile))) ylim = kwargs.pop("ylim", None) if ylim is None: ylim = (yscale.clip(data[self.ychannel].quantile(min_quantile)), yscale.clip(data[self.ychannel].quantile(max_quantile))) sharex = kwargs.pop('sharex', True) sharey = kwargs.pop('sharey', True) cols = col_wrap if col_wrap else \ len(data[self.xfacet].unique()) if self.xfacet else 1 g = sns.FacetGrid(data, size=(6 / cols), aspect=1.5, col=(self.xfacet if self.xfacet else None), row=(self.yfacet if self.yfacet else None), hue=(self.huefacet if self.huefacet else None), col_order=(np.sort(data[self.xfacet].unique()) if self.xfacet else None), row_order=(np.sort(data[self.yfacet].unique()) if self.yfacet else None), hue_order=(np.sort(data[self.huefacet].unique()) if self.huefacet else None), col_wrap=col_wrap, legend_out=False, sharex=sharex, sharey=sharey, xlim=xlim, ylim=ylim) for ax in g.axes.flatten(): ax.set_xscale(self.xscale, **xscale.mpl_params) ax.set_yscale(self.yscale, **yscale.mpl_params) kwargs['xscale'] = xscale kwargs['yscale'] = yscale g.map(_bivariate_kdeplot, self.xchannel, self.ychannel, **kwargs) # if we are sharing y axes, make sure the y scale is the same for each if sharey: fig = plt.gcf() fig_y_min = float("inf") 
fig_y_max = float("-inf") for ax in fig.get_axes(): ax_y_min, ax_y_max = ax.get_ylim() if ax_y_min < fig_y_min: fig_y_min = ax_y_min if ax_y_max > fig_y_max: fig_y_max = ax_y_max for ax in fig.get_axes(): ax.set_ylim(fig_y_min, fig_y_max) # if we have are sharing x axes, make sure the x scale is the same for each if sharex: fig = plt.gcf() fig_x_min = float("inf") fig_x_max = float("-inf") for ax in fig.get_axes(): ax_x_min, ax_x_max = ax.get_xlim() if ax_x_min < fig_x_min: fig_x_min = ax_x_min if ax_x_max > fig_x_max: fig_x_max = ax_x_max for ax in fig.get_axes(): ax.set_xlim(fig_x_min, fig_x_max) if self.huefacet: current_palette = mpl.rcParams['axes.color_cycle'] if util.is_numeric(experiment.data[self.huefacet]) and \ len(g.hue_names) > len(current_palette): plot_ax = plt.gca() cmap = mpl.colors.ListedColormap( sns.color_palette("husl", n_colors=len(g.hue_names))) cax, _ = mpl.colorbar.make_axes(plt.gca()) hue_scale = util.scale_factory(self.huescale, experiment, condition=self.huefacet) mpl.colorbar.ColorbarBase(cax, cmap=cmap, norm=hue_scale.color_norm(), label=self.huefacet) plt.sca(plot_ax) else: g.add_legend(title=self.huefacet)
def plot(self, experiment, data, **kwargs): """ Base function for facetted plotting Parameters ---------- experiment: Experiment The :class:`.Experiment` to plot using this view. title : str Set the plot title xlabel, ylabel : str Set the X and Y axis labels huelabel : str Set the label for the hue facet (in the legend) legend : bool Plot a legend for the color or hue facet? Defaults to `True`. sharex, sharey : bool If there are multiple subplots, should they share axes? Defaults to `True`. col_wrap : int If `xfacet` is set and `yfacet` is not set, you can "wrap" the subplots around so that they form a multi-row grid by setting `col_wrap` to the number of columns you want. sns_style : {"darkgrid", "whitegrid", "dark", "white", "ticks"} Which `seaborn` style to apply to the plot? Default is `whitegrid`. sns_context : {"paper", "notebook", "talk", "poster"} Which `seaborn` context to use? Controls the scaling of plot elements such as tick labels and the legend. Default is `talk`. despine : Bool Remove the top and right axes from the plot? Default is `True`. Other Parameters ---------------- cmap : matplotlib colormap If plotting a huefacet with many values, use this color map instead of the default. norm : matplotlib.colors.Normalize If plotting a huefacet with many values, use this object for color scale normalization. """ if experiment is None: raise util.CytoflowViewError('experiment', "No experiment specified") col_wrap = kwargs.pop('col_wrap', None) if col_wrap is not None and self.yfacet: raise util.CytoflowViewError( 'yfacet', "Can't set yfacet and col_wrap at the same time.") if col_wrap is not None and not self.xfacet: raise util.CytoflowViewError('xfacet', "Must set xfacet to use col_wrap.") if col_wrap is not None and col_wrap < 2: raise util.CytoflowViewError(None, "col_wrap must be None or > 1") title = kwargs.pop("title", None) xlabel = kwargs.pop("xlabel", None) ylabel = kwargs.pop("ylabel", None) huelabel = kwargs.pop("huelabel", self.huefacet) sharex = kwargs.pop("sharex", True) sharey = kwargs.pop("sharey", True) legend = kwargs.pop('legend', True) sns_style = kwargs.pop('sns_style', 'whitegrid') sns_context = kwargs.pop('sns_context', 'talk') despine = kwargs.pop('despine', False) cols = col_wrap if col_wrap else \ len(data[self.xfacet].unique()) if self.xfacet else 1 sns.set_style(sns_style) sns.set_context(sns_context) g = sns.FacetGrid(data, size=6 / cols, aspect=1.5, col=(self.xfacet if self.xfacet else None), row=(self.yfacet if self.yfacet else None), hue=(self.huefacet if self.huefacet else None), col_order=(np.sort(data[self.xfacet].unique()) if self.xfacet else None), row_order=(np.sort(data[self.yfacet].unique()) if self.yfacet else None), hue_order=(np.sort(data[self.huefacet].unique()) if self.huefacet else None), col_wrap=col_wrap, legend_out=False, sharex=sharex, sharey=sharey) plot_ret = self._grid_plot(experiment=experiment, grid=g, **kwargs) kwargs.update(plot_ret) xscale = kwargs.pop("xscale", None) yscale = kwargs.pop("yscale", None) xlim = kwargs.pop("xlim", None) ylim = kwargs.pop("ylim", None) for ax in g.axes.flatten(): if xscale: ax.set_xscale(xscale.name, **xscale.mpl_params) if yscale: ax.set_yscale(yscale.name, **yscale.mpl_params) if xlim: ax.set_xlim(xlim) if ylim: ax.set_ylim(ylim) # if we are sharing x axes, make sure the x limits are the same for each if sharex: fig = plt.gcf() fig_x_min = float("inf") fig_x_max = float("-inf") for ax in fig.get_axes(): ax_x_min, ax_x_max = ax.get_xlim() if ax_x_min < fig_x_min: fig_x_min = ax_x_min if 
ax_x_max > fig_x_max: fig_x_max = ax_x_max for ax in fig.get_axes(): ax.set_xlim(fig_x_min, fig_x_max) # if we are sharing y axes, make sure the y limits are the same for each if sharey: fig = plt.gcf() fig_y_max = float("-inf") for ax in fig.get_axes(): _, ax_y_max = ax.get_ylim() if ax_y_max > fig_y_max: fig_y_max = ax_y_max for ax in fig.get_axes(): ax.set_ylim(None, fig_y_max) # if we have a hue facet and a lot of hues, make a color bar instead # of a super-long legend. cmap = kwargs.pop('cmap', None) norm = kwargs.pop('norm', None) legend_data = kwargs.pop('legend_data', None) if legend: if cmap and norm: plot_ax = plt.gca() cax, _ = mpl.colorbar.make_axes(plt.gcf().get_axes()) mpl.colorbar.ColorbarBase(cax, cmap, norm) plt.sca(plot_ax) elif self.huefacet: current_palette = mpl.rcParams['axes.prop_cycle'] if util.is_numeric(data[self.huefacet]) and \ len(g.hue_names) > len(current_palette): cmap = mpl.colors.ListedColormap( sns.color_palette("husl", n_colors=len(g.hue_names))) hue_scale = util.scale_factory( self.huescale, experiment, data=data[self.huefacet].values) plot_ax = plt.gca() cax, _ = mpl.colorbar.make_axes(plt.gcf().get_axes()) mpl.colorbar.ColorbarBase(cax, cmap=cmap, norm=hue_scale.norm(), label=huelabel) plt.sca(plot_ax) else: g.add_legend(title=huelabel, legend_data=legend_data) ax = g.axes.flat[0] legend = ax.legend_ for lh in legend.legendHandles: lh.set_alpha(1.0) if title: plt.title(title) if xlabel == "": xlabel = None if ylabel == "": ylabel = None g.set_axis_labels(xlabel, ylabel) sns.despine(top=despine, right=despine, bottom=False, left=False)
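# A standalone sketch of the legend clean-up above: when markers are drawn
# nearly transparent, the legend swatches inherit that alpha, so the handles of
# the first facet's legend are reset to fully opaque.  This assumes, like the
# code above, that add_legend() with legend_out = False puts the legend on the
# first axes; newer matplotlib spells legendHandles as legend_handles.
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

np.random.seed(4)
df = pd.DataFrame({'x': np.random.rand(300), 'y': np.random.rand(300),
                   'hue': np.tile(['a', 'b', 'c'], 100)})

g = sns.FacetGrid(df, hue = 'hue', legend_out = False)
g.map(plt.scatter, 'x', 'y', alpha = 0.1, s = 5)
g.add_legend()

legend = g.axes.flat[0].legend_
handles = getattr(legend, 'legend_handles', None)
if handles is None:                      # older matplotlib
    handles = legend.legendHandles
for lh in handles:
    lh.set_alpha(1.0)
plt.show()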
def estimate(self, experiment, subset=None): """ Estimate the Gaussian mixture model parameters """ if experiment is None: raise util.CytoflowOpError("No experiment specified") if len(self.channels) == 0: raise util.CytoflowOpError("Must set at least one channel") for c in self.channels: if c not in experiment.data: raise util.CytoflowOpError( "Channel {0} not found in the experiment".format(c)) for c in self.scale: if c not in self.channels: raise util.CytoflowOpError( "Scale set for channel {0}, but it isn't " "in the experiment".format(c)) for b in self.by: if b not in experiment.data: raise util.CytoflowOpError("Aggregation metadata {0} not found" " in the experiment".format(b)) if len(experiment.data[b].unique()) > 100: #WARNING - magic number raise util.CytoflowOpError( "More than 100 unique values found for" " aggregation metadata {0}. Did you" " accidentally specify a data channel?".format(b)) if subset: try: experiment = experiment.query(subset) except: raise util.CytoflowViewError( "Subset string '{0}' isn't valid".format(subset)) if len(experiment) == 0: raise util.CytoflowViewError( "Subset string '{0}' returned no events".format(subset)) if self.by: groupby = experiment.data.groupby(self.by) else: # use a lambda expression to return a group that contains # all the events groupby = experiment.data.groupby(lambda _: True) # get the scale. estimate the scale params for the ENTIRE data set, # not subsets we get from groupby(). And we need to save it so that # the data is transformed the same way when we apply() for c in self.channels: if c in self.scale: self._scale[c] = util.scale_factory(self.scale[c], experiment, channel=c) # if self.scale[c] == 'log': # self._scale[c].mode = 'mask' else: self._scale[c] = util.scale_factory(util.get_default_scale(), experiment, channel=c) for data_group, data_subset in groupby: if len(data_subset) == 0: raise util.CytoflowOpError( "Group {} had no data".format(data_group)) x = data_subset.loc[:, self.channels[:]] for c in self.channels: x[c] = self._scale[c](x[c]) # drop data that isn't in the scale range for c in self.channels: x = x[~(np.isnan(x[c]))] x = x.values #### choose the number of clusters and fit the kmeans num_clusters = [ util.num_hist_bins(x[:, c]) for c in range(len(self.channels)) ] num_clusters = np.ceil(np.median(num_clusters)) num_clusters = int(num_clusters) self._kmeans[data_group] = kmeans = \ sklearn.cluster.MiniBatchKMeans(n_clusters = num_clusters) kmeans.fit(x) x_labels = kmeans.predict(x) d = len(self.channels) #### use the kmeans centroids to parameterize a finite gaussian #### mixture model which estimates the density function d = len(self.channels) s0 = np.zeros([d, d]) for j in range(d): r = x[d].max() - x[d].min() s0[j, j] = (r / (num_clusters**(1. 
/ d)))**0.5 means = [] weights = [] normals = [] beta_max = [] for k in range(num_clusters): xk = x[x_labels == k] num_k = np.sum(x_labels == k) weight_k = num_k / len(x_labels) mu = xk.mean(axis=0) means.append(mu) s = np.cov(xk, rowvar=False) el = num_k / (num_clusters + num_k) s_smooth = el * self.h * s + (1.0 - el) * self.h0 * s0 n = scipy.stats.multivariate_normal(mean=mu, cov=s_smooth) weights.append(weight_k) normals.append(lambda x, n=n: n.pdf(x)) # get appropriate step size for peak finding min_b = np.inf for b in np.diagonal(s_smooth): if np.sqrt(b) < min_b: min_b = np.sqrt(b) beta_max.append(b) self._normals[data_group] = normals self._density[ data_group] = density = lambda x, weights=weights, normals=normals: np.sum( [w * n(x) for w, n in zip(weights, normals)], axis=0) ### use optimization on the finite gmm to find the local peak for ### each kmeans cluster peaks = [] peak_clusters = [] # peak idx --> list of clusters min_mu = [np.inf] * len(self.channels) max_mu = [-1.0 * np.inf] * len(self.channels) for k in range(num_clusters): mu = means[k] for ci in range(len(self.channels)): if mu[ci] < min_mu[ci]: min_mu[ci] = mu[ci] if mu[ci] > max_mu[ci]: max_mu[ci] = mu[ci] constraints = [] for ci, c in enumerate(self.channels): constraints.append({ 'type': 'ineq', 'fun': lambda x, min_mu=min_mu[ci]: x - min_mu }) constraints.append({ 'type': 'ineq', 'fun': lambda x, max_mu=max_mu[ci]: max_mu - x }) for k in range(num_clusters): mu = means[k] f = lambda x: -1.0 * density(x) res = scipy.optimize.minimize(f, mu, method='COBYLA', constraints=constraints, options={ 'rhobeg': beta_max[k], 'maxiter': 5000 }) if not res.success: raise util.CytoflowOpError( "Peak finding failed for cluster {}: {}".format( k, res.message)) # ### The peak-searching algorithm from the paper. works fine, # ### but slow! 
we get similar results with the COBYLA # ### optimization method from scipy, using an appropriate rho # x0 = x = means[k] # k0 = k # b = beta_max[k] / 10.0 # Nsuc = 0 # n = 0 # # while(n < 1000): # # df = scipy.misc.derivative(density, x, 1e-6) # df = statsmodels.tools.numdiff.approx_fprime(x, density) # if np.linalg.norm(df) < 1e-3: # break # # y = x + b * df / np.linalg.norm(df) # if density(y) <= density(x): # Nsuc = 0 # b = b / 2.0 # continue # # Nsuc += 1 # if Nsuc >= 2: # b = min(2*b, beta_max[k]) # # ky = kmeans.predict(y[np.newaxis, :])[0] # if ky == k: # x = y # else: # k = ky # b = beta_max[k] / 10.0 # mu = means[k] # if density(mu) > density(y): # x = mu # else: # x = y # # n += 1 # # # # print("{} --> {}, {}".format(x0, x, n)) merged = False for pi, p in enumerate(peaks): if np.linalg.norm(p - res.x) < (1e-2): peak_clusters[pi].append(k) merged = True break if not merged: peak_clusters.append([k]) peaks.append(res.x) self._peaks[data_group] = peaks ### merge peaks that are sufficiently close groups = [[x] for x in range(len(peaks))] peak_groups = [x for x in range(len(peaks)) ] # peak idx --> group idx def max_tol(x, y): f = lambda a: density(a[np.newaxis, :]) # lx = kmeans.predict(x[np.newaxis, :])[0] # ly = kmeans.predict(y[np.newaxis, :])[0] n = len(x) n_scale = 1 # n_scale = np.sqrt(((nx + ny) / 2.0) / (n / num_clusters)) def tol(t): zt = x + t * (y - x) fhat_zt = f(x) + t * (f(y) - f(x)) return -1.0 * abs((f(zt) - fhat_zt) / fhat_zt) * n_scale res = scipy.optimize.minimize_scalar(tol, bounds=[0, 1], method='Bounded') if res.status != 0: raise util.CytoflowOpError( "tol optimization failed for {}, {}".format(x, y)) return -1.0 * res.fun def nearest_neighbor_dist(k): min_dist = np.inf for i in range(num_clusters): if i == k: continue dist = np.linalg.norm(means[k] - means[i]) if dist < min_dist: min_dist = dist return min_dist sk = [nearest_neighbor_dist(x) for x in range(num_clusters)] def s(x): k = kmeans.predict(x[np.newaxis, :])[0] return sk[k] def can_merge(g, h): for pg in g: for ph in h: vg = peaks[pg] vh = peaks[ph] dist_gh = np.linalg.norm(vg - vh) if max_tol(vg, vh) < self.tol and dist_gh / ( s(vg) + s(vh)) <= self.merge_dist: return True return False while True: if len(groups) == 1: break # find closest mergable groups min_dist = np.inf for gi in range(len(groups)): g = groups[gi] for hi in range(gi + 1, len(groups)): h = groups[hi] if can_merge(g, h): dist_gh = np.inf for pg in g: vg = peaks[pg] for ph in h: vh = peaks[ph] # print("vg {} vh {}".format(vg, vh)) dist_gh = min(dist_gh, np.linalg.norm(vg - vh)) if dist_gh < min_dist: min_gi = gi min_hi = hi min_dist = dist_gh if min_dist == np.inf: break # merge the groups groups[min_gi].extend(groups[min_hi]) for g in groups[min_hi]: peak_groups[g] = min_gi del groups[min_hi] cluster_group = [0] * num_clusters cluster_peaks = [0] * num_clusters for gi, g in enumerate(groups): for p in g: for cluster in peak_clusters[p]: cluster_group[cluster] = gi cluster_peaks[cluster] = p self._peaks[data_group] = peaks self._cluster_peak[data_group] = cluster_peaks self._cluster_group[data_group] = cluster_group
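# A toy one-dimensional illustration (synthetic, not the operation's channels)
# of the merge criterion sketched above: two peaks are merge candidates when the
# density along the segment between them never dips much below the straight
# line interpolating the two peak densities.  max_tol() returns the worst
# relative dip; a small value means there is no deep valley between the peaks.
import numpy as np
import scipy.stats
import scipy.optimize

# a two-component mixture density with closely spaced modes
n1 = scipy.stats.norm(loc = 0.0, scale = 1.0)
n2 = scipy.stats.norm(loc = 1.0, scale = 1.0)
density = lambda z: 0.5 * n1.pdf(z) + 0.5 * n2.pdf(z)

def max_tol(x, y):
    # largest relative deviation of the density from the chord between f(x), f(y)
    def tol(t):
        zt = x + t * (y - x)
        fhat_zt = density(x) + t * (density(y) - density(x))
        return -1.0 * abs((density(zt) - fhat_zt) / fhat_zt)
    res = scipy.optimize.minimize_scalar(tol, bounds = [0, 1], method = 'Bounded')
    return -1.0 * res.fun

print(max_tol(0.0, 1.0))    # small relative dip -> the two peaks would merge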
def plot(self, experiment, **kwargs): """Plot a faceted histogram view of a channel""" if not experiment: raise util.CytoflowViewError("No experiment specified") if not self.xchannel: raise util.CytoflowViewError("X channel not specified") if self.xchannel not in experiment.data: raise util.CytoflowViewError("X channel {0} not in the experiment" .format(self.xchannel)) if not self.ychannel: raise util.CytoflowViewError("Y channel not specified") if self.ychannel not in experiment.data: raise util.CytoflowViewError("Y channel {0} not in the experiment") if self.xfacet and self.xfacet not in experiment.conditions: raise util.CytoflowViewError("X facet {0} not in the experiment") if self.yfacet and self.yfacet not in experiment.conditions: raise util.CytoflowViewError("Y facet {0} not in the experiment") if self.huefacet and self.huefacet not in experiment.metadata: raise util.CytoflowViewError("Hue facet {0} not in the experiment") if self.subset: try: data = experiment.query(self.subset) except: raise util.CytoflowViewError("Subset string \'{0}\' not valid") if len(data.index) == 0: raise util.CytoflowViewError("Subset string '{0}' returned no events" .format(self.subset)) else: data = experiment.data #kwargs.setdefault('histtype', 'stepfilled') #kwargs.setdefault('alpha', 0.5) kwargs.setdefault('edgecolor', 'none') #kwargs.setdefault('mincnt', 1) #kwargs.setdefault('bins', 'log') kwargs.setdefault('antialiased', True) xmin, xmax = (np.amin(data[self.xchannel]), np.amax(data[self.xchannel])) ymin, ymax = (np.amin(data[self.ychannel]), np.amax(data[self.ychannel])) # to avoid issues with singular data, expand the min/max pairs xmin, xmax = mtrans.nonsingular(xmin, xmax, expander=0.1) ymin, ymax = mtrans.nonsingular(ymin, ymax, expander=0.1) extent = (xmin, xmax, ymin, ymax) kwargs.setdefault('extent', extent) xbins = util.num_hist_bins(experiment[self.xchannel]) ybins = util.num_hist_bins(experiment[self.ychannel]) bins = np.mean([xbins, ybins]) kwargs.setdefault('bins', bins) # Do not move above. don't ask. g = sns.FacetGrid(data, size = 6, aspect = 1.5, col = (self.xfacet if self.xfacet else None), row = (self.yfacet if self.yfacet else None), hue = (self.huefacet if self.huefacet else None), col_order = (np.sort(data[self.xfacet].unique()) if self.xfacet else None), row_order = (np.sort(data[self.yfacet].unique()) if self.yfacet else None), hue_order = (np.sort(data[self.huefacet].unique()) if self.huefacet else None), sharex = False, sharey = False) if(self.xscale != "linear" or self.yscale != "linear"): warnings.warn("hexbin is broken with scales other than \"linear\"", util.CytoflowViewWarning) xscale = util.scale_factory(self.xscale, experiment, self.xchannel) yscale = util.scale_factory(self.yscale, experiment, self.ychannel) for ax in g.axes.flatten(): ax.set_xscale(self.xscale, **xscale.mpl_params) ax.set_yscale(self.yscale, **yscale.mpl_params) g.map(plt.hexbin, self.xchannel, self.ychannel, **kwargs)
def estimate(self, experiment, subset = None): """ Estimate the Gaussian mixture model parameters. Parameters ---------- experiment : Experiment The data to use to estimate the mixture parameters subset : str (default = None) If set, a Python expression to determine the subset of the data to use to in the estimation. """ warn("GaussianMixture1DOp is DEPRECATED. Please use GaussianMixtureOp.", util.CytoflowOpWarning) if experiment is None: raise util.CytoflowOpError('experiment', "No experiment specified") if self.channel not in experiment.data: raise util.CytoflowOpError('channel', "Column {0} not found in the experiment" .format(self.channel)) for b in self.by: if b not in experiment.data: raise util.CytoflowOpError('by', "Aggregation metadata {} not found, " "must be one of {}" .format(b, experiment.conditions)) if self.num_components == 1 and self.posteriors: raise util.CytoflowOpError('num_components', "If num_components == 1, all posteriors are 1.") if subset: try: experiment = experiment.query(subset) except Exception as e: raise util.CytoflowOpError('subset', "Subset string '{0}' isn't valid" .format(subset)) from e if len(experiment) == 0: raise util.CytoflowOpError('subset', "Subset string '{0}' returned no events" .format(subset)) if self.by: by = sorted(self.by) groupby = experiment.data.groupby(by) else: # use a lambda expression to return a group that contains # all the events groupby = experiment.data.groupby(lambda _: True) # get the scale. estimate the scale params for the ENTIRE data set, # not subsets we get from groupby(). And we need to save it so that # the data is transformed the same way when we apply() self._scale = util.scale_factory(self.scale, experiment, channel = self.channel) gmms = {} for group, data_subset in groupby: if len(data_subset) == 0: raise util.CytoflowOpError(None, "Group {} had no data".format(group)) x = data_subset[self.channel].reset_index(drop = True) x = self._scale(x) # drop data that isn't in the scale range #x = pd.Series(self._scale(x)).dropna() x = x[~np.isnan(x)] gmm = mixture.GaussianMixture(n_components = self.num_components, random_state = 1) gmm.fit(x[:, np.newaxis]) if not gmm.converged_: raise util.CytoflowOpError(None, "Estimator didn't converge" " for group {0}" .format(group)) # to make sure we have a stable ordering, sort the components # by the means (so the first component has the lowest mean, # the next component has the next-lowest, etc.) sort_idx = np.argsort(gmm.means_[:, 0]) gmm.means_ = gmm.means_[sort_idx] gmm.weights_ = gmm.weights_[sort_idx] gmm.covariances_ = gmm.covariances_[sort_idx] gmms[group] = gmm self._gmms = gmms
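# A standalone sketch of the component-sorting step above: scikit-learn orders
# mixture components arbitrarily, so after fitting, the parameter arrays are
# re-ordered by component mean to get a stable labeling.  Synthetic 1-D data.
import numpy as np
from sklearn import mixture

np.random.seed(6)
x = np.concatenate([np.random.normal(0, 1, 500), np.random.normal(5, 1, 500)])

gmm = mixture.GaussianMixture(n_components = 2, random_state = 1)
gmm.fit(x[:, np.newaxis])

sort_idx = np.argsort(gmm.means_[:, 0])
gmm.means_ = gmm.means_[sort_idx]
gmm.weights_ = gmm.weights_[sort_idx]
gmm.covariances_ = gmm.covariances_[sort_idx]
# if predict() or score() is called afterwards, the cached Cholesky factors must
# be permuted the same way or they no longer match the covariances
gmm.precisions_cholesky_ = gmm.precisions_cholesky_[sort_idx]

print(gmm.means_.ravel())    # lowest-mean component first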
def plot(self, experiment, **kwargs): """Plot a bar chart""" if not experiment: raise util.CytoflowViewError("No experiment specified") if not self.by: raise util.CytoflowViewError("Independent variable 'by' not set") if self.by not in experiment.conditions: raise util.CytoflowViewError("Independent variable {0} not in the experiment" .format(self.by)) if not (experiment.conditions[self.by] == "float" or experiment.conditions[self.by] == "int"): raise util.CytoflowViewError("by variable {0} isn't numeric" .format(self.by)) if not self.xchannel: raise util.CytoflowViewError("X channel isn't set.") if self.xchannel not in experiment.data: raise util.CytoflowViewError("X channel {0} isn't in the experiment" .format(self.xchannel)) if not self.xfunction: raise util.CytoflowViewError("X summary function isn't set") if not self.ychannel: raise util.CytoflowViewError("Y channel isn't set.") if self.ychannel not in experiment.data: raise util.CytoflowViewError("Y channel {0} isn't in the experiment" .format(self.ychannel)) if not self.yfunction: raise util.CytoflowViewError("Y summary function isn't set") if self.xfacet and self.xfacet not in experiment.conditions: raise util.CytoflowViewError("X facet {0} not in the experiment") if self.yfacet and self.yfacet not in experiment.conditions: raise util.CytoflowViewError("Y facet {0} not in the experiment") if self.huefacet and self.huefacet not in experiment.metadata: raise util.CytoflowViewError("Hue facet {0} not in the experiment") kwargs.setdefault('antialiased', True) if self.subset: try: data = experiment.query(self.subset) except: raise util.CytoflowViewError("Subset string '{0}' isn't valid" .format(self.subset)) if len(data.index) == 0: raise util.CytoflowViewError("Subset string '{0}' returned no events" .format(self.subset)) else: data = experiment.data group_vars = [self.by] if self.xfacet: group_vars.append(self.xfacet) if self.yfacet: group_vars.append(self.yfacet) if self.huefacet: group_vars.append(self.huefacet) g = data.groupby(by = group_vars) plot_data = pd.DataFrame( {self.xchannel : g[self.xchannel].aggregate(self.xfunction), self.ychannel : g[self.ychannel].aggregate(self.yfunction)}) \ .reset_index() grid = sns.FacetGrid(plot_data, size = 6, aspect = 1.5, col = (self.xfacet if self.xfacet else None), row = (self.yfacet if self.yfacet else None), hue = (self.huefacet if self.huefacet else None), col_order = (np.sort(data[self.xfacet].unique()) if self.xfacet else None), row_order = (np.sort(data[self.yfacet].unique()) if self.yfacet else None), hue_order = (np.sort(data[self.huefacet].unique()) if self.huefacet else None), legend_out = False, sharex = False, sharey = False) xscale = util.scale_factory(self.xscale, experiment, self.xchannel) yscale = util.scale_factory(self.yscale, experiment, self.ychannel) for ax in grid.axes.flatten(): ax.set_xscale(self.xscale, **xscale.mpl_params) ax.set_yscale(self.yscale, **yscale.mpl_params) grid.map(plt.plot, self.xchannel, self.ychannel, **kwargs) # if we have a hue facet and a lot of hues, make a color bar instead # of a super-long legend. 
if self.huefacet: current_palette = mpl.rcParams['axes.color_cycle'] if len(grid.hue_names) > len(current_palette): plot_ax = plt.gca() cmap = mpl.colors.ListedColormap(sns.color_palette("husl", n_colors = len(grid.hue_names))) cax, _ = mpl.colorbar.make_axes(plt.gca()) norm = mpl.colors.Normalize(vmin = np.min(grid.hue_names), vmax = np.max(grid.hue_names), clip = False) mpl.colorbar.ColorbarBase(cax, cmap = cmap, norm = norm) plt.sca(plot_ax) else: grid.add_legend()
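# A standalone sketch of the summary step above: group the events by the
# independent variable (plus any facets) and aggregate each channel with its
# summary function before handing the result to FacetGrid.  Synthetic data and
# median as the summary function.
import numpy as np
import pandas as pd

np.random.seed(7)
data = pd.DataFrame({'Dox': np.repeat([1.0, 10.0, 100.0], 1000),
                     'Y2-A': np.random.lognormal(5, 1, 3000),
                     'V2-A': np.random.lognormal(4, 1, 3000)})

g = data.groupby(by = ['Dox'])
plot_data = pd.DataFrame({'Y2-A': g['Y2-A'].aggregate(np.median),
                          'V2-A': g['V2-A'].aggregate(np.median)}) \
              .reset_index()
print(plot_data)             # one row per Dox level, median of each channel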
def estimate(self, experiment, subset = None): """ Estimate the Gaussian mixture model parameters """ if not experiment: raise util.CytoflowOpError("No experiment specified") if self.xchannel not in experiment.data: raise util.CytoflowOpError("Column {0} not found in the experiment" .format(self.xchannel)) if self.ychannel not in experiment.data: raise util.CytoflowOpError("Column {0} not found in the experiment" .format(self.ychannel)) for b in self.by: if b not in experiment.data: raise util.CytoflowOpError("Aggregation metadata {0} not found" " in the experiment" .format(b)) if len(experiment.data[b].unique()) > 100: #WARNING - magic number raise util.CytoflowOpError("More than 100 unique values found for" " aggregation metadata {0}. Did you" " accidentally specify a data channel?" .format(b)) if self.by: groupby = experiment.data.groupby(self.by) else: # use a lambda expression to return a group that contains # all the events groupby = experiment.data.groupby(lambda x: True) # get the scale. estimate the scale params for the ENTIRE data set, # not subsets we get from groupby(). And we need to save it so that # the data is transformed the same way when we apply() self._xscale = util.scale_factory(self.xscale, experiment, self.xchannel) self._yscale = util.scale_factory(self.yscale, experiment, self.ychannel) for group, data_subset in groupby: x = data_subset.loc[:, [self.xchannel, self.ychannel]] x[self.xchannel] = self._xscale(x[self.xchannel]) x[self.ychannel] = self._yscale(x[self.ychannel]) # drop data that isn't in the scale range x = x[~(np.isnan(x[self.xchannel]) | np.isnan(x[self.ychannel]))] x = x.values gmm = mixture.GMM(n_components = self.num_components, covariance_type = "full", random_state = 1) gmm.fit(x) if not gmm.converged_: raise util.CytoflowOpError("Estimator didn't converge" " for group {0}" .format(group)) # in the 1D version, we sort the components by the means -- so # the first component has the lowest mean, the second component # has the next-lowest mean, etc. that doesn't work in a 2D area, # obviously. # instead, we assume that the clusters are likely (?) to be # arranged along *one* of the axes, so we take the |norm| of the # x,y mean of each cluster and sort that way. norms = (gmm.means_[:, 0] ** 2 + gmm.means_[:, 1] ** 2) ** 0.5 sort_idx = np.argsort(norms) gmm.means_ = gmm.means_[sort_idx] gmm.weights_ = gmm.weights_[sort_idx] gmm.covars_ = gmm.covars_[sort_idx] self._gmms[group] = gmm
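# A standalone sketch of the per-group estimation pattern above: fit one mixture
# model per level of the grouping condition and keep the models in a dict keyed
# by group, so that apply() can later pick the right model for each event.
# Synthetic 2-D data; the current GaussianMixture class stands in for the older
# mixture.GMM API used above.
import numpy as np
import pandas as pd
from sklearn import mixture

np.random.seed(8)
data = pd.DataFrame({'V2-A': np.random.normal(0, 1, 2000),
                     'Y2-A': np.random.normal(0, 1, 2000),
                     'Dox': np.repeat([1.0, 10.0], 1000)})

gmms = {}
for group, data_subset in data.groupby('Dox'):
    x = data_subset.loc[:, ['V2-A', 'Y2-A']].values
    gmm = mixture.GaussianMixture(n_components = 2,
                                  covariance_type = "full",
                                  random_state = 1)
    gmm.fit(x)
    gmms[group] = gmm

print({group: gmm.converged_ for group, gmm in gmms.items()})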
def plot(self, experiment, **kwargs): """Plot a bar chart""" if not experiment: raise util.CytoflowViewError("No experiment specified") if not self.channel: raise util.CytoflowViewError("Channel not specified") if self.channel not in experiment.data: raise util.CytoflowViewError("Channel {0} isn't in the experiment" .format(self.channel)) if not self.by: raise util.CytoflowViewError("Variable not specified") if not self.by in experiment.conditions: raise util.CytoflowViewError("Variable {0} isn't in the experiment") if not self.function: raise util.CytoflowViewError("Function not specified") if self.xfacet and self.xfacet not in experiment.conditions: raise util.CytoflowViewError("X facet {0} isn't in the experiment" .format(self.xfacet)) if self.yfacet and self.yfacet not in experiment.metadata: raise util.CytoflowViewError("Y facet {0} isn't in the experiment" .format(self.yfacet)) if self.huefacet and self.huefacet not in experiment.metadata: raise util.CytoflowViewError("Hue facet {0} isn't in the experiment" .format(self.huefacet)) # if self.error_bars == 'data' and self.error_function is None: # return False # # if self.error_bars == 'summary' \ # and (self.error_function is None # or not self.error_var in experiment.metadata): # return False if self.subset: try: data = experiment.query(self.subset) except: raise util.CytoflowViewError("Subset string {0} isn't valid" .format(self.subset)) if len(data.index) == 0: raise util.CytoflowViewError("Subset string '{0}' returned no events" .format(self.subset)) else: data = experiment.data sns.factorplot(x = self.by, y = self.channel, data = data, size = 6, aspect = 1.5, row = (self.yfacet if self.yfacet else None), col = (self.xfacet if self.xfacet else None), hue = (self.huefacet if self.huefacet else None), col_order = (np.sort(data[self.xfacet].unique()) if self.xfacet else None), row_order = (np.sort(data[self.yfacet].unique()) if self.yfacet else None), hue_order = (np.sort(data[self.huefacet].unique()) if self.huefacet else None), # something buggy here. #orient = ("h" if self.orientation == "horizontal" else "v"), estimator = self.function, ci = None, kind = "bar") scale = util.scale_factory(self.scale, experiment, self.channel) # because the bottom of a bar chart is "0", masking out bad # values on a log scale doesn't work. we must clip instead. if self.scale == "log": scale.mode = "clip" plt.yscale(self.scale, **scale.mpl_params)