def plot(self, experiment, **kwargs):
    """
    Plot a faceted histogram view of a channel.

    Parameters
    ----------
    experiment : Experiment
        The experiment to plot.  This method reads its ``data``,
        ``conditions`` and ``metadata`` attributes and calls ``query()``
        when ``self.subset`` is set.

    **kwargs
        Passed to ``plt.hist`` through ``FacetGrid.map``.  Defaults are
        supplied for ``histtype``, ``alpha``, ``antialiased`` and ``bins``.

    Raises
    ------
    util.CytoflowViewError
        If no experiment or channel is given, if the channel or one of the
        facets is not in the experiment, or if the subset string is invalid
        or selects no events.
    """

    if not experiment:
        raise util.CytoflowViewError("No experiment specified")

    if not self.channel:
        raise util.CytoflowViewError("Must specify a channel")

    if self.channel not in experiment.data:
        raise util.CytoflowViewError("Channel {0} not in the experiment"
                                     .format(self.channel))

    if self.xfacet and self.xfacet not in experiment.conditions:
        raise util.CytoflowViewError("X facet {0} not in the experiment"
                                     .format(self.xfacet))

    if self.yfacet and self.yfacet not in experiment.conditions:
        raise util.CytoflowViewError("Y facet {0} not in the experiment"
                                     .format(self.yfacet))

    if self.huefacet and self.huefacet not in experiment.conditions:
        raise util.CytoflowViewError("Hue facet {0} not in the experiment"
                                     .format(self.huefacet))

    if self.subset:
        try:
            data = experiment.query(self.subset).data.reset_index()
        except Exception:
            # don't use a bare except: KeyboardInterrupt / SystemExit must
            # not be reported as an invalid subset string
            raise util.CytoflowViewError("Subset string '{0}' isn't valid"
                                         .format(self.subset))

        # check the *subset* for emptiness, not the whole experiment
        if len(data.index) == 0:
            raise util.CytoflowViewError("Subset string '{0}' returned no events"
                                         .format(self.subset))
    else:
        data = experiment.data

    # get the scale
    scale = util.scale_factory(self.scale, experiment, self.channel)
    scaled_data = scale(data[self.channel])

    kwargs.setdefault('histtype', 'stepfilled')
    kwargs.setdefault('alpha', 0.5)
    kwargs.setdefault('antialiased', True)

    # estimate a "good" number of bins; see cytoflow.utility.num_hist_bins
    # for a reference.
    num_bins = util.num_hist_bins(scaled_data)

    # clip num_bins to (50, 1000)
    num_bins = max(min(num_bins, 1000), 50)

    xmin = bottleneck.nanmin(scaled_data)
    xmax = bottleneck.nanmax(scaled_data)

    if (self.huefacet
            and "bins" in experiment.metadata[self.huefacet]
            and experiment.metadata[self.huefacet]["bin_scale"] == self.scale):
        # if we color facet by the result of a BinningOp and we don't
        # match the BinningOp bins with the histogram bins, we get
        # gnarly aliasing.

        # each color gets at least one bin.  however, if the estimated
        # number of bins for the histogram is much larger than the
        # number of colors, sub-divide each color into multiple bins.
        bins = experiment.metadata[self.huefacet]["bins"]
        bins = np.append(bins, xmax)

        num_hues = len(data[self.huefacet].unique())

        # force true division before the ceil, and hand np.linspace an
        # integer count: a float `num` is rejected by current numpy
        bins_per_hue = int(math.ceil(float(num_bins) / num_hues))

        new_bins = [xmin]
        for end in [b for b in bins if (b > xmin and b <= xmax)]:
            new_bins = np.append(new_bins,
                                 np.linspace(new_bins[-1],
                                             end,
                                             bins_per_hue + 1,
                                             endpoint = True)[1:])

        bins = scale.inverse(new_bins)
    else:
        bin_width = (xmax - xmin) / num_bins
        bins = scale.inverse(np.arange(xmin, xmax, bin_width))
        bins = np.append(bins, scale.inverse(xmax))

    # take care of a rare rounding error, where the last observation is
    # a liiiitle bit more than the last bin, which makes plt.hist() puke
    bins[-1] += 1

    kwargs.setdefault('bins', bins)

    # mask out the data that's not in the scale domain
    data = data[~np.isnan(scaled_data)]

    g = sns.FacetGrid(data,
                      size = 6,
                      aspect = 1.5,
                      col = (self.xfacet if self.xfacet else None),
                      row = (self.yfacet if self.yfacet else None),
                      hue = (self.huefacet if self.huefacet else None),
                      col_order = (np.sort(data[self.xfacet].unique()) if self.xfacet else None),
                      row_order = (np.sort(data[self.yfacet].unique()) if self.yfacet else None),
                      hue_order = (np.sort(data[self.huefacet].unique()) if self.huefacet else None),
                      legend_out = False,
                      sharex = False,
                      sharey = False)

    # set the scale for each set of axes; can't just call plt.xscale()
    for ax in g.axes.flatten():
        ax.set_xscale(self.scale, **scale.mpl_params)

    g.map(plt.hist, self.channel, **kwargs)

    # if we have a hue facet and a lot of hues, make a color bar instead
    # of a super-long legend.
    if self.huefacet:
        # NOTE(review): newer matplotlib releases expose the color cycle as
        # 'axes.prop_cycle' -- confirm against the pinned matplotlib version
        current_palette = mpl.rcParams['axes.color_cycle']
        if len(g.hue_names) > len(current_palette):
            plot_ax = plt.gca()
            cmap = mpl.colors.ListedColormap(sns.color_palette("husl",
                                                               n_colors = len(g.hue_names)))
            cax, _ = mpl.colorbar.make_axes(plt.gca())
            norm = mpl.colors.Normalize(vmin = np.min(g.hue_names),
                                        vmax = np.max(g.hue_names),
                                        clip = False)
            mpl.colorbar.ColorbarBase(cax,
                                      cmap = cmap,
                                      norm = norm,
                                      label = self.huefacet)
            # make_axes() changed the current axes; restore the plot axes
            plt.sca(plot_ax)
        else:
            g.add_legend(title = self.huefacet)
def estimate(self, experiment, subset = None):
    """
    Fit a two-channel Gaussian mixture model to each group of events.

    The events are grouped by the conditions in ``self.by`` (or treated as
    one group when ``self.by`` is empty).  For every group a mixture model
    with ``self.num_components`` components is fit to the scaled
    ``(xchannel, ychannel)`` values and stored in ``self._gmms[group]``.
    The scales are stored in ``self._xscale`` and ``self._yscale``.

    Parameters
    ----------
    experiment : Experiment
        The experiment whose ``data`` is used for the estimate.

    subset : optional
        Accepted but not referenced anywhere in this method.

    Raises
    ------
    util.CytoflowOpError
        If the experiment is missing, a channel or ``by`` entry is not a
        column of the data, a ``by`` entry has more than 100 unique values,
        or the estimator does not converge for some group.
    """

    if not experiment:
        raise util.CytoflowOpError("No experiment specified")

    # both channels must be columns of the data; x is checked before y
    for channel in (self.xchannel, self.ychannel):
        if channel not in experiment.data:
            raise util.CytoflowOpError("Column {0} not found in the experiment"
                                       .format(channel))

    for condition in self.by:
        if condition not in experiment.data:
            raise util.CytoflowOpError("Aggregation metadata {0} not found"
                                       " in the experiment"
                                       .format(condition))

        num_values = len(experiment.data[condition].unique())
        if num_values > 100:  # WARNING - magic number
            raise util.CytoflowOpError("More than 100 unique values found for"
                                       " aggregation metadata {0}.  Did you"
                                       " accidentally specify a data channel?"
                                       .format(condition))

    # with no aggregation conditions, a constant key function puts every
    # event into one single group
    grouper = self.by if self.by else (lambda x: True)
    grouped_events = experiment.data.groupby(grouper)

    # estimate the scale parameters from the ENTIRE data set, not from the
    # per-group subsets, and keep them so apply() transforms the data the
    # same way.
    self._xscale = util.scale_factory(self.xscale, experiment, self.xchannel)
    self._yscale = util.scale_factory(self.yscale, experiment, self.ychannel)

    channel_pair = [self.xchannel, self.ychannel]

    for group, events in grouped_events:
        scaled = events.loc[:, channel_pair]
        scaled[self.xchannel] = self._xscale(scaled[self.xchannel])
        scaled[self.ychannel] = self._yscale(scaled[self.ychannel])

        # discard events that fall outside the range of either scale
        out_of_range = (np.isnan(scaled[self.xchannel])
                        | np.isnan(scaled[self.ychannel]))
        samples = scaled[~out_of_range].values

        model = mixture.GMM(n_components = self.num_components,
                            covariance_type = "full",
                            random_state = 1)
        model.fit(samples)

        if not model.converged_:
            raise util.CytoflowOpError("Estimator didn't converge"
                                       " for group {0}"
                                       .format(group))

        # a 2D mean has no natural ordering the way a 1D mean does, so for
        # a stable component order we assume the clusters lie roughly along
        # one axis and sort them by the norm of their (x, y) mean.
        mean_norms = (model.means_[:, 0] ** 2 + model.means_[:, 1] ** 2) ** 0.5
        order = np.argsort(mean_norms)

        model.means_ = model.means_[order]
        model.weights_ = model.weights_[order]
        model.covars_ = model.covars_[order]

        self._gmms[group] = model
def plot(self, experiment, **kwargs):
    """
    Plot summary statistics of two channels against each other.

    The events are grouped by ``self.variable`` and any facets.
    ``self.xfunction`` and ``self.yfunction`` summarize ``self.xchannel``
    and ``self.ychannel`` per group, and the summaries are drawn with
    ``plt.plot``, optionally with x and/or y error bars.

    Parameters
    ----------
    experiment : Experiment
        The experiment to plot.  This method reads its ``data``,
        ``conditions`` and ``metadata`` attributes and calls ``query()``
        when ``self.subset`` is set.

    **kwargs
        Passed to ``plt.plot`` through ``FacetGrid.map``.  ``antialiased``
        defaults to True.

    Raises
    ------
    util.CytoflowViewError
        If a required attribute is unset, a channel / facet / variable is
        not in the experiment, the variable is not numeric, the error-bar
        settings are inconsistent, or the subset string is invalid or
        selects no events.
    """

    if not experiment:
        raise util.CytoflowViewError("No experiment specified")

    if not self.variable:
        raise util.CytoflowViewError("variable not set")

    if self.variable not in experiment.conditions:
        raise util.CytoflowViewError("variable {0} not in the experiment"
                                     .format(self.variable))

    if not (experiment.conditions[self.variable] == "float" or
            experiment.conditions[self.variable] == "int"):
        raise util.CytoflowViewError("variable {0} isn't numeric"
                                     .format(self.variable))

    if not self.xchannel:
        raise util.CytoflowViewError("X channel isn't set.")

    if self.xchannel not in experiment.data:
        raise util.CytoflowViewError("X channel {0} isn't in the experiment"
                                     .format(self.xchannel))

    if not self.xfunction:
        raise util.CytoflowViewError("X summary function isn't set")

    if not self.ychannel:
        raise util.CytoflowViewError("Y channel isn't set.")

    if self.ychannel not in experiment.data:
        raise util.CytoflowViewError("Y channel {0} isn't in the experiment"
                                     .format(self.ychannel))

    if not self.yfunction:
        raise util.CytoflowViewError("Y summary function isn't set")

    # the facet messages carry a {0} placeholder; fill it in so the user
    # sees the offending name instead of a literal "{0}"
    if self.xfacet and self.xfacet not in experiment.conditions:
        raise util.CytoflowViewError("X facet {0} not in the experiment"
                                     .format(self.xfacet))

    if self.yfacet and self.yfacet not in experiment.conditions:
        raise util.CytoflowViewError("Y facet {0} not in the experiment"
                                     .format(self.yfacet))

    if self.huefacet and self.huefacet not in experiment.metadata:
        raise util.CytoflowViewError("Hue facet {0} not in the experiment"
                                     .format(self.huefacet))

    if self.x_error_bars and self.x_error_bars != 'data' \
            and self.x_error_bars not in experiment.conditions:
        raise util.CytoflowViewError("x_error_bars must be either 'data' or "
                                     "a condition in the experiment")

    if self.x_error_bars and not self.x_error_function:
        raise util.CytoflowViewError("didn't set an x error function")

    if self.y_error_bars and self.y_error_bars != 'data' \
            and self.y_error_bars not in experiment.conditions:
        raise util.CytoflowViewError("y_error_bars must be either 'data' or "
                                     "a condition in the experiment")

    if self.y_error_bars and not self.y_error_function:
        raise util.CytoflowViewError("didn't set an error function")

    kwargs.setdefault('antialiased', True)

    if self.subset:
        try:
            data = experiment.query(self.subset).data.reset_index()
        except Exception:
            # don't use a bare except: KeyboardInterrupt / SystemExit must
            # not be reported as an invalid subset string
            raise util.CytoflowViewError("Subset string '{0}' isn't valid"
                                         .format(self.subset))

        if len(data.index) == 0:
            raise util.CytoflowViewError("Subset string '{0}' returned no events"
                                         .format(self.subset))
    else:
        data = experiment.data

    group_vars = [self.variable]
    if self.xfacet: group_vars.append(self.xfacet)
    if self.yfacet: group_vars.append(self.yfacet)
    if self.huefacet: group_vars.append(self.huefacet)

    g = data.groupby(by = group_vars)

    plot_data = pd.DataFrame(
        {self.xchannel : g[self.xchannel].aggregate(self.xfunction),
         self.ychannel : g[self.ychannel].aggregate(self.yfunction)}) \
        .reset_index()

    def _error_statistic(channel, summary_function, error_bars, error_function):
        # compute the error statistic of `channel` for every plotted group
        if error_bars == 'data':
            # compute the error statistic on the same subsets as the
            # summary statistic
            return g[channel].aggregate(error_function).reset_index()

        # subdivide the data set further by the error_bars condition
        err_vars = list(group_vars)
        err_vars.append(error_bars)

        # apply the summary statistic to each subgroup
        data_g = data.groupby(by = err_vars)
        data_stat = data_g[channel].aggregate(summary_function).reset_index()

        # apply the error function to the summary statistics
        err_g = data_stat.groupby(by = group_vars)
        return err_g[channel].aggregate(error_function).reset_index()

    # compute the x error statistic; it lives in a randomly named column of
    # plot_data
    if self.x_error_bars:
        error_stat = _error_statistic(self.xchannel,
                                      self.xfunction,
                                      self.x_error_bars,
                                      self.x_error_function)
        x_err_name = util.random_string(6)
        plot_data[x_err_name] = error_stat[self.xchannel]

    # compute the y error statistic
    if self.y_error_bars:
        error_stat = _error_statistic(self.ychannel,
                                      self.yfunction,
                                      self.y_error_bars,
                                      self.y_error_function)
        y_err_name = util.random_string(6)
        plot_data[y_err_name] = error_stat[self.ychannel]

    grid = sns.FacetGrid(plot_data,
                         size = 6,
                         aspect = 1.5,
                         col = (self.xfacet if self.xfacet else None),
                         row = (self.yfacet if self.yfacet else None),
                         hue = (self.huefacet if self.huefacet else None),
                         col_order = (np.sort(data[self.xfacet].unique()) if self.xfacet else None),
                         row_order = (np.sort(data[self.yfacet].unique()) if self.yfacet else None),
                         hue_order = (np.sort(data[self.huefacet].unique()) if self.huefacet else None),
                         legend_out = False,
                         sharex = False,
                         sharey = False)

    xscale = util.scale_factory(self.xscale, experiment, self.xchannel)
    yscale = util.scale_factory(self.yscale, experiment, self.ychannel)

    for ax in grid.axes.flatten():
        ax.set_xscale(self.xscale, **xscale.mpl_params)
        ax.set_yscale(self.yscale, **yscale.mpl_params)

    # plot the error bars first so the axis labels don't get overwritten
    if self.x_error_bars:
        grid.map(_x_error_bars, self.xchannel, x_err_name, self.ychannel)

    if self.y_error_bars:
        grid.map(_y_error_bars, self.xchannel, self.ychannel, y_err_name)

    grid.map(plt.plot, self.xchannel, self.ychannel, **kwargs)

    # if we have a hue facet and a lot of hues, make a color bar instead
    # of a super-long legend.
    if self.huefacet:
        # NOTE(review): newer matplotlib releases expose the color cycle as
        # 'axes.prop_cycle' -- confirm against the pinned matplotlib version
        current_palette = mpl.rcParams['axes.color_cycle']
        if len(grid.hue_names) > len(current_palette):
            plot_ax = plt.gca()
            cmap = mpl.colors.ListedColormap(sns.color_palette("husl",
                                                               n_colors = len(grid.hue_names)))
            cax, _ = mpl.colorbar.make_axes(plt.gca())
            norm = mpl.colors.Normalize(vmin = np.min(grid.hue_names),
                                        vmax = np.max(grid.hue_names),
                                        clip = False)
            mpl.colorbar.ColorbarBase(cax,
                                      cmap = cmap,
                                      norm = norm,
                                      label = self.huefacet)
            # make_axes() changed the current axes; restore the plot axes
            plt.sca(plot_ax)
        else:
            grid.add_legend(title = self.huefacet)
def plot(self, experiment, **kwargs):
    """
    Plot a faceted scatter plot of two channels.

    Parameters
    ----------
    experiment : Experiment
        The experiment to plot.  This method reads its ``data``,
        ``conditions`` and ``metadata`` attributes and calls ``query()``
        when ``self.subset`` is set.

    **kwargs
        Passed to ``plt.scatter`` through ``FacetGrid.map``.  Defaults are
        supplied for ``alpha``, ``s``, ``marker`` and ``antialiased``.

    Raises
    ------
    util.CytoflowViewError
        If the experiment or a channel is missing, a channel or facet is
        not in the experiment, or the subset string is invalid or selects
        no events.
    """

    if not experiment:
        raise util.CytoflowViewError("No experiment specified")

    if not self.xchannel:
        raise util.CytoflowViewError("X channel not specified")

    if self.xchannel not in experiment.data:
        raise util.CytoflowViewError("X channel {0} not in the experiment".format(self.xchannel))

    if not self.ychannel:
        raise util.CytoflowViewError("Y channel not specified")

    if self.ychannel not in experiment.data:
        raise util.CytoflowViewError("Y channel {0} not in the experiment".format(self.ychannel))

    if self.xfacet and self.xfacet not in experiment.conditions:
        raise util.CytoflowViewError("X facet {0} not in the experiment".format(self.xfacet))

    if self.yfacet and self.yfacet not in experiment.conditions:
        raise util.CytoflowViewError("Y facet {0} not in the experiment".format(self.yfacet))

    if self.huefacet and self.huefacet not in experiment.metadata:
        raise util.CytoflowViewError("Hue facet {0} not in the experiment".format(self.huefacet))

    if self.subset:
        try:
            # NOTE(review): the result of query() is used directly as the
            # data frame here (no `.data`) -- confirm that matches the
            # Experiment.query() API this file targets
            data = experiment.query(self.subset)
        except Exception:
            # don't use a bare except: KeyboardInterrupt / SystemExit must
            # not be reported as an invalid subset string
            raise util.CytoflowViewError("Subset string '{0}' isn't valid".format(self.subset))

        if len(data.index) == 0:
            raise util.CytoflowViewError("Subset string '{0}' returned no events".format(self.subset))
    else:
        data = experiment.data

    kwargs.setdefault("alpha", 0.25)
    kwargs.setdefault("s", 2)
    kwargs.setdefault("marker", "o")
    kwargs.setdefault("antialiased", True)

    g = sns.FacetGrid(
        data,
        size=6,
        aspect=1.5,
        col=(self.xfacet if self.xfacet else None),
        row=(self.yfacet if self.yfacet else None),
        hue=(self.huefacet if self.huefacet else None),
        col_order=(np.sort(data[self.xfacet].unique()) if self.xfacet else None),
        row_order=(np.sort(data[self.yfacet].unique()) if self.yfacet else None),
        hue_order=(np.sort(data[self.huefacet].unique()) if self.huefacet else None),
        legend_out=False,
        sharex=False,
        sharey=False,
    )

    xscale = util.scale_factory(self.xscale, experiment, self.xchannel)
    yscale = util.scale_factory(self.yscale, experiment, self.ychannel)

    # set the scales on every set of axes in the grid
    for ax in g.axes.flatten():
        ax.set_xscale(self.xscale, **xscale.mpl_params)
        ax.set_yscale(self.yscale, **yscale.mpl_params)

    g.map(plt.scatter, self.xchannel, self.ychannel, **kwargs)

    # if we have a hue facet and a lot of hues, make a color bar instead
    # of a super-long legend.
    if self.huefacet:
        # NOTE(review): newer matplotlib releases expose the color cycle as
        # 'axes.prop_cycle' -- confirm against the pinned matplotlib version
        current_palette = mpl.rcParams["axes.color_cycle"]
        if len(g.hue_names) > len(current_palette):
            plot_ax = plt.gca()
            cmap = mpl.colors.ListedColormap(sns.color_palette("husl", n_colors=len(g.hue_names)))
            cax, _ = mpl.colorbar.make_axes(plt.gca())
            norm = mpl.colors.Normalize(vmin=np.min(g.hue_names), vmax=np.max(g.hue_names), clip=False)
            mpl.colorbar.ColorbarBase(cax, cmap=cmap, norm=norm)
            # make_axes() changed the current axes; restore the plot axes
            plt.sca(plot_ax)
        else:
            g.add_legend()
def plot(self, experiment, **kwargs):
    """
    Plot a faceted 2D histogram of two channels.

    Parameters
    ----------
    experiment : Experiment
        The experiment to plot.  This method reads its ``data``,
        ``conditions`` and ``metadata`` attributes and calls ``query()``
        when ``self.subset`` is set.

    **kwargs
        Passed to ``_hist2d`` through ``FacetGrid.map``.  ``xscale``,
        ``yscale``, ``xedges`` and ``yedges`` are always overwritten;
        ``smoothed`` (default False) halves the bin counts when true;
        ``antialiased`` defaults to True.

    Raises
    ------
    util.CytoflowViewError
        If the experiment or a channel is missing, a channel or facet is
        not in the experiment, or the subset string is invalid or selects
        no events.
    """

    if not experiment:
        raise util.CytoflowViewError("No experiment specified")

    if not self.xchannel:
        raise util.CytoflowViewError("X channel not specified")

    if self.xchannel not in experiment.data:
        raise util.CytoflowViewError("X channel {0} not in the experiment"
                                     .format(self.xchannel))

    if not self.ychannel:
        raise util.CytoflowViewError("Y channel not specified")

    # the messages below carry a {0} placeholder; fill it in so the user
    # sees the offending name instead of a literal "{0}"
    if self.ychannel not in experiment.data:
        raise util.CytoflowViewError("Y channel {0} not in the experiment"
                                     .format(self.ychannel))

    if self.xfacet and self.xfacet not in experiment.conditions:
        raise util.CytoflowViewError("X facet {0} not in the experiment"
                                     .format(self.xfacet))

    if self.yfacet and self.yfacet not in experiment.conditions:
        raise util.CytoflowViewError("Y facet {0} not in the experiment"
                                     .format(self.yfacet))

    if self.huefacet and self.huefacet not in experiment.metadata:
        raise util.CytoflowViewError("Hue facet {0} not in the experiment"
                                     .format(self.huefacet))

    if self.subset:
        try:
            data = experiment.query(self.subset).data.reset_index()
        except Exception:
            # don't use a bare except: KeyboardInterrupt / SystemExit must
            # not be reported as an invalid subset string
            raise util.CytoflowViewError("Subset string '{0}' not valid"
                                         .format(self.subset))

        if len(data.index) == 0:
            raise util.CytoflowViewError("Subset string '{0}' returned no events"
                                         .format(self.subset))
    else:
        data = experiment.data

    xscale = util.scale_factory(self.xscale, experiment, self.xchannel)
    yscale = util.scale_factory(self.yscale, experiment, self.ychannel)

    kwargs['xscale'] = xscale
    kwargs['yscale'] = yscale

    # drop events that are outside the domain of EITHER scale.  one combined
    # mask keeps the x values, the y values and the data frame the same
    # length; masking x and y one after the other left scaled_xdata longer
    # than scaled_ydata whenever an event was valid in x but not in y.
    scaled_xdata = xscale(data[self.xchannel])
    scaled_ydata = yscale(data[self.ychannel])
    in_domain = ~(np.isnan(scaled_xdata) | np.isnan(scaled_ydata))

    data = data[in_domain]
    scaled_xdata = scaled_xdata[in_domain]
    scaled_ydata = scaled_ydata[in_domain]

    # find good bin counts
    num_xbins = util.num_hist_bins(scaled_xdata)
    num_ybins = util.num_hist_bins(scaled_ydata)

    # there are situations where this produces an unreasonable estimate.
    if num_xbins > self._max_bins:
        warnings.warn("Capping X bins to {}! To increase this limit, "
                      "change _max_bins"
                      .format(self._max_bins))
        num_xbins = self._max_bins

    if num_ybins > self._max_bins:
        warnings.warn("Capping Y bins to {}! To increase this limit, "
                      "change _max_bins"
                      .format(self._max_bins))
        num_ybins = self._max_bins

    kwargs.setdefault('smoothed', False)
    if kwargs['smoothed']:
        # floor division keeps the bin counts integral on Python 3 too
        num_xbins //= 2
        num_ybins //= 2

    # only the bin edges are needed here; the counts are discarded
    _, xedges, yedges = np.histogram2d(scaled_xdata,
                                       scaled_ydata,
                                       bins = (num_xbins, num_ybins))

    kwargs['xedges'] = xscale.inverse(xedges)
    kwargs['yedges'] = yscale.inverse(yedges)

    kwargs.setdefault('antialiased', True)

    g = sns.FacetGrid(data,
                      size = 6,
                      aspect = 1.5,
                      col = (self.xfacet if self.xfacet else None),
                      row = (self.yfacet if self.yfacet else None),
                      hue = (self.huefacet if self.huefacet else None),
                      col_order = (np.sort(data[self.xfacet].unique()) if self.xfacet else None),
                      row_order = (np.sort(data[self.yfacet].unique()) if self.yfacet else None),
                      hue_order = (np.sort(data[self.huefacet].unique()) if self.huefacet else None),
                      sharex = False,
                      sharey = False)

    for ax in g.axes.flatten():
        ax.set_xscale(self.xscale, **xscale.mpl_params)
        ax.set_yscale(self.yscale, **yscale.mpl_params)

    g.map(_hist2d, self.xchannel, self.ychannel, **kwargs)

    # if we have a hue facet and a lot of hues, make a color bar instead
    # of a super-long legend.
    if self.huefacet:
        # NOTE(review): newer matplotlib releases expose the color cycle as
        # 'axes.prop_cycle' -- confirm against the pinned matplotlib version
        current_palette = mpl.rcParams['axes.color_cycle']
        if len(g.hue_names) > len(current_palette):
            plot_ax = plt.gca()
            cmap = mpl.colors.ListedColormap(sns.color_palette("husl",
                                                               n_colors = len(g.hue_names)))
            cax, _ = mpl.colorbar.make_axes(plt.gca())
            norm = mpl.colors.Normalize(vmin = np.min(g.hue_names),
                                        vmax = np.max(g.hue_names),
                                        clip = False)
            mpl.colorbar.ColorbarBase(cax,
                                      cmap = cmap,
                                      norm = norm,
                                      label = self.huefacet)
            # make_axes() changed the current axes; restore the plot axes
            plt.sca(plot_ax)
        else:
            g.add_legend(title = self.huefacet)
def apply(self, experiment):
    """
    Apply the binning to an experiment.

    Parameters
    ----------
    experiment : Experiment
        The experiment to which this operation is applied.  It is not
        modified; the work is done on a clone.

    Returns
    -------
    Experiment
        A clone of ``experiment`` with a new condition named ``self.name``
        that holds, for each event, the index of the bin that the scaled
        ``self.channel`` value falls into (NaN where the scaled value is
        NaN).  The bin edges and the scale name are recorded in
        ``metadata[self.name]`` under ``"bins"`` and ``"bin_scale"``.  If
        ``self.bin_count_name`` is set, a second condition with that name
        holds a per-bin count taken from the first column of the grouped
        data.

    Raises
    ------
    util.CytoflowOpError
        If the experiment, name or channel is missing, if a new column
        name already exists, if neither ``num_bins`` nor ``bin_width`` is
        set, or if ``bin_width`` is used with a scale other than "linear"
        or "log".
    """

    if not experiment:
        raise util.CytoflowOpError("no experiment specified")

    if not self.name:
        raise util.CytoflowOpError("name is not set")

    if self.name in experiment.data.columns:
        raise util.CytoflowOpError("name {0} is in the experiment already"
                                   .format(self.name))

    if self.bin_count_name and self.bin_count_name in experiment.data.columns:
        raise util.CytoflowOpError("bin_count_name {0} is in the experiment already"
                                   .format(self.bin_count_name))

    if not self.channel:
        raise util.CytoflowOpError("channel is not set")

    if self.channel not in experiment.data.columns:
        raise util.CytoflowOpError("channel {0} isn't in the experiment"
                                   .format(self.channel))

    if self.num_bins is Undefined and self.bin_width is Undefined:
        raise util.CytoflowOpError("must set either bin number or width")

    if self.num_bins is Undefined \
            and not (self.scale == "linear" or self.scale == "log"):
        raise util.CytoflowOpError("Can only use bin_width with linear or log scale")

    scale = util.scale_factory(self.scale, experiment, self.channel)
    scaled_data = scale(experiment.data[self.channel])

    channel_min = bn.nanmin(scaled_data)
    channel_max = bn.nanmax(scaled_data)

    # np.linspace needs an integer count; the quotient derived from
    # bin_width is a float, so truncate it explicitly.
    # NOTE(review): linspace places `num` points, i.e. num - 1 intervals,
    # so the resulting spacing is not exactly bin_width -- confirm intent.
    if self.num_bins is not Undefined:
        num_bins = self.num_bins
    else:
        num_bins = int((channel_max - channel_min) / self.bin_width)

    bins = np.linspace(start = channel_min, stop = channel_max,
                       num = num_bins)

    # bins need to be internal; drop the first and last one
    bins = bins[1:-1]

    new_experiment = experiment.clone()
    new_experiment.add_condition(self.name,
                                 "int",
                                 np.digitize(scaled_data, bins))

    # if we're log-scaled (for example), don't label data that isn't
    # showable on a log scale!  (.loc replaces the removed .ix indexer, and
    # np.nan is the spelling that exists in every numpy release)
    new_experiment.data.loc[np.isnan(scaled_data), self.name] = np.nan

    # keep track of the bins we used, for pretty plotting later.
    new_experiment.metadata[self.name]["bin_scale"] = self.scale
    new_experiment.metadata[self.name]["bins"] = bins

    if self.bin_count_name:
        # TODO - this is a HUGE memory hog?!
        agg_count = new_experiment.data.groupby(self.name).count()
        agg_count = agg_count[agg_count.columns[0]]

        # have to make the condition a float64, because if we're in log
        # space there may be events that have NaN as the bin number.
        new_experiment.add_condition(
            self.bin_count_name,
            "float64",
            new_experiment[self.name].map(agg_count))

    new_experiment.history.append(self.clone_traits())
    return new_experiment
def plot(self, experiment, **kwargs):
    """
    Plot a faceted 2D kernel density estimate of two channels.

    Parameters
    ----------
    experiment : Experiment
        The experiment to plot.  This method reads its ``data``,
        ``conditions`` and ``metadata`` attributes and calls ``query()``
        when ``self.subset`` is set.

    **kwargs
        Passed to ``_bivariate_kdeplot`` through ``FacetGrid.map``.
        Defaults are supplied for ``shade``, ``min_alpha``, ``max_alpha``
        and ``n_levels``; ``xscale`` and ``yscale`` are always overwritten
        with the scale objects.

    Raises
    ------
    util.CytoflowViewError
        If the experiment or a channel is missing, a channel or facet is
        not in the experiment, or the subset string is invalid or selects
        no events.
    """

    if not experiment:
        raise util.CytoflowViewError("No experiment specified")

    if not self.xchannel:
        raise util.CytoflowViewError("X channel not specified")

    if self.xchannel not in experiment.data:
        raise util.CytoflowViewError("X channel {0} not in the experiment"
                                     .format(self.xchannel))

    if not self.ychannel:
        raise util.CytoflowViewError("Y channel not specified")

    if self.ychannel not in experiment.data:
        raise util.CytoflowViewError("Y channel {0} not in the experiment"
                                     .format(self.ychannel))

    if self.xfacet and self.xfacet not in experiment.conditions:
        raise util.CytoflowViewError("X facet {0} not in the experiment"
                                     .format(self.xfacet))

    if self.yfacet and self.yfacet not in experiment.conditions:
        raise util.CytoflowViewError("Y facet {0} not in the experiment"
                                     .format(self.yfacet))

    if self.huefacet and self.huefacet not in experiment.metadata:
        raise util.CytoflowViewError("Hue facet {0} not in the experiment"
                                     .format(self.huefacet))

    if self.subset:
        try:
            data = experiment.query(self.subset).data.reset_index()
        except Exception:
            # don't use a bare except: KeyboardInterrupt / SystemExit must
            # not be reported as an invalid subset string
            raise util.CytoflowViewError("Subset string '{0}' isn't valid"
                                         .format(self.subset))

        if len(data.index) == 0:
            raise util.CytoflowViewError("Subset string '{0}' returned no events"
                                         .format(self.subset))
    else:
        data = experiment.data

    kwargs.setdefault('shade', False)
    kwargs.setdefault('min_alpha', 0.2)
    kwargs.setdefault('max_alpha', 0.9)
    kwargs.setdefault('n_levels', 10)

    g = sns.FacetGrid(data,
                      size = 6,
                      aspect = 1.5,
                      col = (self.xfacet if self.xfacet else None),
                      row = (self.yfacet if self.yfacet else None),
                      hue = (self.huefacet if self.huefacet else None),
                      col_order = (np.sort(data[self.xfacet].unique()) if self.xfacet else None),
                      row_order = (np.sort(data[self.yfacet].unique()) if self.yfacet else None),
                      hue_order = (np.sort(data[self.huefacet].unique()) if self.huefacet else None),
                      legend_out = False,
                      sharex = False,
                      sharey = False)

    xscale = util.scale_factory(self.xscale, experiment, self.xchannel)
    yscale = util.scale_factory(self.yscale, experiment, self.ychannel)

    # set the scales on every set of axes in the grid
    for ax in g.axes.flatten():
        ax.set_xscale(self.xscale, **xscale.mpl_params)
        ax.set_yscale(self.yscale, **yscale.mpl_params)

    # the plotting function receives the scale objects themselves
    kwargs['xscale'] = xscale
    kwargs['yscale'] = yscale

    g.map(_bivariate_kdeplot, self.xchannel, self.ychannel, **kwargs)

    if self.huefacet:
        g.add_legend(title = self.huefacet)
def plot(self, experiment, **kwargs):
    """
    Plot a faceted 1D kernel density estimate of a channel.

    Parameters
    ----------
    experiment : Experiment
        The experiment to plot.  This method reads its ``data`` and
        ``conditions`` attributes and calls ``query()`` when
        ``self.subset`` is set.

    **kwargs
        Passed to ``_univariate_kdeplot`` through ``FacetGrid.map``.
        ``shade`` defaults to True; ``label`` is always set to
        ``self.name`` and ``scale`` to the scale object.

    Raises
    ------
    util.CytoflowViewError
        If the experiment or channel is missing, the channel or a facet is
        not in the experiment, or the subset string is invalid or selects
        no events.
    """

    if not experiment:
        raise util.CytoflowViewError("No experiment specified")

    if not self.channel:
        raise util.CytoflowViewError("Must specify a channel")

    if self.channel not in experiment.data:
        raise util.CytoflowViewError("Channel {0} not in the experiment"
                                     .format(self.channel))

    if self.xfacet and self.xfacet not in experiment.conditions:
        raise util.CytoflowViewError("X facet {0} not in the experiment"
                                     .format(self.xfacet))

    if self.yfacet and self.yfacet not in experiment.conditions:
        raise util.CytoflowViewError("Y facet {0} not in the experiment"
                                     .format(self.yfacet))

    if self.huefacet and self.huefacet not in experiment.conditions:
        raise util.CytoflowViewError("Hue facet {0} not in the experiment"
                                     .format(self.huefacet))

    if self.subset:
        try:
            data = experiment.query(self.subset).data.reset_index()
        except Exception:
            # don't use a bare except: KeyboardInterrupt / SystemExit must
            # not be reported as an invalid subset string
            raise util.CytoflowViewError("Subset string '{0}' isn't valid"
                                         .format(self.subset))

        if len(data.index) == 0:
            raise util.CytoflowViewError("Subset string '{0}' returned no events"
                                         .format(self.subset))
    else:
        data = experiment.data

    kwargs.setdefault('shade', True)
    kwargs['label'] = self.name

    g = sns.FacetGrid(data,
                      size = 6,
                      aspect = 1.5,
                      col = (self.xfacet if self.xfacet else None),
                      row = (self.yfacet if self.yfacet else None),
                      hue = (self.huefacet if self.huefacet else None),
                      col_order = (np.sort(data[self.xfacet].unique()) if self.xfacet else None),
                      row_order = (np.sort(data[self.yfacet].unique()) if self.yfacet else None),
                      hue_order = (np.sort(data[self.huefacet].unique()) if self.huefacet else None),
                      legend_out = False,
                      sharex = False,
                      sharey = False)

    # get the scale; the plotting function receives it as a keyword too
    scale = util.scale_factory(self.scale, experiment, self.channel)
    kwargs['scale'] = scale

    # set the scale for each set of axes; can't just call plt.xscale()
    for ax in g.axes.flatten():
        ax.set_xscale(self.scale, **scale.mpl_params)

    g.map(_univariate_kdeplot, self.channel, **kwargs)

    if self.huefacet:
        g.add_legend(title = self.huefacet)
def plot(self, experiment, **kwargs):
    """
    Plot a faceted hexagonal-binned 2D histogram of two channels.

    Parameters
    ----------
    experiment : Experiment
        The experiment to plot.  This method reads its ``data``,
        ``conditions`` and ``metadata`` attributes, indexes it by channel
        name, and calls ``query()`` when ``self.subset`` is set.

    **kwargs
        Passed to ``plt.hexbin`` through ``FacetGrid.map``.  Defaults are
        supplied for ``edgecolor``, ``antialiased``, ``extent`` and
        ``bins``.

    Raises
    ------
    util.CytoflowViewError
        If the experiment or a channel is missing, a channel or facet is
        not in the experiment, or the subset string is invalid or selects
        no events.

    Warns
    -----
    util.CytoflowViewWarning
        If either scale is not "linear".
    """

    if not experiment:
        raise util.CytoflowViewError("No experiment specified")

    if not self.xchannel:
        raise util.CytoflowViewError("X channel not specified")

    if self.xchannel not in experiment.data:
        raise util.CytoflowViewError("X channel {0} not in the experiment"
                                     .format(self.xchannel))

    if not self.ychannel:
        raise util.CytoflowViewError("Y channel not specified")

    # the messages below carry a {0} placeholder; fill it in so the user
    # sees the offending name instead of a literal "{0}"
    if self.ychannel not in experiment.data:
        raise util.CytoflowViewError("Y channel {0} not in the experiment"
                                     .format(self.ychannel))

    if self.xfacet and self.xfacet not in experiment.conditions:
        raise util.CytoflowViewError("X facet {0} not in the experiment"
                                     .format(self.xfacet))

    if self.yfacet and self.yfacet not in experiment.conditions:
        raise util.CytoflowViewError("Y facet {0} not in the experiment"
                                     .format(self.yfacet))

    if self.huefacet and self.huefacet not in experiment.metadata:
        raise util.CytoflowViewError("Hue facet {0} not in the experiment"
                                     .format(self.huefacet))

    if self.subset:
        try:
            # NOTE(review): the result of query() is used directly as the
            # data frame here (no `.data`) -- confirm that matches the
            # Experiment.query() API this file targets
            data = experiment.query(self.subset)
        except Exception:
            # don't use a bare except: KeyboardInterrupt / SystemExit must
            # not be reported as an invalid subset string
            raise util.CytoflowViewError("Subset string '{0}' not valid"
                                         .format(self.subset))

        if len(data.index) == 0:
            raise util.CytoflowViewError("Subset string '{0}' returned no events"
                                         .format(self.subset))
    else:
        data = experiment.data

    kwargs.setdefault('edgecolor', 'none')
    kwargs.setdefault('antialiased', True)

    xmin, xmax = (np.amin(data[self.xchannel]), np.amax(data[self.xchannel]))
    ymin, ymax = (np.amin(data[self.ychannel]), np.amax(data[self.ychannel]))

    # to avoid issues with singular data, expand the min/max pairs
    xmin, xmax = mtrans.nonsingular(xmin, xmax, expander=0.1)
    ymin, ymax = mtrans.nonsingular(ymin, ymax, expander=0.1)

    extent = (xmin, xmax, ymin, ymax)
    kwargs.setdefault('extent', extent)

    # the bin estimate uses the whole experiment, not the subset
    xbins = util.num_hist_bins(experiment[self.xchannel])
    ybins = util.num_hist_bins(experiment[self.ychannel])
    bins = np.mean([xbins, ybins])

    kwargs.setdefault('bins', bins)  # Do not move above.  don't ask.

    g = sns.FacetGrid(data,
                      size = 6,
                      aspect = 1.5,
                      col = (self.xfacet if self.xfacet else None),
                      row = (self.yfacet if self.yfacet else None),
                      hue = (self.huefacet if self.huefacet else None),
                      col_order = (np.sort(data[self.xfacet].unique()) if self.xfacet else None),
                      row_order = (np.sort(data[self.yfacet].unique()) if self.yfacet else None),
                      hue_order = (np.sort(data[self.huefacet].unique()) if self.huefacet else None),
                      sharex = False,
                      sharey = False)

    if (self.xscale != "linear" or self.yscale != "linear"):
        warnings.warn("hexbin is broken with scales other than \"linear\"",
                      util.CytoflowViewWarning)

    xscale = util.scale_factory(self.xscale, experiment, self.xchannel)
    yscale = util.scale_factory(self.yscale, experiment, self.ychannel)

    for ax in g.axes.flatten():
        ax.set_xscale(self.xscale, **xscale.mpl_params)
        ax.set_yscale(self.yscale, **yscale.mpl_params)

    g.map(plt.hexbin, self.xchannel, self.ychannel, **kwargs)
def plot(self, experiment, **kwargs):
    """
    Plot summary statistics of two channels against each other.

    The events are grouped by ``self.by`` and any facets.
    ``self.xfunction`` and ``self.yfunction`` summarize ``self.xchannel``
    and ``self.ychannel`` per group, and the summaries are drawn with
    ``plt.plot``.

    Parameters
    ----------
    experiment : Experiment
        The experiment to plot.  This method reads its ``data``,
        ``conditions`` and ``metadata`` attributes and calls ``query()``
        when ``self.subset`` is set.

    **kwargs
        Passed to ``plt.plot`` through ``FacetGrid.map``.  ``antialiased``
        defaults to True.

    Raises
    ------
    util.CytoflowViewError
        If a required attribute is unset, a channel / facet / ``by``
        variable is not in the experiment, the ``by`` variable is not
        numeric, or the subset string is invalid or selects no events.
    """

    if not experiment:
        raise util.CytoflowViewError("No experiment specified")

    if not self.by:
        raise util.CytoflowViewError("Independent variable 'by' not set")

    if self.by not in experiment.conditions:
        raise util.CytoflowViewError("Independent variable {0} not in the experiment"
                                     .format(self.by))

    if not (experiment.conditions[self.by] == "float" or
            experiment.conditions[self.by] == "int"):
        raise util.CytoflowViewError("by variable {0} isn't numeric"
                                     .format(self.by))

    if not self.xchannel:
        raise util.CytoflowViewError("X channel isn't set.")

    if self.xchannel not in experiment.data:
        raise util.CytoflowViewError("X channel {0} isn't in the experiment"
                                     .format(self.xchannel))

    if not self.xfunction:
        raise util.CytoflowViewError("X summary function isn't set")

    if not self.ychannel:
        raise util.CytoflowViewError("Y channel isn't set.")

    if self.ychannel not in experiment.data:
        raise util.CytoflowViewError("Y channel {0} isn't in the experiment"
                                     .format(self.ychannel))

    if not self.yfunction:
        raise util.CytoflowViewError("Y summary function isn't set")

    # the facet messages carry a {0} placeholder; fill it in so the user
    # sees the offending name instead of a literal "{0}"
    if self.xfacet and self.xfacet not in experiment.conditions:
        raise util.CytoflowViewError("X facet {0} not in the experiment"
                                     .format(self.xfacet))

    if self.yfacet and self.yfacet not in experiment.conditions:
        raise util.CytoflowViewError("Y facet {0} not in the experiment"
                                     .format(self.yfacet))

    if self.huefacet and self.huefacet not in experiment.metadata:
        raise util.CytoflowViewError("Hue facet {0} not in the experiment"
                                     .format(self.huefacet))

    kwargs.setdefault('antialiased', True)

    if self.subset:
        try:
            # NOTE(review): the result of query() is used directly as the
            # data frame here (no `.data`) -- confirm that matches the
            # Experiment.query() API this file targets
            data = experiment.query(self.subset)
        except Exception:
            # don't use a bare except: KeyboardInterrupt / SystemExit must
            # not be reported as an invalid subset string
            raise util.CytoflowViewError("Subset string '{0}' isn't valid"
                                         .format(self.subset))

        if len(data.index) == 0:
            raise util.CytoflowViewError("Subset string '{0}' returned no events"
                                         .format(self.subset))
    else:
        data = experiment.data

    group_vars = [self.by]
    if self.xfacet: group_vars.append(self.xfacet)
    if self.yfacet: group_vars.append(self.yfacet)
    if self.huefacet: group_vars.append(self.huefacet)

    g = data.groupby(by = group_vars)

    # one row per group, holding the x and y summary statistics
    plot_data = pd.DataFrame(
        {self.xchannel : g[self.xchannel].aggregate(self.xfunction),
         self.ychannel : g[self.ychannel].aggregate(self.yfunction)}) \
        .reset_index()

    grid = sns.FacetGrid(plot_data,
                         size = 6,
                         aspect = 1.5,
                         col = (self.xfacet if self.xfacet else None),
                         row = (self.yfacet if self.yfacet else None),
                         hue = (self.huefacet if self.huefacet else None),
                         col_order = (np.sort(data[self.xfacet].unique()) if self.xfacet else None),
                         row_order = (np.sort(data[self.yfacet].unique()) if self.yfacet else None),
                         hue_order = (np.sort(data[self.huefacet].unique()) if self.huefacet else None),
                         legend_out = False,
                         sharex = False,
                         sharey = False)

    xscale = util.scale_factory(self.xscale, experiment, self.xchannel)
    yscale = util.scale_factory(self.yscale, experiment, self.ychannel)

    for ax in grid.axes.flatten():
        ax.set_xscale(self.xscale, **xscale.mpl_params)
        ax.set_yscale(self.yscale, **yscale.mpl_params)

    grid.map(plt.plot, self.xchannel, self.ychannel, **kwargs)

    # if we have a hue facet and a lot of hues, make a color bar instead
    # of a super-long legend.
    if self.huefacet:
        # NOTE(review): newer matplotlib releases expose the color cycle as
        # 'axes.prop_cycle' -- confirm against the pinned matplotlib version
        current_palette = mpl.rcParams['axes.color_cycle']
        if len(grid.hue_names) > len(current_palette):
            plot_ax = plt.gca()
            cmap = mpl.colors.ListedColormap(sns.color_palette("husl",
                                                               n_colors = len(grid.hue_names)))
            cax, _ = mpl.colorbar.make_axes(plt.gca())
            norm = mpl.colors.Normalize(vmin = np.min(grid.hue_names),
                                        vmax = np.max(grid.hue_names),
                                        clip = False)
            mpl.colorbar.ColorbarBase(cax,
                                      cmap = cmap,
                                      norm = norm)
            # make_axes() changed the current axes; restore the plot axes
            plt.sca(plot_ax)
        else:
            grid.add_legend()
def estimate(self, experiment, subset = None):
    """
    Fit a one-dimensional Gaussian mixture model to each group of events.

    The events in ``experiment.data`` are grouped by the variables in
    ``self.by`` (or treated as a single group when ``self.by`` is empty).
    For each group, the ``self.channel`` values are passed through the
    scale, NaNs are dropped, and a ``mixture.GMM`` with
    ``self.num_components`` components is fitted.  The fitted models are
    stored in ``self._gmms`` keyed by group, and the scale is stored in
    ``self._scale``.

    Parameters
    ----------
    experiment
        The experiment whose ``data`` is used for the fit.
    subset
        Accepted but not used by this implementation.

    Raises
    ------
    util.CytoflowOpError
        If no experiment is given, the channel or a ``by`` variable is
        missing from the data, a ``by`` variable has more than 100 unique
        values, or the estimator does not converge for some group.
    """
    if not experiment:
        raise util.CytoflowOpError("No experiment specified")

    events = experiment.data

    if self.channel not in events:
        raise util.CytoflowOpError("Column {0} not found in the experiment"
                                   .format(self.channel))

    for variable in self.by:
        if variable not in events:
            raise util.CytoflowOpError("Aggregation metadata {0} not found"
                                       " in the experiment"
                                       .format(variable))
        num_values = len(events[variable].unique())
        if num_values > 100:    # WARNING - magic number
            raise util.CytoflowOpError("More than 100 unique values found for"
                                       " aggregation metadata {0}.  Did you"
                                       " accidentally specify a data channel?"
                                       .format(variable))

    # with nothing to group by, a constant key function yields a single
    # group that contains all the events
    if not self.by:
        grouped = events.groupby(lambda _: True)
    else:
        grouped = events.groupby(self.by)

    # the scale is built from the ENTIRE experiment rather than from the
    # per-group subsets, and kept on the instance so the same transform
    # is available after estimation
    self._scale = util.scale_factory(self.scale, experiment, self.channel)

    for key, frame in grouped:
        scaled = self._scale(frame[self.channel].reset_index(drop = True))

        # values outside the scale's domain come back as NaN; discard them
        scaled = scaled[~np.isnan(scaled)]

        model = mixture.GMM(n_components = self.num_components,
                            random_state = 1)
        model.fit(scaled[:, np.newaxis])

        if not model.converged_:
            raise util.CytoflowOpError("Estimator didn't converge"
                                       " for group {0}"
                                       .format(key))

        # order the components by ascending mean so that component
        # numbering is stable from fit to fit
        by_mean = np.argsort(model.means_[:, 0])
        model.means_ = model.means_[by_mean]
        model.weights_ = model.weights_[by_mean]
        model.covars_ = model.covars_[by_mean]

        self._gmms[key] = model
def plot(self, experiment, **kwargs):
    """
    Plot a faceted violin plot of a channel, split by a variable.

    One violin is drawn per value of ``self.variable`` showing the
    distribution of ``self.channel``, on a seaborn ``FacetGrid`` that is
    optionally faceted by ``self.xfacet`` / ``self.yfacet`` and coloured
    by ``self.huefacet``.

    Parameters
    ----------
    experiment
        The experiment to plot.  Its ``data`` and ``conditions``
        attributes are read, and ``query()`` is called when
        ``self.subset`` is set.
    **kwargs
        Passed through to ``_violinplot``.  ``orient`` defaults to
        ``'v'``; ``data_scale`` is always overwritten with the scale
        built for ``self.channel``.

    Raises
    ------
    util.CytoflowViewError
        If no experiment is given, the channel or variable is unset or
        not in the experiment, a facet is not in the experiment, or the
        subset string is invalid or selects no events.
    """
    if not experiment:
        raise util.CytoflowViewError("No experiment specified")

    if not self.channel:
        raise util.CytoflowViewError("Must specify a channel")

    if self.channel not in experiment.data:
        raise util.CytoflowViewError("Channel {0} not in the experiment"
                                     .format(self.channel))

    if not self.variable:
        raise util.CytoflowViewError("Variable not specified")

    if self.variable not in experiment.conditions:
        # this message previously never called .format(), so the user
        # saw a literal "{0}" instead of the variable name
        raise util.CytoflowViewError("Variable {0} isn't in the experiment"
                                     .format(self.variable))

    if self.xfacet and self.xfacet not in experiment.conditions:
        raise util.CytoflowViewError("X facet {0} not in the experiment"
                                     .format(self.xfacet))

    if self.yfacet and self.yfacet not in experiment.conditions:
        raise util.CytoflowViewError("Y facet {0} not in the experiment"
                                     .format(self.yfacet))

    if self.huefacet and self.huefacet not in experiment.conditions:
        raise util.CytoflowViewError("Hue facet {0} not in the experiment"
                                     .format(self.huefacet))

    if self.subset:
        try:
            data = experiment.query(self.subset).data.reset_index()
        except Exception:
            # don't trap KeyboardInterrupt / SystemExit
            raise util.CytoflowViewError("Subset string '{0}' isn't valid"
                                         .format(self.subset))

        if len(data.index) == 0:
            raise util.CytoflowViewError("Subset string '{0}' returned no events"
                                         .format(self.subset))
    else:
        data = experiment.data.copy()

    # get the scale
    scale = util.scale_factory(self.scale, experiment, self.channel)

    kwargs['data_scale'] = scale
    kwargs.setdefault('orient', 'v')

    # hue is not given to the FacetGrid; it is passed to _violinplot as
    # an extra positional column below
    g = sns.FacetGrid(data,
                      size = 6,
                      aspect = 1.5,
                      col = (self.xfacet if self.xfacet else None),
                      row = (self.yfacet if self.yfacet else None),
                      col_order = (np.sort(data[self.xfacet].unique())
                                   if self.xfacet else None),
                      row_order = (np.sort(data[self.yfacet].unique())
                                   if self.yfacet else None),
                      legend_out = False,
                      sharex = False,
                      sharey = False)

    # set the scale for each set of axes; can't just call plt.xscale().
    # the channel lies along x for horizontal violins and y otherwise.
    horizontal = kwargs['orient'] == 'h'
    for ax in g.axes.flatten():
        if horizontal:
            ax.set_xscale(self.scale, **scale.mpl_params)
        else:
            ax.set_yscale(self.scale, **scale.mpl_params)

    # the positional column order is (x, y[, hue]), so it depends on the
    # orientation
    if horizontal:
        violin_args = [self.channel, self.variable]
    else:
        violin_args = [self.variable, self.channel]

    if self.huefacet:
        violin_args.append(self.huefacet)

    g.map(_violinplot,
          *violin_args,
          order = np.sort(data[self.variable].unique()),
          hue_order = (np.sort(data[self.huefacet].unique())
                       if self.huefacet else None),
          **kwargs)

    if self.huefacet:
        g.add_legend(title = self.huefacet)
def plot(self, experiment, **kwargs):
    """
    Plot a bar chart of a summary statistic of a channel.

    ``self.function`` is used as the estimator for ``self.channel`` at
    each value of ``self.by``, drawn with ``sns.factorplot(kind = "bar")``
    and optionally faceted by ``self.xfacet`` / ``self.yfacet`` and
    coloured by ``self.huefacet``.

    Parameters
    ----------
    experiment
        The experiment to plot.  Its ``data``, ``conditions`` and
        ``metadata`` attributes are read, and ``query()`` is called when
        ``self.subset`` is set.
    **kwargs
        Accepted for interface compatibility; not used by this
        implementation.

    Raises
    ------
    util.CytoflowViewError
        If no experiment is given, the channel, variable or function is
        unset, the channel, variable or a facet is not in the experiment,
        or the subset string is invalid or selects no events.
    """
    if not experiment:
        raise util.CytoflowViewError("No experiment specified")

    if not self.channel:
        raise util.CytoflowViewError("Channel not specified")

    if self.channel not in experiment.data:
        raise util.CytoflowViewError("Channel {0} isn't in the experiment"
                                     .format(self.channel))

    if not self.by:
        raise util.CytoflowViewError("Variable not specified")

    if self.by not in experiment.conditions:
        # this message previously never called .format(), so the user
        # saw a literal "{0}" instead of the variable name
        raise util.CytoflowViewError("Variable {0} isn't in the experiment"
                                     .format(self.by))

    if not self.function:
        raise util.CytoflowViewError("Function not specified")

    if self.xfacet and self.xfacet not in experiment.conditions:
        raise util.CytoflowViewError("X facet {0} isn't in the experiment"
                                     .format(self.xfacet))

    if self.yfacet and self.yfacet not in experiment.metadata:
        raise util.CytoflowViewError("Y facet {0} isn't in the experiment"
                                     .format(self.yfacet))

    if self.huefacet and self.huefacet not in experiment.metadata:
        raise util.CytoflowViewError("Hue facet {0} isn't in the experiment"
                                     .format(self.huefacet))

    # error-bar validation is currently disabled:
#         if self.error_bars == 'data' and self.error_function is None:
#             return False
#
#         if self.error_bars == 'summary' \
#             and (self.error_function is None
#                  or not self.error_var in experiment.metadata):
#             return False

    if self.subset:
        # NOTE(review): the result of query() is used below as a
        # DataFrame (.index, column access) -- confirm that is what
        # query() returns here.
        try:
            data = experiment.query(self.subset)
        except Exception:
            # don't trap KeyboardInterrupt / SystemExit
            raise util.CytoflowViewError("Subset string {0} isn't valid"
                                         .format(self.subset))

        if len(data.index) == 0:
            raise util.CytoflowViewError("Subset string '{0}' returned no events"
                                         .format(self.subset))
    else:
        data = experiment.data

    grid = sns.factorplot(x = self.by,
                          y = self.channel,
                          data = data,
                          size = 6,
                          aspect = 1.5,
                          row = (self.yfacet if self.yfacet else None),
                          col = (self.xfacet if self.xfacet else None),
                          hue = (self.huefacet if self.huefacet else None),
                          col_order = (np.sort(data[self.xfacet].unique())
                                       if self.xfacet else None),
                          row_order = (np.sort(data[self.yfacet].unique())
                                       if self.yfacet else None),
                          hue_order = (np.sort(data[self.huefacet].unique())
                                       if self.huefacet else None),
                          # something buggy here.
                          #orient = ("h" if self.orientation == "horizontal" else "v"),
                          estimator = self.function,
                          ci = None,
                          kind = "bar")

    scale = util.scale_factory(self.scale, experiment, self.channel)

    # because the bottom of a bar chart is "0", masking out bad
    # values on a log scale doesn't work.  we must clip instead.
    if self.scale == "log":
        scale.mode = "clip"

    # set the scale on every facet's axes; plt.yscale() would only touch
    # the current axes, leaving the other facets unscaled
    for ax in grid.axes.flatten():
        ax.set_yscale(self.scale, **scale.mpl_params)