class BinningOp(HasStrictTraits):
    """
    Bin data along an axis.

    This operation creates equally spaced bins (in linear or log space)
    along an axis and adds a condition assigning each event to a bin.  The
    value of the event's condition is the left end of the bin's interval in
    which the event is located.

    Attributes
    ----------
    name : Str
        The operation name.  Used to name the new metadata field in the
        experiment that's created by apply()

    bin_count_name : Str
        If set, :meth:`apply` adds a second condition with this name that
        holds the number of events sharing the event's bin.

    channel : Str
        The name of the channel along which to bin.

    scale : {"linear", "log", "logicle"}
        Make the bins equidistant along what scale?

    bin_width : Float
        The width of the bins.  If :attr:`scale` is ``log``,
        :attr:`bin_width` is in log-10 units; if :attr:`scale` is
        ``logicle``, an error is thrown because the units are ill-defined.

    Examples
    --------
    Create a small experiment:

    .. plot::
        :context: close-figs

        >>> import cytoflow as flow
        >>> import_op = flow.ImportOp()
        >>> import_op.tubes = [flow.Tube(file = "tasbe/rby.fcs")]
        >>> ex = import_op.apply()

    Create and parameterize the operation

    .. plot::
        :context: close-figs

        >>> bin_op = flow.BinningOp()
        >>> bin_op.name = "Bin"
        >>> bin_op.channel = "FITC-A"
        >>> bin_op.scale = "log"
        >>> bin_op.bin_width = 0.2

    Apply the operation to the experiment

    .. plot::
        :context: close-figs

        >>> ex2 = bin_op.apply(ex)

    Plot the result

    .. plot::
        :context: close-figs

        >>> bin_op.default_view().plot(ex2)
    """

    # traits
    id = Constant('edu.mit.synbio.cytoflow.operations.binning')
    friendly_id = Constant("Binning")

    name = CStr()
    bin_count_name = CStr()
    channel = Str()
    num_bins = util.Removed(err_string="'num_bins' was removed in 0.9")
    bin_width = util.PositiveFloat(0, allow_zero=True)
    scale = util.ScaleEnum

    # upper bound on the number of bin edges apply() will create
    _max_num_bins = Int(100)

    def apply(self, experiment):
        """
        Applies the binning to an experiment.

        Parameters
        ----------
        experiment : Experiment
            the old_experiment to which this op is applied

        Returns
        -------
        Experiment
            A new experiment with a condition column named :attr:`name`,
            which contains the location of the left-most edge of the bin
            that the event is in.  If :attr:`bin_count_name` is set, another
            column is added with that name as well, containing the number
            of events in the same bin as the event.

        Raises
        ------
        util.CytoflowOpError
            If the experiment or a required attribute is missing, a name
            collides with an existing column, the scale is not ``linear``
            or ``log``, or the resulting number of bins is out of range.
        """
        if experiment is None:
            raise util.CytoflowOpError('experiment',
                                       "no experiment specified")

        if not self.name:
            raise util.CytoflowOpError('name', "Name is not set")

        if self.name != util.sanitize_identifier(self.name):
            raise util.CytoflowOpError(
                'name',
                "Name can only contain letters, numbers and underscores.")

        if self.name in experiment.data.columns:
            raise util.CytoflowOpError(
                'name',
                "Name {} is in the experiment already".format(self.name))

        if (self.bin_count_name
                and self.bin_count_name in experiment.data.columns):
            raise util.CytoflowOpError(
                'bin_count_name',
                "bin_count_name {} is in the experiment already".format(
                    self.bin_count_name))

        if not self.channel:
            raise util.CytoflowOpError('channel', "channel is not set")

        if self.channel not in experiment.data.columns:
            raise util.CytoflowOpError(
                'channel',
                "channel {} isn't in the experiment".format(self.channel))

        if not self.bin_width:
            raise util.CytoflowOpError('bin_width', "must set bin width")

        if not (self.scale == "linear" or self.scale == "log"):
            raise util.CytoflowOpError(
                'scale',
                "Can only use binning op with linear or log scale")

        scale = util.scale_factory(self.scale,
                                   experiment,
                                   channel=self.channel)

        clipped = scale.clip(experiment.data[self.channel])
        scaled_min = scale(clipped.min())
        scaled_max = scale(clipped.max())

        # the bin edges are anchored at `start` (in scaled units) and grow
        # outwards from it in both directions
        if self.scale == 'linear':
            start = 0
        else:
            start = 1

        # edges at or below the anchor: build them on the negated axis so
        # arange can count upwards, then flip back and put them in
        # ascending order.  the final element is the anchor itself, which
        # the right-hand edges already contain, so drop it.
        scaled_bins_left = np.arange(
            start=-1.0 * start,
            stop=(-1.0 * scaled_min) + self.bin_width,
            step=self.bin_width) * -1.0
        scaled_bins_left = scaled_bins_left[::-1][:-1]

        scaled_bins_right = np.arange(start=start,
                                      stop=scaled_max + self.bin_width,
                                      step=self.bin_width)
        scaled_bins = np.append(scaled_bins_left, scaled_bins_right)

        if len(scaled_bins) > self._max_num_bins:
            raise util.CytoflowOpError(
                None,
                "Too many bins! To increase this limit, "
                "change _max_num_bins (currently {})".format(
                    self._max_num_bins))

        if len(scaled_bins) < 2:
            raise util.CytoflowOpError('bin_width',
                                       "Must have more than one bin")

        # now, back into data space
        bins = scale.inverse(scaled_bins)

        # reduce to 4 sig figs
        bins = np.array([float('%.4g' % x) for x in bins])

        # put the data in bins.  digitizing against the interior edges only
        # means every event gets an index that is valid for `bins`.
        bin_idx = np.digitize(experiment.data[self.channel], bins[1:-1])

        new_experiment = experiment.clone()
        new_experiment.add_condition(self.name, "float64", bins[bin_idx])

        # keep track of the bins we used, for prettier plotting later.
        new_experiment.metadata[self.name]["bin_scale"] = self.scale
        new_experiment.metadata[self.name]["bins"] = bins

        if self.bin_count_name:
            # count the events per bin straight from the bin column; this
            # avoids aggregating every other column of the data frame just
            # to throw all but one of them away.
            bin_sizes = new_experiment.data[self.name].value_counts()

            # have to make the condition a float64, because if we're in log
            # space there may be events that have NaN as the bin number.
            new_experiment.add_condition(
                self.bin_count_name, "float64",
                new_experiment[self.name].map(bin_sizes))

        new_experiment.history.append(
            self.clone_traits(transient=lambda _: True))
        return new_experiment

    def default_view(self, **kwargs):
        """
        Returns a diagnostic plot to check the binning.

        Parameters
        ----------
        **kwargs
            Trait values to set on the returned view.

        Returns
        -------
        IView
            A view instance, call :meth:`plot()` to plot the bins.
        """
        v = BinningView(op=self)
        v.trait_set(**kwargs)
        return v
class Base1DStatisticsView(BaseStatisticsView):
    """
    The base class for 1-dimensional statistic views -- ie, the
    :attr:`variable` attribute is on the x axis, and the statistic value
    is on the y axis.

    Attributes
    ----------
    statistic : (str, str)
        The name of the statistic to plot.  Must be a key in the
        :attr:`~Experiment.statistics` attribute of the
        :class:`~.Experiment` being plotted.

    error_statistic : (str, str)
        The name of the statistic used to plot error bars.  Must be a key
        in the :attr:`~Experiment.statistics` attribute of the
        :class:`~.Experiment` being plotted.

    scale : {'linear', 'log', 'logicle'}
        The scale applied to the data before plotting it.
    """

    REMOVED_ERROR = "Statistics changed dramatically in 0.5; please see the documentation"

    by = util.Removed(err_string=REMOVED_ERROR)
    yfunction = util.Removed(err_string=REMOVED_ERROR)
    ychannel = util.Removed(err_string=REMOVED_ERROR)
    channel = util.Removed(err_string=REMOVED_ERROR)
    function = util.Removed(err_string=REMOVED_ERROR)
    error_bars = util.Removed(err_string=REMOVED_ERROR)

    xvariable = util.Deprecated(new="variable")

    statistic = Tuple(Str, Str)
    error_statistic = Tuple(Str, Str)

    scale = util.ScaleEnum

    def enum_plots(self, experiment):
        """
        Build the plot data for `experiment` and hand it to the parent
        class's ``enum_plots``.

        Raises
        ------
        util.CytoflowViewError
            If `experiment` is ``None`` or the statistics are invalid.
        """
        if experiment is None:
            raise util.CytoflowViewError('experiment',
                                         "No experiment specified")

        data = self._make_data(experiment)
        return super().enum_plots(experiment, data)

    def plot(self, experiment, plot_name=None, **kwargs):
        """
        Parameters
        ----------
        orientation : {'vertical', 'horizontal'}

        lim : (float, float)
            Set the range of the plot's axis.
        """
        if experiment is None:
            raise util.CytoflowViewError('experiment',
                                         "No experiment specified")

        data = self._make_data(experiment)

        if not self.variable:
            raise util.CytoflowViewError('variable', "variable not set")

        if self.variable not in experiment.conditions:
            raise util.CytoflowViewError(
                'variable',
                "variable {0} not in the experiment".format(self.variable))

        scale = util.scale_factory(self.scale,
                                   experiment,
                                   statistic=self.statistic,
                                   error_statistic=self.error_statistic)

        super().plot(experiment,
                     data,
                     plot_name=plot_name,
                     scale=scale,
                     **kwargs)

    def _make_data(self, experiment):
        """
        Validate :attr:`statistic` / :attr:`error_statistic` and combine
        them into one data frame.

        Returns
        -------
        pandas.DataFrame
            Indexed like the statistic, with one column named after the
            statistic and, if an error statistic is set, one column named
            after the error statistic.

        Raises
        ------
        util.CytoflowViewError
            If the experiment is missing, a statistic is unset, unknown or
            non-numeric, or the error statistic does not line up with the
            data statistic.
        """
        if experiment is None:
            raise util.CytoflowViewError('experiment',
                                         "No experiment specified")

        # a 2-tuple is always truthy, so "unset" has to be detected from
        # its first element -- same test that is used for error_statistic
        if not self.statistic or not self.statistic[0]:
            raise util.CytoflowViewError('statistic', "Statistic not set")

        if self.statistic not in experiment.statistics:
            raise util.CytoflowViewError(
                'statistic',
                "Can't find the statistic {} in the experiment".format(
                    self.statistic))

        stat = experiment.statistics[self.statistic]

        if not util.is_numeric(stat):
            raise util.CytoflowViewError('statistic',
                                         "Statistic must be numeric")

        error_stat = None
        if self.error_statistic[0]:
            if self.error_statistic not in experiment.statistics:
                raise util.CytoflowViewError(
                    'error_statistic',
                    "Can't find the error statistic in the experiment")

            # work on a copy: the re-indexing and in-place sort below must
            # not alter the statistic stored on the experiment
            error_stat = experiment.statistics[self.error_statistic].copy()

            try:
                error_stat.index = error_stat.index.reorder_levels(
                    stat.index.names)
                error_stat.sort_index(inplace=True)
            except AttributeError:
                # the index has no reorder_levels(); leave it as it is
                pass

            if not stat.index.equals(error_stat.index):
                raise util.CytoflowViewError(
                    'error_statistic',
                    "Data statistic and error statistic "
                    " don't have the same index.")

            if stat.name == error_stat.name:
                raise util.CytoflowViewError(
                    'error_statistic',
                    "Data statistic and error statistic can "
                    "not have the same name.")

        data = pd.DataFrame(index=stat.index)
        data[stat.name] = stat

        if error_stat is not None:
            data[error_stat.name] = error_stat

        return data
class Base2DStatisticsView(BaseStatisticsView):
    """
    The base class for 2-dimensional statistic views -- ie, the
    :attr:`variable` attribute varies independently, and the corresponding
    values from the x and y statistics are plotted on the x and y axes.

    Attributes
    ----------
    xstatistic, ystatistic : (str, str)
        The name of the statistics to plot.  Must be keys in the
        :attr:`~Experiment.statistics` attribute of the
        :class:`~.Experiment` being plotted.

    x_error_statistic, y_error_statistic : (str, str)
        The name of the statistics used to plot error bars.  Must be keys
        in the :attr:`~Experiment.statistics` attribute of the
        :class:`~.Experiment` being plotted.

    xscale, yscale : {'linear', 'log', 'logicle'}
        The scales applied to the data before plotting it.
    """

    STATS_REMOVED = "{} has been removed. Statistics changed dramatically in 0.5; please see the documentation."

    xchannel = util.Removed(err_string=STATS_REMOVED)
    xfunction = util.Removed(err_string=STATS_REMOVED)
    ychannel = util.Removed(err_string=STATS_REMOVED)
    yfunction = util.Removed(err_string=STATS_REMOVED)

    xstatistic = Tuple(Str, Str)
    ystatistic = Tuple(Str, Str)
    x_error_statistic = Tuple(Str, Str)
    y_error_statistic = Tuple(Str, Str)

    xscale = util.ScaleEnum
    yscale = util.ScaleEnum

    def enum_plots(self, experiment):
        """
        Build the plot data for `experiment` and hand it to the parent
        class's ``enum_plots``.

        Raises
        ------
        util.CytoflowViewError
            If `experiment` is ``None`` or the statistics are invalid.
        """
        if experiment is None:
            raise util.CytoflowViewError('experiment',
                                         "No experiment specified")

        data = self._make_data(experiment)
        return super().enum_plots(experiment, data)

    def plot(self, experiment, plot_name=None, **kwargs):
        """
        Parameters
        ----------
        xlim, ylim : (float, float)
            Set the range of the plot's axis.
        """
        if experiment is None:
            raise util.CytoflowViewError('experiment',
                                         "No experiment specified")

        data = self._make_data(experiment)

        xscale = util.scale_factory(self.xscale,
                                    experiment,
                                    statistic=self.xstatistic,
                                    error_statistic=self.x_error_statistic)

        yscale = util.scale_factory(self.yscale,
                                    experiment,
                                    statistic=self.ystatistic,
                                    error_statistic=self.y_error_statistic)

        super().plot(experiment,
                     data,
                     plot_name,
                     xscale=xscale,
                     yscale=yscale,
                     **kwargs)

    def _make_data(self, experiment):
        """
        Validate the X/Y statistics and their optional error statistics,
        restrict them to the index entries the X and Y statistics share,
        and combine them into one data frame.

        Returns
        -------
        pandas.DataFrame
            One column per statistic (named after the statistic), indexed
            by the intersection of the X and Y statistics' indices.

        Raises
        ------
        util.CytoflowViewError
            If the experiment is missing, a statistic is unset, unknown or
            non-numeric, two statistics share a name, or an error statistic
            does not line up with its data statistic.
        """
        if experiment is None:
            raise util.CytoflowViewError('experiment',
                                         "No experiment specified")

        # ---- X statistic -------------------------------------------------

        # a 2-tuple is always truthy, so "unset" has to be detected from
        # its first element -- same test that is used for the error stats
        if not self.xstatistic or not self.xstatistic[0]:
            raise util.CytoflowViewError('xstatistic', "X Statistic not set")

        if self.xstatistic not in experiment.statistics:
            raise util.CytoflowViewError(
                'xstatistic',
                "Can't find the statistic {} in the experiment".format(
                    self.xstatistic))

        xstat = experiment.statistics[self.xstatistic]

        if not util.is_numeric(xstat):
            raise util.CytoflowViewError('xstatistic',
                                         "X statistic must be numeric")

        if self.x_error_statistic[0]:
            if self.x_error_statistic not in experiment.statistics:
                raise util.CytoflowViewError(
                    'x_error_statistic',
                    "Can't find the X error statistic in the experiment")

            # copies throughout this method: the index reassignment and
            # in-place sorts must not alter the experiment's statistics
            x_error_stat = \
                experiment.statistics[self.x_error_statistic].copy()

            try:
                x_error_stat.index = x_error_stat.index.reorder_levels(
                    xstat.index.names)
                x_error_stat.sort_index(inplace=True)
            except AttributeError:
                # the index has no reorder_levels(); leave it as it is
                pass

            if not xstat.index.equals(x_error_stat.index):
                raise util.CytoflowViewError(
                    'x_error_statistic',
                    "Data statistic and error statistic "
                    " don't have the same index.")

            if xstat.name == x_error_stat.name:
                raise util.CytoflowViewError(
                    'x_error_statistic',
                    "Data statistic and error statistic can "
                    "not have the same name.")

        # ---- Y statistic -------------------------------------------------

        if not self.ystatistic or not self.ystatistic[0]:
            raise util.CytoflowViewError('ystatistic', "Y statistic not set")

        if self.ystatistic not in experiment.statistics:
            raise util.CytoflowViewError(
                'ystatistic',
                "Can't find the Y statistic {} in the experiment".format(
                    self.ystatistic))

        ystat = experiment.statistics[self.ystatistic].copy()

        if not util.is_numeric(ystat):
            raise util.CytoflowViewError('ystatistic',
                                         "Y statistic must be numeric")

        if self.y_error_statistic[0]:
            if self.y_error_statistic not in experiment.statistics:
                raise util.CytoflowViewError(
                    'y_error_statistic',
                    "Can't find the Y error statistic in the experiment")

            y_error_stat = \
                experiment.statistics[self.y_error_statistic].copy()

            try:
                y_error_stat.index = y_error_stat.index.reorder_levels(
                    ystat.index.names)
                y_error_stat.sort_index(inplace=True)
            except AttributeError:
                pass

            if not ystat.index.equals(y_error_stat.index):
                raise util.CytoflowViewError(
                    'y_error_statistic',
                    "Data statistic and error statistic "
                    " don't have the same index.")

            if ystat.name == y_error_stat.name:
                raise util.CytoflowViewError(
                    'y_error_statistic',
                    "Data statistic and error statistic can "
                    "not have the same name.")

        if xstat.name == ystat.name:
            raise util.CytoflowViewError(
                'ystatistic',
                "X and Y statistics can "
                "not have the same name.")

        # ---- align X and Y on their common index -------------------------

        try:
            ystat.index = ystat.index.reorder_levels(xstat.index.names)
            ystat.sort_index(inplace=True)
        except AttributeError:
            pass

        intersect_idx = xstat.index.intersection(ystat.index)
        xstat = xstat.reindex(intersect_idx)
        xstat.sort_index(inplace=True)

        ystat = ystat.reindex(intersect_idx)
        ystat.sort_index(inplace=True)

        # ---- re-derive the error statistics on the common index ----------
        # (their presence in the experiment was already verified above)

        if self.x_error_statistic[0]:
            x_error_stat = \
                experiment.statistics[self.x_error_statistic].copy()

            if set(x_error_stat.index.names) != set(xstat.index.names):
                raise util.CytoflowViewError(
                    'x_error_statistic',
                    "X error statistic doesn't have the "
                    "same indices as the X statistic")

            try:
                x_error_stat.index = x_error_stat.index.reorder_levels(
                    xstat.index.names)
                x_error_stat.sort_index(inplace=True)
            except AttributeError:
                pass

            x_error_stat = x_error_stat.reindex(intersect_idx)
            x_error_stat.sort_index(inplace=True)

            if not x_error_stat.index.equals(xstat.index):
                raise util.CytoflowViewError(
                    'x_error_statistic',
                    "X error statistic doesn't have the "
                    "same indices as the X statistic")
        else:
            x_error_stat = None

        if self.y_error_statistic[0]:
            y_error_stat = \
                experiment.statistics[self.y_error_statistic].copy()

            if set(y_error_stat.index.names) != set(ystat.index.names):
                raise util.CytoflowViewError(
                    'y_error_statistic',
                    "Y error statistic doesn't have the "
                    "same indices as the Y statistic")

            try:
                y_error_stat.index = y_error_stat.index.reorder_levels(
                    ystat.index.names)
                y_error_stat.sort_index(inplace=True)
            except AttributeError:
                pass

            y_error_stat = y_error_stat.reindex(intersect_idx)
            y_error_stat.sort_index(inplace=True)

            if not y_error_stat.index.equals(ystat.index):
                raise util.CytoflowViewError(
                    'y_error_statistic',
                    "Y error statistic doesn't have the "
                    "same values as the Y statistic")
        else:
            y_error_stat = None

        # ---- assemble ----------------------------------------------------

        data = pd.DataFrame(index=xstat.index)
        data[xstat.name] = xstat
        data[ystat.name] = ystat

        if x_error_stat is not None:
            data[x_error_stat.name] = x_error_stat

        if y_error_stat is not None:
            data[y_error_stat.name] = y_error_stat

        return data
class ColorTranslationOp(HasStrictTraits):
    """
    Translate measurements from one color's scale to another, using a
    two-color or three-color control.

    To use, set up the :attr:`controls` dictionary with the channels to
    convert and the FCS files to compute the mapping.  Call
    :meth:`estimate` to parameterize the module; check that the plots look
    good by calling the :meth:`~ColorTranslationDiagnostic.plot` method of
    the :class:`ColorTranslationDiagnostic` instance returned by
    :meth:`default_view`; then call :meth:`apply` to apply the translation
    to an :class:`.Experiment`.

    Attributes
    ----------
    controls : Dict((Str, Str), File)
        Two-color controls used to determine the mapping.  The keys are
        tuples of **from-channel** and **to-channel**.  The values are FCS
        files containing two-color constitutive fluorescent expression data
        for the mapping.

    controls_frames : Dict((Str, Str), DataFrame)
        Used in place of :attr:`controls` when :attr:`controls` is empty;
        same keys, but the values are data frames handed to the importer
        instead of file names.

    mixture_model : Bool (default = False)
        If ``True``, try to model the **from** channel as a mixture of
        expressing cells and non-expressing cells (as you would get with a
        transient transfection), then weight the regression by the
        probability that the cell is from the top (transfected)
        distribution.  Make sure you check the diagnostic plots to see that
        this worked!

    linear_model : Bool (default = False)
        Set this to ``True`` to get a scaling that is strictly
        multiplicative, mirroring the TASBE approach.  Do check the
        diagnostic plot, though, to see how well (or poorly) your model
        fits the data.

    control_conditions : Dict((Str, Str), Dict(Str, Any))
        Occasionally, you'll need to specify the experimental conditions
        that the bleedthrough tubes were collected under (to apply the
        operations in the history.)  Specify them here.  The key is a tuple
        of channel names; the value is a dictionary of the conditions (same
        as you would specify for a :class:`~.Tube` )

    Notes
    -----
    In the TASBE workflow, this operation happens *after* the application
    of :class:`.AutofluorescenceOp` and :class:`.BleedthroughLinearOp`.
    The entire operation history of the :class:`.Experiment` that is passed
    to :meth:`estimate` is replayed on the control files in
    :attr:`controls`, so they are also corrected for autofluorescence and
    bleedthrough, and have metadata for subsetting.

    Examples
    --------
    Create a small experiment:

    .. plot::
        :context: close-figs

        >>> import cytoflow as flow
        >>> import_op = flow.ImportOp()
        >>> import_op.tubes = [flow.Tube(file = "tasbe/mkate.fcs")]
        >>> ex = import_op.apply()

    Create and parameterize the operation

    .. plot::
        :context: close-figs

        >>> color_op = flow.ColorTranslationOp()
        >>> color_op.controls = {("Pacific Blue-A", "FITC-A") : "tasbe/rby.fcs",
        ...                      ("PE-Tx-Red-YG-A", "FITC-A") : "tasbe/rby.fcs"}
        >>> color_op.mixture_model = True

    Estimate the model parameters

    .. plot::
        :context: close-figs

        >>> color_op.estimate(ex)

    Plot the diagnostic plot

    .. plot::
        :context: close-figs

        >>> color_op.default_view().plot(ex)

    Apply the operation to the experiment

    .. plot::
        :context: close-figs

        >>> ex = color_op.apply(ex)
    """

    # traits
    id = Constant('edu.mit.synbio.cytoflow.operations.color_translation')
    friendly_id = Constant("Color translation")

    name = Constant("Color Translation")

    translation = util.Removed(
        err_string=
        "'translation' is removed; the same info is found in 'controls'",
        warning=True)
    controls = Dict(Tuple(Str, Str), File)
    controls_frames = Dict(Tuple(Str, Str), Instance(DataFrame))
    mixture_model = Bool(False)
    linear_model = Bool(False)
    control_conditions = Dict(Tuple(Str, Str), Dict(Str, Any), {})

    # The regression coefficients determined by `estimate()`, used to map
    # colors between channels.  The keys are tuples of (*from-channel*,
    # *to-channel*) (the same keys as in `controls`).  The values are the
    # log-log coefficients for the color translation (determined by
    # `estimate()`).
    # TODO - why can't i make the value List(Float)?
    _coefficients = Dict(Tuple(Str, Str), Any, transient=True)

    # the fitted translation function for each (from, to) pair
    _trans_fn = Dict(Tuple(Str, Str), Callable, transient=True)

    # a random sample of the positive control events for each pair
    _sample = Dict(Tuple(Str, Str), Any, transient=True)

    # the two mixture-model component means (only set if `mixture_model`)
    _means = Dict(Tuple(Str, Str), Tuple(Float, Float), transient=True)

    def estimate(self, experiment, subset=None):
        """
        Estimate the mapping from the two-channel controls

        Parameters
        ----------
        experiment : Experiment
            The :class:`.Experiment` used to check the voltages, etc. of
            the control tubes.  Also the source of the operation history
            that is replayed on the control tubes.

        subset : Str
            A Python expression used to subset the controls before
            estimating the color translation parameters.

        Raises
        ------
        util.CytoflowOpError
            If the experiment or controls are missing, a channel is not in
            the experiment, the history contains an operation whose ``by``
            names an experimental condition, the subset is invalid or
            empty, or a control has no events that are positive in both
            channels.
        """
        if experiment is None:
            raise util.CytoflowOpError('experiment',
                                       "No experiment specified")

        if not self.controls and not self.controls_frames:
            raise util.CytoflowOpError('controls', "No controls specified")

        # throw away the results of any previous estimate
        self._coefficients.clear()
        self._trans_fn.clear()
        self._sample.clear()
        self._means.clear()

        tubes = {}

        from_files = self.controls != {}
        controls = self.controls if from_files else self.controls_frames

        translation = {x[0]: x[1] for x in list(controls.keys())}

        for from_channel, to_channel in translation.items():

            if from_channel not in experiment.channels:
                raise util.CytoflowOpError(
                    'translation',
                    "Channel {0} not in the experiment".format(from_channel))

            if to_channel not in experiment.channels:
                raise util.CytoflowOpError(
                    'translation',
                    "Channel {0} not in the experiment".format(to_channel))

            if (from_channel, to_channel) not in controls:
                raise util.CytoflowOpError(
                    'translation',
                    "Control file for {0} --> {1} "
                    "not specified".format(from_channel, to_channel))

            control_key = (from_channel, to_channel)
            control = controls[control_key]

            tube_conditions = self.control_conditions[control_key] \
                if control_key in self.control_conditions \
                else {}
            conditions = {
                k: experiment.data[k].dtype.name
                for k in tube_conditions.keys()
            }

            if control_key not in tubes:
                channels = {
                    experiment.metadata[c]["fcs_name"]: c
                    for c in experiment.channels
                }
                name_metadata = experiment.metadata['name_metadata']

                # make a little Experiment
                if from_files:
                    check_tube(control, experiment)
                    tube_exp = ImportOp(
                        tubes=[Tube(file=control,
                                    conditions=tube_conditions)],
                        conditions=conditions,
                        channels=channels,
                        name_metadata=name_metadata).apply()
                else:
                    tube_exp = ImportOp(
                        tubes=[Tube(frame=control,
                                    conditions=tube_conditions)],
                        conditions=conditions,
                        channels=channels,
                        name_metadata=name_metadata).apply()

                # apply previous operations
                for op in experiment.history:
                    if hasattr(op, 'by'):
                        for by in op.by:
                            if 'experiment' in experiment.metadata[by]:
                                raise util.CytoflowOpError(
                                    'experiment',
                                    "Prior to applying this operation, "
                                    "you must not apply any operation with 'by' "
                                    "set to an experimental condition.")
                    tube_exp = op.apply(tube_exp)

                # subset the events
                if subset:
                    try:
                        tube_exp = tube_exp.query(subset)
                    except Exception as e:
                        raise util.CytoflowOpError(
                            'subset',
                            "Subset string '{0}' isn't valid".format(
                                subset)) from e

                    if len(tube_exp.data) == 0:
                        raise util.CytoflowOpError(
                            'subset',
                            "Subset string '{0}' returned no events".format(
                                subset))

                tubes[control_key] = tube_exp.data

            # only events that are positive in both channels can be
            # log-transformed
            data = tubes[control_key][[from_channel, to_channel]].copy()
            data = data[data[from_channel] > 0]
            data = data[data[to_channel] > 0]
            data.reset_index(drop=True, inplace=True)

            if len(data) == 0:
                # a regression on zero events is meaningless; say so
                # instead of fitting on empty data
                raise util.CytoflowOpError(
                    'controls',
                    "Control for {0} --> {1} has no events that are "
                    "positive in both channels".format(from_channel,
                                                       to_channel))

            # keep a small sample of the events (in data space)
            self._sample[(from_channel, to_channel)] = \
                data.sample(n=min(len(data), 100))

            data[from_channel] = np.log10(data[from_channel])
            data[to_channel] = np.log10(data[to_channel])

            if self.mixture_model:
                gmm = sklearn.mixture.BayesianGaussianMixture(
                    n_components=2, random_state=1)
                fit = gmm.fit(data)

                self._means[(from_channel, to_channel)] = \
                    (10 ** fit.means_[0][0], 10 ** fit.means_[1][0])

                # pick the component with the maximum mean
                idx = 0 if fit.means_[0][0] > fit.means_[1][0] else 1
                weights = [x[idx] for x in fit.predict_proba(data)]
            else:
                weights = [1] * len(data.index)

            if self.linear_model:
                # this mimics the TASBE approach, which constrains the fit
                # to a multiplicative scaling (eg, a linear fit with an
                # intercept of 0.)  I disagree that this is the right
                # approach, which is why it's not the default.
                f = lambda x: weights * (
                    data[to_channel] - x[0] * data[from_channel])
                x0 = [1]

                trans_fn = lambda data, x: np.power(data, x[0])
            else:
                # this code uses a different approach from TASBE. instead
                # of computing a multiplicative scaling constant, it
                # computes a full linear regression on the log-scaled data
                # (ie, allowing the intercept to vary as well as the
                # slope).  this is a more general model of the underlying
                # physical behavior, and fits the data better -- but it may
                # not be more "correct."
                f = lambda x: weights * (
                    data[to_channel] - x[0] * data[from_channel] - x[1])
                x0 = [1, 0]

                trans_fn = lambda data, x: (10**x[1]) * np.power(data, x[0])

            opt = scipy.optimize.least_squares(f, x0)

            self._coefficients[(from_channel, to_channel)] = opt.x

            # bind both the coefficients and the model function as default
            # arguments, so the stored callable does not depend on names
            # that are rebound on the next loop iteration
            self._trans_fn[(from_channel, to_channel)] = \
                lambda data, x=opt.x, fn=trans_fn: fn(data, x)

    def apply(self, experiment):
        """Applies the color translation to an experiment

        Parameters
        ----------
        experiment : Experiment
            the old_experiment to which this op is applied

        Returns
        -------
        Experiment
            a new experiment with the color translation applied.  The
            corrected channels also have the following new metadata:

            **channel_translation** : Str
            Which channel was this one translated to?

            **channel_translation_fn** : Callable (pandas.Series --> pandas.Series)
            The function that translated this channel

        Raises
        ------
        util.CytoflowOpError
            If the experiment or controls are missing, or
            :meth:`estimate` has not produced a translation for every
            control.
        """
        if experiment is None:
            raise util.CytoflowOpError('experiment',
                                       "No experiment specified")

        if not self.controls and not self.controls_frames:
            raise util.CytoflowOpError('controls', "No controls specified")

        if not self._trans_fn:
            raise util.CytoflowOpError(
                None,
                "Transfer functions aren't set. "
                "Did you forget to call estimate()?")

        if self.controls != {}:
            controls = self.controls
        else:
            controls = self.controls_frames

        translation = {x[0]: x[1] for x in list(controls.keys())}
        from_channels = [x[0] for x in list(controls.keys())]

        for key, val in translation.items():
            if (key, val) not in self._coefficients:
                raise util.CytoflowOpError(
                    None,
                    "Coefficients aren't set for translation "
                    "{} --> {}. Did you call estimate()?".format(key, val))

        new_experiment = experiment.clone()

        # drop the events that aren't positive in a translated channel
        for channel in from_channels:
            new_experiment.data = \
                new_experiment.data[new_experiment.data[channel] > 0]

        for from_channel, to_channel in translation.items():
            trans_fn = self._trans_fn[(from_channel, to_channel)]

            new_experiment[from_channel] = trans_fn(experiment[from_channel])
            new_experiment.metadata[from_channel][
                'channel_translation_fn'] = trans_fn
            new_experiment.metadata[from_channel][
                'channel_translation'] = to_channel

        new_experiment.history.append(
            self.clone_traits(transient=lambda _: True))

        return new_experiment

    def default_view(self, **kwargs):
        """
        Returns a diagnostic plot to see if the color translation
        estimation is working.

        Parameters
        ----------
        **kwargs
            Trait values to set on the returned view.

        Returns
        -------
        IView
            A diagnostic view, call
            :meth:`ColorTranslationDiagnostic.plot` to see the diagnostic
            plots
        """
        v = ColorTranslationDiagnostic(op=self)
        v.trait_set(**kwargs)
        return v
class Stats1DView(Base1DStatisticsView):
    """
    Plot a statistic.  The value of the statistic will be plotted on the
    Y axis; a numeric conditioning variable must be chosen for the X axis.
    Every variable in the statistic must be specified as either the
    `variable` or one of the plot facets.

    Attributes
    ----------
    variable_scale : {'linear', 'log', 'logicle'}
        The scale applied to the variable (on the X axis)

    Examples
    --------

    .. plot::
        :context: close-figs

        Make a little data set.

        >>> import cytoflow as flow
        >>> import_op = flow.ImportOp()
        >>> import_op.tubes = [flow.Tube(file = "Plate01/RFP_Well_A3.fcs",
        ...                              conditions = {'Dox' : 10.0}),
        ...                    flow.Tube(file = "Plate01/CFP_Well_A4.fcs",
        ...                              conditions = {'Dox' : 1.0})]
        >>> import_op.conditions = {'Dox' : 'float'}
        >>> ex = import_op.apply()

    Create a new statistic.

    .. plot::
        :context: close-figs

        >>> ch_op = flow.ChannelStatisticOp(name = 'MeanByDox',
        ...                                 channel = 'Y2-A',
        ...                                 function = flow.geom_mean,
        ...                                 by = ['Dox'])
        >>> ex2 = ch_op.apply(ex)

    View the new statistic

    .. plot::
        :context: close-figs

        >>> flow.Stats1DView(variable = 'Dox',
        ...                  statistic = ('MeanByDox', 'geom_mean'),
        ...                  variable_scale = 'log',
        ...                  scale = 'log').plot(ex2)
    """

    # traits
    id = Constant("edu.mit.synbio.cytoflow.view.stats1d")
    friendly_id = Constant("1D Statistics View")

    # the Removed traits below are given the message text itself, not the
    # Constant trait that wraps it
    _REMOVED_MESSAGE = "Statistics changed dramatically in 0.5; please see the documentation"
    REMOVED_ERROR = Constant(_REMOVED_MESSAGE)

    by = util.Removed(err_string=_REMOVED_MESSAGE)
    yfunction = util.Removed(err_string=_REMOVED_MESSAGE)
    ychannel = util.Removed(err_string=_REMOVED_MESSAGE)
    xvariable = util.Deprecated(new="variable")
    xscale = util.Deprecated(new='variable_scale')

    variable_scale = util.ScaleEnum

    def enum_plots(self, experiment):
        """
        Returns an iterator over the possible plots that this View can
        produce.  The values returned can be passed to :meth:`plot`.
        """
        return super().enum_plots(experiment)

    def plot(self, experiment, plot_name=None, **kwargs):
        """Plot a chart of a variable's values against a statistic.

        Parameters
        ----------

        variable_lim : (float, float)
            The limits on the variable axis

        color : a matplotlib color
            The color to plot with.  Overridden if `huefacet` is not `None`

        linewidth : float
            The width of the line, in points

        linestyle : ['solid' | 'dashed', 'dashdot', 'dotted' | (offset, on-off-dash-seq) | '-' | '--' | '-.' | ':' | 'None' | ' ' | '']

        marker : a matplotlib marker style
            See http://matplotlib.org/api/markers_api.html#module-matplotlib.markers

        markersize : int
            The marker size in points

        markerfacecolor : a matplotlib color
            The color to make the markers.  Overridden (?) if `huefacet`
            is not `None`

        alpha : the alpha blending value, from 0.0 (transparent) to 1.0 (opaque)

        capsize : scalar
            The size of the error bar caps, in points

        shade_error : bool
            If `False` (the default), plot the error statistic as
            traditional "error bars."  If `True`, plot error statistic as
            a filled, shaded region.

        shade_alpha : float
            The transparency of the shaded error region, from 0.0
            (transparent) to 1.0 (opaque.)  Default is 0.2.

        Notes
        -----
        Other `kwargs` are passed to `matplotlib.pyplot.plot <https://matplotlib.org/devdocs/api/_as_gen/matplotlib.pyplot.plot.html>`_

        Raises
        ------
        util.CytoflowViewError
            If `experiment` is ``None``, or :attr:`variable` is not a
            numeric condition of the experiment.
        """
        if experiment is None:
            raise util.CytoflowViewError('experiment',
                                         "No experiment specified")

        # view-parameter problems are reported as CytoflowViewError, like
        # every other check in the statistics views
        if self.variable not in experiment.conditions:
            raise util.CytoflowViewError(
                'variable',
                "Variable {} not in the experiment".format(self.variable))

        if not util.is_numeric(experiment[self.variable]):
            raise util.CytoflowViewError(
                'variable',
                "Variable {} must be numeric".format(self.variable))

        variable_scale = util.scale_factory(self.variable_scale,
                                            experiment,
                                            condition=self.variable)

        super().plot(experiment,
                     plot_name,
                     variable_scale=variable_scale,
                     **kwargs)

    def _grid_plot(self, experiment, grid, **kwargs):
        """
        Draw the statistic (and optional error statistic) on every facet of
        `grid`.

        Consumes the ``scale``, ``variable_scale``, ``variable_lim``,
        ``lim``, ``orientation``, ``capsize``, ``shade_error`` and
        ``shade_alpha`` entries of `kwargs`; the rest are forwarded to
        ``plt.plot``.

        Returns
        -------
        dict
            The ``xscale`` / ``xlim`` / ``yscale`` / ``ylim`` for the
            plot; which axis carries the variable depends on the
            orientation.
        """
        data = grid.data
        data_scale = kwargs.pop('scale')
        variable_scale = kwargs.pop('variable_scale')

        stat = experiment.statistics[self.statistic]
        stat_name = stat.name
        if self.error_statistic[0]:
            err_stat = experiment.statistics[self.error_statistic]
            err_stat_name = err_stat.name
        else:
            err_stat = None
            err_stat_name = None

        variable_lim = kwargs.pop("variable_lim", None)
        if variable_lim is None:
            variable_lim = (
                variable_scale.clip(data[self.variable].min() * 0.9),
                variable_scale.clip(data[self.variable].max() * 1.1))

        lim = kwargs.pop("lim", None)
        if lim is None:
            lim = (data_scale.clip(data[stat_name].min() * 0.9),
                   data_scale.clip(data[stat_name].max() * 1.1))

            if self.error_statistic[0]:
                try:
                    # error values that are (low, high) pairs
                    lim = (
                        data_scale.clip(
                            min([x[0] for x in data[err_stat_name]]) * 0.9),
                        data_scale.clip(
                            max([x[1] for x in data[err_stat_name]]) * 1.1))
                except (TypeError, IndexError):
                    # scalar error values, drawn +/- around the statistic.
                    # take the extremes of the bar ends themselves: the
                    # smallest statistic minus the smallest error can sit
                    # above the lowest bar end.
                    lowest = (data[stat_name] - data[err_stat_name]).min()
                    highest = (data[stat_name] + data[err_stat_name]).max()
                    lim = (data_scale.clip(lowest * 0.9),
                           data_scale.clip(highest * 1.1))

        orientation = kwargs.pop('orientation', 'vertical')
        capsize = kwargs.pop('capsize', None)
        shade_error = kwargs.pop('shade_error', False)
        shade_alpha = kwargs.pop('shade_alpha', 0.2)

        if orientation == 'vertical':
            # plot the error bars first so the axis labels don't get
            # overwritten
            if err_stat is not None:
                if shade_error:
                    grid.map(_v_error_shade,
                             self.variable,
                             stat_name,
                             err_stat_name,
                             alpha=shade_alpha)
                else:
                    grid.map(_v_error_bars,
                             self.variable,
                             stat_name,
                             err_stat_name,
                             capsize=capsize)

            grid.map(plt.plot, self.variable, stat_name, **kwargs)

            return dict(xscale=variable_scale,
                        xlim=variable_lim,
                        yscale=data_scale,
                        ylim=lim)
        else:
            # plot the error bars first so the axis labels don't get
            # overwritten
            if err_stat is not None:
                if shade_error:
                    grid.map(_h_error_shade,
                             stat_name,
                             self.variable,
                             err_stat_name,
                             alpha=shade_alpha)
                else:
                    grid.map(_h_error_bars,
                             stat_name,
                             self.variable,
                             err_stat_name,
                             capsize=capsize)

            grid.map(plt.plot, stat_name, self.variable, **kwargs)

            return dict(yscale=variable_scale,
                        ylim=variable_lim,
                        xscale=data_scale,
                        xlim=lim)
class BarChartView(Base1DStatisticsView):
    """
    Plots a bar chart of some summary statistic

    Attributes
    ----------
    name : Str
        The bar chart's name

    statistic : Tuple(Str, Str)
        the statistic we're plotting

    scale : Enum("linear", "log", "logicle") (default = "linear")
        The scale to use on the Y axis.

    variable : Str
        the name of the conditioning variable to group the chart's bars

    error_statistic : Tuple(Str, Str)
        if specified, a statistic to draw error bars.  if values are numeric,
        the bars are drawn +/- the value.  if the values are tuples, then
        the first element is the low error and the second element is the
        high error.

    xfacet : Str
        the conditioning variable for horizontal subplots

    yfacet : Str
        the conditioning variable for vertical subplots

    huefacet : Str
        the conditioning variable to make multiple bar colors

    subset : String
        Passed to pandas.DataFrame.query(), to get a subset of the statistic
        before we plot it.

    Examples
    --------
    >>> bar = flow.BarChartView()
    >>> bar.name = "Bar Chart"
    >>> bar.channel = 'Y2-A'
    >>> bar.variable = 'Y2-A+'
    >>> bar.huefacet = 'Dox'
    >>> bar.function = len
    >>> bar.plot(ex)
    """

    # traits
    id = "edu.mit.synbio.cytoflow.view.barchart"
    friendly_id = "Bar Chart"

    orientation = util.Removed(
        err_string="`orientation` is now a parameter to `plot`")

    def enum_plots(self, experiment):
        """
        Returns an iterator over the possible plots that this View can
        produce.  The values returned can be passed to "plot".
        """
        return super().enum_plots(experiment)

    def plot(self, experiment, plot_name=None, **kwargs):
        """
        Plot a bar chart

        Parameters
        ----------
        orientation : ['vertical', 'horizontal']
            Sets the orientation to vertical (the default) or horizontal

        color : a matplotlib color
            Sets the colors of all the bars, even if there is a hue facet

        errwidth : scalar
            The width of the error bars, in points

        errcolor : a matplotlib color
            The color of the error bars

        capsize : scalar
            The size of the error bar caps, in points

        Other Parameters
        ----------------
        Other `kwargs` are passed to matplotlib.axes.Axes.bar_.

        .. _matplotlib.axes.Axes.bar_: https://matplotlib.org/devdocs/api/_as_gen/matplotlib.axes.Axes.bar.html

        See Also
        --------
        BaseView.plot : common parameters for data views
        """
        super().plot(experiment, plot_name, **kwargs)

    def _grid_plot(self, experiment, grid, xlim, ylim, xscale, yscale,
                   **kwargs):
        """
        Draw the bars onto an existing facet grid.

        Parameters
        ----------
        experiment : Experiment
            Supplies the statistic (and optional error statistic) to draw.

        grid : the facet grid to draw on
            Must provide ``axes`` and ``map``.

        xlim, ylim, xscale, yscale
            Limits and scales computed by the caller; only ``yscale`` (the
            statistic's scale) is used here.

        orientation : {'vertical', 'horizontal'}, passed in `kwargs`
            Direction of the bars (default ``'vertical'``).

        Returns
        -------
        dict
            Always empty; the axes are configured in place.

        Raises
        ------
        util.CytoflowViewError
            If ``orientation`` is neither ``'vertical'`` nor ``'horizontal'``.
        """
        orient = kwargs.pop('orientation', 'vertical')

        # validate once, before touching the scale or any axes
        if orient not in ('vertical', 'horizontal'):
            raise util.CytoflowViewError(
                'orientation',
                "'orientation' param must be 'vertical' or 'horizontal'")

        # because the bottom of a bar chart is "0", masking out bad
        # values on a log scale doesn't work.  we must clip instead.
        # Base1DStatistic uses xscale for the variable and yscale for
        # the statistic.
        if yscale.name == "log":
            yscale.mode = "clip"

        # set the scale for each set of axes; can't just call plt.xscale()
        for ax in grid.axes.flatten():
            if orient == 'horizontal':
                ax.set_xscale(yscale.name, **yscale.mpl_params)
            else:
                ax.set_yscale(yscale.name, **yscale.mpl_params)

        stat = experiment.statistics[self.statistic]
        map_args = [self.variable, stat.name]

        if self.huefacet:
            map_args.append(self.huefacet)

        if self.error_statistic[0]:
            error_stat = experiment.statistics[self.error_statistic]
            map_args.append(error_stat.name)
        else:
            error_stat = None

        # compare against None explicitly: the truth value of a pandas
        # Series is ambiguous and raises ValueError
        error_name = error_stat.name if error_stat is not None else None

        grid.map(_barplot,
                 *map_args,
                 view=self,
                 stat_name=stat.name,
                 error_name=error_name,
                 orient=orient,
                 grid=grid,
                 **kwargs)

        return {}
class Base2DStatisticsView(BaseStatisticsView):
    """
    Base class for views that plot one statistic against another.

    `xstatistic` and `ystatistic` name the two statistics to plot;
    `x_error_statistic` and `y_error_statistic` optionally name statistics
    holding their errors.
    """

    STATS_REMOVED = "{} has been removed. Statistics changed dramatically in 0.5; please see the documentation."

    xchannel = util.Removed(err_string=STATS_REMOVED)
    xfunction = util.Removed(err_string=STATS_REMOVED)
    ychannel = util.Removed(err_string=STATS_REMOVED)
    yfunction = util.Removed(err_string=STATS_REMOVED)

    xstatistic = Tuple(Str, Str)
    ystatistic = Tuple(Str, Str)
    x_error_statistic = Tuple(Str, Str)
    y_error_statistic = Tuple(Str, Str)

    def enum_plots(self, experiment):
        """
        Returns an iterator over the possible plots that this View can
        produce.  The values returned can be passed to "plot".
        """
        data = self._make_data(experiment)
        return super().enum_plots(experiment, data)

    def plot(self, experiment, plot_name=None, **kwargs):
        """
        Assemble the data and the X / Y scales, then delegate to the
        superclass `plot`.

        Each axis scale is built from the statistic plotted on that axis
        and its error statistic.
        """
        data = self._make_data(experiment)

        xscale = util.scale_factory(self.xscale,
                                    experiment,
                                    statistic=self.xstatistic,
                                    error_statistic=self.x_error_statistic)

        yscale = util.scale_factory(self.yscale,
                                    experiment,
                                    statistic=self.ystatistic,
                                    error_statistic=self.y_error_statistic)

        super().plot(experiment,
                     data,
                     plot_name,
                     xscale=xscale,
                     yscale=yscale,
                     **kwargs)

    def _make_data(self, experiment):
        """
        Validate the configured statistics and merge them into one frame.

        Returns
        -------
        pandas.DataFrame
            Indexed by the intersection of the X and Y statistics' indices,
            with one column per statistic (named after the statistic) and
            one per error statistic, if set.

        Raises
        ------
        util.CytoflowViewError
            If a statistic is missing, non-numeric, clashes by name with
            another one, or an error statistic's index does not match its
            data statistic.
        """
        if experiment is None:
            raise util.CytoflowViewError("No experiment specified")

        # --- X statistic and its error statistic
        if not self.xstatistic:
            raise util.CytoflowViewError("X Statistic not set")

        if self.xstatistic not in experiment.statistics:
            raise util.CytoflowViewError(
                "Can't find the statistic {} in the experiment".format(
                    self.xstatistic))
        else:
            xstat = experiment.statistics[self.xstatistic]

        if not util.is_numeric(xstat):
            raise util.CytoflowViewError("X statistic must be numeric")

        if self.x_error_statistic[0]:
            if self.x_error_statistic not in experiment.statistics:
                raise util.CytoflowViewError(
                    "Can't find the X error statistic in the experiment")
            else:
                x_error_stat = experiment.statistics[self.x_error_statistic]
        else:
            x_error_stat = None

        if x_error_stat is not None:
            if not xstat.index.equals(x_error_stat.index):
                raise util.CytoflowViewError(
                    "Data statistic and error statistic "
                    " don't have the same index.")

            if xstat.name == x_error_stat.name:
                raise util.CytoflowViewError(
                    "Data statistic and error statistic can "
                    "not have the same name.")

        # --- Y statistic and its error statistic
        if not self.ystatistic:
            raise util.CytoflowViewError("Y statistic not set")

        if self.ystatistic not in experiment.statistics:
            raise util.CytoflowViewError(
                "Can't find the Y statistic {} in the experiment".format(
                    self.ystatistic))
        else:
            ystat = experiment.statistics[self.ystatistic]

        if not util.is_numeric(ystat):
            raise util.CytoflowViewError("Y statistic must be numeric")

        if self.y_error_statistic[0]:
            if self.y_error_statistic not in experiment.statistics:
                raise util.CytoflowViewError(
                    "Can't find the Y error statistic in the experiment")
            else:
                y_error_stat = experiment.statistics[self.y_error_statistic]
        else:
            y_error_stat = None

        if y_error_stat is not None:
            if not ystat.index.equals(y_error_stat.index):
                raise util.CytoflowViewError(
                    "Data statistic and error statistic "
                    " don't have the same index.")

            if ystat.name == y_error_stat.name:
                raise util.CytoflowViewError(
                    "Data statistic and error statistic can "
                    "not have the same name.")

        if xstat.name == ystat.name:
            raise util.CytoflowViewError("X and Y statistics can "
                                         "not have the same name.")

        # --- align Y to X's level order, then keep only the shared entries.
        # NOTE(review): this assigns to / sorts the object obtained from
        # experiment.statistics; if that is the stored Series rather than a
        # copy, the experiment's statistic is modified in place -- confirm.
        try:
            ystat.index = ystat.index.reorder_levels(xstat.index.names)
            ystat.sort_index(inplace=True)
        except AttributeError:
            # a flat (single-level) index has no levels to reorder
            pass

        intersect_idx = xstat.index.intersection(ystat.index)
        xstat = xstat.reindex(intersect_idx)
        xstat.sort_index(inplace=True)

        ystat = ystat.reindex(intersect_idx)
        ystat.sort_index(inplace=True)

        # --- re-fetch the error statistics and align them the same way
        if self.x_error_statistic[0]:
            if self.x_error_statistic not in experiment.statistics:
                raise util.CytoflowViewError(
                    "X error statistic not in experiment")
            else:
                x_error_stat = experiment.statistics[self.x_error_statistic]

            if set(x_error_stat.index.names) != set(xstat.index.names):
                raise util.CytoflowViewError(
                    "X error statistic doesn't have the "
                    "same indices as the X statistic")

            try:
                x_error_stat.index = x_error_stat.index.reorder_levels(
                    xstat.index.names)
                x_error_stat.sort_index(inplace=True)
            except AttributeError:
                pass

            x_error_stat = x_error_stat.reindex(intersect_idx)
            x_error_stat.sort_index(inplace=True)

            if not x_error_stat.index.equals(xstat.index):
                raise util.CytoflowViewError(
                    "X error statistic doesn't have the "
                    "same values as the X statistic")
        else:
            x_error_stat = None

        if self.y_error_statistic[0]:
            if self.y_error_statistic not in experiment.statistics:
                raise util.CytoflowViewError(
                    "Y error statistic not in experiment")
            else:
                y_error_stat = experiment.statistics[self.y_error_statistic]

            if set(y_error_stat.index.names) != set(ystat.index.names):
                raise util.CytoflowViewError(
                    "Y error statistic doesn't have the "
                    "same indices as the Y statistic")

            try:
                y_error_stat.index = y_error_stat.index.reorder_levels(
                    ystat.index.names)
                y_error_stat.sort_index(inplace=True)
            except AttributeError:
                pass

            y_error_stat = y_error_stat.reindex(intersect_idx)
            y_error_stat.sort_index(inplace=True)

            if not y_error_stat.index.equals(ystat.index):
                raise util.CytoflowViewError(
                    "Y error statistic doesn't have the "
                    "same values as the Y statistic")
        else:
            y_error_stat = None

        # --- one column per statistic, all sharing the intersected index
        data = pd.DataFrame(index=xstat.index)
        data[xstat.name] = xstat
        data[ystat.name] = ystat

        if x_error_stat is not None:
            data[x_error_stat.name] = x_error_stat

        if y_error_stat is not None:
            data[y_error_stat.name] = y_error_stat

        return data
class Stats2DView(HasStrictTraits):
    """
    Plot two statistics on a scatter plot.  A point (X,Y) is drawn for every
    pair of elements with the same value of `variable`; the X value is from
    `xstatistic` and the Y value is from `ystatistic`.

    Attributes
    ----------
    name : Str
        The plot's name

    variable : Str
        the name of the conditioning variable

    xstatistic : Tuple(Str, Str)
        The statistic to plot on the X axis.  Must have the same indices
        as `ystatistic`.

    xscale : Enum("linear", "log", "logicle") (default = "linear")
        What scale to use on the X axis

    ystatistic : Tuple(Str, Str)
        The statistic to plot on the Y axis.  Must have the same indices
        as `xstatistic`.

    yscale : Enum("linear", "log", "logicle") (default = "linear")
        What scale to use on the Y axis

    xfacet : Str
        the conditioning variable for horizontal subplots

    yfacet : Str
        the conditioning variable for vertical subplots

    huefacet :
        the conditioning variable for color.

    huescale : Enum("linear", "log", "logicle") (default = "linear")
        scale for the hue facet, if there are a lot of hue values.

    x_error_statistic, y_error_statistic : Tuple(Str, Str)
        if specified, draw error bars.  must be the name of a statistic,
        with the same indices as `xstatistic` and `ystatistic`.

    subset : Str
        What subset of the data to plot?

    Examples
    --------

    Assume we want an input-output curve for a repressor that's under the
    control of a Dox-inducible promoter.  We have an "input" channel
    `(Dox --> eYFP, FITC-A channel)` and an output channel
    `(Dox --> repressor --| eBFP, Pacific Blue channel)` as well as a
    constitutive expression channel (mKate, PE-Tx-Red-YG-A channel).
    We have induced several wells with different amounts of Dox.  We want
    to plot the relationship between the input and output channels (binned by
    input channel intensity) as we vary Dox, faceted by constitutive channel
    bin.

    >>> cfp_bin_op = flow.BinningOp(name = "CFP_Bin",
    ...                             channel = "PE-Tx-Red-YG-A",
    ...                             scale = "log",
    ...                             bin_width = 0.1)
    >>> ifp_bin_op = flow.BinningOp(name = "IFP_Bin",
    ...                             channel = "Pacific Blue-A",
    ...                             scale = "log",
    ...                             bin_width = 0.1)
    >>> ifp_mean = flow.ChannelStatisticOp(name = "IFP",
    ...                                    channel = "FITC-A",
    ...                                    by = ["IFP_Bin", "CFP_Bin"],
    ...                                    function = flow.geom_mean)
    >>> ofp_mean = flow.ChannelStatisticOp(name = "OFP",
    ...                                    channel = "Pacific_Blue-A",
    ...                                    by = ["IFP_Bin", "CFP_Bin"],
    ...                                    function = flow.geom_mean)
    >>> ex = cfp_bin_op.apply(ex)
    >>> ex = ifp_bin_op.apply(ex)
    >>> ex = ifp_mean.apply(ex)
    >>> ex = ofp_mean.apply(ex)
    >>> view = flow.Stats2DView(name = "IFP vs OFP",
    ...                         variable = "IFP_Bin",
    ...                         xstatistic = ("IFP", "geom_mean"),
    ...                         ystatistic = ("OFP", "geom_mean"),
    ...                         huefacet = "CFP_Bin")
    >>> view.plot(ex)
    """

    # traits
    id = "edu.mit.synbio.cytoflow.view.stats2d"
    friendly_id = "2D Statistics View"

    # deprecated or removed attributes give warnings & errors, respectively
    by = util.Deprecated(
        new='variable', err_string="'by' is deprecated, please use 'variable'")

    STATS_REMOVED = "{} has been removed. Statistics changed dramatically in 0.5; please see the documentation."

    xchannel = util.Removed(err_string=STATS_REMOVED)
    xfunction = util.Removed(err_string=STATS_REMOVED)
    ychannel = util.Removed(err_string=STATS_REMOVED)
    yfunction = util.Removed(err_string=STATS_REMOVED)

    name = Str
    variable = Str
    xstatistic = Tuple(Str, Str)
    xscale = util.ScaleEnum
    ystatistic = Tuple(Str, Str)
    yscale = util.ScaleEnum

    xfacet = Str
    yfacet = Str
    huefacet = Str
    huescale = util.ScaleEnum

    x_error_statistic = Tuple(Str, Str)
    y_error_statistic = Tuple(Str, Str)

    subset = Str

    def _make_data(self, experiment):
        """
        Validate the view against `experiment` and build the frame to plot.

        Shared by `enum_plots` and `plot` so both apply identical checks.

        Returns
        -------
        tuple
            ``(data, names, facets, columns, error_stats)`` where

            - ``data`` is a DataFrame indexed like the statistics (after
              the subset query and after dropping single-valued levels),
            - ``names`` is the list of remaining index level names,
            - ``facets`` is the list of non-empty facets in use,
            - ``columns`` is ``(xname, yname, x_error_name, y_error_name)``,
              the random column names used in ``data`` (error names are
              ``None`` when no error statistic is set),
            - ``error_stats`` is ``(x_error_stat, y_error_stat)``, each a
              statistic or ``None``.

        Raises
        ------
        util.CytoflowViewError
            If any parameter is unset, missing from the experiment or the
            statistics, or if a facet is used more than once.
        """
        if not experiment:
            raise util.CytoflowViewError("No experiment specified")

        if not self.variable:
            raise util.CytoflowViewError("variable not set")

        if self.variable not in experiment.conditions:
            raise util.CytoflowViewError(
                "variable {0} not in the experiment".format(self.variable))

        if not self.xstatistic:
            raise util.CytoflowViewError("X statistic not set")

        if self.xstatistic not in experiment.statistics:
            raise util.CytoflowViewError(
                "Can't find X statistic {} in experiment".format(
                    self.xstatistic))
        xstat = experiment.statistics[self.xstatistic]

        if not self.ystatistic:
            raise util.CytoflowViewError("Y statistic not set")

        if self.ystatistic not in experiment.statistics:
            raise util.CytoflowViewError(
                "Can't find Y statistic {} in experiment".format(
                    self.ystatistic))
        ystat = experiment.statistics[self.ystatistic]

        if not xstat.index.equals(ystat.index):
            raise util.CytoflowViewError(
                "X statistic and Y statistic must have "
                "the same indices: {}".format(xstat.index.names))

        if self.x_error_statistic[0]:
            if self.x_error_statistic not in experiment.statistics:
                raise util.CytoflowViewError(
                    "X error statistic not in experiment")
            x_error_stat = experiment.statistics[self.x_error_statistic]

            if not x_error_stat.index.equals(xstat.index):
                raise util.CytoflowViewError(
                    "X error statistic doesn't have the "
                    "same indices as the X statistic")
        else:
            x_error_stat = None

        if self.y_error_statistic[0]:
            if self.y_error_statistic not in experiment.statistics:
                raise util.CytoflowViewError(
                    "Y error statistic not in experiment")
            y_error_stat = experiment.statistics[self.y_error_statistic]

            if not y_error_stat.index.equals(ystat.index):
                raise util.CytoflowViewError(
                    "Y error statistic doesn't have the "
                    "same indices as the Y statistic")
        else:
            y_error_stat = None

        # random column names can't collide with the index level names
        data = pd.DataFrame(index=xstat.index)

        xname = util.random_string(6)
        data[xname] = xstat

        yname = util.random_string(6)
        data[yname] = ystat

        x_error_name = None
        if x_error_stat is not None:
            x_error_name = util.random_string(6)
            data[x_error_name] = x_error_stat

        y_error_name = None
        if y_error_stat is not None:
            y_error_name = util.random_string(6)
            data[y_error_name] = y_error_stat

        if self.subset:
            try:
                # TODO - either sanitize column names, or check to see that
                # all conditions are valid Python variables
                data = data.query(self.subset)
            except Exception as e:
                raise util.CytoflowViewError(
                    "Subset string '{0}' isn't valid".format(
                        self.subset)) from e

            if len(data) == 0:
                raise util.CytoflowViewError(
                    "Subset string '{0}' returned no values".format(
                        self.subset))

        # levels with a single value carry no information; drop them
        names = list(data.index.names)
        for name in names:
            unique_values = data.index.get_level_values(name).unique()
            if len(unique_values) == 1:
                warn("Only one value for level {}; dropping it.".format(name),
                     util.CytoflowViewWarning)
                try:
                    data.index = data.index.droplevel(name)
                except (AttributeError, ValueError) as e:
                    # the last remaining level can't be dropped
                    # (AttributeError on old pandas, ValueError on newer)
                    raise util.CytoflowViewError(
                        "Must have more than one "
                        "value to plot.") from e

        names = list(data.index.names)

        if self.variable not in experiment.conditions:
            raise util.CytoflowViewError(
                "Variable {} not in experiment".format(self.variable))

        if self.variable not in data.index.names:
            raise util.CytoflowViewError(
                "Variable {} not in statistic; must be one of {}".format(
                    self.variable, data.index.names))

        if self.xfacet and self.xfacet not in experiment.conditions:
            raise util.CytoflowViewError(
                "X facet {} not in the experiment".format(self.xfacet))

        if self.xfacet and self.xfacet not in data.index.names:
            raise util.CytoflowViewError(
                "X facet {} not in statistics; must be one of {}".format(
                    self.xfacet, data.index.names))

        if self.yfacet and self.yfacet not in experiment.conditions:
            raise util.CytoflowViewError(
                "Y facet {} not in the experiment".format(self.yfacet))

        if self.yfacet and self.yfacet not in data.index.names:
            raise util.CytoflowViewError(
                "Y facet {} not in statistics; must be one of {}".format(
                    self.yfacet, data.index.names))

        if self.huefacet and self.huefacet not in experiment.metadata:
            raise util.CytoflowViewError(
                "Hue facet {} not in the experiment".format(self.huefacet))

        if self.huefacet and self.huefacet not in data.index.names:
            raise util.CytoflowViewError(
                "Hue facet {} not in statistics; must be one of {}".format(
                    self.huefacet, data.index.names))

        # a real list: filter() is lazy on Python 3 and has no len()
        facets = [f for f in (self.variable, self.xfacet, self.yfacet,
                              self.huefacet) if f]
        if len(facets) != len(set(facets)):
            raise util.CytoflowViewError("Can't reuse facets")

        return (data,
                names,
                facets,
                (xname, yname, x_error_name, y_error_name),
                (x_error_stat, y_error_stat))

    def enum_plots(self, experiment):
        """
        Returns an iterator over the possible plots that this View can
        produce.  The values returned can be passed to "plot".

        If every statistic index level is used as the variable or a facet,
        the iterator yields a single ``None``.
        """
        _, names, facets, _, _ = self._make_data(experiment)

        # index levels that are neither the variable nor a facet, kept in
        # index order so `plot` sees the same ordering
        unused = [n for n in names if n not in facets]

        class plot_enum(object):

            def __init__(self, experiment, by):
                self._iter = None
                self._returned = False

                if by:
                    # NOTE(review): groups the experiment's event data, while
                    # plot() validates plot_name against the (possibly
                    # subsetted) statistic -- confirm these always agree.
                    self._iter = iter(experiment.data.groupby(by))

            def __iter__(self):
                return self

            def __next__(self):
                if self._iter is not None:
                    return next(self._iter)[0]

                if self._returned:
                    raise StopIteration

                self._returned = True
                return None

            # keep the old explicit-call spelling working
            next = __next__

        return plot_enum(experiment, unused)

    def plot(self, experiment, plot_name=None, **kwargs):
        """
        Plot `ystatistic` against `xstatistic`.

        Parameters
        ----------
        experiment : Experiment
            The experiment holding the statistics to plot.

        plot_name : optional
            Required when some statistic index levels are used neither as
            the variable nor as a facet; selects which group to plot.  Must
            be one of the values produced by `enum_plots`.

        col_wrap : int, optional
            Wrap the `xfacet` columns after this many plots.  Requires
            `xfacet` and can't be combined with `yfacet`.

        xlim, ylim : (float, float), optional
            Axis limits; computed from the data if not given.

        sharex, sharey : bool, optional
            Passed to the facet grid (default True).

        Notes
        -----
        Other `kwargs` are passed to `matplotlib.pyplot.plot`.

        Raises
        ------
        util.CytoflowViewError
            If the view's parameters are inconsistent with the experiment
            or with each other.
        """
        data, names, facets, columns, error_stats = \
            self._make_data(experiment)
        xname, yname, x_error_name, y_error_name = columns
        x_error_stat, y_error_stat = error_stats

        # pop col_wrap exactly once so the value survives to the grid
        col_wrap = kwargs.pop('col_wrap', None)

        if col_wrap and self.yfacet:
            raise util.CytoflowViewError(
                "Can't set yfacet and col_wrap at the same time.")

        if col_wrap and not self.xfacet:
            raise util.CytoflowViewError("Must set xfacet to use col_wrap.")

        unused_names = [n for n in names if n not in facets]

        if plot_name is not None and not unused_names:
            raise util.CytoflowViewError("You specified a plot name, but all "
                                         "the facets are already used")

        data = data.reset_index()

        if unused_names:
            groupby = data.groupby(unused_names)

            if plot_name is None:
                raise util.CytoflowViewError(
                    "You must use facets {} in either the "
                    "plot variables or the plot name. "
                    "Possible plot names: {}".format(unused_names,
                                                     groupby.groups.keys()))

            if plot_name not in set(groupby.groups.keys()):
                raise util.CytoflowViewError(
                    "Plot {} not from plot_enum; must "
                    "be one of {}".format(plot_name, groupby.groups.keys()))

            data = groupby.get_group(plot_name)

        xscale = util.scale_factory(self.xscale,
                                    experiment,
                                    statistic=self.xstatistic)

        yscale = util.scale_factory(self.yscale,
                                    experiment,
                                    statistic=self.ystatistic)

        xlim = kwargs.pop("xlim", None)
        if xlim is None:
            xlim = (xscale.clip(data[xname].min() * 0.9),
                    xscale.clip(data[xname].max() * 1.1))

            if x_error_stat is not None:
                try:
                    # tuple-valued errors: (low, high)
                    xlim = (xscale.clip(
                                min([x[0] for x in x_error_stat]) * 0.9),
                            xscale.clip(
                                max([x[1] for x in x_error_stat]) * 1.1))
                except (TypeError, IndexError):
                    # scalar errors can't be subscripted
                    xlim = (xscale.clip(x_error_stat.min() * 0.9),
                            xscale.clip(x_error_stat.max() * 1.1))

        ylim = kwargs.pop("ylim", None)
        if ylim is None:
            ylim = (yscale.clip(data[yname].min() * 0.9),
                    yscale.clip(data[yname].max() * 1.1))

            if y_error_stat is not None:
                try:
                    ylim = (yscale.clip(
                                min([x[0] for x in y_error_stat]) * 0.9),
                            yscale.clip(
                                max([x[1] for x in y_error_stat]) * 1.1))
                except (TypeError, IndexError):
                    ylim = (yscale.clip(y_error_stat.min() * 0.9),
                            yscale.clip(y_error_stat.max() * 1.1))

        kwargs.setdefault('antialiased', True)

        cols = col_wrap if col_wrap else \
            len(data[self.xfacet].unique()) if self.xfacet else 1

        sharex = kwargs.pop('sharex', True)
        sharey = kwargs.pop('sharey', True)

        grid = sns.FacetGrid(data,
                             size=(6 / cols),
                             aspect=1.5,
                             col=(self.xfacet if self.xfacet else None),
                             row=(self.yfacet if self.yfacet else None),
                             hue=(self.huefacet if self.huefacet else None),
                             col_order=(np.sort(data[self.xfacet].unique())
                                        if self.xfacet else None),
                             row_order=(np.sort(data[self.yfacet].unique())
                                        if self.yfacet else None),
                             hue_order=(np.sort(data[self.huefacet].unique())
                                        if self.huefacet else None),
                             col_wrap=col_wrap,
                             legend_out=False,
                             sharex=sharex,
                             sharey=sharey,
                             xlim=xlim,
                             ylim=ylim)

        for ax in grid.axes.flatten():
            ax.set_xscale(self.xscale, **xscale.mpl_params)
            ax.set_yscale(self.yscale, **yscale.mpl_params)

        # plot the error bars first so the axis labels don't get overwritten
        if x_error_stat is not None:
            grid.map(_x_error_bars, xname, yname, x_error_name)

        if y_error_stat is not None:
            grid.map(_y_error_bars, xname, yname, y_error_name)

        grid.map(plt.plot, xname, yname, **kwargs)

        # if we have an xfacet, make sure the y scale is the same for each
        fig = plt.gcf()
        fig_y_min = float("inf")
        fig_y_max = float("-inf")
        for ax in fig.get_axes():
            ax_y_min, ax_y_max = ax.get_ylim()
            if ax_y_min < fig_y_min:
                fig_y_min = ax_y_min
            if ax_y_max > fig_y_max:
                fig_y_max = ax_y_max

        for ax in fig.get_axes():
            ax.set_ylim(fig_y_min, fig_y_max)

        # if we have a yfacet, make sure the x scale is the same for each
        fig_x_min = float("inf")
        fig_x_max = float("-inf")
        for ax in fig.get_axes():
            ax_x_min, ax_x_max = ax.get_xlim()
            if ax_x_min < fig_x_min:
                fig_x_min = ax_x_min
            if ax_x_max > fig_x_max:
                fig_x_max = ax_x_max

        for ax in fig.get_axes():
            ax.set_xlim(fig_x_min, fig_x_max)

        # if we have a hue facet and a lot of hues, make a color bar instead
        # of a super-long legend.
        if self.huefacet:
            current_palette = mpl.rcParams['axes.color_cycle']
            if util.is_numeric(experiment.data[self.huefacet]) and \
                    len(grid.hue_names) > len(current_palette):

                plot_ax = plt.gca()
                cmap = mpl.colors.ListedColormap(
                    sns.color_palette("husl", n_colors=len(grid.hue_names)))
                cax, _ = mpl.colorbar.make_axes(plt.gca())
                hue_scale = util.scale_factory(self.huescale,
                                               experiment,
                                               condition=self.huefacet)
                mpl.colorbar.ColorbarBase(cax,
                                          cmap=cmap,
                                          norm=hue_scale.color_norm(),
                                          label=self.huefacet)
                plt.sca(plot_ax)
            else:
                grid.add_legend(title=self.huefacet)

        plt.xlabel(self.xstatistic)
        plt.ylabel(self.ystatistic)

        if unused_names and plot_name is not None:
            plt.title("{0} = {1}".format(unused_names, plot_name))
class TableView(HasStrictTraits):
    """
    "Plot" a tabular view of a statistic.  Mostly useful for GUIs.  Each level
    of the statistic's index must be used in :attr:`row_facet`,
    :attr:`column_facet`, :attr:`subrow_facet`, or :attr:`subcolumn_facet`.
    This module can't "plot" a statistic with more than four index levels
    unless :attr:`subset` is set and that results in extra levels being
    dropped.

    Attributes
    ----------
    statistic : (str, str)
        The name of the statistic to plot.  Must be a key in the
        :attr:`~Experiment.statistics` attribute of the :class:`~.Experiment`
        being plotted.  Each level of the statistic's index must be used
        in :attr:`row_facet`, :attr:`column_facet`, :attr:`subrow_facet`, or
        :attr:`subcolumn_facet`.

    row_facet, column_facet : str
        The statistic facets to be used as row and column headers.

    subrow_facet, subcolumn_facet : str
        The statistic facets to be used as subrow and subcolumn headers.

    subset : str
        A Python expression used to select a subset of the statistic to plot.

    Examples
    --------

    Make a little data set.

    .. plot::
        :context: close-figs

        >>> import cytoflow as flow
        >>> import_op = flow.ImportOp()
        >>> import_op.tubes = [flow.Tube(file = "Plate01/RFP_Well_A3.fcs",
        ...                              conditions = {'Dox' : 10.0}),
        ...                    flow.Tube(file = "Plate01/CFP_Well_A4.fcs",
        ...                              conditions = {'Dox' : 1.0})]
        >>> import_op.conditions = {'Dox' : 'float'}
        >>> ex = import_op.apply()

    Add a threshold gate

    .. plot::
        :context: close-figs

        >>> ex2 = flow.ThresholdOp(name = 'Threshold',
        ...                        channel = 'Y2-A',
        ...                        threshold = 2000).apply(ex)

    Add a statistic

    .. plot::
        :context: close-figs

        >>> ex3 = flow.ChannelStatisticOp(name = "ByDox",
        ...                               channel = "Y2-A",
        ...                               by = ['Dox', 'Threshold'],
        ...                               function = len).apply(ex2)

    "Plot" the table

    .. plot::
        :context: close-figs

        >>> flow.TableView(statistic = ("ByDox", "len"),
        ...                row_facet = "Dox",
        ...                column_facet = "Threshold").plot(ex3)
    """

    # traits
    id = Constant("edu.mit.synbio.cytoflow.view.table")
    friendly_id = Constant("Table View")

    REMOVED_ERROR = Constant("Statistics have changed dramatically in 0.5; please see the documentation")
    channel = util.Removed(err_string = REMOVED_ERROR)
    function = util.Removed(err_string = REMOVED_ERROR)

    statistic = Tuple(Str, Str)
    row_facet = Str
    subrow_facet = Str
    column_facet = Str
    subcolumn_facet = Str

    subset = Str

    def plot(self, experiment, plot_name = None, **kwargs):
        """
        Plot a table

        Parameters
        ----------
        experiment : Experiment
            The experiment holding :attr:`statistic`.

        plot_name : optional
            Accepted for interface compatibility; not used by this view.

        Notes
        -----
        Other `kwargs` are passed to the ``Table`` constructor.

        Raises
        ------
        util.CytoflowViewError
            If the statistic is missing, the subset is invalid or empty, or
            the facets don't cover exactly the statistic's index levels.
        """

        if experiment is None:
            raise util.CytoflowViewError('experiment', "No experiment specified")

        if self.statistic not in experiment.statistics:
            raise util.CytoflowViewError('statistic',
                                         "Can't find the statistic {} in the experiment"
                                         .format(self.statistic))
        else:
            stat = experiment.statistics[self.statistic]

        data = pd.DataFrame(index = stat.index)
        data[stat.name] = stat

        if self.subset:
            try:
                data = data.query(self.subset)
            except Exception as e:
                raise util.CytoflowViewError('subset',
                                             "Subset string '{0}' isn't valid"
                                             .format(self.subset)) from e

            if len(data) == 0:
                raise util.CytoflowViewError('subset',
                                             "Subset string '{0}' returned no values"
                                             .format(self.subset))

        # levels with a single value carry no information; drop them
        names = list(data.index.names)
        for name in names:
            unique_values = data.index.get_level_values(name).unique()
            if len(unique_values) == 1:
                warn("Only one value for level {}; dropping it.".format(name),
                     util.CytoflowViewWarning)
                try:
                    data.index = data.index.droplevel(name)
                except (AttributeError, ValueError) as e:
                    # the last remaining level can't be dropped
                    # (AttributeError on old pandas, ValueError on newer)
                    raise util.CytoflowViewError(None,
                                                 "Must have more than one "
                                                 "value to plot.") from e

        if not (self.row_facet or self.column_facet):
            raise util.CytoflowViewError('row_facet',
                                         "Must set at least one of row_facet "
                                         "or column_facet")

        if self.subrow_facet and not self.row_facet:
            raise util.CytoflowViewError('subrow_facet',
                                         "Must set row_facet before using "
                                         "subrow_facet")

        if self.subcolumn_facet and not self.column_facet:
            raise util.CytoflowViewError('subcolumn_facet',
                                         "Must set column_facet before using "
                                         "subcolumn_facet")

        # every facet in use must be both an experiment condition and a
        # level of the statistic's index
        for trait, label in (('row_facet', "Row"),
                             ('subrow_facet', "Subrow"),
                             ('column_facet', "Column"),
                             ('subcolumn_facet', "Subcolumn")):
            facet = getattr(self, trait)
            if not facet:
                continue

            if facet not in experiment.conditions:
                raise util.CytoflowViewError(trait,
                                             "{} facet {} not in the experiment, "
                                             "must be one of {}"
                                             .format(label, facet, experiment.conditions))

            if facet not in data.index.names:
                raise util.CytoflowViewError(trait,
                                             "{} facet {} not a statistic index; "
                                             "must be one of {}"
                                             .format(label, facet, data.index.names))

        facets = [x for x in [self.row_facet,
                              self.subrow_facet,
                              self.column_facet,
                              self.subcolumn_facet] if x]

        if len(facets) != len(set(facets)):
            raise util.CytoflowViewError(None,
                                         "Can't reuse facets")

        if set(facets) != set(data.index.names):
            raise util.CytoflowViewError(None,
                                         "Must use all the statistic indices as variables or facets: {}"
                                         .format(data.index.names))

        def level_values(facet):
            # an unused facet contributes one placeholder group
            if not facet:
                return [None]
            return data.index.get_level_values(facet).unique()

        def header(facet, value):
            # compact numeric formatting where the value supports it;
            # strings raise ValueError, tuples / None raise TypeError
            try:
                return "{0} = {1:g}".format(facet, value)
            except (TypeError, ValueError):
                return "{0} = {1}".format(facet, value)

        row_groups = level_values(self.row_facet)
        subrow_groups = level_values(self.subrow_facet)
        col_groups = level_values(self.column_facet)
        subcol_groups = level_values(self.subcolumn_facet)

        # the header rows / columns push the data cells down / right
        row_offset = (self.column_facet != "") + (self.subcolumn_facet != "")
        col_offset = (self.row_facet != "") + (self.subrow_facet != "")

        num_cols = len(col_groups) * len(subcol_groups) + col_offset

        fig = plt.figure()
        ax = fig.add_subplot(111)

        # hide the plot axes that matplotlib tries to make
        ax.xaxis.set_visible(False)
        ax.yaxis.set_visible(False)
        for sp in ax.spines.values():
            sp.set_color('w')
            sp.set_zorder(0)

        loc = 'upper left'
        bbox = None

        t = Table(ax, loc, bbox, **kwargs)
        t.auto_set_font_size(False)
        for c in range(num_cols):
            t.auto_set_column_width(c)

        width = [0.2] * num_cols

        height = t._approx_text_height() * 1.8

        # make the main table
        for (ri, r) in enumerate(row_groups):
            for (rri, rr) in enumerate(subrow_groups):
                for (ci, c) in enumerate(col_groups):
                    for (cci, cc) in enumerate(subcol_groups):
                        row_idx = ri * len(subrow_groups) + rri + row_offset
                        col_idx = ci * len(subcol_groups) + cci + col_offset

                        # facets are unique and cover every index level
                        # (both checked above), so this lookup is total
                        by_facet = {self.row_facet: r,
                                    self.subrow_facet: rr,
                                    self.column_facet: c,
                                    self.subcolumn_facet: cc}
                        agg_idx = tuple(by_facet[level]
                                        for level in data.index.names)
                        if len(agg_idx) == 1:
                            agg_idx = agg_idx[0]

                        value = data.loc[agg_idx][stat.name]
                        try:
                            text = "{:g}".format(value)
                        except (TypeError, ValueError):
                            # non-numeric values (strings, tuples, None)
                            text = str(value)

                        t.add_cell(row_idx,
                                   col_idx,
                                   width = width[col_idx],
                                   height = height,
                                   text = text)

        # row headers
        if self.row_facet:
            for (ri, r) in enumerate(row_groups):
                row_idx = ri * len(subrow_groups) + row_offset
                t.add_cell(row_idx,
                           0,
                           width = width[0],
                           height = height,
                           text = header(self.row_facet, r))

        # subrow headers
        if self.subrow_facet:
            for (ri, r) in enumerate(row_groups):
                for (rri, rr) in enumerate(subrow_groups):
                    row_idx = ri * len(subrow_groups) + rri + row_offset
                    t.add_cell(row_idx,
                               1,
                               width = width[1],
                               height = height,
                               text = header(self.subrow_facet, rr))

        # column headers
        if self.column_facet:
            for (ci, c) in enumerate(col_groups):
                col_idx = ci * len(subcol_groups) + col_offset
                t.add_cell(0,
                           col_idx,
                           width = width[col_idx],
                           height = height,
                           text = header(self.column_facet, c))

        # subcolumn headers
        if self.subcolumn_facet:
            for (ci, c) in enumerate(col_groups):
                for (cci, cc) in enumerate(subcol_groups):
                    col_idx = ci * len(subcol_groups) + cci + col_offset
                    t.add_cell(1,
                               col_idx,
                               width = width[col_idx],
                               height = height,
                               text = header(self.subcolumn_facet, cc))

        ax.add_table(t)
class TableView(HasStrictTraits):
    """
    Render a statistic as a table drawn on a matplotlib figure.

    Every index level of the statistic that has more than one value must be
    assigned to exactly one of the four facets below; levels with only a
    single value are dropped (with a warning) before the table is laid out.

    Attributes
    ----------
    name : Str
        The view's name.

    statistic : Tuple(Str, Str)
        The key of the statistic to show; looked up in the experiment's
        ``statistics``.

    row_facet : Str
        The statistic index level laid out along the table's rows.

    subrow_facet : Str
        A second index level nested within each row; requires
        :attr:`row_facet`.

    column_facet : Str
        The statistic index level laid out along the table's columns.

    subcolumn_facet : Str
        A second index level nested within each column; requires
        :attr:`column_facet`.

    subset : Str
        Passed to ``pandas.DataFrame.query()`` to select a subset of the
        statistic before it is tabulated.
    """

    # traits
    id = "edu.mit.synbio.cytoflow.view.table"
    friendly_id = "Table View"

    REMOVED_ERROR = "Statistics have changed dramatically in 0.5; please see the documentation"
    channel = util.Removed(err_string=REMOVED_ERROR)
    function = util.Removed(err_string=REMOVED_ERROR)

    name = Str
    statistic = Tuple(Str, Str)
    row_facet = Str
    subrow_facet = Str
    column_facet = Str
    subcolumn_facet = Str

    subset = Str

    def plot(self, experiment, plot_name=None, **kwargs):
        """
        Plot a table.

        Parameters
        ----------
        experiment : Experiment
            The experiment whose ``statistics`` contains :attr:`statistic`.

        plot_name
            Accepted for interface compatibility; not used by this view.

        **kwargs
            Passed through to the matplotlib ``Table`` constructor.

        Raises
        ------
        util.CytoflowViewError
            If the experiment or statistic is missing, the subset is invalid
            or empty, or the facets do not cover the statistic's index
            levels exactly once each.
        """
        if experiment is None:
            raise util.CytoflowViewError("No experiment specified")

        if self.statistic not in experiment.statistics:
            raise util.CytoflowViewError(
                "Can't find the statistic {} in the experiment".format(
                    self.statistic))
        stat = experiment.statistics[self.statistic]

        data = pd.DataFrame(index=stat.index)
        data[stat.name] = stat

        if self.subset:
            try:
                data = data.query(self.subset)
            except Exception as e:
                raise util.CytoflowViewError(
                    "Subset string '{0}' isn't valid".format(
                        self.subset)) from e

            if len(data) == 0:
                raise util.CytoflowViewError(
                    "Subset string '{0}' returned no values".format(
                        self.subset))

        # index levels with a single value carry no information; drop them
        for name in list(data.index.names):
            unique_values = data.index.get_level_values(name).unique()
            if len(unique_values) == 1:
                warn("Only one value for level {}; dropping it.".format(name),
                     util.CytoflowViewWarning)
                try:
                    data.index = data.index.droplevel(name)
                except AttributeError as e:
                    # a flat (single-level) index has no droplevel()
                    raise util.CytoflowViewError("Must have more than one "
                                                 "value to plot.") from e

        if not (self.row_facet or self.column_facet):
            raise util.CytoflowViewError("Must set at least one of row_facet "
                                         "or column_facet")

        if self.subrow_facet and not self.row_facet:
            raise util.CytoflowViewError("Must set row_facet before using "
                                         "subrow_facet")

        if self.subcolumn_facet and not self.column_facet:
            raise util.CytoflowViewError("Must set column_facet before using "
                                         "subcolumn_facet")

        # each facet that is set must be both an experiment condition and
        # an index level of the (possibly reduced) statistic
        for label, facet in (("Row", self.row_facet),
                             ("Subrow", self.subrow_facet),
                             ("Column", self.column_facet),
                             ("Subcolumn", self.subcolumn_facet)):
            if not facet:
                continue
            if facet not in experiment.conditions:
                raise util.CytoflowViewError(
                    "{0} facet {1} not in the experiment".format(label, facet))
            if facet not in data.index.names:
                raise util.CytoflowViewError(
                    "{0} facet {1} not a statistic index; "
                    "must be one of {2}".format(label, facet,
                                                data.index.names))

        # a real list (not a lazy filter object) so len() works on Python 3
        facets = [x for x in [self.row_facet,
                              self.subrow_facet,
                              self.column_facet,
                              self.subcolumn_facet] if x]
        if len(facets) != len(set(facets)):
            raise util.CytoflowViewError("Can't reuse facets")

        if set(facets) != set(data.index.names):
            raise util.CytoflowViewError(
                "Must use all the statistic indices as variables or facets: {}"
                .format(data.index.names))

        row_groups = data.index.get_level_values(self.row_facet).unique() \
            if self.row_facet else [None]

        subrow_groups = data.index.get_level_values(self.subrow_facet).unique() \
            if self.subrow_facet else [None]

        col_groups = data.index.get_level_values(self.column_facet).unique() \
            if self.column_facet else [None]

        subcol_groups = data.index.get_level_values(self.subcolumn_facet).unique() \
            if self.subcolumn_facet else [None]

        # number of header rows / header columns preceding the data cells
        row_offset = (self.column_facet != "") + (self.subcolumn_facet != "")
        col_offset = (self.row_facet != "") + (self.subrow_facet != "")

        num_cols = len(col_groups) * len(subcol_groups) + col_offset

        fig = plt.figure()
        ax = fig.add_subplot(111)

        # hide the plot axes that matplotlib tries to make
        ax.xaxis.set_visible(False)
        ax.yaxis.set_visible(False)
        for sp in ax.spines.values():
            sp.set_color('w')
            sp.set_zorder(0)

        loc = 'best'
        bbox = None

        t = Table(ax, loc, bbox, **kwargs)
        width = [1.0 / num_cols] * num_cols
        height = t._approx_text_height() * 1.8

        # make the main table
        for (ri, r) in enumerate(row_groups):
            for (rri, rr) in enumerate(subrow_groups):
                for (ci, c) in enumerate(col_groups):
                    for (cci, cc) in enumerate(subcol_groups):
                        row_idx = ri * len(subrow_groups) + rri + row_offset
                        col_idx = ci * len(subcol_groups) + cci + col_offset

                        # build the index key in the statistic's own
                        # level order
                        agg_idx = []
                        for data_idx in data.index.names:
                            if data_idx == self.row_facet:
                                agg_idx.append(r)
                            elif data_idx == self.subrow_facet:
                                agg_idx.append(rr)
                            elif data_idx == self.column_facet:
                                agg_idx.append(c)
                            elif data_idx == self.subcolumn_facet:
                                agg_idx.append(cc)

                        agg_idx = tuple(agg_idx)
                        if len(agg_idx) == 1:
                            agg_idx = agg_idx[0]

                        t.add_cell(row_idx,
                                   col_idx,
                                   width=width[col_idx],
                                   height=height,
                                   text=data.loc[agg_idx][stat.name])

        # row headers
        if self.row_facet:
            for (ri, r) in enumerate(row_groups):
                row_idx = ri * len(subrow_groups) + row_offset
                text = "{0} = {1}".format(self.row_facet, r)
                t.add_cell(row_idx,
                           0,
                           width=width[0],
                           height=height,
                           text=text)

        # subrow headers
        if self.subrow_facet:
            for (ri, r) in enumerate(row_groups):
                for (rri, rr) in enumerate(subrow_groups):
                    row_idx = ri * len(subrow_groups) + rri + row_offset
                    text = "{0} = {1}".format(self.subrow_facet, rr)
                    t.add_cell(row_idx,
                               1,
                               width=width[1],
                               height=height,
                               text=text)

        # column headers
        if self.column_facet:
            for (ci, c) in enumerate(col_groups):
                col_idx = ci * len(subcol_groups) + col_offset
                text = "{0} = {1}".format(self.column_facet, c)
                t.add_cell(0,
                           col_idx,
                           width=width[col_idx],
                           height=height,
                           text=text)

        # subcolumn headers
        if self.subcolumn_facet:
            for (ci, c) in enumerate(col_groups):
                for (cci, cc) in enumerate(subcol_groups):
                    col_idx = ci * len(subcol_groups) + cci + col_offset
                    # label with the subcolumn value (cc), not the
                    # enclosing column value
                    text = "{0} = {1}".format(self.subcolumn_facet, cc)
                    t.add_cell(1,
                               col_idx,
                               width=width[col_idx],
                               height=height,
                               text=text)

        ax.add_table(t)
class Stats1DView(Base1DStatisticsView):
    """
    Plot a statistic.  The value of the statistic will be plotted on the
    Y axis; a numeric conditioning variable must be chosen for the X axis.
    Every variable in the statistic must be specified as either the `variable`
    or one of the plot facets.

    Attributes
    ----------
    name : Str
        The plot's name

    statistic : Tuple(Str, Str)
        The statistic to plot.  The first element is the name of the module that
        added the statistic, and the second element is the name of the statistic.

    variable : Str
        the name of the conditioning variable to put on the X axis.  Must be
        numeric (float or int).

    xscale : Enum("linear", "log") (default = "linear")
        The scale to use on the X axis

    yscale : Enum("linear", "log", "logicle") (default = "linear")
        The scale to use on the Y axis

    xfacet : Str
        the conditioning variable for horizontal subplots

    yfacet : Str
        the conditioning variable for vertical subplots

    huefacet :
        the conditioning variable for color.

    huescale :
        the scale to use on the "hue" axis, if there are many values of
        the hue facet.

    error_statistic : Tuple(Str, Str)
        A statistic to use to draw error bars; the bars are +- the value of
        the statistic.

    subset : String
        Passed to pandas.DataFrame.query(), to get a subset of the statistic
        before we plot it.

    Examples
    --------

    Assume we want a Dox induction curve in a transient transfection experiment.
    We have induced several wells with different amounts of Dox and the output
    of the Dox-inducible channel is "Pacific Blue-A".  We have a constitutive
    expression channel in "PE-Tx-Red-YG-A". We want to bin all the data by
    constitutive expression level, then plot the dose-response (geometric mean)
    curve in each bin.

    >>> ex_bin = flow.BinningOp(name = "CFP_Bin",
    ...                         channel = "PE-Tx-Red-YG-A",
    ...                         scale = "log",
    ...                         bin_width = 0.1).apply(ex)
    >>> ex_stat = flow.ChannelStatisticOp(name = "DoxCFP",
    ...                                   by = ["Dox", "CFP_Bin"],
    ...                                   channel = "Pacific Blue-A",
    ...                                   function = flow.geom_mean).apply(ex_bin)
    >>> view = flow.Stats1DView(name = "Dox vs IFP",
    ...                         statistic = ("DoxCFP", "geom_mean"),
    ...                         variable = "Dox",
    ...                         xscale = "log",
    ...                         huefacet = "CFP_Bin")
    >>> view.plot(ex_stat)
    """

    # traits
    id = "edu.mit.synbio.cytoflow.view.stats1d"
    friendly_id = "1D Statistics View"

    REMOVED_ERROR = "Statistics changed dramatically in 0.5; please see the documentation"
    by = util.Removed(err_string=REMOVED_ERROR)
    yfunction = util.Removed(err_string=REMOVED_ERROR)
    ychannel = util.Removed(err_string=REMOVED_ERROR)
    xvariable = util.Deprecated(new="variable")

    def enum_plots(self, experiment):
        """
        Returns an iterator over the possible plots that this View can
        produce.  The values returned can be passed to "plot".
        """

        return super().enum_plots(experiment)

    def plot(self, experiment, plot_name=None, **kwargs):
        """Plot a chart of a variable's values against a statistic.

        Parameters
        ----------

        color : a matplotlib color
            The color to plot with.  Overridden if `huefacet` is not `None`

        linestyle : ['solid' | 'dashed', 'dashdot', 'dotted' | (offset, on-off-dash-seq) | '-' | '--' | '-.' | ':' | 'None' | ' ' | '']

        marker : a matplotlib marker style
            See http://matplotlib.org/api/markers_api.html#module-matplotlib.markers

        markersize : int
            The marker size in points

        markerfacecolor : a matplotlib color
            The color to make the markers.  Overridden (?) if `huefacet` is not `None`

        alpha : the alpha blending value, from 0.0 (transparent) to 1.0 (opaque)

        Other Parameters
        ----------------

        Other `kwargs` are passed to matplotlib.pyplot.plot_.

        .. _matplotlib.pyplot.plot: https://matplotlib.org/devdocs/api/_as_gen/matplotlib.pyplot.plot.html

        See Also
        --------
        BaseView.plot : common parameters for data views

        """

        super().plot(experiment, plot_name, **kwargs)

    def _grid_plot(self, experiment, grid, xlim, ylim, xscale, yscale,
                   **kwargs):
        """
        Draw the statistic (and optional error bars) onto each facet of
        ``grid`` and report the axis limits that were used.

        Parameters
        ----------
        experiment : Experiment
            Source of the statistic and error statistic.

        grid
            The facet grid to draw on; its ``data`` attribute holds the
            frame being plotted and its ``map`` method draws per facet.

        xlim, ylim : (float, float) or None
            Axis limits requested by the caller.  When ``None``, limits are
            derived from the data with a 10% margin and clipped by the
            corresponding scale.

        xscale, yscale
            Scale objects; only their ``clip`` method is used here.

        **kwargs
            Passed through to the error-bar and line plotting functions.

        Returns
        -------
        dict
            ``{'xlim': xlim, 'ylim': ylim}``
        """
        data = grid.data

        stat = experiment.statistics[self.statistic]
        stat_name = stat.name
        has_error = bool(self.error_statistic[0])
        if has_error:
            err_stat = experiment.statistics[self.error_statistic]
            err_stat_name = err_stat.name

        # xlim / ylim arrive as named parameters, so they can never be in
        # kwargs; honor the caller's values and only compute defaults when
        # none were given.
        if xlim is None:
            xlim = (xscale.clip(data[self.variable].min() * 0.9),
                    xscale.clip(data[self.variable].max() * 1.1))

        if ylim is None:
            ylim = (yscale.clip(data[stat_name].min() * 0.9),
                    yscale.clip(data[stat_name].max() * 1.1))

            if has_error:
                try:
                    # error statistic holds (low, high) pairs
                    ylim = (yscale.clip(
                                min([x[0] for x in data[err_stat_name]]) * 0.9),
                            yscale.clip(
                                max([x[1] for x in data[err_stat_name]]) * 1.1))
                except (IndexError, TypeError):
                    # error statistic holds scalars: bars are +/- the value
                    ylim = (yscale.clip((data[stat_name].min() -
                                         data[err_stat_name].min()) * 0.9),
                            yscale.clip((data[stat_name].max() +
                                         data[err_stat_name].max()) * 1.1))

        # plot the error bars first so the axis labels don't get overwritten
        if has_error:
            grid.map(_error_bars, self.variable, stat_name, err_stat_name,
                     **kwargs)

        grid.map(plt.plot, self.variable, stat_name, **kwargs)

        return {'xlim': xlim, 'ylim': ylim}
class BarChartView(HasStrictTraits):
    """Plots a bar chart of some summary statistic

    Attributes
    ----------
    name : Str
        The bar chart's name

    statistic : Tuple(Str, Str)
        the statistic we're plotting

    scale : Enum("linear", "log", "logicle") (default = "linear")
        The scale to use on the Y axis.

    variable : Str
        the name of the conditioning variable to group the chart's bars

    error_statistic : Tuple(Str, Str)
        if specified, a statistic to draw error bars.  if values are numeric,
        the bars are drawn +/- the value.  if the values are tuples, then
        the first element is the low error and the second element is the
        high error.

    xfacet : Str
        the conditioning variable for horizontal subplots

    yfacet : Str
        the conditioning variable for vertical subplots

    huefacet : Str
        the conditioning variable to make multiple bar colors

    orientation : Enum("horizontal", "vertical")
        do we plot the bar chart horizontally or vertically?
        TODO - waiting on seaborn v0.6

    subset : String
        Passed to pandas.DataFrame.query(), to get a subset of the statistic
        before we plot it.

    Examples
    --------
    >>> ex2 = flow.ChannelStatisticOp(name = "ByDox",
    ...                               by = ["Dox", "Y2-A+"],
    ...                               channel = "Y2-A",
    ...                               function = len).apply(ex)
    >>> bar = flow.BarChartView()
    >>> bar.name = "Bar Chart"
    >>> bar.statistic = ("ByDox", "len")
    >>> bar.variable = 'Y2-A+'
    >>> bar.huefacet = 'Dox'
    >>> bar.plot(ex2)
    """

    # traits
    id = "edu.mit.synbio.cytoflow.view.barchart"
    friendly_id = "Bar Chart"

    REMOVED_ERROR = "Statistics have changed dramatically in 0.5; please see the documentation"
    channel = util.Removed(err_string=REMOVED_ERROR)
    function = util.Removed(err_string=REMOVED_ERROR)
    error_bars = util.Removed(err_string=REMOVED_ERROR)

    by = util.Deprecated(new='variable')

    name = Str
    statistic = Tuple(Str, Str)
    scale = util.ScaleEnum
    variable = Str
    orientation = Enum("vertical", "horizontal")

    xfacet = Str
    yfacet = Str
    huefacet = Str

    error_statistic = Tuple(Str, Str)
    subset = Str

    def _make_data(self, experiment):
        """
        Validate the view's settings against ``experiment`` and build the
        data frame to plot.  Shared by :meth:`enum_plots` and :meth:`plot`.

        Returns
        -------
        tuple
            ``(stat, error_stat, error_name, data, names)`` where ``stat`` is
            the statistic, ``error_stat`` the error statistic or ``None``,
            ``error_name`` the random column name holding the error values
            (or ``None``), ``data`` the indexed frame after subsetting and
            dropping single-valued levels, and ``names`` the remaining index
            level names.

        Raises
        ------
        util.CytoflowViewError
            If the experiment, statistic, variable or a facet is missing or
            inconsistent, or the subset is invalid or empty.
        """
        if experiment is None:
            raise util.CytoflowViewError("No experiment specified")

        if self.statistic not in experiment.statistics:
            raise util.CytoflowViewError(
                "Can't find the statistic {} in the experiment".format(
                    self.statistic))
        stat = experiment.statistics[self.statistic]

        if self.error_statistic[0]:
            if self.error_statistic not in experiment.statistics:
                raise util.CytoflowViewError(
                    "Can't find the error statistic in the experiment")
            error_stat = experiment.statistics[self.error_statistic]
        else:
            error_stat = None

        if error_stat is not None:
            if not stat.index.equals(error_stat.index):
                raise util.CytoflowViewError(
                    "Data statistic and error statistic "
                    " don't have the same index.")

        data = pd.DataFrame(index=stat.index)
        data[stat.name] = stat

        if error_stat is not None:
            # random column name so it can't collide with the statistic's
            error_name = util.random_string(6)
            data[error_name] = error_stat
        else:
            error_name = None

        if self.subset:
            try:
                data = data.query(self.subset)
            except Exception as e:
                raise util.CytoflowViewError(
                    "Subset string '{0}' isn't valid".format(
                        self.subset)) from e

            if len(data) == 0:
                raise util.CytoflowViewError(
                    "Subset string '{0}' returned no values".format(
                        self.subset))

        # index levels with a single value carry no information; drop them
        for name in list(data.index.names):
            unique_values = data.index.get_level_values(name).unique()
            if len(unique_values) == 1:
                warn("Only one value for level {}; dropping it.".format(name),
                     util.CytoflowViewWarning)
                try:
                    data.index = data.index.droplevel(name)
                except AttributeError as e:
                    # a flat (single-level) index has no droplevel()
                    raise util.CytoflowViewError("Must have more than one "
                                                 "value to plot.") from e

        names = list(data.index.names)

        if not self.variable:
            raise util.CytoflowViewError("variable not specified")

        if self.variable not in names:
            raise util.CytoflowViewError("Variable {} isn't in the statistic; "
                                         "must be one of {}".format(
                                             self.variable, names))

        # each facet that is set must be both an experiment condition and
        # an index level of the (possibly reduced) statistic
        for label, facet in (("X", self.xfacet),
                             ("Y", self.yfacet),
                             ("Hue", self.huefacet)):
            if not facet:
                continue
            if facet not in experiment.conditions:
                raise util.CytoflowViewError(
                    "{0} facet {1} isn't in the experiment".format(label,
                                                                   facet))
            if facet not in names:
                raise util.CytoflowViewError(
                    "{0} facet {1} is not a statistic index; "
                    "must be one of {2}".format(label, facet, names))

        return stat, error_stat, error_name, data, names

    def _used_facets(self):
        """Return the non-empty variable/facet names, rejecting duplicates."""
        # a real list (not a lazy filter object) so len() works on Python 3
        facets = [x for x in [self.variable, self.xfacet,
                              self.yfacet, self.huefacet] if x]
        if len(facets) != len(set(facets)):
            raise util.CytoflowViewError("Can't reuse facets")
        return facets

    def enum_plots(self, experiment):
        """
        Returns an iterator over the possible plots that this View can
        produce.  The values returned can be passed to "plot".

        If every statistic index level is used by the variable or a facet,
        the iterator yields a single ``None``.
        """
        _, _, _, _, names = self._make_data(experiment)
        facets = self._used_facets()

        # index levels not consumed by a facet select between plots
        by = list(set(names) - set(facets))

        class plot_enum(object):

            def __init__(self, experiment, by):
                self._iter = None
                self._returned = False

                if by:
                    self._iter = iter(experiment.data.groupby(by))

            def __iter__(self):
                return self

            def __next__(self):
                if self._iter is not None:
                    # groupby yields (key, frame); the key names the plot
                    return next(self._iter)[0]
                if self._returned:
                    raise StopIteration
                self._returned = True
                return None

            # Python 2 iterator protocol
            next = __next__

        return plot_enum(experiment, by)

    def plot(self, experiment, plot_name=None, **kwargs):
        """
        Plot a bar chart.

        Parameters
        ----------
        experiment : Experiment
            The experiment whose ``statistics`` contains :attr:`statistic`.

        plot_name
            One of the values from :meth:`enum_plots`; required when some
            statistic index level is not used by the variable or a facet.

        col_wrap : int, optional
            Wrap the ``xfacet`` columns at this width.  Requires ``xfacet``
            and is incompatible with ``yfacet``.

        sharex, sharey : bool, optional (default True)
            Whether the subplots share a common X / Y range.

        **kwargs
            Remaining keyword arguments are passed to the bar-plotting
            function.

        Raises
        ------
        util.CytoflowViewError
            On any invalid or inconsistent setting.
        """
        stat, error_stat, error_name, data, names = self._make_data(experiment)

        col_wrap = kwargs.pop('col_wrap', None)

        if col_wrap and self.yfacet:
            raise util.CytoflowViewError(
                "Can't set yfacet and col_wrap at the same time.")

        if col_wrap and not self.xfacet:
            raise util.CytoflowViewError("Must set xfacet to use col_wrap.")

        facets = self._used_facets()

        unused_names = list(set(names) - set(facets))

        if plot_name is not None and not unused_names:
            raise util.CytoflowViewError("You specified a plot name, but all "
                                         "the facets are already used")

        data.reset_index(inplace=True)

        if unused_names:
            groupby = data.groupby(unused_names)

            if plot_name is None:
                raise util.CytoflowViewError(
                    "You must use facets {} in either the "
                    "plot variables or the plot name. "
                    "Possible plot names: {}".format(unused_names,
                                                     groupby.groups.keys()))

            if plot_name not in set(groupby.groups.keys()):
                raise util.CytoflowViewError(
                    "Plot {} not from plot_enum; must "
                    "be one of {}".format(plot_name, groupby.groups.keys()))

            data = groupby.get_group(plot_name)

        sharex = kwargs.pop('sharex', True)
        sharey = kwargs.pop('sharey', True)

        cols = col_wrap if col_wrap else \
            (len(data[self.xfacet].unique()) if self.xfacet else 1)

        g = sns.FacetGrid(data,
                          size=(6 / cols),
                          aspect=1.5,
                          col=(self.xfacet if self.xfacet else None),
                          row=(self.yfacet if self.yfacet else None),
                          col_order=(np.sort(data[self.xfacet].unique())
                                     if self.xfacet else None),
                          row_order=(np.sort(data[self.yfacet].unique())
                                     if self.yfacet else None),
                          col_wrap=col_wrap,
                          legend_out=False,
                          sharex=sharex,
                          sharey=sharey)

        scale = util.scale_factory(self.scale,
                                   experiment,
                                   statistic=self.statistic)

        # because the bottom of a bar chart is "0", masking out bad
        # values on a log scale doesn't work.  we must clip instead.
        if self.scale == "log":
            scale.mode = "clip"

        # set the scale for each set of axes; can't just call plt.xscale()
        for ax in g.axes.flatten():
            if self.orientation == 'horizontal':
                ax.set_xscale(self.scale, **scale.mpl_params)
            else:
                ax.set_yscale(self.scale, **scale.mpl_params)

        map_args = [self.variable, stat.name]

        if self.huefacet:
            map_args.append(self.huefacet)

        if error_stat is not None:
            map_args.append(error_name)

        g.map(_barplot,
              *map_args,
              view=self,
              stat_name=stat.name,
              error_name=error_name,
              **kwargs)

        # bind the figure unconditionally: it is needed for the axis label
        # below even when neither axis is shared
        fig = plt.gcf()

        if sharex:
            # if we are sharing x axes, make sure the x range is the same
            # for each
            xlims = [ax.get_xlim() for ax in fig.get_axes()]
            if xlims:
                fig_x_min = min(lo for lo, _ in xlims)
                fig_x_max = max(hi for _, hi in xlims)
                for ax in fig.get_axes():
                    ax.set_xlim(fig_x_min, fig_x_max)

        if sharey:
            # if we are sharing y axes, make sure the y range is the same
            # for each
            ylims = [ax.get_ylim() for ax in fig.get_axes()]
            if ylims:
                fig_y_min = min(lo for lo, _ in ylims)
                fig_y_max = max(hi for _, hi in ylims)
                for ax in fig.get_axes():
                    ax.set_ylim(fig_y_min, fig_y_max)

        if self.huefacet:
            labels = np.sort(data[self.huefacet].unique())
            labels = [str(x) for x in labels]
            g.add_legend(title=self.huefacet, label_order=labels)

        # label the value axis on the first subplot
        plt.sca(fig.get_axes()[0])
        if self.orientation == 'horizontal':
            plt.xlabel(self.statistic)
        else:
            plt.ylabel(self.statistic)

        if unused_names and plot_name is not None:
            plt.title("{0} = {1}".format(unused_names, plot_name))
class Base1DStatisticsView(BaseStatisticsView):
    """
    The base class for 1-dimensional statistic views -- ie, the :attr:`variable`
    attribute is on the x axis, and the statistic value is on the y axis.

    Attributes
    ----------
    statistic : (str, str)
        The name of the statistic to plot.  Must be a key in the
        :attr:`~Experiment.statistics` attribute of the :class:`~.Experiment`
        being plotted.

    error_statistic : (str, str)
        The name of the statistic used to plot error bars.  Must be a key in the
        :attr:`~Experiment.statistics` attribute of the :class:`~.Experiment`
        being plotted.
    """

    REMOVED_ERROR = "Statistics changed dramatically in 0.5; please see the documentation"
    by = util.Removed(err_string = REMOVED_ERROR)
    yfunction = util.Removed(err_string = REMOVED_ERROR)
    ychannel = util.Removed(err_string = REMOVED_ERROR)
    channel = util.Removed(err_string = REMOVED_ERROR)
    function = util.Removed(err_string = REMOVED_ERROR)
    error_bars = util.Removed(err_string = REMOVED_ERROR)

    xvariable = util.Deprecated(new = "variable")

    statistic = Tuple(Str, Str)
    error_statistic = Tuple(Str, Str)

    def enum_plots(self, experiment):
        """
        Build the statistic data frame and delegate plot enumeration to the
        base class.
        """
        data = self._make_data(experiment)
        return super().enum_plots(experiment, data)

    def plot(self, experiment, plot_name = None, **kwargs):
        """
        Validate :attr:`variable`, construct the X and Y scales and delegate
        the actual plotting to the base class.

        The X scale is only created when the variable's values in the
        experiment are numeric; otherwise ``xscale = None`` is passed on.

        Raises
        ------
        util.CytoflowViewError
            If the variable is unset or not an experiment condition, or if
            the statistic data cannot be built.
        """
        data = self._make_data(experiment)

        if not self.variable:
            raise util.CytoflowViewError('variable',
                                         "variable not set")

        if self.variable not in experiment.conditions:
            raise util.CytoflowViewError('variable',
                                         "variable {0} not in the experiment"
                                         .format(self.variable))

        if util.is_numeric(experiment[self.variable]):
            xscale = util.scale_factory(self.xscale, experiment,
                                        condition = self.variable)
        else:
            xscale = None

        yscale = util.scale_factory(self.yscale,
                                    experiment,
                                    statistic = self.statistic,
                                    error_statistic = self.error_statistic)

        super().plot(experiment,
                     data,
                     plot_name,
                     xscale = xscale,
                     yscale = yscale,
                     **kwargs)

    def _make_data(self, experiment):
        """
        Look up the statistic (and optional error statistic) in
        ``experiment`` and return them as columns of one data frame that
        shares the statistic's index.

        Raises
        ------
        util.CytoflowViewError
            If the experiment or statistic is missing, the statistic is not
            numeric, or the error statistic is missing, has a different
            index, or has the same name as the statistic.
        """
        if experiment is None:
            raise util.CytoflowViewError('experiment',
                                         "No experiment specified")

        # statistic is a 2-tuple, which is truthy even when both elements
        # are empty strings; test the first element to detect "unset"
        if not self.statistic or not self.statistic[0]:
            raise util.CytoflowViewError('statistic',
                                         "Statistic not set")

        if self.statistic not in experiment.statistics:
            raise util.CytoflowViewError('statistic',
                                         "Can't find the statistic {} in the experiment"
                                         .format(self.statistic))
        stat = experiment.statistics[self.statistic]

        if not util.is_numeric(stat):
            raise util.CytoflowViewError('statistic',
                                         "Statistic must be numeric")

        if self.error_statistic[0]:
            if self.error_statistic not in experiment.statistics:
                raise util.CytoflowViewError('error_statistic',
                                             "Can't find the error statistic in the experiment")
            error_stat = experiment.statistics[self.error_statistic]
        else:
            error_stat = None

        if error_stat is not None:
            if not stat.index.equals(error_stat.index):
                raise util.CytoflowViewError('error_statistic',
                                             "Data statistic and error statistic "
                                             " don't have the same index.")

            # both become columns of the same frame, keyed by name
            if stat.name == error_stat.name:
                raise util.CytoflowViewError('error_statistic',
                                             "Data statistic and error statistic can "
                                             "not have the same name.")

        data = pd.DataFrame(index = stat.index)
        data[stat.name] = stat

        if error_stat is not None:
            data[error_stat.name] = error_stat

        return data
class ColorTranslationOp(HasStrictTraits):
    """
    Translate measurements from one color's scale to another, using a two-color
    or three-color control.

    To use, set up the `controls` dict: its keys give the desired
    (*from-channel*, *to-channel*) mapping and its values the multi-color
    control files.  Call `estimate()` to parameterize the module; check that
    the plots look good with `default_view().plot()`; then `apply()` to an
    Experiment.

    Attributes
    ----------
    name : Str
        The operation name (for UI representation; optional for interactive use)

    controls : Dict((Str, Str), File)
        Two-color controls used to determine the mapping.  The keys are
        tuples of *from-channel* and *to-channel*.  The values are FCS files
        containing two-color constitutive fluorescent expression data
        for the mapping.

    mixture_model : Bool (default = False)
        If "True", try to model the "from" channel as a mixture of expressing
        cells and non-expressing cells (as you would get with a transient
        transfection.)  Make sure you check the diagnostic plots!

    linear_model : Bool (default = False)
        Declared here but not read by any method of this class.

    Metadata
    --------
    channel_translation : Str
        Which channel was this one translated to?

    channel_translation_fn : Callable (pandas.Series --> pandas.Series)
        The function that translated this channel

    Notes
    -----
    In the TASBE workflow, this operation happens *after* the application of
    `AutofluorescenceOp` and `BleedthroughPiecewiseOp`.  `estimate()`
    re-applies every operation in the experiment's history to each control
    file before the translation coefficients are estimated.

    Examples
    --------
    >>> ct_op = flow.ColorTranslationOp()
    >>> ct_op.controls = {("Pacific Blue-A", "FITC-A") : "merged/rby.fcs",
    ...                   ("PE-Tx-Red-YG-A", "FITC-A") : "merged/rby.fcs"}
    >>> ct_op.mixture_model = True
    >>>
    >>> ct_op.estimate(ex4)
    >>> ct_op.default_view().plot(ex4)
    >>> ex5 = ct_op.apply(ex4)
    """

    # traits
    id = Constant('edu.mit.synbio.cytoflow.operations.color_translation')
    friendly_id = Constant("Color translation")

    name = Constant("Color Translation")

    translation = util.Removed(
        err_string="'translation' is removed; the same info is found in 'controls'",
        warning=True)
    controls = Dict(Tuple(Str, Str), File)
    mixture_model = Bool(False)
    linear_model = Bool(False)

    # The regression coefficients determined by `estimate()`, used to map
    # colors between channels.  The keys are tuples of (*from-channel*,
    # *to-channel*) (corresponding to the keys of `controls`).  The
    # values are lists of Float, the log-log coefficients for the color
    # translation (determined by `estimate()`).
    # TODO - why can't i make the value List(Float)?
    _coefficients = Dict(Tuple(Str, Str), Any, transient=True)
    _trans_fn = Dict(Tuple(Str, Str), Callable, transient=True)

    def estimate(self, experiment, subset=None):
        """
        Estimate the mapping from the two-channel controls

        Parameters
        ----------
        experiment : Experiment
            Supplies the channel names, the metadata used to re-import the
            control files, and the history of operations that is re-applied
            to each control.

        subset : str, optional
            If given, each control is restricted with ``query(subset)``
            before fitting.

        Raises
        ------
        CytoflowOpError
            If no experiment or controls are given, a channel is missing from
            the experiment, or the subset is invalid or selects no events.
        """

        if experiment is None:
            raise util.CytoflowOpError("No experiment specified")

        if not self.controls:
            raise util.CytoflowOpError("No controls specified")

        # control events, cached per file so a file shared by several
        # translations is only imported and processed once
        tubes = {}

        # from-channel -> to-channel.  Each from-channel keeps a single
        # target; if `controls` lists one more than once, the last entry wins.
        translation = {x[0]: x[1] for x in list(self.controls.keys())}

        for from_channel, to_channel in translation.items():

            if from_channel not in experiment.channels:
                raise util.CytoflowOpError(
                    "Channel {0} not in the experiment".format(from_channel))

            if to_channel not in experiment.channels:
                raise util.CytoflowOpError(
                    "Channel {0} not in the experiment".format(to_channel))

            if (from_channel, to_channel) not in self.controls:
                raise util.CytoflowOpError("Control file for {0} --> {1} "
                                           "not specified".format(
                                               from_channel, to_channel))

            tube_file = self.controls[(from_channel, to_channel)]

            if tube_file not in tubes:
                # make a little Experiment
                check_tube(tube_file, experiment)
                tube_exp = ImportOp(
                    tubes=[Tube(file=tube_file)],
                    channels={
                        experiment.metadata[c]["fcs_name"]: c
                        for c in experiment.channels
                    },
                    name_metadata=experiment.metadata['name_metadata']).apply()

                # apply previous operations
                for op in experiment.history:
                    tube_exp = op.apply(tube_exp)

                # subset the events
                if subset:
                    try:
                        tube_exp = tube_exp.query(subset)
                    except Exception as e:
                        raise util.CytoflowOpError(
                            "Subset string '{0}' isn't valid".format(
                                subset)) from e

                    if len(tube_exp.data) == 0:
                        raise util.CytoflowOpError(
                            "Subset string '{0}' returned no events".format(
                                subset))

                tubes[tube_file] = tube_exp.data

            # the fit is done in log10 space, so only events that are strictly
            # positive in both channels can be used
            data = tubes[tube_file][[from_channel, to_channel]].copy()
            data = data[data[from_channel] > 0]
            data = data[data[to_channel] > 0]
            data.reset_index(drop=True, inplace=True)

            data[from_channel] = np.log10(data[from_channel])
            data[to_channel] = np.log10(data[to_channel])

            if self.mixture_model:
                gmm = sklearn.mixture.BayesianGaussianMixture(n_components=2)
                fit = gmm.fit(data)

                # pick the component with the maximum mean
                idx = 0 if fit.means_[0][0] > fit.means_[1][0] else 1
                weights = [x[idx] for x in fit.predict_proba(data)]
            else:
                weights = [1] * len(data.index)

            # this estimation method yields different results than the TASBE
            # method.  TASBE ..... does something with binned means, or
            # something ..... I can't read the MATLAB code too well, and I
            # don't know if the code I have is the same as is running on the
            # TASBE website ...... anyways.  It computes a linear,
            # multiplicative scaling constant.  Ie, OUT = m * IN, where OUT is
            # the color we're translating TO and IN is the color we're
            # translating FROM.

            # this code uses a different approach: it uses a log-linear model,
            # computing the linear Y = a * X + b coefficients on a log-log
            # plot.  this is a more general model of the underlying physical
            # behavior -- but it may not be more "correct."

            # which is better?  idunno.  i'd love to try EQUIP predictions with
            # both.  i'd like to note that i can't reproduce the TASBE method
            # precisely anyways; if i replace this with a linear model, i get
            # coefficients that are close to (but not quite the same as) the
            # TASBE website, and WAY off the color model I have in the same
            # directory as my test data.

            lr = np.polyfit(data[from_channel],
                            data[to_channel],
                            deg=1,
                            w=weights)

            # remember, these (linear) coefficients came from logspace, so
            # if the relationship in log10 space is Y = aX + b, then in
            # linear space the relationship is x = 10**X, y = 10**Y,
            # and y = (10**b) * x ^ a

            # also remember that the result of np.polyfit is a list of
            # coefficients with the highest power first!  so if we
            # solve y=ax + b, coeff #0 is a and coeff #1 is b

            a = lr[0]
            b = 10**lr[1]

            # a and b are bound as defaults so that every translation keeps
            # its own coefficients instead of the last loop iteration's
            trans_fn = lambda x, a=a, b=b: b * np.power(x, a)

            self._coefficients[(from_channel, to_channel)] = lr
            self._trans_fn[(from_channel, to_channel)] = trans_fn

    def apply(self, experiment):
        """Applies the color translation to an experiment

        Parameters
        ----------
        experiment : Experiment
            the old_experiment to which this op is applied

        Returns
        -------
        Experiment
            a new experiment with the color translation applied.  Events that
            are not strictly positive in every from-channel are removed, and
            each from-channel's metadata gains the ``channel_translation``
            and ``channel_translation_fn`` entries.

        Raises
        ------
        CytoflowOpError
            If no experiment or controls are given, or `estimate()` has not
            produced a transfer function and coefficients for every
            translation.
        """

        if experiment is None:
            raise util.CytoflowOpError("No experiment specified")

        if not self.controls:
            raise util.CytoflowOpError("No controls specified")

        if not self._trans_fn:
            raise util.CytoflowOpError("Transfer functions aren't set. "
                                       "Did you call estimate()?")

        translation = {x[0]: x[1] for x in list(self.controls.keys())}
        from_channels = [x[0] for x in list(self.controls.keys())]

        for key, val in translation.items():
            if (key, val) not in self._coefficients:
                # the placeholders used to be {1} and {2}, which made format()
                # itself fail with an IndexError instead of reporting this
                raise util.CytoflowOpError(
                    "Coefficients aren't set for translation "
                    "{0} --> {1}.  Did you call estimate()?".format(key, val))

        new_experiment = experiment.clone()

        # the translation is a power law, so drop the events it can't handle
        for channel in from_channels:
            new_experiment.data = \
                new_experiment.data[new_experiment.data[channel] > 0]

        for from_channel, to_channel in translation.items():
            trans_fn = self._trans_fn[(from_channel, to_channel)]

            # translate the filtered events themselves, rather than the
            # original column (which still holds the events dropped above)
            new_experiment[from_channel] = \
                trans_fn(new_experiment[from_channel])
            new_experiment.metadata[from_channel][
                'channel_translation_fn'] = trans_fn
            new_experiment.metadata[from_channel][
                'channel_translation'] = to_channel

        new_experiment.history.append(
            self.clone_traits(transient=lambda _: True))

        return new_experiment

    def default_view(self, **kwargs):
        """
        Returns a diagnostic plot to see if the color translation estimation
        is working.

        Returns
        -------
        IView
            A `ColorTranslationDiagnostic` bound to this operation; call
            plot() to see the diagnostic plots
        """

        return ColorTranslationDiagnostic(op=self, **kwargs)
class Stats1DView(Base1DStatisticsView):
    """
    Plot a statistic.  The value of the statistic will be plotted on the
    Y axis; a numeric conditioning variable must be chosen for the X axis.
    Every variable in the statistic must be specified as either the `variable`
    or one of the plot facets.

    Attributes
    ----------

    Examples
    --------

    .. plot::
        :context: close-figs

        Make a little data set.

        >>> import cytoflow as flow
        >>> import_op = flow.ImportOp()
        >>> import_op.tubes = [flow.Tube(file = "Plate01/RFP_Well_A3.fcs",
        ...                              conditions = {'Dox' : 10.0}),
        ...                    flow.Tube(file = "Plate01/CFP_Well_A4.fcs",
        ...                              conditions = {'Dox' : 1.0})]
        >>> import_op.conditions = {'Dox' : 'float'}
        >>> ex = import_op.apply()

    Create a new statistic.

    .. plot::
        :context: close-figs

        >>> ch_op = flow.ChannelStatisticOp(name = 'MeanByDox',
        ...                                 channel = 'Y2-A',
        ...                                 function = flow.geom_mean,
        ...                                 by = ['Dox'])
        >>> ex2 = ch_op.apply(ex)

    View the new statistic

    .. plot::
        :context: close-figs

        >>> flow.Stats1DView(variable = 'Dox',
        ...                  statistic = ('MeanByDox', 'geom_mean'),
        ...                  xscale = 'log',
        ...                  yscale = 'log').plot(ex2)
    """

    # traits
    id = Constant("edu.mit.synbio.cytoflow.view.stats1d")
    friendly_id = Constant("1D Statistics View")

    # A plain string, as in Base1DStatisticsView: util.Removed is given this
    # as its err_string, so it must be the text itself, not a trait object.
    REMOVED_ERROR = "Statistics changed dramatically in 0.5; please see the documentation"
    by = util.Removed(err_string=REMOVED_ERROR)
    yfunction = util.Removed(err_string=REMOVED_ERROR)
    ychannel = util.Removed(err_string=REMOVED_ERROR)
    xvariable = util.Deprecated(new="variable")

    def enum_plots(self, experiment):
        """
        Returns an iterator over the possible plots that this View can
        produce.  The values returned can be passed to :meth:`plot`.
        """

        return super().enum_plots(experiment)

    def plot(self, experiment, plot_name=None, **kwargs):
        """Plot a chart of a variable's values against a statistic.

        Parameters
        ----------

        color : a matplotlib color
            The color to plot with.  Overridden if `huefacet` is not `None`

        linestyle : ['solid' | 'dashed', 'dashdot', 'dotted' | (offset, on-off-dash-seq) | '-' | '--' | '-.' | ':' | 'None' | ' ' | '']

        marker : a matplotlib marker style
            See http://matplotlib.org/api/markers_api.html#module-matplotlib.markers

        markersize : int
            The marker size in points

        markerfacecolor : a matplotlib color
            The color to make the markers.  Overridden (?) if `huefacet` is not `None`

        alpha : the alpha blending value, from 0.0 (transparent) to 1.0 (opaque)

        Notes
        -----

        Other `kwargs` are passed to `matplotlib.pyplot.plot <https://matplotlib.org/devdocs/api/_as_gen/matplotlib.pyplot.plot.html>`_

        """

        super().plot(experiment, plot_name, **kwargs)

    def _grid_plot(self, experiment, grid, xlim, ylim, xscale, yscale, **kwargs):
        """
        Draw the statistic (and its error bars, if any) onto ``grid``.

        Parameters
        ----------
        experiment : Experiment
            Source of the statistic and error statistic.

        grid : a facet grid
            Its ``data`` attribute holds the values to plot, and its ``map``
            method is used to draw them.

        xlim, ylim : (float, float) or None
            Axis limits.  A value that is not ``None`` is returned unchanged;
            ``None`` asks for limits computed from the data.

        xscale, yscale : scale objects
            Their ``clip`` method is applied to the computed limits.
            ``xscale`` may be ``None`` (Base1DStatisticsView passes that for
            a non-numeric variable), in which case no x limits are computed.

        Returns
        -------
        dict
            ``{'xlim': ..., 'ylim': ...}``
        """

        data = grid.data

        stat_name = experiment.statistics[self.statistic].name
        has_error = bool(self.error_statistic[0])
        if has_error:
            err_stat_name = experiment.statistics[self.error_statistic].name

        # xlim and ylim are named parameters, so they can never also be in
        # kwargs; popping them from there always produced None and threw the
        # caller's limits away.
        if xlim is None and xscale is not None:
            xlim = (xscale.clip(data[self.variable].min() * 0.9),
                    xscale.clip(data[self.variable].max() * 1.1))

        if ylim is None:
            ylim = (yscale.clip(data[stat_name].min() * 0.9),
                    yscale.clip(data[stat_name].max() * 1.1))

            if has_error:
                try:
                    # error statistic made of (low, high) pairs
                    ylim = (yscale.clip(min([x[0] for x in data[err_stat_name]]) * 0.9),
                            yscale.clip(max([x[1] for x in data[err_stat_name]]) * 1.1))
                except (IndexError, TypeError):
                    # scalar error statistic: numpy scalars raise IndexError
                    # when indexed, plain floats raise TypeError
                    ylim = (yscale.clip((data[stat_name].min() - data[err_stat_name].min()) * 0.9),
                            yscale.clip((data[stat_name].max() + data[err_stat_name].max()) * 1.1))

        # plot the error bars first so the axis labels don't get overwritten
        if has_error:
            grid.map(_error_bars, self.variable, stat_name, err_stat_name, **kwargs)

        grid.map(plt.plot, self.variable, stat_name, **kwargs)

        return {'xlim': xlim, 'ylim': ylim}
class Base1DStatisticsView(BaseStatisticsView):
    """
    Base class for views that plot one statistic against one variable.

    Attributes
    ----------
    statistic : (str, str)
        The key, in the experiment's ``statistics``, of the statistic to plot.

    error_statistic : (str, str)
        The key, in the experiment's ``statistics``, of the statistic used
        for error bars.  Ignored while its first element is empty.
    """

    REMOVED_ERROR = "Statistics changed dramatically in 0.5; please see the documentation"
    by = util.Removed(err_string=REMOVED_ERROR)
    yfunction = util.Removed(err_string=REMOVED_ERROR)
    ychannel = util.Removed(err_string=REMOVED_ERROR)
    channel = util.Removed(err_string=REMOVED_ERROR)
    function = util.Removed(err_string=REMOVED_ERROR)
    error_bars = util.Removed(err_string=REMOVED_ERROR)

    xvariable = util.Deprecated(new="variable")

    statistic = Tuple(Str, Str)
    error_statistic = Tuple(Str, Str)

    def enum_plots(self, experiment):
        """
        Build the plotting data frame for ``experiment`` and hand it to the
        parent class' ``enum_plots``.
        """
        data = self._make_data(experiment)
        return super().enum_plots(experiment, data)

    def plot(self, experiment, plot_name=None, **kwargs):
        """
        Validate :attr:`variable`, build the axis scales and delegate the
        drawing to the parent class' ``plot``.

        Raises
        ------
        CytoflowViewError
            If the statistic is unusable, or :attr:`variable` is unset or is
            not one of the experiment's conditions.
        """
        data = self._make_data(experiment)

        # fail with a view error instead of whatever indexing the experiment
        # with a missing or empty name would raise
        if not self.variable:
            raise util.CytoflowViewError("variable not set")

        if self.variable not in experiment.conditions:
            raise util.CytoflowViewError("variable {0} not in the experiment"
                                         .format(self.variable))

        # a scale only makes sense for a numeric variable
        if util.is_numeric(experiment[self.variable]):
            xscale = util.scale_factory(self.xscale,
                                        experiment,
                                        condition=self.variable)
        else:
            xscale = None

        yscale = util.scale_factory(self.yscale,
                                    experiment,
                                    statistic=self.statistic,
                                    error_statistic=self.error_statistic)

        super().plot(experiment,
                     data,
                     plot_name,
                     xscale=xscale,
                     yscale=yscale,
                     **kwargs)

    def _make_data(self, experiment):
        """
        Assemble a ``pandas.DataFrame`` indexed like the statistic, with one
        column for the statistic and, if :attr:`error_statistic` is set, one
        for the error statistic.  Columns are named after the statistics'
        ``name`` attributes.

        Raises
        ------
        CytoflowViewError
            If the experiment is ``None``; the statistic is unset, missing or
            non-numeric; the error statistic is missing; or the two statistics
            have different indices or the same name.
        """
        if experiment is None:
            raise util.CytoflowViewError("No experiment specified")

        # An unset Tuple(Str, Str) trait is ('', ''): a non-empty and therefore
        # truthy tuple.  Test the first element, the same way error_statistic
        # is tested below, so that this message can actually be reached.
        if not self.statistic or not self.statistic[0]:
            raise util.CytoflowViewError("Statistic not set")

        if self.statistic not in experiment.statistics:
            raise util.CytoflowViewError(
                "Can't find the statistic {} in the experiment".format(
                    self.statistic))
        stat = experiment.statistics[self.statistic]

        if not util.is_numeric(stat):
            raise util.CytoflowViewError("Statistic must be numeric")

        error_stat = None
        if self.error_statistic[0]:
            if self.error_statistic not in experiment.statistics:
                raise util.CytoflowViewError(
                    "Can't find the error statistic in the experiment")
            error_stat = experiment.statistics[self.error_statistic]

        if error_stat is not None:
            if not stat.index.equals(error_stat.index):
                raise util.CytoflowViewError(
                    "Data statistic and error statistic "
                    " don't have the same index.")

            # both become columns of the same frame, keyed by name
            if stat.name == error_stat.name:
                raise util.CytoflowViewError(
                    "Data statistic and error statistic can "
                    "not have the same name.")

        data = pd.DataFrame(index=stat.index)
        data[stat.name] = stat

        if error_stat is not None:
            data[error_stat.name] = error_stat

        return data
class BarChartView(Base1DStatisticsView):
    """
    Plots a bar chart of some summary statistic

    Attributes
    ----------

    Examples
    --------

    Make a little data set.

    .. plot::
        :context: close-figs

        >>> import cytoflow as flow
        >>> import_op = flow.ImportOp()
        >>> import_op.tubes = [flow.Tube(file = "Plate01/RFP_Well_A3.fcs",
        ...                              conditions = {'Dox' : 10.0}),
        ...                    flow.Tube(file = "Plate01/CFP_Well_A4.fcs",
        ...                              conditions = {'Dox' : 1.0})]
        >>> import_op.conditions = {'Dox' : 'float'}
        >>> ex = import_op.apply()

    Add a threshold gate

    .. plot::
        :context: close-figs

        >>> ex2 = flow.ThresholdOp(name = 'Threshold',
        ...                        channel = 'Y2-A',
        ...                        threshold = 2000).apply(ex)

    Add a statistic

    .. plot::
        :context: close-figs

        >>> ex3 = flow.ChannelStatisticOp(name = "ByDox",
        ...                               channel = "Y2-A",
        ...                               by = ['Dox', 'Threshold'],
        ...                               function = len).apply(ex2)

    Plot the bar chart

    .. plot::
        :context: close-figs

        >>> flow.BarChartView(statistic = ("ByDox", "len"),
        ...                   variable = "Dox",
        ...                   huefacet = "Threshold").plot(ex3)

    """

    # traits
    id = Constant("edu.mit.synbio.cytoflow.view.barchart")
    friendly_id = Constant("Bar Chart")

    orientation = util.Removed(err_string="`orientation` is now a parameter to `plot`")

    def enum_plots(self, experiment):
        """
        Returns an iterator over the possible plots that this View can
        produce.  The values returned can be passed to "plot".
        """

        return super().enum_plots(experiment)

    def plot(self, experiment, plot_name=None, **kwargs):
        """
        Plot a bar chart

        Parameters
        ----------

        color : a matplotlib color
            Sets the colors of all the bars, even if there is a hue facet

        errwidth : scalar
            The width of the error bars, in points

        errcolor : a matplotlib color
            The color of the error bars

        capsize : scalar
            The size of the error bar caps, in points

        Notes
        -----

        Other ``kwargs`` are passed to `matplotlib.axes.Axes.bar <https://matplotlib.org/devdocs/api/_as_gen/matplotlib.axes.Axes.bar.html>`_

        """

        super().plot(experiment, plot_name, **kwargs)

    def _grid_plot(self, experiment, grid, **kwargs):
        """
        Draw the bars onto ``grid`` with ``_barplot``.

        Reads three entries out of ``kwargs``: ``orientation`` (default
        ``'vertical'``; any value other than ``'horizontal'`` is treated as
        vertical), ``scale`` (required) and ``lim`` (default ``None``).  The
        remaining ``kwargs`` are forwarded to ``grid.map``.

        Returns
        -------
        dict
            The scale and limits of the statistic's axis, keyed
            ``xscale``/``xlim`` for a horizontal chart and ``yscale``/``ylim``
            otherwise.  Nothing is set on the axes here.
        """

        orientation = kwargs.pop('orientation', 'vertical')

        # The bottom of a bar sits at 0, so on a log scale masking out the
        # bad values does not work; the scale has to clip them instead.
        scale = kwargs.pop('scale')
        if scale.name == "log":
            scale.mode = "clip"

        lim = kwargs.pop('lim', None)

        stat_name = experiment.statistics[self.statistic].name

        # columns of the grid's data that _barplot receives, in order
        columns = [self.variable, stat_name]
        if self.huefacet:
            columns.append(self.huefacet)

        error_name = None
        if self.error_statistic[0]:
            error_name = experiment.statistics[self.error_statistic].name
            columns.append(error_name)

        grid.map(_barplot,
                 *columns,
                 view=self,
                 stat_name=stat_name,
                 error_name=error_name,
                 orientation=orientation,
                 grid=grid,
                 **kwargs)

        # the statistic runs along x for horizontal bars, along y otherwise
        axis = 'x' if orientation == 'horizontal' else 'y'
        return {axis + 'scale': scale, axis + 'lim': lim}
class Stats1DView(HasStrictTraits):
    """
    Plot a statistic.  The value of the statistic will be plotted on the
    Y axis; a numeric conditioning variable must be chosen for the X axis.
    Every variable in the statistic must be specified as either the `variable`
    or one of the plot facets.

    Attributes
    ----------
    name : Str
        The plot's name

    statistic : Tuple(Str, Str)
        The statistic to plot.  The first element is the name of the module that
        added the statistic, and the second element is the name of the statistic.

    variable : Str
        the name of the conditioning variable to put on the X axis.  Must be
        numeric (float or int).

    xscale : Enum("linear", "log") (default = "linear")
        The scale to use on the X axis

    yscale : Enum("linear", "log", "logicle") (default = "linear")
        The scale to use on the Y axis

    xfacet : Str
        the conditioning variable for horizontal subplots

    yfacet : Str
        the conditioning variable for vertical subplots

    huefacet :
        the conditioning variable for color.

    huescale :
        the scale to use on the "hue" axis, if there are many values of
        the hue facet.

    error_statistic : Tuple(Str, Str)
        A statistic to use to draw error bars; the bars are +- the value
        of the statistic.

    subset : String
        Passed to pandas.DataFrame.query(), to get a subset of the statistic
        before we plot it.

    Examples
    --------

    Assume we want a Dox induction curve in a transient transfection experiment.
    We have induced several wells with different amounts of Dox and the output
    of the Dox-inducible channel is "Pacific Blue-A".  We have a constitutive
    expression channel in "PE-Tx-Red-YG-A". We want to bin all the data by
    constitutive expression level, then plot the dose-response (geometric mean)
    curve in each bin.

    >>> ex_bin = flow.BinningOp(name = "CFP_Bin",
    ...                         channel = "PE-Tx-Red-YG-A",
    ...                         scale = "log",
    ...                         bin_width = 0.1).apply(ex)
    >>> ex_stat = flow.ChannelStatisticOp(name = "DoxCFP",
    ...                                   by = ["Dox", "CFP_Bin"],
    ...                                   channel = "Pacific Blue-A",
    ...                                   function = flow.geom_mean).apply(ex_bin)
    >>> view = flow.Stats1DView(name = "Dox vs IFP",
    ...                         statistic = ("DoxCFP", "geom_mean"),
    ...                         variable = "Dox",
    ...                         xscale = "log",
    ...                         huefacet = "CFP_Bin")
    >>> view.plot(ex_stat)
    """

    # traits
    id = "edu.mit.synbio.cytoflow.view.stats1d"
    friendly_id = "1D Statistics View"

    REMOVED_ERROR = "Statistics have changed dramatically in 0.5; please see the documentation"
    by = util.Removed(err_string=REMOVED_ERROR)
    yfunction = util.Removed(err_string=REMOVED_ERROR)
    ychannel = util.Removed(err_string=REMOVED_ERROR)
    xvariable = util.Deprecated(new="variable")

    name = Str
    statistic = Tuple(Str, Str)
    variable = Str
    xscale = util.ScaleEnum
    yscale = util.ScaleEnum
    xfacet = Str
    yfacet = Str
    huefacet = Str
    huescale = util.ScaleEnum  # TODO - make this actually work

    error_statistic = Tuple(Str, Str)
    subset = Str

    def enum_plots(self, experiment):
        """
        Returns an iterator over the possible plots that this View can
        produce.  The values returned can be passed to "plot".

        The statistic's index levels that are not used as the variable or as
        a facet are collected; the iterator yields the keys obtained by
        grouping ``experiment.data`` by them.  If there are none, it yields a
        single ``None``.

        Raises
        ------
        CytoflowViewError
            If the statistic, error statistic, subset, variable or facets are
            not usable.
        """

        # TODO - all this is copied from below.  can we abstract it out somehow?

        if not experiment:
            raise util.CytoflowViewError("No experiment specified")

        if self.statistic not in experiment.statistics:
            raise util.CytoflowViewError("Can't find the statistic {} in the experiment"
                                         .format(self.statistic))
        stat = experiment.statistics[self.statistic]

        error_stat = None
        if self.error_statistic[0]:
            if self.error_statistic not in experiment.statistics:
                raise util.CytoflowViewError("Can't find the error statistic in the experiment")
            error_stat = experiment.statistics[self.error_statistic]

        if error_stat is not None:
            if not stat.index.equals(error_stat.index):
                raise util.CytoflowViewError("Data statistic and error statistic "
                                             " don't have the same index.")

        data = pd.DataFrame(index=stat.index)
        data[stat.name] = stat

        if error_stat is not None:
            data[util.random_string(6)] = error_stat

        if self.subset:
            try:
                data = data.query(self.subset)
            except Exception as e:
                raise util.CytoflowViewError("Subset string '{0}' isn't valid"
                                             .format(self.subset)) from e

            if len(data) == 0:
                raise util.CytoflowViewError("Subset string '{0}' returned no values"
                                             .format(self.subset))

        # index levels with a single value carry no information for the plot
        names = list(data.index.names)
        for name in names:
            unique_values = data.index.get_level_values(name).unique()
            if len(unique_values) == 1:
                warn("Only one value for level {}; dropping it.".format(name),
                     util.CytoflowViewWarning)
                try:
                    data.index = data.index.droplevel(name)
                except (AttributeError, ValueError) as e:
                    # the last remaining level cannot be dropped
                    raise util.CytoflowViewError("Must have more than one "
                                                 "value to plot.") from e

        names = list(data.index.names)

        if not self.variable:
            raise util.CytoflowViewError("variable not specified")

        if self.variable not in data.index.names:
            raise util.CytoflowViewError("Variable {} isn't in the statistic; "
                                         "must be one of {}"
                                         .format(self.variable, data.index.names))

        if self.xfacet and self.xfacet not in experiment.conditions:
            raise util.CytoflowViewError("X facet {0} isn't in the experiment"
                                         .format(self.xfacet))

        if self.xfacet and self.xfacet not in data.index.names:
            raise util.CytoflowViewError("X facet {} is not a statistic index; "
                                         "must be one of {}".format(self.xfacet, data.index.names))

        if self.yfacet and self.yfacet not in experiment.conditions:
            raise util.CytoflowViewError("Y facet {0} isn't in the experiment"
                                         .format(self.yfacet))

        if self.yfacet and self.yfacet not in data.index.names:
            raise util.CytoflowViewError("Y facet {} is not a statistic index; "
                                         "must be one of {}".format(self.yfacet, data.index.names))

        if self.huefacet and self.huefacet not in experiment.conditions:
            raise util.CytoflowViewError("Hue facet {0} isn't in the experiment"
                                         .format(self.huefacet))

        if self.huefacet and self.huefacet not in data.index.names:
            raise util.CytoflowViewError("Hue facet {} is not a statistic index; "
                                         "must be one of {}".format(self.huefacet, data.index.names))

        # a list, not filter(): the facets are both counted and turned into a set
        facets = [x for x in [self.variable, self.xfacet, self.yfacet, self.huefacet] if x]
        if len(facets) != len(set(facets)):
            raise util.CytoflowViewError("Can't reuse facets")

        by = list(set(names) - set(facets))

        class plot_enum(object):
            """Iterator over the group keys, or over a single ``None``."""

            def __init__(self, experiment, by):
                self._iter = None
                self._returned = False

                if by:
                    self._iter = iter(experiment.data.groupby(by))

            def __iter__(self):
                return self

            def __next__(self):
                if self._iter is not None:
                    return next(self._iter)[0]

                if self._returned:
                    raise StopIteration
                self._returned = True
                return None

            # the Python 2 spelling, kept for callers that use it directly
            next = __next__

        return plot_enum(experiment, by)

    def plot(self, experiment, plot_name=None, **kwargs):
        """
        Plot a chart

        Parameters
        ----------
        experiment : Experiment
            The experiment holding the statistic to plot.

        plot_name : optional
            One of the values produced by :meth:`enum_plots`.  If it is
            ``None`` and the statistic has index levels that are not used as
            the variable or a facet, every plot from :meth:`enum_plots` is
            drawn in turn.

        col_wrap : int, optional
            Taken from ``kwargs`` and passed to ``seaborn.FacetGrid``; needs
            `xfacet` and excludes `yfacet`.

        xlim, ylim : (float, float), optional
            Axis limits, taken from ``kwargs``; computed from the data if
            absent.

        sharex, sharey : bool, optional (default = True)
            Taken from ``kwargs`` and passed to ``seaborn.FacetGrid``.

        Notes
        -----
        Other ``kwargs`` are passed to the calls that draw the error bars and
        the line.

        Raises
        ------
        CytoflowViewError
            If the statistic, error statistic, subset, variable, facets or
            ``plot_name`` are not usable.
        """

        if not experiment:
            raise util.CytoflowViewError("No experiment specified")

        if not self.statistic:
            raise util.CytoflowViewError("Statistic not set")

        if self.statistic not in experiment.statistics:
            raise util.CytoflowViewError("Can't find the statistic {} in the experiment"
                                         .format(self.statistic))
        stat = experiment.statistics[self.statistic]

        error_stat = None
        if self.error_statistic[0]:
            if self.error_statistic not in experiment.statistics:
                raise util.CytoflowViewError("Can't find the error statistic in the experiment")
            error_stat = experiment.statistics[self.error_statistic]

        if error_stat is not None:
            if not stat.index.equals(error_stat.index):
                raise util.CytoflowViewError("Data statistic and error statistic "
                                             " don't have the same index.")

        data = pd.DataFrame(index=stat.index)
        data[stat.name] = stat

        if error_stat is not None:
            # random column name, so it can't collide with the statistic's
            error_name = util.random_string(6)
            data[error_name] = error_stat

        if self.subset:
            try:
                # TODO - either sanitize column names, or check to see that
                # all conditions are valid Python variables
                data = data.query(self.subset)
            except Exception as e:
                raise util.CytoflowViewError("Subset string '{0}' isn't valid"
                                             .format(self.subset)) from e

            if len(data) == 0:
                raise util.CytoflowViewError("Subset string '{0}' returned no values"
                                             .format(self.subset))

        # index levels with a single value carry no information for the plot
        names = list(data.index.names)
        for name in names:
            unique_values = data.index.get_level_values(name).unique()
            if len(unique_values) == 1:
                warn("Only one value for level {}; dropping it.".format(name),
                     util.CytoflowViewWarning)
                try:
                    data.index = data.index.droplevel(name)
                except (AttributeError, ValueError) as e:
                    # the last remaining level cannot be dropped
                    raise util.CytoflowViewError("Must have more than one "
                                                 "value to plot.") from e

        names = list(data.index.names)

        if not self.variable:
            raise util.CytoflowViewError("X variable not set")

        if self.variable not in experiment.conditions:
            raise util.CytoflowViewError("X variable {0} not in the experiment"
                                         .format(self.variable))

        if self.variable not in names:
            raise util.CytoflowViewError("X variable {} is not a statistic index; "
                                         "must be one of {}".format(self.variable, names))

        if experiment.conditions[self.variable].dtype.kind not in "biufc":
            raise util.CytoflowViewError("X variable {0} isn't numeric"
                                         .format(self.variable))

        if self.xfacet and self.xfacet not in experiment.conditions:
            raise util.CytoflowViewError("X facet {0} not in the experiment"
                                         .format(self.xfacet))

        if self.xfacet and self.xfacet not in names:
            raise util.CytoflowViewError("X facet {} is not a statistic index; "
                                         "must be one of {}".format(self.xfacet, names))

        if self.yfacet and self.yfacet not in experiment.conditions:
            raise util.CytoflowViewError("Y facet {0} not in the experiment"
                                         .format(self.yfacet))

        if self.yfacet and self.yfacet not in names:
            raise util.CytoflowViewError("Y facet {} is not a statistic index; "
                                         "must be one of {}".format(self.yfacet, names))

        # checked against the conditions, like the other facets and like
        # enum_plots()
        if self.huefacet and self.huefacet not in experiment.conditions:
            raise util.CytoflowViewError("Hue facet {0} not in the experiment"
                                         .format(self.huefacet))

        if self.huefacet and self.huefacet not in names:
            raise util.CytoflowViewError("Hue facet {} is not a statistic index; "
                                         "must be one of {}".format(self.huefacet, names))

        col_wrap = kwargs.pop('col_wrap', None)

        if col_wrap and self.yfacet:
            raise util.CytoflowViewError("Can't set yfacet and col_wrap at the same time.")

        if col_wrap and not self.xfacet:
            raise util.CytoflowViewError("Must set xfacet to use col_wrap.")

        # a list, not filter(): the facets are both counted and turned into a set
        facets = [x for x in [self.variable, self.xfacet, self.yfacet, self.huefacet] if x]
        if len(facets) != len(set(facets)):
            raise util.CytoflowViewError("Can't reuse facets")

        unused_names = list(set(names) - set(facets))

        if unused_names and plot_name is None:
            # col_wrap was popped from kwargs above, so it has to be handed
            # back explicitly or the sub-plots would lose it
            for plot in self.enum_plots(experiment):
                self.plot(experiment, plot, col_wrap=col_wrap, **kwargs)
            return

        data.reset_index(inplace=True)

        if plot_name is not None:
            if not unused_names:
                raise util.CytoflowViewError("Plot {} not from plot_enum"
                                             .format(plot_name))

            groupby = data.groupby(unused_names)

            if plot_name not in set(groupby.groups.keys()):
                raise util.CytoflowViewError("Plot {} not from plot_enum"
                                             .format(plot_name))

            data = groupby.get_group(plot_name).reset_index(drop=True)

        xscale = util.scale_factory(self.xscale, experiment, condition=self.variable)

        if error_stat is not None:
            yscale = util.scale_factory(self.yscale, experiment, statistic=self.error_statistic)
        else:
            yscale = util.scale_factory(self.yscale, experiment, statistic=self.statistic)

        xlim = kwargs.pop("xlim", None)
        if xlim is None:
            xlim = (xscale.clip(data[self.variable].min() * 0.9),
                    xscale.clip(data[self.variable].max() * 1.1))

        ylim = kwargs.pop("ylim", None)
        if ylim is None:
            ylim = (yscale.clip(data[stat.name].min() * 0.9),
                    yscale.clip(data[stat.name].max() * 1.1))

            if error_stat is not None:
                try:
                    # error statistic made of (low, high) pairs
                    ylim = (yscale.clip(min([x[0] for x in error_stat]) * 0.9),
                            yscale.clip(max([x[1] for x in error_stat]) * 1.1))
                except (IndexError, TypeError):
                    # scalar error statistic: numpy scalars raise IndexError
                    # when indexed, plain floats raise TypeError
                    ylim = (yscale.clip(error_stat.min() * 0.9),
                            yscale.clip(error_stat.max() * 1.1))

        kwargs.setdefault('antialiased', True)

        cols = col_wrap if col_wrap else \
            len(data[self.xfacet].unique()) if self.xfacet else 1

        sharex = kwargs.pop('sharex', True)
        sharey = kwargs.pop('sharey', True)

        # NOTE(review): seaborn 0.9 renamed FacetGrid's `size` to `height`;
        # confirm which seaborn version this file targets.
        grid = sns.FacetGrid(data,
                             size=(6 / cols),
                             aspect=1.5,
                             col=(self.xfacet if self.xfacet else None),
                             row=(self.yfacet if self.yfacet else None),
                             hue=(self.huefacet if self.huefacet else None),
                             col_order=(np.sort(data[self.xfacet].unique()) if self.xfacet else None),
                             row_order=(np.sort(data[self.yfacet].unique()) if self.yfacet else None),
                             hue_order=(np.sort(data[self.huefacet].unique()) if self.huefacet else None),
                             col_wrap=col_wrap,
                             legend_out=False,
                             sharex=sharex,
                             sharey=sharey,
                             xlim=xlim,
                             ylim=ylim)

        for ax in grid.axes.flatten():
            ax.set_xscale(self.xscale, **xscale.mpl_params)
            ax.set_yscale(self.yscale, **yscale.mpl_params)

        # plot the error bars first so the axis labels don't get overwritten
        if error_stat is not None:
            grid.map(_error_bars, self.variable, stat.name, error_name, **kwargs)

        grid.map(plt.plot, self.variable, stat.name, **kwargs)

        # if we are sharing y axes, make sure the y scale is the same for each
        if sharey:
            fig = plt.gcf()
            fig_y_min = float("inf")
            fig_y_max = float("-inf")

            for ax in fig.get_axes():
                ax_y_min, ax_y_max = ax.get_ylim()
                fig_y_min = min(fig_y_min, ax_y_min)
                fig_y_max = max(fig_y_max, ax_y_max)

            for ax in fig.get_axes():
                ax.set_ylim(fig_y_min, fig_y_max)

        # if we are sharing x axes, make sure the x scale is the same for each
        if sharex:
            fig = plt.gcf()
            fig_x_min = float("inf")
            fig_x_max = float("-inf")

            for ax in fig.get_axes():
                ax_x_min, ax_x_max = ax.get_xlim()
                fig_x_min = min(fig_x_min, ax_x_min)
                fig_x_max = max(fig_x_max, ax_x_max)

            for ax in fig.get_axes():
                ax.set_xlim(fig_x_min, fig_x_max)

        # if we have a hue facet and a lot of hues, make a color bar instead
        # of a super-long legend.

        if self.huefacet:
            try:
                current_palette = mpl.rcParams['axes.prop_cycle'].by_key()['color']
            except KeyError:
                # fall back to the legacy key that this code originally read
                current_palette = mpl.rcParams['axes.color_cycle']

            if util.is_numeric(experiment.data[self.huefacet]) and \
               len(grid.hue_names) > len(current_palette):

                plot_ax = plt.gca()
                cmap = mpl.colors.ListedColormap(sns.color_palette("husl",
                                                                   n_colors=len(grid.hue_names)))
                cax, kw = mpl.colorbar.make_axes(plt.gca())
                norm = mpl.colors.Normalize(vmin=np.min(grid.hue_names),
                                            vmax=np.max(grid.hue_names),
                                            clip=False)
                mpl.colorbar.ColorbarBase(cax,
                                          cmap=cmap,
                                          norm=norm,
                                          label=self.huefacet,
                                          **kw)
                # making the color bar changed the current axes; restore them
                plt.sca(plot_ax)
            else:
                grid.add_legend(title=self.huefacet)

        if unused_names and plot_name:
            plt.title("{0} = {1}".format(unused_names, plot_name))

        plt.ylabel(self.statistic)