def apply(self, experiment):
    """
    Sync the list-based settings into the operation, then apply it.

    Validates ``beads_name`` and ``units_list``, rebuilds ``units`` from
    ``units_list``, looks ``beads`` up in ``BEADS`` by ``beads_name`` and
    delegates to ``BeadCalibrationOp.apply``.

    Parameters
    ----------
    experiment : Experiment
        passed unchanged to ``BeadCalibrationOp.apply``

    Returns
    -------
    whatever ``BeadCalibrationOp.apply`` returns

    Raises
    ------
    util.CytoflowOpError
        if ``beads_name`` is empty, or a channel appears in more than one
        entry of ``units_list``.
    """
    if not self.beads_name:
        raise util.CytoflowOpError("Specify which beads to calibrate with.")

    # compare every entry against every *other* entry
    for pos, entry in enumerate(self.units_list):
        for other_pos, other in enumerate(self.units_list):
            if pos == other_pos:
                continue
            if entry.channel == other.channel:
                raise util.CytoflowOpError(
                    "Channel {0} is included more than once".format(
                        entry.channel))

    # rebuild the channel -> unit mapping from scratch
    self.units = {}
    for entry in self.units_list:
        self.units[entry.channel] = entry.unit

    self.beads = self.BEADS[self.beads_name]
    return BeadCalibrationOp.apply(self, experiment)
def check_tube(filename, experiment):
    """
    Check that an FCS file is compatible with an experiment.

    Only the file's metadata is parsed. The tube must contain every
    channel of the experiment (matched on each channel's ``fcs_name``
    metadata), and for every channel whose metadata has a ``voltage``
    entry the tube's ``$PnV`` value must equal it, unless the channel is
    listed in ``experiment.metadata['ignore_v']``.

    Parameters
    ----------
    filename : str
        path of the FCS file to check

    experiment : Experiment
        the experiment the tube is compared against

    Raises
    ------
    util.CytoflowError
        if `experiment` is None

    util.CytoflowOpError
        if the metadata can't be read, a channel is missing, a voltage is
        missing from the tube, or a voltage differs.
    """
    if experiment is None:
        raise util.CytoflowError("No experiment specified")

    ignore_v = experiment.metadata['ignore_v']

    try:
        tube_meta = fcsparser.parse(
            filename,
            channel_naming=experiment.metadata["name_metadata"],
            meta_data_only=True,
            reformat_meta=True)
    except Exception as e:
        raise util.CytoflowOpError(
            "FCS reader threw an error reading metadata "
            "for tube {0}".format(filename)) from e

    # first make sure the tube has the right channels
    expected_names = {experiment.metadata[c]["fcs_name"]
                      for c in experiment.channels}
    if not expected_names <= set(tube_meta["_channel_names_"]):
        raise util.CytoflowOpError(
            "Tube {0} doesn't have the same channels".format(filename))

    tube_channels = tube_meta["_channels_"]
    tube_channels.set_index(experiment.metadata["name_metadata"],
                            inplace=True)

    # next check the per-channel parameters
    for channel in experiment.channels:
        fcs_name = experiment.metadata[channel]["fcs_name"]

        # first check voltage
        if "voltage" in experiment.metadata[channel]:
            if "$PnV" not in tube_channels.loc[fcs_name]:
                # the trailing space keeps the channel name from running
                # into "in tube" when the two literals are joined
                raise util.CytoflowOpError(
                    "Didn't find a voltage for channel {0} "
                    "in tube {1}".format(channel, filename))

            old_v = experiment.metadata[channel]["voltage"]
            new_v = tube_channels.loc[fcs_name]['$PnV']

            if old_v != new_v and channel not in ignore_v:
                raise util.CytoflowOpError(
                    "Tube {0} doesn't have the same voltages".format(
                        filename))
def apply(self, experiment):
    """Applies the range gate to an experiment.

    Parameters
    ----------
    experiment : Experiment
        the experiment to which this op is applied

    Returns
    -------
    a clone of `experiment` with a new ``bool`` condition named
    ``self.name``.  The condition is the result of
    ``experiment[self.channel].between(self.low, self.high)``.
    NOTE(review): if that object is a pandas Series, ``between`` includes
    both end points by default -- confirm that is the intended gate.

    Raises
    ------
    util.CytoflowOpError
        if the experiment, name or channel is missing or invalid, if the
        name is already a column, or if the range is empty or lies
        entirely outside the channel's data.
    """
    if experiment is None:
        raise util.CytoflowOpError("No experiment specified")

    # make sure name got set!
    if not self.name:
        raise util.CytoflowOpError("You have to set the gate's name "
                                   "before applying it!")

    if self.name in experiment.data.columns:
        raise util.CytoflowOpError(
            "Experiment already has a column named {0}".format(self.name))

    if not self.channel:
        raise util.CytoflowOpError("Channel not specified")

    if self.channel not in experiment.channels:
        raise util.CytoflowOpError(
            "Channel {0} not in the experiment".format(self.channel))

    if self.high <= self.low:
        raise util.CytoflowOpError("range high must be > range low")

    events = experiment[self.channel]

    # the range has to overlap the data at least a little
    smallest = events.min()
    if self.high <= smallest:
        raise util.CytoflowOpError(
            "range high must be > {0}".format(smallest))

    largest = events.max()
    if self.low >= largest:
        raise util.CytoflowOpError(
            "range low must be < {0}".format(largest))

    in_range = events.between(self.low, self.high)

    gated = experiment.clone()
    gated.add_condition(self.name, "bool", in_range)
    gated.history.append(self.clone_traits(transient=lambda _: True))
    return gated
def apply(self, experiment): if experiment is None: raise util.CytoflowOpError("No experiment was specified") experiment = self._af_op.apply(experiment) experiment = self._bleedthrough_op.apply(experiment) experiment = self._bead_calibration_op.apply(experiment) experiment = self._color_translation_op.apply(experiment) return experiment
def default_view(self, **kwargs):
    """
    Returns a diagnostic plot to see if the bleedthrough spline estimation
    is working.

    Parameters
    ----------
    **kwargs
        passed through to the ``BleedthroughPiecewiseDiagnostic``
        constructor

    Returns
    -------
    IView : An IView, call plot() to see the diagnostic plots

    Raises
    ------
    util.CytoflowOpError
        if ``ignore_deprecated`` is not set, or if the keys of
        ``controls`` and ``_splines`` differ.
    """
    if not self.ignore_deprecated:
        # the message must name the real attribute so users can act on it
        raise util.CytoflowOpError(
            "BleedthroughPiecewiseOp is DEPRECATED. "
            "To use it anyway, set ignore_deprecated "
            "to True.")

    if set(self.controls.keys()) != set(self._splines.keys()):
        raise util.CytoflowOpError(
            "Must have both the controls and bleedthrough to plot")

    return BleedthroughPiecewiseDiagnostic(op=self, **kwargs)
def apply(self, experiment):
    """Applies the threshold to an experiment.

    Parameters
    ----------
    experiment : Experiment
        the experiment to which this op is applied

    Returns
    -------
    a new experiment, the same as `experiment` but with a new column
    the same as the operation name.  The bool is True if the event's
    measurement in self.channel is greater than self.threshold; it is
    False otherwise.

    Raises
    ------
    util.CytoflowOpError
        if `experiment` is None, the name is unset or already a column,
        or the channel isn't in the experiment.
    """
    # test identity, not truthiness: an experiment with no events is
    # still an experiment
    if experiment is None:
        raise util.CytoflowOpError("No experiment specified")

    # make sure name got set!
    if not self.name:
        raise util.CytoflowOpError("You have to set the gate's name "
                                   "before applying it!")

    # make sure the experiment doesn't already have a column named
    # self.name
    if self.name in experiment.data.columns:
        raise util.CytoflowOpError(
            "Experiment already contains a column {0}".format(self.name))

    if self.channel not in experiment.channels:
        raise util.CytoflowOpError(
            "{0} isn't a channel in the experiment".format(self.channel))

    gate = pd.Series(experiment[self.channel] > self.threshold)

    new_experiment = experiment.clone()
    new_experiment.add_condition(self.name, "bool", gate)
    new_experiment.history.append(
        self.clone_traits(transient=lambda t: True))
    return new_experiment
def parse_tube(filename, experiment):
    """
    Validate an FCS file against an experiment and read its event data.

    Parameters
    ----------
    filename : str
        path of the FCS file to read

    experiment : Experiment
        passed to ``check_tube``; its ``name_metadata`` metadata entry
        selects the channel naming used by the parser

    Returns
    -------
    the second element of the pair returned by ``fcsparser.parse``

    Raises
    ------
    util.CytoflowOpError
        if reading the data fails for any reason (the original exception
        is chained), plus whatever ``check_tube`` raises.
    """
    check_tube(filename, experiment)

    try:
        _metadata, events = fcsparser.parse(
            filename,
            channel_naming=experiment.metadata["name_metadata"])
    except Exception as err:
        message = "FCS reader threw an error reading data for tube {}".format(
            filename)
        raise util.CytoflowOpError(message) from err
    else:
        return events
def parse_tube(filename, experiment, ignore_v = False):
    """
    Validate an FCS file against an experiment and read its event data.

    Parameters
    ----------
    filename : str
        path of the FCS file to read

    experiment : Experiment
        passed to ``check_tube``; its ``name_metadata`` metadata entry
        selects the channel naming used by the parser

    ignore_v : bool (default = False)
        forwarded to ``check_tube`` as its third positional argument.
        NOTE(review): confirm the ``check_tube`` in scope accepts it.

    Returns
    -------
    the second element of the pair returned by ``fcsparser.parse``

    Raises
    ------
    util.CytoflowOpError
        if reading the data fails for any reason; the message includes
        the original error text and the original exception is chained.
    """
    check_tube(filename, experiment, ignore_v)

    try:
        _, tube_data = fcsparser.parse(
            filename,
            channel_naming = experiment.metadata["name_metadata"])
    except Exception as e:
        # chain explicitly so the parser's traceback is kept as the cause
        raise util.CytoflowOpError(
            "FCS reader threw an error reading data for tube "
            "{0}: {1}".format(filename, str(e))) from e

    return tube_data
def _univariate_kdeplot(data, scale=None, shade=False, kernel="gaussian",
                        bw="scott", gridsize=100, cut=3, clip=None,
                        legend=True, ax=None, orientation = "vertical",
                        **kwargs):
    """
    Plot a one-dimensional kernel density estimate of `data`.

    The data is transformed with `scale`, the density is estimated on the
    scaled values, and the support is mapped back with ``scale.inverse``
    before plotting.

    Parameters
    ----------
    data : array-like
        the values to estimate the density of

    scale : callable with an ``inverse`` method
        called as ``scale(data)``; values it maps to NaN are dropped.
        Required, despite the ``None`` default.

    shade : bool
        if True, fill the area between the curve and (nearly) zero

    kernel : str
        one of 'gaussian', 'tophat', 'epanechnikov', 'exponential',
        'linear', 'cosine'

    bw : 'scott', 'silverman' or float
        the bandwidth, or the name of the rule used to choose it

    gridsize, cut, clip
        forwarded to ``_kde_support``; `clip` defaults to
        ``(-inf, inf)``

    legend : bool
        not used by this function

    ax : matplotlib axes
        the axes to draw on; defaults to ``plt.gca()``

    orientation : str
        "vertical" puts the data on the x axis and the density on the y
        axis; any other value swaps them

    **kwargs
        ``label``, ``color`` and ``alpha`` (default 0.25) are consumed
        here; the rest is passed to ``ax.plot``

    Returns
    -------
    the axes that were drawn on

    Raises
    ------
    util.CytoflowViewError
        if `scale` is None or `bw` is not a recognised value

    util.CytoflowOpError
        if `kernel` is not a recognised value
    """
    if scale is None:
        # fail with a clear message instead of "'NoneType' is not callable"
        raise util.CytoflowViewError(None, "A scale must be specified")

    if ax is None:
        ax = plt.gca()

    if clip is None:
        clip = (-np.inf, np.inf)

    scaled_data = scale(data)

    # mask out the data that's not in the scale domain
    scaled_data = scaled_data[~np.isnan(scaled_data)]

    if kernel not in ['gaussian','tophat','epanechnikov','exponential','linear','cosine']:
        raise util.CytoflowOpError(None, "kernel must be one of ['gaussian'|'tophat'|'epanechnikov'|'exponential'|'linear'|'cosine']")

    if bw == 'scott':
        bw = bw_scott(scaled_data)
    elif bw == 'silverman':
        bw = bw_silverman(scaled_data)
    elif not isinstance(bw, float):
        raise util.CytoflowViewError(None, "Bandwith must be 'scott', 'silverman' or a float")

    support = _kde_support(scaled_data, bw, gridsize, cut, clip)[:, np.newaxis]

    kde = KernelDensity(kernel = kernel, bandwidth = bw).fit(scaled_data[:, np.newaxis])
    log_density = kde.score_samples(support)

    # x is in data units again; y is the density
    x = scale.inverse(support[:, 0])
    y = np.exp(log_density)

    # Check if a label was specified in the call
    label = kwargs.pop("label", None)
    color = kwargs.pop("color", None)
    alpha = kwargs.pop("alpha", 0.25)

    # Draw the KDE plot and, optionally, shade
    if orientation == "vertical":
        ax.plot(x, y, color=color, label=label, **kwargs)
        if shade:
            ax.fill_between(x, 1e-12, y, facecolor=color, alpha=alpha)
    else:
        ax.plot(y, x, color=color, label=label, **kwargs)
        if shade:
            # the curve runs along the vertical axis here, so the fill
            # has to be horizontal: between ~0 and the density at each x
            ax.fill_betweenx(x, 1e-12, y, facecolor=color, alpha=alpha)

    return ax
def enum_plots(self, experiment):
    """
    Returns an iterator over the possible plots that this View can
    produce.  The values returned can be passed to "plot".

    Parameters
    ----------
    experiment : Experiment
        the experiment whose data is grouped by ``self._by``

    Returns
    -------
    an iterator.  If ``self._by`` is set it yields the group keys of
    ``experiment.data.groupby(self._by)``; otherwise it yields a single
    ``None``.

    Raises
    ------
    util.CytoflowViewError
        if a facet isn't a condition of the experiment or isn't in
        ``self.op.by``

    util.CytoflowOpError
        if an entry of ``self.op.by`` isn't in ``experiment.data``
    """
    if self.xfacet and self.xfacet not in experiment.conditions:
        raise util.CytoflowViewError(
            "X facet {} not in the experiment".format(self.xfacet))

    if self.xfacet and self.xfacet not in self.op.by:
        raise util.CytoflowViewError(
            "X facet {} must be in GaussianMixture1DOp.by, which is {}".
            format(self.xfacet, self.op.by))

    if self.yfacet and self.yfacet not in experiment.conditions:
        raise util.CytoflowViewError(
            "Y facet {0} not in the experiment".format(self.yfacet))

    if self.yfacet and self.yfacet not in self.op.by:
        raise util.CytoflowViewError(
            "Y facet {} must be in GaussianMixture1DOp.by, which is {}".
            format(self.yfacet, self.op.by))

    for b in self.op.by:
        if b not in experiment.data:
            raise util.CytoflowOpError("Aggregation metadata {0} not found"
                                       " in the experiment".format(b))

    class plot_enum(object):
        """Iterator over group keys, or over a single ``None``."""

        def __init__(self, view, experiment):
            self._iter = None
            self._returned = False

            if view._by:
                self._iter = iter(experiment.data.groupby(view._by))

        def __iter__(self):
            return self

        def __next__(self):
            if self._iter:
                # groupby yields (key, group) pairs; only the key is wanted
                return next(self._iter)[0]

            if self._returned:
                raise StopIteration

            self._returned = True
            return None

        # keep the Python 2 spelling for callers that invoke .next()
        next = __next__

    return plot_enum(self, experiment)
def estimate(self, experiment, subset=None):
    """
    Estimate the four sub-operations in sequence.

    Works on a clone of `experiment`.  Each stage is configured from this
    object's attributes and estimated; all but the last are then applied
    to the working copy so that the next stage sees corrected data.
    ``self.changed`` is set to ``"estimate_result"`` after every
    stage's estimate.

    Parameters
    ----------
    experiment : Experiment
        the experiment to estimate from

    subset : str
        not used by this method; ``self.subset`` is what gets passed to
        the autofluorescence, bleedthrough and color translation stages

    Raises
    ------
    util.CytoflowOpError
        if `experiment` is None
    """
    if not self.subset:
        warnings.warn(
            "Are you sure you don't want to specify a subset "
            "used to estimate the model?", util.CytoflowOpWarning)

    if experiment is None:
        raise util.CytoflowOpError("No valid result to estimate with")

    working = experiment.clone()

    # stage 1: autofluorescence
    af_op = self._af_op
    af_op.channels = self.channels
    af_op.blank_file = self.blank_file
    af_op.estimate(working, subset=self.subset)
    self.changed = "estimate_result"
    working = af_op.apply(working)

    # stage 2: bleedthrough, one control file per channel
    bleed_op = self._bleedthrough_op
    bleed_op.controls.clear()
    for entry in self.bleedthrough_list:
        bleed_op.controls[entry.channel] = entry.file
    bleed_op.estimate(working, subset=self.subset)
    self.changed = "estimate_result"
    working = bleed_op.apply(working)

    # stage 3: bead calibration; every channel gets the same unit
    bead_op = self._bead_calibration_op
    bead_op.beads = BeadCalibrationOp.BEADS[self.beads_name]
    bead_op.beads_file = self.beads_file
    bead_op.bead_peak_quantile = self.bead_peak_quantile
    bead_op.bead_brightness_threshold = self.bead_brightness_threshold
    bead_op.bead_brightness_cutoff = self.bead_brightness_cutoff
    bead_op.units.clear()
    for name in self.channels:
        bead_op.units[name] = self.beads_unit
    bead_op.estimate(working)
    self.changed = "estimate_result"
    working = bead_op.apply(working)

    # stage 4: color translation, keyed by (from, to) channel pair
    translate_op = self._color_translation_op
    translate_op.mixture_model = self.mixture_model
    translate_op.controls.clear()
    for entry in self.translation_list:
        pair = (entry.from_channel, entry.to_channel)
        translate_op.controls[pair] = entry.file
    translate_op.estimate(working, subset=self.subset)
    self.changed = "estimate_result"
def apply(self, experiment):
    """
    Applies the autofluorescence correction to channels in an experiment.

    Parameters
    ----------
    experiment : Experiment
        the experiment to which this op is applied

    Returns
    -------
    Experiment
        a clone of `experiment` in which, for every channel in
        ``self.channels``, the estimated median has been subtracted.
        The corrected channels have the following metadata added to them:

        - **af_median** : the value stored in ``_af_median`` for the
          channel

        - **af_stdev** : the value stored in ``_af_stdev`` for the
          channel

    Raises
    ------
    util.CytoflowOpError
        if the experiment or channels are missing, or the stored
        estimates are absent or inconsistent with the experiment or with
        ``self.channels``.
    """
    if experiment is None:
        raise util.CytoflowOpError('experiment', "No experiment specified")

    if not self.channels:
        raise util.CytoflowOpError('channels', "No channels specified")

    if not self._af_median:
        raise util.CytoflowOpError(None,
                                   "Autofluorescence values aren't set. Did "
                                   "you forget to run estimate()?")

    available = set(experiment.channels)
    if not (set(self._af_median) <= available
            and set(self._af_stdev) <= available):
        raise util.CytoflowOpError(None,
                                   "Autofluorescence estimates aren't set, or are "
                                   "different than those in the experiment "
                                   "parameter. Did you forget to run estimate()?")

    if set(self._af_median) != set(self._af_stdev):
        raise util.CytoflowOpError(None,
                                   "Median and stdev keys are different! "
                                   "What the hell happened?!")

    if set(self.channels) != set(self._af_median):
        raise util.CytoflowOpError('channels',
                                   "Estimated channels differ from the channels "
                                   "parameter. Did you forget to (re)run estimate()?")

    corrected = experiment.clone()

    for name in self.channels:
        median = self._af_median[name]
        corrected[name] = experiment[name] - median

        # record what was subtracted alongside the channel
        channel_meta = corrected.metadata[name]
        channel_meta['af_median'] = median
        channel_meta['af_stdev'] = self._af_stdev[name]

    corrected.history.append(self.clone_traits(transient = lambda t: True))
    return corrected
def estimate(self, experiment):
    """
    Sync the list-based settings into the operation, then estimate it.

    Validates ``beads_name`` and ``units_list``, rebuilds ``units`` and
    ``beads``, and delegates to ``BeadCalibrationOp.estimate``.
    ``self.changed`` is set to ``(Changed.ESTIMATE_RESULT, self)``
    whether or not the estimate raises.

    Parameters
    ----------
    experiment : Experiment
        passed unchanged to ``BeadCalibrationOp.estimate``

    Raises
    ------
    util.CytoflowOpError
        if ``beads_name`` is empty, or a channel appears in more than one
        entry of ``units_list``.  Anything raised by the underlying
        estimate propagates unchanged.
    """
    if not self.beads_name:
        raise util.CytoflowOpError("Specify which beads to calibrate with.")

    entries = self.units_list
    for pos, entry in enumerate(entries):
        repeated = any(entry.channel == other.channel
                       for other_pos, other in enumerate(entries)
                       if other_pos != pos)
        if repeated:
            raise util.CytoflowOpError(
                "Channel {0} is included more than once".format(
                    entry.channel))

    self.units = {}
    for entry in self.units_list:
        self.units[entry.channel] = entry.unit

    self.beads = self.BEADS[self.beads_name]

    try:
        BeadCalibrationOp.estimate(self, experiment)
    finally:
        # listeners are notified on success and on failure alike
        self.changed = (Changed.ESTIMATE_RESULT, self)
def apply(self, experiment):
    """
    Sync ``channels_list`` into ``channels`` and ``scale``, then apply.

    Parameters
    ----------
    experiment : Experiment
        passed unchanged to the parent class's ``apply``

    Returns
    -------
    whatever the parent class's ``apply`` returns

    Raises
    ------
    util.CytoflowOpError
        if a channel appears in more than one entry of ``channels_list``
    """
    for pos, entry in enumerate(self.channels_list):
        for other_pos, other in enumerate(self.channels_list):
            if pos != other_pos and entry.channel == other.channel:
                raise util.CytoflowOpError("Channel {0} is included more than once"
                                           .format(entry.channel))

    # rebuild both containers so they mirror channels_list exactly
    self.channels = []
    self.scale = {}
    for entry in self.channels_list:
        name = entry.channel
        self.channels.append(name)
        self.scale[name] = entry.scale

    return super().apply(experiment)
def estimate(self, experiment):
    """
    Sync ``channels_list`` into ``channels`` and ``scale``, then estimate.

    After the parent class's ``estimate`` returns, ``self.changed`` is
    set to ``(Changed.ESTIMATE_RESULT, self)``.

    Parameters
    ----------
    experiment : Experiment
        passed to the parent class's ``estimate`` together with
        ``self.subset``

    Raises
    ------
    util.CytoflowOpError
        if a channel appears in more than one entry of ``channels_list``
    """
    entries = self.channels_list
    for pos, entry in enumerate(entries):
        repeated = any(entry.channel == other.channel
                       for other_pos, other in enumerate(entries)
                       if other_pos != pos)
        if repeated:
            raise util.CytoflowOpError("Channel {0} is included more than once"
                                       .format(entry.channel))

    # rebuild both containers so they mirror channels_list exactly
    self.channels = []
    self.scale = {}
    for entry in self.channels_list:
        name = entry.channel
        self.channels.append(name)
        self.scale[name] = entry.scale

    super().estimate(experiment, subset = self.subset)

    self.changed = (Changed.ESTIMATE_RESULT, self)
def default_view(self, **kwargs):
    """
    Returns a diagnostic plot to see if the bleedthrough spline estimation
    is working.

    Parameters
    ----------
    **kwargs
        passed through to the ``BleedthroughPiecewiseDiagnostic``
        constructor

    Returns
    -------
    IView : An IView, call plot() to see the diagnostic plots

    Raises
    ------
    util.CytoflowOpError
        if the keys of ``controls`` and ``_splines`` differ
    """
    control_channels = set(self.controls.keys())
    spline_channels = set(self._splines.keys())

    if control_channels != spline_channels:
        raise util.CytoflowOpError(
            "Must have both the controls and bleedthrough to plot")

    return BleedthroughPiecewiseDiagnostic(op=self, **kwargs)
def apply(self, experiment):
    """
    Look up the transform function by name, then apply the operation.

    ``self.function`` is set to ``transform_functions[self.statistic_name]``
    before delegating to ``TransformStatisticOp.apply``.  A warning is
    issued if ``Undefined in stat`` is true for the resulting statistic.

    Parameters
    ----------
    experiment : Experiment
        passed unchanged to ``TransformStatisticOp.apply``

    Returns
    -------
    whatever ``TransformStatisticOp.apply`` returns

    Raises
    ------
    util.CytoflowOpError
        if ``statistic_name`` is empty
    """
    if not self.statistic_name:
        raise util.CytoflowOpError("Transform function not set")

    self.function = transform_functions[self.statistic_name]

    result = TransformStatisticOp.apply(self, experiment)

    stat_key = (self.name, self.statistic_name)
    new_stat = result.statistics[stat_key]

    # NOTE(review): if the statistic is a pandas Series, ``in`` tests the
    # index labels rather than the values -- confirm this is intended.
    if Undefined in new_stat:
        warn("One of the transformed values was Undefined. "
             "Subsequent operations may fail. "
             "Please report this as a bug! ")

    return result
def default_view(self, **kwargs):
    """
    Returns a diagnostic plot to make sure spillover estimation is working.

    Parameters
    ----------
    **kwargs
        passed through to the ``BleedthroughLinearDiagnostic`` constructor

    Returns
    -------
    IView : An IView, call plot() to see the diagnostic plots

    Raises
    ------
    util.CytoflowOpError
        if the keys of ``controls`` differ from the set of first
        elements of the ``spillover`` keys
    """
    # spillover is keyed by 2-tuples; only the first element matters here
    source_channels = {first for (first, _) in self.spillover.keys()}

    if set(self.controls.keys()) != source_channels:
        raise util.CytoflowOpError("Must have both the controls and bleedthrough to plot")

    return BleedthroughLinearDiagnostic(op = self, **kwargs)
def apply(self, experiment):
    """Applies the threshold to an experiment.

    Parameters
    ----------
    experiment : Experiment
        the experiment to which this operation is applied

    Returns
    -------
    Experiment
        a new :class:`~experiment`, the same as the old experiment but
        with a new column of type ``bool`` with the same name as the
        operation :attr:`name`.  The new condition is ``True`` if the
        event's measurement in :attr:`channel` is greater than
        :attr:`threshold`; it is ``False`` otherwise.

    Raises
    ------
    util.CytoflowOpError
        if the experiment is None, the name is unset, not a sanitized
        identifier or already a column, the channel isn't in the
        experiment, or the threshold is None.
    """
    if experiment is None:
        raise util.CytoflowOpError('experiment', "No experiment specified")

    gate_name = self.name

    # make sure name got set!
    if not gate_name:
        raise util.CytoflowOpError(
            'name',
            "You have to set the gate's name "
            "before applying it!")

    if util.sanitize_identifier(gate_name) != gate_name:
        raise util.CytoflowOpError(
            'name',
            "Name can only contain letters, numbers and underscores.")

    # the new condition must not clobber an existing column
    if gate_name in experiment.data.columns:
        raise util.CytoflowOpError(
            'name',
            "Experiment already contains a column {0}".format(gate_name))

    if self.channel not in experiment.channels:
        raise util.CytoflowOpError(
            'channel',
            "{0} isn't a channel in the experiment".format(self.channel))

    if self.threshold is None:
        raise util.CytoflowOpError('threshold', "must set 'threshold'")

    above = pd.Series(experiment[self.channel] > self.threshold)

    gated = experiment.clone()
    gated.add_condition(gate_name, "bool", above)
    gated.history.append(self.clone_traits(transient=lambda t: True))
    return gated
def estimate(self, experiment):
    """
    Check the experiment's history, then estimate with ``self.subset``.

    Warns if no subset is set.  Every operation in
    ``experiment.history`` that has a ``by`` attribute is inspected:
    if the experiment's metadata for any of those ``by`` entries
    contains the key ``'experiment'``, the estimate is refused.

    Parameters
    ----------
    experiment : Experiment
        passed to the parent class's ``estimate``

    Raises
    ------
    util.CytoflowOpError
        if a prior operation was grouped by an entry whose metadata
        contains ``'experiment'``
    """
    if not self.subset:
        warnings.warn(
            "Are you sure you don't want to specify a subset "
            "used to estimate the model?", util.CytoflowOpWarning)

    # check for experiment metadata used to estimate operations in the
    # history, and bail if we find any
    for past_op in experiment.history:
        # operations without a 'by' attribute contribute nothing
        for grouping in getattr(past_op, 'by', ()):
            if 'experiment' not in experiment.metadata[grouping]:
                continue
            raise util.CytoflowOpError(
                'experiment', "Prior to applying this operation, "
                "you must not apply any operation with 'by' "
                "set to an experimental condition.")

    super().estimate(experiment, subset=self.subset)
def apply(self, experiment):
    """
    Load the blank file if it changed; otherwise convert the input files.

    When ``blank_file`` differs from the cached ``_blank_exp_file``, the
    blank is imported and cached, ``self.changed`` is set to
    ``(Changed.PREV_RESULT, None)`` and the method returns without
    converting anything.

    Otherwise every path in ``input_files`` is imported as its own
    experiment, passed through the autofluorescence, bleedthrough and bead
    calibration operations (and color translation when
    ``do_color_translation`` is set) and exported to
    ``output_directory``.  ``status`` is updated along the way and
    ``input_files`` is emptied at the end.

    Parameters
    ----------
    experiment : Experiment
        not read; the name is reused for each imported file

    Returns
    -------
    None

    Raises
    ------
    util.CytoflowOpError
        if a file with the same name as one of the inputs already exists
        in ``output_directory``
    """
    blank_changed = self.blank_file != self._blank_exp_file
    if blank_changed:
        blank_tube = Tube(file=self.blank_file)
        self._blank_exp = ImportOp(tubes=[blank_tube]).apply()
        self._blank_exp_file = self.blank_file
        self._blank_exp_channels = self._blank_exp.channels
        self.changed = (Changed.PREV_RESULT, None)
        return

    # refuse to overwrite anything: check every target before converting
    target_dir = Path(self.output_directory)
    for source in self.input_files:
        target = target_dir / Path(source).name
        if target.exists():
            raise util.CytoflowOpError(
                None, "File {} already exists".format(target))

    # all tubes are built up front, before the first conversion starts
    tubes = [
        Tube(file=source, conditions={'filename': Path(source).stem})
        for source in self.input_files
    ]

    for tube in tubes:
        self.status = "Converting " + Path(tube.file).stem

        converted = ImportOp(tubes=[tube],
                             conditions={'filename': 'category'}).apply()
        converted = self._af_op.apply(converted)
        converted = self._bleedthrough_op.apply(converted)
        converted = self._bead_calibration_op.apply(converted)
        if self.do_color_translation:
            converted = self._color_translation_op.apply(converted)

        exporter = ExportFCS(path=self.output_directory,
                             by=['filename'],
                             _include_by=False)
        exporter.export(converted)

    self.input_files = []
    self.status = "Done converting!"
def estimate(self, experiment):
    """
    Sync ``controls_list`` into ``controls``, then estimate.

    Warns if no subset is set, delegates to
    ``BleedthroughPiecewiseOp.estimate`` with ``self.subset`` and then
    sets ``self.changed`` to ``"estimate_result"``.

    Parameters
    ----------
    experiment : Experiment
        passed to ``BleedthroughPiecewiseOp.estimate``

    Raises
    ------
    util.CytoflowOpError
        if a channel appears in more than one entry of ``controls_list``
    """
    for pos, entry in enumerate(self.controls_list):
        for other_pos, other in enumerate(self.controls_list):
            if pos == other_pos:
                continue
            if entry.channel == other.channel:
                raise util.CytoflowOpError("Channel {0} is included more than once"
                                           .format(entry.channel))

    # rebuild the channel -> control file mapping from scratch
    self.controls = {}
    for entry in self.controls_list:
        self.controls[entry.channel] = entry.file

    if not self.subset:
        warnings.warn("Are you sure you don't want to specify a subset "
                      "used to estimate the model?",
                      util.CytoflowOpWarning)

    BleedthroughPiecewiseOp.estimate(self, experiment, subset = self.subset)

    self.changed = "estimate_result"
def apply(self, experiment):
    """Applies the autofluorescence correction to an experiment.

    Parameters
    ----------
    experiment : Experiment
        the experiment to which this op is applied

    Returns
    -------
    a new experiment in which, for every channel in ``self.channels``,
    the estimated autofluorescence median has been subtracted.

    Raises
    ------
    util.CytoflowOpError
        if the experiment or channels are missing, or the stored
        estimates are absent or inconsistent with the experiment or with
        ``self.channels``.
    """
    # test identity, not truthiness: an experiment with no events is
    # still an experiment
    if experiment is None:
        raise util.CytoflowOpError("No experiment specified")

    if not self.channels:
        raise util.CytoflowOpError("No channels specified")

    if not self._af_median:
        raise util.CytoflowOpError(
            "Autofluorescence values aren't set. Did "
            "you forget to run estimate()?")

    if not set(self._af_median.keys()) <= set(experiment.channels) or \
       not set(self._af_stdev.keys()) <= set(experiment.channels):
        raise util.CytoflowOpError(
            "Autofluorescence estimates aren't set, or are "
            "different than those in the experiment "
            "parameter. Did you forget to run estimate()?")

    if not set(self._af_median.keys()) == set(self._af_stdev.keys()):
        raise util.CytoflowOpError("Median and stdev keys are different! "
                                   "What the hell happened?!")

    if not set(self.channels) == set(self._af_median.keys()):
        raise util.CytoflowOpError(
            "Estimated channels differ from the channels "
            "parameter. Did you forget to (re)run estimate()?")

    new_experiment = experiment.clone()

    for channel in self.channels:
        new_experiment[channel] = \
            experiment[channel] - self._af_median[channel]

    new_experiment.history.append(
        self.clone_traits(transient=lambda t: True))
    return new_experiment
def plot(self, experiment=None, **kwargs):
    """
    Plot the linear bleedthrough diagnostic.

    Opens a new figure with one panel per ordered pair of distinct
    channels.  Each panel is a scatter plot of the source channel's
    control tube (source channel on x, target channel on y), limited to
    the 5th-95th percentiles, with the line
    ``y = x * spillover[(source, target)]`` drawn on top.

    Parameters
    ----------
    experiment : Experiment
        passed to ``parse_tube`` when reading the control files

    **kwargs
        defaults for ``histtype``, ``alpha`` and ``antialiased`` are
        filled in, but the dictionary is not otherwise used here

    Raises
    ------
    util.CytoflowOpError
        if `experiment` is None
    """
    if experiment is None:
        raise util.CytoflowOpError("No experiment specified")

    kwargs.setdefault('histtype', 'stepfilled')
    kwargs.setdefault('alpha', 0.5)
    kwargs.setdefault('antialiased', True)

    plt.figure()

    # the completely arbitrary ordering of the channels
    channels = list(set([x for (x, _) in self.op.spillover.keys()]))
    num_channels = len(channels)

    if num_channels < 2:
        # there are no off-diagonal panels, so no tube needs reading
        return

    for from_idx, from_channel in enumerate(channels):
        # the control tube and its x range depend only on the source
        # channel, so read and compute them once per row of panels
        tube_data = parse_tube(self.op.controls[from_channel], experiment)
        from_data = tube_data[from_channel]
        x_range = np.percentile(from_data, (5, 95))
        xs = np.linspace(x_range[0], x_range[1], 2)

        for to_idx, to_channel in enumerate(channels):
            if from_idx == to_idx:
                continue

            to_data = tube_data[to_channel]

            plt.subplot(num_channels, num_channels,
                        from_idx + (to_idx * num_channels) + 1)
            plt.xlim(x_range)
            plt.ylim(np.percentile(to_data, (5, 95)))
            plt.xlabel(from_channel)
            plt.ylabel(to_channel)
            plt.scatter(from_data, to_data, alpha=0.1, s=1, marker='o')

            # overlay the estimated linear spillover
            ys = xs * self.op.spillover[(from_channel, to_channel)]
            plt.plot(xs, ys, 'g-', lw=3)
def plot(self, experiment, **kwargs):
    """
    Plot the underlying scatterplot and then plot the selection on top
    of it.

    Parameters
    ----------
    experiment : Experiment
        the experiment to plot

    **kwargs
        passed through to the parent class's ``plot``

    Raises
    ------
    util.CytoflowOpError
        if `experiment` is falsy

    util.CytoflowViewError
        if ``xfacet`` or ``yfacet`` is set
    """
    # A second, identical check that raised CytoflowViewError used to
    # follow this one; it could never be reached, so it was removed.  The
    # exception type callers actually see is unchanged.
    if not experiment:
        raise util.CytoflowOpError("No experiment specified")

    if self.xfacet:
        raise util.CytoflowViewError(
            "RangeSelection.xfacet must be empty or `Undefined`")

    if self.yfacet:
        raise util.CytoflowViewError(
            "RangeSelection.yfacet must be empty or `Undefined`")

    super(RangeSelection2D, self).plot(experiment, **kwargs)

    # remember the axes the scatterplot ended up on, then decorate them
    self._ax = plt.gca()
    self._draw_rect()
    self._interactive()
def estimate(self, experiment):
    """
    Sync ``controls_list`` into ``controls``, then estimate.

    ``controls`` is rebuilt as a mapping from
    ``(from_channel, to_channel)`` to control file.  Warns if no subset
    is set, delegates to ``ColorTranslationOp.estimate`` with
    ``self.subset`` and then sets ``self.changed`` to
    ``(Changed.ESTIMATE_RESULT, self)``.

    Parameters
    ----------
    experiment : Experiment
        passed to ``ColorTranslationOp.estimate``

    Raises
    ------
    util.CytoflowOpError
        if the same ``from_channel`` appears in more than one entry of
        ``controls_list``
    """
    entries = self.controls_list
    for pos, entry in enumerate(entries):
        # only the source channel is compared
        repeated = any(entry.from_channel == other.from_channel
                       for other_pos, other in enumerate(entries)
                       if other_pos != pos)
        if repeated:
            raise util.CytoflowOpError(
                "Channel {0} is included more than once".format(
                    entry.from_channel))

    self.controls = {}
    for entry in self.controls_list:
        pair = (entry.from_channel, entry.to_channel)
        self.controls[pair] = entry.file

    if not self.subset:
        warnings.warn(
            "Are you sure you don't want to specify a subset "
            "used to estimate the model?", util.CytoflowOpWarning)

    ColorTranslationOp.estimate(self, experiment, subset=self.subset)

    self.changed = (Changed.ESTIMATE_RESULT, self)
def plot(self, experiment=None, **kwargs):
    """
    Plot the piecewise bleedthrough diagnostic.

    Opens a new figure with one log-log panel per ordered pair of
    distinct channels.  Each panel is a scatter plot of the source
    channel's control tube (source channel on x, target channel on y)
    with the spline ``_splines[source][target]`` drawn on top.

    Parameters
    ----------
    experiment : Experiment
        passed to ``parse_tube`` when reading the control files

    **kwargs
        defaults for ``histtype``, ``alpha`` and ``antialiased`` are
        filled in, but the dictionary is not otherwise used here

    Raises
    ------
    util.CytoflowOpError
        if `experiment` is None
    """
    if experiment is None:
        raise util.CytoflowOpError("No experiment specified")

    kwargs.setdefault('histtype', 'stepfilled')
    kwargs.setdefault('alpha', 0.5)
    kwargs.setdefault('antialiased', True)

    plt.figure()

    channels = self.op._splines.keys()
    num_channels = len(channels)

    if num_channels < 2:
        # there are no off-diagonal panels, so no tube needs reading
        return

    for from_idx, from_channel in enumerate(channels):
        # the control tube and the evaluation grid depend only on the
        # source channel, so build them once per row of panels
        tube_data = parse_tube(self.op.controls[from_channel], experiment)
        from_data = tube_data[from_channel]
        xs = np.logspace(-1, math.log(from_data.max(), 10))

        for to_idx, to_channel in enumerate(channels):
            if from_idx == to_idx:
                continue

            plt.subplot(num_channels, num_channels,
                        from_idx + (to_idx * num_channels) + 1)
            # NOTE(review): newer Matplotlib releases spell these keywords
            # ``nonpositive`` -- confirm against the supported version.
            plt.xscale('log', nonposx='mask')
            plt.yscale('log', nonposy='mask')
            plt.xlabel(from_channel)
            plt.ylabel(to_channel)
            plt.scatter(from_data,
                        tube_data[to_channel],
                        alpha=0.1,
                        s=1,
                        marker='o')

            # overlay the estimated bleedthrough spline
            spline = self.op._splines[from_channel][to_channel]
            plt.plot(xs, spline(xs), 'g-', lw=3)
def max_tol(x, y):
    """
    Measure how far ``density`` departs from a straight line between
    two points.

    For ``t`` in ``[0, 1]`` the density at ``x + t * (y - x)`` is
    compared with the linear interpolation of the densities at `x` and
    `y`; the absolute relative difference is maximised over ``t`` with
    a bounded scalar optimiser.

    Parameters
    ----------
    x, y : array
        the end points; each is given a leading axis before being passed
        to ``density``

    Returns
    -------
    the negated minimum reported by the optimiser, i.e. the largest
    relative difference it found

    Raises
    ------
    util.CytoflowOpError
        if the optimiser reports a non-zero status
    """
    def density_at(point):
        return density(point[np.newaxis, :])

    # placeholder for a cluster-size correction; currently a no-op factor
    n_scale = 1

    def negated_error(t):
        on_segment = x + t * (y - x)
        interpolated = density_at(x) + t * (density_at(y) - density_at(x))
        relative = (density_at(on_segment) - interpolated) / interpolated
        return -1.0 * abs(relative) * n_scale

    outcome = scipy.optimize.minimize_scalar(negated_error,
                                             bounds=[0, 1],
                                             method='Bounded')

    if outcome.status != 0:
        raise util.CytoflowOpError(
            "tol optimization failed for {}, {}".format(x, y))

    return -1.0 * outcome.fun
def plot(self, experiment, **kwargs):
    """
    Plot the autofluorescence diagnostic.

    The blank file is checked against `experiment`, imported, and run
    through every operation in ``experiment.history``.  A new figure is
    then drawn with one 200-bin histogram per channel of the operation,
    each with a red vertical line at the channel's estimated median.

    Parameters
    ----------
    experiment : Experiment
        supplies the channel naming, the history to replay and the
        channels to validate against

    **kwargs
        passed to ``plt.hist``; ``histtype``, ``alpha`` and
        ``antialiased`` get defaults

    Raises
    ------
    util.CytoflowViewError
        if the experiment or channels are missing, the estimates aren't
        set, or checking/importing the blank file fails

    util.CytoflowOpError
        if the stored estimates are inconsistent with the experiment or
        with the operation's channels
    """
    if not experiment:
        raise util.CytoflowViewError("No experiment specified")

    op = self.op

    if not op.channels:
        raise util.CytoflowViewError("No channels specified")

    if not op._af_median:
        raise util.CytoflowViewError(
            "Autofluorescence values aren't set. Did "
            "you forget to run estimate()?")

    if not (set(op._af_median.keys()) <= set(experiment.channels)
            and set(op._af_stdev.keys()) <= set(experiment.channels)):
        raise util.CytoflowOpError(
            "Autofluorescence estimates aren't set, or are "
            "different than those in the experiment "
            "parameter. Did you forget to run estimate()?")

    if set(op._af_median.keys()) != set(op._af_stdev.keys()):
        raise util.CytoflowOpError("Median and stdev keys are different! "
                                   "What the hell happened?!")

    if set(op.channels) != set(op._af_median.keys()):
        raise util.CytoflowOpError(
            "Estimated channels differ from the channels "
            "parameter. Did you forget to (re)run estimate()?")

    import matplotlib.pyplot as plt
    import seaborn as sns  # @UnusedImport

    for option, default in (('histtype', 'stepfilled'),
                            ('alpha', 0.5),
                            ('antialiased', True)):
        kwargs.setdefault(option, default)

    # make a little Experiment
    try:
        check_tube(op.blank_file, experiment)
        blank_exp = ImportOp(
            tubes=[Tube(file=op.blank_file)],
            name_metadata=experiment.metadata['name_metadata']).apply()
    except util.CytoflowOpError as err:
        raise util.CytoflowViewError(str(err))

    # apply previous operations
    for previous in experiment.history:
        blank_exp = previous.apply(blank_exp)

    plt.figure()

    panel_count = len(op.channels)
    for panel, channel in enumerate(op.channels, start=1):
        plt.subplot(panel_count, 1, panel)
        plt.title(channel)
        plt.hist(blank_exp.data[channel], bins=200, **kwargs)
        plt.axvline(op._af_median[channel], color='r')

    plt.tight_layout(pad=0.8)
def apply(self, experiment):
    """
    Assigns new metadata to events using the mixture model estimated
    in `estimate`.

    Events are grouped by ``self.by`` (or treated as one group), scaled
    with ``_xscale`` / ``_yscale`` and assigned to the component
    predicted by the group's model in ``_gmms``.  When ``sigma`` is
    greater than zero, events outside their component's ellipse are
    un-assigned again.

    Parameters
    ----------
    experiment : Experiment
        the experiment to which this op is applied

    Returns
    -------
    a clone of `experiment` with a new condition named ``self.name``:
    a ``bool`` when ``num_components`` is 1, otherwise a ``category``
    whose values are ``"<name>_<component number>"`` or
    ``"<name>_None"``.  If ``posteriors`` is set, a ``float``
    condition named ``"<name>_Posterior"`` is added too.

    Raises
    ------
    util.CytoflowOpError
        if the experiment, name, model, scales or channels are missing,
        if a column to be added already exists, if a ``by`` entry is
        missing or has more than 100 unique values, or if ``sigma`` is
        negative

    util.CytoflowError
        if ``num_components`` is 1 and ``sigma`` is 0.0
    """

    # NOTE(review): this is a truthiness test, and len(experiment) is used
    # further down -- an experiment with no events may be rejected here.
    if not experiment:
        raise util.CytoflowOpError("No experiment specified")

    # make sure name got set!
    if not self.name:
        raise util.CytoflowOpError("You have to set the gate's name "
                                   "before applying it!")

    if self.name in experiment.data.columns:
        raise util.CytoflowOpError("Experiment already has a column named {0}"
                                   .format(self.name))

    if not self._gmms:
        raise util.CytoflowOpError("No components found. Did you forget to "
                                   "call estimate()?")

    if not self._xscale:
        raise util.CytoflowOpError("Couldn't find _xscale. What happened??")

    if not self._yscale:
        raise util.CytoflowOpError("Couldn't find _yscale. What happened??")

    if self.xchannel not in experiment.data:
        raise util.CytoflowOpError("Column {0} not found in the experiment"
                                   .format(self.xchannel))

    if self.ychannel not in experiment.data:
        raise util.CytoflowOpError("Column {0} not found in the experiment"
                                   .format(self.ychannel))

    # checked regardless of self.posteriors; the same column is checked
    # again below when self.posteriors is set
    if (self.name + "_Posterior") in experiment.data:
        raise util.CytoflowOpError("Column {0} already found in the experiment"
                                   .format(self.name + "_Posterior"))

    # with a single component and no sigma gate nothing would be excluded
    if self.num_components == 1 and self.sigma == 0.0:
        raise util.CytoflowError("If num_components == 1, sigma must be > 0")

    if self.posteriors:
        col_name = "{0}_Posterior".format(self.name)
        if col_name in experiment.data:
            raise util.CytoflowOpError("Column {0} already found in the experiment"
                                       .format(col_name))

    for b in self.by:
        if b not in experiment.data:
            raise util.CytoflowOpError("Aggregation metadata {0} not found"
                                       " in the experiment"
                                       .format(b))

        if len(experiment.data[b].unique()) > 100: #WARNING - magic number
            raise util.CytoflowOpError("More than 100 unique values found for"
                                       " aggregation metadata {0}. Did you"
                                       " accidentally specify a data channel?"
                                       .format(b))

    if self.sigma < 0.0:
        raise util.CytoflowOpError("sigma must be >= 0.0")

    # one slot per event; filled in group by group below
    event_assignments = pd.Series([None] * len(experiment), dtype = "object")

    if self.posteriors:
        event_posteriors = pd.Series([0.0] * len(experiment))

    # what we DON'T want to do is iterate through event-by-event.
    # the more of this we can push into numpy, sklearn and pandas,
    # the faster it's going to be. for example, this is why
    # we don't use Ellipse.contains().

    if self.by:
        groupby = experiment.data.groupby(self.by)
    else:
        # use a lambda expression to return a group that
        # contains all the events
        groupby = experiment.data.groupby(lambda x: True)

    for group, data_subset in groupby:
        gmm = self._gmms[group]
        x = data_subset.loc[:, [self.xchannel, self.ychannel]]
        x[self.xchannel] = self._xscale(x[self.xchannel])
        x[self.ychannel] = self._yscale(x[self.ychannel])

        # which values are missing?
        x_na = np.isnan(x[self.xchannel]) | np.isnan(x[self.ychannel])
        x_na = x_na.values

        x = x.values
        # NOTE(review): these come from groupby.groups but are used with
        # .iloc below; presumably the data has a default 0..n-1 index --
        # confirm.
        group_idx = groupby.groups[group]

        # make a preliminary assignment; -1 means "no component"
        predicted = np.full(len(x), -1, "int")
        predicted[~x_na] = gmm.predict(x[~x_na])

        # if we're doing sigma-based gating, for each component check
        # to see if the event is in the sigma gate.
        if self.sigma > 0.0:

            # make a quick dataframe with the value and the predicted
            # component
            gate_df = pd.DataFrame({"x" : x[:, 0], "y" : x[:, 1], "p" : predicted})

            # for each component, get the ellipse that follows the isoline
            # around the mixture component
            # cf. http://scikit-learn.org/stable/auto_examples/mixture/plot_gmm.html
            # and http://www.mathworks.com/matlabcentral/newsreader/view_thread/298389
            # and http://stackoverflow.com/questions/7946187/point-and-ellipse-rotated-position-test-algorithm
            # i am not proud of how many tries this took me to get right.

            for c in range(0, self.num_components):
                mean = gmm.means_[c]
                covar = gmm._get_covars()[c]

                # xc is the center on the x axis
                # yc is the center on the y axis
                xc = mean[0]  # @UnusedVariable
                yc = mean[1]  # @UnusedVariable

                v, w = linalg.eigh(covar)
                u = w[0] / linalg.norm(w[0])

                # xl is the length along the x axis
                # yl is the length along the y axis
                xl = np.sqrt(v[0]) * self.sigma  # @UnusedVariable
                yl = np.sqrt(v[1]) * self.sigma  # @UnusedVariable

                # t is the rotation in radians (counter-clockwise)
                t = 2 * np.pi - np.arctan(u[1] / u[0])

                sin_t = np.sin(t)  # @UnusedVariable
                cos_t = np.cos(t)  # @UnusedVariable

                # and build an expression with numexpr so it evaluates fast!
                # (the "unused" locals above are referenced by name, via @,
                # from inside this expression string)

                gate_bool = gate_df.eval("p == @c and "
                                         "((x - @xc) * @cos_t - (y - @yc) * @sin_t) ** 2 / ((@xl / 2) ** 2) + "
                                         "((x - @xc) * @sin_t + (y - @yc) * @cos_t) ** 2 / ((@yl / 2) ** 2) <= 1").values

                # events predicted for this component but outside its
                # ellipse lose their assignment
                predicted[np.logical_and(predicted == c, gate_bool == False)] = -1

        # turn component numbers into 1-based labels
        predicted_str = pd.Series(["(none)"] * len(predicted))
        for c in range(0, self.num_components):
            predicted_str[predicted == c] = "{0}_{1}".format(self.name, c + 1)
        predicted_str[predicted == -1] = "{0}_None".format(self.name)
        predicted_str.index = group_idx

        event_assignments.iloc[group_idx] = predicted_str

        if self.posteriors:
            probability = np.full((len(x), self.num_components), 0.0, "float")
            probability[~x_na, :] = gmm.predict_proba(x[~x_na, :])
            # keep only the probability of the component each event was
            # assigned to; unassigned events stay at 0.0
            posteriors = pd.Series([0.0] * len(predicted))
            for c in range(0, self.num_components):
                posteriors[predicted == c] = probability[predicted == c, c]
            posteriors.index = group_idx
            event_posteriors.iloc[group_idx] = posteriors

    new_experiment = experiment.clone()

    if self.num_components == 1:
        # a single component becomes a plain in/out gate
        new_experiment.add_condition(self.name, "bool", event_assignments == "{0}_1".format(self.name))
    else:
        new_experiment.add_condition(self.name, "category", event_assignments)

    if self.posteriors:
        col_name = "{0}_Posterior".format(self.name)
        new_experiment.add_condition(col_name, "float", event_posteriors)

    new_experiment.history.append(self.clone_traits())
    return new_experiment