class Stats1DView(Base1DStatisticsView):
    """
    Plot a statistic.  The value of the statistic will be plotted on the
    Y axis; a numeric conditioning variable must be chosen for the X axis.
    Every variable in the statistic must be specified as either the
    `variable` or one of the plot facets.

    Attributes
    ----------

    Examples
    --------

    .. plot::
        :context: close-figs

        Make a little data set.

        >>> import cytoflow as flow
        >>> import_op = flow.ImportOp()
        >>> import_op.tubes = [flow.Tube(file = "Plate01/RFP_Well_A3.fcs",
        ...                              conditions = {'Dox' : 10.0}),
        ...                    flow.Tube(file = "Plate01/CFP_Well_A4.fcs",
        ...                              conditions = {'Dox' : 1.0})]
        >>> import_op.conditions = {'Dox' : 'float'}
        >>> ex = import_op.apply()

    Create a new statistic.

    .. plot::
        :context: close-figs

        >>> ch_op = flow.ChannelStatisticOp(name = 'MeanByDox',
        ...                                 channel = 'Y2-A',
        ...                                 function = flow.geom_mean,
        ...                                 by = ['Dox'])
        >>> ex2 = ch_op.apply(ex)

    View the new statistic

    .. plot::
        :context: close-figs

        >>> flow.Stats1DView(variable = 'Dox',
        ...                  statistic = ('MeanByDox', 'geom_mean'),
        ...                  xscale = 'log',
        ...                  yscale = 'log').plot(ex2)
    """

    # traits
    id = Constant("edu.mit.synbio.cytoflow.view.stats1d")
    friendly_id = Constant("1D Statistics View")

    # attributes that no longer exist; they are declared with util.Removed
    # and share a single error string
    REMOVED_ERROR = Constant("Statistics changed dramatically in 0.5; please see the documentation")
    by = util.Removed(err_string = REMOVED_ERROR)
    yfunction = util.Removed(err_string = REMOVED_ERROR)
    ychannel = util.Removed(err_string = REMOVED_ERROR)
    xvariable = util.Deprecated(new = "variable")

    def enum_plots(self, experiment):
        """
        Returns an iterator over the possible plots that this View can
        produce.  The values returned can be passed to :meth:`plot`.
        """

        return super().enum_plots(experiment)

    def plot(self, experiment, plot_name = None, **kwargs):
        """Plot a chart of a variable's values against a statistic.

        Parameters
        ----------

        color : a matplotlib color
            The color to plot with.  Overridden if `huefacet` is not `None`

        linestyle : ['solid' | 'dashed', 'dashdot', 'dotted' | (offset, on-off-dash-seq) | '-' | '--' | '-.' | ':' | 'None' | ' ' | '']

        marker : a matplotlib marker style
            See http://matplotlib.org/api/markers_api.html#module-matplotlib.markers

        markersize : int
            The marker size in points

        markerfacecolor : a matplotlib color
            The color to make the markers.  Overridden (?) if `huefacet` is not `None`

        alpha : the alpha blending value, from 0.0 (transparent) to 1.0 (opaque)

        Notes
        -----

        Other `kwargs` are passed to `matplotlib.pyplot.plot <https://matplotlib.org/devdocs/api/_as_gen/matplotlib.pyplot.plot.html>`_

        """

        super().plot(experiment, plot_name, **kwargs)

    def _grid_plot(self, experiment, grid, xlim, ylim, xscale, yscale, **kwargs):
        """
        Draw the statistic (and optional error bars) onto ``grid``.

        Returns a dict with the ``xlim`` and ``ylim`` that were used.

        NOTE: the ``xlim`` / ``ylim`` positional arguments are discarded;
        limits are taken from ``kwargs`` or, failing that, computed from
        the data being plotted.
        """

        data = grid.data

        stat = experiment.statistics[self.statistic]
        stat_name = stat.name
        if self.error_statistic[0]:
            err_stat = experiment.statistics[self.error_statistic]
            err_stat_name = err_stat.name

        xlim = kwargs.pop("xlim", None)
        if xlim is None:
            xlim = (xscale.clip(data[self.variable].min() * 0.9),
                    xscale.clip(data[self.variable].max() * 1.1))

        ylim = kwargs.pop("ylim", None)
        if ylim is None:
            ylim = (yscale.clip(data[stat_name].min() * 0.9),
                    yscale.clip(data[stat_name].max() * 1.1))

            if self.error_statistic[0]:
                try:
                    # error statistic holds (low, high) pairs
                    ylim = (yscale.clip(min(x[0] for x in data[err_stat_name]) * 0.9),
                            yscale.clip(max(x[1] for x in data[err_stat_name]) * 1.1))
                except (IndexError, TypeError):
                    # error statistic holds scalars: subscripting a NumPy
                    # scalar raises IndexError, subscripting a plain Python
                    # float raises TypeError.  Treat the value as a
                    # symmetric offset around the statistic.
                    ylim = (yscale.clip((data[stat_name].min() - data[err_stat_name].min()) * 0.9),
                            yscale.clip((data[stat_name].max() + data[err_stat_name].max()) * 1.1))

        # plot the error bars first so the axis labels don't get overwritten
        if self.error_statistic[0]:
            grid.map(_error_bars, self.variable, stat_name, err_stat_name, **kwargs)

        grid.map(plt.plot, self.variable, stat_name, **kwargs)

        return {'xlim' : xlim, 'ylim' : ylim}
class ImportOp(HasStrictTraits):
    """
    An operation for importing data and making an :class:`.Experiment`.

    To use, set the :attr:`conditions` dict to a mapping between condition name
    and NumPy ``dtype``.  Useful dtypes include ``category``, ``float``,
    ``int``, ``bool``.

    Next, set :attr:`tubes` to a list of :class:`Tube` containing FCS filenames
    and the corresponding conditions.

    If you would rather not analyze every single event in every FCS file,
    set :attr:`events` to the number of events from each FCS file you want to
    load.

    Call :meth:`apply` to load the data.  The usual ``experiment`` parameter
    can be ``None``.

    Attributes
    ----------
    conditions : Dict(Str, Str)
        A dictionary mapping condition names (keys) to NumPy ``dtype``s (values).
        Useful ``dtype``s include ``category``, ``float``, ``int``, and ``bool``.

    tubes : List(Tube)
        A list of :class:`Tube` instances, which map FCS files to their
        corresponding experimental conditions.  Each :class:`Tube` must have a
        :attr:`~Tube.conditions` dict whose keys match those of
        :attr:`conditions`.

    channels : Dict(Str, Str)
        If you only need a subset of the channels available in the data set,
        specify them here.  Each ``(key, value)`` pair specifies a channel to
        include in the output experiment.  The key is the channel name in the
        FCS file, and the value is the name of the channel in the Experiment.
        You can use this to rename channels as you import data (because flow
        channel names are frequently not terribly informative.)  New channel
        names must be valid Python identifiers: start with a letter or ``_``, and
        all characters must be letters, numbers or ``_``.  If :attr:`channels` is
        empty, load all channels in the FCS files.

    events : Int
        If not None, import only a random subset of events of size
        :attr:`events`.  Presumably the analysis will go faster but less
        precisely; good for interactive data exploration.  Then, unset
        :attr:`events` and re-run the analysis non-interactively.

    name_metadata : {None, "$PnN", "$PnS"} (default = None)
        Which FCS metadata is the channel name?  If ``None``, attempt to
        autodetect.

    data_set : Int (default = 0)
        The FCS standard allows you to encode multiple data sets in a single
        FCS file.  Some software (such as the Beckman-Coulter software)
        also encode the same data in two different formats -- for example,
        FCS2.0 and FCS3.0.  To access a data set other than the first one,
        set :attr:`data_set` to the 0-based index of the data set you
        would like to use.  This will be used for *all FCS files imported by
        this operation.*

    ignore_v : List(Str)
        :class:`cytoflow` is designed to operate on an :class:`.Experiment` containing
        tubes that were all collected under the same instrument settings.
        In particular, the same PMT voltages ensure that data can be
        compared across samples.

        *Very rarely*, you may need to set up an :class:`.Experiment` with
        different voltage settings on different :class:`Tube`s.  This is likely
        only to be the case when you are trying to figure out which voltages
        should be used in future experiments.  If so, set :attr:`ignore_v` to a
        :class:`List` of channel names to ignore particular channels.

        .. warning::

            THIS WILL BREAK REAL EXPERIMENTS

    Examples
    --------
    >>> tube1 = flow.Tube(file = 'RFP_Well_A3.fcs', conditions = {"Dox" : 10.0})
    >>> tube2 = flow.Tube(file='CFP_Well_A4.fcs', conditions = {"Dox" : 1.0})
    >>> import_op = flow.ImportOp(conditions = {"Dox" : "float"},
    ...                           tubes = [tube1, tube2])
    >>> ex = import_op.apply()
    """

    id = Constant("edu.mit.synbio.cytoflow.operations.import")
    friendly_id = Constant("Import")
    name = Constant("Import Data")

    # experimental conditions: name --> dtype.
    conditions = Dict(Str, Str)

    # the tubes
    tubes = List(Tube)

    # which channels do we import?
    channels = Dict(Str, Str)

    # which FCS metadata has the channel names in it?
    name_metadata = Enum(None, "$PnN", "$PnS")

    # which data set to get out of the FCS files?
    data_set = Int(0)

    # are we subsetting?
    events = util.CIntOrNone(None)
    coarse_events = util.Deprecated(new='events')

    # DON'T DO THIS
    ignore_v = List(Str)

    def apply(self, experiment=None, metadata_only=False):
        """
        Load a new :class:`.Experiment`.

        Parameters
        ----------
        experiment : Experiment
            Ignored

        metadata_only : bool (default = False)
            Only "import" the metadata, creating an Experiment with all the
            expected metadata and structure but 0 events.

        Returns
        -------
        Experiment
            The new :class:`.Experiment`.  New channels have the following
            metadata:

            - **voltage** - int
                The voltage that this channel was collected at.  Determined
                by the ``$PnV`` field from the first FCS file.

            - **range** - int
                The maximum range of this channel.  Determined by the ``$PnR``
                field from the first FCS file.

            New experimental conditions do not have **voltage** or **range**
            metadata, obviously.  Instead, they have **experiment** set to
            ``True``, to distinguish the experimental variables from the
            conditions that were added by gates, etc.

            If :attr:`ignore_v` is set, it is added as a key to the
            :class:`.Experiment`-wide metadata.

        Raises
        ------
        util.CytoflowOpError
            If no tubes are set, a new channel name is not a valid Python
            identifier, tubes have mismatched or duplicate conditions, the
            first tube's metadata cannot be read, or a requested channel is
            not in the first tube.
        """

        if not self.tubes:
            raise util.CytoflowOpError('tubes', "Must specify some tubes!")

        # if we have channel renaming, make sure the new names are valid
        # python identifiers
        if self.channels:
            for old_name, new_name in self.channels.items():
                if old_name != new_name and new_name != util.sanitize_identifier(new_name):
                    raise util.CytoflowOpError(
                        'channels',
                        "Channel name {} must be a "
                        "valid Python identifier.".format(new_name))

        # make sure each tube has the same conditions
        tube0_conditions = set(self.tubes[0].conditions)
        for tube in self.tubes:
            tube_conditions = set(tube.conditions)
            if len(tube0_conditions ^ tube_conditions) > 0:
                raise util.CytoflowOpError(
                    'tubes',
                    "Tube {0} didn't have the same "
                    "conditions as tube {1}".format(tube.file,
                                                    self.tubes[0].file))

        # make sure experimental conditions are unique
        for idx, i in enumerate(self.tubes[0:-1]):
            for j in self.tubes[idx + 1:]:
                if i.conditions_equal(j):
                    raise util.CytoflowOpError(
                        'tubes',
                        "The same conditions specified for "
                        "tube {0} and tube {1}".format(i.file, j.file))

        experiment = Experiment()

        experiment.metadata["ignore_v"] = self.ignore_v

        for condition, dtype in list(self.conditions.items()):
            experiment.add_condition(condition, dtype)
            experiment.metadata[condition]['experiment'] = True

        try:
            # silence warnings about duplicate channels;
            # we'll figure that out below
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                tube0_meta = fcsparser.parse(self.tubes[0].file,
                                             data_set=self.data_set,
                                             meta_data_only=True,
                                             reformat_meta=True)
        except Exception as e:
            raise util.CytoflowOpError(
                'tubes',
                "FCS reader threw an error reading metadata "
                "for tube {}: {}".format(self.tubes[0].file, str(e))) from e

        meta_channels = tube0_meta["_channels_"]

        if self.name_metadata:
            experiment.metadata["name_metadata"] = self.name_metadata
        else:
            experiment.metadata["name_metadata"] = autodetect_name_metadata(
                self.tubes[0].file, data_set=self.data_set)

        # keep the parser's original index as a column before re-indexing
        # the channel table by channel name
        meta_channels['Index'] = meta_channels.index
        meta_channels.set_index(experiment.metadata["name_metadata"],
                                inplace=True)

        channels = list(self.channels.keys()) if self.channels \
            else list(meta_channels.index.values)

        # make sure everything in self.channels is in the tube channels
        for channel in channels:
            if channel not in meta_channels.index:
                raise util.CytoflowOpError(
                    'channels',
                    "Channel {0} not in tube {1}".format(
                        channel, self.tubes[0].file))

        # now that we have the metadata, load it into experiment
        for channel in channels:
            experiment.add_channel(channel)

            experiment.metadata[channel]["fcs_name"] = channel

            # keep track of the channel's PMT voltage
            if "$PnV" in meta_channels.loc[channel]:
                v = meta_channels.loc[channel]['$PnV']
                if v:
                    experiment.metadata[channel]["voltage"] = v

            # add the maximum possible value for this channel.
            data_range = meta_channels.loc[channel]['$PnR']
            data_range = float(data_range)
            experiment.metadata[channel]['range'] = data_range

        experiment.metadata['fcs_metadata'] = {}
        for tube in self.tubes:
            if metadata_only:
                tube_meta, tube_data = parse_tube(tube.file,
                                                  experiment,
                                                  data_set=self.data_set,
                                                  metadata_only=True)
            else:
                tube_meta, tube_data = parse_tube(tube.file,
                                                  experiment,
                                                  data_set=self.data_set)

                # events are only sampled and added when actually loading
                # data (this nesting assumes a metadata-only parse yields
                # no usable event table -- TODO confirm against parse_tube)
                if self.events:
                    if self.events <= len(tube_data):
                        tube_data = tube_data.loc[np.random.choice(
                            tube_data.index, self.events, replace=False)]
                    else:
                        warnings.warn(
                            "Only {0} events in tube {1}".format(
                                len(tube_data), tube.file),
                            util.CytoflowWarning)

                experiment.add_events(tube_data[channels], tube.conditions)

            # extract the row and column from wells collected on a
            # BD HTS
            if 'WELL ID' in tube_meta:
                pos = tube_meta['WELL ID']
                tube_meta['CF_Row'] = pos[0]
                tube_meta['CF_Col'] = int(pos[1:3])

            # NOTE(review): ``i + 1`` is the position within ``channels``;
            # presumably that equals the FCS parameter number only when all
            # channels are imported in file order -- confirm for subsets.
            for i, channel in enumerate(channels):
                # remove the PnV tube metadata
                if '$P{}V'.format(i + 1) in tube_meta:
                    del tube_meta['$P{}V'.format(i + 1)]

                # work around a bug where the PnR is sometimes not the detector range
                # but the data range.
                pnr = '$P{}R'.format(i + 1)
                if pnr in tube_meta and float(tube_meta[pnr]) > experiment.metadata[channel]['range']:
                    experiment.metadata[channel]['range'] = float(tube_meta[pnr])

            tube_meta['CF_File'] = Path(tube.file).stem

            experiment.metadata['fcs_metadata'][tube.file] = tube_meta

        for channel in channels:
            # ``column`` is the channel's current name in experiment.data;
            # ``channel`` stays the FCS name used to look up FCS metadata.
            column = channel

            if self.channels and channel in self.channels:
                new_name = self.channels[channel]
                if new_name != channel:
                    experiment.data.rename(columns={channel: new_name},
                                           inplace=True)
                    experiment.metadata[new_name] = experiment.metadata[channel]
                    experiment.metadata[new_name]["fcs_name"] = channel
                    del experiment.metadata[channel]
                    column = new_name

            # this catches an odd corner case where some instruments store
            # instrument-specific info in the "extra" bits.  we have to
            # clear them out.
            if tube0_meta['$DATATYPE'] == 'I':
                data_bits = int(meta_channels.loc[channel]['$PnB'])
                data_range = float(meta_channels.loc[channel]['$PnR'])
                # log2 is exact for powers of two, unlike log(x, 2)
                range_bits = int(math.log2(data_range))

                if range_bits < data_bits:
                    # low ``range_bits`` bits set; always keep at least one bit
                    mask = (1 << max(range_bits, 1)) - 1

                    experiment.data[column] = \
                        experiment.data[column].values.astype('int') & mask

            # check whether the channel is recorded as log-scaled with
            # integer channels
            f1 = float(meta_channels.loc[channel]['$PnE'][0])
            f2 = float(meta_channels.loc[channel]['$PnE'][1])

            if f1 > 0.0 and f2 == 0.0:
                warnings.warn(
                    'Invalid $PnE = {},{} for channel {}, changing it to {},1.0'
                    .format(f1, f2, channel, f1), util.CytoflowWarning)
                f2 = 1.0

            if f1 > 0.0 and f2 > 0.0 and tube0_meta['$DATATYPE'] == 'I':
                warnings.warn(
                    'Converting channel {} from logarithmic to linear'.format(
                        channel), util.CytoflowWarning)
                # NOTE: the rescaling itself is currently disabled; the
                # formula was 10 ** (f1 * data / $PnR) * f2.

        return experiment
class ImportOp(HasStrictTraits):
    """
    Import FCS files and build an :class:`.Experiment` from them.

    Typical use: fill in :attr:`conditions` (condition name --> NumPy
    ``dtype`` such as ``category``, ``float``, ``int`` or ``bool``), fill in
    :attr:`tubes` with one :class:`Tube` per FCS file, optionally set
    :attr:`events` to sub-sample each file, then call :meth:`apply`.  The
    ``experiment`` argument of :meth:`apply` may be ``None``.

    Attributes
    ----------
    conditions : Dict(Str, Str)
        Maps each condition name to the NumPy ``dtype`` of its values.

    tubes : List(Tube)
        The :class:`Tube` instances to import; each associates an FCS file
        with its experimental conditions.  Every tube's
        :attr:`~Tube.conditions` keys must match those of :attr:`conditions`.

    channels : Dict(Str, Str)
        Optional channel subset / renaming.  Keys are channel names in the
        FCS file, values are the names to use in the Experiment.  New names
        must be valid Python identifiers (a letter or ``_`` first, then
        letters, numbers or ``_``).  When empty, every channel in the FCS
        files is loaded.

    events : Int (default = 0)
        If ``> 0``, import only a random subset of this many events from
        each tube.  Handy for fast interactive exploration; unset it and
        re-run for the real analysis.

    name_metadata : {None, "$PnN", "$PnS"} (default = None)
        The FCS metadata field holding channel names.  ``None`` means
        autodetect.

    ignore_v : List(Str)
        Channels whose PMT voltage may differ between tubes.
        :class:`cytoflow` expects all tubes in an :class:`.Experiment` to
        share instrument settings so samples are comparable; only set this
        in the rare case where you are exploring which voltages to use.

        .. warning::

            THIS WILL BREAK REAL EXPERIMENTS

    Examples
    --------
    >>> tube1 = flow.Tube(file = 'RFP_Well_A3.fcs', conditions = {"Dox" : 10.0})
    >>> tube2 = flow.Tube(file='CFP_Well_A4.fcs', conditions = {"Dox" : 1.0})
    >>> import_op = flow.ImportOp(conditions = {"Dox" : "float"},
    ...                           tubes = [tube1, tube2])
    >>> ex = import_op.apply()
    """

    id = Constant("edu.mit.synbio.cytoflow.operations.import")
    friendly_id = Constant("Import")
    name = Constant("Import Data")

    # condition name --> dtype
    conditions = Dict(Str, Str)

    # one Tube per FCS file
    tubes = List(Tube)

    # FCS channel name --> Experiment channel name
    channels = Dict(Str, Str)

    # FCS metadata field that carries the channel names
    name_metadata = Enum(None, "$PnN", "$PnS")

    # number of events to sample per tube (0 = all of them)
    events = util.PositiveInt(0, allow_zero=True)
    coarse_events = util.Deprecated(new='events')

    # DON'T DO THIS
    ignore_v = List(Str)

    def apply(self, experiment=None):
        """
        Build and return a new :class:`.Experiment` from :attr:`tubes`.

        Parameters
        ----------
        experiment : Experiment
            Not used; a fresh Experiment is always created.

        Returns
        -------
        Experiment
            The new :class:`.Experiment`.  Each channel carries:

            - **voltage** - int
                The ``$PnV`` value from the first FCS file (when present).

            - **range** - int
                The ``$PnR`` value from the first FCS file.

            Experimental conditions carry **experiment** = ``True`` instead,
            which tells them apart from conditions added later by gates etc.

            :attr:`ignore_v` is stored under the Experiment-wide metadata.
        """

        if not self.tubes:
            raise util.CytoflowOpError('tubes', "Must specify some tubes!")

        # renamed channels must end up with valid python identifiers
        if self.channels:
            for fcs_name, renamed in self.channels.items():
                if fcs_name == renamed:
                    continue
                if renamed != util.sanitize_identifier(renamed):
                    raise util.CytoflowOpError(
                        'channels',
                        "Channel name {} must be a "
                        "valid Python identifier.".format(renamed))

        # every tube must define exactly the same set of conditions
        first_tube = self.tubes[0]
        expected_conditions = set(first_tube.conditions)
        for tube in self.tubes:
            if expected_conditions != set(tube.conditions):
                raise util.CytoflowOpError(
                    'tubes',
                    "Tube {0} didn't have the same "
                    "conditions as tube {1}".format(tube.file,
                                                    first_tube.file))

        # no two tubes may share identical condition values
        for position, earlier in enumerate(self.tubes[0:-1]):
            for later in self.tubes[position + 1:]:
                if earlier.conditions_equal(later):
                    raise util.CytoflowOpError(
                        'tubes',
                        "The same conditions specified for "
                        "tube {0} and tube {1}".format(earlier.file,
                                                       later.file))

        experiment = Experiment()
        experiment.metadata["ignore_v"] = self.ignore_v

        for cond_name, cond_dtype in tuple(self.conditions.items()):
            experiment.add_condition(cond_name, cond_dtype)
            experiment.metadata[cond_name]['experiment'] = True

        try:
            # duplicate-channel warnings are irrelevant here; channel
            # naming is resolved further down
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                tube0_meta = fcsparser.parse(first_tube.file,
                                             meta_data_only=True,
                                             reformat_meta=True)
        except Exception as e:
            raise util.CytoflowOpError(
                'tubes',
                "FCS reader threw an error reading metadata "
                "for tube {}".format(first_tube.file)) from e

        meta_channels = tube0_meta["_channels_"]

        if self.name_metadata:
            name_field = self.name_metadata
        else:
            # autodetect which field names the channels
            has_pnn = "$PnN" in meta_channels
            has_pns = "$PnS" in meta_channels

            if has_pnn and not has_pns:
                name_field = "$PnN"
            elif has_pns and not has_pnn:
                name_field = "$PnS"
            else:
                pnn_values = meta_channels["$PnN"]
                pns_values = meta_channels["$PnS"]
                pnn_unique = len(set(pnn_values)) == len(pnn_values)
                pns_unique = len(set(pns_values)) == len(pns_values)

                # prefer $PnN only when it alone is unique.  otherwise use
                # $PnS: as per fcsparser.api, $PnN is the "short name"
                # (like FL-1) and $PnS is the "actual name" (like "FSC-H").
                if pnn_unique and not pns_unique:
                    name_field = "$PnN"
                else:
                    name_field = "$PnS"

        experiment.metadata["name_metadata"] = name_field

        meta_channels.set_index(experiment.metadata["name_metadata"],
                                inplace=True)

        if self.channels:
            channels = list(self.channels.keys())
        else:
            channels = list(tube0_meta["_channel_names_"])

        # every requested channel has to exist in the first tube
        for channel in channels:
            if channel not in meta_channels.index:
                raise util.CytoflowOpError(
                    'channels',
                    "Channel {0} not in tube {1}".format(
                        channel, first_tube.file))

        # copy per-channel metadata into the experiment
        for channel in channels:
            experiment.add_channel(channel)
            experiment.metadata[channel]["fcs_name"] = channel

            channel_meta = meta_channels.loc[channel]

            # PMT voltage, when the file records one
            if "$PnV" in channel_meta:
                voltage = channel_meta['$PnV']
                if voltage:
                    experiment.metadata[channel]["voltage"] = voltage

            # maximum possible value for this channel
            experiment.metadata[channel]['range'] = float(channel_meta['$PnR'])

        experiment.metadata['fcs_metadata'] = {}

        for tube in self.tubes:
            tube_meta, tube_data = parse_tube(tube.file, experiment)

            n_events = len(tube_data)
            if self.events and self.events > n_events:
                warnings.warn(
                    "Only {0} events in tube {1}".format(n_events, tube.file),
                    util.CytoflowWarning)
            elif self.events:
                sampled = np.random.choice(tube_data.index,
                                           self.events,
                                           replace=False)
                tube_data = tube_data.loc[sampled]

            experiment.add_events(tube_data[channels], tube.conditions)
            experiment.metadata['fcs_metadata'][tube.file] = tube_meta

        # finally apply any requested channel renames
        for channel in channels:
            if not (self.channels and channel in self.channels):
                continue

            renamed = self.channels[channel]
            if renamed == channel:
                continue

            experiment.data.rename(columns={channel: renamed}, inplace=True)
            experiment.metadata[renamed] = experiment.metadata[channel]
            experiment.metadata[renamed]["fcs_name"] = channel
            del experiment.metadata[channel]

        return experiment
class BaseStatisticsView(BaseView):
    """
    The base class for statistics views (as opposed to data views).

    Attributes
    ----------
    variable : str
        The condition that varies when plotting this statistic: used for
        the x axis of line plots, the bar groups in bar plots, etc.

    subset : str
        An expression that specifies the subset of the statistic to plot.

    """

    # deprecated or removed attributes give warnings & errors, respectively
    by = util.Deprecated(
        new='variable',
        err_string="'by' is deprecated, please use 'variable'")

    variable = Str
    subset = Str

    def enum_plots(self, experiment, data):
        """
        Enumerate the named plots we can make from this set of statistics.

        Parameters
        ----------
        experiment : Experiment
            The experiment the statistic came from.

        data : pandas.DataFrame
            The statistic, indexed by its conditions.

        Returns
        -------
        iterator
            Yields one group key per combination of the index levels that
            are not used as `variable` or a facet; yields a single ``None``
            when every level is used.

        Raises
        ------
        util.CytoflowViewError
            If the experiment or `variable` is missing or invalid, or the
            subset / facets are invalid.
        """

        if experiment is None:
            raise util.CytoflowViewError('experiment',
                                         "No experiment specified")

        if not self.variable:
            raise util.CytoflowViewError('variable', "variable not set")

        if self.variable not in experiment.conditions:
            raise util.CytoflowViewError(
                'variable',
                "variable {0} not in the experiment".format(self.variable))

        data, facets, names = self._subset_data(data)

        # index levels that no facet consumes: each combination of their
        # values becomes a separately named plot
        by = list(set(names) - set(facets))

        class plot_enum(object):

            def __init__(self, data, by):
                self.by = by
                self._iter = None
                self._returned = False

                if by:
                    self._iter = data.groupby(by).__iter__()

            def __iter__(self):
                return self

            def __next__(self):
                if self._iter:
                    # groupby yields (key, frame); only the key is the name
                    return next(self._iter)[0]
                else:
                    # nothing to group by: exactly one unnamed plot
                    if self._returned:
                        raise StopIteration
                    else:
                        self._returned = True
                        return None

        return plot_enum(data.reset_index(), by)

    def plot(self, experiment, data, plot_name=None, **kwargs):
        """
        Plot some data from a statistic.

        This function takes care of checking for facet name validity and
        subsetting, then passes the dataframe to `BaseView.plot`

        Parameters
        ----------
        experiment : Experiment
            The experiment the statistic came from.

        data : pandas.DataFrame
            The statistic, indexed by its conditions.

        plot_name : object
            One of the values produced by :meth:`enum_plots`; required when
            some index levels are not used as `variable` or a facet.

        Raises
        ------
        util.CytoflowViewError
            If the experiment, `variable`, subset, facets or `plot_name`
            are missing or invalid.
        """

        if experiment is None:
            raise util.CytoflowViewError('experiment',
                                         "No experiment specified")

        if not self.variable:
            raise util.CytoflowViewError('variable', "variable not set")

        if self.variable not in experiment.conditions:
            raise util.CytoflowViewError(
                'variable',
                "variable {0} not in the experiment".format(self.variable))

        data, facets, names = self._subset_data(data)

        unused_names = list(set(names) - set(facets))

        if plot_name is not None and not unused_names:
            raise util.CytoflowViewError(
                'plot_name',
                "You specified a plot name, but all "
                "the facets are already used")

        if unused_names:
            groupby = data.groupby(unused_names)

            if plot_name is None:
                raise util.CytoflowViewError(
                    'plot_name',
                    "You must use facets {} in either the "
                    "plot variables or the plot name. "
                    "Possible plot names: {}".format(
                        unused_names, list(groupby.groups.keys())))

            if plot_name not in set(groupby.groups.keys()):
                raise util.CytoflowViewError(
                    'plot_name',
                    "Plot {} not from plot_enum; must "
                    "be one of {}".format(plot_name,
                                          list(groupby.groups.keys())))

            data = groupby.get_group(plot_name)

        # FacetGrid needs a "long" data set.  Build a new frame rather than
        # resetting in place: ``data`` may be a slice returned by
        # ``get_group`` or the caller's own frame.
        data = data.reset_index()

        super().plot(experiment, data, **kwargs)

    def _subset_data(self, data):
        """
        Apply :attr:`subset`, drop single-valued index levels and validate
        the facets.

        Returns
        -------
        tuple
            ``(data, facets, names)``: the filtered frame, the list of
            facets in use (`variable`, `xfacet`, `yfacet`, `huefacet`), and
            the remaining index level names.

        Raises
        ------
        util.CytoflowViewError
            If the subset string is invalid or selects nothing, only one
            value remains to plot, a facet is not an index level, or a
            facet is used more than once.
        """

        if self.subset:
            try:
                # TODO - either sanitize column names, or check to see that
                # all conditions are valid Python variables
                data = data.query(self.subset)
            except Exception as e:
                raise util.CytoflowViewError(
                    'subset',
                    "Subset string '{0}' isn't valid".format(
                        self.subset)) from e

            if len(data) == 0:
                raise util.CytoflowViewError(
                    'subset',
                    "Subset string '{0}' returned no values".format(
                        self.subset))

        names = list(data.index.names)

        for name in names:
            unique_values = data.index.get_level_values(name).unique()
            if len(unique_values) == 1:
                warn("Only one value for level {}; dropping it.".format(name),
                     util.CytoflowViewWarning)
                try:
                    data.index = data.index.droplevel(name)
                except (AttributeError, ValueError) as e:
                    # the last remaining level can't be dropped: older
                    # pandas has no ``droplevel`` on a flat Index
                    # (AttributeError), newer pandas raises ValueError
                    raise util.CytoflowViewError(
                        None,
                        "Must have more than one "
                        "value to plot.") from e

        # levels may have been dropped above
        names = list(data.index.names)

        for trait, label in (('xfacet', "X"),
                             ('yfacet', "Y"),
                             ('huefacet', "Hue")):
            facet = getattr(self, trait)
            if facet and facet not in data.index.names:
                raise util.CytoflowViewError(
                    trait,
                    "{} facet {} not in statistics; must be one of {}".format(
                        label, facet, data.index.names))

        facets = [
            x for x in
            [self.variable, self.xfacet, self.yfacet, self.huefacet] if x
        ]

        if len(facets) != len(set(facets)):
            raise util.CytoflowViewError(None, "Can't reuse facets")

        return data, facets, names
class Base1DStatisticsView(BaseStatisticsView):
    """
    The base class for 1-dimensional statistic views -- ie, the :attr:`variable`
    attribute is on the x axis, and the statistic value is on the y axis.

    Attributes
    ----------
    statistic : (str, str)
        The name of the statistic to plot.  Must be a key in the
        :attr:`~Experiment.statistics` attribute of the :class:`~.Experiment`
        being plotted.

    error_statistic : (str, str)
        The name of the statistic used to plot error bars.  Must be a key in the
        :attr:`~Experiment.statistics` attribute of the :class:`~.Experiment`
        being plotted.

    scale : {'linear', 'log', 'logicle'}
        The scale applied to the data before plotting it.
    """

    # removed / deprecated attributes; the removed ones share one error string
    REMOVED_ERROR = "Statistics changed dramatically in 0.5; please see the documentation"
    by = util.Removed(err_string=REMOVED_ERROR)
    yfunction = util.Removed(err_string=REMOVED_ERROR)
    ychannel = util.Removed(err_string=REMOVED_ERROR)
    channel = util.Removed(err_string=REMOVED_ERROR)
    function = util.Removed(err_string=REMOVED_ERROR)
    error_bars = util.Removed(err_string=REMOVED_ERROR)

    xvariable = util.Deprecated(new="variable")

    statistic = Tuple(Str, Str)
    error_statistic = Tuple(Str, Str)

    scale = util.ScaleEnum

    def enum_plots(self, experiment):
        """
        Enumerate the named plots this view can make from ``experiment``.

        Raises
        ------
        util.CytoflowViewError
            If ``experiment`` is ``None`` or the statistics are invalid.
        """

        if experiment is None:
            raise util.CytoflowViewError('experiment',
                                         "No experiment specified")

        data = self._make_data(experiment)
        return super().enum_plots(experiment, data)

    def plot(self, experiment, plot_name=None, **kwargs):
        """
        Plot :attr:`statistic` from ``experiment``.

        Parameters
        ----------
        orientation : {'vertical', 'horizontal'}

        lim : (float, float)
            Set the range of the plot's axis.

        Raises
        ------
        util.CytoflowViewError
            If the experiment, the statistics or :attr:`variable` are
            missing or invalid.
        """

        if experiment is None:
            raise util.CytoflowViewError('experiment',
                                         "No experiment specified")

        data = self._make_data(experiment)

        if not self.variable:
            raise util.CytoflowViewError('variable', "variable not set")

        if self.variable not in experiment.conditions:
            raise util.CytoflowViewError(
                'variable',
                "variable {0} not in the experiment".format(self.variable))

        scale = util.scale_factory(self.scale,
                                   experiment,
                                   statistic=self.statistic,
                                   error_statistic=self.error_statistic)

        super().plot(experiment,
                     data,
                     plot_name=plot_name,
                     scale=scale,
                     **kwargs)

    def _make_data(self, experiment):
        """
        Build the DataFrame to plot: one column for :attr:`statistic` and,
        if set, one for :attr:`error_statistic`, sharing the statistic's
        index.

        Raises
        ------
        util.CytoflowViewError
            If the experiment is ``None``; the statistic is unset, missing
            or not numeric; or the error statistic is missing, has a
            different index, or has the same name as the statistic.
        """

        if experiment is None:
            raise util.CytoflowViewError('experiment',
                                         "No experiment specified")

        # an unset Tuple(Str, Str) is ('', ''), which is truthy as a tuple,
        # so test the first element as well
        if not (self.statistic and self.statistic[0]):
            raise util.CytoflowViewError('statistic', "Statistic not set")

        if self.statistic not in experiment.statistics:
            raise util.CytoflowViewError(
                'statistic',
                "Can't find the statistic {} in the experiment".format(
                    self.statistic))
        else:
            stat = experiment.statistics[self.statistic]

        if not util.is_numeric(stat):
            raise util.CytoflowViewError('statistic',
                                         "Statistic must be numeric")

        if self.error_statistic[0]:
            if self.error_statistic not in experiment.statistics:
                raise util.CytoflowViewError(
                    'error_statistic',
                    "Can't find the error statistic in the experiment")
            else:
                error_stat = experiment.statistics[self.error_statistic]
        else:
            error_stat = None

        if error_stat is not None:
            # align a copy, so the statistic stored in the experiment is
            # not reordered or sorted as a side effect of plotting
            error_stat = error_stat.copy()

            try:
                error_stat.index = error_stat.index.reorder_levels(
                    stat.index.names)
                error_stat.sort_index(inplace=True)
            except AttributeError:
                # a flat (single-level) index has no levels to reorder
                pass

            if not stat.index.equals(error_stat.index):
                raise util.CytoflowViewError(
                    'error_statistic',
                    "Data statistic and error statistic "
                    " don't have the same index.")

            if stat.name == error_stat.name:
                raise util.CytoflowViewError(
                    'error_statistic',
                    "Data statistic and error statistic can "
                    "not have the same name.")

        data = pd.DataFrame(index=stat.index)
        data[stat.name] = stat

        if error_stat is not None:
            data[error_stat.name] = error_stat

        return data
class Base1DStatisticsView(BaseStatisticsView):
    """
    Base class for views that plot one statistic, optionally with an error
    statistic, against a conditioning variable.
    """

    REMOVED_ERROR = "Statistics changed dramatically in 0.5; please see the documentation"

    # attributes from pre-0.5 versions; accessing them reports REMOVED_ERROR
    by = util.Removed(err_string=REMOVED_ERROR)
    yfunction = util.Removed(err_string=REMOVED_ERROR)
    ychannel = util.Removed(err_string=REMOVED_ERROR)
    channel = util.Removed(err_string=REMOVED_ERROR)
    function = util.Removed(err_string=REMOVED_ERROR)
    error_bars = util.Removed(err_string=REMOVED_ERROR)

    xvariable = util.Deprecated(new="variable")

    statistic = Tuple(Str, Str)
    error_statistic = Tuple(Str, Str)

    def enum_plots(self, experiment):
        """
        Return the plots this view can produce for `experiment`, as computed
        by the parent class from the assembled statistic data.
        """
        data = self._make_data(experiment)
        return super().enum_plots(experiment, data)

    def plot(self, experiment, plot_name=None, **kwargs):
        """
        Assemble the statistic data, build the axis scales and delegate the
        drawing to the parent class.
        """
        data = self._make_data(experiment)

        # only a numeric variable gets a scale object; otherwise pass None
        xscale = None
        if util.is_numeric(experiment[self.variable]):
            xscale = util.scale_factory(self.xscale,
                                        experiment,
                                        condition=self.variable)

        yscale = util.scale_factory(self.yscale,
                                    experiment,
                                    statistic=self.statistic,
                                    error_statistic=self.error_statistic)

        super().plot(experiment,
                     data,
                     plot_name,
                     xscale=xscale,
                     yscale=yscale,
                     **kwargs)

    def _make_data(self, experiment):
        """
        Validate the configured statistics and assemble them into one frame.

        Returns
        -------
        pandas.DataFrame
            Indexed like the data statistic, with one column named after the
            data statistic and, if an error statistic is set, one column
            named after the error statistic.

        Raises
        ------
        util.CytoflowViewError
            If the experiment or statistic is missing, the statistic is not
            numeric, or the error statistic is missing, has a different index
            than the data statistic, or has the same name as it.
        """
        if experiment is None:
            raise util.CytoflowViewError("No experiment specified")

        # `statistic` is a 2-tuple and therefore always truthy; an unset
        # statistic shows up as an empty first element.
        if not self.statistic[0]:
            raise util.CytoflowViewError("Statistic not set")

        if self.statistic not in experiment.statistics:
            raise util.CytoflowViewError(
                "Can't find the statistic {} in the experiment".format(
                    self.statistic))

        stat = experiment.statistics[self.statistic]

        if not util.is_numeric(stat):
            raise util.CytoflowViewError("Statistic must be numeric")

        error_stat = None
        if self.error_statistic[0]:
            if self.error_statistic not in experiment.statistics:
                raise util.CytoflowViewError(
                    "Can't find the error statistic in the experiment")

            error_stat = experiment.statistics[self.error_statistic]

            if not stat.index.equals(error_stat.index):
                raise util.CytoflowViewError(
                    "Data statistic and error statistic "
                    " don't have the same index.")

            # both end up as columns of one frame, keyed by their names
            if stat.name == error_stat.name:
                raise util.CytoflowViewError(
                    "Data statistic and error statistic can "
                    "not have the same name.")

        data = pd.DataFrame(index=stat.index)
        data[stat.name] = stat

        if error_stat is not None:
            data[error_stat.name] = error_stat

        return data
class Stats1DView(Base1DStatisticsView):
    """
    Plot a statistic.  The value of the statistic will be plotted on the
    Y axis; a numeric conditioning variable must be chosen for the X axis.
    Every variable in the statistic must be specified as either the
    `variable` or one of the plot facets.

    Attributes
    ----------
    variable_scale : {'linear', 'log', 'logicle'}
        The scale applied to the variable (on the X axis)

    Examples
    --------

    .. plot::
        :context: close-figs

        Make a little data set.

        >>> import cytoflow as flow
        >>> import_op = flow.ImportOp()
        >>> import_op.tubes = [flow.Tube(file = "Plate01/RFP_Well_A3.fcs",
        ...                              conditions = {'Dox' : 10.0}),
        ...                    flow.Tube(file = "Plate01/CFP_Well_A4.fcs",
        ...                              conditions = {'Dox' : 1.0})]
        >>> import_op.conditions = {'Dox' : 'float'}
        >>> ex = import_op.apply()

    Create a new statistic.

    .. plot::
        :context: close-figs

        >>> ch_op = flow.ChannelStatisticOp(name = 'MeanByDox',
        ...                                 channel = 'Y2-A',
        ...                                 function = flow.geom_mean,
        ...                                 by = ['Dox'])
        >>> ex2 = ch_op.apply(ex)

    View the new statistic

    .. plot::
        :context: close-figs

        >>> flow.Stats1DView(variable = 'Dox',
        ...                  statistic = ('MeanByDox', 'geom_mean'),
        ...                  variable_scale = 'log',
        ...                  scale = 'log').plot(ex2)
    """

    # traits
    id = Constant("edu.mit.synbio.cytoflow.view.stats1d")
    friendly_id = Constant("1D Statistics View")

    REMOVED_ERROR = Constant(
        "Statistics changed dramatically in 0.5; please see the documentation")
    by = util.Removed(err_string=REMOVED_ERROR)
    yfunction = util.Removed(err_string=REMOVED_ERROR)
    ychannel = util.Removed(err_string=REMOVED_ERROR)
    xvariable = util.Deprecated(new="variable")
    xscale = util.Deprecated(new='variable_scale')

    variable_scale = util.ScaleEnum

    def enum_plots(self, experiment):
        """
        Returns an iterator over the possible plots that this View can
        produce.  The values returned can be passed to :meth:`plot`.
        """
        return super().enum_plots(experiment)

    def plot(self, experiment, plot_name=None, **kwargs):
        """Plot a chart of a variable's values against a statistic.

        Parameters
        ----------
        variable_lim : (float, float)
            The limits on the variable axis

        color : a matplotlib color
            The color to plot with.  Overridden if `huefacet` is not `None`

        linewidth : float
            The width of the line, in points

        linestyle : ['solid' | 'dashed', 'dashdot', 'dotted' | (offset, on-off-dash-seq) | '-' | '--' | '-.' | ':' | 'None' | ' ' | '']

        marker : a matplotlib marker style
            See http://matplotlib.org/api/markers_api.html#module-matplotlib.markers

        markersize : int
            The marker size in points

        markerfacecolor : a matplotlib color
            The color to make the markers.  Overridden (?) if `huefacet` is
            not `None`

        alpha : the alpha blending value, from 0.0 (transparent) to 1.0 (opaque)

        capsize : scalar
            The size of the error bar caps, in points

        shade_error : bool
            If `False` (the default), plot the error statistic as traditional
            "error bars."  If `True`, plot error statistic as a filled,
            shaded region.

        shade_alpha : float
            The transparency of the shaded error region, from 0.0
            (transparent) to 1.0 (opaque.)  Default is 0.2.

        Raises
        ------
        util.CytoflowViewError
            If `experiment` is None, or `variable` is not a condition of the
            experiment or is not numeric.

        Notes
        -----
        Other `kwargs` are passed to `matplotlib.pyplot.plot <https://matplotlib.org/devdocs/api/_as_gen/matplotlib.pyplot.plot.html>`_
        """
        if experiment is None:
            raise util.CytoflowViewError('experiment',
                                         "No experiment specified")

        # Raise the view-specific error type, like every other check in the
        # view classes, so one handler covers all view configuration errors.
        # NOTE(review): assumes CytoflowViewError subclasses CytoflowError.
        if self.variable not in experiment.conditions:
            raise util.CytoflowViewError(
                'variable',
                "Variable {} not in the experiment".format(self.variable))

        if not util.is_numeric(experiment[self.variable]):
            raise util.CytoflowViewError(
                'variable',
                "Variable {} must be numeric".format(self.variable))

        variable_scale = util.scale_factory(self.variable_scale,
                                            experiment,
                                            condition=self.variable)

        super().plot(experiment,
                     plot_name,
                     variable_scale=variable_scale,
                     **kwargs)

    def _grid_plot(self, experiment, grid, **kwargs):
        """
        Draw the statistic (and optional error statistic) onto `grid`.

        `kwargs` must contain the scale objects under ``scale`` and
        ``variable_scale``.  It may contain ``lim``, ``variable_lim``,
        ``orientation``, ``capsize``, ``shade_error`` and ``shade_alpha``.
        Whatever remains is forwarded to `matplotlib.pyplot.plot`.

        Returns
        -------
        dict
            ``xscale``, ``xlim``, ``yscale`` and ``ylim``.  The variable is
            on the X axis when `orientation` is ``'vertical'`` and on the Y
            axis otherwise.
        """
        data = grid.data
        data_scale = kwargs.pop('scale')
        variable_scale = kwargs.pop('variable_scale')

        stat = experiment.statistics[self.statistic]
        stat_name = stat.name

        if self.error_statistic[0]:
            err_stat = experiment.statistics[self.error_statistic]
            err_stat_name = err_stat.name
        else:
            err_stat = None

        # default limits: pad the observed range by 10% and clip it to the
        # scale
        variable_lim = kwargs.pop("variable_lim", None)
        if variable_lim is None:
            variable_lim = (
                variable_scale.clip(data[self.variable].min() * 0.9),
                variable_scale.clip(data[self.variable].max() * 1.1))

        lim = kwargs.pop("lim", None)
        if lim is None:
            lim = (data_scale.clip(data[stat_name].min() * 0.9),
                   data_scale.clip(data[stat_name].max() * 1.1))

            if self.error_statistic[0]:
                try:
                    # error values given as (low, high) pairs
                    lim = (data_scale.clip(
                               min([x[0] for x in data[err_stat_name]]) * 0.9),
                           data_scale.clip(
                               max([x[1] for x in data[err_stat_name]]) * 1.1))
                except (TypeError, IndexError):
                    # scalar error values: extend the data range by the error
                    lim = (data_scale.clip(
                               (data[stat_name].min()
                                - data[err_stat_name].min()) * 0.9),
                           data_scale.clip(
                               (data[stat_name].max()
                                + data[err_stat_name].max()) * 1.1))

        orientation = kwargs.pop('orientation', 'vertical')
        capsize = kwargs.pop('capsize', None)
        shade_error = kwargs.pop('shade_error', False)
        shade_alpha = kwargs.pop('shade_alpha', 0.2)

        if orientation == 'vertical':
            # plot the error bars first so the axis labels don't get
            # overwritten
            if err_stat is not None:
                if shade_error:
                    grid.map(_v_error_shade,
                             self.variable,
                             stat_name,
                             err_stat_name,
                             alpha=shade_alpha)
                else:
                    grid.map(_v_error_bars,
                             self.variable,
                             stat_name,
                             err_stat_name,
                             capsize=capsize)

            grid.map(plt.plot, self.variable, stat_name, **kwargs)

            return dict(xscale=variable_scale,
                        xlim=variable_lim,
                        yscale=data_scale,
                        ylim=lim)
        else:
            # plot the error bars first so the axis labels don't get
            # overwritten
            if err_stat is not None:
                if shade_error:
                    grid.map(_h_error_shade,
                             stat_name,
                             self.variable,
                             err_stat_name,
                             alpha=shade_alpha)
                else:
                    grid.map(_h_error_bars,
                             stat_name,
                             self.variable,
                             err_stat_name,
                             capsize=capsize)

            grid.map(plt.plot, stat_name, self.variable, **kwargs)

            return dict(yscale=variable_scale,
                        ylim=variable_lim,
                        xscale=data_scale,
                        xlim=lim)
class Stats2DView(HasStrictTraits):
    """
    Plot two statistics on a scatter plot.  A point (X,Y) is drawn for every
    pair of elements with the same value of `variable`; the X value is from
    `xstatistic` and the Y value is from `ystatistic`.

    Attributes
    ----------
    name : Str
        The plot's name

    variable : Str
        the name of the conditioning variable

    xstatistic : Tuple(Str, Str)
        The statistic to plot on the X axis.  Must have the same indices
        as `ystatistic`.

    xscale : Enum("linear", "log", "logicle") (default = "linear")
        What scale to use on the X axis

    ystatistic : Tuple(Str, Str)
        The statistic to plot on the Y axis.  Must have the same indices
        as `xstatistic`.

    yscale : Enum("linear", "log", "logicle") (default = "linear")
        What scale to use on the Y axis

    xfacet : Str
        the conditioning variable for horizontal subplots

    yfacet : Str
        the conditioning variable for vertical subplots

    huefacet :
        the conditioning variable for color.

    huescale : Enum("linear", "log", "logicle") (default = "linear")
        scale for the hue facet, if there are a lot of hue values.

    x_error_statistic, y_error_statistic : Tuple(Str, Str)
        if specified, draw error bars.  must be the name of a statistic,
        with the same indices as `xstatistic` and `ystatistic`.

    subset : Str
        What subset of the data to plot?

    Examples
    --------

    Assume we want an input-output curve for a repressor that's under the
    control of a Dox-inducible promoter.  We have an "input" channel
    `(Dox --> eYFP, FITC-A channel)` and an output channel
    `(Dox --> repressor --| eBFP, Pacific Blue channel)` as well as a
    constitutive expression channel (mKate, PE-Tx-Red-YG-A channel).
    We have induced several wells with different amounts of Dox.  We want
    to plot the relationship between the input and output channels (binned by
    input channel intensity) as we vary Dox, faceted by constitutive channel
    bin.

    >>> cfp_bin_op = flow.BinningOp(name = "CFP_Bin",
    ...                             channel = "PE-Tx-Red-YG-A",
    ...                             scale = "log",
    ...                             bin_width = 0.1)
    >>> ifp_bin_op = flow.BinningOp(name = "IFP_Bin",
    ...                             channel = "Pacific Blue-A",
    ...                             scale = "log",
    ...                             bin_width = 0.1)
    >>> ifp_mean = flow.ChannelStatisticOp(name = "IFP",
    ...                                    channel = "FITC-A",
    ...                                    by = ["IFP_Bin", "CFP_Bin"],
    ...                                    function = flow.geom_mean)
    >>> ofp_mean = flow.ChannelStatisticOp(name = "OFP",
    ...                                    channel = "Pacific_Blue-A",
    ...                                    by = ["IFP_Bin", "CFP_Bin"],
    ...                                    function = flow.geom_mean)
    >>> ex = cfp_bin_op.apply(ex)
    >>> ex = ifp_bin_op.apply(ex)
    >>> ex = ifp_mean.apply(ex)
    >>> ex = ofp_mean.apply(ex)
    >>> view = flow.Stats2DView(name = "IFP vs OFP",
    ...                         variable = "IFP_Bin",
    ...                         xstatistic = ("IFP", "geom_mean"),
    ...                         ystatistic = ("OFP", "geom_mean"),
    ...                         huefacet = "CFP_Bin")
    >>> view.plot(ex)
    """

    # traits
    id = "edu.mit.synbio.cytoflow.view.stats2d"
    friendly_id = "2D Statistics View"

    # deprecated or removed attributes give warnings & errors, respectively
    by = util.Deprecated(
        new='variable',
        err_string="'by' is deprecated, please use 'variable'")

    STATS_REMOVED = "{} has been removed. Statistics changed dramatically in 0.5; please see the documentation."

    xchannel = util.Removed(err_string=STATS_REMOVED)
    xfunction = util.Removed(err_string=STATS_REMOVED)
    ychannel = util.Removed(err_string=STATS_REMOVED)
    yfunction = util.Removed(err_string=STATS_REMOVED)

    name = Str
    variable = Str
    xstatistic = Tuple(Str, Str)
    xscale = util.ScaleEnum
    ystatistic = Tuple(Str, Str)
    yscale = util.ScaleEnum

    xfacet = Str
    yfacet = Str
    huefacet = Str
    huescale = util.ScaleEnum

    x_error_statistic = Tuple(Str, Str)
    y_error_statistic = Tuple(Str, Str)

    subset = Str

    def enum_plots(self, experiment):
        """
        Returns an iterator over the possible plots that this View can
        produce.  The values returned can be passed to "plot".

        Raises
        ------
        util.CytoflowViewError
            If the view is misconfigured for `experiment`.
        """
        data, _ = self._make_data(experiment)
        facets = self._used_facets(experiment, data)

        # every index level that is not a facet selects a separate plot
        by = list(set(data.index.names) - set(facets))

        if by:
            return (key for key, _group in experiment.data.groupby(by))

        # no unused levels: there is exactly one plot, and its name is None
        return iter([None])

    def plot(self, experiment, plot_name=None, **kwargs):
        """
        Plot `ystatistic` against `xstatistic`, one point per value of
        `variable`.

        Parameters
        ----------
        experiment : the experiment holding the statistics to plot

        plot_name : one of the values returned by `enum_plots`
            Required if (and only if) the statistics have index levels that
            are not used as `variable` or as a facet.

        xlim, ylim : (float, float)
            Axis limits; by default, the data range padded by 10%.

        col_wrap : int
            Wrap the `xfacet` columns after this many plots.  Requires
            `xfacet` to be set and `yfacet` to be unset.

        sharex, sharey : bool (default = True)
            Passed to `seaborn.FacetGrid`.

        Other `kwargs` are passed to `matplotlib.pyplot.plot`.

        Raises
        ------
        util.CytoflowViewError
            If the view is misconfigured for `experiment`.
        """
        data, columns = self._make_data(experiment)
        facets = self._used_facets(experiment, data)
        names = list(data.index.names)

        xname, yname = columns['x'], columns['y']
        x_error_name, y_error_name = columns['x_error'], columns['y_error']

        col_wrap = kwargs.pop('col_wrap', None)

        if col_wrap and self.yfacet:
            raise util.CytoflowViewError(
                "Can't set yfacet and col_wrap at the same time.")

        if col_wrap and not self.xfacet:
            raise util.CytoflowViewError("Must set xfacet to use col_wrap.")

        unused_names = list(set(names) - set(facets))

        if plot_name is not None and not unused_names:
            raise util.CytoflowViewError("You specified a plot name, but all "
                                         "the facets are already used")

        data = data.reset_index()

        if unused_names:
            groupby = data.groupby(unused_names)

            if plot_name is None:
                raise util.CytoflowViewError(
                    "You must use facets {} in either the "
                    "plot variables or the plot name. "
                    "Possible plot names: {}".format(unused_names,
                                                     groupby.groups.keys()))

            if plot_name not in set(groupby.groups.keys()):
                raise util.CytoflowViewError(
                    "Plot {} not from plot_enum; must "
                    "be one of {}".format(plot_name, groupby.groups.keys()))

            data = groupby.get_group(plot_name)

        # The error statistics are tuples and can never be None, so the
        # scales were always built from the data statistics.
        xscale = util.scale_factory(self.xscale,
                                    experiment,
                                    statistic=self.xstatistic)
        yscale = util.scale_factory(self.yscale,
                                    experiment,
                                    statistic=self.ystatistic)

        x_error_stat = (experiment.statistics[self.x_error_statistic]
                        if x_error_name is not None else None)
        y_error_stat = (experiment.statistics[self.y_error_statistic]
                        if y_error_name is not None else None)

        xlim = kwargs.pop("xlim", None)
        if xlim is None:
            xlim = self._axis_limits(xscale, data[xname], x_error_stat)

        ylim = kwargs.pop("ylim", None)
        if ylim is None:
            ylim = self._axis_limits(yscale, data[yname], y_error_stat)

        kwargs.setdefault('antialiased', True)

        if col_wrap:
            cols = col_wrap
        elif self.xfacet:
            cols = len(data[self.xfacet].unique())
        else:
            cols = 1

        sharex = kwargs.pop('sharex', True)
        sharey = kwargs.pop('sharey', True)

        grid = sns.FacetGrid(data,
                             size=(6 / cols),
                             aspect=1.5,
                             col=(self.xfacet if self.xfacet else None),
                             row=(self.yfacet if self.yfacet else None),
                             hue=(self.huefacet if self.huefacet else None),
                             col_order=(np.sort(data[self.xfacet].unique())
                                        if self.xfacet else None),
                             row_order=(np.sort(data[self.yfacet].unique())
                                        if self.yfacet else None),
                             hue_order=(np.sort(data[self.huefacet].unique())
                                        if self.huefacet else None),
                             col_wrap=col_wrap,
                             legend_out=False,
                             sharex=sharex,
                             sharey=sharey,
                             xlim=xlim,
                             ylim=ylim)

        for ax in grid.axes.flatten():
            ax.set_xscale(self.xscale, **xscale.mpl_params)
            ax.set_yscale(self.yscale, **yscale.mpl_params)

        # plot the error bars first so the axis labels don't get overwritten
        if x_error_name is not None:
            grid.map(_x_error_bars, xname, yname, x_error_name)

        if y_error_name is not None:
            grid.map(_y_error_bars, xname, yname, y_error_name)

        grid.map(plt.plot, xname, yname, **kwargs)

        # make sure every subplot ends up with the same Y and X ranges
        axes = plt.gcf().get_axes()
        if axes:
            fig_y_min = min(ax.get_ylim()[0] for ax in axes)
            fig_y_max = max(ax.get_ylim()[1] for ax in axes)
            fig_x_min = min(ax.get_xlim()[0] for ax in axes)
            fig_x_max = max(ax.get_xlim()[1] for ax in axes)

            for ax in axes:
                ax.set_ylim(fig_y_min, fig_y_max)
                ax.set_xlim(fig_x_min, fig_x_max)

        # if we have a hue facet and a lot of hues, make a color bar instead
        # of a super-long legend.
        if self.huefacet:
            current_palette = mpl.rcParams['axes.color_cycle']
            if util.is_numeric(experiment.data[self.huefacet]) and \
                    len(grid.hue_names) > len(current_palette):

                plot_ax = plt.gca()
                cmap = mpl.colors.ListedColormap(
                    sns.color_palette("husl", n_colors=len(grid.hue_names)))
                cax, _ = mpl.colorbar.make_axes(plt.gca())
                hue_scale = util.scale_factory(self.huescale,
                                               experiment,
                                               condition=self.huefacet)
                mpl.colorbar.ColorbarBase(cax,
                                          cmap=cmap,
                                          norm=hue_scale.color_norm(),
                                          label=self.huefacet)
                plt.sca(plot_ax)
            else:
                grid.add_legend(title=self.huefacet)

        plt.xlabel(self.xstatistic)
        plt.ylabel(self.ystatistic)

        if unused_names and plot_name is not None:
            plt.title("{0} = {1}".format(unused_names, plot_name))

    @staticmethod
    def _axis_limits(scale, values, error_stat):
        """Return default (low, high) axis limits, padded by 10% and clipped."""
        limits = (scale.clip(values.min() * 0.9),
                  scale.clip(values.max() * 1.1))

        if error_stat is not None:
            try:
                # error values given as (low, high) pairs
                limits = (scale.clip(min([x[0] for x in error_stat]) * 0.9),
                          scale.clip(max([x[1] for x in error_stat]) * 1.1))
            except (TypeError, IndexError):
                # scalar error values cannot be subscripted
                limits = (scale.clip(error_stat.min() * 0.9),
                          scale.clip(error_stat.max() * 1.1))

        return limits

    @staticmethod
    def _error_stat(experiment, error_statistic, stat, axis):
        """Look up one axis' error statistic, or return None if it is unset."""
        if not error_statistic[0]:
            return None

        if error_statistic not in experiment.statistics:
            raise util.CytoflowViewError(
                "{} error statistic not in experiment".format(axis))

        error_stat = experiment.statistics[error_statistic]

        if not error_stat.index.equals(stat.index):
            raise util.CytoflowViewError(
                "{0} error statistic doesn't have the "
                "same indices as the {0} statistic".format(axis))

        return error_stat

    def _make_data(self, experiment):
        """
        Validate the statistics and assemble them into one frame.

        Returns
        -------
        (pandas.DataFrame, dict)
            The frame is indexed like the statistics, restricted by `subset`
            and stripped of index levels that have a single value.  The dict
            maps ``'x'``, ``'y'``, ``'x_error'`` and ``'y_error'`` to the
            (random) column names used in the frame; the error entries are
            None when no error statistic is set.
        """
        if not experiment:
            raise util.CytoflowViewError("No experiment specified")

        if not self.variable:
            raise util.CytoflowViewError("variable not set")

        if self.variable not in experiment.conditions:
            raise util.CytoflowViewError(
                "variable {0} not in the experiment".format(self.variable))

        # the statistic traits are 2-tuples and therefore always truthy; an
        # unset statistic shows up as an empty first element.
        if not self.xstatistic[0]:
            raise util.CytoflowViewError("X statistic not set")

        if self.xstatistic not in experiment.statistics:
            raise util.CytoflowViewError(
                "Can't find X statistic {} in experiment".format(
                    self.xstatistic))

        xstat = experiment.statistics[self.xstatistic]

        if not self.ystatistic[0]:
            raise util.CytoflowViewError("Y statistic not set")

        if self.ystatistic not in experiment.statistics:
            raise util.CytoflowViewError(
                "Can't find Y statistic {} in experiment".format(
                    self.ystatistic))

        ystat = experiment.statistics[self.ystatistic]

        if not xstat.index.equals(ystat.index):
            raise util.CytoflowViewError(
                "X statistic and Y statistic must have "
                "the same indices: {}".format(xstat.index.names))

        x_error_stat = self._error_stat(experiment, self.x_error_statistic,
                                        xstat, "X")
        y_error_stat = self._error_stat(experiment, self.y_error_statistic,
                                        ystat, "Y")

        # random column names can't collide with the names of index levels
        columns = {'x': util.random_string(6),
                   'y': util.random_string(6),
                   'x_error': None,
                   'y_error': None}

        data = pd.DataFrame(index=xstat.index)
        data[columns['x']] = xstat
        data[columns['y']] = ystat

        if x_error_stat is not None:
            columns['x_error'] = util.random_string(6)
            data[columns['x_error']] = x_error_stat

        if y_error_stat is not None:
            columns['y_error'] = util.random_string(6)
            data[columns['y_error']] = y_error_stat

        if self.subset:
            try:
                # TODO - either sanitize column names, or check to see that
                # all conditions are valid Python variables
                data = data.query(self.subset)
            except Exception as e:
                raise util.CytoflowViewError(
                    "Subset string '{0}' isn't valid".format(
                        self.subset)) from e

            if len(data) == 0:
                raise util.CytoflowViewError(
                    "Subset string '{0}' returned no values".format(
                        self.subset))

        # drop index levels that have only one value
        for name in list(data.index.names):
            unique_values = data.index.get_level_values(name).unique()
            if len(unique_values) == 1:
                warn("Only one value for level {}; dropping it.".format(name),
                     util.CytoflowViewWarning)
                try:
                    data.index = data.index.droplevel(name)
                except (AttributeError, ValueError) as e:
                    # the last remaining level can't be dropped
                    raise util.CytoflowViewError(
                        "Must have more than one "
                        "value to plot.") from e

        return data, columns

    def _used_facets(self, experiment, data):
        """Validate `variable` and the facets; return the ones that are set."""
        if self.variable not in data.index.names:
            raise util.CytoflowViewError(
                "Variable {} not in statistic; must be one of {}".format(
                    self.variable, data.index.names))

        for axis, facet in (("X", self.xfacet), ("Y", self.yfacet)):
            if facet and facet not in experiment.conditions:
                raise util.CytoflowViewError(
                    "{} facet {} not in the experiment".format(axis, facet))

            if facet and facet not in data.index.names:
                raise util.CytoflowViewError(
                    "{} facet {} not in statistics; must be one of {}".format(
                        axis, facet, data.index.names))

        if self.huefacet and self.huefacet not in experiment.metadata:
            raise util.CytoflowViewError(
                "Hue facet {} not in the experiment".format(self.huefacet))

        if self.huefacet and self.huefacet not in data.index.names:
            raise util.CytoflowViewError(
                "Hue facet {} not in statistics; must be one of {}".format(
                    self.huefacet, data.index.names))

        facets = [facet
                  for facet in (self.variable, self.xfacet,
                                self.yfacet, self.huefacet)
                  if facet]

        if len(facets) != len(set(facets)):
            raise util.CytoflowViewError("Can't reuse facets")

        return facets
class Stats1DView(Base1DStatisticsView):
    """
    Plot a statistic.  The value of the statistic will be plotted on the
    Y axis; a numeric conditioning variable must be chosen for the X axis.
    Every variable in the statistic must be specified as either the
    `variable` or one of the plot facets.

    Attributes
    ----------
    name : Str
        The plot's name

    statistic : Tuple(Str, Str)
        The statistic to plot.  The first element is the name of the
        module that added the statistic, and the second element is the
        name of the statistic.

    variable : Str
        the name of the conditioning variable to put on the X axis.  Must be
        numeric (float or int).

    xscale : Enum("linear", "log") (default = "linear")
        The scale to use on the X axis

    yscale : Enum("linear", "log", "logicle") (default = "linear")
        The scale to use on the Y axis

    xfacet : Str
        the conditioning variable for horizontal subplots

    yfacet : Str
        the conditioning variable for vertical subplots

    huefacet :
        the conditioning variable for color.

    huescale :
        the scale to use on the "hue" axis, if there are many values of
        the hue facet.

    error_statistic : Tuple(Str, Str)
        A statistic to use to draw error bars; the bars are +- the value
        of the statistic.

    subset : String
        Passed to pandas.DataFrame.query(), to get a subset of the statistic
        before we plot it.

    Examples
    --------

    Assume we want a Dox induction curve in a transient transfection
    experiment.  We have induced several wells with different amounts of
    Dox and the output of the Dox-inducible channel is "Pacific Blue-A".
    We have a constitutive expression channel in "PE-Tx-Red-YG-A". We want
    to bin all the data by constitutive expression level, then plot the
    dose-response (geometric mean) curve in each bin.

    >>> ex_bin = flow.BinningOp(name = "CFP_Bin",
    ...                         channel = "PE-Tx-Red-YG-A",
    ...                         scale = "log",
    ...                         bin_width = 0.1).apply(ex)
    >>> ex_stat = flow.ChannelStatisticOp(name = "DoxCFP",
    ...                                   by = ["Dox", "CFP_Bin"],
    ...                                   channel = "Pacific Blue-A",
    ...                                   function = flow.geom_mean).apply(ex_bin)
    >>> view = flow.Stats1DView(name = "Dox vs IFP",
    ...                         statistic = ("DoxCFP", "geom_mean"),
    ...                         variable = "Dox",
    ...                         xscale = "log",
    ...                         huefacet = "CFP_Bin")
    >>> view.plot(ex_stat)
    """

    # traits
    id = "edu.mit.synbio.cytoflow.view.stats1d"
    friendly_id = "1D Statistics View"

    REMOVED_ERROR = "Statistics changed dramatically in 0.5; please see the documentation"

    by = util.Removed(err_string=REMOVED_ERROR)
    yfunction = util.Removed(err_string=REMOVED_ERROR)
    ychannel = util.Removed(err_string=REMOVED_ERROR)
    xvariable = util.Deprecated(new="variable")

    def enum_plots(self, experiment):
        """
        Returns an iterator over the possible plots that this View can
        produce.  The values returned can be passed to "plot".
        """
        return super().enum_plots(experiment)

    def plot(self, experiment, plot_name=None, **kwargs):
        """Plot a chart of a variable's values against a statistic.

        Parameters
        ----------
        color : a matplotlib color
            The color to plot with.  Overridden if `huefacet` is not `None`

        linestyle : ['solid' | 'dashed', 'dashdot', 'dotted' | (offset, on-off-dash-seq) | '-' | '--' | '-.' | ':' | 'None' | ' ' | '']

        marker : a matplotlib marker style
            See http://matplotlib.org/api/markers_api.html#module-matplotlib.markers

        markersize : int
            The marker size in points

        markerfacecolor : a matplotlib color
            The color to make the markers.  Overridden (?) if `huefacet` is
            not `None`

        alpha : the alpha blending value, from 0.0 (transparent) to 1.0 (opaque)

        Other Parameters
        ----------------
        Other `kwargs` are passed to matplotlib.pyplot.plot_.

        .. _matplotlib.pyplot.plot: https://matplotlib.org/devdocs/api/_as_gen/matplotlib.pyplot.plot.html

        See Also
        --------
        BaseView.plot : common parameters for data views
        """
        super().plot(experiment, plot_name, **kwargs)

    def _grid_plot(self, experiment, grid, xlim, ylim, xscale, yscale,
                   **kwargs):
        """
        Draw the statistic (and optional error bars) onto `grid`.

        `xlim` and `ylim` are used as given; a value of None is replaced by
        the data range padded by 10% and clipped to the matching scale.
        Remaining `kwargs` are forwarded to the plotting functions.

        Returns
        -------
        dict
            The limits actually used, under ``'xlim'`` and ``'ylim'``.
        """
        data = grid.data

        stat = experiment.statistics[self.statistic]
        stat_name = stat.name

        if self.error_statistic[0]:
            err_stat = experiment.statistics[self.error_statistic]
            err_stat_name = err_stat.name

        # xlim and ylim arrive as named parameters, so they can never be in
        # kwargs; popping them there would discard the caller's limits.
        if xlim is None:
            xlim = (xscale.clip(data[self.variable].min() * 0.9),
                    xscale.clip(data[self.variable].max() * 1.1))

        if ylim is None:
            ylim = (yscale.clip(data[stat_name].min() * 0.9),
                    yscale.clip(data[stat_name].max() * 1.1))

            if self.error_statistic[0]:
                try:
                    # error values given as (low, high) pairs
                    ylim = (yscale.clip(
                                min([x[0] for x in data[err_stat_name]]) * 0.9),
                            yscale.clip(
                                max([x[1] for x in data[err_stat_name]]) * 1.1))
                except (TypeError, IndexError):
                    # scalar error values: extend the data range by the error
                    ylim = (yscale.clip(
                                (data[stat_name].min()
                                 - data[err_stat_name].min()) * 0.9),
                            yscale.clip(
                                (data[stat_name].max()
                                 + data[err_stat_name].max()) * 1.1))

        # plot the error bars first so the axis labels don't get overwritten
        if self.error_statistic[0]:
            grid.map(_error_bars,
                     self.variable,
                     stat_name,
                     err_stat_name,
                     **kwargs)

        grid.map(plt.plot, self.variable, stat_name, **kwargs)

        return {'xlim': xlim, 'ylim': ylim}
class BarChartView(HasStrictTraits):
    """
    Plots a bar chart of some summary statistic

    Attributes
    ----------
    name : Str
        The bar chart's name

    statistic : Tuple(Str, Str)
        The statistic we're plotting; must be a key in the experiment's
        `statistics`.

    scale : Enum("linear", "log", "logicle") (default = "linear")
        The scale to use on the value axis (the Y axis for a vertical
        chart, the X axis for a horizontal one).

    variable : Str
        the name of the conditioning variable to group the chart's bars

    error_statistic : Tuple(Str, Str)
        if specified, a statistic to draw error bars.  if values are numeric,
        the bars are drawn +/- the value.  if the values are tuples, then
        the first element is the low error and the second element is the
        high error.

    xfacet : Str
        the conditioning variable for horizontal subplots

    yfacet : Str
        the conditioning variable for vertical subplots

    huefacet : Str
        the conditioning variable to make multiple bar colors

    orientation : Enum("vertical", "horizontal")
        do we plot the bar chart horizontally or vertically?
        TODO - waiting on seaborn v0.6

    subset : String
        Passed to pandas.DataFrame.query(), to get a subset of the statistic
        before we plot it.

    Examples
    --------
    The statistic must already have been added to the experiment; the
    statistic key below is only illustrative.

    >>> bar = flow.BarChartView()
    >>> bar.name = "Bar Chart"
    >>> bar.statistic = ("Y2_Count", "len")
    >>> bar.variable = 'Y2-A+'
    >>> bar.huefacet = 'Dox'
    >>> bar.plot(ex)
    """

    # traits
    id = "edu.mit.synbio.cytoflow.view.barchart"
    friendly_id = "Bar Chart"

    REMOVED_ERROR = "Statistics have changed dramatically in 0.5; please see the documentation"
    channel = util.Removed(err_string=REMOVED_ERROR)
    function = util.Removed(err_string=REMOVED_ERROR)
    error_bars = util.Removed(err_string=REMOVED_ERROR)

    by = util.Deprecated(new='variable')

    name = Str
    statistic = Tuple(Str, Str)
    scale = util.ScaleEnum
    variable = Str
    orientation = Enum("vertical", "horizontal")

    xfacet = Str
    yfacet = Str
    huefacet = Str

    error_statistic = Tuple(Str, Str)
    subset = Str

    def _make_data(self, experiment):
        """
        Validate the view against `experiment` and build the plot data.

        Shared by :meth:`enum_plots` and :meth:`plot` so both apply exactly
        the same checks.

        Returns
        -------
        (stat, error_stat, error_name, data, names)
            `stat` and `error_stat` are the entries looked up in
            `experiment.statistics` (`error_stat` is `None` when no error
            statistic is set); `error_name` is the randomly named column
            holding the error values in `data` (or `None`); `data` is the
            DataFrame indexed like the statistic, after the `subset` query
            and after single-valued index levels were dropped; `names` is
            the list of the remaining index level names.

        Raises
        ------
        util.CytoflowViewError
            If the experiment, statistic(s), subset, variable or facets are
            missing or inconsistent.
        """

        if not experiment:
            raise util.CytoflowViewError("No experiment specified")

        if self.statistic not in experiment.statistics:
            raise util.CytoflowViewError(
                "Can't find the statistic {} in the experiment".format(
                    self.statistic))
        stat = experiment.statistics[self.statistic]

        error_stat = None
        if self.error_statistic[0]:
            if self.error_statistic not in experiment.statistics:
                raise util.CytoflowViewError(
                    "Can't find the error statistic in the experiment")
            error_stat = experiment.statistics[self.error_statistic]

        if error_stat is not None and not stat.index.equals(error_stat.index):
            raise util.CytoflowViewError(
                "Data statistic and error statistic "
                " don't have the same index.")

        data = pd.DataFrame(index=stat.index)
        data[stat.name] = stat

        # the error values live in a randomly named column
        error_name = None
        if error_stat is not None:
            error_name = util.random_string(6)
            data[error_name] = error_stat

        if self.subset:
            try:
                data = data.query(self.subset)
            except Exception:
                # only translate real query failures; a bare "except:" would
                # also swallow KeyboardInterrupt / SystemExit
                raise util.CytoflowViewError(
                    "Subset string '{0}' isn't valid".format(self.subset))

            if len(data) == 0:
                raise util.CytoflowViewError(
                    "Subset string '{0}' returned no values".format(
                        self.subset))

        # index levels with a single value can't be plotted against; drop them
        for level in list(data.index.names):
            unique_values = data.index.get_level_values(level).unique()
            if len(unique_values) == 1:
                warn("Only one value for level {}; dropping it.".format(level),
                     util.CytoflowViewWarning)
                try:
                    data.index = data.index.droplevel(level)
                except AttributeError:
                    raise util.CytoflowViewError("Must have more than one "
                                                 "value to plot.")

        names = list(data.index.names)

        if not self.variable:
            raise util.CytoflowViewError("variable not specified")

        if self.variable not in names:
            raise util.CytoflowViewError("Variable {} isn't in the statistic; "
                                         "must be one of {}".format(
                                             self.variable, names))

        if self.xfacet and self.xfacet not in experiment.conditions:
            raise util.CytoflowViewError(
                "X facet {0} isn't in the experiment".format(self.xfacet))

        if self.xfacet and self.xfacet not in names:
            raise util.CytoflowViewError(
                "X facet {} is not a statistic index; "
                "must be one of {}".format(self.xfacet, names))

        if self.yfacet and self.yfacet not in experiment.conditions:
            raise util.CytoflowViewError(
                "Y facet {0} isn't in the experiment".format(self.yfacet))

        if self.yfacet and self.yfacet not in names:
            raise util.CytoflowViewError(
                "Y facet {} is not a statistic index; "
                "must be one of {}".format(self.yfacet, names))

        if self.huefacet and self.huefacet not in experiment.conditions:
            raise util.CytoflowViewError(
                "Hue facet {0} isn't in the experiment".format(self.huefacet))

        if self.huefacet and self.huefacet not in names:
            raise util.CytoflowViewError(
                "Hue facet {} is not a statistic index; "
                "must be one of {}".format(self.huefacet, names))

        return stat, error_stat, error_name, data, names

    def enum_plots(self, experiment):
        """
        Returns an iterator over the possible plots that this View can
        produce.  The values returned can be passed to "plot".

        If every index level of the statistic is used by `variable` or a
        facet, the iterator yields a single `None`.
        """

        _, _, _, _, names = self._make_data(experiment)

        # a list (not filter()) so that len() also works on Python 3
        facets = [f for f in [self.variable, self.xfacet,
                              self.yfacet, self.huefacet] if f]
        if len(facets) != len(set(facets)):
            raise util.CytoflowViewError("Can't reuse facets")

        by = list(set(names) - set(facets))

        class plot_enum(object):
            """Iterates over the group keys of `experiment.data` grouped by `by`."""

            def __init__(self, experiment, by):
                self._iter = None
                self._returned = False

                if by:
                    self._iter = iter(experiment.data.groupby(by))

            def __iter__(self):
                return self

            def __next__(self):
                if self._iter is not None:
                    return next(self._iter)[0]

                # nothing to group by: yield exactly one None
                if self._returned:
                    raise StopIteration
                self._returned = True
                return None

            # Python 2 iterator protocol
            next = __next__

        return plot_enum(experiment, by)

    def plot(self, experiment, plot_name=None, **kwargs):
        """
        Plot a bar chart

        Parameters
        ----------
        experiment : Experiment
            The experiment holding the statistic(s) to plot.

        plot_name : object
            One of the values returned by :meth:`enum_plots`.  Required when
            some index levels of the statistic are not used by `variable`
            or a facet.

        col_wrap : int
            Passed to the seaborn FacetGrid; requires `xfacet` and can not
            be combined with `yfacet`.

        sharex, sharey : bool (default = True)
            Passed to the seaborn FacetGrid; when set, the corresponding
            axis limits are also made equal across all the axes.

        Other `kwargs` are passed on to the bar plotting function.

        Raises
        ------
        util.CytoflowViewError
            If the view's parameters are inconsistent with the experiment.
        """

        stat, error_stat, error_name, data, names = \
            self._make_data(experiment)

        col_wrap = kwargs.pop('col_wrap', None)

        if col_wrap and self.yfacet:
            raise util.CytoflowViewError(
                "Can't set yfacet and col_wrap at the same time.")

        if col_wrap and not self.xfacet:
            raise util.CytoflowViewError("Must set xfacet to use col_wrap.")

        # a list (not filter()) so that len() also works on Python 3
        facets = [f for f in [self.variable, self.xfacet,
                              self.yfacet, self.huefacet] if f]
        if len(facets) != len(set(facets)):
            raise util.CytoflowViewError("Can't reuse facets")

        unused_names = list(set(names) - set(facets))

        if plot_name is not None and not unused_names:
            raise util.CytoflowViewError("You specified a plot name, but all "
                                         "the facets are already used")

        data.reset_index(inplace=True)

        if unused_names:
            groupby = data.groupby(unused_names)

            if plot_name is None:
                raise util.CytoflowViewError(
                    "You must use facets {} in either the "
                    "plot variables or the plot name. "
                    "Possible plot names: {}".format(unused_names,
                                                     groupby.groups.keys()))

            if plot_name not in set(groupby.groups.keys()):
                raise util.CytoflowViewError(
                    "Plot {} not from plot_enum; must "
                    "be one of {}".format(plot_name, groupby.groups.keys()))

            data = groupby.get_group(plot_name)

        sharex = kwargs.pop('sharex', True)
        sharey = kwargs.pop('sharey', True)

        cols = col_wrap if col_wrap else \
            (len(data[self.xfacet].unique()) if self.xfacet else 1)

        g = sns.FacetGrid(data,
                          size=(6 / cols),
                          aspect=1.5,
                          col=(self.xfacet if self.xfacet else None),
                          row=(self.yfacet if self.yfacet else None),
                          col_order=(np.sort(data[self.xfacet].unique())
                                     if self.xfacet else None),
                          row_order=(np.sort(data[self.yfacet].unique())
                                     if self.yfacet else None),
                          col_wrap=col_wrap,
                          legend_out=False,
                          sharex=sharex,
                          sharey=sharey)

        scale = util.scale_factory(self.scale, experiment,
                                   statistic=self.statistic)

        # because the bottom of a bar chart is "0", masking out bad
        # values on a log scale doesn't work.  we must clip instead.
        if self.scale == "log":
            scale.mode = "clip"

        # set the scale for each set of axes; can't just call plt.xscale()
        for ax in g.axes.flatten():
            if self.orientation == 'horizontal':
                ax.set_xscale(self.scale, **scale.mpl_params)
            else:
                ax.set_yscale(self.scale, **scale.mpl_params)

        map_args = [self.variable, stat.name]

        if self.huefacet:
            map_args.append(self.huefacet)

        if error_stat is not None:
            map_args.append(error_name)

        g.map(_barplot,
              *map_args,
              view=self,
              stat_name=stat.name,
              error_name=error_name,
              **kwargs)

        # fetched unconditionally: the axis labelling below needs it even
        # when neither sharex nor sharey is set
        fig = plt.gcf()

        if sharex:
            # if we are sharing x axes, make sure the x scale is the same
            # for each
            fig_x_min = float("inf")
            fig_x_max = float("-inf")

            for ax in fig.get_axes():
                ax_x_min, ax_x_max = ax.get_xlim()
                if ax_x_min < fig_x_min:
                    fig_x_min = ax_x_min
                if ax_x_max > fig_x_max:
                    fig_x_max = ax_x_max

            for ax in fig.get_axes():
                ax.set_xlim(fig_x_min, fig_x_max)

        if sharey:
            # if we are sharing y axes, make sure the y scale is the same
            # for each
            fig_y_min = float("inf")
            fig_y_max = float("-inf")

            for ax in fig.get_axes():
                ax_y_min, ax_y_max = ax.get_ylim()
                if ax_y_min < fig_y_min:
                    fig_y_min = ax_y_min
                if ax_y_max > fig_y_max:
                    fig_y_max = ax_y_max

            for ax in fig.get_axes():
                ax.set_ylim(fig_y_min, fig_y_max)

        if self.huefacet:
            labels = np.sort(data[self.huefacet].unique())
            labels = [str(x) for x in labels]
            g.add_legend(title=self.huefacet, label_order=labels)

        # label the value axis with the statistic
        plt.sca(fig.get_axes()[0])
        if self.orientation == 'horizontal':
            plt.xlabel(self.statistic)
        else:
            plt.ylabel(self.statistic)

        if unused_names and plot_name is not None:
            plt.title("{0} = {1}".format(unused_names, plot_name))
class Base1DStatisticsView(BaseStatisticsView):
    """
    Common machinery for one-dimensional statistic views: the
    :attr:`variable` attribute goes on the x axis and the value of the
    statistic goes on the y axis.

    Attributes
    ----------
    statistic : (str, str)
        Key of the statistic to plot; it must be present in the
        :attr:`~Experiment.statistics` attribute of the
        :class:`~.Experiment` being plotted.

    error_statistic : (str, str)
        Key of the statistic used to draw error bars; it must be present
        in the :attr:`~Experiment.statistics` attribute of the
        :class:`~.Experiment` being plotted.
    """

    REMOVED_ERROR = "Statistics changed dramatically in 0.5; please see the documentation"

    by = util.Removed(err_string = REMOVED_ERROR)
    yfunction = util.Removed(err_string = REMOVED_ERROR)
    ychannel = util.Removed(err_string = REMOVED_ERROR)
    channel = util.Removed(err_string = REMOVED_ERROR)
    function = util.Removed(err_string = REMOVED_ERROR)
    error_bars = util.Removed(err_string = REMOVED_ERROR)

    xvariable = util.Deprecated(new = "variable")

    statistic = Tuple(Str, Str)
    error_statistic = Tuple(Str, Str)

    def enum_plots(self, experiment):
        """Build the statistic frame and hand it to the parent's `enum_plots`."""
        return super().enum_plots(experiment, self._make_data(experiment))

    def plot(self, experiment, plot_name = None, **kwargs):
        """
        Check `variable`, build the x and y scales and delegate the actual
        drawing to the parent class.

        Raises
        ------
        util.CytoflowViewError
            If the data can't be built, or `variable` is unset or is not
            one of the experiment's conditions.
        """
        frame = self._make_data(experiment)

        if not self.variable:
            raise util.CytoflowViewError('variable',
                                         "variable not set")

        if self.variable not in experiment.conditions:
            raise util.CytoflowViewError('variable',
                                         "variable {0} not in the experiment"
                                         .format(self.variable))

        # an x scale is only built for a numeric variable
        xscale = (util.scale_factory(self.xscale,
                                     experiment,
                                     condition = self.variable)
                  if util.is_numeric(experiment[self.variable])
                  else None)

        yscale = util.scale_factory(self.yscale,
                                    experiment,
                                    statistic = self.statistic,
                                    error_statistic = self.error_statistic)

        super().plot(experiment,
                     frame,
                     plot_name,
                     xscale = xscale,
                     yscale = yscale,
                     **kwargs)

    def _make_data(self, experiment):
        """
        Return a DataFrame indexed like the statistic, with one column for
        the statistic and, if an error statistic is set, one more column
        for it.

        Raises
        ------
        util.CytoflowViewError
            If the experiment or statistic is missing, the statistic is not
            numeric, or the error statistic is missing, has a different
            index, or has the same name as the data statistic.
        """
        if experiment is None:
            raise util.CytoflowViewError('experiment',
                                         "No experiment specified")

        if not self.statistic:
            raise util.CytoflowViewError('statistic',
                                         "Statistic not set")

        if self.statistic not in experiment.statistics:
            raise util.CytoflowViewError('statistic',
                                         "Can't find the statistic {} in the experiment"
                                         .format(self.statistic))

        values = experiment.statistics[self.statistic]

        if not util.is_numeric(values):
            raise util.CytoflowViewError('statistic',
                                         "Statistic must be numeric")

        errors = None
        if self.error_statistic[0]:
            if self.error_statistic not in experiment.statistics:
                raise util.CytoflowViewError('error_statistic',
                                             "Can't find the error statistic in the experiment")
            errors = experiment.statistics[self.error_statistic]

        have_errors = errors is not None

        if have_errors:
            if not values.index.equals(errors.index):
                raise util.CytoflowViewError('error_statistic',
                                             "Data statistic and error statistic "
                                             " don't have the same index.")

            # both end up as columns of one frame, keyed by name
            if values.name == errors.name:
                raise util.CytoflowViewError('error_statistic',
                                             "Data statistic and error statistic can "
                                             "not have the same name.")

        frame = pd.DataFrame(index = values.index)
        frame[values.name] = values

        if have_errors:
            frame[errors.name] = errors

        return frame
class Stats1DView(HasStrictTraits):
    """
    Plot a statistic.  The value of the statistic will be plotted on the
    Y axis; a numeric conditioning variable must be chosen for the X axis.
    Every variable in the statistic must be specified as either the `variable`
    or one of the plot facets.

    Attributes
    ----------
    name : Str
        The plot's name

    statistic : Tuple(Str, Str)
        The statistic to plot.  The first element is the name of the
        module that added the statistic, and the second element is the
        name of the statistic.

    variable : Str
        the name of the conditioning variable to put on the X axis.  Must be
        numeric (float or int).

    xscale : Enum("linear", "log") (default = "linear")
        The scale to use on the X axis

    yscale : Enum("linear", "log", "logicle") (default = "linear")
        The scale to use on the Y axis

    xfacet : Str
        the conditioning variable for horizontal subplots

    yfacet : Str
        the conditioning variable for vertical subplots

    huefacet :
        the conditioning variable for color.

    huescale :
        the scale to use on the "hue" axis, if there are many values of
        the hue facet.

    error_statistic : Tuple(Str, Str)
        A statistic to use to draw error bars; the bars are +- the value
        of the statistic.

    subset : String
        Passed to pandas.DataFrame.query(), to get a subset of the statistic
        before we plot it.

    Examples
    --------

    Assume we want a Dox induction curve in a transient transfection
    experiment.  We have induced several wells with different amounts of
    Dox and the output of the Dox-inducible channel is "Pacific Blue-A".
    We have a constitutive expression channel in "PE-Tx-Red-YG-A". We want
    to bin all the data by constitutive expression level, then plot the
    dose-response (geometric mean) curve in each bin.

    >>> ex_bin = flow.BinningOp(name = "CFP_Bin",
    ...                         channel = "PE-Tx-Red-YG-A",
    ...                         scale = "log",
    ...                         bin_width = 0.1).apply(ex)
    >>> ex_stat = flow.ChannelStatisticOp(name = "DoxCFP",
    ...                                   by = ["Dox", "CFP_Bin"],
    ...                                   channel = "Pacific Blue-A",
    ...                                   function = flow.geom_mean).apply(ex_bin)
    >>> view = flow.Stats1DView(name = "Dox vs IFP",
    ...                         statistic = ("DoxCFP", "geom_mean"),
    ...                         variable = "Dox",
    ...                         xscale = "log",
    ...                         huefacet = "CFP_Bin")
    >>> view.plot(ex_stat)
    """

    # traits
    id = "edu.mit.synbio.cytoflow.view.stats1d"
    friendly_id = "1D Statistics View"

    REMOVED_ERROR = "Statistics have changed dramatically in 0.5; please see the documentation"
    by = util.Removed(err_string = REMOVED_ERROR)
    yfunction = util.Removed(err_string = REMOVED_ERROR)
    ychannel = util.Removed(err_string = REMOVED_ERROR)
    xvariable = util.Deprecated(new = "variable")

    name = Str
    statistic = Tuple(Str, Str)
    variable = Str
    xscale = util.ScaleEnum
    yscale = util.ScaleEnum
    xfacet = Str
    yfacet = Str
    huefacet = Str
    huescale = util.ScaleEnum # TODO - make this actually work

    error_statistic = Tuple(Str, Str)
    subset = Str

    def _make_data(self, experiment):
        """
        Look up the statistic(s) in `experiment` and build the plot data.

        Shared by :meth:`enum_plots` and :meth:`plot`.

        Returns
        -------
        (stat, error_stat, error_name, data)
            `stat` and `error_stat` are the entries looked up in
            `experiment.statistics` (`error_stat` is `None` when no error
            statistic is set); `error_name` is the randomly named column
            holding the error values in `data` (or `None`); `data` is the
            DataFrame indexed like the statistic, after the `subset` query
            and after single-valued index levels were dropped.

        Raises
        ------
        util.CytoflowViewError
            If the experiment or a statistic is missing, the two statistics
            have different indices, or the subset is invalid or empty.
        """

        if not experiment:
            raise util.CytoflowViewError("No experiment specified")

        if not self.statistic:
            raise util.CytoflowViewError("Statistic not set")

        if self.statistic not in experiment.statistics:
            raise util.CytoflowViewError("Can't find the statistic {} in the experiment"
                                         .format(self.statistic))
        stat = experiment.statistics[self.statistic]

        error_stat = None
        if self.error_statistic[0]:
            if self.error_statistic not in experiment.statistics:
                raise util.CytoflowViewError("Can't find the error statistic in the experiment")
            error_stat = experiment.statistics[self.error_statistic]

        if error_stat is not None and not stat.index.equals(error_stat.index):
            raise util.CytoflowViewError("Data statistic and error statistic "
                                         " don't have the same index.")

        data = pd.DataFrame(index = stat.index)
        data[stat.name] = stat

        # the error values live in a randomly named column
        error_name = None
        if error_stat is not None:
            error_name = util.random_string(6)
            data[error_name] = error_stat

        if self.subset:
            try:
                # TODO - either sanitize column names, or check to see that
                # all conditions are valid Python variables
                data = data.query(self.subset)
            except Exception:
                # only translate real query failures; a bare "except:" would
                # also swallow KeyboardInterrupt / SystemExit
                raise util.CytoflowViewError("Subset string '{0}' isn't valid"
                                             .format(self.subset))

            if len(data) == 0:
                raise util.CytoflowViewError("Subset string '{0}' returned no values"
                                             .format(self.subset))

        # index levels with a single value can't be plotted against; drop them
        for level in list(data.index.names):
            unique_values = data.index.get_level_values(level).unique()
            if len(unique_values) == 1:
                warn("Only one value for level {}; dropping it.".format(level),
                     util.CytoflowViewWarning)
                try:
                    data.index = data.index.droplevel(level)
                except AttributeError:
                    raise util.CytoflowViewError("Must have more than one "
                                                 "value to plot.")

        return stat, error_stat, error_name, data

    def enum_plots(self, experiment):
        """
        Returns an iterator over the possible plots that this View can
        produce.  The values returned can be passed to "plot".

        If every index level of the statistic is used by `variable` or a
        facet, the iterator yields a single `None`.
        """

        _, _, _, data = self._make_data(experiment)
        names = list(data.index.names)

        if not self.variable:
            raise util.CytoflowViewError("variable not specified")

        if self.variable not in names:
            raise util.CytoflowViewError("Variable {} isn't in the statistic; "
                                         "must be one of {}"
                                         .format(self.variable, names))

        if self.xfacet and self.xfacet not in experiment.conditions:
            raise util.CytoflowViewError("X facet {0} isn't in the experiment"
                                         .format(self.xfacet))

        if self.xfacet and self.xfacet not in names:
            raise util.CytoflowViewError("X facet {} is not a statistic index; "
                                         "must be one of {}".format(self.xfacet, names))

        if self.yfacet and self.yfacet not in experiment.conditions:
            raise util.CytoflowViewError("Y facet {0} isn't in the experiment"
                                         .format(self.yfacet))

        if self.yfacet and self.yfacet not in names:
            raise util.CytoflowViewError("Y facet {} is not a statistic index; "
                                         "must be one of {}".format(self.yfacet, names))

        if self.huefacet and self.huefacet not in experiment.conditions:
            raise util.CytoflowViewError("Hue facet {0} isn't in the experiment"
                                         .format(self.huefacet))

        if self.huefacet and self.huefacet not in names:
            raise util.CytoflowViewError("Hue facet {} is not a statistic index; "
                                         "must be one of {}".format(self.huefacet, names))

        # a list (not filter()) so that len() also works on Python 3
        facets = [f for f in [self.variable, self.xfacet,
                              self.yfacet, self.huefacet] if f]
        if len(facets) != len(set(facets)):
            raise util.CytoflowViewError("Can't reuse facets")

        by = list(set(names) - set(facets))

        class plot_enum(object):
            """Iterates over the group keys of `experiment.data` grouped by `by`."""

            def __init__(self, experiment, by):
                self._iter = None
                self._returned = False

                if by:
                    self._iter = iter(experiment.data.groupby(by))

            def __iter__(self):
                return self

            def __next__(self):
                if self._iter is not None:
                    return next(self._iter)[0]

                # nothing to group by: yield exactly one None
                if self._returned:
                    raise StopIteration
                self._returned = True
                return None

            # Python 2 iterator protocol
            next = __next__

        return plot_enum(experiment, by)

    def plot(self, experiment, plot_name = None, **kwargs):
        """
        Plot a chart

        Parameters
        ----------
        experiment : Experiment
            The experiment holding the statistic(s) to plot.

        plot_name : object
            One of the values returned by :meth:`enum_plots`.  If some index
            levels of the statistic are not used by `variable` or a facet
            and `plot_name` is `None`, every plot from :meth:`enum_plots`
            is drawn in turn.

        col_wrap : int
            Passed to the seaborn FacetGrid; requires `xfacet` and can not
            be combined with `yfacet`.

        xlim, ylim : (float, float)
            Axis limits; computed from the data when not given.

        sharex, sharey : bool (default = True)
            Passed to the seaborn FacetGrid; when set, the corresponding
            axis limits are also made equal across all the axes.

        Other `kwargs` are passed to the plotting functions.

        Raises
        ------
        util.CytoflowViewError
            If the view's parameters are inconsistent with the experiment.
        """

        stat, error_stat, error_name, data = self._make_data(experiment)
        names = list(data.index.names)

        if not self.variable:
            raise util.CytoflowViewError("X variable not set")

        if self.variable not in experiment.conditions:
            raise util.CytoflowViewError("X variable {0} not in the experiment"
                                         .format(self.variable))

        if self.variable not in names:
            raise util.CytoflowViewError("X variable {} is not a statistic index; "
                                         "must be one of {}".format(self.variable, names))

        if experiment.conditions[self.variable].dtype.kind not in "biufc":
            raise util.CytoflowViewError("X variable {0} isn't numeric"
                                         .format(self.variable))

        if self.xfacet and self.xfacet not in experiment.conditions:
            raise util.CytoflowViewError("X facet {0} not in the experiment"
                                         .format(self.xfacet))

        if self.xfacet and self.xfacet not in names:
            raise util.CytoflowViewError("X facet {} is not a statistic index; "
                                         "must be one of {}".format(self.xfacet, names))

        if self.yfacet and self.yfacet not in experiment.conditions:
            raise util.CytoflowViewError("Y facet {0} not in the experiment"
                                         .format(self.yfacet))

        if self.yfacet and self.yfacet not in names:
            raise util.CytoflowViewError("Y facet {} is not a statistic index; "
                                         "must be one of {}".format(self.yfacet, names))

        # NOTE(review): enum_plots checks the hue facet against
        # experiment.conditions, this checks experiment.metadata -- confirm
        # which one is intended
        if self.huefacet and self.huefacet not in experiment.metadata:
            raise util.CytoflowViewError("Hue facet {0} not in the experiment"
                                         .format(self.huefacet))

        if self.huefacet and self.huefacet not in names:
            raise util.CytoflowViewError("Hue facet {} is not a statistic index; "
                                         "must be one of {}".format(self.huefacet, names))

        col_wrap = kwargs.pop('col_wrap', None)

        if col_wrap and self.yfacet:
            raise util.CytoflowViewError("Can't set yfacet and col_wrap at the same time.")

        if col_wrap and not self.xfacet:
            raise util.CytoflowViewError("Must set xfacet to use col_wrap.")

        # a list (not filter()) so that len() also works on Python 3
        facets = [f for f in [self.variable, self.xfacet,
                              self.yfacet, self.huefacet] if f]
        if len(facets) != len(set(facets)):
            raise util.CytoflowViewError("Can't reuse facets")

        unused_names = list(set(names) - set(facets))

        if unused_names and plot_name is None:
            # col_wrap was popped from kwargs above; hand it back so the
            # per-plot calls still see it
            for plot in self.enum_plots(experiment):
                self.plot(experiment, plot, col_wrap = col_wrap, **kwargs)
            return

        data.reset_index(inplace = True)

        if plot_name is not None:
            if not unused_names:
                raise util.CytoflowViewError("Plot {} not from plot_enum"
                                             .format(plot_name))

            groupby = data.groupby(unused_names)

            if plot_name not in set(groupby.groups.keys()):
                raise util.CytoflowViewError("Plot {} not from plot_enum"
                                             .format(plot_name))

            data = groupby.get_group(plot_name).reset_index(drop = True)

        xscale = util.scale_factory(self.xscale, experiment, condition = self.variable)

        if error_stat is not None:
            yscale = util.scale_factory(self.yscale, experiment, statistic = self.error_statistic)
        else:
            yscale = util.scale_factory(self.yscale, experiment, statistic = self.statistic)

        xlim = kwargs.pop("xlim", None)
        if xlim is None:
            xlim = (xscale.clip(data[self.variable].min() * 0.9),
                    xscale.clip(data[self.variable].max() * 1.1))

        ylim = kwargs.pop("ylim", None)
        if ylim is None:
            ylim = (yscale.clip(data[stat.name].min() * 0.9),
                    yscale.clip(data[stat.name].max() * 1.1))

            if error_stat is not None:
                # only the rows that are actually being plotted
                errors = data[error_name]
                try:
                    # (low, high) tuples
                    ylim = (yscale.clip(min([x[0] for x in errors]) * 0.9),
                            yscale.clip(max([x[1] for x in errors]) * 1.1))
                except (IndexError, TypeError):
                    # scalar errors are +/- offsets around the statistic.
                    # indexing a numpy scalar raises IndexError, indexing
                    # a python float raises TypeError.
                    ylim = (yscale.clip((data[stat.name] - errors).min() * 0.9),
                            yscale.clip((data[stat.name] + errors).max() * 1.1))

        kwargs.setdefault('antialiased', True)

        cols = col_wrap if col_wrap else \
            (len(data[self.xfacet].unique()) if self.xfacet else 1)

        sharex = kwargs.pop('sharex', True)
        sharey = kwargs.pop('sharey', True)

        grid = sns.FacetGrid(data,
                             size = (6 / cols),
                             aspect = 1.5,
                             col = (self.xfacet if self.xfacet else None),
                             row = (self.yfacet if self.yfacet else None),
                             hue = (self.huefacet if self.huefacet else None),
                             col_order = (np.sort(data[self.xfacet].unique()) if self.xfacet else None),
                             row_order = (np.sort(data[self.yfacet].unique()) if self.yfacet else None),
                             hue_order = (np.sort(data[self.huefacet].unique()) if self.huefacet else None),
                             col_wrap = col_wrap,
                             legend_out = False,
                             sharex = sharex,
                             sharey = sharey,
                             xlim = xlim,
                             ylim = ylim)

        for ax in grid.axes.flatten():
            ax.set_xscale(self.xscale, **xscale.mpl_params)
            ax.set_yscale(self.yscale, **yscale.mpl_params)

        # plot the error bars first so the axis labels don't get overwritten
        if error_stat is not None:
            grid.map(_error_bars, self.variable, stat.name, error_name, **kwargs)

        grid.map(plt.plot, self.variable, stat.name, **kwargs)

        # if we are sharing y axes, make sure the y scale is the same for each
        if sharey:
            fig = plt.gcf()
            fig_y_min = float("inf")
            fig_y_max = float("-inf")

            for ax in fig.get_axes():
                ax_y_min, ax_y_max = ax.get_ylim()
                if ax_y_min < fig_y_min:
                    fig_y_min = ax_y_min
                if ax_y_max > fig_y_max:
                    fig_y_max = ax_y_max

            for ax in fig.get_axes():
                ax.set_ylim(fig_y_min, fig_y_max)

        # if we are sharing x axes, make sure the x scale is the same for each
        if sharex:
            fig = plt.gcf()
            fig_x_min = float("inf")
            fig_x_max = float("-inf")

            for ax in fig.get_axes():
                ax_x_min, ax_x_max = ax.get_xlim()
                if ax_x_min < fig_x_min:
                    fig_x_min = ax_x_min
                if ax_x_max > fig_x_max:
                    fig_x_max = ax_x_max

            for ax in fig.get_axes():
                ax.set_xlim(fig_x_min, fig_x_max)

        # if we have a hue facet and a lot of hues, make a color bar instead
        # of a super-long legend.
        if self.huefacet:
            try:
                current_palette = mpl.rcParams['axes.color_cycle']
            except KeyError:
                # newer matplotlib only has 'axes.prop_cycle'
                current_palette = mpl.rcParams['axes.prop_cycle'].by_key()['color']

            if util.is_numeric(experiment.data[self.huefacet]) and \
               len(grid.hue_names) > len(current_palette):

                plot_ax = plt.gca()
                cmap = mpl.colors.ListedColormap(sns.color_palette("husl",
                                                                   n_colors = len(grid.hue_names)))
                cax, kw = mpl.colorbar.make_axes(plt.gca())
                norm = mpl.colors.Normalize(vmin = np.min(grid.hue_names),
                                            vmax = np.max(grid.hue_names),
                                            clip = False)
                mpl.colorbar.ColorbarBase(cax,
                                          cmap = cmap,
                                          norm = norm,
                                          label = self.huefacet,
                                          **kw)
                plt.sca(plot_ax)
            else:
                grid.add_legend(title = self.huefacet)

        # "is not None" so that a falsy plot name (e.g. 0) still gets a title
        if unused_names and plot_name is not None:
            plt.title("{0} = {1}".format(unused_names, plot_name))

        plt.ylabel(self.statistic)