Beispiel #1
0
class Stats1DView(Base1DStatisticsView):
    """
    Plot a statistic.  The value of the statistic will be plotted on the
    Y axis; a numeric conditioning variable must be chosen for the X axis.
    Every variable in the statistic must be specified as either the `variable`
    or one of the plot facets.
    
    Attributes
    ----------
        
    Examples
    --------
    
    .. plot::
        :context: close-figs
        
        Make a little data set.
    
        >>> import cytoflow as flow
        >>> import_op = flow.ImportOp()
        >>> import_op.tubes = [flow.Tube(file = "Plate01/RFP_Well_A3.fcs",
        ...                              conditions = {'Dox' : 10.0}),
        ...                    flow.Tube(file = "Plate01/CFP_Well_A4.fcs",
        ...                              conditions = {'Dox' : 1.0})]
        >>> import_op.conditions = {'Dox' : 'float'}
        >>> ex = import_op.apply()
    
    Create a new statistic.
    
    .. plot::
        :context: close-figs
        
        >>> ch_op = flow.ChannelStatisticOp(name = 'MeanByDox',
        ...                     channel = 'Y2-A',
        ...                     function = flow.geom_mean,
        ...                     by = ['Dox'])
        >>> ex2 = ch_op.apply(ex)
        
    View the new statistic
    
    .. plot::
        :context: close-figs
        
        >>> flow.Stats1DView(variable = 'Dox',
        ...                  statistic = ('MeanByDox', 'geom_mean'),
        ...                  xscale = 'log',
        ...                  yscale = 'log').plot(ex2)
    """
    
    # traits   
    id = Constant("edu.mit.synbio.cytoflow.view.stats1d")
    friendly_id = Constant("1D Statistics View")
    
    # attributes that no longer exist: `by`, `yfunction` and `ychannel` are
    # declared as removed with a shared error string, `xvariable` is declared
    # as deprecated in favour of `variable`
    REMOVED_ERROR = Constant("Statistics changed dramatically in 0.5; please see the documentation")
    by = util.Removed(err_string = REMOVED_ERROR)
    yfunction = util.Removed(err_string = REMOVED_ERROR)
    ychannel = util.Removed(err_string = REMOVED_ERROR)
    xvariable = util.Deprecated(new = "variable")
    
    def enum_plots(self, experiment):
        """
        Returns an iterator over the possible plots that this View can
        produce.  The values returned can be passed to :meth:`plot`.
        """
                
        return super().enum_plots(experiment)
        
    
    def plot(self, experiment, plot_name = None, **kwargs):
        """Plot a chart of a variable's values against a statistic.
        
        Parameters
        ----------
        
        color : a matplotlib color
            The color to plot with.  Overridden if `huefacet` is not `None`
            
        linestyle : ['solid' | 'dashed', 'dashdot', 'dotted' | (offset, on-off-dash-seq) | '-' | '--' | '-.' | ':' | 'None' | ' ' | '']
            
        marker : a matplotlib marker style
            See http://matplotlib.org/api/markers_api.html#module-matplotlib.markers
            
        markersize : int
            The marker size in points
            
        markerfacecolor : a matplotlib color
            The color to make the markers.  Overridden (?) if `huefacet` is not `None`
            
        alpha : the alpha blending value, from 0.0 (transparent) to 1.0 (opaque)
        
        Notes
        -----
                
        Other `kwargs` are passed to `matplotlib.pyplot.plot <https://matplotlib.org/devdocs/api/_as_gen/matplotlib.pyplot.plot.html>`_
        
        """
        
        super().plot(experiment, plot_name, **kwargs)

    def _grid_plot(self, experiment, grid, xlim, ylim, xscale, yscale, **kwargs):
        """
        Draw the statistic (and its error bars, if any) onto ``grid``.
        
        Parameters
        ----------
        experiment : Experiment
            The experiment whose ``statistics`` holds :attr:`statistic` (and
            :attr:`error_statistic`, when one is set).
            
        grid : the plotting grid
            Its ``data`` attribute is read to compute default limits, and its
            ``map`` method is used to draw on each facet.
            
        xlim, ylim : (float, float) or None
            Axis limits.  When ``None``, limits are derived from the data:
            the minimum scaled by 0.9 and the maximum scaled by 1.1, each 
            passed through the matching scale's ``clip``.
            
        xscale, yscale : scale objects
            Only their ``clip`` method is used here.
            
        **kwargs
            Forwarded unchanged to ``_error_bars`` and ``plt.plot``.
            
        Returns
        -------
        dict
            ``{'xlim' : xlim, 'ylim' : ylim}`` -- the limits actually chosen.
        """

        data = grid.data

        stat_name = experiment.statistics[self.statistic].name
        if self.error_statistic[0]:
            err_stat_name = experiment.statistics[self.error_statistic].name

        # xlim and ylim are named parameters, so they can never show up in
        # kwargs.  honor the values we were handed and only fall back to
        # data-derived limits when the caller passed None.
        if xlim is None:
            xlim = (xscale.clip(data[self.variable].min() * 0.9),
                    xscale.clip(data[self.variable].max() * 1.1))
                      
        if ylim is None:
            ylim = (yscale.clip(data[stat_name].min() * 0.9),
                    yscale.clip(data[stat_name].max() * 1.1))
            
            if self.error_statistic[0]:
                try: 
                    # error statistic entries indexable as (low, high) pairs
                    ylim = (yscale.clip(min([x[0] for x in data[err_stat_name]]) * 0.9),
                            yscale.clip(max([x[1] for x in data[err_stat_name]]) * 1.1))
                except IndexError:
                    # entries that can't be indexed that way are treated as
                    # offsets below and above the statistic
                    ylim = (yscale.clip((data[stat_name].min() - data[err_stat_name].min()) * 0.9), 
                            yscale.clip((data[stat_name].max() + data[err_stat_name].max()) * 1.1))

        # plot the error bars first so the axis labels don't get overwritten
        if self.error_statistic[0]:
            grid.map(_error_bars, self.variable, stat_name, err_stat_name, **kwargs)
        
        grid.map(plt.plot, self.variable, stat_name, **kwargs)
        
        return {'xlim' : xlim, 'ylim' : ylim}
Beispiel #2
0
class ImportOp(HasStrictTraits):
    """
    An operation for importing data and making an :class:`.Experiment`.
    
    To use, set the :attr:`conditions` dict to a mapping between condition name 
    and NumPy ``dtype``.  Useful dtypes include ``category``, ``float``, 
    ``int``, ``bool``.
    
    Next, set :attr:`tubes` to a list of :class:`Tube` containing FCS filenames 
    and the corresponding conditions.
    
    If you would rather not analyze every single event in every FCS file,
    set :attr:`events` to the number of events from each FCS file you want to 
    load.
    
    Call :meth:`apply` to load the data.  The usual ``experiment`` parameter
    can be ``None``.
    
    Attributes
    ----------
    conditions : Dict(Str, Str)
        A dictionary mapping condition names (keys) to NumPy ``dtype`` names 
        (values).  Useful values include ``category``, ``float``, ``int``, 
        and ``bool``.
        
    tubes : List(Tube)
        A list of :class:`Tube` instances, which map FCS files to their 
        corresponding experimental conditions.  Each :class:`Tube` must have a 
        :attr:`~Tube.conditions` dict whose keys match those of 
        :attr:`conditions`.
        
    channels : Dict(Str, Str)
        If you only need a subset of the channels available in the data set,
        specify them here.  Each ``(key, value)`` pair specifies a channel to
        include in the output experiment.  The key is the channel name in the 
        FCS file, and the value is the name of the channel in the Experiment.
        You can use this to rename channels as you import data (because flow
        channel names are frequently not terribly informative.)  New channel
        names must be valid Python identifiers: start with a letter or ``_``, and
        all characters must be letters, numbers or ``_``.  If :attr:`channels` is
        empty, load all channels in the FCS files.
        
    events : Int
        If not None, import only a random subset of events of size :attr:`events`. 
        Presumably the analysis will go faster but less precisely; good for
        interactive data exploration.  Then, unset :attr:`events` and re-run
        the analysis non-interactively.
        
    name_metadata : {None, "$PnN", "$PnS"} (default = None)
        Which FCS metadata is the channel name?  If ``None``, attempt to  
        autodetect.
        
    data_set : Int (default = 0)
        The FCS standard allows you to encode multiple data sets in a single
        FCS file.  Some software (such as the Beckman-Coulter software)
        also encode the same data in two different formats -- for example,
        FCS2.0 and FCS3.0.  To access a data set other than the first one,
        set :attr:`data_set` to the 0-based index of the data set you
        would like to use.  This will be used for *all FCS files imported by
        this operation.*
            
    ignore_v : List(Str)
        :class:`cytoflow` is designed to operate on an :class:`.Experiment` containing
        tubes that were all collected under the same instrument settings.
        In particular, the same PMT voltages ensure that data can be
        compared across samples.
        
        *Very rarely*, you may need to set up an :class:`.Experiment` with 
        different voltage settings on different :class:`Tube` instances.  This 
        is likely only to be the case when you are trying to figure out which 
        voltages should be used in future experiments.  If so, set 
        :attr:`ignore_v` to a :class:`List` of channel names to ignore 
        particular channels.  
        
        .. warning::
        
            THIS WILL BREAK REAL EXPERIMENTS
        
    Examples
    --------
    >>> tube1 = flow.Tube(file = 'RFP_Well_A3.fcs', conditions = {"Dox" : 10.0})
    >>> tube2 = flow.Tube(file='CFP_Well_A4.fcs', conditions = {"Dox" : 1.0})
    >>> import_op = flow.ImportOp(conditions = {"Dox" : "float"},
    ...                           tubes = [tube1, tube2])
    >>> ex = import_op.apply()
    """

    id = Constant("edu.mit.synbio.cytoflow.operations.import")
    friendly_id = Constant("Import")
    name = Constant("Import Data")

    # experimental conditions: name --> dtype.
    conditions = Dict(Str, Str)

    # the tubes
    tubes = List(Tube)

    # which channels do we import?
    channels = Dict(Str, Str)

    # which FCS metadata has the channel names in it?
    name_metadata = Enum(None, "$PnN", "$PnS")

    # which data set to get out of the FCS files?
    data_set = Int(0)

    # are we subsetting?
    events = util.CIntOrNone(None)
    coarse_events = util.Deprecated(new='events')

    # DON'T DO THIS
    ignore_v = List(Str)

    def apply(self, experiment=None, metadata_only=False):
        """
        Load a new :class:`.Experiment`.  
        
        Parameters
        ----------
        experiment : Experiment
            Ignored
            
        metadata_only : bool (default = False)
            Only "import" the metadata, creating an Experiment with all the
            expected metadata and structure but 0 events.
        
        Returns
        -------
        Experiment
            The new :class:`.Experiment`.  New channels have the following
            metadata:
            
            - **voltage** - int
                The voltage that this channel was collected at.  Determined
                by the ``$PnV`` field from the first FCS file.
                
            - **range** - int
                The maximum range of this channel.  Determined by the ``$PnR``
                field from the first FCS file, and raised if any tube 
                reports a larger ``$PnR`` for the same channel.
                
            New experimental conditions do not have **voltage** or **range**
            metadata, obviously.  Instead, they have **experiment** set to 
            ``True``, to distinguish the experimental variables from the
            conditions that were added by gates, etc.
            
            If :attr:`ignore_v` is set, it is added as a key to the 
            :class:`.Experiment`-wide metadata.
            
        Raises
        ------
        util.CytoflowOpError
            If no tubes are specified; if a renamed channel is not a valid
            Python identifier; if the tubes do not share the same condition 
            names; if two tubes have equal conditions; if the first tube's 
            metadata cannot be read; or if a requested channel is not in the 
            first tube.
        """

        if not self.tubes:
            raise util.CytoflowOpError('tubes', "Must specify some tubes!")

        # if we have channel renaming, make sure the new names are valid
        # python identifiers
        for old_name, new_name in self.channels.items():
            if old_name != new_name and new_name != util.sanitize_identifier(
                    new_name):
                raise util.CytoflowOpError(
                    'channels', "Channel name {} must be a "
                    "valid Python identifier.".format(new_name))

        # make sure each tube has the same conditions
        tube0_conditions = set(self.tubes[0].conditions)
        for tube in self.tubes:
            if tube0_conditions ^ set(tube.conditions):
                raise util.CytoflowOpError(
                    'tubes', "Tube {0} didn't have the same "
                    "conditions as tube {1}".format(tube.file,
                                                    self.tubes[0].file))

        # make sure experimental conditions are unique
        for idx, first in enumerate(self.tubes[0:-1]):
            for second in self.tubes[idx + 1:]:
                if first.conditions_equal(second):
                    raise util.CytoflowOpError(
                        'tubes', "The same conditions specified for "
                        "tube {0} and tube {1}".format(first.file,
                                                       second.file))

        # the ``experiment`` argument is ignored; we always start fresh
        experiment = Experiment()

        experiment.metadata["ignore_v"] = self.ignore_v

        for condition, dtype in self.conditions.items():
            experiment.add_condition(condition, dtype)
            experiment.metadata[condition]['experiment'] = True

        try:
            # silence warnings about duplicate channels;
            # we'll figure that out below
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                tube0_meta = fcsparser.parse(self.tubes[0].file,
                                             data_set=self.data_set,
                                             meta_data_only=True,
                                             reformat_meta=True)
        except Exception as e:
            raise util.CytoflowOpError(
                'tubes', "FCS reader threw an error reading metadata "
                "for tube {}: {}".format(self.tubes[0].file, str(e))) from e

        meta_channels = tube0_meta["_channels_"]

        if self.name_metadata:
            experiment.metadata["name_metadata"] = self.name_metadata
        else:
            experiment.metadata["name_metadata"] = autodetect_name_metadata(
                self.tubes[0].file, data_set=self.data_set)

        # keep the original row labels around before re-indexing by name.
        # NOTE(review): this assumes fcsparser labels the rows of 
        # ``_channels_`` with the 1-based FCS parameter number -- confirm.
        meta_channels['Index'] = meta_channels.index
        meta_channels.set_index(experiment.metadata["name_metadata"],
                                inplace=True)

        channels = list(self.channels.keys()) if self.channels \
                   else list(meta_channels.index.values)

        # make sure everything in self.channels is in the tube channels
        for channel in channels:
            if channel not in meta_channels.index:
                raise util.CytoflowOpError(
                    'channels', "Channel {0} not in tube {1}".format(
                        channel, self.tubes[0].file))

        # now that we have the metadata, load it into experiment

        for channel in channels:
            experiment.add_channel(channel)

            experiment.metadata[channel]["fcs_name"] = channel

            channel_meta = meta_channels.loc[channel]

            # keep track of the channel's PMT voltage
            if "$PnV" in channel_meta:
                v = channel_meta['$PnV']
                if v: experiment.metadata[channel]["voltage"] = v

            # add the maximum possible value for this channel.
            experiment.metadata[channel]['range'] = float(channel_meta['$PnR'])

        experiment.metadata['fcs_metadata'] = {}
        for tube in self.tubes:
            if metadata_only:
                tube_meta, tube_data = parse_tube(tube.file,
                                                  experiment,
                                                  data_set=self.data_set,
                                                  metadata_only=True)
            else:
                tube_meta, tube_data = parse_tube(tube.file,
                                                  experiment,
                                                  data_set=self.data_set)

                if self.events:
                    if self.events <= len(tube_data):
                        tube_data = tube_data.loc[np.random.choice(
                            tube_data.index, self.events, replace=False)]
                    else:
                        warnings.warn(
                            "Only {0} events in tube {1}".format(
                                len(tube_data), tube.file),
                            util.CytoflowWarning)

                experiment.add_events(tube_data[channels], tube.conditions)

            # extract the row and column from wells collected on a
            # BD HTS
            if 'WELL ID' in tube_meta:
                pos = tube_meta['WELL ID']
                tube_meta['CF_Row'] = pos[0]
                tube_meta['CF_Col'] = int(pos[1:3])

            for channel in channels:
                # look the parameter number up instead of using the channel's
                # position in ``channels``: the two only agree when every
                # channel is imported in file order, not when ``channels``
                # selects a subset.
                param_num = int(meta_channels.loc[channel]['Index'])

                # remove the PnV tube metadata
                pnv = '$P{}V'.format(param_num)
                if pnv in tube_meta:
                    del tube_meta[pnv]

                # work around a bug where the PnR is sometimes not the detector range
                # but the data range.
                pnr = '$P{}R'.format(param_num)
                if pnr in tube_meta:
                    tube_range = float(tube_meta[pnr])
                    if tube_range > experiment.metadata[channel]['range']:
                        experiment.metadata[channel]['range'] = tube_range

            tube_meta['CF_File'] = Path(tube.file).stem

            experiment.metadata['fcs_metadata'][tube.file] = tube_meta

        for channel in channels:
            # ``column`` is the name this channel's data lives under in
            # experiment.data; it differs from the FCS name after a rename.
            column = channel
            new_name = self.channels.get(channel, channel)

            if new_name != channel:
                experiment.data.rename(columns={channel: new_name},
                                       inplace=True)
                experiment.metadata[new_name] = experiment.metadata[channel]
                experiment.metadata[new_name]["fcs_name"] = channel
                del experiment.metadata[channel]
                column = new_name

            # the checks below run for every channel, including ones that
            # are mapped to their own name.

            # this catches an odd corner case where some instruments store
            # instrument-specific info in the "extra" bits.  we have to
            # clear them out.
            if tube0_meta['$DATATYPE'] == 'I':
                data_bits = int(meta_channels.loc[channel]['$PnB'])
                data_range = float(meta_channels.loc[channel]['$PnR'])
                range_bits = int(math.log(data_range, 2))

                if range_bits < data_bits:
                    # a mask of ``range_bits`` low bits (at least one bit)
                    mask = (1 << max(range_bits, 1)) - 1

                    experiment.data[column] = experiment.data[
                        column].values.astype('int') & mask

            # re-scale the data to linear if it's recorded as log-scaled with
            # integer channels
            f1 = float(meta_channels.loc[channel]['$PnE'][0])
            f2 = float(meta_channels.loc[channel]['$PnE'][1])

            if f1 > 0.0 and f2 == 0.0:
                warnings.warn(
                    'Invalid $PnE = {},{} for channel {}, changing it to {},1.0'
                    .format(f1, f2, channel, f1), util.CytoflowWarning)
                f2 = 1.0

            if f1 > 0.0 and f2 > 0.0 and tube0_meta['$DATATYPE'] == 'I':
                # NOTE(review): only the warning is issued; the conversion
                # itself is disabled.  The disabled formula was
                #   10 ** (f1 * data / $PnR) * f2
                warnings.warn(
                    'Converting channel {} from logarithmic to linear'.format(
                        channel), util.CytoflowWarning)

        return experiment
Beispiel #3
0
class ImportOp(HasStrictTraits):
    """
    An operation for importing data and making an :class:`.Experiment`.
    
    To use, set the :attr:`conditions` dict to a mapping between condition name 
    and NumPy ``dtype``.  Useful dtypes include ``category``, ``float``, 
    ``int``, ``bool``.
    
    Next, set :attr:`tubes` to a list of :class:`Tube` containing FCS filenames 
    and the corresponding conditions.
    
    If you would rather not analyze every single event in every FCS file,
    set :attr:`events` to the number of events from each FCS file you want to 
    load.
    
    Call :meth:`apply` to load the data.  The usual ``experiment`` parameter
    can be ``None``.
    
    Attributes
    ----------
    conditions : Dict(Str, Str)
        A dictionary mapping condition names (keys) to NumPy ``dtype`` names 
        (values).  Useful values include ``category``, ``float``, ``int``, 
        and ``bool``.
        
    tubes : List(Tube)
        A list of :class:`Tube` instances, which map FCS files to their 
        corresponding experimental conditions.  Each :class:`Tube` must have a 
        :attr:`~Tube.conditions` dict whose keys match those of 
        :attr:`conditions`.
        
    channels : Dict(Str, Str)
        If you only need a subset of the channels available in the data set,
        specify them here.  Each ``(key, value)`` pair specifies a channel to
        include in the output experiment.  The key is the channel name in the 
        FCS file, and the value is the name of the channel in the Experiment.
        You can use this to rename channels as you import data (because flow
        channel names are frequently not terribly informative.)  New channel
        names must be valid Python identifiers: start with a letter or ``_``, and
        all characters must be letters, numbers or ``_``.  If :attr:`channels` is
        empty, load all channels in the FCS files.
        
    events : Int (default = 0)
        If ``> 0``, import only a random subset of events of size :attr:`events`. 
        Presumably the analysis will go faster but less precisely; good for
        interactive data exploration.  Then, unset :attr:`events` and re-run
        the analysis non-interactively.
        
    name_metadata : {None, "$PnN", "$PnS"} (default = None)
        Which FCS metadata is the channel name?  If ``None``, attempt to  
        autodetect.
        
    ignore_v : List(Str)
        :class:`cytoflow` is designed to operate on an :class:`.Experiment` containing
        tubes that were all collected under the same instrument settings.
        In particular, the same PMT voltages ensure that data can be
        compared across samples.
        
        *Very rarely*, you may need to set up an :class:`.Experiment` with 
        different voltage settings on different :class:`Tube` instances.  This 
        is likely only to be the case when you are trying to figure out which 
        voltages should be used in future experiments.  If so, set 
        :attr:`ignore_v` to a :class:`List` of channel names to ignore 
        particular channels.  
        
        .. warning::
        
            THIS WILL BREAK REAL EXPERIMENTS
        
    Examples
    --------
    >>> tube1 = flow.Tube(file = 'RFP_Well_A3.fcs', conditions = {"Dox" : 10.0})
    >>> tube2 = flow.Tube(file='CFP_Well_A4.fcs', conditions = {"Dox" : 1.0})
    >>> import_op = flow.ImportOp(conditions = {"Dox" : "float"},
    ...                           tubes = [tube1, tube2])
    >>> ex = import_op.apply()
    """

    id = Constant("edu.mit.synbio.cytoflow.operations.import")
    friendly_id = Constant("Import")
    name = Constant("Import Data")

    # experimental conditions: name --> dtype.
    conditions = Dict(Str, Str)

    # the tubes
    tubes = List(Tube)

    # which channels do we import?
    channels = Dict(Str, Str)

    # which FCS metadata has the channel names in it?
    name_metadata = Enum(None, "$PnN", "$PnS")

    # are we subsetting?
    events = util.PositiveInt(0, allow_zero=True)
    coarse_events = util.Deprecated(new='events')

    # DON'T DO THIS
    ignore_v = List(Str)

    def apply(self, experiment=None):
        """
        Load a new :class:`.Experiment`.  
        
        Parameters
        ----------
        experiment : Experiment
            Ignored
        
        Returns
        -------
        Experiment
            The new :class:`.Experiment`.  New channels have the following
            metadata:
            
            - **voltage** - int
                The voltage that this channel was collected at.  Determined
                by the ``$PnV`` field from the first FCS file.
                
            - **range** - int
                The maximum range of this channel.  Determined by the ``$PnR``
                field from the first FCS file.
                
            New experimental conditions do not have **voltage** or **range**
            metadata, obviously.  Instead, they have **experiment** set to 
            ``True``, to distinguish the experimental variables from the
            conditions that were added by gates, etc.
            
            If :attr:`ignore_v` is set, it is added as a key to the 
            :class:`.Experiment`-wide metadata.
            
        Raises
        ------
        util.CytoflowOpError
            If no tubes are specified; if a renamed channel is not a valid
            Python identifier; if the tubes do not share the same condition 
            names; if two tubes have equal conditions; if the first tube's 
            metadata cannot be read or has neither ``$PnN`` nor ``$PnS``; or 
            if a requested channel is not in the first tube.
        """

        if not self.tubes:
            raise util.CytoflowOpError('tubes', "Must specify some tubes!")

        # if we have channel renaming, make sure the new names are valid
        # python identifiers
        for old_name, new_name in self.channels.items():
            if old_name != new_name and new_name != util.sanitize_identifier(
                    new_name):
                raise util.CytoflowOpError(
                    'channels', "Channel name {} must be a "
                    "valid Python identifier.".format(new_name))

        # make sure each tube has the same conditions
        tube0_conditions = set(self.tubes[0].conditions)
        for tube in self.tubes:
            if tube0_conditions ^ set(tube.conditions):
                raise util.CytoflowOpError(
                    'tubes', "Tube {0} didn't have the same "
                    "conditions as tube {1}".format(tube.file,
                                                    self.tubes[0].file))

        # make sure experimental conditions are unique
        for idx, first in enumerate(self.tubes[0:-1]):
            for second in self.tubes[idx + 1:]:
                if first.conditions_equal(second):
                    raise util.CytoflowOpError(
                        'tubes', "The same conditions specified for "
                        "tube {0} and tube {1}".format(first.file,
                                                       second.file))

        # the ``experiment`` argument is ignored; we always start fresh
        experiment = Experiment()

        experiment.metadata["ignore_v"] = self.ignore_v

        for condition, dtype in self.conditions.items():
            experiment.add_condition(condition, dtype)
            experiment.metadata[condition]['experiment'] = True

        try:
            # silence warnings about duplicate channels;
            # we'll figure that out below
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                tube0_meta = fcsparser.parse(self.tubes[0].file,
                                             meta_data_only=True,
                                             reformat_meta=True)
        except Exception as e:
            raise util.CytoflowOpError(
                'tubes', "FCS reader threw an error reading metadata "
                "for tube {}".format(self.tubes[0].file)) from e

        meta_channels = tube0_meta["_channels_"]

        if self.name_metadata:
            experiment.metadata["name_metadata"] = self.name_metadata
        else:
            # try to autodetect the metadata
            has_pnn = "$PnN" in meta_channels
            has_pns = "$PnS" in meta_channels

            if not has_pnn and not has_pns:
                # fail with a useful message instead of a bare KeyError
                raise util.CytoflowOpError(
                    'name_metadata',
                    "Tube {} has neither $PnN nor $PnS channel metadata"
                    .format(self.tubes[0].file))

            if has_pnn and not has_pns:
                detected = "$PnN"
            elif has_pns and not has_pnn:
                detected = "$PnS"
            else:
                PnN = meta_channels["$PnN"]
                PnS = meta_channels["$PnS"]

                # sometimes one is unique and the other isn't
                pnn_unique = len(set(PnN)) == len(PnN)
                pns_unique = len(set(PnS)) == len(PnS)

                if pnn_unique and not pns_unique:
                    detected = "$PnN"
                else:
                    # either only $PnS is unique, or we can't tell them
                    # apart.  as per fcsparser.api, $PnN is the "short name" 
                    # (like FL-1) and $PnS is the "actual name" (like 
                    # "FSC-H").  so let's use $PnS.
                    detected = "$PnS"

            experiment.metadata["name_metadata"] = detected

        meta_channels.set_index(experiment.metadata["name_metadata"],
                                inplace=True)

        # take the default channel list from the index we just built, so the
        # names are always the ones selected by name_metadata.  the parser's
        # own "_channel_names_" entry may have been derived from the other
        # field, which would make the membership check below fail.
        channels = list(self.channels.keys()) if self.channels \
                   else list(meta_channels.index.values)

        # make sure everything in self.channels is in the tube channels
        for channel in channels:
            if channel not in meta_channels.index:
                raise util.CytoflowOpError(
                    'channels', "Channel {0} not in tube {1}".format(
                        channel, self.tubes[0].file))

        # now that we have the metadata, load it into experiment

        for channel in channels:
            experiment.add_channel(channel)

            experiment.metadata[channel]["fcs_name"] = channel

            channel_meta = meta_channels.loc[channel]

            # keep track of the channel's PMT voltage
            if "$PnV" in channel_meta:
                v = channel_meta['$PnV']
                if v: experiment.metadata[channel]["voltage"] = v

            # add the maximum possible value for this channel.
            experiment.metadata[channel]['range'] = float(channel_meta['$PnR'])

        experiment.metadata['fcs_metadata'] = {}
        for tube in self.tubes:
            tube_meta, tube_data = parse_tube(tube.file, experiment)

            if self.events:
                if self.events <= len(tube_data):
                    tube_data = tube_data.loc[np.random.choice(tube_data.index,
                                                               self.events,
                                                               replace=False)]
                else:
                    warnings.warn(
                        "Only {0} events in tube {1}".format(
                            len(tube_data), tube.file), util.CytoflowWarning)

            experiment.add_events(tube_data[channels], tube.conditions)
            experiment.metadata['fcs_metadata'][tube.file] = tube_meta

        # apply any channel renames to both the data and the metadata
        for channel in channels:
            new_name = self.channels.get(channel, channel)
            if new_name == channel:
                continue

            experiment.data.rename(columns={channel: new_name},
                                   inplace=True)
            experiment.metadata[new_name] = experiment.metadata[channel]
            experiment.metadata[new_name]["fcs_name"] = channel
            del experiment.metadata[channel]

        return experiment
Beispiel #4
0
class BaseStatisticsView(BaseView):
    """
    The base class for statistics views (as opposed to data views).
    
    Attributes
    ----------
    variable : str
        The condition that varies when plotting this statistic: used for the
        x axis of line plots, the bar groups in bar plots, etc.
        
    subset : str
        An expression that specifies the subset of the statistic to plot.

    """

    # deprecated or removed attributes give warnings & errors, respectively
    by = util.Deprecated(
        new='variable', err_string="'by' is deprecated, please use 'variable'")

    variable = Str
    subset = Str

    def _check_variable(self, experiment):
        """
        Validate that an experiment was given and that `variable` is set and
        names one of the experiment's conditions.

        Raises
        ------
        util.CytoflowViewError
            If any of those checks fails.
        """

        if experiment is None:
            raise util.CytoflowViewError('experiment',
                                         "No experiment specified")

        if not self.variable:
            raise util.CytoflowViewError('variable', "variable not set")

        if self.variable not in experiment.conditions:
            raise util.CytoflowViewError(
                'variable',
                "variable {0} not in the experiment".format(self.variable))

    def enum_plots(self, experiment, data):
        """
        Enumerate the named plots we can make from this set of statistics.

        Parameters
        ----------
        experiment : the experiment being plotted
            Only used to validate `variable`.

        data : pandas.DataFrame
            The statistic(s) to plot, indexed by the statistic's conditions.

        Returns
        -------
        iterator
            Yields one group key per combination of the index levels that
            are not used by `variable` or a facet.  If every level is used,
            yields a single `None`.

        Raises
        ------
        util.CytoflowViewError
            If the experiment, variable, subset or facets are invalid.
        """

        self._check_variable(experiment)

        data, facets, names = self._subset_data(data)

        # keep the index's own level order (rather than set order) so that
        # multi-level plot names are the same from run to run
        by = [name for name in names if name not in facets]

        class plot_enum(object):
            """Iterator over the group keys of `data` grouped by `by`."""

            def __init__(self, data, by):
                self.by = by
                self._iter = None
                self._returned = False

                if by:
                    self._iter = data.groupby(by).__iter__()

            def __iter__(self):
                return self

            def __next__(self):
                if self._iter:
                    # groupby iteration yields (key, frame); we want the key
                    return next(self._iter)[0]

                # nothing to group by: there is exactly one (unnamed) plot
                if self._returned:
                    raise StopIteration

                self._returned = True
                return None

        return plot_enum(data.reset_index(), by)

    def plot(self, experiment, data, plot_name=None, **kwargs):
        """
        Plot some data from a statistic.
        
        This function takes care of checking for facet name validity and 
        subsetting, then passes the dataframe to `BaseView.plot`

        Parameters
        ----------
        experiment : the experiment being plotted

        data : pandas.DataFrame
            The statistic(s) to plot, indexed by the statistic's conditions.

        plot_name : optional
            Which plot to draw, if some index levels are not used by
            `variable` or a facet.  Must be one of the values returned by
            `enum_plots`.

        Raises
        ------
        util.CytoflowViewError
            If the experiment, variable, subset, facets or plot name are
            invalid.
        """

        self._check_variable(experiment)

        data, facets, names = self._subset_data(data)

        # same (index-order) computation as in enum_plots, so that the keys
        # agree with the names enum_plots hands out
        unused_names = [name for name in names if name not in facets]

        if plot_name is not None and not unused_names:
            raise util.CytoflowViewError(
                'plot_name', "You specified a plot name, but all "
                "the facets are already used")

        if unused_names:
            groupby = data.groupby(unused_names)

            if plot_name is None:
                raise util.CytoflowViewError(
                    'plot_name', "You must use facets {} in either the "
                    "plot variables or the plot name. "
                    "Possible plot names: {}".format(
                        unused_names, list(groupby.groups.keys())))

            if plot_name not in set(groupby.groups.keys()):
                raise util.CytoflowViewError(
                    'plot_name', "Plot {} not from plot_enum; must "
                    "be one of {}".format(plot_name,
                                          list(groupby.groups.keys())))

            data = groupby.get_group(plot_name)

        # FacetGrid needs a "long" data set.  Build a new frame instead of
        # resetting in place, so neither the caller's frame nor the result
        # of get_group() is modified.
        data = data.reset_index()
        super().plot(experiment, data, **kwargs)

    def _subset_data(self, data):
        """
        Apply `subset`, drop index levels with only one value, and validate
        the variable and facets against what remains.

        Parameters
        ----------
        data : pandas.DataFrame
            The statistic(s) to plot, indexed by the statistic's conditions.

        Returns
        -------
        (data, facets, names)
            The subsetted frame; the list of non-empty values among
            `variable`, `xfacet`, `yfacet` and `huefacet`; and the names of
            the index levels that remain.

        Raises
        ------
        util.CytoflowViewError
            If the subset is invalid or empty, if there is only one value
            to plot, if the variable or a facet is not an index level, or
            if a facet is used twice.
        """

        if self.subset:
            try:
                # TODO - either sanitize column names, or check to see that
                # all conditions are valid Python variables
                data = data.query(self.subset)
            except Exception as e:
                raise util.CytoflowViewError(
                    'subset', "Subset string '{0}' isn't valid".format(
                        self.subset)) from e

            if len(data) == 0:
                raise util.CytoflowViewError(
                    'subset', "Subset string '{0}' returned no values".format(
                        self.subset))

        names = list(data.index.names)

        for name in names:
            unique_values = data.index.get_level_values(name).unique()
            if len(unique_values) == 1:
                warn("Only one value for level {}; dropping it.".format(name),
                     util.CytoflowViewWarning)
                try:
                    data.index = data.index.droplevel(name)
                except (AttributeError, ValueError) as e:
                    # Dropping the last remaining level fails: older pandas
                    # has no Index.droplevel (AttributeError); newer pandas
                    # refuses to remove every level (ValueError).
                    raise util.CytoflowViewError(
                        None, "Must have more than one "
                        "value to plot.") from e

        names = list(data.index.names)

        # The variable must survive as an index level, just like the facets;
        # otherwise it will be missing once the index is reset for plotting.
        if self.variable and self.variable not in names:
            raise util.CytoflowViewError(
                'variable',
                "Variable {} not in statistic; must be one of {}".format(
                    self.variable, names))

        if self.xfacet and self.xfacet not in data.index.names:
            raise util.CytoflowViewError(
                'xfacet',
                "X facet {} not in statistics; must be one of {}".format(
                    self.xfacet, data.index.names))

        if self.yfacet and self.yfacet not in data.index.names:
            raise util.CytoflowViewError(
                'yfacet',
                "Y facet {} not in statistics; must be one of {}".format(
                    self.yfacet, data.index.names))

        if self.huefacet and self.huefacet not in data.index.names:
            raise util.CytoflowViewError(
                'huefacet',
                "Hue facet {} not in statistics; must be one of {}".format(
                    self.huefacet, data.index.names))

        facets = [
            x
            for x in [self.variable, self.xfacet, self.yfacet, self.huefacet]
            if x
        ]
        if len(facets) != len(set(facets)):
            raise util.CytoflowViewError(None, "Can't reuse facets")

        return data, facets, names
Beispiel #5
0
class Base1DStatisticsView(BaseStatisticsView):
    """
    The base class for 1-dimensional statistic views -- ie, the :attr:`variable`
    attribute is on the x axis, and the statistic value is on the y axis.
    
    Attributes
    ----------
    statistic : (str, str)
        The name of the statistic to plot.  Must be a key in the  
        :attr:`~Experiment.statistics` attribute of the :class:`~.Experiment`
        being plotted.
        
    error_statistic : (str, str)
        The name of the statistic used to plot error bars.  Must be a key in the
        :attr:`~Experiment.statistics` attribute of the :class:`~.Experiment`
        being plotted.
        
    scale : {'linear', 'log', 'logicle'}
        The scale applied to the data before plotting it.
    """

    REMOVED_ERROR = "Statistics changed dramatically in 0.5; please see the documentation"
    by = util.Removed(err_string=REMOVED_ERROR)
    yfunction = util.Removed(err_string=REMOVED_ERROR)
    ychannel = util.Removed(err_string=REMOVED_ERROR)
    channel = util.Removed(err_string=REMOVED_ERROR)
    function = util.Removed(err_string=REMOVED_ERROR)
    error_bars = util.Removed(err_string=REMOVED_ERROR)

    xvariable = util.Deprecated(new="variable")

    statistic = Tuple(Str, Str)
    error_statistic = Tuple(Str, Str)

    scale = util.ScaleEnum

    def enum_plots(self, experiment):
        """
        Enumerate the named plots this view can make from `experiment`.

        Builds the data frame for `statistic` (and `error_statistic`, if
        set) and hands it to the superclass's `enum_plots`.

        Raises
        ------
        util.CytoflowViewError
            If the experiment or the statistics are missing or invalid.
        """

        if experiment is None:
            raise util.CytoflowViewError('experiment',
                                         "No experiment specified")
        data = self._make_data(experiment)
        return super().enum_plots(experiment, data)

    def plot(self, experiment, plot_name=None, **kwargs):
        """
        Plot the statistic, using `scale` for the statistic's axis.

        Parameters
        ----------
        orientation : {'vertical', 'horizontal'}
        
        lim : (float, float)
            Set the range of the plot's axis.

        Raises
        ------
        util.CytoflowViewError
            If the experiment, the statistics or the variable are missing
            or invalid.
        """

        if experiment is None:
            raise util.CytoflowViewError('experiment',
                                         "No experiment specified")

        data = self._make_data(experiment)

        if not self.variable:
            raise util.CytoflowViewError('variable', "variable not set")

        if self.variable not in experiment.conditions:
            raise util.CytoflowViewError(
                'variable',
                "variable {0} not in the experiment".format(self.variable))

        scale = util.scale_factory(self.scale,
                                   experiment,
                                   statistic=self.statistic,
                                   error_statistic=self.error_statistic)

        super().plot(experiment,
                     data,
                     plot_name=plot_name,
                     scale=scale,
                     **kwargs)

    def _make_data(self, experiment):
        """
        Build the data frame to plot from the experiment's statistics.

        Returns
        -------
        pandas.DataFrame
            Indexed like the statistic, with one column named after the
            statistic and, if `error_statistic` is set, a second column
            named after the error statistic.

        Raises
        ------
        util.CytoflowViewError
            If the experiment is missing; if `statistic` is unset, unknown
            or not numeric; or if `error_statistic` is unknown, has a
            different index than `statistic`, or has the same name.
        """

        if experiment is None:
            raise util.CytoflowViewError('experiment',
                                         "No experiment specified")

        # `statistic` is a (str, str) tuple, which is truthy even when both
        # strings are empty -- so look at its first element, the same way
        # `error_statistic` is tested below.
        if not self.statistic[0]:
            raise util.CytoflowViewError('statistic', "Statistic not set")

        if self.statistic not in experiment.statistics:
            raise util.CytoflowViewError(
                'statistic',
                "Can't find the statistic {} in the experiment".format(
                    self.statistic))

        stat = experiment.statistics[self.statistic]

        if not util.is_numeric(stat):
            raise util.CytoflowViewError('statistic',
                                         "Statistic must be numeric")

        error_stat = None
        if self.error_statistic[0]:
            if self.error_statistic not in experiment.statistics:
                raise util.CytoflowViewError(
                    'error_statistic',
                    "Can't find the error statistic in the experiment")

            # Work on a copy: the re-indexing and sorting below must not
            # alter the statistic stored in the experiment.
            error_stat = experiment.statistics[self.error_statistic].copy()

        if error_stat is not None:

            try:
                # line the error statistic's levels up with the statistic's
                error_stat.index = error_stat.index.reorder_levels(
                    stat.index.names)
                error_stat.sort_index(inplace=True)
            except AttributeError:
                # a single-level index has no reorder_levels()
                pass

            if not stat.index.equals(error_stat.index):
                raise util.CytoflowViewError(
                    'error_statistic', "Data statistic and error statistic "
                    " don't have the same index.")

            if stat.name == error_stat.name:
                # the names become column names in the frame below, so they
                # must be distinct
                raise util.CytoflowViewError(
                    'error_statistic',
                    "Data statistic and error statistic can "
                    "not have the same name.")

        data = pd.DataFrame(index=stat.index)
        data[stat.name] = stat

        if error_stat is not None:
            data[error_stat.name] = error_stat

        return data
Beispiel #6
0
class Base1DStatisticsView(BaseStatisticsView):
    """
    Base class for views that plot one statistic against `variable`.

    Attributes
    ----------
    statistic : (str, str)
        The key, in the experiment's `statistics`, of the statistic to plot.

    error_statistic : (str, str)
        The key, in the experiment's `statistics`, of the statistic used
        for error bars.  Leave the first element empty for no error bars.
    """

    REMOVED_ERROR = "Statistics changed dramatically in 0.5; please see the documentation"
    by = util.Removed(err_string=REMOVED_ERROR)
    yfunction = util.Removed(err_string=REMOVED_ERROR)
    ychannel = util.Removed(err_string=REMOVED_ERROR)
    channel = util.Removed(err_string=REMOVED_ERROR)
    function = util.Removed(err_string=REMOVED_ERROR)
    error_bars = util.Removed(err_string=REMOVED_ERROR)

    xvariable = util.Deprecated(new="variable")

    statistic = Tuple(Str, Str)
    error_statistic = Tuple(Str, Str)

    def enum_plots(self, experiment):
        """
        Enumerate the named plots this view can make from `experiment`.

        Builds the data frame for the statistic(s) and hands it to the
        superclass's `enum_plots`.
        """

        data = self._make_data(experiment)
        return super().enum_plots(experiment, data)

    def plot(self, experiment, plot_name=None, **kwargs):
        """
        Plot the statistic.

        Creates the scale for the statistic's axis from `self.yscale` and,
        when the variable is numeric, the scale for the variable's axis from
        `self.xscale`; then passes both to the superclass's `plot`.
        """

        data = self._make_data(experiment)

        # only a numeric variable can be put on a scaled axis
        if util.is_numeric(experiment[self.variable]):
            xscale = util.scale_factory(self.xscale,
                                        experiment,
                                        condition=self.variable)
        else:
            xscale = None

        yscale = util.scale_factory(self.yscale,
                                    experiment,
                                    statistic=self.statistic,
                                    error_statistic=self.error_statistic)

        super().plot(experiment,
                     data,
                     plot_name,
                     xscale=xscale,
                     yscale=yscale,
                     **kwargs)

    def _make_data(self, experiment):
        """
        Build the data frame to plot from the experiment's statistics.

        Returns
        -------
        pandas.DataFrame
            Indexed like the statistic, with one column named after the
            statistic and, if `error_statistic` is set, a second column
            named after the error statistic.

        Raises
        ------
        util.CytoflowViewError
            If the experiment is missing; if `statistic` is unset, unknown
            or not numeric; or if `error_statistic` is unknown, has a
            different index than `statistic`, or has the same name.
        """

        if experiment is None:
            raise util.CytoflowViewError("No experiment specified")

        # `statistic` is a (str, str) tuple, which is truthy even when both
        # strings are empty -- so look at its first element, the same way
        # `error_statistic` is tested below.
        if not self.statistic[0]:
            raise util.CytoflowViewError("Statistic not set")

        if self.statistic not in experiment.statistics:
            raise util.CytoflowViewError(
                "Can't find the statistic {} in the experiment".format(
                    self.statistic))

        stat = experiment.statistics[self.statistic]

        if not util.is_numeric(stat):
            raise util.CytoflowViewError("Statistic must be numeric")

        error_stat = None
        if self.error_statistic[0]:
            if self.error_statistic not in experiment.statistics:
                raise util.CytoflowViewError(
                    "Can't find the error statistic in the experiment")

            error_stat = experiment.statistics[self.error_statistic]

        if error_stat is not None:
            if not stat.index.equals(error_stat.index):
                raise util.CytoflowViewError(
                    "Data statistic and error statistic "
                    " don't have the same index.")

            if stat.name == error_stat.name:
                # the names become column names in the frame below, so they
                # must be distinct
                raise util.CytoflowViewError(
                    "Data statistic and error statistic can "
                    "not have the same name.")

        data = pd.DataFrame(index=stat.index)
        data[stat.name] = stat

        if error_stat is not None:
            data[error_stat.name] = error_stat

        return data
Beispiel #7
0
class Stats1DView(Base1DStatisticsView):
    """
    Plot a statistic.  The value of the statistic will be plotted on the
    Y axis; a numeric conditioning variable must be chosen for the X axis.
    Every variable in the statistic must be specified as either the `variable`
    or one of the plot facets.
    
    Attributes
    ----------
    variable_scale : {'linear', 'log', 'logicle'}
        The scale applied to the variable (on the X axis)
        
    Examples
    --------
    
    .. plot::
        :context: close-figs
        
        Make a little data set.
    
        >>> import cytoflow as flow
        >>> import_op = flow.ImportOp()
        >>> import_op.tubes = [flow.Tube(file = "Plate01/RFP_Well_A3.fcs",
        ...                              conditions = {'Dox' : 10.0}),
        ...                    flow.Tube(file = "Plate01/CFP_Well_A4.fcs",
        ...                              conditions = {'Dox' : 1.0})]
        >>> import_op.conditions = {'Dox' : 'float'}
        >>> ex = import_op.apply()
    
    Create a new statistic.
    
    .. plot::
        :context: close-figs
        
        >>> ch_op = flow.ChannelStatisticOp(name = 'MeanByDox',
        ...                     channel = 'Y2-A',
        ...                     function = flow.geom_mean,
        ...                     by = ['Dox'])
        >>> ex2 = ch_op.apply(ex)
        
    View the new statistic
    
    .. plot::
        :context: close-figs
        
        >>> flow.Stats1DView(variable = 'Dox',
        ...                  statistic = ('MeanByDox', 'geom_mean'),
        ...                  variable_scale = 'log',
        ...                  scale = 'log').plot(ex2)
    """

    # traits
    id = Constant("edu.mit.synbio.cytoflow.view.stats1d")
    friendly_id = Constant("1D Statistics View")

    # NOTE(review): the parent class passes a plain string as `err_string`;
    # here a Constant trait is passed instead.  Confirm that util.Removed
    # accepts that.
    REMOVED_ERROR = Constant(
        "Statistics changed dramatically in 0.5; please see the documentation")
    by = util.Removed(err_string=REMOVED_ERROR)
    yfunction = util.Removed(err_string=REMOVED_ERROR)
    ychannel = util.Removed(err_string=REMOVED_ERROR)
    xvariable = util.Deprecated(new="variable")
    xscale = util.Deprecated(new='variable_scale')

    variable_scale = util.ScaleEnum

    def enum_plots(self, experiment):
        """
        Returns an iterator over the possible plots that this View can
        produce.  The values returned can be passed to :meth:`plot`.
        """

        return super().enum_plots(experiment)

    def plot(self, experiment, plot_name=None, **kwargs):
        """Plot a chart of a variable's values against a statistic.
        
        Parameters
        ----------
        
        variable_lim : (float, float)
            The limits on the variable axis
        
        color : a matplotlib color
            The color to plot with.  Overridden if `huefacet` is not `None`
            
        linewidth : float
            The width of the line, in points
            
        linestyle : ['solid' | 'dashed', 'dashdot', 'dotted' | (offset, on-off-dash-seq) | '-' | '--' | '-.' | ':' | 'None' | ' ' | '']
            
        marker : a matplotlib marker style
            See http://matplotlib.org/api/markers_api.html#module-matplotlib.markers
            
        markersize : int
            The marker size in points
            
        markerfacecolor : a matplotlib color
            The color to make the markers.  Overridden (?) if `huefacet` is not `None`
            
        alpha : the alpha blending value, from 0.0 (transparent) to 1.0 (opaque)
        
        capsize : scalar
            The size of the error bar caps, in points
            
        shade_error : bool
            If `False` (the default), plot the error statistic as traditional 
            "error bars."  If `True`, plot error statistic as a filled, shaded
            region.
            
        shade_alpha : float
            The transparency of the shaded error region, from 0.0 (transparent)
            to 1.0 (opaque.)  Default is 0.2.
        
        Raises
        ------
        util.CytoflowViewError
            If no experiment is given, or if `variable` is not a numeric
            condition of the experiment.

        Notes
        -----
                
        Other `kwargs` are passed to `matplotlib.pyplot.plot <https://matplotlib.org/devdocs/api/_as_gen/matplotlib.pyplot.plot.html>`_
        
        """

        if experiment is None:
            raise util.CytoflowViewError('experiment',
                                         "No experiment specified")

        # Raise the view-specific error here too, like every other check in
        # the statistics views, so that callers handling CytoflowViewError
        # see these failures as well.
        if self.variable not in experiment.conditions:
            raise util.CytoflowViewError(
                'variable',
                "Variable {} not in the experiment".format(self.variable))

        if not util.is_numeric(experiment[self.variable]):
            raise util.CytoflowViewError(
                'variable',
                "Variable {} must be numeric".format(self.variable))

        variable_scale = util.scale_factory(self.variable_scale,
                                            experiment,
                                            condition=self.variable)

        super().plot(experiment,
                     plot_name,
                     variable_scale=variable_scale,
                     **kwargs)

    def _grid_plot(self, experiment, grid, **kwargs):
        """
        Draw the statistic (and its error bars or shading) on `grid`.

        Parameters
        ----------
        experiment : the experiment being plotted

        grid : the facet grid to draw on
            Its `data` attribute holds the long-form data; `grid.map` is
            used to draw on every facet.

        scale, variable_scale : scale objects (required keyword arguments)
            The scales of the statistic's axis and of the variable's axis.

        variable_lim, lim : (float, float), optional
            The limits of the variable's axis and of the statistic's axis.
            If not given, they are computed from the data.

        orientation : {'vertical', 'horizontal'}, optional
            With 'vertical' (the default) the variable is on the X axis;
            otherwise it is on the Y axis.

        capsize, shade_error, shade_alpha : optional
            How to draw the error statistic.

        Any remaining keyword arguments are passed to `plt.plot`.

        Returns
        -------
        dict
            The `xscale`, `xlim`, `yscale` and `ylim` to apply to the plot.
        """

        data = grid.data
        data_scale = kwargs.pop('scale')
        variable_scale = kwargs.pop('variable_scale')

        stat = experiment.statistics[self.statistic]
        stat_name = stat.name
        if self.error_statistic[0]:
            err_stat = experiment.statistics[self.error_statistic]
            err_stat_name = err_stat.name
        else:
            err_stat = None
            err_stat_name = None

        variable_lim = kwargs.pop("variable_lim", None)
        if variable_lim is None:
            # pad the data's range by 10% on each side
            variable_lim = (variable_scale.clip(
                data[self.variable].min() *
                0.9), variable_scale.clip(data[self.variable].max() * 1.1))

        lim = kwargs.pop("lim", None)
        if lim is None:
            lim = (data_scale.clip(data[stat_name].min() * 0.9),
                   data_scale.clip(data[stat_name].max() * 1.1))

            if err_stat is not None:
                try:
                    # the error statistic holds (low, high) pairs
                    lim = (data_scale.clip(
                        min([x[0] for x in data[err_stat_name]]) * 0.9),
                           data_scale.clip(
                               max([x[1] for x in data[err_stat_name]]) * 1.1))
                except (TypeError, IndexError):
                    # The error statistic holds scalars.  Use the envelope
                    # of value -/+ error computed row by row; combining the
                    # smallest value with the smallest error (from possibly
                    # different rows) can leave error bars outside the axis.
                    low = (data[stat_name] - data[err_stat_name]).min()
                    high = (data[stat_name] + data[err_stat_name]).max()
                    lim = (data_scale.clip(low * 0.9),
                           data_scale.clip(high * 1.1))

        orientation = kwargs.pop('orientation', 'vertical')
        capsize = kwargs.pop('capsize', None)
        shade_error = kwargs.pop('shade_error', False)
        shade_alpha = kwargs.pop('shade_alpha', 0.2)

        if orientation == 'vertical':
            # plot the error bars first so the axis labels don't get overwritten
            if err_stat is not None:
                if shade_error:
                    grid.map(_v_error_shade,
                             self.variable,
                             stat_name,
                             err_stat_name,
                             alpha=shade_alpha)
                else:
                    grid.map(_v_error_bars,
                             self.variable,
                             stat_name,
                             err_stat_name,
                             capsize=capsize)

            grid.map(plt.plot, self.variable, stat_name, **kwargs)

            return dict(xscale=variable_scale,
                        xlim=variable_lim,
                        yscale=data_scale,
                        ylim=lim)
        else:
            # plot the error bars first so the axis labels don't get overwritten
            if err_stat is not None:
                if shade_error:
                    grid.map(_h_error_shade,
                             stat_name,
                             self.variable,
                             err_stat_name,
                             alpha=shade_alpha)
                else:
                    grid.map(_h_error_bars,
                             stat_name,
                             self.variable,
                             err_stat_name,
                             capsize=capsize)

            grid.map(plt.plot, stat_name, self.variable, **kwargs)

            return dict(yscale=variable_scale,
                        ylim=variable_lim,
                        xscale=data_scale,
                        xlim=lim)
Beispiel #8
0
class Stats2DView(HasStrictTraits):
    """
    Plot two statistics on a scatter plot.  A point (X,Y) is drawn for every
    pair of elements with the same value of `variable`; the X value is from 
    `xstatistic` and the Y value is from `ystatistic`.
    
    Attributes
    ----------
    name : Str
        The plot's name 
    
    variable : Str
        the name of the conditioning variable
        
    xstatistic : Tuple(Str, Str)
        The statistic to plot on the X axis.  Must have the same indices
        as `ystatistic`.
        
    xscale : Enum("linear", "log", "logicle") (default = "linear")
        What scale to use on the X axis
    
    ystatistic : Tuple(Str, Str)
       The statistic to plot on the Y axis.  Must have the same indices
       as `xstatistic`.
        
    yscale : Enum("linear", "log", "logicle") (default = "linear")
        What scale to use on the Y axis
        
    xfacet : Str
        the conditioning variable for horizontal subplots
        
    yfacet : Str
        the conditioning variable for vertical subplots
        
    huefacet : 
        the conditioning variable for color.
        
    huescale : Enum("linear", "log", "logicle") (default = "linear")
        scale for the hue facet, if there are a lot of hue values.
        
    x_error_statistic, y_error_statistic : Tuple(Str, Str)
        if specified, draw error bars.  must be the name of a statistic,
        with the same indices as `xstatistic` and `ystatistic`.
    
    subset : Str
        What subset of the data to plot?
        
    Examples
    --------
    
    Assume we want an input-output curve for a repressor that's under the
    control of a Dox-inducible promoter.  We have an "input" channel
    `(Dox --> eYFP, FITC-A channel)` and an output channel 
    `(Dox --> repressor --| eBFP, Pacific Blue channel)` as well as a 
    constitutive expression channel (mKate, PE-Tx-Red-YG-A channel). 
    We have induced several wells with different amounts of Dox.  We want 
    to plot the relationship between the input and output channels (binned by 
    input channel intensity) as we vary Dox, faceted by constitutive channel 
    bin.
    
    >>> cfp_bin_op = flow.BinningOp(name = "CFP_Bin",
    ...                             channel = "PE-Tx-Red-YG-A",
    ...                             scale = "log",
    ...                             bin_width = 0.1)
    >>> ifp_bin_op = flow.BinningOp(name = "IFP_Bin",
    ...                             channel = "Pacific Blue-A",
    ...                             scale = "log",
    ...                             bin_width = 0.1).apply(ex_cfp_binned)
    >>> ifp_mean = flow.ChannelStatisticOp(name = "IFP",
    ...                                    channel = "FITC-A",
    ...                                    by = ["IFP_Bin", "CFP_Bin"],
    ...                                    function = flow.geom_mean)
    >>> ofp_mean = flow.ChannelStatisticOp(name = "OFP",
    ...                                    channel = "Pacific_Blue-A",
    ...                                    by = ["IFP_Bin", "CFP_Bin"],
    ...                                    function = flow.geom_mean)
    >>> ex = cfp_bin_op.apply(ex)
    >>> ex = ifp_bin_op.apply(ex)
    >>> ex = ifp_mean.apply(ex)
    >>> ex = ofp_mean.apply(ex)
    >>> view = flow.Stats2DView(name = "IFP vs OFP",
    ...                         variable = "IFP_Bin",
    ...                         xstatistic = ("IFP", "geom_mean"),
    ...                         ystatistic = ("OFP", "geom_mean"),
    ...                         huefacet = "CFP_Bin").plot(ex_ifp_binned)
    >>> view.plot(ex_binned)
    """

    # traits
    id = "edu.mit.synbio.cytoflow.view.stats2d"
    friendly_id = "2D Statistics View"

    # deprecated or removed attributes give warnings & errors, respectively
    by = util.Deprecated(
        new='variable', err_string="'by' is deprecated, please use 'variable'")

    STATS_REMOVED = "{} has been removed. Statistics changed dramatically in 0.5; please see the documentation."

    xchannel = util.Removed(err_string=STATS_REMOVED)
    xfunction = util.Removed(err_string=STATS_REMOVED)
    ychannel = util.Removed(err_string=STATS_REMOVED)
    yfunction = util.Removed(err_string=STATS_REMOVED)

    name = Str
    variable = Str
    xstatistic = Tuple(Str, Str)
    xscale = util.ScaleEnum
    ystatistic = Tuple(Str, Str)
    yscale = util.ScaleEnum

    xfacet = Str
    yfacet = Str
    huefacet = Str
    huescale = util.ScaleEnum

    x_error_statistic = Tuple(Str, Str)
    y_error_statistic = Tuple(Str, Str)

    subset = Str

    def enum_plots(self, experiment):
        """
        Returns an iterator over the possible plots that this View can
        produce.  The values returned can be passed to "plot".
        """

        # TODO - all this is copied from below.  can we abstract it out somehow?

        if not experiment:
            raise util.CytoflowViewError("No experiment specified")

        if not self.variable:
            raise util.CytoflowViewError("variable not set")

        if self.variable not in experiment.conditions:
            raise util.CytoflowViewError(
                "variable {0} not in the experiment".format(self.variable))

        if not self.xstatistic:
            raise util.CytoflowViewError("X statistic not set")

        if self.xstatistic not in experiment.statistics:
            raise util.CytoflowViewError(
                "Can't find X statistic {} in experiment".format(
                    self.ystatistic))
        else:
            xstat = experiment.statistics[self.xstatistic]

        if not self.ystatistic:
            raise util.CytoflowViewError("Y statistic not set")

        if self.ystatistic not in experiment.statistics:
            raise util.CytoflowViewError(
                "Can't find Y statistic {} in experiment".format(
                    self.ystatistic))
        else:
            ystat = experiment.statistics[self.ystatistic]

        if not xstat.index.equals(ystat.index):
            raise util.CytoflowViewError(
                "X statistic and Y statistic must have "
                "the same indices: {}".format(xstat.index.names))

        if self.x_error_statistic[0]:
            if self.x_error_statistic not in experiment.statistics:
                raise util.CytoflowViewError(
                    "X error statistic not in experiment")
            else:
                x_error_stat = experiment.statistics[self.x_error_statistic]

            if not x_error_stat.index.equals(xstat.index):
                raise util.CytoflowViewError(
                    "X error statistic doesn't have the "
                    "same indices as the X statistic")
        else:
            x_error_stat = None

        if self.y_error_statistic[0]:
            if self.y_error_statistic not in experiment.statistics:
                raise util.CytoflowViewError(
                    "Y error statistic not in experiment")
            else:
                y_error_stat = experiment.statistics[self.y_error_statistic]

            if not y_error_stat.index.equals(ystat.index):
                raise util.CytoflowViewError(
                    "Y error statistic doesn't have the "
                    "same indices as the Y statistic")
        else:
            y_error_stat = None

        data = pd.DataFrame(index=xstat.index)

        xname = util.random_string(6)
        data[xname] = xstat

        yname = util.random_string(6)
        data[yname] = ystat

        if x_error_stat is not None:
            #x_error_data = x_error_stat.reset_index()
            x_error_name = util.random_string(6)
            data[x_error_name] = x_error_stat

        if y_error_stat is not None:
            y_error_name = util.random_string(6)
            data[y_error_name] = y_error_stat

        if y_error_stat is not None:
            y_error_data = y_error_stat.reset_index()
            y_error_name = util.random_string()
            data[y_error_name] = y_error_data[y_error_stat.name]

        if self.subset:
            try:
                # TODO - either sanitize column names, or check to see that
                # all conditions are valid Python variables
                data = data.query(self.subset)
            except:
                raise util.CytoflowViewError(
                    "Subset string '{0}' isn't valid".format(self.subset))

            if len(data) == 0:
                raise util.CytoflowViewError(
                    "Subset string '{0}' returned no values".format(
                        self.subset))

        names = list(data.index.names)

        for name in names:
            unique_values = data.index.get_level_values(name).unique()
            if len(unique_values) == 1:
                warn("Only one value for level {}; dropping it.".format(name),
                     util.CytoflowViewWarning)
                try:
                    data.index = data.index.droplevel(name)
                except AttributeError:
                    raise util.CytoflowViewError("Must have more than one "
                                                 "value to plot.")

        names = list(data.index.names)

        if not self.variable in experiment.conditions:
            raise util.CytoflowViewError(
                "Variable {} not in experiment".format(self.variable))

        if not self.variable in data.index.names:
            raise util.CytoflowViewError(
                "Variable {} not in statistic; must be one of {}".format(
                    self.variable, data.index.names))

        if self.xfacet and self.xfacet not in experiment.conditions:
            raise util.CytoflowViewError(
                "X facet {} not in the experiment".format(self.xfacet))

        if self.xfacet and self.xfacet not in data.index.names:
            raise util.CytoflowViewError(
                "X facet {} not in statistics; must be one of {}".format(
                    self.xfacet, data.index.names))

        if self.yfacet and self.yfacet not in experiment.conditions:
            raise util.CytoflowViewError(
                "Y facet {} not in the experiment".format(self.yfacet))

        if self.yfacet and self.yfacet not in data.index.names:
            raise util.CytoflowViewError(
                "Y facet {} not in statistics; must be one of {}".format(
                    self.yfacet, data.index.names))

        if self.huefacet and self.huefacet not in experiment.metadata:
            raise util.CytoflowViewError(
                "Hue facet {} not in the experiment".format(self.huefacet))

        if self.huefacet and self.huefacet not in data.index.names:
            raise util.CytoflowViewError(
                "Hue facet {} not in statistics; must be one of {}".format(
                    self.huefacet, data.index.names))

        facets = filter(
            lambda x: x,
            [self.variable, self.xfacet, self.yfacet, self.huefacet])
        if len(facets) != len(set(facets)):
            raise util.CytoflowViewError("Can't reuse facets")

        by = list(set(names) - set(facets))

        class plot_enum(object):
            def __init__(self, experiment, by):
                self._iter = None
                self._returned = False

                if by:
                    self._iter = experiment.data.groupby(by).__iter__()

            def __iter__(self):
                return self

            def next(self):
                if self._iter:
                    return self._iter.next()[0]
                else:
                    if self._returned:
                        raise StopIteration
                    else:
                        self._returned = True
                        return None

        return plot_enum(experiment, by)

    def plot(self, experiment, plot_name=None, **kwargs):
        """
        Plot one statistic against another, optionally with error bars.

        The X and Y statistics are looked up in ``experiment.statistics`` and
        must have identical indices.  After `subset` is applied, index levels
        with a single value are dropped; every remaining level must be used
        as `variable`, as one of the facets, or be selected with `plot_name`.

        Parameters
        ----------
        experiment
            The experiment whose ``statistics``, ``conditions``, ``metadata``
            and ``data`` are read.

        plot_name : optional
            Key of the group to plot when some index levels are not used by
            `variable` or a facet.  Required in that case, rejected otherwise.

        col_wrap : int, optional
            Forwarded to `seaborn.FacetGrid`.  Requires `xfacet` and can't be
            combined with `yfacet`.

        xlim, ylim : tuple, optional
            Axis limits.  When omitted they are derived from the plotted
            values (and the error statistics, if any), clipped by the scale.

        sharex, sharey : bool (default = True)
            Forwarded to `seaborn.FacetGrid`.

        Other Parameters
        ----------------
        Remaining `kwargs` are forwarded to `matplotlib.pyplot.plot`;
        `antialiased` defaults to ``True``.

        Raises
        ------
        util.CytoflowViewError
            If a required trait is unset, a statistic or facet can't be
            found, the statistics' indices differ, the subset is invalid or
            empty, facets are reused, or `plot_name` is missing or invalid.
        """

        if not experiment:
            raise util.CytoflowViewError("No experiment specified")

        if not self.variable:
            raise util.CytoflowViewError("variable not set")

        if self.variable not in experiment.conditions:
            raise util.CytoflowViewError(
                "variable {0} not in the experiment".format(self.variable))

        if not self.xstatistic:
            raise util.CytoflowViewError("X statistic not set")

        if self.xstatistic not in experiment.statistics:
            # name the X statistic in the message, not the Y statistic
            raise util.CytoflowViewError(
                "Can't find X statistic {} in experiment".format(
                    self.xstatistic))
        xstat = experiment.statistics[self.xstatistic]

        if not self.ystatistic:
            raise util.CytoflowViewError("Y statistic not set")

        if self.ystatistic not in experiment.statistics:
            raise util.CytoflowViewError(
                "Can't find Y statistic {} in experiment".format(
                    self.ystatistic))
        ystat = experiment.statistics[self.ystatistic]

        if not xstat.index.equals(ystat.index):
            raise util.CytoflowViewError(
                "X statistic and Y statistic must have "
                "the same indices: {}".format(xstat.index.names))

        # an error statistic counts as "set" when its first element is
        # non-empty
        x_error_stat = None
        if self.x_error_statistic[0]:
            if self.x_error_statistic not in experiment.statistics:
                raise util.CytoflowViewError(
                    "X error statistic not in experiment")
            x_error_stat = experiment.statistics[self.x_error_statistic]

            if not x_error_stat.index.equals(xstat.index):
                raise util.CytoflowViewError(
                    "X error statistic doesn't have the "
                    "same indices as the X statistic")

        y_error_stat = None
        if self.y_error_statistic[0]:
            if self.y_error_statistic not in experiment.statistics:
                raise util.CytoflowViewError(
                    "Y error statistic not in experiment")
            y_error_stat = experiment.statistics[self.y_error_statistic]

            if not y_error_stat.index.equals(ystat.index):
                raise util.CytoflowViewError(
                    "Y error statistic doesn't have the "
                    "same indices as the Y statistic")

        # pop col_wrap exactly once; a second pop would always yield None and
        # silently disable column wrapping
        col_wrap = kwargs.pop('col_wrap', None)

        if col_wrap and self.yfacet:
            raise util.CytoflowViewError(
                "Can't set yfacet and col_wrap at the same time.")

        # gather everything into one frame; the columns get random names so
        # they can't collide with the index level names
        data = pd.DataFrame(index=xstat.index)

        xname = util.random_string(6)
        data[xname] = xstat

        yname = util.random_string(6)
        data[yname] = ystat

        x_error_name = None
        if x_error_stat is not None:
            x_error_name = util.random_string(6)
            data[x_error_name] = x_error_stat

        # the error statistic shares the frame's index, so it is assigned
        # directly and pandas aligns it row by row
        y_error_name = None
        if y_error_stat is not None:
            y_error_name = util.random_string(6)
            data[y_error_name] = y_error_stat

        if self.subset:
            try:
                # TODO - either sanitize column names, or check to see that
                # all conditions are valid Python variables
                data = data.query(self.subset)
            except Exception:
                raise util.CytoflowViewError(
                    "Subset string '{0}' isn't valid".format(self.subset))

            if len(data) == 0:
                raise util.CytoflowViewError(
                    "Subset string '{0}' returned no values".format(
                        self.subset))

        # index levels with a single value carry no information; drop them
        for name in list(data.index.names):
            unique_values = data.index.get_level_values(name).unique()
            if len(unique_values) == 1:
                warn("Only one value for level {}; dropping it.".format(name),
                     util.CytoflowViewWarning)
                try:
                    data.index = data.index.droplevel(name)
                except AttributeError:
                    raise util.CytoflowViewError("Must have more than one "
                                                 "value to plot.")

        names = list(data.index.names)

        if self.variable not in names:
            raise util.CytoflowViewError(
                "Variable {} not in statistic; must be one of {}".format(
                    self.variable, data.index.names))

        if self.xfacet and self.xfacet not in experiment.conditions:
            raise util.CytoflowViewError(
                "X facet {} not in the experiment".format(self.xfacet))

        if self.xfacet and self.xfacet not in names:
            raise util.CytoflowViewError(
                "X facet {} not in statistics; must be one of {}".format(
                    self.xfacet, data.index.names))

        if self.yfacet and self.yfacet not in experiment.conditions:
            raise util.CytoflowViewError(
                "Y facet {} not in the experiment".format(self.yfacet))

        if self.yfacet and self.yfacet not in names:
            raise util.CytoflowViewError(
                "Y facet {} not in statistics; must be one of {}".format(
                    self.yfacet, data.index.names))

        if self.huefacet and self.huefacet not in experiment.metadata:
            raise util.CytoflowViewError(
                "Hue facet {} not in the experiment".format(self.huefacet))

        if self.huefacet and self.huefacet not in names:
            raise util.CytoflowViewError(
                "Hue facet {} not in statistics; must be one of {}".format(
                    self.huefacet, data.index.names))

        if col_wrap and not self.xfacet:
            raise util.CytoflowViewError("Must set xfacet to use col_wrap.")

        # a real list (not a filter object) so len() works on Python 3 too
        facets = [
            facet
            for facet in (self.variable, self.xfacet, self.yfacet,
                          self.huefacet)
            if facet
        ]
        if len(facets) != len(set(facets)):
            raise util.CytoflowViewError("Can't reuse facets")

        unused_names = list(set(names) - set(facets))

        if plot_name is not None and not unused_names:
            raise util.CytoflowViewError("You specified a plot name, but all "
                                         "the facets are already used")

        data.reset_index(inplace=True)

        if unused_names:
            groupby = data.groupby(unused_names)

            if plot_name is None:
                raise util.CytoflowViewError(
                    "You must use facets {} in either the "
                    "plot variables or the plot name. "
                    "Possible plot names: {}".format(unused_names,
                                                     groupby.groups.keys()))

            if plot_name not in set(groupby.groups.keys()):
                raise util.CytoflowViewError(
                    "Plot {} not from plot_enum; must "
                    "be one of {}".format(plot_name, groupby.groups.keys()))

            data = groupby.get_group(plot_name)

        # the scales are always built from the plotted statistics; the error
        # statistic traits were subscripted above, so they can't be None here
        xscale = util.scale_factory(self.xscale,
                                    experiment,
                                    statistic=self.xstatistic)
        yscale = util.scale_factory(self.yscale,
                                    experiment,
                                    statistic=self.ystatistic)

        xlim = kwargs.pop("xlim", None)
        if xlim is None:
            xlim = (xscale.clip(data[xname].min() * 0.9),
                    xscale.clip(data[xname].max() * 1.1))

            if x_error_stat is not None:
                try:
                    # error values given as (low, high) pairs
                    xlim = (xscale.clip(
                        min([x[0] for x in x_error_stat]) * 0.9),
                            xscale.clip(
                                max([x[1] for x in x_error_stat]) * 1.1))
                except (IndexError, TypeError):
                    # scalar error values can't be subscripted: numpy scalars
                    # raise IndexError, plain Python numbers raise TypeError
                    xlim = (xscale.clip(x_error_stat.min() * 0.9),
                            xscale.clip(x_error_stat.max() * 1.1))

        ylim = kwargs.pop("ylim", None)
        if ylim is None:
            ylim = (yscale.clip(data[yname].min() * 0.9),
                    yscale.clip(data[yname].max() * 1.1))

            if y_error_stat is not None:
                try:
                    ylim = (yscale.clip(
                        min([x[0] for x in y_error_stat]) * 0.9),
                            yscale.clip(
                                max([x[1] for x in y_error_stat]) * 1.1))
                except (IndexError, TypeError):
                    ylim = (yscale.clip(y_error_stat.min() * 0.9),
                            yscale.clip(y_error_stat.max() * 1.1))

        kwargs.setdefault('antialiased', True)

        cols = col_wrap if col_wrap else \
               len(data[self.xfacet].unique()) if self.xfacet else 1

        sharex = kwargs.pop('sharex', True)
        sharey = kwargs.pop('sharey', True)

        grid = sns.FacetGrid(data,
                             size=(6 / cols),
                             aspect=1.5,
                             col=(self.xfacet if self.xfacet else None),
                             row=(self.yfacet if self.yfacet else None),
                             hue=(self.huefacet if self.huefacet else None),
                             col_order=(np.sort(data[self.xfacet].unique())
                                        if self.xfacet else None),
                             row_order=(np.sort(data[self.yfacet].unique())
                                        if self.yfacet else None),
                             hue_order=(np.sort(data[self.huefacet].unique())
                                        if self.huefacet else None),
                             col_wrap=col_wrap,
                             legend_out=False,
                             sharex=sharex,
                             sharey=sharey,
                             xlim=xlim,
                             ylim=ylim)

        for ax in grid.axes.flatten():
            ax.set_xscale(self.xscale, **xscale.mpl_params)
            ax.set_yscale(self.yscale, **yscale.mpl_params)

        # plot the error bars first so the axis labels don't get overwritten.
        # Compare against None: the truth value of a pandas Series is
        # ambiguous and raises ValueError.
        if x_error_stat is not None:
            grid.map(_x_error_bars, xname, yname, x_error_name)

        if y_error_stat is not None:
            grid.map(_y_error_bars, xname, yname, y_error_name)

        grid.map(plt.plot, xname, yname, **kwargs)

        # if we have an xfacet, make sure the y scale is the same for each
        fig = plt.gcf()
        fig_y_min = float("inf")
        fig_y_max = float("-inf")
        for ax in fig.get_axes():
            ax_y_min, ax_y_max = ax.get_ylim()
            if ax_y_min < fig_y_min:
                fig_y_min = ax_y_min
            if ax_y_max > fig_y_max:
                fig_y_max = ax_y_max

        for ax in fig.get_axes():
            ax.set_ylim(fig_y_min, fig_y_max)

        # if we have a yfacet, make sure the x scale is the same for each
        fig_x_min = float("inf")
        fig_x_max = float("-inf")
        for ax in fig.get_axes():
            ax_x_min, ax_x_max = ax.get_xlim()
            if ax_x_min < fig_x_min:
                fig_x_min = ax_x_min
            if ax_x_max > fig_x_max:
                fig_x_max = ax_x_max

        # apply the shared X range, mirroring what is done for Y above
        for ax in fig.get_axes():
            ax.set_xlim(fig_x_min, fig_x_max)

        # if we have a hue facet and a lot of hues, make a color bar instead
        # of a super-long legend.
        if self.huefacet:
            # NOTE(review): 'axes.color_cycle' is a legacy rcParam; newer
            # matplotlib releases use 'axes.prop_cycle' - confirm the
            # matplotlib version this file targets.
            current_palette = mpl.rcParams['axes.color_cycle']
            if util.is_numeric(experiment.data[self.huefacet]) and \
               len(grid.hue_names) > len(current_palette):

                plot_ax = plt.gca()
                cmap = mpl.colors.ListedColormap(
                    sns.color_palette("husl", n_colors=len(grid.hue_names)))
                cax, _ = mpl.colorbar.make_axes(plt.gca())
                hue_scale = util.scale_factory(self.huescale,
                                               experiment,
                                               condition=self.huefacet)
                mpl.colorbar.ColorbarBase(cax,
                                          cmap=cmap,
                                          norm=hue_scale.color_norm(),
                                          label=self.huefacet)
                # hand the "current axes" back to the plot
                plt.sca(plot_ax)
            else:
                grid.add_legend(title=self.huefacet)

        plt.xlabel(self.xstatistic)
        plt.ylabel(self.ystatistic)

        if unused_names and plot_name is not None:
            plt.title("{0} = {1}".format(unused_names, plot_name))
Beispiel #9
0
class Stats1DView(Base1DStatisticsView):
    """
    Plot a statistic.  The value of the statistic will be plotted on the
    Y axis; a numeric conditioning variable must be chosen for the X axis.
    Every variable in the statistic must be specified as either the `variable`
    or one of the plot facets.
    
    Attributes
    ----------
    name : Str
        The plot's name 
        
    statistic : Tuple(Str, Str)
        The statistic to plot.  The first element is the name of the module that
        added the statistic, and the second element is the name of the statistic.
    
    variable : Str
        the name of the conditioning variable to put on the X axis.  Must be
        numeric (float or int).
        
    xscale : Enum("linear", "log") (default = "linear")
        The scale to use on the X axis
        
    yscale : Enum("linear", "log", "logicle") (default = "linear")
        The scale to use on the Y axis
        
    xfacet : Str
        the conditioning variable for horizontal subplots
        
    yfacet : Str
        the conditioning variable for vertical subplots
        
    huefacet : 
        the conditioning variable for color.
        
    huescale :
        the scale to use on the "hue" axis, if there are many values of
        the hue facet.
        
    error_statistic : Tuple(Str, Str)
        A statistic to use to draw error bars.  When the default Y limits are
        computed, values that are pairs are read as (low, high) bounds and
        scalar values are read as +- offsets from the statistic.
        
    subset : String
        Passed to pandas.DataFrame.query(), to get a subset of the statistic
        before we plot it.

        
    Examples
    --------
    
    Assume we want a Dox induction curve in a transient transfection experiment.  
    We have induced several wells with different amounts of Dox and the output
    of the Dox-inducible channel is "Pacific Blue-A".  We have a constitutive
    expression channel in "PE-Tx-Red-YG-A". We want to bin all the data by
    constitutive expression level, then plot the dose-response (geometric mean)
    curve in each bin. 
    
    >>> ex_bin = flow.BinningOp(name = "CFP_Bin",
    ...                         channel = "PE-Tx-Red-YG-A",
    ...                         scale = "log",
    ...                         bin_width = 0.1).apply(ex)
    >>> ex_stat = flow.ChannelStatisticOp(name = "DoxCFP",
    ...                                   by = ["Dox", "CFP_Bin"],
    ...                                   channel = "Pacific Blue-A",
    ...                                   function = flow.geom_mean).apply(ex_bin)
    >>> view = flow.Stats1DView(name = "Dox vs IFP",
    ...                         statistic = ("DoxCFP", "geom_mean"),
    ...                         variable = "Dox",
    ...                         xscale = "log",
    ...                         huefacet = "CFP_Bin")
    >>> view.plot(ex_stat)
    """

    # traits
    id = "edu.mit.synbio.cytoflow.view.stats1d"
    friendly_id = "1D Statistics View"

    # traits that no longer exist; each is replaced by util.Removed with this
    # message
    REMOVED_ERROR = "Statistics changed dramatically in 0.5; please see the documentation"
    by = util.Removed(err_string=REMOVED_ERROR)
    yfunction = util.Removed(err_string=REMOVED_ERROR)
    ychannel = util.Removed(err_string=REMOVED_ERROR)
    xvariable = util.Deprecated(new="variable")

    def enum_plots(self, experiment):
        """
        Returns an iterator over the possible plots that this View can
        produce.  The values returned can be passed to "plot".
        """

        return super().enum_plots(experiment)

    def plot(self, experiment, plot_name=None, **kwargs):
        """Plot a chart of a variable's values against a statistic.
        
        Parameters
        ----------
        
        color : a matplotlib color
            The color to plot with.  Overridden if `huefacet` is not `None`
            
        linestyle : ['solid' | 'dashed', 'dashdot', 'dotted' | (offset, on-off-dash-seq) | '-' | '--' | '-.' | ':' | 'None' | ' ' | '']
            
        marker : a matplotlib marker style
            See http://matplotlib.org/api/markers_api.html#module-matplotlib.markers
            
        markersize : int
            The marker size in points
            
        markerfacecolor : a matplotlib color
            The color to make the markers.  Overridden (?) if `huefacet` is not `None`
            
        alpha : the alpha blending value, from 0.0 (transparent) to 1.0 (opaque)
        
        Other Parameters
        ----------------
        
        Other `kwargs` are passed to matplotlib.pyplot.plot_.
    
        .. _matplotlib.pyplot.plot: https://matplotlib.org/devdocs/api/_as_gen/matplotlib.pyplot.plot.html
        
        See Also
        --------
        BaseView.plot : common parameters for data views
        
        """

        super().plot(experiment, plot_name, **kwargs)

    def _grid_plot(self, experiment, grid, xlim, ylim, xscale, yscale,
                   **kwargs):
        """
        Draw the statistic (and its error bars, if any) onto `grid`.

        Parameters
        ----------
        experiment
            The experiment whose ``statistics`` hold `statistic` and
            `error_statistic`.

        grid
            The facet grid to draw on; its ``data`` attribute supplies the
            values and its ``map`` method does the drawing.

        xlim, ylim : tuple or None
            Axis limits chosen by the caller.  ``None`` means "compute a
            default from the data".

        xscale, yscale
            Scale objects; their ``clip`` method is applied to the default
            limits.

        **kwargs
            Forwarded to the error-bar function and `matplotlib.pyplot.plot`.

        Returns
        -------
        dict
            ``{'xlim': xlim, 'ylim': ylim}`` with the limits that apply.
        """

        data = grid.data

        stat = experiment.statistics[self.statistic]
        stat_name = stat.name

        # an error statistic counts as "set" when its first element is
        # non-empty
        has_error = bool(self.error_statistic[0])
        if has_error:
            err_stat = experiment.statistics[self.error_statistic]
            err_stat_name = err_stat.name

        # honor limits passed in by the caller.  They arrive as named
        # parameters, so they can never show up in kwargs.
        if xlim is None:
            xlim = (xscale.clip(data[self.variable].min() * 0.9),
                    xscale.clip(data[self.variable].max() * 1.1))

        if ylim is None:
            ylim = (yscale.clip(data[stat_name].min() * 0.9),
                    yscale.clip(data[stat_name].max() * 1.1))

            if has_error:
                try:
                    # error values given as (low, high) pairs
                    ylim = (yscale.clip(
                        min([x[0] for x in data[err_stat_name]]) * 0.9),
                            yscale.clip(
                                max([x[1]
                                     for x in data[err_stat_name]]) * 1.1))
                except (IndexError, TypeError):
                    # scalar error values can't be subscripted: numpy scalars
                    # raise IndexError, plain Python numbers raise TypeError
                    ylim = (yscale.clip(
                        (data[stat_name].min() - data[err_stat_name].min()) *
                        0.9),
                            yscale.clip((data[stat_name].max() +
                                         data[err_stat_name].max()) * 1.1))

        # plot the error bars first so the axis labels don't get overwritten
        if has_error:
            grid.map(_error_bars, self.variable, stat_name, err_stat_name,
                     **kwargs)

        grid.map(plt.plot, self.variable, stat_name, **kwargs)

        return {'xlim': xlim, 'ylim': ylim}
Beispiel #10
0
class BarChartView(HasStrictTraits):
    """Plots a bar chart of some summary statistic
    
    Attributes
    ----------
    name : Str
        The bar chart's name 
    
    statistic : Tuple(Str, Str)
        the statistic we're plotting
        
    scale : Enum("linear", "log", "logicle") (default = "linear")
        The scale to use on the Y axis.
        
    variable : Str
        the name of the conditioning variable to group the chart's bars
        
    error_statistic : Tuple(Str, Str)
        if specified, a statistic to draw error bars.  if values are numeric,
        the bars are drawn +/- the value.  if the values are tuples, then
        the first element is the low error and the second element is the
        high error.
        
    xfacet : Str
        the conditioning variable for horizontal subplots
        
    yfacet : Str
        the conditioning variable for vertical subplots
        
    huefacet : Str
        the conditioning variable to make multiple bar colors
        
    orientation : Enum("horizontal", "vertical")
        do we plot the bar chart horizontally or vertically?
        TODO - waiting on seaborn v0.6
        
    subset : String
        Passed to pandas.DataFrame.query(), to get a subset of the statistic
        before we plot it.
        
    Examples
    --------
    >>> bar = flow.BarChartView()
    >>> bar.name = "Bar Chart"
    >>> bar.channel = 'Y2-A'
    >>> bar.variable = 'Y2-A+'
    >>> bar.huefacet = 'Dox'
    >>> bar.function = len
    >>> bar.plot(ex)
    """

    # traits
    id = "edu.mit.synbio.cytoflow.view.barchart"
    friendly_id = "Bar Chart"

    REMOVED_ERROR = "Statistics have changed dramatically in 0.5; please see the documentation"
    channel = util.Removed(err_string=REMOVED_ERROR)
    function = util.Removed(err_string=REMOVED_ERROR)
    error_bars = util.Removed(err_string=REMOVED_ERROR)

    by = util.Deprecated(new='variable')

    name = Str
    statistic = Tuple(Str, Str)
    scale = util.ScaleEnum
    variable = Str
    orientation = Enum("vertical", "horizontal")

    xfacet = Str
    yfacet = Str
    huefacet = Str

    error_statistic = Tuple(Str, Str)
    subset = Str

    def enum_plots(self, experiment):
        """
        Returns an iterator over the possible plots that this View can
        produce.  The values returned can be passed to "plot".

        The statistic is validated the same way `plot` validates it.  Index
        levels that still have more than one value and are not used by
        `variable` or a facet determine the plots: the iterator yields the
        group keys of ``experiment.data`` grouped by those levels, or a single
        ``None`` when every level is already used.

        Raises
        ------
        util.CytoflowViewError
            If the experiment, statistic, error statistic, subset, variable
            or facets are missing or inconsistent.
        """

        # TODO - all this is copied from below.  can we abstract it out somehow?

        if not experiment:
            raise util.CytoflowViewError("No experiment specified")

        if self.statistic not in experiment.statistics:
            raise util.CytoflowViewError(
                "Can't find the statistic {} in the experiment".format(
                    self.statistic))
        stat = experiment.statistics[self.statistic]

        # an error statistic counts as "set" when its first element is
        # non-empty
        error_stat = None
        if self.error_statistic[0]:
            if self.error_statistic not in experiment.statistics:
                raise util.CytoflowViewError(
                    "Can't find the error statistic in the experiment")
            error_stat = experiment.statistics[self.error_statistic]

            if not stat.index.equals(error_stat.index):
                raise util.CytoflowViewError(
                    "Data statistic and error statistic "
                    " don't have the same index.")

        data = pd.DataFrame(index=stat.index)
        data[stat.name] = stat

        if error_stat is not None:
            # random column name so it can't collide with the statistic's
            data[util.random_string(6)] = error_stat

        if self.subset:
            try:
                data = data.query(self.subset)
            except Exception:
                raise util.CytoflowViewError(
                    "Subset string '{0}' isn't valid".format(self.subset))

            if len(data) == 0:
                raise util.CytoflowViewError(
                    "Subset string '{0}' returned no values".format(
                        self.subset))

        # index levels with a single value carry no information; drop them
        for name in list(data.index.names):
            unique_values = data.index.get_level_values(name).unique()
            if len(unique_values) == 1:
                warn("Only one value for level {}; dropping it.".format(name),
                     util.CytoflowViewWarning)
                try:
                    data.index = data.index.droplevel(name)
                except AttributeError:
                    raise util.CytoflowViewError("Must have more than one "
                                                 "value to plot.")

        names = list(data.index.names)

        if not self.variable:
            raise util.CytoflowViewError("variable not specified")

        if self.variable not in names:
            raise util.CytoflowViewError("Variable {} isn't in the statistic; "
                                         "must be one of {}".format(
                                             self.variable, data.index.names))

        if self.xfacet and self.xfacet not in experiment.conditions:
            raise util.CytoflowViewError(
                "X facet {0} isn't in the experiment".format(self.xfacet))

        if self.xfacet and self.xfacet not in names:
            raise util.CytoflowViewError(
                "X facet {} is not a statistic index; "
                "must be one of {}".format(self.xfacet, data.index.names))

        if self.yfacet and self.yfacet not in experiment.conditions:
            raise util.CytoflowViewError(
                "Y facet {0} isn't in the experiment".format(self.yfacet))

        if self.yfacet and self.yfacet not in names:
            raise util.CytoflowViewError(
                "Y facet {} is not a statistic index; "
                "must be one of {}".format(self.yfacet, data.index.names))

        if self.huefacet and self.huefacet not in experiment.conditions:
            raise util.CytoflowViewError(
                "Hue facet {0} isn't in the experiment".format(self.huefacet))

        if self.huefacet and self.huefacet not in names:
            raise util.CytoflowViewError(
                "Hue facet {} is not a statistic index; "
                "must be one of {}".format(self.huefacet, data.index.names))

        # a real list (not a filter object) so len() works on Python 3 too
        facets = [
            facet
            for facet in (self.variable, self.xfacet, self.yfacet,
                          self.huefacet)
            if facet
        ]
        if len(facets) != len(set(facets)):
            raise util.CytoflowViewError("Can't reuse facets")

        by = list(set(names) - set(facets))

        if by:
            # one plot per group; only the group key is handed back.  A
            # generator supports the iterator protocol of both Python 2 and 3.
            return (key for key, _ in experiment.data.groupby(by))

        # every level is used: a single plot, whose name is None
        return iter([None])

    def plot(self, experiment, plot_name=None, **kwargs):
        """
        Plot a bar chart of :attr:`statistic`.

        Parameters
        ----------
        experiment : object
            Must expose ``statistics`` (mapping holding :attr:`statistic` and,
            if set, :attr:`error_statistic`) and ``conditions``.

        plot_name : object, optional
            Selects one group when the statistic has index levels that are
            not used as `variable` or as a facet.  Must be one of the group
            keys of those unused levels.

        **kwargs
            ``col_wrap``, ``sharex`` and ``sharey`` are consumed here; the
            remaining keyword arguments are forwarded to ``_barplot``.

        Raises
        ------
        util.CytoflowViewError
            If the view's configuration is inconsistent with the experiment
            or the statistic.
        """

        if not experiment:
            raise util.CytoflowViewError("No experiment specified")

        if self.statistic not in experiment.statistics:
            raise util.CytoflowViewError(
                "Can't find the statistic {} in the experiment".format(
                    self.statistic))
        stat = experiment.statistics[self.statistic]

        error_stat = None
        if self.error_statistic[0]:
            if self.error_statistic not in experiment.statistics:
                raise util.CytoflowViewError(
                    "Can't find the error statistic in the experiment")
            error_stat = experiment.statistics[self.error_statistic]

        if error_stat is not None:
            if not stat.index.equals(error_stat.index):
                raise util.CytoflowViewError(
                    "Data statistic and error statistic "
                    " don't have the same index.")

        data = pd.DataFrame(index=stat.index)

        data[stat.name] = stat

        if error_stat is not None:
            # random column name so it can't collide with the statistic's
            error_name = util.random_string(6)
            data[error_name] = error_stat
        else:
            error_name = None

        if self.subset:
            try:
                data = data.query(self.subset)
            except Exception:
                # don't trap KeyboardInterrupt / SystemExit like a bare except
                raise util.CytoflowViewError(
                    "Subset string '{0}' isn't valid".format(self.subset))

            if len(data) == 0:
                raise util.CytoflowViewError(
                    "Subset string '{0}' returned no values".format(
                        self.subset))

        # index levels with a single value carry no information; drop them
        names = list(data.index.names)
        for name in names:
            unique_values = data.index.get_level_values(name).unique()
            if len(unique_values) == 1:
                warn("Only one value for level {}; dropping it.".format(name),
                     util.CytoflowViewWarning)
                try:
                    data.index = data.index.droplevel(name)
                except AttributeError:
                    raise util.CytoflowViewError("Must have more than one "
                                                 "value to plot.")

        names = list(data.index.names)

        if not self.variable:
            raise util.CytoflowViewError("variable not specified")

        if self.variable not in names:
            raise util.CytoflowViewError("Variable {} isn't in the statistic; "
                                         "must be one of {}".format(
                                             self.variable, names))

        if self.xfacet and self.xfacet not in experiment.conditions:
            raise util.CytoflowViewError(
                "X facet {0} isn't in the experiment".format(self.xfacet))

        if self.xfacet and self.xfacet not in names:
            raise util.CytoflowViewError(
                "X facet {} is not a statistic index; "
                "must be one of {}".format(self.xfacet, names))

        if self.yfacet and self.yfacet not in experiment.conditions:
            raise util.CytoflowViewError(
                "Y facet {0} isn't in the experiment".format(self.yfacet))

        if self.yfacet and self.yfacet not in names:
            raise util.CytoflowViewError(
                "Y facet {} is not a statistic index; "
                "must be one of {}".format(self.yfacet, names))

        if self.huefacet and self.huefacet not in experiment.conditions:
            raise util.CytoflowViewError(
                "Hue facet {0} isn't in the experiment".format(self.huefacet))

        if self.huefacet and self.huefacet not in names:
            raise util.CytoflowViewError(
                "Hue facet {} is not a statistic index; "
                "must be one of {}".format(self.huefacet, names))

        col_wrap = kwargs.pop('col_wrap', None)

        if col_wrap and self.yfacet:
            raise util.CytoflowViewError(
                "Can't set yfacet and col_wrap at the same time.")

        if col_wrap and not self.xfacet:
            raise util.CytoflowViewError("Must set xfacet to use col_wrap.")

        # a real list (not filter()) so len() also works on Python 3
        facets = [facet
                  for facet in (self.variable, self.xfacet,
                                self.yfacet, self.huefacet)
                  if facet]
        if len(facets) != len(set(facets)):
            raise util.CytoflowViewError("Can't reuse facets")

        unused_names = list(set(names) - set(facets))

        if plot_name is not None and not unused_names:
            raise util.CytoflowViewError("You specified a plot name, but all "
                                         "the facets are already used")

        data.reset_index(inplace=True)
        if unused_names:
            groupby = data.groupby(unused_names)

            if plot_name is None:
                raise util.CytoflowViewError(
                    "You must use facets {} in either the "
                    "plot variables or the plot name. "
                    "Possible plot names: {}".format(unused_names,
                                                     groupby.groups.keys()))

            if plot_name not in groupby.groups:
                raise util.CytoflowViewError(
                    "Plot {} not from plot_enum; must "
                    "be one of {}".format(plot_name, groupby.groups.keys()))

            data = groupby.get_group(plot_name)

        sharex = kwargs.pop('sharex', True)
        sharey = kwargs.pop('sharey', True)

        cols = col_wrap if col_wrap else \
               len(data[self.xfacet].unique()) if self.xfacet else 1

        g = sns.FacetGrid(data,
                          size=(6 / cols),
                          aspect=1.5,
                          col=(self.xfacet if self.xfacet else None),
                          row=(self.yfacet if self.yfacet else None),
                          col_order=(np.sort(data[self.xfacet].unique())
                                     if self.xfacet else None),
                          row_order=(np.sort(data[self.yfacet].unique())
                                     if self.yfacet else None),
                          col_wrap=col_wrap,
                          legend_out=False,
                          sharex=sharex,
                          sharey=sharey)

        scale = util.scale_factory(self.scale,
                                   experiment,
                                   statistic=self.statistic)

        # because the bottom of a bar chart is "0", masking out bad
        # values on a log scale doesn't work.  we must clip instead.
        if self.scale == "log":
            scale.mode = "clip"

        # set the scale for each set of axes; can't just call plt.xscale()
        for ax in g.axes.flatten():
            if self.orientation == 'horizontal':
                ax.set_xscale(self.scale, **scale.mpl_params)
            else:
                ax.set_yscale(self.scale, **scale.mpl_params)

        map_args = [self.variable, stat.name]

        if self.huefacet:
            map_args.append(self.huefacet)

        if error_stat is not None:
            map_args.append(error_name)

        g.map(_barplot,
              *map_args,
              view=self,
              stat_name=stat.name,
              error_name=error_name,
              **kwargs)

        # Bound unconditionally: the axis-label code below needs the figure
        # even when neither sharex nor sharey is requested.
        fig = plt.gcf()

        if sharex:
            # if we are sharing x axes, make sure the x scale is the same
            # for each
            fig_x_min = float("inf")
            fig_x_max = float("-inf")

            for ax in fig.get_axes():
                ax_x_min, ax_x_max = ax.get_xlim()
                fig_x_min = min(fig_x_min, ax_x_min)
                fig_x_max = max(fig_x_max, ax_x_max)

            for ax in fig.get_axes():
                ax.set_xlim(fig_x_min, fig_x_max)

        if sharey:
            # if we are sharing y axes, make sure the y scale is the same
            # for each
            fig_y_min = float("inf")
            fig_y_max = float("-inf")

            for ax in fig.get_axes():
                ax_y_min, ax_y_max = ax.get_ylim()
                fig_y_min = min(fig_y_min, ax_y_min)
                fig_y_max = max(fig_y_max, ax_y_max)

            for ax in fig.get_axes():
                ax.set_ylim(fig_y_min, fig_y_max)

        if self.huefacet:
            labels = np.sort(data[self.huefacet].unique())
            labels = [str(x) for x in labels]
            g.add_legend(title=self.huefacet, label_order=labels)

        # the statistic is labelled on whichever axis carries its value
        plt.sca(fig.get_axes()[0])
        if self.orientation == 'horizontal':
            plt.xlabel(self.statistic)
        else:
            plt.ylabel(self.statistic)

        if unused_names and plot_name is not None:
            plt.title("{0} = {1}".format(unused_names, plot_name))
Beispiel #11
0
class Base1DStatisticsView(BaseStatisticsView):
    """
    The base class for 1-dimensional statistic views -- ie, the :attr:`variable`
    attribute is on the x axis, and the statistic value is on the y axis.
    
    Attributes
    ----------
    statistic : (str, str)
        The name of the statistic to plot.  Must be a key in the  
        :attr:`~Experiment.statistics` attribute of the :class:`~.Experiment`
        being plotted.
        
    error_statistic : (str, str)
        The name of the statistic used to plot error bars.  Must be a key in the
        :attr:`~Experiment.statistics` attribute of the :class:`~.Experiment`
        being plotted.
    """
    
    # attributes that no longer exist; accessing them reports REMOVED_ERROR
    REMOVED_ERROR = "Statistics changed dramatically in 0.5; please see the documentation"
    by = util.Removed(err_string = REMOVED_ERROR)
    yfunction = util.Removed(err_string = REMOVED_ERROR)
    ychannel = util.Removed(err_string = REMOVED_ERROR)
    channel = util.Removed(err_string = REMOVED_ERROR)
    function = util.Removed(err_string = REMOVED_ERROR)
    error_bars = util.Removed(err_string = REMOVED_ERROR)
    
    xvariable = util.Deprecated(new = "variable")
    
    statistic = Tuple(Str, Str)
    error_statistic = Tuple(Str, Str)
    
    def enum_plots(self, experiment):
        """
        Build the statistic data frame for `experiment` and delegate to the
        parent class's ``enum_plots``.
        """
        data = self._make_data(experiment)
        return super().enum_plots(experiment, data)
    
    def plot(self, experiment, plot_name = None, **kwargs):
        """
        Validate :attr:`variable`, build the x and y scales and delegate the
        drawing to the parent class's ``plot``.

        Raises
        ------
        util.CytoflowViewError
            If the experiment, statistic or variable is missing or invalid.
        """
        # also validates `experiment`, so it must run before anything
        # below touches the experiment
        data = self._make_data(experiment)
        
        if not self.variable:
            raise util.CytoflowViewError('variable',
                                         "variable not set")
            
        if self.variable not in experiment.conditions:
            raise util.CytoflowViewError('variable',
                                         "variable {0} not in the experiment"
                                    .format(self.variable))
            
        # only a numeric variable gets an x scale
        if util.is_numeric(experiment[self.variable]):
            xscale = util.scale_factory(self.xscale, experiment, condition = self.variable)
        else:
            xscale = None 
        
        yscale = util.scale_factory(self.yscale, 
                                    experiment, 
                                    statistic = self.statistic, 
                                    error_statistic = self.error_statistic)
            
        super().plot(experiment, 
                     data, 
                     plot_name, 
                     xscale = xscale, 
                     yscale = yscale, 
                     **kwargs)
        
    def _make_data(self, experiment):
        """
        Return a ``pandas.DataFrame`` indexed like the statistic, with one
        column for the statistic and, if :attr:`error_statistic` is set, one
        column for the error statistic.

        Raises
        ------
        util.CytoflowViewError
            If the experiment is None, the statistic is unset, missing or
            non-numeric, or the error statistic is missing, has a different
            index or shares the statistic's name.
        """
        if experiment is None:
            raise util.CytoflowViewError('experiment', "No experiment specified")
        
        # `statistic` is a 2-tuple trait, so the tuple itself is always
        # truthy; an unset statistic shows up as an empty first element
        # (the same test used for `error_statistic` below).
        if not self.statistic or not self.statistic[0]:
            raise util.CytoflowViewError('statistic', "Statistic not set")
        
        if self.statistic not in experiment.statistics:
            raise util.CytoflowViewError('statistic',
                                         "Can't find the statistic {} in the experiment"
                                         .format(self.statistic))
        stat = experiment.statistics[self.statistic]
            
        if not util.is_numeric(stat):
            raise util.CytoflowViewError('statistic',
                                         "Statistic must be numeric")
            
        error_stat = None
        if self.error_statistic[0]:
            if self.error_statistic not in experiment.statistics:
                raise util.CytoflowViewError('error_statistic',
                                             "Can't find the error statistic in the experiment")
            error_stat = experiment.statistics[self.error_statistic]
         
        if error_stat is not None:
            if not stat.index.equals(error_stat.index):
                raise util.CytoflowViewError('error_statistic',
                                             "Data statistic and error statistic "
                                             " don't have the same index.")
               
            # both become columns of the same frame, keyed by name
            if stat.name == error_stat.name:
                raise util.CytoflowViewError('error_statistic',
                                             "Data statistic and error statistic can "
                                             "not have the same name.")
               
        data = pd.DataFrame(index = stat.index)
        data[stat.name] = stat
        
        if error_stat is not None:
            data[error_stat.name] = error_stat
            
        return data
Beispiel #12
0
class Stats1DView(HasStrictTraits):
    """
    Plot a statistic.  The value of the statistic will be plotted on the
    Y axis; a numeric conditioning variable must be chosen for the X axis.
    Every variable in the statistic must be specified as either the `variable`
    or one of the plot facets.
    
    Attributes
    ----------
    name : Str
        The plot's name 
        
    statistic : Tuple(Str, Str)
        The statistic to plot.  The first element is the name of the module that
        added the statistic, and the second element is the name of the statistic.
    
    variable : Str
        the name of the conditioning variable to put on the X axis.  Must be
        numeric (float or int).
        
    xscale : Enum("linear", "log") (default = "linear")
        The scale to use on the X axis
        
    yscale : Enum("linear", "log", "logicle") (default = "linear")
        The scale to use on the Y axis
        
    xfacet : Str
        the conditioning variable for horizontal subplots
        
    yfacet : Str
        the conditioning variable for vertical subplots
        
    huefacet : 
        the conditioning variable for color.
        
    huescale :
        the scale to use on the "hue" axis, if there are many values of
        the hue facet.
        
    error_statistic : Tuple(Str, Str)
        A statistic to use to draw error bars; the bars are +- the value of
        the statistic.
        
    subset : String
        Passed to pandas.DataFrame.query(), to get a subset of the statistic
        before we plot it.

        
    Examples
    --------
    
    Assume we want a Dox induction curve in a transient transfection experiment.  
    We have induced several wells with different amounts of Dox and the output
    of the Dox-inducible channel is "Pacific Blue-A".  We have a constitutive
    expression channel in "PE-Tx-Red-YG-A". We want to bin all the data by
    constitutive expression level, then plot the dose-response (geometric mean)
    curve in each bin. 
    
    >>> ex_bin = flow.BinningOp(name = "CFP_Bin",
    ...                         channel = "PE-Tx-Red-YG-A",
    ...                         scale = "log",
    ...                         bin_width = 0.1).apply(ex)
    >>> ex_stat = flow.ChannelStatisticOp(name = "DoxCFP",
    ...                                   by = ["Dox", "CFP_Bin"],
    ...                                   channel = "Pacific Blue-A",
    ...                                   function = flow.geom_mean).apply(ex_bin)
    >>> view = flow.Stats1DView(name = "Dox vs IFP",
    ...                         statistic = ("DoxCFP", "geom_mean"),
    ...                         variable = "Dox",
    ...                         xscale = "log",
    ...                         huefacet = "CFP_Bin")
    >>> view.plot(ex_stat)
    """
    
    # traits   
    id = "edu.mit.synbio.cytoflow.view.stats1d"
    friendly_id = "1D Statistics View" 
    
    REMOVED_ERROR = "Statistics have changed dramatically in 0.5; please see the documentation"
    by = util.Removed(err_string = REMOVED_ERROR)
    yfunction = util.Removed(err_string = REMOVED_ERROR)
    ychannel = util.Removed(err_string = REMOVED_ERROR)
    xvariable = util.Deprecated(new = "variable")
    
    name = Str
    statistic = Tuple(Str, Str)
    variable = Str
    xscale = util.ScaleEnum
    yscale = util.ScaleEnum
    xfacet = Str
    yfacet = Str
    huefacet = Str
    huescale = util.ScaleEnum # TODO - make this actually work
    
    error_statistic = Tuple(Str, Str)
    subset = Str
    
    def enum_plots(self, experiment):
        """
        Returns an iterator over the possible plots that this View can
        produce.  The values returned can be passed to "plot".

        If every index level of the statistic is used as `variable` or as a
        facet, the iterator yields a single ``None``.  Otherwise it yields
        the keys obtained by grouping ``experiment.data`` by the unused
        levels.

        Raises
        ------
        util.CytoflowViewError
            If the view's configuration is inconsistent with the experiment
            or the statistic.
        """
        
        # TODO - all this is copied from below.  can we abstract it out somehow?
        
        if not experiment:
            raise util.CytoflowViewError("No experiment specified")
        
        if self.statistic not in experiment.statistics:
            raise util.CytoflowViewError("Can't find the statistic {} in the experiment"
                                         .format(self.statistic))
        stat = experiment.statistics[self.statistic]
            
        error_stat = None
        if self.error_statistic[0]:
            if self.error_statistic not in experiment.statistics:
                raise util.CytoflowViewError("Can't find the error statistic in the experiment")
            error_stat = experiment.statistics[self.error_statistic]
         
        if error_stat is not None:
            if not stat.index.equals(error_stat.index):
                raise util.CytoflowViewError("Data statistic and error statistic "
                                             " don't have the same index.")

        # only the index matters for enumerating plots, so the error
        # statistic is validated above but not copied into the frame
        data = pd.DataFrame(index = stat.index)
        data[stat.name] = stat
            
        if self.subset:
            try:
                data = data.query(self.subset)
            except Exception:
                # don't trap KeyboardInterrupt / SystemExit like a bare except
                raise util.CytoflowViewError("Subset string '{0}' isn't valid"
                                        .format(self.subset))
                
            if len(data) == 0:
                raise util.CytoflowViewError("Subset string '{0}' returned no values"
                                        .format(self.subset))
            
        # index levels with a single value carry no information; drop them
        names = list(data.index.names)
        for name in names:
            unique_values = data.index.get_level_values(name).unique()
            if len(unique_values) == 1:
                warn("Only one value for level {}; dropping it.".format(name),
                     util.CytoflowViewWarning)
                try:
                    data.index = data.index.droplevel(name)
                except AttributeError:
                    raise util.CytoflowViewError("Must have more than one "
                                                 "value to plot.")
                
        names = list(data.index.names)
                        
        if not self.variable:
            raise util.CytoflowViewError("variable not specified")
        
        if self.variable not in data.index.names:
            raise util.CytoflowViewError("Variable {} isn't in the statistic; "
                                         "must be one of {}"
                                         .format(self.variable, data.index.names))
        
        if self.xfacet and self.xfacet not in experiment.conditions:
            raise util.CytoflowViewError("X facet {0} isn't in the experiment"
                                    .format(self.xfacet))
            
        if self.xfacet and self.xfacet not in data.index.names:
            raise util.CytoflowViewError("X facet {} is not a statistic index; "
                                         "must be one of {}".format(self.xfacet, data.index.names))
        
        if self.yfacet and self.yfacet not in experiment.conditions:
            raise util.CytoflowViewError("Y facet {0} isn't in the experiment"
                                    .format(self.yfacet))

        if self.yfacet and self.yfacet not in data.index.names:
            raise util.CytoflowViewError("Y facet {} is not a statistic index; "
                                         "must be one of {}".format(self.yfacet, data.index.names))

        if self.huefacet and self.huefacet not in experiment.conditions:
            raise util.CytoflowViewError("Hue facet {0} isn't in the experiment"
                                    .format(self.huefacet))
            
        if self.huefacet and self.huefacet not in data.index.names:
            raise util.CytoflowViewError("Hue facet {} is not a statistic index; "
                                         "must be one of {}".format(self.huefacet, data.index.names)) 
            
        # a real list (not filter()) so len() also works on Python 3
        facets = [facet
                  for facet in (self.variable, self.xfacet, self.yfacet, self.huefacet)
                  if facet]
        
        if len(facets) != len(set(facets)):
            raise util.CytoflowViewError("Can't reuse facets")
        
        by = list(set(names) - set(facets))
        
        class plot_enum(object):
            """
            Iterator over plot names: the group keys of ``experiment.data``
            grouped by `by`, or a single ``None`` when `by` is empty.
            """
            
            def __init__(self, experiment, by):
                self._iter = None
                self._returned = False
                
                if by:
                    self._iter = iter(experiment.data.groupby(by))
                
            def __iter__(self):
                return self
            
            def __next__(self):
                if self._iter is not None:
                    # groupby iteration yields (key, group); only the key
                    # is a plot name
                    return next(self._iter)[0]
                
                if self._returned:
                    raise StopIteration
                
                self._returned = True
                return None
            
            # Python 2 spelling of the iterator protocol
            next = __next__
            
        return plot_enum(experiment, by)
    
    def plot(self, experiment, plot_name = None, **kwargs):
        """
        Plot :attr:`statistic` against :attr:`variable`.

        Parameters
        ----------
        experiment : object
            Must expose ``statistics``, ``conditions`` and ``data``.

        plot_name : object, optional
            Selects one group when the statistic has index levels that are
            not used as `variable` or as a facet.  If such levels exist and
            `plot_name` is None, every plot from :meth:`enum_plots` is
            drawn in turn.

        **kwargs
            ``col_wrap``, ``xlim``, ``ylim``, ``sharex`` and ``sharey`` are
            consumed here; the remaining keyword arguments are forwarded to
            ``plt.plot`` (and ``_error_bars``) through ``FacetGrid.map``.

        Raises
        ------
        util.CytoflowViewError
            If the view's configuration is inconsistent with the experiment
            or the statistic.
        """
        
        if not experiment:
            raise util.CytoflowViewError("No experiment specified")
        
        # `statistic` is a 2-tuple trait, so the tuple itself is always
        # truthy; an unset statistic shows up as an empty first element.
        if not self.statistic or not self.statistic[0]:
            raise util.CytoflowViewError("Statistic not set")
        
        if self.statistic not in experiment.statistics:
            raise util.CytoflowViewError("Can't find the statistic {} in the experiment"
                                         .format(self.statistic))
        stat = experiment.statistics[self.statistic]
            
        error_stat = None
        if self.error_statistic[0]:
            if self.error_statistic not in experiment.statistics:
                raise util.CytoflowViewError("Can't find the error statistic in the experiment")
            error_stat = experiment.statistics[self.error_statistic]
         
        if error_stat is not None:
            if not stat.index.equals(error_stat.index):
                raise util.CytoflowViewError("Data statistic and error statistic "
                                             " don't have the same index.")
               
        data = pd.DataFrame(index = stat.index)
        data[stat.name] = stat
        
        error_name = None
        if error_stat is not None:
            # random column name so it can't collide with the statistic's
            error_name = util.random_string(6)
            data[error_name] = error_stat
        
        if self.subset:
            try:
                # TODO - either sanitize column names, or check to see that
                # all conditions are valid Python variables
                data = data.query(self.subset)
            except Exception:
                # don't trap KeyboardInterrupt / SystemExit like a bare except
                raise util.CytoflowViewError("Subset string '{0}' isn't valid"
                                        .format(self.subset))
                
            if len(data) == 0:
                raise util.CytoflowViewError("Subset string '{0}' returned no values"
                                        .format(self.subset))
                
        # index levels with a single value carry no information; drop them
        names = list(data.index.names)
        for name in names:
            unique_values = data.index.get_level_values(name).unique()
            if len(unique_values) == 1:
                warn("Only one value for level {}; dropping it.".format(name),
                     util.CytoflowViewWarning)
                try:
                    data.index = data.index.droplevel(name)
                except AttributeError:
                    raise util.CytoflowViewError("Must have more than one "
                                                 "value to plot.")

        names = list(data.index.names)
               
        if not self.variable:
            raise util.CytoflowViewError("X variable not set")
            
        if self.variable not in experiment.conditions:
            raise util.CytoflowViewError("X variable {0} not in the experiment"
                                    .format(self.variable))
                        
        if self.variable not in names:
            raise util.CytoflowViewError("X variable {} is not a statistic index; "
                                         "must be one of {}".format(self.variable, names))
                
        if experiment.conditions[self.variable].dtype.kind not in "biufc": 
            raise util.CytoflowViewError("X variable {0} isn't numeric"
                                    .format(self.variable))
        
        if self.xfacet and self.xfacet not in experiment.conditions:
            raise util.CytoflowViewError("X facet {0} not in the experiment"
                                         .format(self.xfacet))
        
        if self.xfacet and self.xfacet not in names:
            raise util.CytoflowViewError("X facet {} is not a statistic index; "
                                         "must be one of {}".format(self.xfacet, names))
        
        if self.yfacet and self.yfacet not in experiment.conditions:
            raise util.CytoflowViewError("Y facet {0} not in the experiment"
                                         .format(self.yfacet))
        
        if self.yfacet and self.yfacet not in names:
            raise util.CytoflowViewError("Y facet {} is not a statistic index; "
                                         "must be one of {}".format(self.yfacet, names))
        
        # checked against `conditions`, like every other facet here and
        # like the hue-facet check in enum_plots
        if self.huefacet and self.huefacet not in experiment.conditions:
            raise util.CytoflowViewError("Hue facet {0} not in the experiment"
                                         .format(self.huefacet))
        
        if self.huefacet and self.huefacet not in names:
            raise util.CytoflowViewError("Hue facet {} is not a statistic index; "
                                         "must be one of {}".format(self.huefacet, names))  
            
        col_wrap = kwargs.pop('col_wrap', None)
        
        if col_wrap and self.yfacet:
            raise util.CytoflowViewError("Can't set yfacet and col_wrap at the same time.") 
        
        if col_wrap and not self.xfacet:
            raise util.CytoflowViewError("Must set xfacet to use col_wrap.")
            
        # a real list (not filter()) so len() also works on Python 3
        facets = [facet
                  for facet in (self.variable, self.xfacet, self.yfacet, self.huefacet)
                  if facet]
        if len(facets) != len(set(facets)):
            raise util.CytoflowViewError("Can't reuse facets")
        
        unused_names = list(set(names) - set(facets))

        if unused_names and plot_name is None:
            # col_wrap was popped from kwargs above, so hand it back
            # explicitly or the per-plot calls would silently lose it
            for plot in self.enum_plots(experiment):
                self.plot(experiment, plot, col_wrap = col_wrap, **kwargs)
            return

        data.reset_index(inplace = True)
        
        if plot_name is not None:
            if not unused_names:
                raise util.CytoflowViewError("Plot {} not from plot_enum"
                                             .format(plot_name))
                               
            groupby = data.groupby(unused_names)

            if plot_name not in groupby.groups:
                raise util.CytoflowViewError("Plot {} not from plot_enum"
                                             .format(plot_name))
                
            data = groupby.get_group(plot_name).reset_index(drop = True)
            
        xscale = util.scale_factory(self.xscale, experiment, condition = self.variable) 
        
        if error_stat is not None:
            yscale = util.scale_factory(self.yscale, experiment, statistic = self.error_statistic)
        else:
            yscale = util.scale_factory(self.yscale, experiment, statistic = self.statistic)
                        
        xlim = kwargs.pop("xlim", None)
        if xlim is None:
            xlim = (xscale.clip(data[self.variable].min() * 0.9),
                    xscale.clip(data[self.variable].max() * 1.1))
                      
        ylim = kwargs.pop("ylim", None)
        if ylim is None:
            ylim = (yscale.clip(data[stat.name].min() * 0.9),
                    yscale.clip(data[stat.name].max() * 1.1))
            
            if error_stat is not None:
                try: 
                    # error values given as (low, high) pairs
                    ylim = (yscale.clip(min([x[0] for x in error_stat]) * 0.9),
                            yscale.clip(max([x[1] for x in error_stat]) * 1.1))
                except (IndexError, TypeError):
                    # scalar error values: numpy scalars raise IndexError
                    # when subscripted, plain Python floats raise TypeError
                    ylim = (yscale.clip(error_stat.min() * 0.9), 
                            yscale.clip(error_stat.max() * 1.1))

        kwargs.setdefault('antialiased', True)  
        
        cols = col_wrap if col_wrap else \
               len(data[self.xfacet].unique()) if self.xfacet else 1
               
        sharex = kwargs.pop('sharex', True)
        sharey = kwargs.pop('sharey', True)
                  
        grid = sns.FacetGrid(data,
                             size = (6 / cols),
                             aspect = 1.5,
                             col = (self.xfacet if self.xfacet else None),
                             row = (self.yfacet if self.yfacet else None),
                             hue = (self.huefacet if self.huefacet else None),
                             col_order = (np.sort(data[self.xfacet].unique()) if self.xfacet else None),
                             row_order = (np.sort(data[self.yfacet].unique()) if self.yfacet else None),
                             hue_order = (np.sort(data[self.huefacet].unique()) if self.huefacet else None),
                             col_wrap = col_wrap,
                             legend_out = False,
                             sharex = sharex,
                             sharey = sharey,
                             xlim = xlim,
                             ylim = ylim)

        for ax in grid.axes.flatten():
            ax.set_xscale(self.xscale, **xscale.mpl_params)
            ax.set_yscale(self.yscale, **yscale.mpl_params)

        # plot the error bars first so the axis labels don't get overwritten
        if error_stat is not None:
            grid.map(_error_bars, self.variable, stat.name, error_name, **kwargs)
        
        grid.map(plt.plot, self.variable, stat.name, **kwargs)
        
        # if we are sharing y axes, make sure the y scale is the same for each
        if sharey:
            fig = plt.gcf()
            fig_y_min = float("inf")
            fig_y_max = float("-inf")
            
            for ax in fig.get_axes():
                ax_y_min, ax_y_max = ax.get_ylim()
                fig_y_min = min(fig_y_min, ax_y_min)
                fig_y_max = max(fig_y_max, ax_y_max)
                    
            for ax in fig.get_axes():
                ax.set_ylim(fig_y_min, fig_y_max)
            
        # if we are sharing x axes, make sure the x scale is the same for each
        if sharex:
            fig = plt.gcf()
            fig_x_min = float("inf")
            fig_x_max = float("-inf")
            
            for ax in fig.get_axes():
                ax_x_min, ax_x_max = ax.get_xlim()
                fig_x_min = min(fig_x_min, ax_x_min)
                fig_x_max = max(fig_x_max, ax_x_max)
            
            for ax in fig.get_axes():
                ax.set_xlim(fig_x_min, fig_x_max)
        
        # if we have a hue facet and a lot of hues, make a color bar instead
        # of a super-long legend.
        
        if self.huefacet:
            try:
                # current matplotlib keeps the default colors in the
                # property cycle
                current_palette = mpl.rcParams['axes.prop_cycle'].by_key()['color']
            except KeyError:
                # older matplotlib only knows the since-removed color_cycle
                current_palette = mpl.rcParams['axes.color_cycle']
                
            if util.is_numeric(experiment.data[self.huefacet]) and \
               len(grid.hue_names) > len(current_palette):
                
                plot_ax = plt.gca()
                cmap = mpl.colors.ListedColormap(sns.color_palette("husl", 
                                                                   n_colors = len(grid.hue_names)))
                cax, kw = mpl.colorbar.make_axes(plt.gca())
                norm = mpl.colors.Normalize(vmin = np.min(grid.hue_names), 
                                            vmax = np.max(grid.hue_names), 
                                            clip = False)
                mpl.colorbar.ColorbarBase(cax, 
                                          cmap = cmap, 
                                          norm = norm,
                                          label = self.huefacet, 
                                          **kw)
                # make_axes added a new axes; switch back to the plot's
                plt.sca(plot_ax)
            else:
                grid.add_legend(title = self.huefacet)
                
        # `is not None` so that falsy plot names (e.g. 0) still get a title
        if unused_names and plot_name is not None:
            plt.title("{0} = {1}".format(unused_names, plot_name))
                
        plt.ylabel(self.statistic)