예제 #1
0
class BinningOp(HasStrictTraits):
    """
    Bin data along an axis.
    
    This operation creates equally spaced bins (in linear or log space)
    along an axis and adds a condition assigning each event to a bin.  The
    value of the event's condition is the left end of the bin's interval in
    which the event is located.
    
    Attributes
    ----------
    name : Str
        The operation name.  Used to name the new metadata field in the
        experiment that's created by apply()

    bin_count_name : Str
        Optional.  If set, :meth:`apply` adds a second condition with this
        name that holds, for each event, the number of events that landed in
        the same bin.
        
    channel : Str
        The name of the channel along which to bin.

    scale : {"linear", "log", "logicle"}
        Make the bins equidistant along what scale?
        
    bin_width : Float
        The width of the bins. If :attr:`scale` is ``log``, :attr:`bin_width` 
        is in log-10 units; if :attr:`scale` is ``logicle``, an error is 
        thrown because the units are ill-defined.

        
    Examples
    --------
    Create a small experiment:
    
    .. plot::
        :context: close-figs
    
        >>> import cytoflow as flow
        >>> import_op = flow.ImportOp()
        >>> import_op.tubes = [flow.Tube(file = "tasbe/rby.fcs")]
        >>> ex = import_op.apply()
    
    Create and parameterize the operation
    
    .. plot::
        :context: close-figs

        >>> bin_op = flow.BinningOp()
        >>> bin_op.name = "Bin"
        >>> bin_op.channel = "FITC-A"
        >>> bin_op.scale = "log"
        >>> bin_op.bin_width = 0.2
    
    Apply the operation to the experiment
    
    .. plot::
        :context: close-figs 
    
        >>> ex2 = bin_op.apply(ex)
    
    Plot the result
    
    .. plot::
        :context: close-figs

        >>> bin_op.default_view().plot(ex2)  

    """

    # traits
    id = Constant('edu.mit.synbio.cytoflow.operations.binning')
    friendly_id = Constant("Binning")

    name = CStr()
    bin_count_name = CStr()
    channel = Str()
    num_bins = util.Removed(err_string="'num_bins' was removed in 0.9")
    bin_width = util.PositiveFloat(0, allow_zero=True)
    scale = util.ScaleEnum

    # upper bound on the number of bin edges apply() will generate
    _max_num_bins = Int(100)

    def apply(self, experiment):
        """
        Applies the binning to an experiment.
        
        Parameters
        ----------
        experiment : Experiment
            the old_experiment to which this op is applied
            
        Returns
        -------
        Experiment
            A new experiment with a condition column named :attr:`name`, which
            contains the location of the left-most edge of the bin that the
            event is in.  If :attr:`bin_count_name` is set, another column
            is added with that name as well, containing the number of events
            in the same bin as the event.

        Raises
        ------
        util.CytoflowOpError
            If the experiment is missing; if :attr:`name`, :attr:`channel` or
            :attr:`bin_width` is unset or invalid; if :attr:`name` or
            :attr:`bin_count_name` already exists in the experiment; if
            :attr:`scale` is neither ``linear`` nor ``log``; or if the
            requested width yields fewer than two or more than
            ``_max_num_bins`` bin edges.
        """
        if experiment is None:
            raise util.CytoflowOpError('experiment', "no experiment specified")

        if not self.name:
            raise util.CytoflowOpError('name', "Name is not set")

        if self.name != util.sanitize_identifier(self.name):
            # the message has no placeholder, so there is nothing to format
            raise util.CytoflowOpError(
                'name',
                "Name can only contain letters, numbers and underscores.")

        if self.name in experiment.data.columns:
            raise util.CytoflowOpError(
                'name',
                "Name {} is in the experiment already".format(self.name))

        if self.bin_count_name and self.bin_count_name in experiment.data.columns:
            raise util.CytoflowOpError(
                'bin_count_name',
                "bin_count_name {} is in the experiment already".format(
                    self.bin_count_name))

        if not self.channel:
            raise util.CytoflowOpError('channel', "channel is not set")

        if self.channel not in experiment.data.columns:
            raise util.CytoflowOpError(
                'channel',
                "channel {} isn't in the experiment".format(self.channel))

        if not self.bin_width:
            raise util.CytoflowOpError('bin_width', "must set bin width")

        if self.scale not in ("linear", "log"):
            raise util.CytoflowOpError(
                'scale', "Can only use binning op with linear or log scale")

        scale = util.scale_factory(self.scale,
                                   experiment,
                                   channel=self.channel)

        # clip the channel once and reuse it for both extremes
        clipped = scale.clip(experiment.data[self.channel])
        scaled_min = scale(clipped.min())
        scaled_max = scale(clipped.max())

        # anchor point (in scaled units) that the bin edges grow outward from
        start = 0 if self.scale == 'linear' else 1

        # edges below the anchor: step downward from it, flip back into
        # ascending order, then drop the anchor itself because the right-hand
        # range below starts with it
        scaled_bins_left = np.arange(start=-1.0 * start,
                                     stop=(-1.0 * scaled_min) + self.bin_width,
                                     step=self.bin_width) * -1.0
        scaled_bins_left = scaled_bins_left[::-1][:-1]

        # edges at and above the anchor
        scaled_bins_right = np.arange(start=start,
                                      stop=scaled_max + self.bin_width,
                                      step=self.bin_width)
        scaled_bins = np.append(scaled_bins_left, scaled_bins_right)

        if len(scaled_bins) > self._max_num_bins:
            raise util.CytoflowOpError(
                None, "Too many bins! To increase this limit, "
                "change _max_num_bins (currently {})".format(
                    self._max_num_bins))

        if len(scaled_bins) < 2:
            raise util.CytoflowOpError('bin_width',
                                       "Must have more than one bin")

        # now, back into data space
        bins = scale.inverse(scaled_bins)

        # reduce to 4 sig figs
        bins = np.array([float('%.4g' % x) for x in bins])

        # put the data in bins.  Digitizing against the interior edges only
        # keeps every index within 0 .. len(bins) - 2, so events outside the
        # outermost edges fall into the first or last bin.
        bin_idx = np.digitize(experiment.data[self.channel], bins[1:-1])

        new_experiment = experiment.clone()
        new_experiment.add_condition(self.name, "float64", bins[bin_idx])

        # keep track of the bins we used, for prettier plotting later.
        new_experiment.metadata[self.name]["bin_scale"] = self.scale
        new_experiment.metadata[self.name]["bins"] = bins

        if self.bin_count_name:
            # size() yields one event count per bin without aggregating every
            # other column of the data frame
            bin_sizes = new_experiment.data.groupby(self.name).size()

            # have to make the condition a float64, because if we're in log
            # space there may be events that have NaN as the bin number.
            new_experiment.add_condition(
                self.bin_count_name, "float64",
                new_experiment[self.name].map(bin_sizes))

        new_experiment.history.append(
            self.clone_traits(transient=lambda _: True))
        return new_experiment

    def default_view(self, **kwargs):
        """
        Returns a diagnostic plot to check the binning.

        Parameters
        ----------
        **kwargs
            Trait values that are set on the new view with ``trait_set``.
        
        Returns
        -------
        IView
            An view instance, call :meth:`plot()` to plot the bins.
        """
        v = BinningView(op=self)
        v.trait_set(**kwargs)
        return v
예제 #2
0
class Base1DStatisticsView(BaseStatisticsView):
    """
    The base class for 1-dimensional statistic views -- ie, the :attr:`variable`
    attribute is on the x axis, and the statistic value is on the y axis.
    
    Attributes
    ----------
    statistic : (str, str)
        The name of the statistic to plot.  Must be a key in the  
        :attr:`~Experiment.statistics` attribute of the :class:`~.Experiment`
        being plotted.
        
    error_statistic : (str, str)
        The name of the statistic used to plot error bars.  Must be a key in the
        :attr:`~Experiment.statistics` attribute of the :class:`~.Experiment`
        being plotted.
        
    scale : {'linear', 'log', 'logicle'}
        The scale applied to the data before plotting it.
    """

    REMOVED_ERROR = "Statistics changed dramatically in 0.5; please see the documentation"
    by = util.Removed(err_string=REMOVED_ERROR)
    yfunction = util.Removed(err_string=REMOVED_ERROR)
    ychannel = util.Removed(err_string=REMOVED_ERROR)
    channel = util.Removed(err_string=REMOVED_ERROR)
    function = util.Removed(err_string=REMOVED_ERROR)
    error_bars = util.Removed(err_string=REMOVED_ERROR)

    xvariable = util.Deprecated(new="variable")

    statistic = Tuple(Str, Str)
    error_statistic = Tuple(Str, Str)

    scale = util.ScaleEnum

    def enum_plots(self, experiment):
        """
        Build this view's data frame from ``experiment`` and pass both to the
        parent class's ``enum_plots``, returning whatever it returns.

        Raises
        ------
        util.CytoflowViewError
            If ``experiment`` is ``None`` or the statistics are misconfigured
            (see :meth:`_make_data`).
        """
        if experiment is None:
            raise util.CytoflowViewError('experiment',
                                         "No experiment specified")
        data = self._make_data(experiment)
        return super().enum_plots(experiment, data)

    def plot(self, experiment, plot_name=None, **kwargs):
        """
        Plot the statistic.

        The data frame and the scale are built here; they are handed to the
        parent class's ``plot`` together with ``plot_name`` and any extra
        keyword arguments.

        Parameters
        ----------
        orientation : {'vertical', 'horizontal'}
        
        lim : (float, float)
            Set the range of the plot's axis.

        Raises
        ------
        util.CytoflowViewError
            If ``experiment`` is ``None``, the statistics are misconfigured,
            or :attr:`variable` is unset or not a condition of the experiment.
        """

        if experiment is None:
            raise util.CytoflowViewError('experiment',
                                         "No experiment specified")

        data = self._make_data(experiment)

        if not self.variable:
            raise util.CytoflowViewError('variable', "variable not set")

        if self.variable not in experiment.conditions:
            raise util.CytoflowViewError(
                'variable',
                "variable {0} not in the experiment".format(self.variable))

        scale = util.scale_factory(self.scale,
                                   experiment,
                                   statistic=self.statistic,
                                   error_statistic=self.error_statistic)

        super().plot(experiment,
                     data,
                     plot_name=plot_name,
                     scale=scale,
                     **kwargs)

    def _make_data(self, experiment):
        """
        Assemble the data frame that gets plotted.

        Returns
        -------
        pandas.DataFrame
            Indexed like the data statistic, with one column named after the
            data statistic and, if :attr:`error_statistic` is set, a second
            column named after the error statistic.

        Raises
        ------
        util.CytoflowViewError
            If the experiment is missing, a statistic is unset, absent from
            the experiment or non-numeric, or the error statistic's index or
            name is incompatible with the data statistic.
        """
        if experiment is None:
            raise util.CytoflowViewError('experiment',
                                         "No experiment specified")

        # a Tuple(Str, Str) trait is always a 2-tuple and therefore truthy, so
        # "unset" has to be detected from its first element
        if not self.statistic or not self.statistic[0]:
            raise util.CytoflowViewError('statistic', "Statistic not set")

        if self.statistic not in experiment.statistics:
            raise util.CytoflowViewError(
                'statistic',
                "Can't find the statistic {} in the experiment".format(
                    self.statistic))

        stat = experiment.statistics[self.statistic]

        if not util.is_numeric(stat):
            raise util.CytoflowViewError('statistic',
                                         "Statistic must be numeric")

        error_stat = None
        if self.error_statistic[0]:
            if self.error_statistic not in experiment.statistics:
                raise util.CytoflowViewError(
                    'error_statistic',
                    "Can't find the error statistic in the experiment")

            # work on a copy: re-indexing below must not alter the statistic
            # stored in the experiment
            error_stat = experiment.statistics[self.error_statistic].copy()

            # reorder_levels() fails with a raw pandas exception when the
            # level names differ; report that as a view error instead
            if (isinstance(error_stat.index, pd.MultiIndex)
                    and set(error_stat.index.names) != set(stat.index.names)):
                raise util.CytoflowViewError(
                    'error_statistic', "Data statistic and error statistic "
                    " don't have the same index.")

            try:
                error_stat.index = error_stat.index.reorder_levels(
                    stat.index.names)
                error_stat.sort_index(inplace=True)
            except AttributeError:
                # a flat (non-hierarchical) index has no levels to reorder
                pass

            if not stat.index.equals(error_stat.index):
                raise util.CytoflowViewError(
                    'error_statistic', "Data statistic and error statistic "
                    " don't have the same index.")

            if stat.name == error_stat.name:
                raise util.CytoflowViewError(
                    'error_statistic',
                    "Data statistic and error statistic can "
                    "not have the same name.")

        data = pd.DataFrame(index=stat.index)
        data[stat.name] = stat

        if error_stat is not None:
            data[error_stat.name] = error_stat

        return data
예제 #3
0
class Base2DStatisticsView(BaseStatisticsView):
    """
    The base class for 2-dimensional statistic views -- ie, the :attr:`variable`
    attribute varies independently, and the corresponding values from the x and
    y statistics are plotted on the x and y axes.
    
    Attributes
    ----------
    xstatistic, ystatistic : (str, str)
        The names of the statistics to plot.  Must be keys in the  
        :attr:`~Experiment.statistics` attribute of the :class:`~.Experiment`
        being plotted.
        
    x_error_statistic, y_error_statistic : (str, str)
        The name of the statistics used to plot error bars.  Must be keys in the
        :attr:`~Experiment.statistics` attribute of the :class:`~.Experiment`
        being plotted.
        
    xscale, yscale : {'linear', 'log', 'logicle'}
        The scales applied to the data before plotting it.
    """

    STATS_REMOVED = "{} has been removed. Statistics changed dramatically in 0.5; please see the documentation."

    xchannel = util.Removed(err_string=STATS_REMOVED)
    xfunction = util.Removed(err_string=STATS_REMOVED)
    ychannel = util.Removed(err_string=STATS_REMOVED)
    yfunction = util.Removed(err_string=STATS_REMOVED)

    xstatistic = Tuple(Str, Str)
    ystatistic = Tuple(Str, Str)
    x_error_statistic = Tuple(Str, Str)
    y_error_statistic = Tuple(Str, Str)

    xscale = util.ScaleEnum
    yscale = util.ScaleEnum

    def enum_plots(self, experiment):
        """
        Build this view's data frame from ``experiment`` and pass both to the
        parent class's ``enum_plots``, returning whatever it returns.

        Raises
        ------
        util.CytoflowViewError
            If ``experiment`` is ``None`` or the statistics are misconfigured
            (see :meth:`_make_data`).
        """
        if experiment is None:
            raise util.CytoflowViewError('experiment',
                                         "No experiment specified")
        data = self._make_data(experiment)
        return super().enum_plots(experiment, data)

    def plot(self, experiment, plot_name=None, **kwargs):
        """
        Plot the two statistics against each other.

        The data frame and both scales are built here; they are handed to the
        parent class's ``plot`` together with ``plot_name`` and any extra
        keyword arguments.

        Parameters
        ----------
        xlim, ylim : (float, float)
            Set the range of the plot's axis.

        Raises
        ------
        util.CytoflowViewError
            If ``experiment`` is ``None`` or the statistics are misconfigured.
        """
        if experiment is None:
            raise util.CytoflowViewError('experiment',
                                         "No experiment specified")

        data = self._make_data(experiment)

        xscale = util.scale_factory(self.xscale,
                                    experiment,
                                    statistic=self.xstatistic,
                                    error_statistic=self.x_error_statistic)

        yscale = util.scale_factory(self.yscale,
                                    experiment,
                                    statistic=self.ystatistic,
                                    error_statistic=self.y_error_statistic)

        super().plot(experiment,
                     data,
                     plot_name,
                     xscale=xscale,
                     yscale=yscale,
                     **kwargs)

    def _relevelled_copy(self, series, names):
        """
        Return a copy of ``series`` whose index levels are ordered as
        ``names`` and whose rows are sorted by that index.  A flat index has
        no ``reorder_levels``; in that case the copy is returned as-is.
        """
        series = series.copy()
        try:
            series.index = series.index.reorder_levels(names)
            series.sort_index(inplace=True)
        except AttributeError:
            pass
        return series

    def _make_data(self, experiment):
        """
        Assemble the data frame that gets plotted.

        Both statistics are restricted to the index entries they share.  The
        statistics stored in the experiment are never modified; all
        re-indexing happens on copies.

        Returns
        -------
        pandas.DataFrame
            Indexed by the shared index entries, with one column per
            statistic (named after it) and one more for each error statistic
            that is set.

        Raises
        ------
        util.CytoflowViewError
            If the experiment is missing, a statistic is unset, absent from
            the experiment or non-numeric, two of the statistics share a
            name, or the indices of the statistics are incompatible.
        """
        if experiment is None:
            raise util.CytoflowViewError('experiment',
                                         "No experiment specified")

        # ---- X statistic ----

        # a Tuple(Str, Str) trait is always a 2-tuple and therefore truthy, so
        # "unset" has to be detected from its first element
        if not self.xstatistic or not self.xstatistic[0]:
            raise util.CytoflowViewError('xstatistic', "X Statistic not set")

        if self.xstatistic not in experiment.statistics:
            raise util.CytoflowViewError(
                'xstatistic',
                "Can't find the statistic {} in the experiment".format(
                    self.xstatistic))

        xstat = experiment.statistics[self.xstatistic]

        if not util.is_numeric(xstat):
            raise util.CytoflowViewError('xstatistic',
                                         "X statistic must be numeric")

        x_error_stat = None
        if self.x_error_statistic[0]:
            if self.x_error_statistic not in experiment.statistics:
                raise util.CytoflowViewError(
                    'x_error_statistic',
                    "Can't find the X error statistic in the experiment")

            x_error_stat = experiment.statistics[self.x_error_statistic]

            # checked before reordering so that a level-name mismatch is
            # reported as a view error, not a raw pandas exception
            if set(x_error_stat.index.names) != set(xstat.index.names):
                raise util.CytoflowViewError(
                    'x_error_statistic', "X error statistic doesn't have the "
                    "same indices as the X statistic")

            x_error_stat = self._relevelled_copy(x_error_stat,
                                                 xstat.index.names)

            if not xstat.index.equals(x_error_stat.index):
                raise util.CytoflowViewError(
                    'x_error_statistic', "Data statistic and error statistic "
                    " don't have the same index.")

            if xstat.name == x_error_stat.name:
                raise util.CytoflowViewError(
                    'x_error_statistic',
                    "Data statistic and error statistic can "
                    "not have the same name.")

        # ---- Y statistic ----

        if not self.ystatistic or not self.ystatistic[0]:
            raise util.CytoflowViewError('ystatistic', "Y statistic not set")

        if self.ystatistic not in experiment.statistics:
            raise util.CytoflowViewError(
                'ystatistic',
                "Can't find the Y statistic {} in the experiment".format(
                    self.ystatistic))

        ystat = experiment.statistics[self.ystatistic]

        if not util.is_numeric(ystat):
            raise util.CytoflowViewError('ystatistic',
                                         "Y statistic must be numeric")

        y_error_stat = None
        if self.y_error_statistic[0]:
            if self.y_error_statistic not in experiment.statistics:
                raise util.CytoflowViewError(
                    'y_error_statistic',
                    "Can't find the Y error statistic in the experiment")

            y_error_stat = experiment.statistics[self.y_error_statistic]

            if set(y_error_stat.index.names) != set(ystat.index.names):
                raise util.CytoflowViewError(
                    'y_error_statistic', "Y error statistic doesn't have the "
                    "same indices as the Y statistic")

            y_error_stat = self._relevelled_copy(y_error_stat,
                                                 ystat.index.names)

            if not ystat.index.equals(y_error_stat.index):
                raise util.CytoflowViewError(
                    'y_error_statistic', "Data statistic and error statistic "
                    " don't have the same index.")

            if ystat.name == y_error_stat.name:
                raise util.CytoflowViewError(
                    'y_error_statistic',
                    "Data statistic and error statistic can "
                    "not have the same name.")

        # ---- align X and Y ----

        if xstat.name == ystat.name:
            raise util.CytoflowViewError(
                'ystatistic', "X and Y statistics can "
                "not have the same name.")

        # hierarchical indices can only be aligned when they have the same
        # level names; reorder_levels() would fail on anything else
        if (isinstance(ystat.index, pd.MultiIndex)
                and set(ystat.index.names) != set(xstat.index.names)):
            raise util.CytoflowViewError(
                'ystatistic', "X and Y statistics must have the same indices")

        ystat = self._relevelled_copy(ystat, xstat.index.names)

        # keep only the index entries present in both statistics
        intersect_idx = xstat.index.intersection(ystat.index)
        xstat = xstat.reindex(intersect_idx).sort_index()
        ystat = ystat.reindex(intersect_idx).sort_index()

        # ---- restrict the error statistics to the shared entries ----

        if x_error_stat is not None:
            x_error_stat = x_error_stat.reindex(intersect_idx).sort_index()

            if not x_error_stat.index.equals(xstat.index):
                raise util.CytoflowViewError(
                    'x_error_statistic', "X error statistic doesn't have the "
                    "same indices as the X statistic")

        if y_error_stat is not None:
            # ystat's levels now follow xstat's order, so the error statistic
            # has to be brought into that order before re-indexing
            y_error_stat = self._relevelled_copy(y_error_stat,
                                                 ystat.index.names)
            y_error_stat = y_error_stat.reindex(intersect_idx).sort_index()

            if not y_error_stat.index.equals(ystat.index):
                raise util.CytoflowViewError(
                    'y_error_statistic', "Y error statistic doesn't have the "
                    "same values as the Y statistic")

        data = pd.DataFrame(index=xstat.index)
        data[xstat.name] = xstat
        data[ystat.name] = ystat

        if x_error_stat is not None:
            data[x_error_stat.name] = x_error_stat

        if y_error_stat is not None:
            data[y_error_stat.name] = y_error_stat

        return data
class ColorTranslationOp(HasStrictTraits):
    """
    Translate measurements from one color's scale to another, using a two-color
    or three-color control.
    
    To use, set up the :attr:`controls` dictionary with the channels to convert
    and the FCS files to compute the mapping.  Call :meth:`estimate` to
    paramterize the module; check that the plots look good by calling the 
    :meth:`~ColorTranslationDiagnostic.plot` method of the 
    :class:`ColorTranslationDiagnostic` instance returned by :meth:`default_view`;
    then call :meth:`apply` to apply the translation to an :class:`.Experiment`.
    
    Attributes
    ----------
    controls : Dict((Str, Str), File)
        Two-color controls used to determine the mapping.  They keys are 
        tuples of **from-channel** and **to-channel**.  The values are FCS files 
        containing two-color constitutive fluorescent expression data 
        for the mapping.
        
    mixture_model : Bool (default = False)
        If ``True``, try to model the **from** channel as a mixture of expressing
        cells and non-expressing cells (as you would get with a transient
        transfection), then weight the regression by the probability that the
        the cell is from the top (transfected) distribution.  Make sure you 
        check the diagnostic plots to see that this worked!
        
    linear_model : Bool (default = False)
        Set this to ``True`` to get a scaling that is strictly multiplicative,
        mirroring the TASBE approach.  Do check the diagnostic plot, though,
        to see how well (or poorly) your model fits the data.
        
    control_conditions : Dict((Str, Str), Dict(Str, Any))
        Occasionally, you'll need to specify the experimental conditions that
        the bleedthrough tubes were collected under (to apply the operations in the 
        history.)  Specify them here.  The key is a tuple of channel names; the 
        value is a dictionary of the conditions (same as you would specify for a
        :class:`~.Tube` )

        
    Notes
    -----
    In the TASBE workflow, this operation happens *after* the application of
    :class:`.AutofluorescenceOp` and :class:`.BleedthroughLinearOp`.  The entire
    operation history of the :class:`.Experiment` that is passed to 
    :meth:`estimate` is replayed on the control files in :attr:`controls`, so
    they are also corrected for autofluorescence and bleedthrough, and have
    metadata for subsetting.
    

    Examples
    --------
    Create a small experiment:
    
    .. plot::
        :context: close-figs
    
        >>> import cytoflow as flow
        >>> import_op = flow.ImportOp()
        >>> import_op.tubes = [flow.Tube(file = "tasbe/mkate.fcs")]
        >>> ex = import_op.apply()
    
    Create and parameterize the operation
    
    .. plot::
        :context: close-figs

        >>> color_op = flow.ColorTranslationOp()
        >>> color_op.controls = {("Pacific Blue-A", "FITC-A") : "tasbe/rby.fcs",
        ...                      ("PE-Tx-Red-YG-A", "FITC-A") : "tasbe/rby.fcs"}
        >>> color_op.mixture_model = True
    
    Estimate the model parameters
    
    .. plot::
        :context: close-figs 
    
        >>> color_op.estimate(ex)
    
    Plot the diagnostic plot
    
    .. plot::
        :context: close-figs

        >>> color_op.default_view().plot(ex)  

    Apply the operation to the experiment
    
    .. plot::
        :context: close-figs
    
        >>> ex = color_op.apply(ex)  
    """

    # traits
    id = Constant('edu.mit.synbio.cytoflow.operations.color_translation')
    friendly_id = Constant("Color translation")

    name = Constant("Color Translation")

    translation = util.Removed(
        err_string=
        "'translation' is removed; the same info is found in 'controls'",
        warning=True)
    controls = Dict(Tuple(Str, Str), File)
    controls_frames = Dict(Tuple(Str, Str), Instance(DataFrame))
    mixture_model = Bool(False)
    linear_model = Bool(False)

    control_conditions = Dict(Tuple(Str, Str), Dict(Str, Any), {})

    # The regression coefficients determined by `estimate()`, used to map
    # colors between channels.  The keys are tuples of (*from-channel*,
    # *to-channel) (corresponding to key-value pairs in `translation`).  The
    # values are lists of Float, the log-log coefficients for the color
    # translation (determined by `estimate()`).
    # TODO - why can't i make the value List(Float)?
    _coefficients = Dict(Tuple(Str, Str), Any, transient=True)
    _trans_fn = Dict(Tuple(Str, Str), Callable, transient=True)
    _sample = Dict(Tuple(Str, Str), Any, transient=True)
    _means = Dict(Tuple(Str, Str), Tuple(Float, Float), transient=True)

    def estimate(self, experiment, subset=None):
        """
        Estimate the mapping from the two-channel controls
        
        Parameters
        ----------
        experiment : Experiment
            The :class:`.Experiment` used to check the voltages, etc. of the
            control tubes.  Also the source of the operation history that
            is replayed on the control tubes.
            
        subset : Str
            A Python expression used to subset the controls before estimating
            the color translation parameters.
        """

        if experiment is None:
            raise util.CytoflowOpError('experiment', "No experiment specified")

        if not self.controls and not self.controls_frames:
            raise util.CytoflowOpError('controls', "No controls specified")

        self._coefficients.clear()
        self._trans_fn.clear()
        self._sample.clear()
        self._means.clear()

        tubes = {}

        if (self.controls != {}):
            controls = self.controls
        else:
            controls = self.controls_frames

        translation = {x[0]: x[1] for x in list(controls.keys())}

        for from_channel, to_channel in translation.items():

            if from_channel not in experiment.channels:
                raise util.CytoflowOpError(
                    'translation',
                    "Channel {0} not in the experiment".format(from_channel))

            if to_channel not in experiment.channels:
                raise util.CytoflowOpError(
                    'translation',
                    "Channel {0} not in the experiment".format(to_channel))

            if (from_channel, to_channel) not in controls:
                raise util.CytoflowOpError(
                    'translation', "Control file for {0} --> {1} "
                    "not specified".format(from_channel, to_channel))

            tube_file_or_frame = controls[(from_channel, to_channel)]
            tube_file_or_frame_key = (from_channel, to_channel)
            tube_conditions = self.control_conditions[(from_channel, to_channel)] \
                                    if (from_channel, to_channel) in self.control_conditions \
                                    else {}
            conditions = {
                k: experiment.data[k].dtype.name
                for k in tube_conditions.keys()
            }

            if tube_file_or_frame_key not in tubes:
                # if True:
                channels = {
                    experiment.metadata[c]["fcs_name"]: c
                    for c in experiment.channels
                }
                name_metadata = experiment.metadata['name_metadata']
                if (self.controls != {}):
                    # make a little Experiment
                    check_tube(controls[tube_file_or_frame_key], experiment)
                    tube_exp = ImportOp(tubes=[
                        Tube(file=controls[tube_file_or_frame_key],
                             conditions=tube_conditions)
                    ],
                                        conditions=conditions,
                                        channels=channels,
                                        name_metadata=name_metadata).apply()
                else:
                    tube_exp = ImportOp(tubes=[
                        Tube(frame=controls[tube_file_or_frame_key],
                             conditions=tube_conditions)
                    ],
                                        conditions=conditions,
                                        channels=channels,
                                        name_metadata=name_metadata).apply()

                # apply previous operations
                for op in experiment.history:
                    if hasattr(op, 'by'):
                        for by in op.by:
                            if 'experiment' in experiment.metadata[by]:
                                raise util.CytoflowOpError(
                                    'experiment',
                                    "Prior to applying this operation, "
                                    "you must not apply any operation with 'by' "
                                    "set to an experimental condition.")
                    tube_exp = op.apply(tube_exp)

                # subset the events
                if subset:
                    try:
                        tube_exp = tube_exp.query(subset)
                    except Exception as e:
                        raise util.CytoflowOpError(
                            'subset', "Subset string '{0}' isn't valid".format(
                                subset)) from e

                    if len(tube_exp.data) == 0:
                        raise util.CytoflowOpError(
                            'subset',
                            "Subset string '{0}' returned no events".format(
                                subset))

                tube_data = tube_exp.data

                tubes[tube_file_or_frame_key] = tube_data

            data = tubes[tube_file_or_frame_key][[from_channel,
                                                  to_channel]].copy()
            data = data[data[from_channel] > 0]
            data = data[data[to_channel] > 0]

            _ = data.reset_index(drop=True, inplace=True)

            # self._sample[(from_channel, to_channel)] = data.sample(n = min(len(data), 5000))
            self._sample[(from_channel,
                          to_channel)] = data.sample(n=min(len(data), 100))

            data[from_channel] = np.log10(data[from_channel])
            data[to_channel] = np.log10(data[to_channel])

            if self.mixture_model:
                gmm = sklearn.mixture.BayesianGaussianMixture(n_components=2,
                                                              random_state=1)
                fit = gmm.fit(data)

                self._means[(from_channel), (to_channel)] = \
                    (10 ** fit.means_[0][0], 10 ** fit.means_[1][0])

                # pick the component with the maximum mean
                idx = 0 if fit.means_[0][0] > fit.means_[1][0] else 1
                weights = [x[idx] for x in fit.predict_proba(data)]
            else:
                weights = [1] * len(data.index)

            if self.linear_model:
                # this mimics the TASBE approach, which constrains the fit to
                # a multiplicative scaling (eg, a linear fit with an intercept
                # of 0.)  I disagree that this is the right approach, which is
                # why it's not the default.

                f = lambda x: weights * (data[to_channel] - x[0] * data[
                    from_channel])
                x0 = [1]

                trans_fn = lambda data, x: np.power(data, x[0])

            else:

                # this code uses a different approach from TASBE. instead of
                # computing a multiplicative scaling constant, it computes a
                # full linear regression on the log-scaled data (ie, allowing
                # the intercept to vary as well as the slope).  this is a
                # more general model of the underlying physical behavior, and
                # fits the data better -- but it may not be more "correct."

                f = lambda x: weights * (data[to_channel] - x[0] * data[
                    from_channel] - x[1])
                x0 = [1, 0]

                trans_fn = lambda data, x: (10**x[1]) * np.power(data, x[0])

            opt = scipy.optimize.least_squares(f, x0)
            self._coefficients[(from_channel, to_channel)] = opt.x
            self._trans_fn[(
                from_channel,
                to_channel)] = lambda data, x=opt.x: trans_fn(data, x)

    def apply(self, experiment):
        """Translate the configured channels of an experiment.

        Each ``(from_channel, to_channel)`` key of the controls mapping names
        one translation.  The input is cloned first and all changes are made
        on the clone: events whose value in any source channel is not strictly
        positive are dropped, then every source channel is overwritten with
        the output of its stored transfer function.

        Parameters
        ----------
        experiment : Experiment
            The experiment to which this op is applied.

        Returns
        -------
        Experiment
            A new experiment with the color translation applied.  The
            translated channels also have the following new metadata:

            **channel_translation** : Str
            Which channel was this one translated to?

            **channel_translation_fn** : Callable (pandas.Series --> pandas.Series)
            The function that translated this channel

        Raises
        ------
        util.CytoflowOpError
            If ``experiment`` is ``None``, if neither ``controls`` nor
            ``controls_frames`` is set, or if the transfer functions or
            coefficients are missing (i.e. ``estimate()`` was not called).
        """

        if experiment is None:
            raise util.CytoflowOpError('experiment', "No experiment specified")

        if not (self.controls or self.controls_frames):
            raise util.CytoflowOpError('controls', "No controls specified")

        if not self._trans_fn:
            raise util.CytoflowOpError(
                None,
                "Transfer functions aren't set. "
                "Did you forget to call estimate()?")

        # file-based controls take precedence over in-memory frames
        controls = (self.controls
                    if self.controls != {} else self.controls_frames)

        channel_pairs = [(key[0], key[1]) for key in controls.keys()]
        translation = dict(channel_pairs)
        from_channels = [src for src, _ in channel_pairs]

        # every requested translation must have been fitted
        for pair in translation.items():
            if pair not in self._coefficients:
                raise util.CytoflowOpError(
                    None,
                    "Coefficients aren't set for translation "
                    "{} --> {}.  Did you call estimate()?".format(*pair))

        new_experiment = experiment.clone()

        # keep only events that are strictly positive in every source channel
        for src in from_channels:
            events = new_experiment.data
            new_experiment.data = events[events[src] > 0]

        for src, dst in translation.items():
            translate = self._trans_fn[(src, dst)]

            # the transfer function is fed the *original* experiment's channel
            new_experiment[src] = translate(experiment[src])

            channel_meta = new_experiment.metadata[src]
            channel_meta['channel_translation_fn'] = translate
            channel_meta['channel_translation'] = dst

        snapshot = self.clone_traits(transient=lambda _: True)
        new_experiment.history.append(snapshot)

        return new_experiment

    def default_view(self, **kwargs):
        """
        Build the diagnostic view for this operation.

        Parameters
        ----------
        **kwargs
            Trait values applied to the new view with ``trait_set``.

        Returns
        -------
        IView
            A diagnostic view bound to this operation; call
            :meth:`ColorTranslationDiagnostic.plot` to see the diagnostic
            plots.
        """

        diagnostic = ColorTranslationDiagnostic(op=self)
        diagnostic.trait_set(**kwargs)
        return diagnostic
예제 #5
0
class Stats1DView(Base1DStatisticsView):
    """
    Plot a statistic.  The value of the statistic will be plotted on the
    Y axis; a numeric conditioning variable must be chosen for the X axis.
    Every variable in the statistic must be specified as either the `variable`
    or one of the plot facets.
    
    Attributes
    ----------
    variable_scale : {'linear', 'log', 'logicle'}
        The scale applied to the variable (on the X axis)
        
    Examples
    --------
    
    Make a little data set.
    
    .. plot::
        :context: close-figs
    
        >>> import cytoflow as flow
        >>> import_op = flow.ImportOp()
        >>> import_op.tubes = [flow.Tube(file = "Plate01/RFP_Well_A3.fcs",
        ...                              conditions = {'Dox' : 10.0}),
        ...                    flow.Tube(file = "Plate01/CFP_Well_A4.fcs",
        ...                              conditions = {'Dox' : 1.0})]
        >>> import_op.conditions = {'Dox' : 'float'}
        >>> ex = import_op.apply()
    
    Create a new statistic.
    
    .. plot::
        :context: close-figs
        
        >>> ch_op = flow.ChannelStatisticOp(name = 'MeanByDox',
        ...                     channel = 'Y2-A',
        ...                     function = flow.geom_mean,
        ...                     by = ['Dox'])
        >>> ex2 = ch_op.apply(ex)
        
    View the new statistic
    
    .. plot::
        :context: close-figs
        
        >>> flow.Stats1DView(variable = 'Dox',
        ...                  statistic = ('MeanByDox', 'geom_mean'),
        ...                  variable_scale = 'log',
        ...                  scale = 'log').plot(ex2)
    """

    # traits
    id = Constant("edu.mit.synbio.cytoflow.view.stats1d")
    friendly_id = Constant("1D Statistics View")

    # attributes that were removed or renamed; the util.Removed /
    # util.Deprecated traits carry the message or the replacement name
    REMOVED_ERROR = Constant(
        "Statistics changed dramatically in 0.5; please see the documentation")
    by = util.Removed(err_string=REMOVED_ERROR)
    yfunction = util.Removed(err_string=REMOVED_ERROR)
    ychannel = util.Removed(err_string=REMOVED_ERROR)
    xvariable = util.Deprecated(new="variable")
    xscale = util.Deprecated(new='variable_scale')

    variable_scale = util.ScaleEnum

    def enum_plots(self, experiment):
        """
        Returns an iterator over the possible plots that this View can
        produce.  The values returned can be passed to :meth:`plot`.
        """

        return super().enum_plots(experiment)

    def plot(self, experiment, plot_name=None, **kwargs):
        """Plot a chart of a variable's values against a statistic.
        
        Parameters
        ----------
        
        experiment : Experiment
            The experiment holding the statistic and the conditioning 
            variable.
            
        plot_name : optional
            Which plot to draw; passed through to the base class.  Valid 
            values are the ones produced by :meth:`enum_plots`.
        
        orientation : str
            `'vertical'` (the default) puts the variable on the X axis and 
            the statistic on the Y axis; any other value swaps the axes.
        
        variable_lim : (float, float)
            The limits on the variable axis
        
        color : a matplotlib color
            The color to plot with.  Overridden if `huefacet` is not `None`
            
        linewidth : float
            The width of the line, in points
            
        linestyle : ['solid' | 'dashed', 'dashdot', 'dotted' | (offset, on-off-dash-seq) | '-' | '--' | '-.' | ':' | 'None' | ' ' | '']
            
        marker : a matplotlib marker style
            See http://matplotlib.org/api/markers_api.html#module-matplotlib.markers
            
        markersize : int
            The marker size in points
            
        markerfacecolor : a matplotlib color
            The color to make the markers.  Overridden (?) if `huefacet` is not `None`
            
        alpha : the alpha blending value, from 0.0 (transparent) to 1.0 (opaque)
        
        capsize : scalar
            The size of the error bar caps, in points
            
        shade_error : bool
            If `False` (the default), plot the error statistic as traditional 
            "error bars."  If `True`, plot error statistic as a filled, shaded
            region.
            
        shade_alpha : float
            The transparency of the shaded error region, from 0.0 (transparent)
            to 1.0 (opaque.)  Default is 0.2.
        
        Raises
        ------
        util.CytoflowViewError
            If `experiment` is `None`, or if `variable` is not one of the 
            experiment's conditions or is not numeric.
        
        Notes
        -----
                
        Other `kwargs` are passed to `matplotlib.pyplot.plot <https://matplotlib.org/devdocs/api/_as_gen/matplotlib.pyplot.plot.html>`_
        
        """

        if experiment is None:
            raise util.CytoflowViewError('experiment',
                                         "No experiment specified")

        # these are problems with how the *view* is configured, so report
        # them with the view-specific error type like every other check here
        if self.variable not in experiment.conditions:
            raise util.CytoflowViewError(
                'variable',
                "Variable {} not in the experiment".format(self.variable))

        if not util.is_numeric(experiment[self.variable]):
            raise util.CytoflowViewError(
                'variable',
                "Variable {} must be numeric".format(self.variable))

        variable_scale = util.scale_factory(self.variable_scale,
                                            experiment,
                                            condition=self.variable)

        super().plot(experiment,
                     plot_name,
                     variable_scale=variable_scale,
                     **kwargs)

    def _grid_plot(self, experiment, grid, **kwargs):
        """
        Draw the statistic (and optional error statistic) onto `grid`.
        
        Pops `scale`, `variable_scale`, `variable_lim`, `lim`, `orientation`,
        `capsize`, `shade_error` and `shade_alpha` from `kwargs`; whatever is
        left is forwarded to `plt.plot`.
        
        Returns
        -------
        dict
            The scales and limits for both axes, keyed `xscale`, `xlim`,
            `yscale` and `ylim`.
        """

        data = grid.data
        data_scale = kwargs.pop('scale')
        variable_scale = kwargs.pop('variable_scale')

        stat = experiment.statistics[self.statistic]
        stat_name = stat.name
        if self.error_statistic[0]:
            err_stat = experiment.statistics[self.error_statistic]
            err_stat_name = err_stat.name
        else:
            err_stat = None
            err_stat_name = None

        # default limits: pad the observed range by 10% on either side and
        # clip to what the scale can display
        variable_lim = kwargs.pop("variable_lim", None)
        if variable_lim is None:
            variable_lim = (variable_scale.clip(
                data[self.variable].min() *
                0.9), variable_scale.clip(data[self.variable].max() * 1.1))

        lim = kwargs.pop("lim", None)
        if lim is None:
            lim = (data_scale.clip(data[stat_name].min() * 0.9),
                   data_scale.clip(data[stat_name].max() * 1.1))

            if self.error_statistic[0]:
                try:
                    # error values are (low, high) pairs
                    lim = (data_scale.clip(
                        min([x[0] for x in data[err_stat_name]]) * 0.9),
                           data_scale.clip(
                               max([x[1] for x in data[err_stat_name]]) * 1.1))
                except (TypeError, IndexError):
                    # error values are scalars: statistic +/- error
                    lim = (data_scale.clip(
                        (data[stat_name].min() - data[err_stat_name].min()) *
                        0.9),
                           data_scale.clip((data[stat_name].max() +
                                            data[err_stat_name].max()) * 1.1))

        orientation = kwargs.pop('orientation', 'vertical')
        capsize = kwargs.pop('capsize', None)
        shade_error = kwargs.pop('shade_error', False)
        shade_alpha = kwargs.pop('shade_alpha', 0.2)

        if orientation == 'vertical':
            # plot the error bars first so the axis labels don't get overwritten
            if err_stat is not None:
                if shade_error:
                    grid.map(_v_error_shade,
                             self.variable,
                             stat_name,
                             err_stat_name,
                             alpha=shade_alpha)
                else:
                    grid.map(_v_error_bars,
                             self.variable,
                             stat_name,
                             err_stat_name,
                             capsize=capsize)

            grid.map(plt.plot, self.variable, stat_name, **kwargs)

            return dict(xscale=variable_scale,
                        xlim=variable_lim,
                        yscale=data_scale,
                        ylim=lim)
        else:
            # plot the error bars first so the axis labels don't get overwritten
            if err_stat is not None:
                if shade_error:
                    grid.map(_h_error_shade,
                             stat_name,
                             self.variable,
                             err_stat_name,
                             alpha=shade_alpha)
                else:
                    grid.map(_h_error_bars,
                             stat_name,
                             self.variable,
                             err_stat_name,
                             capsize=capsize)

            grid.map(plt.plot, stat_name, self.variable, **kwargs)

            return dict(yscale=variable_scale,
                        ylim=variable_lim,
                        xscale=data_scale,
                        xlim=lim)
예제 #6
0
class BarChartView(Base1DStatisticsView):
    """Plots a bar chart of some summary statistic
    
    Attributes
    ----------
    name : Str
        The bar chart's name 
    
    statistic : Tuple(Str, Str)
        the statistic we're plotting
        
    scale : Enum("linear", "log", "logicle") (default = "linear")
        The scale to use on the Y axis.
        
    variable : Str
        the name of the conditioning variable to group the chart's bars
        
    error_statistic : Tuple(Str, Str)
        if specified, a statistic to draw error bars.  if values are numeric,
        the bars are drawn +/- the value.  if the values are tuples, then
        the first element is the low error and the second element is the
        high error.
        
    xfacet : Str
        the conditioning variable for horizontal subplots
        
    yfacet : Str
        the conditioning variable for vertical subplots
        
    huefacet : Str
        the conditioning variable to make multiple bar colors
        
    subset : String
        Passed to pandas.DataFrame.query(), to get a subset of the statistic
        before we plot it.
        
    Examples
    --------
    >>> bar = flow.BarChartView()
    >>> bar.name = "Bar Chart"
    >>> bar.channel = 'Y2-A'
    >>> bar.variable = 'Y2-A+'
    >>> bar.huefacet = 'Dox'
    >>> bar.function = len
    >>> bar.plot(ex)
    """

    # traits
    id = "edu.mit.synbio.cytoflow.view.barchart"
    friendly_id = "Bar Chart"

    orientation = util.Removed(
        err_string="`orientation` is now a parameter to `plot`")

    def enum_plots(self, experiment):
        """
        Returns an iterator over the possible plots that this View can
        produce.  The values returned can be passed to "plot".
        """

        return super().enum_plots(experiment)

    def plot(self, experiment, plot_name=None, **kwargs):
        """
        Plot a bar chart
        
        Parameters
        ----------
        experiment : Experiment
            The experiment holding the statistic to plot
            
        plot_name : optional
            Which plot to draw; passed through to the base class.  Valid
            values are the ones produced by `enum_plots`.
        
        orientation : ['vertical', 'horizontal']
            Sets the orientation to vertical (the default) or horizontal
            
        color : a matplotlib color
            Sets the colors of all the bars, even if there is a hue facet
            
        errwidth : scalar
            The width of the error bars, in points
            
        errcolor : a matplotlib color
            The color of the error bars
            
        capsize : scalar
            The size of the error bar caps, in points
            
        Other Parameters
        ----------------
        Other `kwargs` are passed to matplotlib.axes.Axes.bar_.
    
        .. _matplotlib.axes.Axes.bar_: https://matplotlib.org/devdocs/api/_as_gen/matplotlib.axes.Axes.bar.html

        See Also
        --------
        BaseView.plot : common parameters for data views
        """

        super().plot(experiment, plot_name, **kwargs)

    def _grid_plot(self, experiment, grid, xlim, ylim, xscale, yscale,
                   **kwargs):
        """
        Draw the bars (and optional error bars) onto `grid`.
        
        Pops `orientation` from `kwargs`; the remaining `kwargs` are forwarded
        to `_barplot` through `grid.map`.  `xlim`, `ylim` and `xscale` are
        accepted but not used here.
        
        Returns
        -------
        dict
            Always empty.
            
        Raises
        ------
        util.CytoflowViewError
            If `orientation` is neither 'vertical' nor 'horizontal'.
        """

        # validate once, before touching the scale or any axes
        orient = kwargs.pop('orientation', 'vertical')
        if orient not in ('vertical', 'horizontal'):
            raise util.CytoflowViewError(
                "'orientation' param must be 'vertical' or 'horizontal'")

        # because the bottom of a bar chart is "0", masking out bad
        # values on a log scale doesn't work.  we must clip instead.

        # Base1DStatistic uses xscale for the variable and yscale for
        # the statistic.

        if yscale.name == "log":
            yscale.mode = "clip"

        # set the scale for each set of axes; can't just call plt.xscale()
        for ax in grid.axes.flatten():
            if orient == 'horizontal':
                ax.set_xscale(yscale.name, **yscale.mpl_params)
            else:
                ax.set_yscale(yscale.name, **yscale.mpl_params)

        stat = experiment.statistics[self.statistic]
        map_args = [self.variable, stat.name]

        if self.huefacet:
            map_args.append(self.huefacet)

        # resolve the error column name here rather than testing the
        # statistic's truth value later: a pandas Series has no unambiguous
        # truth value and `bool(series)` raises ValueError
        if self.error_statistic[0]:
            error_stat = experiment.statistics[self.error_statistic]
            error_name = error_stat.name
            map_args.append(error_name)
        else:
            error_name = None

        grid.map(_barplot,
                 *map_args,
                 view=self,
                 stat_name=stat.name,
                 error_name=error_name,
                 orient=orient,
                 grid=grid,
                 **kwargs)

        return {}
예제 #7
0
class Base2DStatisticsView(BaseStatisticsView):
    """
    Base class for views that plot one statistic against another.

    Attributes
    ----------
    xstatistic, ystatistic : Tuple(Str, Str)
        Keys into ``experiment.statistics`` for the statistics plotted on the
        X and Y axes.

    x_error_statistic, y_error_statistic : Tuple(Str, Str)
        Optional keys of the statistics used as error values.  An empty first
        element means "no error statistic".

    Notes
    -----
    :meth:`plot` reads ``self.xscale`` and ``self.yscale``, which are not
    declared in this class -- presumably they come from a base class or a
    subclass.
    """

    STATS_REMOVED = "{} has been removed. Statistics changed dramatically in 0.5; please see the documentation."

    xchannel = util.Removed(err_string=STATS_REMOVED)
    xfunction = util.Removed(err_string=STATS_REMOVED)
    ychannel = util.Removed(err_string=STATS_REMOVED)
    yfunction = util.Removed(err_string=STATS_REMOVED)

    xstatistic = Tuple(Str, Str)
    ystatistic = Tuple(Str, Str)
    x_error_statistic = Tuple(Str, Str)
    y_error_statistic = Tuple(Str, Str)

    def enum_plots(self, experiment):
        """
        Return the base class's enumeration of plots for the data frame built
        by :meth:`_make_data`.
        """
        data = self._make_data(experiment)
        return super().enum_plots(experiment, data)

    def plot(self, experiment, plot_name=None, **kwargs):
        """
        Build the plot data and both axis scales, then delegate to the base
        class.

        Parameters
        ----------
        experiment : Experiment
            The experiment holding the statistics to plot.

        plot_name : optional
            Passed through to the base class's ``plot``.

        **kwargs
            Passed through to the base class's ``plot``.
        """
        data = self._make_data(experiment)

        # each axis shows a statistic, so each scale is parameterized by the
        # statistic drawn on that axis and by its error statistic
        xscale = util.scale_factory(self.xscale,
                                    experiment,
                                    statistic=self.xstatistic,
                                    error_statistic=self.x_error_statistic)

        yscale = util.scale_factory(self.yscale,
                                    experiment,
                                    statistic=self.ystatistic,
                                    error_statistic=self.y_error_statistic)

        super().plot(experiment,
                     data,
                     plot_name,
                     xscale=xscale,
                     yscale=yscale,
                     **kwargs)

    def _make_data(self, experiment):
        """
        Validate the configured statistics and assemble them into one frame.

        The X and Y statistics are restricted to the intersection of their
        indices and sorted; error statistics, when set, are aligned to the
        same index.  The experiment's own statistics are left untouched.

        Returns
        -------
        pandas.DataFrame
            One column per statistic, named after each statistic's ``name``.

        Raises
        ------
        util.CytoflowViewError
            If the experiment is missing, a statistic is unset, not found or
            not numeric, two statistics share a name, or an error statistic
            does not line up with its data statistic.
        """
        if experiment is None:
            raise util.CytoflowViewError("No experiment specified")

        # an unset Tuple(Str, Str) trait is ('', ''), which is a non-empty
        # (truthy) tuple, so the first element has to be checked as well
        if not self.xstatistic or not self.xstatistic[0]:
            raise util.CytoflowViewError("X Statistic not set")

        if self.xstatistic not in experiment.statistics:
            raise util.CytoflowViewError(
                "Can't find the statistic {} in the experiment".format(
                    self.xstatistic))
        else:
            xstat = experiment.statistics[self.xstatistic]

        if not util.is_numeric(xstat):
            raise util.CytoflowViewError("X statistic must be numeric")

        if self.x_error_statistic[0]:
            if self.x_error_statistic not in experiment.statistics:
                raise util.CytoflowViewError(
                    "Can't find the X error statistic in the experiment")
            else:
                x_error_stat = experiment.statistics[self.x_error_statistic]
        else:
            x_error_stat = None

        if x_error_stat is not None:
            if not xstat.index.equals(x_error_stat.index):
                raise util.CytoflowViewError(
                    "Data statistic and error statistic "
                    " don't have the same index.")

            if xstat.name == x_error_stat.name:
                raise util.CytoflowViewError(
                    "Data statistic and error statistic can "
                    "not have the same name.")

        if not self.ystatistic or not self.ystatistic[0]:
            raise util.CytoflowViewError("Y statistic not set")

        if self.ystatistic not in experiment.statistics:
            raise util.CytoflowViewError(
                "Can't find the Y statistic {} in the experiment".format(
                    self.ystatistic))
        else:
            ystat = experiment.statistics[self.ystatistic]

        if not util.is_numeric(ystat):
            raise util.CytoflowViewError("Y statistic must be numeric")

        if self.y_error_statistic[0]:
            if self.y_error_statistic not in experiment.statistics:
                raise util.CytoflowViewError(
                    "Can't find the Y error statistic in the experiment")
            else:
                y_error_stat = experiment.statistics[self.y_error_statistic]
        else:
            y_error_stat = None

        if y_error_stat is not None:
            if not ystat.index.equals(y_error_stat.index):
                raise util.CytoflowViewError(
                    "Data statistic and error statistic "
                    " don't have the same index.")

            if ystat.name == y_error_stat.name:
                raise util.CytoflowViewError(
                    "Data statistic and error statistic can "
                    "not have the same name.")

        if xstat.name == ystat.name:
            raise util.CytoflowViewError("X and Y statistics can "
                                         "not have the same name.")

        # the statistics fetched above are the experiment's own objects;
        # copy before re-indexing / sorting in place so that plotting never
        # modifies the experiment
        ystat = ystat.copy()
        try:
            ystat.index = ystat.index.reorder_levels(xstat.index.names)
            ystat.sort_index(inplace=True)
        except AttributeError:
            # a flat (non-hierarchical) index has no levels to reorder
            pass

        # reindex() returns new objects, so the in-place sorts below are safe
        intersect_idx = xstat.index.intersection(ystat.index)
        xstat = xstat.reindex(intersect_idx)
        xstat.sort_index(inplace=True)
        ystat = ystat.reindex(intersect_idx)
        ystat.sort_index(inplace=True)

        if x_error_stat is not None:
            if set(x_error_stat.index.names) != set(xstat.index.names):
                raise util.CytoflowViewError(
                    "X error statistic doesn't have the "
                    "same indices as the X statistic")

            x_error_stat = x_error_stat.copy()
            try:
                x_error_stat.index = x_error_stat.index.reorder_levels(
                    xstat.index.names)
                x_error_stat.sort_index(inplace=True)
            except AttributeError:
                pass

            x_error_stat = x_error_stat.reindex(intersect_idx)
            x_error_stat.sort_index(inplace=True)

            if not x_error_stat.index.equals(xstat.index):
                raise util.CytoflowViewError(
                    "X error statistic doesn't have the "
                    "same values as the X statistic")

        if y_error_stat is not None:
            if set(y_error_stat.index.names) != set(ystat.index.names):
                raise util.CytoflowViewError(
                    "Y error statistic doesn't have the "
                    "same indices as the Y statistic")

            y_error_stat = y_error_stat.copy()
            try:
                y_error_stat.index = y_error_stat.index.reorder_levels(
                    ystat.index.names)
                y_error_stat.sort_index(inplace=True)
            except AttributeError:
                pass

            y_error_stat = y_error_stat.reindex(intersect_idx)
            y_error_stat.sort_index(inplace=True)

            if not y_error_stat.index.equals(ystat.index):
                raise util.CytoflowViewError(
                    "Y error statistic doesn't have the "
                    "same values as the Y statistic")

        data = pd.DataFrame(index=xstat.index)
        data[xstat.name] = xstat
        data[ystat.name] = ystat

        if x_error_stat is not None:
            data[x_error_stat.name] = x_error_stat

        if y_error_stat is not None:
            data[y_error_stat.name] = y_error_stat

        return data
예제 #8
0
class Stats2DView(HasStrictTraits):
    """
    Plot two statistics against each other.  A point (X,Y) is drawn for every
    pair of elements with the same value of `variable`; the X value is from
    `xstatistic` and the Y value is from `ystatistic`.

    Attributes
    ----------
    name : Str
        The plot's name

    variable : Str
        the name of the conditioning variable

    xstatistic : Tuple(Str, Str)
        The statistic to plot on the X axis.  Must have the same indices
        as `ystatistic`.

    xscale : Enum("linear", "log", "logicle") (default = "linear")
        What scale to use on the X axis

    ystatistic : Tuple(Str, Str)
        The statistic to plot on the Y axis.  Must have the same indices
        as `xstatistic`.

    yscale : Enum("linear", "log", "logicle") (default = "linear")
        What scale to use on the Y axis

    xfacet : Str
        the conditioning variable for horizontal subplots

    yfacet : Str
        the conditioning variable for vertical subplots

    huefacet : Str
        the conditioning variable for color.

    huescale : Enum("linear", "log", "logicle") (default = "linear")
        scale for the hue facet, if there are a lot of hue values.

    x_error_statistic, y_error_statistic : Tuple(Str, Str)
        if specified, draw error bars.  must be the name of a statistic,
        with the same indices as `xstatistic` and `ystatistic`.

    subset : Str
        What subset of the data to plot?

    Examples
    --------

    Assume we want an input-output curve for a repressor that's under the
    control of a Dox-inducible promoter.  We have an "input" channel
    `(Dox --> eYFP, FITC-A channel)` and an output channel
    `(Dox --> repressor --| eBFP, Pacific Blue channel)` as well as a
    constitutive expression channel (mKate, PE-Tx-Red-YG-A channel).
    We have induced several wells with different amounts of Dox.  We want
    to plot the relationship between the input and output channels (binned by
    input channel intensity) as we vary Dox, faceted by constitutive channel
    bin.

    >>> cfp_bin_op = flow.BinningOp(name = "CFP_Bin",
    ...                             channel = "PE-Tx-Red-YG-A",
    ...                             scale = "log",
    ...                             bin_width = 0.1)
    >>> ifp_bin_op = flow.BinningOp(name = "IFP_Bin",
    ...                             channel = "FITC-A",
    ...                             scale = "log",
    ...                             bin_width = 0.1)
    >>> ifp_mean = flow.ChannelStatisticOp(name = "IFP",
    ...                                    channel = "FITC-A",
    ...                                    by = ["IFP_Bin", "CFP_Bin"],
    ...                                    function = flow.geom_mean)
    >>> ofp_mean = flow.ChannelStatisticOp(name = "OFP",
    ...                                    channel = "Pacific Blue-A",
    ...                                    by = ["IFP_Bin", "CFP_Bin"],
    ...                                    function = flow.geom_mean)
    >>> ex = cfp_bin_op.apply(ex)
    >>> ex = ifp_bin_op.apply(ex)
    >>> ex = ifp_mean.apply(ex)
    >>> ex = ofp_mean.apply(ex)
    >>> view = flow.Stats2DView(name = "IFP vs OFP",
    ...                         variable = "IFP_Bin",
    ...                         xstatistic = ("IFP", "geom_mean"),
    ...                         ystatistic = ("OFP", "geom_mean"),
    ...                         huefacet = "CFP_Bin")
    >>> view.plot(ex)
    """

    # traits
    id = "edu.mit.synbio.cytoflow.view.stats2d"
    friendly_id = "2D Statistics View"

    # deprecated or removed attributes give warnings & errors, respectively
    by = util.Deprecated(
        new='variable', err_string="'by' is deprecated, please use 'variable'")

    STATS_REMOVED = "{} has been removed. Statistics changed dramatically in 0.5; please see the documentation."

    xchannel = util.Removed(err_string=STATS_REMOVED)
    xfunction = util.Removed(err_string=STATS_REMOVED)
    ychannel = util.Removed(err_string=STATS_REMOVED)
    yfunction = util.Removed(err_string=STATS_REMOVED)

    name = Str
    variable = Str
    xstatistic = Tuple(Str, Str)
    xscale = util.ScaleEnum
    ystatistic = Tuple(Str, Str)
    yscale = util.ScaleEnum

    xfacet = Str
    yfacet = Str
    huefacet = Str
    huescale = util.ScaleEnum

    x_error_statistic = Tuple(Str, Str)
    y_error_statistic = Tuple(Str, Str)

    subset = Str

    def _lookup_error_statistic(self, experiment, key, stat, axis):
        """
        Fetch an optional error statistic and check it against `stat`.

        Parameters
        ----------
        experiment : the experiment whose ``statistics`` mapping is searched
        key : the (name, function) tuple naming the error statistic
        stat : the statistic the error bars belong to
        axis : "X" or "Y"; only used to build the error messages

        Returns
        -------
        The error statistic, or None if `key` has an empty first element.

        Raises
        ------
        util.CytoflowViewError
            If the statistic is missing or its index differs from `stat`'s.
        """
        # an empty first element means "no error statistic requested"
        if not key[0]:
            return None

        if key not in experiment.statistics:
            raise util.CytoflowViewError(
                "{} error statistic not in experiment".format(axis))

        error_stat = experiment.statistics[key]

        if not error_stat.index.equals(stat.index):
            raise util.CytoflowViewError(
                "{0} error statistic doesn't have the "
                "same indices as the {0} statistic".format(axis))

        return error_stat

    def _lookup_statistics(self, experiment):
        """
        Validate the view's parameters against `experiment` and fetch the
        statistics to plot.

        Returns
        -------
        (xstat, ystat, x_error_stat, y_error_stat); the error statistics are
        None when they were not requested.

        Raises
        ------
        util.CytoflowViewError
            If the experiment, variable or a statistic is missing, or if the
            statistics' indices don't agree.
        """
        if not experiment:
            raise util.CytoflowViewError("No experiment specified")

        if not self.variable:
            raise util.CytoflowViewError("variable not set")

        if self.variable not in experiment.conditions:
            raise util.CytoflowViewError(
                "variable {0} not in the experiment".format(self.variable))

        # a 2-tuple is always truthy, so test the statistic's name as well
        # (the same convention the error statistics use)
        if not self.xstatistic or not self.xstatistic[0]:
            raise util.CytoflowViewError("X statistic not set")

        if self.xstatistic not in experiment.statistics:
            raise util.CytoflowViewError(
                "Can't find X statistic {} in experiment".format(
                    self.xstatistic))
        xstat = experiment.statistics[self.xstatistic]

        if not self.ystatistic or not self.ystatistic[0]:
            raise util.CytoflowViewError("Y statistic not set")

        if self.ystatistic not in experiment.statistics:
            raise util.CytoflowViewError(
                "Can't find Y statistic {} in experiment".format(
                    self.ystatistic))
        ystat = experiment.statistics[self.ystatistic]

        if not xstat.index.equals(ystat.index):
            raise util.CytoflowViewError(
                "X statistic and Y statistic must have "
                "the same indices: {}".format(xstat.index.names))

        x_error_stat = self._lookup_error_statistic(
            experiment, self.x_error_statistic, xstat, "X")
        y_error_stat = self._lookup_error_statistic(
            experiment, self.y_error_statistic, ystat, "Y")

        return xstat, ystat, x_error_stat, y_error_stat

    def _assemble_data(self, xstat, ystat, x_error_stat, y_error_stat):
        """
        Combine the statistics into one data frame indexed like `xstat`,
        apply `subset`, and drop index levels that have only one value.

        Returns
        -------
        (data, xname, yname, x_error_name, y_error_name).  The names are the
        randomly generated column names; an error name is None when the
        corresponding error statistic is None.

        Raises
        ------
        util.CytoflowViewError
            If `subset` can't be evaluated or selects nothing, or if no
            index level has more than one value.
        """
        data = pd.DataFrame(index=xstat.index)

        # random column names, as returned by util.random_string
        xname = util.random_string(6)
        data[xname] = xstat

        yname = util.random_string(6)
        data[yname] = ystat

        x_error_name = None
        if x_error_stat is not None:
            x_error_name = util.random_string(6)
            data[x_error_name] = x_error_stat

        y_error_name = None
        if y_error_stat is not None:
            y_error_name = util.random_string(6)
            data[y_error_name] = y_error_stat

        if self.subset:
            try:
                # TODO - either sanitize column names, or check to see that
                # all conditions are valid Python variables
                data = data.query(self.subset)
            except Exception:
                raise util.CytoflowViewError(
                    "Subset string '{0}' isn't valid".format(self.subset))

            if len(data) == 0:
                raise util.CytoflowViewError(
                    "Subset string '{0}' returned no values".format(
                        self.subset))

        for name in list(data.index.names):
            unique_values = data.index.get_level_values(name).unique()
            if len(unique_values) == 1:
                warn("Only one value for level {}; dropping it.".format(name),
                     util.CytoflowViewWarning)
                try:
                    data.index = data.index.droplevel(name)
                except (AttributeError, ValueError):
                    # the last remaining level can't be dropped: depending
                    # on the pandas version a flat Index either has no
                    # droplevel() or refuses with a ValueError
                    raise util.CytoflowViewError("Must have more than one "
                                                 "value to plot.")

        return data, xname, yname, x_error_name, y_error_name

    def _check_facets(self, experiment, data):
        """
        Check that `variable` and every facet that is set exist both in
        `experiment` and in the index of `data`, and that none is used twice.

        Returns
        -------
        The list of index level names consumed by the variable and facets.

        Raises
        ------
        util.CytoflowViewError
            If any of the checks fails.
        """
        if self.variable not in experiment.conditions:
            raise util.CytoflowViewError(
                "Variable {} not in experiment".format(self.variable))

        if self.variable not in data.index.names:
            raise util.CytoflowViewError(
                "Variable {} not in statistic; must be one of {}".format(
                    self.variable, data.index.names))

        for axis, facet in (("X", self.xfacet), ("Y", self.yfacet)):
            if facet and facet not in experiment.conditions:
                raise util.CytoflowViewError(
                    "{} facet {} not in the experiment".format(axis, facet))

            if facet and facet not in data.index.names:
                raise util.CytoflowViewError(
                    "{} facet {} not in statistics; must be one of {}".format(
                        axis, facet, data.index.names))

        # NOTE(review): the hue facet is looked up in experiment.metadata
        # while the other facets use experiment.conditions -- confirm that
        # this difference is intended.
        if self.huefacet and self.huefacet not in experiment.metadata:
            raise util.CytoflowViewError(
                "Hue facet {} not in the experiment".format(self.huefacet))

        if self.huefacet and self.huefacet not in data.index.names:
            raise util.CytoflowViewError(
                "Hue facet {} not in statistics; must be one of {}".format(
                    self.huefacet, data.index.names))

        # a list (not filter()) so that len() works on every Python version
        facets = [facet
                  for facet in (self.variable, self.xfacet,
                                self.yfacet, self.huefacet)
                  if facet]
        if len(facets) != len(set(facets)):
            raise util.CytoflowViewError("Can't reuse facets")

        return facets

    def _default_limits(self, scale, values, error_stat):
        """
        Compute default axis limits, passed through ``scale.clip``.

        Without an error statistic the limits are 0.9 * min and 1.1 * max of
        `values`.  With one, they are derived from `error_stat` instead:
        from the first and second element of each entry when the entries
        can be indexed, otherwise from the entries themselves.
        """
        limits = (scale.clip(values.min() * 0.9),
                  scale.clip(values.max() * 1.1))

        if error_stat is not None:
            try:
                limits = (scale.clip(min([e[0] for e in error_stat]) * 0.9),
                          scale.clip(max([e[1] for e in error_stat]) * 1.1))
            except (IndexError, TypeError):
                # scalar entries can't be indexed: numpy scalars raise
                # IndexError, plain Python floats raise TypeError
                limits = (scale.clip(error_stat.min() * 0.9),
                          scale.clip(error_stat.max() * 1.1))

        return limits

    def enum_plots(self, experiment):
        """
        Returns an iterator over the possible plots that this View can
        produce.  The values returned can be passed to "plot".

        If every index level of the statistic is used by `variable` or a
        facet, the iterator yields a single None.

        Raises
        ------
        util.CytoflowViewError
            If the view's parameters are inconsistent with `experiment`.
        """
        xstat, ystat, x_error_stat, y_error_stat = \
            self._lookup_statistics(experiment)

        data = self._assemble_data(xstat, ystat,
                                   x_error_stat, y_error_stat)[0]

        names = list(data.index.names)
        facets = self._check_facets(experiment, data)

        # index levels that are neither the variable nor a facet
        by = list(set(names) - set(facets))

        class plot_enum(object):
            """Iterate over the group keys of ``experiment.data.groupby(by)``."""

            def __init__(self, experiment, by):
                self._iter = None
                self._returned = False

                if by:
                    self._iter = iter(experiment.data.groupby(by))

            def __iter__(self):
                return self

            def next(self):
                if self._iter is not None:
                    # groupby yields (key, group) pairs; only the key is
                    # a plot name
                    return next(self._iter)[0]

                if self._returned:
                    raise StopIteration

                self._returned = True
                return None

            # Python 3 spells the iterator protocol method ``__next__``
            __next__ = next

        return plot_enum(experiment, by)

    def plot(self, experiment, plot_name=None, **kwargs):
        """
        Plot `ystatistic` against `xstatistic` on a grid of facets.

        Parameters
        ----------
        experiment : the experiment holding the statistics to plot
        plot_name : one of the values produced by `enum_plots`; required
            when some index level of the statistic is used by neither
            `variable` nor a facet, and forbidden otherwise
        **kwargs : ``col_wrap``, ``xlim``, ``ylim``, ``sharex`` and
            ``sharey`` are consumed here; ``antialiased`` defaults to True;
            everything else is passed on to ``plt.plot``

        Raises
        ------
        util.CytoflowViewError
            If the view's parameters are inconsistent with `experiment`,
            with each other, or with `plot_name`.
        """
        xstat, ystat, x_error_stat, y_error_stat = \
            self._lookup_statistics(experiment)

        # pop col_wrap exactly once: a second pop would always return None
        # and silently disable wrapping
        col_wrap = kwargs.pop('col_wrap', None)

        if col_wrap and self.yfacet:
            raise util.CytoflowViewError(
                "Can't set yfacet and col_wrap at the same time.")

        if col_wrap and not self.xfacet:
            raise util.CytoflowViewError("Must set xfacet to use col_wrap.")

        (data, xname, yname,
         x_error_name, y_error_name) = self._assemble_data(
             xstat, ystat, x_error_stat, y_error_stat)

        names = list(data.index.names)
        facets = self._check_facets(experiment, data)

        unused_names = list(set(names) - set(facets))

        if plot_name is not None and not unused_names:
            raise util.CytoflowViewError("You specified a plot name, but all "
                                         "the facets are already used")

        data = data.reset_index()

        if unused_names:
            groupby = data.groupby(unused_names)
            plot_names = list(groupby.groups.keys())

            if plot_name is None:
                raise util.CytoflowViewError(
                    "You must use facets {} in either the "
                    "plot variables or the plot name. "
                    "Possible plot names: {}".format(unused_names,
                                                     plot_names))

            if plot_name not in set(plot_names):
                raise util.CytoflowViewError(
                    "Plot {} not from plot_enum; must "
                    "be one of {}".format(plot_name, plot_names))

            data = groupby.get_group(plot_name)

        # the axis scales are built from the plotted statistics themselves
        xscale = util.scale_factory(self.xscale,
                                    experiment,
                                    statistic=self.xstatistic)
        yscale = util.scale_factory(self.yscale,
                                    experiment,
                                    statistic=self.ystatistic)

        xlim = kwargs.pop("xlim", None)
        if xlim is None:
            xlim = self._default_limits(xscale, data[xname], x_error_stat)

        ylim = kwargs.pop("ylim", None)
        if ylim is None:
            ylim = self._default_limits(yscale, data[yname], y_error_stat)

        kwargs.setdefault('antialiased', True)

        if col_wrap:
            cols = col_wrap
        elif self.xfacet:
            cols = len(data[self.xfacet].unique())
        else:
            cols = 1

        sharex = kwargs.pop('sharex', True)
        sharey = kwargs.pop('sharey', True)

        # 6.0, not 6: integer division would truncate the facet size (to 0
        # for more than six columns) on Python 2
        grid = sns.FacetGrid(data,
                             size=(6.0 / cols),
                             aspect=1.5,
                             col=(self.xfacet if self.xfacet else None),
                             row=(self.yfacet if self.yfacet else None),
                             hue=(self.huefacet if self.huefacet else None),
                             col_order=(np.sort(data[self.xfacet].unique())
                                        if self.xfacet else None),
                             row_order=(np.sort(data[self.yfacet].unique())
                                        if self.yfacet else None),
                             hue_order=(np.sort(data[self.huefacet].unique())
                                        if self.huefacet else None),
                             col_wrap=col_wrap,
                             legend_out=False,
                             sharex=sharex,
                             sharey=sharey,
                             xlim=xlim,
                             ylim=ylim)

        for ax in grid.axes.flatten():
            ax.set_xscale(self.xscale, **xscale.mpl_params)
            ax.set_yscale(self.yscale, **yscale.mpl_params)

        # plot the error bars first so the axis labels don't get overwritten.
        # compare with None: the truth value of a pandas Series is ambiguous
        # and raises ValueError
        if x_error_stat is not None:
            grid.map(_x_error_bars, xname, yname, x_error_name)

        if y_error_stat is not None:
            grid.map(_y_error_bars, xname, yname, y_error_name)

        grid.map(plt.plot, xname, yname, **kwargs)

        axes = plt.gcf().get_axes()
        if axes:
            # if we have an xfacet, make sure the y scale is the same for each
            y_limits = [ax.get_ylim() for ax in axes]
            fig_y_min = min(low for low, _ in y_limits)
            fig_y_max = max(high for _, high in y_limits)

            # if we have a yfacet, make sure the x scale is the same for each
            x_limits = [ax.get_xlim() for ax in axes]
            fig_x_min = min(low for low, _ in x_limits)
            fig_x_max = max(high for _, high in x_limits)

            for ax in axes:
                ax.set_ylim(fig_y_min, fig_y_max)
                ax.set_xlim(fig_x_min, fig_x_max)

        # if we have a hue facet and a lot of hues, make a color bar instead
        # of a super-long legend.

        if self.huefacet:
            # NOTE(review): 'axes.color_cycle' only exists in older
            # matplotlib releases -- confirm the supported version.
            current_palette = mpl.rcParams['axes.color_cycle']
            if util.is_numeric(experiment.data[self.huefacet]) and \
               len(grid.hue_names) > len(current_palette):

                plot_ax = plt.gca()
                cmap = mpl.colors.ListedColormap(
                    sns.color_palette("husl", n_colors=len(grid.hue_names)))
                cax, _ = mpl.colorbar.make_axes(plt.gca())
                hue_scale = util.scale_factory(self.huescale,
                                               experiment,
                                               condition=self.huefacet)
                mpl.colorbar.ColorbarBase(cax,
                                          cmap=cmap,
                                          norm=hue_scale.color_norm(),
                                          label=self.huefacet)
                # make the plot axes current again for the labels below
                plt.sca(plot_ax)
            else:
                grid.add_legend(title=self.huefacet)

        plt.xlabel(self.xstatistic)
        plt.ylabel(self.ystatistic)

        if unused_names and plot_name is not None:
            plt.title("{0} = {1}".format(unused_names, plot_name))
예제 #9
0
파일: table.py 프로젝트: cehnjun/cytoflow
class TableView(HasStrictTraits):
    """
    "Plot" a tabular view of a statistic.  Mostly useful for GUIs.  Each level
    of the statistic's index must be used in :attr:`row_facet`,
    :attr:`column_facet`, :attr:`subrow_facet`, or :attr:`subcolumn_facet`.
    This module can't "plot" a statistic with more than four index levels
    unless :attr:`subset` is set and that results in extra levels being
    dropped.

    Attributes
    ----------
    statistic : (str, str)
        The name of the statistic to plot.  Must be a key in the
        :attr:`~Experiment.statistics` attribute of the :class:`~.Experiment`
        being plotted.  Each level of the statistic's index must be used
        in :attr:`row_facet`, :attr:`column_facet`, :attr:`subrow_facet`, or
        :attr:`subcolumn_facet`.

    row_facet, column_facet : str
        The statistic facets to be used as row and column headers.

    subrow_facet, subcolumn_facet : str
        The statistic facets to be used as subrow and subcolumn headers.

    subset : str
        A Python expression used to select a subset of the statistic to plot.

    Examples
    --------

    Make a little data set.

    .. plot::
        :context: close-figs

        >>> import cytoflow as flow
        >>> import_op = flow.ImportOp()
        >>> import_op.tubes = [flow.Tube(file = "Plate01/RFP_Well_A3.fcs",
        ...                              conditions = {'Dox' : 10.0}),
        ...                    flow.Tube(file = "Plate01/CFP_Well_A4.fcs",
        ...                              conditions = {'Dox' : 1.0})]
        >>> import_op.conditions = {'Dox' : 'float'}
        >>> ex = import_op.apply()

    Add a threshold gate

    .. plot::
        :context: close-figs

        >>> ex2 = flow.ThresholdOp(name = 'Threshold',
        ...                        channel = 'Y2-A',
        ...                        threshold = 2000).apply(ex)

    Add a statistic

    .. plot::
        :context: close-figs

        >>> ex3 = flow.ChannelStatisticOp(name = "ByDox",
        ...                               channel = "Y2-A",
        ...                               by = ['Dox', 'Threshold'],
        ...                               function = len).apply(ex2)

    "Plot" the table

    .. plot::
        :context: close-figs

        >>> flow.TableView(statistic = ("ByDox", "len"),
        ...                row_facet = "Dox",
        ...                column_facet = "Threshold").plot(ex3)

    """

    # traits
    id = Constant("edu.mit.synbio.cytoflow.view.table")
    friendly_id = Constant("Table View")

    REMOVED_ERROR = Constant("Statistics have changed dramatically in 0.5; please see the documentation")
    channel = util.Removed(err_string = REMOVED_ERROR)
    function = util.Removed(err_string = REMOVED_ERROR)

    statistic = Tuple(Str, Str)
    row_facet = Str
    subrow_facet = Str
    column_facet = Str
    subcolumn_facet = Str

    subset = Str

    def _cell_text(self, value):
        """Format a statistic value with ``{:g}``; return it unchanged if it is not a number."""
        try:
            return "{:g}".format(value)
        except (ValueError, TypeError):
            # str raises ValueError for 'g'; tuples and other objects raise
            # TypeError.  Either way, show the value as-is.
            return value

    def _header_text(self, facet, value):
        """Build a ``"facet = value"`` header, using ``{:g}`` when the value is numeric."""
        try:
            return "{0} = {1:g}".format(facet, value)
        except (ValueError, TypeError):
            return "{0} = {1}".format(facet, value)

    def plot(self, experiment, plot_name = None, **kwargs):
        """
        Plot a table of the statistic.

        Parameters
        ----------
        experiment : Experiment
            The experiment whose ``statistics`` contains :attr:`statistic`.

        plot_name : object
            Accepted for signature compatibility; not used by this view.

        **kwargs
            Forwarded to the ``Table`` constructor.

        Raises
        ------
        CytoflowViewError
            If the experiment or statistic is missing, the subset is invalid
            or empty, the facets are inconsistent with each other, or the
            facets do not cover exactly the statistic's index levels.
        """

        if experiment is None:
            raise util.CytoflowViewError('experiment', "No experiment specified")

        if self.statistic not in experiment.statistics:
            raise util.CytoflowViewError('statistic',
                                         "Can't find the statistic {} in the experiment"
                                         .format(self.statistic))

        stat = experiment.statistics[self.statistic]

        data = pd.DataFrame(index = stat.index)
        data[stat.name] = stat

        if self.subset:
            try:
                data = data.query(self.subset)
            except Exception as e:
                raise util.CytoflowViewError('subset',
                                             "Subset string '{0}' isn't valid"
                                             .format(self.subset)) from e

            if len(data) == 0:
                raise util.CytoflowViewError('subset',
                                             "Subset string '{0}' returned no values"
                                             .format(self.subset))

        # index levels with a single value carry no information; drop them
        for name in list(data.index.names):
            if len(data.index.get_level_values(name).unique()) == 1:
                warn("Only one value for level {}; dropping it.".format(name),
                     util.CytoflowViewWarning)
                try:
                    data.index = data.index.droplevel(name)
                except (AttributeError, ValueError) as e:
                    # the last remaining level can't be dropped: old pandas
                    # has no Index.droplevel (AttributeError), newer pandas
                    # raises ValueError.
                    raise util.CytoflowViewError(None,
                                                 "Must have more than one "
                                                 "value to plot.") from e

        if not (self.row_facet or self.column_facet):
            raise util.CytoflowViewError('row_facet',
                                         "Must set at least one of row_facet "
                                         "or column_facet")

        if self.subrow_facet and not self.row_facet:
            raise util.CytoflowViewError('subrow_facet',
                                         "Must set row_facet before using "
                                         "subrow_facet")

        if self.subcolumn_facet and not self.column_facet:
            raise util.CytoflowViewError('subcolumn_facet',
                                         "Must set column_facet before using "
                                         "subcolumn_facet")

        # every facet that is set must be both an experiment condition and
        # one of the (remaining) statistic index levels
        facet_specs = [('row_facet', "Row", self.row_facet),
                       ('subrow_facet', "Subrow", self.subrow_facet),
                       ('column_facet', "Column", self.column_facet),
                       ('subcolumn_facet', "Subcolumn", self.subcolumn_facet)]

        for trait_name, label, facet in facet_specs:
            if not facet:
                continue

            if facet not in experiment.conditions:
                raise util.CytoflowViewError(trait_name,
                                             "{} facet {} not in the experiment, "
                                             "must be one of {}"
                                             .format(label, facet, experiment.conditions))

            if facet not in data.index.names:
                raise util.CytoflowViewError(trait_name,
                                             "{} facet {} not a statistic index; "
                                             "must be one of {}"
                                             .format(label, facet, data.index.names))

        facets = [facet for _, _, facet in facet_specs if facet]
        if len(facets) != len(set(facets)):
            raise util.CytoflowViewError(None,
                                         "Can't reuse facets")

        if set(facets) != set(data.index.names):
            raise util.CytoflowViewError(None,
                                         "Must use all the statistic indices as variables or facets: {}"
                                         .format(data.index.names))

        def level_values(facet):
            # an unset facet contributes a single placeholder group
            if not facet:
                return [None]
            return data.index.get_level_values(facet).unique()

        row_groups = level_values(self.row_facet)
        subrow_groups = level_values(self.subrow_facet)
        col_groups = level_values(self.column_facet)
        subcol_groups = level_values(self.subcolumn_facet)

        # header rows / columns push the data cells down and to the right
        row_offset = (self.column_facet != "") + (self.subcolumn_facet != "")
        col_offset = (self.row_facet != "") + (self.subrow_facet != "")

        num_cols = len(col_groups) * len(subcol_groups) + col_offset

        fig = plt.figure()
        ax = fig.add_subplot(111)

        # hide the plot axes that matplotlib tries to make
        ax.xaxis.set_visible(False)
        ax.yaxis.set_visible(False)
        for sp in ax.spines.values():
            sp.set_color('w')
            sp.set_zorder(0)

        loc = 'upper left'
        bbox = None

        t = Table(ax, loc, bbox, **kwargs)
        t.auto_set_font_size(False)
        for c in range(num_cols):
            t.auto_set_column_width(c)

        width = [0.2] * num_cols

        height = t._approx_text_height() * 1.8

        # for each index level, which of (row, subrow, column, subcolumn)
        # supplies its value.  The checks above guarantee every level name
        # matches exactly one facet.
        facet_order = [facet for _, _, facet in facet_specs]
        level_source = [facet_order.index(name) for name in data.index.names]

        # make the main table
        for (ri, r) in enumerate(row_groups):
            for (rri, rr) in enumerate(subrow_groups):
                row_idx = ri * len(subrow_groups) + rri + row_offset
                for (ci, c) in enumerate(col_groups):
                    for (cci, cc) in enumerate(subcol_groups):
                        col_idx = ci * len(subcol_groups) + cci + col_offset

                        group = (r, rr, c, cc)
                        agg_idx = tuple(group[i] for i in level_source)
                        if len(agg_idx) == 1:
                            # a single-level index is keyed by scalar
                            agg_idx = agg_idx[0]

                        text = self._cell_text(data.loc[agg_idx][stat.name])
                        t.add_cell(row_idx,
                                   col_idx,
                                   width = width[col_idx],
                                   height = height,
                                   text = text)

        # row headers
        if self.row_facet:
            for (ri, r) in enumerate(row_groups):
                row_idx = ri * len(subrow_groups) + row_offset
                t.add_cell(row_idx,
                           0,
                           width = width[0],
                           height = height,
                           text = self._header_text(self.row_facet, r))

        # subrow headers
        if self.subrow_facet:
            for (ri, r) in enumerate(row_groups):
                for (rri, rr) in enumerate(subrow_groups):
                    row_idx = ri * len(subrow_groups) + rri + row_offset
                    t.add_cell(row_idx,
                               1,
                               width = width[1],
                               height = height,
                               text = self._header_text(self.subrow_facet, rr))

        # column headers
        if self.column_facet:
            for (ci, c) in enumerate(col_groups):
                col_idx = ci * len(subcol_groups) + col_offset
                t.add_cell(0,
                           col_idx,
                           width = width[col_idx],
                           height = height,
                           text = self._header_text(self.column_facet, c))

        # subcolumn headers
        if self.subcolumn_facet:
            for (ci, c) in enumerate(col_groups):
                for (cci, cc) in enumerate(subcol_groups):
                    col_idx = ci * len(subcol_groups) + cci + col_offset
                    t.add_cell(1,
                               col_idx,
                               width = width[col_idx],
                               height = height,
                               text = self._header_text(self.subcolumn_facet, cc))

        ax.add_table(t)
예제 #10
0
class TableView(HasStrictTraits):
    """
    "Plot" a statistic as a table.

    Every level of the statistic's index (after single-valued levels are
    dropped) must be assigned to exactly one of :attr:`row_facet`,
    :attr:`subrow_facet`, :attr:`column_facet` or :attr:`subcolumn_facet`.

    Attributes
    ----------
    name : Str
        The view's name.

    statistic : Tuple(Str, Str)
        Key of the statistic in ``experiment.statistics``.

    row_facet, column_facet : Str
        Index levels used as row and column headers.

    subrow_facet, subcolumn_facet : Str
        Index levels used as nested row and column headers.

    subset : Str
        Expression passed to ``DataFrame.query()`` to select part of the
        statistic before it is tabulated.
    """

    # traits
    id = "edu.mit.synbio.cytoflow.view.table"
    friendly_id = "Table View"

    REMOVED_ERROR = "Statistics have changed dramatically in 0.5; please see the documentation"
    channel = util.Removed(err_string=REMOVED_ERROR)
    function = util.Removed(err_string=REMOVED_ERROR)

    name = Str
    statistic = Tuple(Str, Str)
    row_facet = Str
    subrow_facet = Str
    column_facet = Str
    subcolumn_facet = Str

    subset = Str

    def plot(self, experiment, plot_name=None, **kwargs):
        """
        Plot a table of the statistic.

        Parameters
        ----------
        experiment : Experiment
            The experiment whose ``statistics`` contains :attr:`statistic`.

        plot_name : object
            Accepted for signature compatibility; not used by this view.

        **kwargs
            Forwarded to the ``Table`` constructor.

        Raises
        ------
        CytoflowViewError
            If the experiment or statistic is missing, the subset is invalid
            or empty, or the facets do not cover exactly the statistic's
            index levels.
        """

        if not experiment:
            raise util.CytoflowViewError("No experiment specified")

        if self.statistic not in experiment.statistics:
            raise util.CytoflowViewError(
                "Can't find the statistic {} in the experiment".format(
                    self.statistic))

        stat = experiment.statistics[self.statistic]

        data = pd.DataFrame(index=stat.index)
        data[stat.name] = stat

        if self.subset:
            try:
                data = data.query(self.subset)
            except Exception:
                # not a bare except: let KeyboardInterrupt / SystemExit through
                raise util.CytoflowViewError(
                    "Subset string '{0}' isn't valid".format(self.subset))

            if len(data) == 0:
                raise util.CytoflowViewError(
                    "Subset string '{0}' returned no values".format(
                        self.subset))

        # index levels with a single value carry no information; drop them
        for name in list(data.index.names):
            if len(data.index.get_level_values(name).unique()) == 1:
                warn("Only one value for level {}; dropping it.".format(name),
                     util.CytoflowViewWarning)
                try:
                    data.index = data.index.droplevel(name)
                except (AttributeError, ValueError):
                    # the last remaining level can't be dropped: old pandas
                    # has no Index.droplevel (AttributeError), newer pandas
                    # raises ValueError.
                    raise util.CytoflowViewError("Must have more than one "
                                                 "value to plot.")

        if not (self.row_facet or self.column_facet):
            raise util.CytoflowViewError("Must set at least one of row_facet "
                                         "or column_facet")

        if self.subrow_facet and not self.row_facet:
            raise util.CytoflowViewError("Must set row_facet before using "
                                         "subrow_facet")

        if self.subcolumn_facet and not self.column_facet:
            raise util.CytoflowViewError("Must set column_facet before using "
                                         "subcolumn_facet")

        # every facet that is set must be both an experiment condition and
        # one of the (remaining) statistic index levels
        facet_specs = [("Row", self.row_facet),
                       ("Subrow", self.subrow_facet),
                       ("Column", self.column_facet),
                       ("Subcolumn", self.subcolumn_facet)]

        for label, facet in facet_specs:
            if not facet:
                continue

            if facet not in experiment.conditions:
                raise util.CytoflowViewError(
                    "{} facet {} not in the experiment".format(label, facet))

            if facet not in data.index.names:
                raise util.CytoflowViewError(
                    "{} facet {} not a statistic index; "
                    "must be one of {}".format(label, facet,
                                               data.index.names))

        # a list, not filter(): on Python 3 filter() is lazy and has no len()
        facets = [facet for _, facet in facet_specs if facet]
        if len(facets) != len(set(facets)):
            raise util.CytoflowViewError("Can't reuse facets")

        if set(facets) != set(data.index.names):
            raise util.CytoflowViewError(
                "Must use all the statistic indices as variables or facets: {}"
                .format(data.index.names))

        def level_values(facet):
            # an unset facet contributes a single placeholder group
            if not facet:
                return [None]
            return data.index.get_level_values(facet).unique()

        row_groups = level_values(self.row_facet)
        subrow_groups = level_values(self.subrow_facet)
        col_groups = level_values(self.column_facet)
        subcol_groups = level_values(self.subcolumn_facet)

        # header rows / columns push the data cells down and to the right
        row_offset = (self.column_facet != "") + (self.subcolumn_facet != "")
        col_offset = (self.row_facet != "") + (self.subrow_facet != "")

        num_cols = len(col_groups) * len(subcol_groups) + col_offset

        fig = plt.figure()
        ax = fig.add_subplot(111)

        # hide the plot axes that matplotlib tries to make
        ax.xaxis.set_visible(False)
        ax.yaxis.set_visible(False)
        # .values() works on Python 2 and 3; .itervalues() is Python 2 only
        for sp in ax.spines.values():
            sp.set_color('w')
            sp.set_zorder(0)

        loc = 'best'
        bbox = None

        t = Table(ax, loc, bbox, **kwargs)
        width = [1.0 / num_cols] * num_cols
        height = t._approx_text_height() * 1.8

        # for each index level, which of (row, subrow, column, subcolumn)
        # supplies its value.  The checks above guarantee every level name
        # matches exactly one facet.
        facet_order = [facet for _, facet in facet_specs]
        level_source = [facet_order.index(name) for name in data.index.names]

        # make the main table
        for (ri, r) in enumerate(row_groups):
            for (rri, rr) in enumerate(subrow_groups):
                row_idx = ri * len(subrow_groups) + rri + row_offset
                for (ci, c) in enumerate(col_groups):
                    for (cci, cc) in enumerate(subcol_groups):
                        col_idx = ci * len(subcol_groups) + cci + col_offset

                        group = (r, rr, c, cc)
                        agg_idx = tuple(group[i] for i in level_source)
                        if len(agg_idx) == 1:
                            # a single-level index is keyed by scalar
                            agg_idx = agg_idx[0]

                        t.add_cell(row_idx,
                                   col_idx,
                                   width=width[col_idx],
                                   height=height,
                                   text=data.loc[agg_idx][stat.name])

        # row headers
        if self.row_facet:
            for (ri, r) in enumerate(row_groups):
                row_idx = ri * len(subrow_groups) + row_offset
                text = "{0} = {1}".format(self.row_facet, r)
                t.add_cell(row_idx,
                           0,
                           width=width[0],
                           height=height,
                           text=text)

        # subrow headers
        if self.subrow_facet:
            for (ri, r) in enumerate(row_groups):
                for (rri, rr) in enumerate(subrow_groups):
                    row_idx = ri * len(subrow_groups) + rri + row_offset
                    text = "{0} = {1}".format(self.subrow_facet, rr)
                    t.add_cell(row_idx,
                               1,
                               width=width[1],
                               height=height,
                               text=text)

        # column headers
        if self.column_facet:
            for (ci, c) in enumerate(col_groups):
                col_idx = ci * len(subcol_groups) + col_offset
                text = "{0} = {1}".format(self.column_facet, c)
                t.add_cell(0,
                           col_idx,
                           width=width[col_idx],
                           height=height,
                           text=text)

        # subcolumn headers
        if self.subcolumn_facet:
            for (ci, c) in enumerate(col_groups):
                for (cci, cc) in enumerate(subcol_groups):
                    col_idx = ci * len(subcol_groups) + cci + col_offset
                    # label with the subcolumn value (cc), not the parent
                    # column value
                    text = "{0} = {1}".format(self.subcolumn_facet, cc)
                    t.add_cell(1,
                               col_idx,
                               width=width[col_idx],
                               height=height,
                               text=text)

        ax.add_table(t)
예제 #11
0
파일: stats_1d.py 프로젝트: wangjs/cytoflow
class Stats1DView(Base1DStatisticsView):
    """
    Plot a statistic.  The value of the statistic will be plotted on the
    Y axis; a numeric conditioning variable must be chosen for the X axis.
    Every variable in the statistic must be specified as either the `variable`
    or one of the plot facets.
    
    Attributes
    ----------
    name : Str
        The plot's name 
        
    statistic : Tuple(Str, Str)
        The statistic to plot.  The first element is the name of the module that
        added the statistic, and the second element is the name of the statistic.
    
    variable : Str
        the name of the conditioning variable to put on the X axis.  Must be
        numeric (float or int).
        
    xscale : Enum("linear", "log") (default = "linear")
        The scale to use on the X axis
        
    yscale : Enum("linear", "log", "logicle") (default = "linear")
        The scale to use on the Y axis
        
    xfacet : Str
        the conditioning variable for horizontal subplots
        
    yfacet : Str
        the conditioning variable for vertical subplots
        
    huefacet : 
        the conditioning variable for color.
        
    huescale :
        the scale to use on the "hue" axis, if there are many values of
        the hue facet.
        
    error_statistic : Tuple(Str, Str)
        A statistic to use to draw error bars; the bars are +- the value of
        the statistic.
        
    subset : String
        Passed to pandas.DataFrame.query(), to get a subset of the statistic
        before we plot it.

        
    Examples
    --------
    
    Assume we want a Dox induction curve in a transient transfection experiment.  
    We have induced several wells with different amounts of Dox and the output
    of the Dox-inducible channel is "Pacific Blue-A".  We have a constitutive
    expression channel in "PE-Tx-Red-YG-A". We want to bin all the data by
    constitutive expression level, then plot the dose-response (geometric mean)
    curve in each bin. 
    
    >>> ex_bin = flow.BinningOp(name = "CFP_Bin",
    ...                         channel = "PE-Tx-Red-YG-A",
    ...                         scale = "log",
    ...                         bin_width = 0.1).apply(ex)
    >>> ex_stat = flow.ChannelStatisticOp(name = "DoxCFP",
    ...                                   by = ["Dox", "CFP_Bin"],
    ...                                   channel = "Pacific Blue-A",
    ...                                   function = flow.geom_mean).apply(ex_bin)
    >>> view = flow.Stats1DView(name = "Dox vs IFP",
    ...                         statistic = ("DoxCFP", "geom_mean"),
    ...                         variable = "Dox",
    ...                         xscale = "log",
    ...                         huefacet = "CFP_Bin")
    >>> view.plot(ex_stat)
    """

    # traits
    id = "edu.mit.synbio.cytoflow.view.stats1d"
    friendly_id = "1D Statistics View"

    REMOVED_ERROR = "Statistics changed dramatically in 0.5; please see the documentation"
    by = util.Removed(err_string=REMOVED_ERROR)
    yfunction = util.Removed(err_string=REMOVED_ERROR)
    ychannel = util.Removed(err_string=REMOVED_ERROR)
    xvariable = util.Deprecated(new="variable")

    def enum_plots(self, experiment):
        """
        Returns an iterator over the possible plots that this View can
        produce.  The values returned can be passed to "plot".
        """

        return super().enum_plots(experiment)

    def plot(self, experiment, plot_name=None, **kwargs):
        """Plot a chart of a variable's values against a statistic.
        
        Parameters
        ----------
        
        color : a matplotlib color
            The color to plot with.  Overridden if `huefacet` is not `None`
            
        linestyle : ['solid' | 'dashed', 'dashdot', 'dotted' | (offset, on-off-dash-seq) | '-' | '--' | '-.' | ':' | 'None' | ' ' | '']
            
        marker : a matplotlib marker style
            See http://matplotlib.org/api/markers_api.html#module-matplotlib.markers
            
        markersize : int
            The marker size in points
            
        markerfacecolor : a matplotlib color
            The color to make the markers.  Overridden (?) if `huefacet` is not `None`
            
        alpha : the alpha blending value, from 0.0 (transparent) to 1.0 (opaque)
        
        Other Parameters
        ----------------
        
        Other `kwargs` are passed to matplotlib.pyplot.plot_.
    
        .. _matplotlib.pyplot.plot: https://matplotlib.org/devdocs/api/_as_gen/matplotlib.pyplot.plot.html
        
        See Also
        --------
        BaseView.plot : common parameters for data views
        
        """

        super().plot(experiment, plot_name, **kwargs)

    def _grid_plot(self, experiment, grid, xlim, ylim, xscale, yscale,
                   **kwargs):
        """
        Draw the statistic (and optional error bars) onto ``grid``.

        Parameters
        ----------
        experiment : Experiment
            Source of the statistic and error statistic.

        grid : object
            The facet grid to draw on; its ``data`` attribute holds the
            values to plot and its ``map`` method does the drawing.

        xlim, ylim : tuple or None
            Axis limits.  When ``None``, limits are derived from the data:
            90% of the minimum and 110% of the maximum, clipped by the
            corresponding scale.

        xscale, yscale : scale objects
            Used here only for their ``clip`` method.

        **kwargs
            Passed through to ``_error_bars`` and ``plt.plot``.

        Returns
        -------
        dict
            ``{'xlim': ..., 'ylim': ...}`` with the limits actually chosen.
        """

        data = grid.data

        stat = experiment.statistics[self.statistic]
        stat_name = stat.name
        if self.error_statistic[0]:
            err_stat = experiment.statistics[self.error_statistic]
            err_stat_name = err_stat.name

        # xlim / ylim are named parameters, so they can never show up in
        # kwargs; honor what the caller passed and only compute defaults
        # when nothing was given.
        if xlim is None:
            xlim = (xscale.clip(data[self.variable].min() * 0.9),
                    xscale.clip(data[self.variable].max() * 1.1))

        if ylim is None:
            ylim = (yscale.clip(data[stat_name].min() * 0.9),
                    yscale.clip(data[stat_name].max() * 1.1))

            if self.error_statistic[0]:
                try:
                    # error statistic holds (low, high) pairs
                    err_low = min([x[0] for x in data[err_stat_name]])
                    err_high = max([x[1] for x in data[err_stat_name]])
                    ylim = (yscale.clip(err_low * 0.9),
                            yscale.clip(err_high * 1.1))
                except (IndexError, TypeError):
                    # error statistic holds scalars: numpy scalars raise
                    # IndexError when subscripted, plain Python floats
                    # raise TypeError.  Treat the value as a +/- offset.
                    ylim = (yscale.clip(
                        (data[stat_name].min() - data[err_stat_name].min()) *
                        0.9),
                            yscale.clip((data[stat_name].max() +
                                         data[err_stat_name].max()) * 1.1))

        # plot the error bars first so the axis labels don't get overwritten
        if self.error_statistic[0]:
            grid.map(_error_bars, self.variable, stat_name, err_stat_name,
                     **kwargs)

        grid.map(plt.plot, self.variable, stat_name, **kwargs)

        return {'xlim': xlim, 'ylim': ylim}
예제 #12
0
class BarChartView(HasStrictTraits):
    """Plots a bar chart of some summary statistic
    
    Attributes
    ----------
    name : Str
        The bar chart's name 
    
    statistic : Tuple(Str, Str)
        the statistic we're plotting; must be a key of the experiment's
        ``statistics``
        
    scale : Enum("linear", "log", "logicle") (default = "linear")
        The scale to use on the Y axis.
        
    variable : Str
        the name of the conditioning variable to group the chart's bars
        
    error_statistic : Tuple(Str, Str)
        if specified, a statistic to draw error bars.  if values are numeric,
        the bars are drawn +/- the value.  if the values are tuples, then
        the first element is the low error and the second element is the
        high error.
        
    xfacet : Str
        the conditioning variable for horizontal subplots
        
    yfacet : Str
        the conditioning variable for vertical subplots
        
    huefacet : Str
        the conditioning variable to make multiple bar colors
        
    orientation : Enum("horizontal", "vertical")
        do we plot the bar chart horizontally or vertically?
        TODO - waiting on seaborn v0.6
        
    subset : String
        Passed to pandas.DataFrame.query(), to get a subset of the statistic
        before we plot it.
        
    Examples
    --------
    >>> bar = flow.BarChartView()
    >>> bar.name = "Bar Chart"
    >>> bar.statistic = ("ByDox", "len")   # any key of ex.statistics
    >>> bar.variable = 'Y2-A+'
    >>> bar.huefacet = 'Dox'
    >>> bar.plot(ex)
    """

    # traits
    id = "edu.mit.synbio.cytoflow.view.barchart"
    friendly_id = "Bar Chart"

    # attributes from the pre-0.5 API; kept only so that using them
    # reports REMOVED_ERROR
    REMOVED_ERROR = "Statistics have changed dramatically in 0.5; please see the documentation"
    channel = util.Removed(err_string=REMOVED_ERROR)
    function = util.Removed(err_string=REMOVED_ERROR)
    error_bars = util.Removed(err_string=REMOVED_ERROR)

    by = util.Deprecated(new='variable')

    name = Str
    statistic = Tuple(Str, Str)
    scale = util.ScaleEnum
    variable = Str
    orientation = Enum("vertical", "horizontal")

    xfacet = Str
    yfacet = Str
    huefacet = Str

    error_statistic = Tuple(Str, Str)
    subset = Str

    def _make_data(self, experiment):
        """
        Validate the view against ``experiment`` and build the table to plot.

        This is the validation shared by :meth:`enum_plots` and :meth:`plot`.

        Returns
        -------
        (stat, data, error_name, names)
            ``stat`` is the statistic being plotted; ``data`` is a DataFrame
            indexed like the statistic, with one column named ``stat.name``
            and, if an error statistic is set, a second column whose randomly
            generated name is ``error_name`` (``None`` otherwise); ``names``
            is the list of index level names left after single-valued levels
            have been dropped.

        Raises
        ------
        util.CytoflowViewError
            If the experiment, statistics, subset, variable or facets are
            missing or inconsistent.
        """

        if not experiment:
            raise util.CytoflowViewError("No experiment specified")

        if self.statistic not in experiment.statistics:
            raise util.CytoflowViewError(
                "Can't find the statistic {} in the experiment".format(
                    self.statistic))
        stat = experiment.statistics[self.statistic]

        error_stat = None
        if self.error_statistic[0]:
            if self.error_statistic not in experiment.statistics:
                raise util.CytoflowViewError(
                    "Can't find the error statistic in the experiment")
            error_stat = experiment.statistics[self.error_statistic]

            if not stat.index.equals(error_stat.index):
                raise util.CytoflowViewError(
                    "Data statistic and error statistic "
                    " don't have the same index.")

        data = pd.DataFrame(index=stat.index)
        data[stat.name] = stat

        error_name = None
        if error_stat is not None:
            # a random column name, so it can't collide with stat.name
            error_name = util.random_string(6)
            data[error_name] = error_stat

        if self.subset:
            try:
                data = data.query(self.subset)
            except Exception as e:
                raise util.CytoflowViewError(
                    "Subset string '{0}' isn't valid".format(
                        self.subset)) from e

            if len(data) == 0:
                raise util.CytoflowViewError(
                    "Subset string '{0}' returned no values".format(
                        self.subset))

        # index levels with only one value can't be plotted against; drop them
        for name in list(data.index.names):
            unique_values = data.index.get_level_values(name).unique()
            if len(unique_values) == 1:
                warn("Only one value for level {}; dropping it.".format(name),
                     util.CytoflowViewWarning)
                try:
                    data.index = data.index.droplevel(name)
                except (AttributeError, ValueError) as e:
                    # dropping the last remaining level is refused: older
                    # pandas has no Index.droplevel (AttributeError), newer
                    # pandas raises ValueError
                    raise util.CytoflowViewError("Must have more than one "
                                                 "value to plot.") from e

        names = list(data.index.names)

        if not self.variable:
            raise util.CytoflowViewError("variable not specified")

        if self.variable not in names:
            raise util.CytoflowViewError("Variable {} isn't in the statistic; "
                                         "must be one of {}".format(
                                             self.variable, names))

        for label, facet in (("X", self.xfacet),
                             ("Y", self.yfacet),
                             ("Hue", self.huefacet)):
            if not facet:
                continue

            if facet not in experiment.conditions:
                raise util.CytoflowViewError(
                    "{0} facet {1} isn't in the experiment".format(
                        label, facet))

            if facet not in names:
                raise util.CytoflowViewError(
                    "{} facet {} is not a statistic index; "
                    "must be one of {}".format(label, facet, names))

        return stat, data, error_name, names

    def _used_facets(self):
        """
        Return the non-empty names among variable, xfacet, yfacet and
        huefacet, raising util.CytoflowViewError if any name is used twice.
        """

        facets = [
            f for f in (self.variable, self.xfacet, self.yfacet, self.huefacet)
            if f
        ]
        if len(facets) != len(set(facets)):
            raise util.CytoflowViewError("Can't reuse facets")

        return facets

    def enum_plots(self, experiment):
        """
        Returns an iterator over the possible plots that this View can
        produce.  The values returned can be passed to "plot".

        If every index level of the statistic is used by the variable or a
        facet, the iterator yields the single value ``None``.  Otherwise it
        yields the group keys obtained by grouping ``experiment.data`` by
        the unused levels.
        """

        _, _, _, names = self._make_data(experiment)
        facets = self._used_facets()

        # keep the statistic's level order so the keys are deterministic
        by = [n for n in names if n not in facets]

        if by:
            return (key for key, _ in experiment.data.groupby(by))

        return iter([None])

    def plot(self, experiment, plot_name=None, **kwargs):
        """Plot a bar chart

        Parameters
        ----------
        experiment : Experiment
            The experiment whose statistic is plotted.

        plot_name : object
            Required when some index levels of the statistic are not used
            by the variable or a facet: selects which group of those levels
            to plot.  Must be one of the values from :meth:`enum_plots`.

        **kwargs
            ``col_wrap``, ``sharex`` (default True) and ``sharey`` (default
            True) are consumed here and handed to ``sns.FacetGrid``;
            everything else is forwarded to ``_barplot`` through
            ``FacetGrid.map``.

        Raises
        ------
        util.CytoflowViewError
            If the view's configuration doesn't match the experiment, or
            ``plot_name`` / ``col_wrap`` are used inconsistently.
        """

        stat, data, error_name, names = self._make_data(experiment)

        col_wrap = kwargs.pop('col_wrap', None)

        if col_wrap and self.yfacet:
            raise util.CytoflowViewError(
                "Can't set yfacet and col_wrap at the same time.")

        if col_wrap and not self.xfacet:
            raise util.CytoflowViewError("Must set xfacet to use col_wrap.")

        facets = self._used_facets()
        unused_names = [n for n in names if n not in facets]

        if plot_name is not None and not unused_names:
            raise util.CytoflowViewError("You specified a plot name, but all "
                                         "the facets are already used")

        data = data.reset_index()
        if unused_names:
            groupby = data.groupby(unused_names)

            if plot_name is None:
                raise util.CytoflowViewError(
                    "You must use facets {} in either the "
                    "plot variables or the plot name. "
                    "Possible plot names: {}".format(unused_names,
                                                     groupby.groups.keys()))

            if plot_name not in set(groupby.groups.keys()):
                raise util.CytoflowViewError(
                    "Plot {} not from plot_enum; must "
                    "be one of {}".format(plot_name, groupby.groups.keys()))

            data = groupby.get_group(plot_name)

        sharex = kwargs.pop('sharex', True)
        sharey = kwargs.pop('sharey', True)

        if col_wrap:
            cols = col_wrap
        elif self.xfacet:
            cols = len(data[self.xfacet].unique())
        else:
            cols = 1

        g = sns.FacetGrid(data,
                          size=(6 / cols),
                          aspect=1.5,
                          col=(self.xfacet if self.xfacet else None),
                          row=(self.yfacet if self.yfacet else None),
                          col_order=(np.sort(data[self.xfacet].unique())
                                     if self.xfacet else None),
                          row_order=(np.sort(data[self.yfacet].unique())
                                     if self.yfacet else None),
                          col_wrap=col_wrap,
                          legend_out=False,
                          sharex=sharex,
                          sharey=sharey)

        scale = util.scale_factory(self.scale,
                                   experiment,
                                   statistic=self.statistic)

        # because the bottom of a bar chart is "0", masking out bad
        # values on a log scale doesn't work.  we must clip instead.
        if self.scale == "log":
            scale.mode = "clip"

        # set the scale for each set of axes; can't just call plt.xscale()
        for ax in g.axes.flatten():
            if self.orientation == 'horizontal':
                ax.set_xscale(self.scale, **scale.mpl_params)
            else:
                ax.set_yscale(self.scale, **scale.mpl_params)

        map_args = [self.variable, stat.name]

        if self.huefacet:
            map_args.append(self.huefacet)

        if error_name is not None:
            map_args.append(error_name)

        g.map(_barplot,
              *map_args,
              view=self,
              stat_name=stat.name,
              error_name=error_name,
              **kwargs)

        # needed below whether or not the axes are shared
        fig = plt.gcf()

        if sharex:
            # if we are sharing x axes, make sure the x scale is the same
            # for each
            x_lims = [ax.get_xlim() for ax in fig.get_axes()]
            if x_lims:
                fig_x_min = min(low for low, _ in x_lims)
                fig_x_max = max(high for _, high in x_lims)
                for ax in fig.get_axes():
                    ax.set_xlim(fig_x_min, fig_x_max)

        if sharey:
            # if we are sharing y axes, make sure the y scale is the same
            # for each
            y_lims = [ax.get_ylim() for ax in fig.get_axes()]
            if y_lims:
                fig_y_min = min(low for low, _ in y_lims)
                fig_y_max = max(high for _, high in y_lims)
                for ax in fig.get_axes():
                    ax.set_ylim(fig_y_min, fig_y_max)

        if self.huefacet:
            labels = np.sort(data[self.huefacet].unique())
            labels = [str(x) for x in labels]
            g.add_legend(title=self.huefacet, label_order=labels)

        # label the statistic's axis on the first set of axes
        plt.sca(fig.get_axes()[0])
        if self.orientation == 'horizontal':
            plt.xlabel(self.statistic)
        else:
            plt.ylabel(self.statistic)

        if unused_names and plot_name is not None:
            plt.title("{0} = {1}".format(unused_names, plot_name))
예제 #13
0
class Base1DStatisticsView(BaseStatisticsView):
    """
    The base class for 1-dimensional statistic views -- ie, the :attr:`variable`
    attribute is on the x axis, and the statistic value is on the y axis.
    
    Attributes
    ----------
    statistic : (str, str)
        The name of the statistic to plot.  Must be a key in the  
        :attr:`~Experiment.statistics` attribute of the :class:`~.Experiment`
        being plotted.
        
    error_statistic : (str, str)
        The name of the statistic used to plot error bars.  Must be a key in the
        :attr:`~Experiment.statistics` attribute of the :class:`~.Experiment`
        being plotted.
    """
    
    # attributes from the pre-0.5 API; kept only so that using them
    # reports REMOVED_ERROR
    REMOVED_ERROR = "Statistics changed dramatically in 0.5; please see the documentation"
    by = util.Removed(err_string = REMOVED_ERROR)
    yfunction = util.Removed(err_string = REMOVED_ERROR)
    ychannel = util.Removed(err_string = REMOVED_ERROR)
    channel = util.Removed(err_string = REMOVED_ERROR)
    function = util.Removed(err_string = REMOVED_ERROR)
    error_bars = util.Removed(err_string = REMOVED_ERROR)
    
    xvariable = util.Deprecated(new = "variable")
    
    statistic = Tuple(Str, Str)
    error_statistic = Tuple(Str, Str)
    
    def enum_plots(self, experiment):
        """
        Build the statistic table for ``experiment`` and hand it to the
        parent class's ``enum_plots``, returning whatever that returns.
        """
        data = self._make_data(experiment)
        return super().enum_plots(experiment, data)
    
    def plot(self, experiment, plot_name = None, **kwargs):
        """
        Validate :attr:`variable`, build the x and y scales and delegate the
        drawing to the parent class's ``plot``.

        Parameters
        ----------
        experiment : Experiment
            The experiment whose statistic is plotted.

        plot_name : object
            Passed through to the parent class's ``plot``.

        **kwargs
            Passed through to the parent class's ``plot``.

        Raises
        ------
        util.CytoflowViewError
            If the statistics are invalid (see :meth:`_make_data`), or if
            :attr:`variable` is unset or not a condition of the experiment.
        """
        data = self._make_data(experiment)
        
        if not self.variable:
            raise util.CytoflowViewError('variable',
                                         "variable not set")
            
        if self.variable not in experiment.conditions:
            raise util.CytoflowViewError('variable',
                                         "variable {0} not in the experiment"
                                    .format(self.variable))
            
        # only a numeric variable gets an x scale
        if util.is_numeric(experiment[self.variable]):
            xscale = util.scale_factory(self.xscale, experiment, condition = self.variable)
        else:
            xscale = None 
        
        yscale = util.scale_factory(self.yscale, 
                                    experiment, 
                                    statistic = self.statistic, 
                                    error_statistic = self.error_statistic)
            
        super().plot(experiment, 
                     data, 
                     plot_name, 
                     xscale = xscale, 
                     yscale = yscale, 
                     **kwargs)
        
    def _make_data(self, experiment):
        """
        Validate the statistic settings and build the table to plot.

        Returns
        -------
        pandas.DataFrame
            Indexed like the statistic, with one column named after the
            statistic and, if :attr:`error_statistic` is set, one column
            named after the error statistic.

        Raises
        ------
        util.CytoflowViewError
            If the experiment is missing; the statistic is unset, not in
            the experiment or not numeric; or the error statistic is not in
            the experiment, has a different index, or has the same name as
            the statistic.
        """
        if experiment is None:
            raise util.CytoflowViewError('experiment', "No experiment specified")
        
        # a Tuple(Str, Str) value is a 2-tuple and therefore always truthy,
        # so test the first element, as is done for error_statistic below
        if not self.statistic[0]:
            raise util.CytoflowViewError('statistic', "Statistic not set")
        
        if self.statistic not in experiment.statistics:
            raise util.CytoflowViewError('statistic',
                                         "Can't find the statistic {} in the experiment"
                                         .format(self.statistic))
        stat = experiment.statistics[self.statistic]
            
        if not util.is_numeric(stat):
            raise util.CytoflowViewError('statistic',
                                         "Statistic must be numeric")
            
        error_stat = None
        if self.error_statistic[0]:
            if self.error_statistic not in experiment.statistics:
                raise util.CytoflowViewError('error_statistic',
                                             "Can't find the error statistic in the experiment")
            error_stat = experiment.statistics[self.error_statistic]
         
            if not stat.index.equals(error_stat.index):
                raise util.CytoflowViewError('error_statistic',
                                             "Data statistic and error statistic "
                                             " don't have the same index.")
               
            # both become columns of the same frame, keyed by name
            if stat.name == error_stat.name:
                raise util.CytoflowViewError('error_statistic',
                                             "Data statistic and error statistic can "
                                             "not have the same name.")
               
        data = pd.DataFrame(index = stat.index)
        data[stat.name] = stat
        
        if error_stat is not None:
            data[error_stat.name] = error_stat
            
        return data
예제 #14
0
class ColorTranslationOp(HasStrictTraits):
    """
    Translate measurements from one color's scale to another, using a two-color
    or three-color control.
    
    To use, set up the `controls` dict with the multi-color controls.  Call 
    `estimate()` to parameterize the module; check that the plots look good 
    with `default_view().plot()`; then `apply()` to an Experiment.
    
    Attributes
    ----------
    name : Str
        The operation name (for UI representation; optional for interactive use)
        
    controls : Dict((Str, Str), File)
        Two-color controls used to determine the mapping.  The keys are 
        tuples of *from-channel* and *to-channel*.  The values are FCS files 
        containing two-color constitutive fluorescent expression data 
        for the mapping.
        
    mixture_model : Bool (default = False)
        If "True", try to model the "from" channel as a mixture of expressing
        cells and non-expressing cells (as you would get with a transient
        transfection.)  Make sure you check the diagnostic plots!

    linear_model : Bool (default = False)
        Declared as a trait, but not read by `estimate()` or `apply()` in 
        this class.
        
    Metadata
    --------
    channel_translation : Str
        Which channel was this one translated to?
        
    channel_translation_fn : Callable (pandas.Series --> pandas.Series)
        The function that translated this channel
        
    Notes
    -----
    In the TASBE workflow, this operation happens *after* the application of
    `AutofluorescenceOp` and `BleedthroughPiecewiseOp`.  Both must be applied
    to the single-color controls before the translation coefficients are
    estimated; `estimate()` does this by re-applying every operation in the 
    experiment's history to the control tubes.
    

    Examples
    --------
    >>> ct_op = flow.ColorTranslationOp()
    >>> ct_op.controls = {("Pacific Blue-A", "FITC-A") : "merged/rby.fcs",
    ...                   ("PE-Tx-Red-YG-A", "FITC-A") : "merged/rby.fcs"}
    >>> ct_op.mixture_model = True
    >>>
    >>> ct_op.estimate(ex4)
    >>> ct_op.default_view().plot(ex4)
    >>> ex5 = ct_op.apply(ex4)
    """

    # traits
    id = Constant('edu.mit.synbio.cytoflow.operations.color_translation')
    friendly_id = Constant("Color translation")

    name = Constant("Color Translation")

    translation = util.Removed(
        err_string=
        "'translation' is removed; the same info is found in 'controls'",
        warning=True)
    controls = Dict(Tuple(Str, Str), File)
    mixture_model = Bool(False)
    linear_model = Bool(False)

    # The regression coefficients determined by `estimate()`, used to map
    # colors between channels.  The keys are tuples of (*from-channel*,
    # *to-channel*) (the same keys as `controls`).  The values are the
    # log-log coefficients for the color translation, as returned by
    # np.polyfit in `estimate()`.
    # TODO - why can't i make the value List(Float)?
    _coefficients = Dict(Tuple(Str, Str), Any, transient=True)

    # The translation functions built by `estimate()`, with the same keys.
    _trans_fn = Dict(Tuple(Str, Str), Callable, transient=True)

    def estimate(self, experiment, subset=None):
        """
        Estimate the mapping from the two-channel controls

        Parameters
        ----------
        experiment : Experiment
            Supplies the channel names, the metadata used to import the
            control tubes, and the history of operations that is re-applied
            to each control tube.

        subset : Str (default = None)
            If set, an expression passed to the control experiment's 
            `query()` to select the events used for the estimate.

        Raises
        ------
        util.CytoflowOpError
            If there is no experiment or no controls, a channel is not in 
            the experiment, or the subset is invalid or selects no events.
        """

        if experiment is None:
            raise util.CytoflowOpError("No experiment specified")

        if not self.controls:
            raise util.CytoflowOpError("No controls specified")

        # control data that has already been loaded, keyed by file name, so
        # a file shared by several controls is only processed once
        tubes = {}

        translation = {src: dst for src, dst in self.controls}

        for from_channel, to_channel in translation.items():

            if from_channel not in experiment.channels:
                raise util.CytoflowOpError(
                    "Channel {0} not in the experiment".format(from_channel))

            if to_channel not in experiment.channels:
                raise util.CytoflowOpError(
                    "Channel {0} not in the experiment".format(to_channel))

            if (from_channel, to_channel) not in self.controls:
                raise util.CytoflowOpError("Control file for {0} --> {1} "
                                           "not specified".format(
                                               from_channel, to_channel))

            tube_file = self.controls[(from_channel, to_channel)]

            if tube_file not in tubes:
                # make a little Experiment
                check_tube(tube_file, experiment)
                tube_exp = ImportOp(tubes=[Tube(file=tube_file)],
                                    channels={
                                        experiment.metadata[c]["fcs_name"]: c
                                        for c in experiment.channels
                                    },
                                    name_metadata=experiment.
                                    metadata['name_metadata']).apply()

                # apply previous operations
                for op in experiment.history:
                    tube_exp = op.apply(tube_exp)

                # subset the events
                if subset:
                    try:
                        tube_exp = tube_exp.query(subset)
                    except Exception as e:
                        raise util.CytoflowOpError(
                            "Subset string '{0}' isn't valid".format(
                                subset)) from e

                    if len(tube_exp.data) == 0:
                        raise util.CytoflowOpError(
                            "Subset string '{0}' returned no events".format(
                                subset))

                tubes[tube_file] = tube_exp.data

            # only strictly positive events can be moved to log space
            data = tubes[tube_file][[from_channel, to_channel]].copy()
            data = data[data[from_channel] > 0]
            data = data[data[to_channel] > 0]

            data.reset_index(drop=True, inplace=True)

            data[from_channel] = np.log10(data[from_channel])
            data[to_channel] = np.log10(data[to_channel])

            if self.mixture_model:
                gmm = sklearn.mixture.BayesianGaussianMixture(n_components=2)
                fit = gmm.fit(data)

                # pick the component with the maximum mean, and weight each
                # event by its probability of belonging to that component
                idx = 0 if fit.means_[0][0] > fit.means_[1][0] else 1
                weights = [x[idx] for x in fit.predict_proba(data)]
            else:
                weights = [1] * len(data.index)

            # this estimation method yields different results than the TASBE
            # method.  TASBE ..... does something with binned means, or
            # something ..... I can't read the MATLAB code too well, and I
            # don't know if the code I have is the same as is running on the
            # TASBE website ...... anyways.  It computes a linear, multiplicative
            # scaling constant.  Ie, OUT = m * IN, where OUT is the color we're
            # translating TO and IN is the color we're translating FROM.

            # this code uses a different approach: it uses a log-linear model,
            # computing the linear Y = a * X + b coefficients on a log-log
            # plot.  this is a more general model of the underlying physical
            # behavior -- but it may not be more "correct."

            # which is better?  idunno.  i'd love to try EQUIP predictions with
            # both.  i'd like to note that i can't reproduce the TASBE method
            # precisely anyways; if i replace this with a linear model, i get
            # coefficients that are close to (but not quite the same as) the
            # TASBE website, and WAY off the color model I have in the same
            # directory as my test data.

            lr = np.polyfit(data[from_channel],
                            data[to_channel],
                            deg=1,
                            w=weights)

            # remember, these (linear) coefficients came from logspace, so
            # if the relationship in log10 space is Y = aX + b, then in
            # linear space the relationship is x = 10**X, y = 10**Y,
            # and y = (10**b) * x ^ a

            # also remember that the result of np.polyfit is a list of
            # coefficients with the highest power first!  so if we
            # solve y=ax + b, coeff #0 is a and coeff #1 is b

            a = lr[0]
            b = 10**lr[1]

            # a and b are bound as defaults so that each function keeps the
            # coefficients of its own loop iteration
            trans_fn = lambda x, a=a, b=b: b * np.power(x, a)

            self._coefficients[(from_channel, to_channel)] = lr
            self._trans_fn[(from_channel, to_channel)] = trans_fn

    def apply(self, experiment):
        """Applies the color translation to an experiment
        
        Parameters
        ----------
        experiment : Experiment
            the old_experiment to which this op is applied
            
        Returns
        -------
            a new experiment with the color translation applied.  Events 
            that are not strictly positive in every *from* channel are 
            removed.

        Raises
        ------
        util.CytoflowOpError
            If there is no experiment, no controls, or `estimate()` has not
            produced a translation for every control.
        """

        if experiment is None:
            raise util.CytoflowOpError("No experiment specified")

        if not self.controls:
            raise util.CytoflowOpError("No controls specified")

        if not self._trans_fn:
            raise util.CytoflowOpError("Transfer functions aren't set. "
                                       "Did you call estimate()?")

        translation = {src: dst for src, dst in self.controls}
        from_channels = [src for src, _ in self.controls]

        for key, val in translation.items():
            if (key, val) not in self._coefficients:
                raise util.CytoflowOpError(
                    "Coefficients aren't set for translation "
                    "{0} --> {1}.  Did you call estimate()?".format(key, val))

        new_experiment = experiment.clone()

        # the translation is a power law, so drop events that aren't
        # strictly positive in a channel being translated
        for channel in from_channels:
            new_experiment.data = \
                new_experiment.data[new_experiment.data[channel] > 0]

        for from_channel, to_channel in translation.items():
            trans_fn = self._trans_fn[(from_channel, to_channel)]

            new_experiment[from_channel] = trans_fn(experiment[from_channel])
            new_experiment.metadata[from_channel][
                'channel_translation_fn'] = trans_fn
            new_experiment.metadata[from_channel][
                'channel_translation'] = to_channel

        new_experiment.history.append(
            self.clone_traits(transient=lambda _: True))

        return new_experiment

    def default_view(self, **kwargs):
        """
        Returns a diagnostic plot to see if the color translation estimation
        is working.
        
        Parameters
        ----------
        **kwargs
            Passed to the `ColorTranslationDiagnostic` constructor.

        Returns
        -------
            IView : An IView, call plot() to see the diagnostic plots
        """

        return ColorTranslationDiagnostic(op=self, **kwargs)
예제 #15
0
class Stats1DView(Base1DStatisticsView):
    """
    Plot a statistic.  The value of the statistic will be plotted on the
    Y axis; a numeric conditioning variable must be chosen for the X axis.
    Every variable in the statistic must be specified as either the `variable`
    or one of the plot facets.
    
    Attributes
    ----------
        
    Examples
    --------
    
    .. plot::
        :context: close-figs
        
        Make a little data set.
    
        >>> import cytoflow as flow
        >>> import_op = flow.ImportOp()
        >>> import_op.tubes = [flow.Tube(file = "Plate01/RFP_Well_A3.fcs",
        ...                              conditions = {'Dox' : 10.0}),
        ...                    flow.Tube(file = "Plate01/CFP_Well_A4.fcs",
        ...                              conditions = {'Dox' : 1.0})]
        >>> import_op.conditions = {'Dox' : 'float'}
        >>> ex = import_op.apply()
    
    Create a new statistic.
    
    .. plot::
        :context: close-figs
        
        >>> ch_op = flow.ChannelStatisticOp(name = 'MeanByDox',
        ...                     channel = 'Y2-A',
        ...                     function = flow.geom_mean,
        ...                     by = ['Dox'])
        >>> ex2 = ch_op.apply(ex)
        
    View the new statistic
    
    .. plot::
        :context: close-figs
        
        >>> flow.Stats1DView(variable = 'Dox',
        ...                  statistic = ('MeanByDox', 'geom_mean'),
        ...                  xscale = 'log',
        ...                  yscale = 'log').plot(ex2)
    """
    
    # traits   
    id = Constant("edu.mit.synbio.cytoflow.view.stats1d")
    friendly_id = Constant("1D Statistics View")
    
    REMOVED_ERROR = Constant("Statistics changed dramatically in 0.5; please see the documentation")
    by = util.Removed(err_string = REMOVED_ERROR)
    yfunction = util.Removed(err_string = REMOVED_ERROR)
    ychannel = util.Removed(err_string = REMOVED_ERROR)
    xvariable = util.Deprecated(new = "variable")
    
    def enum_plots(self, experiment):
        """
        Returns an iterator over the possible plots that this View can
        produce.  The values returned can be passed to :meth:`plot`.
        """
                
        return super().enum_plots(experiment)
        
    
    def plot(self, experiment, plot_name = None, **kwargs):
        """Plot a chart of a variable's values against a statistic.
        
        Parameters
        ----------
        
        color : a matplotlib color
            The color to plot with.  Overridden if `huefacet` is not `None`
            
        linestyle : ['solid' | 'dashed', 'dashdot', 'dotted' | (offset, on-off-dash-seq) | '-' | '--' | '-.' | ':' | 'None' | ' ' | '']
            
        marker : a matplotlib marker style
            See http://matplotlib.org/api/markers_api.html#module-matplotlib.markers
            
        markersize : int
            The marker size in points
            
        markerfacecolor : a matplotlib color
            The color to make the markers.  Overridden (?) if `huefacet` is not `None`
            
        alpha : the alpha blending value, from 0.0 (transparent) to 1.0 (opaque)
        
        Notes
        -----
                
        Other `kwargs` are passed to `matplotlib.pyplot.plot <https://matplotlib.org/devdocs/api/_as_gen/matplotlib.pyplot.plot.html>`_
        
        """
        
        super().plot(experiment, plot_name, **kwargs)

    def _grid_plot(self, experiment, grid, xlim, ylim, xscale, yscale, **kwargs):
        """
        Draw the statistic, and its error bars if an error statistic is set,
        onto `grid`.

        Parameters
        ----------
        experiment
            The experiment whose ``statistics`` hold `statistic` and
            `error_statistic`.

        grid
            The grid to draw on.  Only its ``data`` attribute and ``map``
            method are used here (presumably a seaborn ``FacetGrid``).

        xlim, ylim : (min, max) or None
            Axis limits.  A limit that is ``None`` is derived from the data
            in ``grid.data``; a limit supplied by the caller is kept as-is.

        xscale, yscale
            Scale objects; only their ``clip`` method is used here.

        **kwargs
            Forwarded to ``_error_bars`` and ``plt.plot`` through
            ``grid.map``.

        Returns
        -------
        dict
            ``{'xlim' : xlim, 'ylim' : ylim}``
        """

        data = grid.data

        stat_name = experiment.statistics[self.statistic].name
        has_error = bool(self.error_statistic[0])
        if has_error:
            err_stat_name = experiment.statistics[self.error_statistic].name

        # `xlim` and `ylim` are named parameters, so they can never show up in
        # `kwargs`; popping them from there always produced None and silently
        # threw away whatever limits the caller passed in.
        if xlim is None:
            xlim = (xscale.clip(data[self.variable].min() * 0.9),
                    xscale.clip(data[self.variable].max() * 1.1))

        if ylim is None:
            ylim = (yscale.clip(data[stat_name].min() * 0.9),
                    yscale.clip(data[stat_name].max() * 1.1))

            if has_error:
                try:
                    # error values given as (low, high) pairs
                    ylim = (yscale.clip(min([x[0] for x in data[err_stat_name]]) * 0.9),
                            yscale.clip(max([x[1] for x in data[err_stat_name]]) * 1.1))
                except (IndexError, TypeError):
                    # scalar error values: indexing a numpy scalar raises
                    # IndexError, indexing a plain Python float raises TypeError
                    ylim = (yscale.clip((data[stat_name].min() - data[err_stat_name].min()) * 0.9),
                            yscale.clip((data[stat_name].max() + data[err_stat_name].max()) * 1.1))

        # plot the error bars first so the axis labels don't get overwritten
        if has_error:
            grid.map(_error_bars, self.variable, stat_name, err_stat_name, **kwargs)

        grid.map(plt.plot, self.variable, stat_name, **kwargs)

        return {'xlim' : xlim, 'ylim' : ylim}
예제 #16
0
class Base1DStatisticsView(BaseStatisticsView):
    """
    Shared plumbing for views that draw a single statistic.

    Attributes
    ----------
    statistic : Tuple(Str, Str)
        Key of the statistic to plot in ``experiment.statistics``.

    error_statistic : Tuple(Str, Str)
        Key of an optional error statistic in ``experiment.statistics``.
        It is ignored while its first element is empty.
    """

    REMOVED_ERROR = "Statistics changed dramatically in 0.5; please see the documentation"
    by = util.Removed(err_string=REMOVED_ERROR)
    yfunction = util.Removed(err_string=REMOVED_ERROR)
    ychannel = util.Removed(err_string=REMOVED_ERROR)
    channel = util.Removed(err_string=REMOVED_ERROR)
    function = util.Removed(err_string=REMOVED_ERROR)
    error_bars = util.Removed(err_string=REMOVED_ERROR)

    xvariable = util.Deprecated(new="variable")

    statistic = Tuple(Str, Str)
    error_statistic = Tuple(Str, Str)

    def enum_plots(self, experiment):
        """
        Build the plotting data frame for `experiment` and hand it to the
        parent class's ``enum_plots``, returning whatever that returns.
        """
        data = self._make_data(experiment)
        return super().enum_plots(experiment, data)

    def plot(self, experiment, plot_name=None, **kwargs):
        """
        Build the plotting data frame and the axis scales, then delegate to
        the parent class's ``plot``.

        Parameters
        ----------
        experiment
            The experiment holding the statistic(s) to plot.

        plot_name
            Passed through to the parent class's ``plot``.

        **kwargs
            Passed through to the parent class's ``plot``.
        """
        data = self._make_data(experiment)

        # an X scale is only created for a numeric variable
        if util.is_numeric(experiment[self.variable]):
            xscale = util.scale_factory(self.xscale,
                                        experiment,
                                        condition=self.variable)
        else:
            xscale = None

        yscale = util.scale_factory(self.yscale,
                                    experiment,
                                    statistic=self.statistic,
                                    error_statistic=self.error_statistic)

        super().plot(experiment,
                     data,
                     plot_name,
                     xscale=xscale,
                     yscale=yscale,
                     **kwargs)

    def _make_data(self, experiment):
        """
        Validate the view against `experiment` and assemble the data to plot.

        Returns
        -------
        pandas.DataFrame
            Indexed like the statistic, with one column named after the
            statistic and, if an error statistic is set, a second column
            named after the error statistic.

        Raises
        ------
        util.CytoflowViewError
            If the experiment is missing, the statistic is unset, not in the
            experiment or not numeric, or the error statistic is not in the
            experiment, has a different index, or shares the statistic's name.
        """
        if experiment is None:
            raise util.CytoflowViewError("No experiment specified")

        # An unset Tuple(Str, Str) is ('', ''), which is truthy, so the
        # first element has to be tested as well (as is done for
        # `error_statistic` below).
        if not self.statistic or not self.statistic[0]:
            raise util.CytoflowViewError("Statistic not set")

        if self.statistic not in experiment.statistics:
            raise util.CytoflowViewError(
                "Can't find the statistic {} in the experiment".format(
                    self.statistic))

        stat = experiment.statistics[self.statistic]

        if not util.is_numeric(stat):
            raise util.CytoflowViewError("Statistic must be numeric")

        error_stat = None
        if self.error_statistic[0]:
            if self.error_statistic not in experiment.statistics:
                raise util.CytoflowViewError(
                    "Can't find the error statistic in the experiment")

            error_stat = experiment.statistics[self.error_statistic]

            if not stat.index.equals(error_stat.index):
                raise util.CytoflowViewError(
                    "Data statistic and error statistic "
                    " don't have the same index.")

            # both become columns of one frame, keyed by their names
            if stat.name == error_stat.name:
                raise util.CytoflowViewError(
                    "Data statistic and error statistic can "
                    "not have the same name.")

        data = pd.DataFrame(index=stat.index)
        data[stat.name] = stat

        if error_stat is not None:
            data[error_stat.name] = error_stat

        return data
예제 #17
0
class BarChartView(Base1DStatisticsView):
    """
    Draws a summary statistic as a bar chart.
    
    Attributes
    ----------
    
    Examples
    --------
    
    Make a little data set.
    
    .. plot::
        :context: close-figs
            
        >>> import cytoflow as flow
        >>> import_op = flow.ImportOp()
        >>> import_op.tubes = [flow.Tube(file = "Plate01/RFP_Well_A3.fcs",
        ...                              conditions = {'Dox' : 10.0}),
        ...                    flow.Tube(file = "Plate01/CFP_Well_A4.fcs",
        ...                              conditions = {'Dox' : 1.0})]
        >>> import_op.conditions = {'Dox' : 'float'}
        >>> ex = import_op.apply()
        
    Add a threshold gate
    
    .. plot::
        :context: close-figs
    
        >>> ex2 = flow.ThresholdOp(name = 'Threshold',
        ...                        channel = 'Y2-A',
        ...                        threshold = 2000).apply(ex)
        
    Add a statistic
    
    .. plot::
        :context: close-figs

        >>> ex3 = flow.ChannelStatisticOp(name = "ByDox",
        ...                               channel = "Y2-A",
        ...                               by = ['Dox', 'Threshold'],
        ...                               function = len).apply(ex2) 
    
    Plot the bar chart
    
    .. plot::
        :context: close-figs
        
        >>> flow.BarChartView(statistic = ("ByDox", "len"),
        ...                   variable = "Dox",
        ...                   huefacet = "Threshold").plot(ex3)
        
    """

    # traits
    id = Constant("edu.mit.synbio.cytoflow.view.barchart")
    friendly_id = Constant("Bar Chart")

    orientation = util.Removed(err_string = "`orientation` is now a parameter to `plot`")

    def enum_plots(self, experiment):
        """
        Returns an iterator over the plots this View is able to draw.  Each
        value it yields can be handed to "plot".
        """

        return super().enum_plots(experiment)

    def plot(self, experiment, plot_name = None, **kwargs):
        """
        Draw the bar chart.
        
        Parameters
        ----------
            
        color : a matplotlib color
            The color used for every bar, hue facet or not
            
        errwidth : scalar
            Error bar width, in points
            
        errcolor : a matplotlib color
            Error bar color
            
        capsize : scalar
            Error bar cap size, in points
            
        Notes
        -----
        
        Other ``kwargs`` are passed to `matplotlib.axes.Axes.bar <https://matplotlib.org/devdocs/api/_as_gen/matplotlib.axes.Axes.bar.html>`_

        """

        super().plot(experiment, plot_name, **kwargs)

    def _grid_plot(self, experiment, grid, **kwargs):
        """
        Map ``_barplot`` over `grid` and report the scale and limits for the
        statistic axis.

        ``orientation`` (default ``'vertical'``), ``scale`` (required) and
        ``lim`` (default ``None``) are taken out of `kwargs`; the rest is
        forwarded to ``_barplot``.

        Returns
        -------
        dict
            ``xscale``/``xlim`` when the orientation is ``'horizontal'``,
            otherwise ``yscale``/``ylim``.
        """

        # a bar starts at "0", so on a log scale bad values can't simply be
        # masked out -- they have to be clipped.
        orientation = kwargs.pop('orientation', 'vertical')

        # scale of the statistic axis
        scale = kwargs.pop('scale')
        if scale.name == "log":
            scale.mode = "clip"

        # limits of the statistic axis
        lim = kwargs.pop('lim', None)

        stat_name = experiment.statistics[self.statistic].name

        # columns of the grid's data that get handed to _barplot
        columns = [self.variable, stat_name]
        if self.huefacet:
            columns.append(self.huefacet)

        error_name = None
        if self.error_statistic[0]:
            error_name = experiment.statistics[self.error_statistic].name
            columns.append(error_name)

        grid.map(_barplot,
                 *columns,
                 view = self,
                 stat_name = stat_name,
                 error_name = error_name,
                 orientation = orientation,
                 grid = grid,
                 **kwargs)

        if orientation == 'horizontal':
            return {'xscale' : scale, 'xlim' : lim}

        return {'yscale' : scale, 'ylim' : lim}
예제 #18
0
class Stats1DView(HasStrictTraits):
    """
    Plot a statistic.  The value of the statistic will be plotted on the
    Y axis; a numeric conditioning variable must be chosen for the X axis.
    Every variable in the statistic must be specified as either the `variable`
    or one of the plot facets.
    
    Attributes
    ----------
    name : Str
        The plot's name 
        
    statistic : Tuple(Str, Str)
        The statistic to plot.  The first element is the name of the module that
        added the statistic, and the second element is the name of the statistic.
    
    variable : Str
        the name of the conditioning variable to put on the X axis.  Must be
        numeric (float or int).
        
    xscale : Enum("linear", "log") (default = "linear")
        The scale to use on the X axis
        
    yscale : Enum("linear", "log", "logicle") (default = "linear")
        The scale to use on the Y axis
        
    xfacet : Str
        the conditioning variable for horizontal subplots
        
    yfacet : Str
        the conditioning variable for vertical subplots
        
    huefacet : 
        the conditioning variable for color.
        
    huescale :
        the scale to use on the "hue" axis, if there are many values of
        the hue facet.
        
    error_statistic : Tuple(Str, Str)
        A statistic to use to draw error bars; the bars are +- the value of
        the statistic.
        
    subset : String
        Passed to pandas.DataFrame.query(), to get a subset of the statistic
        before we plot it.

        
    Examples
    --------
    
    Assume we want a Dox induction curve in a transient transfection experiment.  
    We have induced several wells with different amounts of Dox and the output
    of the Dox-inducible channel is "Pacific Blue-A".  We have a constitutive
    expression channel in "PE-Tx-Red-YG-A". We want to bin all the data by
    constitutive expression level, then plot the dose-response (geometric mean)
    curve in each bin. 
    
    >>> ex_bin = flow.BinningOp(name = "CFP_Bin",
    ...                         channel = "PE-Tx-Red-YG-A",
    ...                         scale = "log",
    ...                         bin_width = 0.1).apply(ex)
    >>> ex_stat = flow.ChannelStatisticOp(name = "DoxCFP",
    ...                                   by = ["Dox", "CFP_Bin"],
    ...                                   channel = "Pacific Blue-A",
    ...                                   function = flow.geom_mean).apply(ex_bin)
    >>> view = flow.Stats1DView(name = "Dox vs IFP",
    ...                         statistic = ("DoxCFP", "geom_mean"),
    ...                         variable = "Dox",
    ...                         xscale = "log",
    ...                         huefacet = "CFP_Bin")
    >>> view.plot(ex_stat)
    """
    
    # traits   
    id = "edu.mit.synbio.cytoflow.view.stats1d"
    friendly_id = "1D Statistics View" 
    
    REMOVED_ERROR = "Statistics have changed dramatically in 0.5; please see the documentation"
    by = util.Removed(err_string = REMOVED_ERROR)
    yfunction = util.Removed(err_string = REMOVED_ERROR)
    ychannel = util.Removed(err_string = REMOVED_ERROR)
    xvariable = util.Deprecated(new = "variable")
    
    name = Str
    statistic = Tuple(Str, Str)
    variable = Str
    xscale = util.ScaleEnum
    yscale = util.ScaleEnum
    xfacet = Str
    yfacet = Str
    huefacet = Str
    huescale = util.ScaleEnum # TODO - make this actually work
    
    error_statistic = Tuple(Str, Str)
    subset = Str
    
    def enum_plots(self, experiment):
        """
        Returns an iterator over the possible plots that this View can
        produce.  The values returned can be passed to "plot".

        The view is validated against `experiment` first.  Statistic index
        levels that are not used as `variable` or as a facet are then used to
        group ``experiment.data``; the iterator yields one group key per
        group.  If every level is used, the iterator yields a single ``None``.

        Raises
        ------
        util.CytoflowViewError
            If the experiment, statistic, error statistic, subset, variable
            or facets are missing or inconsistent.
        """
        
        # TODO - all this is copied from below.  can we abstract it out somehow?
        
        if not experiment:
            raise util.CytoflowViewError("No experiment specified")
        
        if self.statistic not in experiment.statistics:
            raise util.CytoflowViewError("Can't find the statistic {} in the experiment"
                                         .format(self.statistic))

        stat = experiment.statistics[self.statistic]
            
        error_stat = None
        if self.error_statistic[0]:
            if self.error_statistic not in experiment.statistics:
                raise util.CytoflowViewError("Can't find the error statistic in the experiment")

            error_stat = experiment.statistics[self.error_statistic]

            if not stat.index.equals(error_stat.index):
                raise util.CytoflowViewError("Data statistic and error statistic "
                                             " don't have the same index.")

        data = pd.DataFrame(index = stat.index)
        data[stat.name] = stat
                
        if error_stat is not None:
            # stored under a random column name, as in plot()
            data[util.random_string(6)] = error_stat
            
        if self.subset:
            try:
                data = data.query(self.subset)
            except Exception:
                # was a bare `except:`, which also trapped KeyboardInterrupt
                # and SystemExit
                raise util.CytoflowViewError("Subset string '{0}' isn't valid"
                                        .format(self.subset))
                
            if len(data) == 0:
                raise util.CytoflowViewError("Subset string '{0}' returned no values"
                                        .format(self.subset))
            
        # an index level with a single value can't be plotted against or
        # faceted on, so drop it
        for name in list(data.index.names):
            unique_values = data.index.get_level_values(name).unique()
            if len(unique_values) == 1:
                warn("Only one value for level {}; dropping it.".format(name),
                     util.CytoflowViewWarning)
                try:
                    data.index = data.index.droplevel(name)
                except AttributeError:
                    raise util.CytoflowViewError("Must have more than one "
                                                 "value to plot.")
                
        names = list(data.index.names)
                        
        if not self.variable:
            raise util.CytoflowViewError("variable not specified")
        
        if self.variable not in data.index.names:
            raise util.CytoflowViewError("Variable {} isn't in the statistic; "
                                         "must be one of {}"
                                         .format(self.variable, data.index.names))
        
        if self.xfacet and self.xfacet not in experiment.conditions:
            raise util.CytoflowViewError("X facet {0} isn't in the experiment"
                                    .format(self.xfacet))
            
        if self.xfacet and self.xfacet not in data.index.names:
            raise util.CytoflowViewError("X facet {} is not a statistic index; "
                                         "must be one of {}".format(self.xfacet, data.index.names))
        
        if self.yfacet and self.yfacet not in experiment.conditions:
            raise util.CytoflowViewError("Y facet {0} isn't in the experiment"
                                    .format(self.yfacet))

        if self.yfacet and self.yfacet not in data.index.names:
            raise util.CytoflowViewError("Y facet {} is not a statistic index; "
                                         "must be one of {}".format(self.yfacet, data.index.names))

        if self.huefacet and self.huefacet not in experiment.conditions:
            raise util.CytoflowViewError("Hue facet {0} isn't in the experiment"
                                    .format(self.huefacet))
            
        if self.huefacet and self.huefacet not in data.index.names:
            raise util.CytoflowViewError("Hue facet {} is not a statistic index; "
                                         "must be one of {}".format(self.huefacet, data.index.names)) 
            
        # Build a real list: on Python 3 `filter` returns an iterator, which
        # has no len() and would be exhausted by the first set() call.
        facets = [x for x in (self.variable, self.xfacet, self.yfacet, self.huefacet) if x]
        
        if len(facets) != len(set(facets)):
            raise util.CytoflowViewError("Can't reuse facets")
        
        by = list(set(names) - set(facets))
        
        class plot_enum(object):
            """Iterates over the group keys, or yields a single None."""
            
            def __init__(self, experiment, by):
                self._iter = None
                self._returned = False
                
                if by:
                    self._iter = iter(experiment.data.groupby(by))
                
            def __iter__(self):
                return self
            
            def __next__(self):
                if self._iter:
                    # each groupby item is (key, group); only the key is used
                    return next(self._iter)[0]

                if self._returned:
                    raise StopIteration

                self._returned = True
                return None

            # keep the Python 2 spelling for callers that use `.next()`
            next = __next__
            
        return plot_enum(experiment, by)
    
    def plot(self, experiment, plot_name = None, **kwargs):
        """
        Plot the statistic against `variable` on a faceted grid.

        Parameters
        ----------
        experiment
            The experiment holding `statistic` (and `error_statistic`, if
            set) in its ``statistics``.

        plot_name
            A value produced by :meth:`enum_plots`, selecting which group of
            the unused statistic index levels to draw.  If it is ``None``
            and such levels exist, every plot from :meth:`enum_plots` is
            drawn in turn.

        col_wrap : int
            Passed to ``sns.FacetGrid``.  Requires `xfacet` and excludes
            `yfacet`.

        xlim, ylim : (min, max)
            Axis limits; derived from the data when omitted.

        sharex, sharey : bool (default = True)
            Passed to ``sns.FacetGrid``; when true, the limits are also made
            equal across all of the figure's axes.

        Notes
        -----
        Other `kwargs` are passed through ``grid.map`` to ``plt.plot`` and,
        when an error statistic is set, to ``_error_bars``.

        Raises
        ------
        util.CytoflowViewError
            If the view's settings are missing or inconsistent with
            `experiment`.
        """
        
        if not experiment:
            raise util.CytoflowViewError("No experiment specified")
        
        if not self.statistic:
            raise util.CytoflowViewError("Statistic not set")
        
        if self.statistic not in experiment.statistics:
            raise util.CytoflowViewError("Can't find the statistic {} in the experiment"
                                         .format(self.statistic))
        else:
            stat = experiment.statistics[self.statistic]
            
        if self.error_statistic[0]:
            if self.error_statistic not in experiment.statistics:
                raise util.CytoflowViewError("Can't find the error statistic in the experiment")
            else:
                error_stat = experiment.statistics[self.error_statistic]
        else:
            error_stat = None
         
        if error_stat is not None:
            if not stat.index.equals(error_stat.index):
                raise util.CytoflowViewError("Data statistic and error statistic "
                                             " don't have the same index.")
               
        data = pd.DataFrame(index = stat.index)
        data[stat.name] = stat
        
        if error_stat is not None:
            # random column name for the error values
            error_name = util.random_string(6)
            data[error_name] = error_stat
        
        if self.subset:
            try:
                # TODO - either sanitize column names, or check to see that
                # all conditions are valid Python variables
                data = data.query(self.subset)
            except Exception:
                # was a bare `except:`, which also trapped KeyboardInterrupt
                # and SystemExit
                raise util.CytoflowViewError("Subset string '{0}' isn't valid"
                                        .format(self.subset))
                
            if len(data) == 0:
                raise util.CytoflowViewError("Subset string '{0}' returned no values"
                                        .format(self.subset))
                
        # an index level with a single value can't be plotted against or
        # faceted on, so drop it
        names = list(data.index.names)
        for name in names:
            unique_values = data.index.get_level_values(name).unique()
            if len(unique_values) == 1:
                warn("Only one value for level {}; dropping it.".format(name),
                     util.CytoflowViewWarning)
                try:
                    data.index = data.index.droplevel(name)
                except AttributeError:
                    raise util.CytoflowViewError("Must have more than one "
                                                 "value to plot.")

        names = list(data.index.names)
               
        if not self.variable:
            raise util.CytoflowViewError("X variable not set")
            
        if self.variable not in experiment.conditions:
            raise util.CytoflowViewError("X variable {0} not in the experiment"
                                    .format(self.variable))
                        
        if self.variable not in names:
            raise util.CytoflowViewError("X variable {} is not a statistic index; "
                                         "must be one of {}".format(self.variable, names))
                
        if experiment.conditions[self.variable].dtype.kind not in "biufc": 
            raise util.CytoflowViewError("X variable {0} isn't numeric"
                                    .format(self.variable))
        
        # the three "not in the experiment" messages below used to be raised
        # without .format(), so the user saw a literal "{0}"
        if self.xfacet and self.xfacet not in experiment.conditions:
            raise util.CytoflowViewError("X facet {0} not in the experiment"
                                         .format(self.xfacet))
        
        if self.xfacet and self.xfacet not in names:
            raise util.CytoflowViewError("X facet {} is not a statistic index; "
                                         "must be one of {}".format(self.xfacet, names))
        
        if self.yfacet and self.yfacet not in experiment.conditions:
            raise util.CytoflowViewError("Y facet {0} not in the experiment"
                                         .format(self.yfacet))
        
        if self.yfacet and self.yfacet not in names:
            raise util.CytoflowViewError("Y facet {} is not a statistic index; "
                                         "must be one of {}".format(self.yfacet, names))
        
        if self.huefacet and self.huefacet not in experiment.metadata:
            raise util.CytoflowViewError("Hue facet {0} not in the experiment"
                                         .format(self.huefacet))
        
        if self.huefacet and self.huefacet not in names:
            raise util.CytoflowViewError("Hue facet {} is not a statistic index; "
                                         "must be one of {}".format(self.huefacet, names))  
            
        col_wrap = kwargs.pop('col_wrap', None)
        
        if col_wrap and self.yfacet:
            raise util.CytoflowViewError("Can't set yfacet and col_wrap at the same time.") 
        
        if col_wrap and not self.xfacet:
            raise util.CytoflowViewError("Must set xfacet to use col_wrap.")
            
        # Build a real list: on Python 3 `filter` returns an iterator, which
        # has no len() and would be exhausted by the first set() call.
        facets = [x for x in (self.variable, self.xfacet, self.yfacet, self.huefacet) if x]
        if len(facets) != len(set(facets)):
            raise util.CytoflowViewError("Can't reuse facets")
        
        unused_names = list(set(names) - set(facets))

        if unused_names and plot_name is None:
            # `col_wrap` was popped from kwargs above; hand it back so the
            # per-plot calls don't lose it
            for plot in self.enum_plots(experiment):
                self.plot(experiment, plot, col_wrap = col_wrap, **kwargs)
            return

        data.reset_index(inplace = True)
        
        if plot_name is not None:
            if not unused_names:
                raise util.CytoflowViewError("Plot {} not from plot_enum"
                                             .format(plot_name))
                               
            groupby = data.groupby(unused_names)

            if plot_name not in set(groupby.groups.keys()):
                raise util.CytoflowViewError("Plot {} not from plot_enum"
                                             .format(plot_name))
                
            data = groupby.get_group(plot_name)
            data.reset_index(drop = True, inplace = True)
            
        xscale = util.scale_factory(self.xscale, experiment, condition = self.variable) 
        
        if error_stat is not None:
            yscale = util.scale_factory(self.yscale, experiment, statistic = self.error_statistic)
        else:
            yscale = util.scale_factory(self.yscale, experiment, statistic = self.statistic)
                        
        xlim = kwargs.pop("xlim", None)
        if xlim is None:
            xlim = (xscale.clip(data[self.variable].min() * 0.9),
                    xscale.clip(data[self.variable].max() * 1.1))
                      
        ylim = kwargs.pop("ylim", None)
        if ylim is None:
            ylim = (yscale.clip(data[stat.name].min() * 0.9),
                    yscale.clip(data[stat.name].max() * 1.1))
            
            if error_stat is not None:
                try: 
                    # error values given as (low, high) pairs
                    ylim = (yscale.clip(min([x[0] for x in error_stat]) * 0.9),
                            yscale.clip(max([x[1] for x in error_stat]) * 1.1))
                except IndexError:
                    # scalar error values
                    ylim = (yscale.clip(error_stat.min() * 0.9), 
                            yscale.clip(error_stat.max() * 1.1))

        kwargs.setdefault('antialiased', True)  
        
        cols = col_wrap if col_wrap else \
               len(data[self.xfacet].unique()) if self.xfacet else 1
               
        sharex = kwargs.pop('sharex', True)
        sharey = kwargs.pop('sharey', True)
                  
        grid = sns.FacetGrid(data,
                             size = (6 / cols),
                             aspect = 1.5,
                             col = (self.xfacet if self.xfacet else None),
                             row = (self.yfacet if self.yfacet else None),
                             hue = (self.huefacet if self.huefacet else None),
                             col_order = (np.sort(data[self.xfacet].unique()) if self.xfacet else None),
                             row_order = (np.sort(data[self.yfacet].unique()) if self.yfacet else None),
                             hue_order = (np.sort(data[self.huefacet].unique()) if self.huefacet else None),
                             col_wrap = col_wrap,
                             legend_out = False,
                             sharex = sharex,
                             sharey = sharey,
                             xlim = xlim,
                             ylim = ylim)

        for ax in grid.axes.flatten():
            ax.set_xscale(self.xscale, **xscale.mpl_params)
            ax.set_yscale(self.yscale, **yscale.mpl_params)

        # plot the error bars first so the axis labels don't get overwritten
        if error_stat is not None:
            grid.map(_error_bars, self.variable, stat.name, error_name, **kwargs)
        
        grid.map(plt.plot, self.variable, stat.name, **kwargs)
        
        # if we are sharing y axes, make sure the y scale is the same for each
        if sharey:
            fig = plt.gcf()
            fig_y_min = float("inf")
            fig_y_max = float("-inf")
            
            for ax in fig.get_axes():
                ax_y_min, ax_y_max = ax.get_ylim()
                if ax_y_min < fig_y_min:
                    fig_y_min = ax_y_min
                if ax_y_max > fig_y_max:
                    fig_y_max = ax_y_max
                    
            for ax in fig.get_axes():
                ax.set_ylim(fig_y_min, fig_y_max)
            
        # if we are sharing x axes, make sure the x scale is the same for each
        if sharex:
            fig = plt.gcf()
            fig_x_min = float("inf")
            fig_x_max = float("-inf")
            
            for ax in fig.get_axes():
                ax_x_min, ax_x_max = ax.get_xlim()
                if ax_x_min < fig_x_min:
                    fig_x_min = ax_x_min
                if ax_x_max > fig_x_max:
                    fig_x_max = ax_x_max
            
            for ax in fig.get_axes():
                ax.set_xlim(fig_x_min, fig_x_max)
        
        # if we have a hue facet and a lot of hues, make a color bar instead
        # of a super-long legend.
        
        if self.huefacet:
            current_palette = mpl.rcParams['axes.color_cycle']
            if util.is_numeric(experiment.data[self.huefacet]) and \
               len(grid.hue_names) > len(current_palette):
                
                plot_ax = plt.gca()
                cmap = mpl.colors.ListedColormap(sns.color_palette("husl", 
                                                                   n_colors = len(grid.hue_names)))
                cax, kw = mpl.colorbar.make_axes(plt.gca())
                norm = mpl.colors.Normalize(vmin = np.min(grid.hue_names), 
                                            vmax = np.max(grid.hue_names), 
                                            clip = False)
                mpl.colorbar.ColorbarBase(cax, 
                                          cmap = cmap, 
                                          norm = norm,
                                          label = self.huefacet, 
                                          **kw)
                plt.sca(plot_ax)
            else:
                grid.add_legend(title = self.huefacet)
                
        if unused_names and plot_name:
            plt.title("{0} = {1}".format(unused_names, plot_name))
                
        plt.ylabel(self.statistic)