Example 1
    def _grid_plot(self, experiment, grid, xlim, ylim, xscale, yscale, **kwargs):

        scaled_xdata = xscale(experiment[self.xchannel])
        scaled_xdata = scaled_xdata[~np.isnan(scaled_xdata)]

        scaled_ydata = yscale(experiment[self.ychannel])
        scaled_ydata = scaled_ydata[~np.isnan(scaled_ydata)]
        
        # find good bin counts
        num_xbins = kwargs.pop('xbins', util.num_hist_bins(scaled_xdata))
        num_ybins = kwargs.pop('ybins', util.num_hist_bins(scaled_ydata))
        
        max_bins = kwargs.pop('max_bins', 100)
        
        # there are situations where this produces an unreasonable estimate.
        if num_xbins > max_bins:
            warnings.warn("Capping X bins to {}! To increase this limit, "
                          "change max_bins"
                          .format(max_bins))
            num_xbins = max_bins
            
        if num_ybins > max_bins:
            warnings.warn("Capping Y bins to {}! To increase this limit, "
                          "change max_bins"
                          .format(max_bins))
            num_ybins = max_bins
      
        kwargs.setdefault('smoothed', False)

        xbins = xscale.inverse(np.linspace(xscale(xlim[0]), xscale(xlim[1]), num_xbins))
        ybins = yscale.inverse(np.linspace(yscale(ylim[0]), yscale(ylim[1]), num_ybins))

        kwargs.setdefault('antialiased', False)
        kwargs.setdefault('linewidth', 0)
        kwargs.setdefault('edgecolors', 'face')
            
        grid.map(_hist2d, self.xchannel, self.ychannel, xbins = xbins, ybins = ybins, **kwargs)
        
        return {}
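
The pattern above — transform the limits, lay out evenly spaced edges in the scaled space, then map them back — works standalone too. A minimal sketch, assuming a plain log10 transform in place of the cytoflow xscale/yscale objects (np.log10 plays scale(), 10**x plays scale.inverse()):

import warnings

import numpy as np

def log_bins(lim, num_bins, max_bins=100):
    # cap unreasonable estimates, as _grid_plot does above
    if num_bins > max_bins:
        warnings.warn("Capping bins to {}!".format(max_bins))
        num_bins = max_bins

    # evenly spaced edges in log space, mapped back to data space
    return np.power(10, np.linspace(np.log10(lim[0]), np.log10(lim[1]), num_bins))

data = np.random.lognormal(mean=2.0, sigma=1.0, size=10000)
edges = log_bins((data.min(), data.max()), num_bins=500)   # comes back capped at 100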
Example 2
    def estimate(self, experiment, subset=None):
        """
        Estimate the Gaussian mixture model parameters
        """

        if experiment is None:
            raise util.CytoflowOpError("No experiment specified")

        if len(self.channels) == 0:
            raise util.CytoflowOpError("Must set at least one channel")

        for c in self.channels:
            if c not in experiment.data:
                raise util.CytoflowOpError(
                    "Channel {0} not found in the experiment".format(c))

        for c in self.scale:
            if c not in self.channels:
                raise util.CytoflowOpError(
                    "Scale set for channel {0}, but it isn't "
                    "in the channels list".format(c))

        for b in self.by:
            if b not in experiment.data:
                raise util.CytoflowOpError("Aggregation metadata {0} not found"
                                           " in the experiment".format(b))
            if len(experiment.data[b].unique()) > 100:  #WARNING - magic number
                raise util.CytoflowOpError(
                    "More than 100 unique values found for"
                    " aggregation metadata {0}.  Did you"
                    " accidentally specify a data channel?".format(b))

        if subset:
            try:
                experiment = experiment.query(subset)
            except Exception:
                raise util.CytoflowViewError(
                    "Subset string '{0}' isn't valid".format(subset))

            if len(experiment) == 0:
                raise util.CytoflowViewError(
                    "Subset string '{0}' returned no events".format(subset))

        if self.by:
            groupby = experiment.data.groupby(self.by)
        else:
            # use a lambda expression to return a group that contains
            # all the events
            groupby = experiment.data.groupby(lambda _: True)

        # get the scale. estimate the scale params for the ENTIRE data set,
        # not subsets we get from groupby().  And we need to save it so that
        # the data is transformed the same way when we apply()
        for c in self.channels:
            if c in self.scale:
                self._scale[c] = util.scale_factory(self.scale[c],
                                                    experiment,
                                                    channel=c)
#                 if self.scale[c] == 'log':
#                     self._scale[c].mode = 'mask'
            else:
                self._scale[c] = util.scale_factory(util.get_default_scale(),
                                                    experiment,
                                                    channel=c)

        for data_group, data_subset in groupby:
            if len(data_subset) == 0:
                raise util.CytoflowOpError(
                    "Group {} had no data".format(data_group))
            x = data_subset.loc[:, self.channels]
            for c in self.channels:
                x[c] = self._scale[c](x[c])

            # drop data that isn't in the scale range
            for c in self.channels:
                x = x[~(np.isnan(x[c]))]
            x = x.values

            #### choose the number of clusters and fit the kmeans
            num_clusters = [
                util.num_hist_bins(x[:, c]) for c in range(len(self.channels))
            ]
            num_clusters = np.ceil(np.median(num_clusters))
            num_clusters = int(num_clusters)

            self._kmeans[data_group] = kmeans = \
                sklearn.cluster.MiniBatchKMeans(n_clusters = num_clusters)

            kmeans.fit(x)
            x_labels = kmeans.predict(x)

            #### use the kmeans centroids to parameterize a finite gaussian
            #### mixture model which estimates the density function

            d = len(self.channels)
            s0 = np.zeros([d, d])
            for j in range(d):
                r = x[:, j].max() - x[:, j].min()
                s0[j, j] = (r / (num_clusters**(1. / d)))**0.5

            means = []
            weights = []
            normals = []
            beta_max = []

            for k in range(num_clusters):
                xk = x[x_labels == k]
                num_k = np.sum(x_labels == k)
                weight_k = num_k / len(x_labels)
                mu = xk.mean(axis=0)
                means.append(mu)
                s = np.cov(xk, rowvar=False)

                el = num_k / (num_clusters + num_k)
                s_smooth = el * self.h * s + (1.0 - el) * self.h0 * s0

                n = scipy.stats.multivariate_normal(mean=mu, cov=s_smooth)
                weights.append(weight_k)
                normals.append(lambda x, n=n: n.pdf(x))

                # get appropriate step size for peak finding
                min_b = np.inf
                for b in np.diagonal(s_smooth):
                    if np.sqrt(b) < min_b:
                        min_b = np.sqrt(b)
                beta_max.append(min_b)

            self._normals[data_group] = normals
            self._density[data_group] = density = \
                lambda x, weights=weights, normals=normals: \
                    np.sum([w * n(x) for w, n in zip(weights, normals)], axis=0)

            ### use optimization on the finite gmm to find the local peak for
            ### each kmeans cluster
            peaks = []
            peak_clusters = []  # peak idx --> list of clusters

            min_mu = [np.inf] * len(self.channels)
            max_mu = [-1.0 * np.inf] * len(self.channels)

            for k in range(num_clusters):
                mu = means[k]
                for ci in range(len(self.channels)):
                    if mu[ci] < min_mu[ci]:
                        min_mu[ci] = mu[ci]
                    if mu[ci] > max_mu[ci]:
                        max_mu[ci] = mu[ci]

            constraints = []
            for ci, c in enumerate(self.channels):
                constraints.append(
                    {'type': 'ineq',
                     'fun': lambda x, min_mu=min_mu[ci]: x - min_mu})
                constraints.append(
                    {'type': 'ineq',
                     'fun': lambda x, max_mu=max_mu[ci]: max_mu - x})

            for k in range(num_clusters):
                mu = means[k]
                f = lambda x: -1.0 * density(x)

                res = scipy.optimize.minimize(f,
                                              mu,
                                              method='COBYLA',
                                              constraints=constraints,
                                              options={
                                                  'rhobeg': beta_max[k],
                                                  'maxiter': 5000
                                              })
                if not res.success:
                    raise util.CytoflowOpError(
                        "Peak finding failed for cluster {}: {}".format(
                            k, res.message))


#                 ### The peak-searching algorithm from the paper.  works fine,
#                 ### but slow!  we get similar results with the COBYLA
#                 ### optimization method from scipy, using an appropriate rho
#                 x0 = x = means[k]
#                 k0 = k
#                 b = beta_max[k] / 10.0
#                 Nsuc = 0
#                 n = 0
#
#                 while(n < 1000):
# #                     df = scipy.misc.derivative(density, x, 1e-6)
#                     df = statsmodels.tools.numdiff.approx_fprime(x, density)
#                     if np.linalg.norm(df) < 1e-3:
#                         break
#
#                     y = x + b * df / np.linalg.norm(df)
#                     if density(y) <= density(x):
#                         Nsuc = 0
#                         b = b / 2.0
#                         continue
#
#                     Nsuc += 1
#                     if Nsuc >= 2:
#                         b = min(2*b, beta_max[k])
#
#                     ky = kmeans.predict(y[np.newaxis, :])[0]
#                     if ky == k:
#                         x = y
#                     else:
#                         k = ky
#                         b = beta_max[k] / 10.0
#                         mu = means[k]
#                         if density(mu) > density(y):
#                             x = mu
#                         else:
#                             x = y
#
#                     n += 1
#
#
#
#                 print("{} --> {}, {}".format(x0, x, n))

                merged = False
                for pi, p in enumerate(peaks):
                    if np.linalg.norm(p - res.x) < (1e-2):
                        peak_clusters[pi].append(k)
                        merged = True
                        break

                if not merged:
                    peak_clusters.append([k])
                    peaks.append(res.x)

            self._peaks[data_group] = peaks

            ### merge peaks that are sufficiently close

            groups = [[x] for x in range(len(peaks))]
            peak_groups = list(range(len(peaks)))  # peak idx --> group idx

            def max_tol(x, y):
                f = lambda a: density(a[np.newaxis, :])
                #                 lx = kmeans.predict(x[np.newaxis, :])[0]
                #                 ly = kmeans.predict(y[np.newaxis, :])[0]
                n = len(x)
                n_scale = 1

                #                 n_scale = np.sqrt(((nx + ny) / 2.0) / (n / num_clusters))

                def tol(t):
                    zt = x + t * (y - x)
                    fhat_zt = f(x) + t * (f(y) - f(x))
                    return -1.0 * abs((f(zt) - fhat_zt) / fhat_zt) * n_scale

                res = scipy.optimize.minimize_scalar(tol,
                                                     bounds=[0, 1],
                                                     method='Bounded')

                if res.status != 0:
                    raise util.CytoflowOpError(
                        "tol optimization failed for {}, {}".format(x, y))
                return -1.0 * res.fun

            def nearest_neighbor_dist(k):
                min_dist = np.inf

                for i in range(num_clusters):
                    if i == k:
                        continue
                    dist = np.linalg.norm(means[k] - means[i])
                    if dist < min_dist:
                        min_dist = dist

                return min_dist

            sk = [nearest_neighbor_dist(x) for x in range(num_clusters)]

            def s(x):
                k = kmeans.predict(x[np.newaxis, :])[0]
                return sk[k]

            def can_merge(g, h):
                for pg in g:
                    for ph in h:
                        vg = peaks[pg]
                        vh = peaks[ph]
                        dist_gh = np.linalg.norm(vg - vh)

                        if max_tol(vg, vh) < self.tol and dist_gh / (
                                s(vg) + s(vh)) <= self.merge_dist:
                            return True

                return False

            while True:
                if len(groups) == 1:
                    break

                # find closest mergable groups
                min_dist = np.inf
                for gi in range(len(groups)):
                    g = groups[gi]

                    for hi in range(gi + 1, len(groups)):
                        h = groups[hi]

                        if can_merge(g, h):
                            dist_gh = np.inf
                            for pg in g:
                                vg = peaks[pg]
                                for ph in h:
                                    vh = peaks[ph]
                                    #                                     print("vg {} vh {}".format(vg, vh))
                                    dist_gh = min(dist_gh,
                                                  np.linalg.norm(vg - vh))

                            if dist_gh < min_dist:
                                min_gi = gi
                                min_hi = hi
                                min_dist = dist_gh

                if min_dist == np.inf:
                    break

                # merge the groups
                groups[min_gi].extend(groups[min_hi])
                for g in groups[min_hi]:
                    peak_groups[g] = min_gi
                del groups[min_hi]

            cluster_group = [0] * num_clusters
            cluster_peaks = [0] * num_clusters

            for gi, g in enumerate(groups):
                for p in g:
                    for cluster in peak_clusters[p]:
                        cluster_group[cluster] = gi
                        cluster_peaks[cluster] = p

            self._cluster_peak[data_group] = cluster_peaks
            self._cluster_group[data_group] = cluster_group
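
The heart of estimate() is: cluster with MiniBatchKMeans, then turn each cluster into one component of a finite Gaussian mixture whose weighted sum approximates the density. A stripped-down sketch on synthetic data — it omits the h/h0 covariance smoothing, the COBYLA peak search, and the peak merging:

import numpy as np
import scipy.stats
import sklearn.cluster

x = np.random.randn(5000, 2)                  # stand-in for the scaled event data
kmeans = sklearn.cluster.MiniBatchKMeans(n_clusters=8).fit(x)
labels = kmeans.predict(x)

weights, normals = [], []
for k in range(8):
    xk = x[labels == k]
    weights.append(len(xk) / len(x))          # mixing weight = cluster fraction
    normals.append(scipy.stats.multivariate_normal(mean=xk.mean(axis=0),
                                                   cov=np.cov(xk, rowvar=False)))

# the density estimate is the weighted sum of the per-cluster normals
density = lambda pts: np.sum([w * n.pdf(pts) for w, n in zip(weights, normals)],
                             axis=0)
print(density(np.array([[0.0, 0.0], [3.0, 3.0]])))   # high near the origin, low far out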
Example 3
    def plot(self, experiment, **kwargs):
        """Plot a faceted histogram view of a channel"""
        
        if not experiment:
            raise util.CytoflowViewError("No experiment specified")
        
        if not self.channel:
            raise util.CytoflowViewError("Must specify a channel")
        
        if self.channel not in experiment.data:
            raise util.CytoflowViewError("Channel {0} not in the experiment"
                                    .format(self.channel))
        
        if self.xfacet and self.xfacet not in experiment.conditions:
            raise util.CytoflowViewError("X facet {0} not in the experiment"
                                    .format(self.xfacet))
        
        if self.yfacet and self.yfacet not in experiment.conditions:
            raise util.CytoflowViewError("Y facet {0} not in the experiment"
                                    .format(self.yfacet))
        
        if self.huefacet and self.huefacet not in experiment.conditions:
            raise util.CytoflowViewError("Hue facet {0} not in the experiment"
                                    .format(self.huefacet))

        if self.subset:
            try:
                data = experiment.query(self.subset).data.reset_index()
            except Exception:
                raise util.CytoflowViewError("Subset string '{0}' isn't valid"
                                        .format(self.subset))
                
            if len(data) == 0:
                raise util.CytoflowViewError("Subset string '{0}' returned no events"
                                        .format(self.subset))
        else:
            data = experiment.data
        
        # get the scale
        scale = util.scale_factory(self.scale, experiment, self.channel)
        scaled_data = scale(data[self.channel])
        
        kwargs.setdefault('histtype', 'stepfilled')
        kwargs.setdefault('alpha', 0.5)
        kwargs.setdefault('antialiased', True)

        # estimate a "good" number of bins; see cytoflow.utility.num_hist_bins
        # for a reference.
        
        num_bins = util.num_hist_bins(scaled_data)
        
        # clip num_bins to (50, 1000)
        num_bins = max(min(num_bins, 1000), 50)
        
        xmin = bottleneck.nanmin(scaled_data)
        xmax = bottleneck.nanmax(scaled_data)
                    
        if (self.huefacet 
            and "bins" in experiment.metadata[self.huefacet]
            and experiment.metadata[self.huefacet]["bin_scale"] == self.scale):
            # if we color facet by the result of a BinningOp and we don't
            # match the BinningOp bins with the histogram bins, we get
            # gnarly aliasing.
            
            # each color gets at least one bin.  however, if the estimated
            # number of bins for the histogram is much larger than the
            # number of colors, sub-divide each color into multiple bins.
            bins = experiment.metadata[self.huefacet]["bins"]
            bins = np.append(bins, xmax)
            
            num_hues = len(data[self.huefacet].unique())
            bins_per_hue = math.ceil(num_bins / num_hues)
            
            new_bins = [xmin]
            for end in [b for b in bins if (b > xmin and b <= xmax)]:
                new_bins = np.append(new_bins,
                                     np.linspace(new_bins[-1],
                                                 end,
                                                 bins_per_hue + 1,
                                                 endpoint = True)[1:])

            bins = scale.inverse(new_bins)
        else:
            bin_width = (xmax - xmin) / num_bins
            bins = scale.inverse(np.arange(xmin, xmax, bin_width))
            bins = np.append(bins, scale.inverse(xmax))
            
        # take care of a rare rounding error, where the last observation is
        # a liiiitle bit more than the last bin, which makes plt.hist() puke
        bins[-1] += 1
                    
        kwargs.setdefault('bins', bins) 
        
        # mask out the data that's not in the scale domain
        data = data[~np.isnan(scaled_data)]

        g = sns.FacetGrid(data, 
                          size = 6,
                          aspect = 1.5,
                          col = (self.xfacet if self.xfacet else None),
                          row = (self.yfacet if self.yfacet else None),
                          hue = (self.huefacet if self.huefacet else None),
                          col_order = (np.sort(data[self.xfacet].unique()) if self.xfacet else None),
                          row_order = (np.sort(data[self.yfacet].unique()) if self.yfacet else None),
                          hue_order = (np.sort(data[self.huefacet].unique()) if self.huefacet else None),
                          legend_out = False,
                          sharex = False,
                          sharey = False)
        
        # set the scale for each set of axes; can't just call plt.xscale() 
        for ax in g.axes.flatten():
            ax.set_xscale(self.scale, **scale.mpl_params)  
                  
        g.map(plt.hist, self.channel, **kwargs)
        
        # if we have a hue facet and a lot of hues, make a color bar instead
        # of a super-long legend.
        
        if self.huefacet:
            current_palette = mpl.rcParams['axes.color_cycle']
            if len(g.hue_names) > len(current_palette):
                plot_ax = plt.gca()
                cmap = mpl.colors.ListedColormap(sns.color_palette("husl", 
                                                                   n_colors = len(g.hue_names)))
                cax, _ = mpl.colorbar.make_axes(plt.gca())
                norm = mpl.colors.Normalize(vmin = np.min(g.hue_names), 
                                            vmax = np.max(g.hue_names), 
                                            clip = False)
                mpl.colorbar.ColorbarBase(cax, 
                                          cmap = cmap, 
                                          norm = norm, 
                                          label = self.huefacet)
                plt.sca(plot_ax)
            else:
                g.add_legend(title = self.huefacet)
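
The hue-facet branch above subdivides each BinningOp bin into bins_per_hue histogram bins so the two sets of edges coincide. The same subdivision in isolation, with coarse as a hypothetical stand-in for experiment.metadata[self.huefacet]["bins"]:

import math

import numpy as np

coarse = np.array([0.0, 1.0, 2.0, 3.0])   # hypothetical BinningOp edges
num_bins, num_hues = 10, 3
bins_per_hue = math.ceil(num_bins / num_hues)

fine = [coarse[0]]
for end in coarse[1:]:
    # [1:] drops each segment's start point so edges aren't duplicated at the seams
    fine = np.append(fine, np.linspace(fine[-1], end, bins_per_hue + 1,
                                       endpoint=True)[1:])
# fine now has bins_per_hue sub-bins inside every coarse bin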
Example 4
    def _grid_plot(self, experiment, grid, **kwargs):

        kwargs.setdefault('histtype', 'stepfilled')
        kwargs.setdefault('alpha', 0.5)
        kwargs.setdefault('antialiased', True)

        # estimate a "good" number of bins; see cytoflow.utility.num_hist_bins
        # for a reference.
        scale = kwargs.pop('scale')[self.channel]
        lim = kwargs.pop('lim')[self.channel]

        scaled_data = scale(experiment[self.channel])
        num_bins = kwargs.pop('num_bins', None)
        if num_bins is None:
            num_bins = util.num_hist_bins(scaled_data)

        # clip num_bins to (100, 1000)
        num_bins = max(min(num_bins, 1000), 100)

        if (self.huefacet and "bins" in experiment.metadata[self.huefacet] and
                experiment.metadata[self.huefacet]["bin_scale"] == self.scale):

            # if we color facet by the result of a BinningOp and we don't
            # match the BinningOp bins with the histogram bins, we get
            # gnarly aliasing.

            # each color gets at least one bin.  however, if the estimated
            # number of bins for the histogram is much larger than the
            # number of colors, sub-divide each color into multiple bins.
            bins = experiment.metadata[self.huefacet]["bins"]
            scaled_bins = scale(bins)

            num_hues = len(experiment[self.huefacet].unique())
            bins_per_hue = math.floor(num_bins / num_hues)

            if bins_per_hue == 1:
                new_bins = scaled_bins
            else:
                new_bins = []
                for idx in range(1, len(scaled_bins)):
                    new_bins = np.append(
                        new_bins,
                        np.linspace(scaled_bins[idx - 1],
                                    scaled_bins[idx],
                                    bins_per_hue + 1,
                                    endpoint=False))

            bins = scale.inverse(new_bins)
        else:
            xmin = bottleneck.nanmin(scaled_data)
            xmax = bottleneck.nanmax(scaled_data)
            bins = scale.inverse(
                np.linspace(xmin, xmax, num=int(num_bins), endpoint=True))

        kwargs.setdefault('bins', bins)
        kwargs.setdefault('orientation', 'vertical')

        if kwargs.get('linewidth') is None:
            kwargs['linewidth'] = 0 if kwargs['histtype'] == "stepfilled" else 2

        # if we have a hue facet, the y scaling is frequently wrong.  this
        # will capture the maximum bin count of each call to plt.hist, so
        # we don't have to compute the histogram multiple times
        count_max = []

        def hist_lims(*args, **kwargs):
            # there's some bug in the above code where we get data that isn't
            # in the range of `bins`, which makes hist() puke.  so get rid
            # of it.

            bins = kwargs.get('bins')
            new_args = []
            for x in args:
                x = x[x > bins[0]]
                x = x[x < bins[-1]]
                new_args.append(x)

            if scale.name != "linear" and kwargs.get("density"):
                kwargs["density"] = False
                counts, _ = np.histogram(new_args, bins=kwargs["bins"])
                kwargs["weights"] = counts / np.sum(counts)
                n, _, _ = plt.hist(kwargs["bins"][:-1], **kwargs)
            else:
                n, _, _ = plt.hist(*new_args, **kwargs)

            count_max.append(max(n))

        grid.map(hist_lims, self.channel, **kwargs)

        ret = {}
        if kwargs['orientation'] == 'vertical':
            ret['xscale'] = scale
            ret['xlim'] = lim
            ret['ylim'] = (0, 1.05 * max(count_max))
        else:
            ret['yscale'] = scale
            ret['ylim'] = lim
            ret['xlim'] = (0, 1.05 * max(count_max))

        return ret
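
The hist_lims closure is the key trick in this version: wrap plt.hist so each facet's maximum bin count is captured, then use the overall maximum as a common y limit. A self-contained sketch of the same pattern:

import numpy as np
import matplotlib.pyplot as plt

count_max = []

def hist_capture(x, **kwargs):
    # plt.hist returns the per-bin counts; remember the tallest bin
    n, _, _ = plt.hist(x, **kwargs)
    count_max.append(max(n))

fig, axes = plt.subplots(1, 2)
for ax, loc in zip(axes, (0.0, 3.0)):
    plt.sca(ax)
    hist_capture(np.random.normal(loc, 1.0, 1000), bins=30)

for ax in axes:
    ax.set_ylim(0, 1.05 * max(count_max))   # same y scale on every facet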
Example 5
    def plot(self, experiment, **kwargs):
        """Plot a faceted histogram view of a channel"""

        if not experiment:
            raise util.CytoflowViewError("No experiment specified")

        if not self.channel:
            raise util.CytoflowViewError("Must specify a channel")

        if self.channel not in experiment.data:
            raise util.CytoflowViewError(
                "Channel {0} not in the experiment".format(self.channel))

        if self.xfacet and self.xfacet not in experiment.conditions:
            raise util.CytoflowViewError(
                "X facet {0} not in the experiment".format(self.xfacet))

        if self.yfacet and self.yfacet not in experiment.conditions:
            raise util.CytoflowViewError(
                "Y facet {0} not in the experiment".format(self.yfacet))

        if self.huefacet and self.huefacet not in experiment.conditions:
            raise util.CytoflowViewError(
                "Hue facet {0} not in the experiment".format(self.huefacet))

        facets = [x for x in (self.xfacet, self.yfacet, self.huefacet) if x]
        if len(facets) != len(set(facets)):
            raise util.CytoflowViewError("Can't reuse facets")

        col_wrap = kwargs.pop('col_wrap', None)

        if col_wrap and self.yfacet:
            raise util.CytoflowViewError(
                "Can't set yfacet and col_wrap at the same time.")

        if col_wrap and not self.xfacet:
            raise util.CytoflowViewError("Must set xfacet to use col_wrap.")

        if self.subset:
            try:
                data = experiment.query(self.subset).data.reset_index()
            except util.CytoflowError as e:
                raise util.CytoflowViewError(str(e))
            except Exception as e:
                raise util.CytoflowViewError(
                    "Subset string '{0}' isn't valid".format(self.subset))

            if len(data) == 0:
                raise util.CytoflowViewError(
                    "Subset string '{0}' returned no events".format(
                        self.subset))
        else:
            data = experiment.data

        # get the scale
        scale = kwargs.pop('scale', None)
        if scale is None:
            scale = util.scale_factory(self.scale,
                                       experiment,
                                       channel=self.channel)

        scaled_data = scale(data[self.channel])

        kwargs.setdefault('histtype', 'stepfilled')
        kwargs.setdefault('alpha', 0.5)
        kwargs.setdefault('antialiased', True)

        # estimate a "good" number of bins; see cytoflow.utility.num_hist_bins
        # for a reference.

        num_bins = util.num_hist_bins(scaled_data)

        # clip num_bins to (50, 1000)
        num_bins = max(min(num_bins, 1000), 50)

        xmin = bottleneck.nanmin(scaled_data)
        xmax = bottleneck.nanmax(scaled_data)

        if (self.huefacet and "bins" in experiment.metadata[self.huefacet] and
                experiment.metadata[self.huefacet]["bin_scale"] == self.scale):
            # if we color facet by the result of a BinningOp and we don't
            # match the BinningOp bins with the histogram bins, we get
            # gnarly aliasing.

            # each color gets at least one bin.  however, if the estimated
            # number of bins for the histogram is much larger than the
            # number of colors, sub-divide each color into multiple bins.
            bins = experiment.metadata[self.huefacet]["bins"]
            bins = np.append(bins, xmax)

            num_hues = len(data[self.huefacet].unique())
            bins_per_hue = math.ceil(num_bins / num_hues)

            new_bins = [xmin]
            for end in [b for b in bins if (b > xmin and b <= xmax)]:
                new_bins = np.append(
                    new_bins,
                    np.linspace(new_bins[-1],
                                end,
                                bins_per_hue + 1,
                                endpoint=True)[1:])

            bins = scale.inverse(new_bins)
        else:
            bin_width = (xmax - xmin) / num_bins
            bins = scale.inverse(np.arange(xmin, xmax, bin_width))
            bins = np.append(bins, scale.inverse(xmax))

        # take care of a rare rounding error, where the first observation is
        # less than the first bin or the last observation is more than the last
        # bin, which makes plt.hist() puke
        bins[-1] += 1
        bins[0] -= 1

        kwargs.setdefault('bins', bins)

        # mask out the data that's not in the scale domain
        data = data[~np.isnan(scaled_data)]

        # adjust the limits to clip extreme values
        min_quantile = kwargs.pop("min_quantile", 0.001)
        max_quantile = kwargs.pop("max_quantile", 0.999)

        xlim = kwargs.pop("xlim", None)
        if xlim is None:
            xlim = (data[self.channel].quantile(min_quantile),
                    data[self.channel].quantile(max_quantile))

        sharex = kwargs.pop("sharex", True)
        sharey = kwargs.pop("sharey", True)

        cols = col_wrap if col_wrap else \
               len(data[self.xfacet].unique()) if self.xfacet else 1

        g = sns.FacetGrid(data,
                          size=6 / cols,
                          aspect=1.5,
                          col=(self.xfacet if self.xfacet else None),
                          row=(self.yfacet if self.yfacet else None),
                          hue=(self.huefacet if self.huefacet else None),
                          col_order=(np.sort(data[self.xfacet].unique())
                                     if self.xfacet else None),
                          row_order=(np.sort(data[self.yfacet].unique())
                                     if self.yfacet else None),
                          hue_order=(np.sort(data[self.huefacet].unique())
                                     if self.huefacet else None),
                          col_wrap=col_wrap,
                          legend_out=False,
                          sharex=sharex,
                          sharey=sharey,
                          xlim=xlim)

        # set the scale for each set of axes; can't just call plt.xscale()
        for ax in g.axes.flatten():
            ax.set_xscale(self.scale, **scale.mpl_params)

        legend = kwargs.pop('legend', True)
        g.map(plt.hist, self.channel, **kwargs)

        # if we are sharing y axes, make sure the y scale is the same for each
        if sharey:
            fig = plt.gcf()
            fig_y_max = float("-inf")

            for ax in fig.get_axes():
                _, ax_y_max = ax.get_ylim()
                if ax_y_max > fig_y_max:
                    fig_y_max = ax_y_max

            for ax in fig.get_axes():
                ax.set_ylim(None, fig_y_max)

        # if we are sharing x axes, make sure the x scale is the same for each
        if sharex:
            fig = plt.gcf()
            fig_x_min = float("inf")
            fig_x_max = float("-inf")

            for ax in fig.get_axes():
                ax_x_min, ax_x_max = ax.get_xlim()
                if ax_x_min < fig_x_min:
                    fig_x_min = ax_x_min
                if ax_x_max > fig_x_max:
                    fig_x_max = ax_x_max

            for ax in fig.get_axes():
                ax.set_xlim(fig_x_min, fig_x_max)

        # if we have a hue facet, the y scaling is frequently wrong.
        if self.huefacet:
            h = np.histogram(data[self.channel], bins=bins)
            ymax = np.max(h[0])
            plt.ylim(0, 1.1 * ymax)

        # if we have a hue facet and a lot of hues, make a color bar instead
        # of a super-long legend.

        if self.huefacet and legend:
            current_palette = mpl.rcParams['axes.color_cycle']

            if util.is_numeric(experiment.data[self.huefacet]) and \
               len(g.hue_names) > len(current_palette):

                plot_ax = plt.gca()
                cmap = mpl.colors.ListedColormap(
                    sns.color_palette("husl", n_colors=len(g.hue_names)))
                cax, _ = mpl.colorbar.make_axes(plt.gca())
                norm = mpl.colors.Normalize(vmin=np.min(g.hue_names),
                                            vmax=np.max(g.hue_names),
                                            clip=False)
                mpl.colorbar.ColorbarBase(cax,
                                          cmap=cmap,
                                          norm=norm,
                                          label=self.huefacet)
                plt.sca(plot_ax)
            else:
                g.add_legend(title=self.huefacet)

        return g
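
The min_quantile/max_quantile defaults above do the heavy lifting for the axis limits: clip to the 0.1% and 99.9% quantiles so a handful of extreme events doesn't stretch the plot. In isolation:

import numpy as np
import pandas as pd

channel = pd.Series(np.random.lognormal(2.0, 1.0, 10000))
xlim = (channel.quantile(0.001), channel.quantile(0.999))   # ignores the outliers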
Example 6
    def plot(self, experiment, **kwargs):
        """Plot a faceted histogram view of a channel"""

        if not experiment:
            raise util.CytoflowViewError("No experiment specified")

        if not self.xchannel:
            raise util.CytoflowViewError("X channel not specified")

        if self.xchannel not in experiment.data:
            raise util.CytoflowViewError(
                "X channel {0} not in the experiment".format(self.xchannel))

        if not self.ychannel:
            raise util.CytoflowViewError("Y channel not specified")

        if self.ychannel not in experiment.data:
            raise util.CytoflowViewError(
                "Y channel {0} not in the experiment".format(self.ychannel))

        if self.xfacet and self.xfacet not in experiment.conditions:
            raise util.CytoflowViewError(
                "X facet {0} not in the experiment".format(self.xfacet))

        if self.yfacet and self.yfacet not in experiment.conditions:
            raise util.CytoflowViewError(
                "Y facet {0} not in the experiment".format(self.yfacet))

        if self.huefacet and self.huefacet not in experiment.metadata:
            raise util.CytoflowViewError(
                "Hue facet {0} not in the experiment".format(self.huefacet))

        facets = [x for x in (self.xfacet, self.yfacet, self.huefacet) if x]
        if len(facets) != len(set(facets)):
            raise util.CytoflowViewError("Can't reuse facets")

        col_wrap = kwargs.pop('col_wrap', None)

        if col_wrap and self.yfacet:
            raise util.CytoflowViewError(
                "Can't set yfacet and col_wrap at the same time.")

        if col_wrap and not self.xfacet:
            raise util.CytoflowViewError("Must set xfacet to use col_wrap.")

        if self.subset:
            try:
                data = experiment.query(self.subset).data.reset_index()
            except Exception:
                raise util.CytoflowViewError(
                    "Subset string '{0}' isn't valid".format(self.subset))

            if len(data) == 0:
                raise util.CytoflowViewError(
                    "Subset string '{0}' returned no events".format(
                        self.subset))
        else:
            data = experiment.data

        xscale = util.scale_factory(self.xscale,
                                    experiment,
                                    channel=self.xchannel)
        yscale = util.scale_factory(self.yscale,
                                    experiment,
                                    channel=self.ychannel)

        kwargs['xscale'] = xscale
        kwargs['yscale'] = yscale

        scaled_xdata = xscale(data[self.xchannel])
        data = data[~np.isnan(scaled_xdata)]
        scaled_xdata = scaled_xdata[~np.isnan(scaled_xdata)]

        scaled_ydata = yscale(data[self.ychannel])
        data = data[~np.isnan(scaled_ydata)]
        scaled_ydata = scaled_ydata[~np.isnan(scaled_ydata)]

        # find good bin counts
        num_xbins = util.num_hist_bins(scaled_xdata)
        num_ybins = util.num_hist_bins(scaled_ydata)

        # there are situations where this produces an unreasonable estimate.
        if num_xbins > self._max_bins:
            warnings.warn("Capping X bins to {}! To increase this limit, "
                          "change _max_bins".format(self._max_bins))
            num_xbins = self._max_bins

        if num_ybins > self._max_bins:
            warnings.warn("Capping Y bins to {}! To increase this limit, "
                          "change _max_bins".format(self._max_bins))
            num_ybins = self._max_bins

        kwargs.setdefault('smoothed', False)
        if kwargs['smoothed']:
            num_xbins //= 2
            num_ybins //= 2

        _, xedges, yedges = np.histogram2d(scaled_xdata,
                                           scaled_ydata,
                                           bins=(num_xbins, num_ybins))

        kwargs['xedges'] = xscale.inverse(xedges)
        kwargs['yedges'] = yscale.inverse(yedges)

        kwargs.setdefault('antialiased', True)

        # adjust the limits to clip extreme values
        min_quantile = kwargs.pop("min_quantile", 0.001)
        max_quantile = kwargs.pop("max_quantile", 0.999)

        xlim = kwargs.pop("xlim", None)
        if xlim is None:
            xlim = (data[self.xchannel].quantile(min_quantile),
                    data[self.xchannel].quantile(max_quantile))

        ylim = kwargs.pop("ylim", None)
        if ylim is None:
            ylim = (data[self.ychannel].quantile(min_quantile),
                    data[self.ychannel].quantile(max_quantile))

        sharex = kwargs.pop('sharex', True)
        sharey = kwargs.pop('sharey', True)

        cols = col_wrap if col_wrap else \
               len(data[self.xfacet].unique()) if self.xfacet else 1

        g = sns.FacetGrid(data,
                          size=(6 / cols),
                          aspect=1.5,
                          col=(self.xfacet if self.xfacet else None),
                          row=(self.yfacet if self.yfacet else None),
                          hue=(self.huefacet if self.huefacet else None),
                          col_order=(np.sort(data[self.xfacet].unique())
                                     if self.xfacet else None),
                          row_order=(np.sort(data[self.yfacet].unique())
                                     if self.yfacet else None),
                          hue_order=(np.sort(data[self.huefacet].unique())
                                     if self.huefacet else None),
                          col_wrap=col_wrap,
                          sharex=sharex,
                          sharey=sharey,
                          xlim=xlim,
                          ylim=ylim)

        for ax in g.axes.flatten():
            ax.set_xscale(self.xscale, **xscale.mpl_params)
            ax.set_yscale(self.yscale, **yscale.mpl_params)

        g.map(_hist2d, self.xchannel, self.ychannel, **kwargs)

        # if we are sharing x axes, make sure the x scale is the same for each
        if sharex:
            fig = plt.gcf()
            fig_x_min = float("inf")
            fig_x_max = float("-inf")

            for ax in fig.get_axes():
                ax_x_min, ax_x_max = ax.get_xlim()
                if ax_x_min < fig_x_min:
                    fig_x_min = ax_x_min
                if ax_x_max > fig_x_max:
                    fig_x_max = ax_x_max

            for ax in fig.get_axes():
                ax.set_xlim(fig_x_min, fig_x_max)

        # if we are sharing y axes, make sure the y scale is the same for each
        if sharey:
            fig = plt.gcf()
            fig_y_min = float("inf")
            fig_y_max = float("-inf")

            for ax in fig.get_axes():
                ax_y_min, ax_y_max = ax.get_ylim()
                if ax_y_min < fig_y_min:
                    fig_y_min = ax_y_min
                if ax_y_max > fig_y_max:
                    fig_y_max = ax_y_max

            for ax in fig.get_axes():
                ax.set_ylim(fig_y_min, fig_y_max)

        # if we have a hue facet and a lot of hues, make a color bar instead
        # of a super-long legend.

        if self.huefacet:
            current_palette = mpl.rcParams['axes.color_cycle']
            if util.is_numeric(experiment.data[self.huefacet]) and \
               len(g.hue_names) > len(current_palette):

                plot_ax = plt.gca()
                cmap = mpl.colors.ListedColormap(
                    sns.color_palette("husl", n_colors=len(g.hue_names)))
                cax, _ = mpl.colorbar.make_axes(plt.gca())
                hue_scale = util.scale_factory(self.huescale,
                                               experiment,
                                               condition=self.huefacet)
                mpl.colorbar.ColorbarBase(cax,
                                          cmap=cmap,
                                          norm=hue_scale.color_norm(),
                                          label=self.huefacet)
                plt.sca(plot_ax)
            else:
                g.add_legend(title=self.huefacet)
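
The sharex/sharey passes above are a post-hoc harmonization: after plotting, take the union of every axes' limits and apply it everywhere. The same pass, reduced to its essentials:

import numpy as np
import matplotlib.pyplot as plt

fig, axes = plt.subplots(1, 2)
axes[0].plot(np.arange(10))
axes[1].plot(np.arange(5))

# the union of all the x limits in the figure...
x_min = min(ax.get_xlim()[0] for ax in fig.get_axes())
x_max = max(ax.get_xlim()[1] for ax in fig.get_axes())

# ...applied back to every axes
for ax in fig.get_axes():
    ax.set_xlim(x_min, x_max)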
Example 7
    def _grid_plot(self, experiment, grid, xlim, ylim, xscale, yscale,
                   **kwargs):

        kwargs.setdefault('histtype', 'stepfilled')
        kwargs.setdefault('alpha', 0.5)
        kwargs.setdefault('antialiased', True)

        # estimate a "good" number of bins; see cytoflow.utility.num_hist_bins
        # for a reference.

        scaled_data = xscale(experiment[self.channel])
        num_bins = util.num_hist_bins(scaled_data)

        # clip num_bins to (100, 1000)
        num_bins = max(min(num_bins, 1000), 100)

        if (self.huefacet and "bins" in experiment.metadata[self.huefacet] and
                experiment.metadata[self.huefacet]["bin_scale"] == self.scale):

            # if we color facet by the result of a BinningOp and we don't
            # match the BinningOp bins with the histogram bins, we get
            # gnarly aliasing.

            # each color gets at least one bin.  however, if the estimated
            # number of bins for the histogram is much larger than the
            # number of colors, sub-divide each color into multiple bins.
            bins = experiment.metadata[self.huefacet]["bins"]
            scaled_bins = xscale(bins)

            num_hues = len(experiment[self.huefacet].unique())
            bins_per_hue = math.floor(num_bins / num_hues)

            if bins_per_hue == 1:
                new_bins = scaled_bins
            else:
                new_bins = []
                for idx in range(1, len(scaled_bins)):
                    new_bins = np.append(
                        new_bins,
                        np.linspace(scaled_bins[idx - 1],
                                    scaled_bins[idx],
                                    bins_per_hue + 1,
                                    endpoint=False))

            bins = xscale.inverse(new_bins)
        else:
            xmin = bottleneck.nanmin(scaled_data)
            xmax = bottleneck.nanmax(scaled_data)
            bins = xscale.inverse(
                np.linspace(xmin, xmax, num=int(num_bins), endpoint=True))

        kwargs.setdefault('bins', bins)

        # if we have a hue facet, the y scaling is frequently wrong.  this
        # will capture the maximum bin count of each call to plt.hist, so
        # we don't have to compute the histogram multiple times
        ymax = []

        def hist_lims(*args, **kwargs):
            # there's some bug in the above code where we get data that isn't
            # in the range of `bins`, which makes hist() puke.  so get rid
            # of it.

            bins = kwargs.get('bins')
            new_args = []
            for x in args:
                x = x[x > bins[0]]
                x = x[x < bins[-1]]
                new_args.append(x)

            n, _, _ = plt.hist(*new_args, **kwargs)
            ymax.append(max(n))

        grid.map(hist_lims, self.channel, **kwargs)

        plt.ylim(0, 1.05 * max(ymax))

        return {}
Example 8
    def plot(self, experiment, **kwargs):
        """Plot a faceted histogram view of a channel"""
        
        if not experiment:
            raise util.CytoflowViewError("No experiment specified")
        
        if not self.xchannel:
            raise util.CytoflowViewError("X channel not specified")
        
        if self.xchannel not in experiment.data:
            raise util.CytoflowViewError("X channel {0} not in the experiment"
                                    .format(self.xchannel))
            
        if not self.ychannel:
            raise util.CytoflowViewError("Y channel not specified")
        
        if self.ychannel not in experiment.data:
            raise util.CytoflowViewError("Y channel {0} not in the experiment"
                                    .format(self.ychannel))
        
        if self.xfacet and self.xfacet not in experiment.conditions:
            raise util.CytoflowViewError("X facet {0} not in the experiment"
                                    .format(self.xfacet))
        
        if self.yfacet and self.yfacet not in experiment.conditions:
            raise util.CytoflowViewError("Y facet {0} not in the experiment"
                                    .format(self.yfacet))
        
        if self.huefacet and self.huefacet not in experiment.metadata:
            raise util.CytoflowViewError("Hue facet {0} not in the experiment"
                                    .format(self.huefacet))

        if self.subset:
            try: 
                data = experiment.query(self.subset).data.reset_index()
            except Exception:
                raise util.CytoflowViewError("Subset string '{0}' isn't valid"
                                        .format(self.subset))
                            
            if len(data.index) == 0:
                raise util.CytoflowViewError("Subset string '{0}' returned no events"
                                        .format(self.subset))
        else:
            data = experiment.data
            
        xscale = util.scale_factory(self.xscale, experiment, self.xchannel)
        yscale = util.scale_factory(self.yscale, experiment, self.ychannel)

        kwargs['xscale'] = xscale
        kwargs['yscale'] = yscale
        
        scaled_xdata = xscale(data[self.xchannel])
        data = data[~np.isnan(scaled_xdata)]
        scaled_xdata = scaled_xdata[~np.isnan(scaled_xdata)]

        scaled_ydata = yscale(data[self.ychannel])
        data = data[~np.isnan(scaled_ydata)]
        scaled_ydata = scaled_ydata[~np.isnan(scaled_ydata)]
        

        # find good bin counts
        num_xbins = util.num_hist_bins(scaled_xdata)
        num_ybins = util.num_hist_bins(scaled_ydata)
        
        # there are situations where this produces an unreasonable estimate.
        if num_xbins > self._max_bins:
            warnings.warn("Capping X bins to {}! To increase this limit, "
                          "change _max_bins"
                          .format(self._max_bins))
            num_xbins = self._max_bins
            
        if num_ybins > self._max_bins:
            warnings.warn("Capping Y bins to {}! To increase this limit, "
                          "change _max_bins"
                          .format(self._max_bins))
            num_ybins = self._max_bins
      
        kwargs.setdefault('smoothed', False)
        if kwargs['smoothed']:
            num_xbins //= 2
            num_ybins //= 2
                
        _, xedges, yedges = np.histogram2d(scaled_xdata, 
                                           scaled_ydata, 
                                           bins = (num_xbins, num_ybins))

        kwargs['xedges'] = xscale.inverse(xedges)
        kwargs['yedges'] = yscale.inverse(yedges)
        
        kwargs.setdefault('antialiased', True)

        g = sns.FacetGrid(data,
                          size = 6,
                          aspect = 1.5, 
                          col = (self.xfacet if self.xfacet else None),
                          row = (self.yfacet if self.yfacet else None),
                          hue = (self.huefacet if self.huefacet else None),
                          col_order = (np.sort(data[self.xfacet].unique()) if self.xfacet else None),
                          row_order = (np.sort(data[self.yfacet].unique()) if self.yfacet else None),
                          hue_order = (np.sort(data[self.huefacet].unique()) if self.huefacet else None),
                          sharex = False,
                          sharey = False)
         
        for ax in g.axes.flatten():
            ax.set_xscale(self.xscale, **xscale.mpl_params)
            ax.set_yscale(self.yscale, **yscale.mpl_params)
        
        g.map(_hist2d, self.xchannel, self.ychannel, **kwargs)

        # if we have a hue facet and a lot of hues, make a color bar instead
        # of a super-long legend.
        
        if self.huefacet:
            current_palette = mpl.rcParams['axes.color_cycle']
            if len(g.hue_names) > len(current_palette):
                plot_ax = plt.gca()
                cmap = mpl.colors.ListedColormap(sns.color_palette("husl", 
                                                                   n_colors = len(g.hue_names)))
                cax, _ = mpl.colorbar.make_axes(plt.gca())
                norm = mpl.colors.Normalize(vmin = np.min(g.hue_names), 
                                            vmax = np.max(g.hue_names), 
                                            clip = False)
                mpl.colorbar.ColorbarBase(cax, 
                                          cmap = cmap, 
                                          norm = norm,
                                          label = self.huefacet)
                plt.sca(plot_ax)
            else:
                g.add_legend(title = self.huefacet)
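
A compact sketch of the 2-D binning in this example: histogram in the scaled space, then map the returned edges back through the inverse transform (np.log10 and 10**x stand in for the cytoflow xscale/yscale objects):

import numpy as np

x = np.random.lognormal(2.0, 0.5, 10000)
y = np.random.lognormal(1.0, 0.5, 10000)

# bin in log space, where the data is roughly uniform...
_, xedges, yedges = np.histogram2d(np.log10(x), np.log10(y), bins=(50, 50))

# ...then report the edges in the original data space
xedges, yedges = np.power(10, xedges), np.power(10, yedges)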
Example 9
    def plot(self, experiment, **kwargs):
        """Plot a faceted histogram view of a channel"""
        
        if not experiment:
            raise util.CytoflowViewError("No experiment specified")
        
        if not self.xchannel:
            raise util.CytoflowViewError("X channel not specified")
        
        if self.xchannel not in experiment.data:
            raise util.CytoflowViewError("X channel {0} not in the experiment"
                                    .format(self.xchannel))
            
        if not self.ychannel:
            raise util.CytoflowViewError("Y channel not specified")
        
        if self.ychannel not in experiment.data:
            raise util.CytoflowViewError("Y channel {0} not in the experiment"
                                    .format(self.ychannel))
        
        if self.xfacet and self.xfacet not in experiment.conditions:
            raise util.CytoflowViewError("X facet {0} not in the experiment"
                                    .format(self.xfacet))
        
        if self.yfacet and self.yfacet not in experiment.conditions:
            raise util.CytoflowViewError("Y facet {0} not in the experiment"
                                    .format(self.yfacet))
        
        if self.huefacet and self.huefacet not in experiment.metadata:
            raise util.CytoflowViewError("Hue facet {0} not in the experiment"
                                    .format(self.huefacet))

        if self.subset:
            try: 
                data = experiment.query(self.subset)
            except Exception:
                raise util.CytoflowViewError("Subset string '{0}' isn't valid"
                                        .format(self.subset))
                            
            if len(data.index) == 0:
                raise util.CytoflowViewError("Subset string '{0}' returned no events"
                                        .format(self.subset))
        else:
            data = experiment.data
        
        #kwargs.setdefault('histtype', 'stepfilled')
        #kwargs.setdefault('alpha', 0.5)
        kwargs.setdefault('edgecolor', 'none')
        #kwargs.setdefault('mincnt', 1)
        #kwargs.setdefault('bins', 'log')
        kwargs.setdefault('antialiased', True)
            
        xmin, xmax = (np.amin(data[self.xchannel]), np.amax(data[self.xchannel]))
        ymin, ymax = (np.amin(data[self.ychannel]), np.amax(data[self.ychannel]))
        # to avoid issues with singular data, expand the min/max pairs
        xmin, xmax = mtrans.nonsingular(xmin, xmax, expander=0.1)
        ymin, ymax = mtrans.nonsingular(ymin, ymax, expander=0.1)
        
        extent = (xmin, xmax, ymin, ymax)
        kwargs.setdefault('extent', extent)
        
        xbins = util.num_hist_bins(experiment[self.xchannel])
        ybins = util.num_hist_bins(experiment[self.ychannel])
        bins = np.mean([xbins, ybins])
        
        kwargs.setdefault('bins', bins) # Do not move above.  don't ask.

        g = sns.FacetGrid(data,
                          size = 6,
                          aspect = 1.5, 
                          col = (self.xfacet if self.xfacet else None),
                          row = (self.yfacet if self.yfacet else None),
                          hue = (self.huefacet if self.huefacet else None),
                          col_order = (np.sort(data[self.xfacet].unique()) if self.xfacet else None),
                          row_order = (np.sort(data[self.yfacet].unique()) if self.yfacet else None),
                          hue_order = (np.sort(data[self.huefacet].unique()) if self.huefacet else None),
                          sharex = False,
                          sharey = False)
        
        if(self.xscale != "linear" or self.yscale != "linear"):
            warnings.warn("hexbin is broken with scales other than \"linear\"",
                          util.CytoflowViewWarning)

        xscale = util.scale_factory(self.xscale, experiment, self.xchannel)
        yscale = util.scale_factory(self.yscale, experiment, self.ychannel)
        
        for ax in g.axes.flatten():
            ax.set_xscale(self.xscale, **xscale.mpl_params)
            ax.set_yscale(self.yscale, **yscale.mpl_params)
        
        g.map(plt.hexbin, self.xchannel, self.ychannel, **kwargs)
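
mtrans.nonsingular is what keeps the hexbin extent usable above: if the min and max coincide (e.g. a channel with a single value), it pads them apart instead of handing hexbin a zero-area extent. For example:

import matplotlib.transforms as mtrans

xmin, xmax = mtrans.nonsingular(5.0, 5.0, expander=0.1)
# identical inputs come back padded to roughly (4.5, 5.5)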
Example 10
    def plot(self, experiment, **kwargs):
        """Plot a faceted histogram view of a channel"""

        if not experiment:
            raise util.CytoflowViewError("No experiment specified")

        if self.channel not in experiment.data:
            raise util.CytoflowViewError(
                "Channel {0} not in the experiment".format(self.channel))

        if self.xfacet and self.xfacet not in experiment.conditions:
            raise util.CytoflowViewError(
                "X facet {0} not in the experiment".format(self.xfacet))

        if self.yfacet and self.yfacet not in experiment.conditions:
            raise util.CytoflowViewError(
                "Y facet {0} not in the experiment".format(self.yfacet))

        if self.huefacet and self.huefacet not in experiment.conditions:
            raise util.CytoflowViewError(
                "Hue facet {0} not in the experiment".format(self.huefacet))

        if self.subset:
            try:
                data = experiment.query(self.subset)
            except Exception:
                raise util.CytoflowViewError(
                    "Subset string '{0}' isn't valid".format(self.subset))

            if len(data.index) == 0:
                raise util.CytoflowViewError(
                    "Subset string '{0}' returned no events".format(
                        self.subset))
        else:
            data = experiment.data

        # get the scale
        scale = util.scale_factory(self.scale, experiment, self.channel)
        scaled_data = scale(data[self.channel])

        kwargs.setdefault('histtype', 'stepfilled')
        kwargs.setdefault('alpha', 0.5)
        kwargs.setdefault('antialiased', True)

        # estimate a "good" number of bins; see cytoflow.utility.num_hist_bins
        # for a reference.

        num_bins = util.num_hist_bins(scaled_data)
        num_bins = 50 if num_bins < 50 else num_bins

        xmin = bottleneck.nanmin(scaled_data)
        xmax = bottleneck.nanmax(scaled_data)

        if (self.huefacet and "bins" in experiment.metadata[self.huefacet] and
                experiment.metadata[self.huefacet]["bin_scale"] == self.scale):
            # if we color facet by the result of a BinningOp and we don't
            # match the BinningOp bins with the histogram bins, we get
            # gnarly aliasing.

            # each color gets at least one bin.  however, if the estimated
            # number of bins for the histogram is much larger than the
            # number of colors, sub-divide each color into multiple bins.
            bins = experiment.metadata[self.huefacet]["bins"]
            bins = np.append(bins, xmax)

            num_hues = len(data[self.huefacet].unique())
            bins_per_hue = math.ceil(num_bins / num_hues)

            new_bins = [xmin]
            for end in [b for b in bins if (b > xmin and b <= xmax)]:
                new_bins = np.append(
                    new_bins,
                    np.linspace(new_bins[-1],
                                end,
                                bins_per_hue + 1,
                                endpoint=True)[1:])

            bins = scale.inverse(new_bins)
        else:
            bin_width = (xmax - xmin) / num_bins
            bins = scale.inverse(np.arange(xmin, xmax, bin_width))
            bins = np.append(bins, scale.inverse(xmax))

        kwargs.setdefault('bins', bins)

        # mask out the data that's not in the scale domain
        data = data[~np.isnan(scaled_data)]

        g = sns.FacetGrid(data,
                          size=6,
                          aspect=1.5,
                          col=(self.xfacet if self.xfacet else None),
                          row=(self.yfacet if self.yfacet else None),
                          hue=(self.huefacet if self.huefacet else None),
                          col_order=(np.sort(data[self.xfacet].unique())
                                     if self.xfacet else None),
                          row_order=(np.sort(data[self.yfacet].unique())
                                     if self.yfacet else None),
                          hue_order=(np.sort(data[self.huefacet].unique())
                                     if self.huefacet else None),
                          legend_out=False,
                          sharex=False,
                          sharey=False)

        # set the scale for each set of axes; can't just call plt.xscale()
        for ax in g.axes.flatten():
            ax.set_xscale(self.scale, **scale.mpl_params)

        g.map(plt.hist, self.channel, **kwargs)

        # if we have a hue facet and a lot of hues, make a color bar instead
        # of a super-long legend.

        if self.huefacet:
            current_palette = mpl.rcParams['axes.color_cycle']
            if len(g.hue_names) > len(current_palette):
                plot_ax = plt.gca()
                cmap = mpl.colors.ListedColormap(
                    sns.color_palette("husl", n_colors=len(g.hue_names)))
                cax, _ = mpl.colorbar.make_axes(plt.gca())
                norm = mpl.colors.Normalize(vmin=np.min(g.hue_names),
                                            vmax=np.max(g.hue_names),
                                            clip=False)
                mpl.colorbar.ColorbarBase(cax, cmap=cmap, norm=norm)
                plt.sca(plot_ax)
            else:
                g.add_legend()
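
The colorbar fallback at the end of these plot() methods is worth seeing on its own: when there are more hue values than colors in the palette, build a ListedColormap and draw a colorbar instead of a long legend. A sketch using the same (older) mpl.colorbar.ColorbarBase API as above — newer matplotlib spells this fig.colorbar:

import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

hue_names = np.arange(20)                     # too many entries for a readable legend
cmap = mpl.colors.ListedColormap(sns.color_palette("husl", n_colors=len(hue_names)))
norm = mpl.colors.Normalize(vmin=hue_names.min(), vmax=hue_names.max(), clip=False)

fig, ax = plt.subplots()
cax, _ = mpl.colorbar.make_axes(ax)           # steal space from ax for the colorbar
mpl.colorbar.ColorbarBase(cax, cmap=cmap, norm=norm, label="hue")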