Ejemplo n.º 1
0
def _bivariate_kdeplot(x, y, filled, kernel, bw, gridsize, cut, clip, axlabel,
                       ax, **kwargs):
    """Draw a two-dimensional kernel density estimate as contours on ``ax``.

    The density grid is computed with the statsmodels backend; if that raises
    ``ImportError`` the scipy backend is used instead. ``n_levels`` and
    ``cmap`` are consumed from ``kwargs`` and every remaining keyword is
    forwarded to the matplotlib contour call. Returns ``ax``.
    """
    # Normalize the clip specification into one (low, high) pair per variable
    if clip is None:
        unbounded = (-np.inf, np.inf)
        clip = [unbounded, unbounded]
    elif np.ndim(clip) == 1:
        clip = [clip, clip]

    # Evaluate the density on a grid, preferring statsmodels
    kde_args = (x, y, bw, gridsize, cut, clip)
    try:
        xx, yy, z = _statsmodels_bivariate_kde(*kde_args)
    except ImportError:
        xx, yy, z = _scipy_bivariate_kde(*kde_args)

    # Options handled here rather than by matplotlib
    n_levels = kwargs.pop("n_levels", 10)
    default_cmap = "BuGn" if filled else "BuGn_d"
    cmap = kwargs.pop("cmap", default_cmap)

    # A "_d" suffix asks for a dark blend built from the reversed palette
    if isinstance(cmap, str) and cmap.endswith("_d"):
        base_colors = color_palette(cmap.replace("_d", "_r"), 2)
        cmap = blend_palette(["#333333"] + list(base_colors), as_cmap=True)

    # Filled and line contours share the same call signature
    draw_contours = ax.contourf if filled else ax.contour
    draw_contours(xx, yy, z, n_levels, cmap=cmap, **kwargs)

    # Use the input names (e.g. from a pandas Series) as axis labels
    if hasattr(x, "name") and axlabel:
        ax.set_xlabel(x.name)
    if hasattr(y, "name") and axlabel:
        ax.set_ylabel(y.name)

    return ax
Ejemplo n.º 2
0
def _bivariate_kdeplot(x, y, filled, kernel, bw, gridsize, cut, clip, axlabel,
                       ax, **kwargs):
    """Draw a two-dimensional kernel density estimate as contours on ``ax``.

    The density grid is computed with the statsmodels backend; if that raises
    ``ImportError`` the scipy backend is used instead. ``n_levels`` and
    ``cmap`` are consumed from ``kwargs`` and every remaining keyword is
    forwarded to the matplotlib contour call. Returns ``ax``.
    """
    # Normalize the clip specification into one (low, high) pair per variable
    if clip is None:
        unbounded = (-np.inf, np.inf)
        clip = [unbounded, unbounded]
    elif np.ndim(clip) == 1:
        clip = [clip, clip]

    # Evaluate the density on a grid, preferring statsmodels
    kde_args = (x, y, bw, gridsize, cut, clip)
    try:
        xx, yy, z = _statsmodels_bivariate_kde(*kde_args)
    except ImportError:
        xx, yy, z = _scipy_bivariate_kde(*kde_args)

    # Options handled here rather than by matplotlib
    n_levels = kwargs.pop("n_levels", 10)
    default_cmap = "BuGn" if filled else "BuGn_d"
    cmap = kwargs.pop("cmap", default_cmap)

    # A "_d" suffix asks for a dark blend built from the reversed palette
    if isinstance(cmap, str) and cmap.endswith("_d"):
        base_colors = color_palette(cmap.replace("_d", "_r"), 2)
        cmap = blend_palette(["#333333"] + list(base_colors), as_cmap=True)

    # Filled and line contours share the same call signature
    draw_contours = ax.contourf if filled else ax.contour
    draw_contours(xx, yy, z, n_levels, cmap=cmap, **kwargs)

    # Use the input names (e.g. from a pandas Series) as axis labels
    if hasattr(x, "name") and axlabel:
        ax.set_xlabel(x.name)
    if hasattr(y, "name") and axlabel:
        ax.set_ylabel(y.name)

    return ax
Ejemplo n.º 3
0
def set_color_palette(name, n_colors=6, desat=None):
    """Set the matplotlib color cycle in one of a variety of ways.

    The palette is built by ``utils.color_palette`` and installed as the
    default line color cycle; its first color also becomes the default patch
    face color.

    Parameters
    ----------
    name : hls | husl | matplotlib colormap | seaborn color palette
        palette name
    n_colors : int
        only relevant for hls or matplotlib palettes
    desat : float
        desaturation factor for each color

    """
    colors = utils.color_palette(name, n_colors, desat)

    # "axes.color_cycle" was deprecated in matplotlib 1.5 and removed in later
    # releases in favor of "axes.prop_cycle"; use the new key when it exists
    # and fall back to the legacy one on old installations.
    if "axes.prop_cycle" in mpl.rcParams:
        mpl.rcParams["axes.prop_cycle"] = mpl.cycler("color", colors)
    else:
        mpl.rcParams["axes.color_cycle"] = colors
    mpl.rcParams["patch.facecolor"] = colors[0]
Ejemplo n.º 4
0
def set_color_palette(name, n_colors=6, desat=None):
    """Set the matplotlib color cycle in one of a variety of ways.

    The palette is built by ``utils.color_palette`` and installed as the
    default line color cycle; its first color also becomes the default patch
    face color.

    Parameters
    ----------
    name : hls | husl | matplotlib colormap | seaborn color palette
        palette name
    n_colors : int
        only relevant for hls or matplotlib palettes
    desat : float
        desaturation factor for each color

    """
    colors = utils.color_palette(name, n_colors, desat)

    # "axes.color_cycle" was deprecated in matplotlib 1.5 and removed in later
    # releases in favor of "axes.prop_cycle"; use the new key when it exists
    # and fall back to the legacy one on old installations.
    if "axes.prop_cycle" in mpl.rcParams:
        mpl.rcParams["axes.prop_cycle"] = mpl.cycler("color", colors)
    else:
        mpl.rcParams["axes.color_cycle"] = colors
    mpl.rcParams["patch.facecolor"] = colors[0]
Ejemplo n.º 5
0
def _bivariate_kde(x, y, filled, kernel, bw, gridsize, cut, clip, axlabel, ax,
                   **kwargs):
    """Draw a two-dimensional kernel density estimate as contours on ``ax``.

    The density is fit with statsmodels' ``KDEMultivariate`` and evaluated on
    a grid built from ``_kde_support`` for each variable. ``n_levels`` and
    ``cmap`` are consumed from ``kwargs``; the remaining keywords go to the
    matplotlib contour call. Returns ``ax``.
    """
    # Normalize the clip specification into one (low, high) pair per variable
    if clip is None:
        unbounded = (-np.inf, np.inf)
        clip = [unbounded, unbounded]
    elif np.ndim(clip) == 1:
        clip = [clip, clip]

    # Resolve the bandwidth into one entry per variable: a string names a
    # statsmodels rule-of-thumb function, a scalar is shared by both axes
    if isinstance(bw, str):
        bw_rule = getattr(sm.nonparametric.bandwidths, "bw_" + bw)
        bw = [bw_rule(x), bw_rule(y)]
    elif np.isscalar(bw):
        bw = [bw, bw]

    # Fit the joint density and evaluate it on a rectangular grid
    kde = sm.nonparametric.KDEMultivariate([x, y], "cc", bw)
    supports = [_kde_support(values, kde.bw[i], gridsize, cut, clip[i])
                for i, values in enumerate((x, y))]
    xx, yy = np.meshgrid(*supports)
    grid_points = [xx.ravel(), yy.ravel()]
    z = kde.pdf(grid_points).reshape(xx.shape)

    # Options handled here rather than by matplotlib
    n_levels = kwargs.pop("n_levels", 10)
    default_cmap = "BuGn" if filled else "BuGn_d"
    cmap = kwargs.pop("cmap", default_cmap)

    # A "_d" suffix asks for a dark blend built from the reversed palette
    if isinstance(cmap, str) and cmap.endswith("_d"):
        base_colors = color_palette(cmap.replace("_d", "_r"), 2)
        cmap = blend_palette(["#333333"] + list(base_colors), as_cmap=True)

    draw_contours = ax.contourf if filled else ax.contour
    draw_contours(xx, yy, z, n_levels, cmap=cmap, **kwargs)

    # Use the input names (e.g. from a pandas Series) as axis labels
    if hasattr(x, "name") and axlabel:
        ax.set_xlabel(x.name)
    if hasattr(y, "name") and axlabel:
        ax.set_ylabel(y.name)

    return ax
Ejemplo n.º 6
0
def _box_colors(vals, color):
    """Pick patch colors and a matching gray for boxplots or violinplots.

    ``color`` may be None (a husl palette is generated), a single matplotlib
    color (repeated for every entry of ``vals``), or anything accepted by
    ``color_palette``. Returns the desaturated RGB colors, one per entry of
    ``vals``, together with an RGB gray for the line art.
    """
    to_rgb = mpl.colors.colorConverter.to_rgb

    if color is None:
        colors = husl_palette(len(vals), l=.7)
    else:
        try:
            # A single valid matplotlib color is shared by every box
            rgb = to_rgb(color)
            colors = [rgb for _ in vals]
        except ValueError:
            # Otherwise interpret the argument as a palette specification
            colors = color_palette(color, len(vals))

    # Patches cover a lot of area, so tone the colors down a little
    colors = [desaturate(to_rgb(c), .7) for c in colors]

    # The line gray is derived from the darkest patch lightness
    darkest = min(colorsys.rgb_to_hls(*c)[1] for c in colors)
    level = darkest * .6
    gray = (level, level, level)

    return colors, gray
Ejemplo n.º 7
0
def _box_colors(vals, color):
    """Pick patch colors and a matching gray for boxplots or violinplots.

    ``color`` may be None (a husl palette is generated), a single matplotlib
    color (repeated for every entry of ``vals``), or anything accepted by
    ``color_palette``. Returns the desaturated RGB colors, one per entry of
    ``vals``, together with an RGB gray for the line art.
    """
    to_rgb = mpl.colors.colorConverter.to_rgb

    if color is None:
        colors = husl_palette(len(vals), l=.7)
    else:
        try:
            # A single valid matplotlib color is shared by every box
            rgb = to_rgb(color)
            colors = [rgb for _ in vals]
        except ValueError:
            # Otherwise interpret the argument as a palette specification
            colors = color_palette(color, len(vals))

    # Patches cover a lot of area, so tone the colors down a little
    colors = [desaturate(to_rgb(c), .7) for c in colors]

    # The line gray is derived from the darkest patch lightness
    darkest = min(colorsys.rgb_to_hls(*c)[1] for c in colors)
    level = darkest * .6
    gray = (level, level, level)

    return colors, gray
Ejemplo n.º 8
0
def set_color_palette(name, n_colors=8, desat=None, h=.01, l=.6, s=.65):
    """Set the matplotlib color cycle in one of a variety of ways.

    The palette is built by ``utils.color_palette`` and installed as the
    default line color cycle; its first color also becomes the default patch
    face color.

    Parameters
    ----------
    name : hls | matplotlib colormap | seaborn color palette
        palette name
    n_colors : int
        only relevant for hls or matplotlib palettes
    desat : float
        desaturation factor for each color
    h : float
        first hue for hls spokes
    l : float
        lightness of hls spokes
    s : float
        saturation of hls spokes

    """
    colors = utils.color_palette(name, n_colors, desat, h, l, s)

    # "axes.color_cycle" was deprecated in matplotlib 1.5 and removed in later
    # releases in favor of "axes.prop_cycle"; use the new key when it exists
    # and fall back to the legacy one on old installations.
    if "axes.prop_cycle" in mpl.rcParams:
        mpl.rcParams["axes.prop_cycle"] = mpl.cycler("color", colors)
    else:
        mpl.rcParams["axes.color_cycle"] = colors
    mpl.rcParams["patch.facecolor"] = colors[0]
Ejemplo n.º 9
0
def set_color_palette(name, n_colors=8, desat=None, h=.01, l=.6, s=.65):
    """Set the matplotlib color cycle in one of a variety of ways.

    The palette is built by ``utils.color_palette`` and installed as the
    default line color cycle; its first color also becomes the default patch
    face color.

    Parameters
    ----------
    name : hls | matplotlib colormap | seaborn color palette
        palette name
    n_colors : int
        only relevant for hls or matplotlib palettes
    desat : float
        desaturation factor for each color
    h : float
        first hue for hls spokes
    l : float
        lightness of hls spokes
    s : float
        saturation of hls spokes

    """
    colors = utils.color_palette(name, n_colors, desat, h, l, s)

    # "axes.color_cycle" was deprecated in matplotlib 1.5 and removed in later
    # releases in favor of "axes.prop_cycle"; use the new key when it exists
    # and fall back to the legacy one on old installations.
    if "axes.prop_cycle" in mpl.rcParams:
        mpl.rcParams["axes.prop_cycle"] = mpl.cycler("color", colors)
    else:
        mpl.rcParams["axes.color_cycle"] = colors
    mpl.rcParams["patch.facecolor"] = colors[0]
Ejemplo n.º 10
0
def tsplot(x,
           data,
           err_style=["ci_band"],
           ci=68,
           interpolate=True,
           estimator=np.mean,
           n_boot=10000,
           smooth=False,
           err_palette=None,
           ax=None,
           **kwargs):
    """Plot timeseries from a set of observations.

    Parameters
    ----------
    x : n_tp array
        x values
    data : n_obs x n_tp array
        array of timeseries data where first axis is e.g. subjects
    err_style : string, list of strings, or None
        names of ways to plot uncertainty across observations from set of
        {ci_band, ci_bars, boot_traces, boot_kde, obs_traces, obs_points};
        each name is resolved to a module-level ``_plot_<name>`` function
    ci : int or list of ints
        confidence interval size(s). if a list, it will stack the error
        plots for each confidence interval
    interpolate : boolean
        if True the central trace is drawn as a line without markers,
        otherwise as "o" markers without a line (unless ``marker`` or
        ``linestyle`` are given in kwargs)
    estimator : callable
        function to determine central tendency and to pass to bootstrap
        must take an ``axis`` argument
    n_boot : int
        number of bootstrap iterations
    smooth : boolean
        whether to perform a smooth bootstrap (resample from KDE)
    err_palette : seaborn color palette, optional
        if given, error styles with "obs" in their name receive one color
        per observation from this palette instead of the main line color
    ax : axis object, optional
        plot in given axis; if None creates a new figure
    kwargs : further keyword arguments for main call to plot()

    Returns
    -------
    ax : matplotlib axis
        axis with plot data

    Raises
    ------
    ValueError
        if an entry of ``err_style`` has no matching ``_plot_<name>`` function

    """
    if ax is None:
        ax = plt.subplot(111)

    # Accept a bare style name (or None) as well as a list of names; iterating
    # a plain string would otherwise yield single characters
    if err_style is None:
        err_style = []
    elif isinstance(err_style, str):
        err_style = [err_style]

    # Bootstrap the data for confidence intervals
    boot_data = moss.bootstrap(data,
                               n_boot=n_boot,
                               smooth=smooth,
                               axis=0,
                               func=estimator)
    if not hasattr(ci, "__iter__"):
        ci = [ci]
    # Divide by a float so odd interval widths (e.g. 95) are not truncated
    # under integer division
    ci_limits = [(50 - width / 2., 50 + width / 2.) for width in ci]
    cis = [moss.percentiles(boot_data, limits, axis=0)
           for limits in ci_limits]
    central_data = estimator(data, axis=0)

    # Plot the timeseries line to get its color
    line, = ax.plot(x, central_data, **kwargs)
    color = line.get_color()
    line.remove()
    kwargs.pop("color", None)

    # Use subroutines to plot the uncertainty
    for style in err_style:

        # Grab the function from the global environment
        try:
            plot_func = globals()["_plot_%s" % style]
        except KeyError:
            raise ValueError("%s is not a valid err_style" % style)

        # Possibly set up to plot each observation in a different color
        use_err_palette = err_palette is not None and "obs" in style
        style_color = color
        if use_err_palette:
            style_color = color_palette(err_palette, len(data), desat=.99)

        plot_kwargs = dict(ax=ax,
                           x=x,
                           data=data,
                           boot_data=boot_data,
                           central_data=central_data,
                           color=style_color)

        # One error representation per requested confidence interval
        for ci_i in cis:
            plot_kwargs["ci"] = ci_i
            plot_func(**plot_kwargs)

    # Replot the central trace so it is prominent
    marker = kwargs.pop("marker", "" if interpolate else "o")
    linestyle = kwargs.pop("linestyle", "-" if interpolate else "")
    ax.plot(x,
            central_data,
            color=color,
            marker=marker,
            linestyle=linestyle,
            **kwargs)

    return ax
Ejemplo n.º 11
0
def coefplot(formula, data, groupby=None, intercept=False, ci=95,
             palette="husl"):
    """Plot the coefficients from a linear model.

    Each coefficient is drawn as a point with a vertical line spanning its
    confidence interval. Without ``groupby`` a single axis shows every term;
    with ``groupby`` one model is fit per group and each term gets its own
    axis with the groups along x.

    Parameters
    ----------
    formula : string
        patsy formula for ols model
    data : dataframe
        data for the plot; formula terms must appear in columns
    groupby : grouping object, optional
        object to group data with to fit conditional models
    intercept : bool, optional
        if False, strips the first (intercept) term before plotting
    ci : float, optional
        size of confidence intervals
    palette : seaborn color palette, optional
        palette for the horizontal plots

    """
    # Float literal keeps this correct even without true division
    alpha = 1 - ci / 100.
    if groupby is None:
        # Fit once and reuse the result for both estimates and intervals
        fit = sf.ols(formula, data).fit()
        coefs = fit.params
        cis = fit.conf_int(alpha)
    else:
        grouped = data.groupby(groupby)
        coefs = grouped.apply(lambda d: sf.ols(formula, d).fit().params).T
        cis = grouped.apply(lambda d: sf.ols(formula, d).fit().conf_int(alpha))

    # Possibly ignore the intercept (the first row of the coefficient table)
    if not intercept:
        coefs = coefs.iloc[1:]

    n_terms = len(coefs)

    # Figure size heuristics based on the default figure size
    w, h = mpl.rcParams["figure.figsize"]

    def hsize(n):
        return n * (h / 2)

    def wsize(n):
        return n * (w / (4 * (n / 5.)))

    # Plot separately depending on groupby
    if groupby is None:
        colors = itertools.cycle(color_palette(palette, n_terms))
        f, ax = plt.subplots(1, 1, figsize=(wsize(n_terms), hsize(1)))
        for i, term in enumerate(coefs.index):
            color = next(colors)
            low, high = cis.loc[term]
            ax.plot([i, i], [low, high], c=color,
                    solid_capstyle="round", lw=2.5)
            ax.plot(i, coefs.loc[term], "o", c=color, ms=8)
        ax.set_xlim(-.5, n_terms - .5)
        ax.axhline(0, ls="--", c="dimgray")
        ax.set_xticks(range(n_terms))
        ax.set_xticklabels(coefs.index)

    else:
        n_groups = len(coefs.columns)
        f, axes = plt.subplots(n_terms, 1, sharex=True,
                               figsize=(wsize(n_groups), hsize(n_terms)))
        # plt.subplots returns a bare axis rather than an array for one row
        if n_terms == 1:
            axes = [axes]
        colors = itertools.cycle(color_palette(palette, n_groups))
        for ax, term in zip(axes, coefs.index):
            for i, group in enumerate(coefs.columns):
                color = next(colors)
                # Interval rows are keyed by (group, term)
                low, high = cis.loc[(group, term)]
                ax.plot([i, i], [low, high], c=color,
                        solid_capstyle="round", lw=2.5)
                ax.plot(i, coefs.loc[term, group], "o", c=color, ms=8)
            ax.set_xlim(-.5, n_groups - .5)
            ax.axhline(0, ls="--", c="dimgray")
            ax.set_title(term)
        # Only the bottom axis carries the shared x labelling
        ax.set_xlabel(groupby)
        ax.set_xticks(range(n_groups))
        ax.set_xticklabels(coefs.columns)
Ejemplo n.º 12
0
def _lmplot_partial_out(values, data, confounds):
    """Remove each confound column of `data` from `values`, keeping the mean."""
    if confounds is None:
        return values
    for var in confounds:
        # Center a fresh object so the DataFrame column is never edited
        conf = data[var] - data[var].mean()
        center = values.mean()
        values = moss.vector_reject(values - center, conf) + center
    return values


def lmplot(x, y, data, color=None, row=None, col=None, col_wrap=None,
           x_estimator=None, x_ci=95, n_boot=5000, fit_reg=True,
           order=1, ci=95, logistic=False, truncate=False,
           x_partial=None, y_partial=None, x_jitter=None, y_jitter=None,
           sharex=True, sharey=True, palette="husl", size=None,
           scatter_kws=None, line_kws=None, palette_kws=None):
    """Plot a linear model from a DataFrame.

    The figure is built in two passes over the row/col/color facets: first
    the scatter (or point-estimate) layer, then, after the axis limits have
    been autoscaled, the regression line with its bootstrapped confidence
    band.

    Parameters
    ----------
    x, y : strings
        column names in `data` DataFrame for x and y variables
    data : DataFrame
        source of data for the model
    color : string, optional
        DataFrame column name to group the model by color
    row, col : strings, optional
        DataFrame column names to make separate plot facets
    col_wrap : int, optional
        wrap col variable at this width - cannot be used with row facet
    x_estimator : callable, optional
        Interpret X values as factor labels and use this function
        to plot the point estimate and bootstrapped CI
    x_ci : int optional
        size of confidence interval for x_estimator error bars
    n_boot : int, optional
        number of bootstrap iterations to perform
    fit_reg : bool, optional
        if True fit a regression model by color/row/col and plot
    order : int, optional
        order of the regression polynomial to fit (default = 1)
    ci : int, optional
        confidence interval for the regression line
    logistic : bool, optional
        fit the regression line with logistic regression
    truncate : bool, optional
        if True, only fit line from data min to data max
    {x, y}_partial : string or list of strings, optional
        regress these variables out of the factors before plotting
    {x, y}_jitter : float, optional
        parameters for uniformly distributed random noise added to positions
    sharex, sharey : bools, optional
        only relevant if faceting; passed to plt.subplots
    palette : seaborn color palette argument
        if using separate plots by color, draw with this color palette
    size : float, optional
        size (plots are square) for each plot facet
    {scatter, line}_kws : dictionary
        keyword arguments to pass to the underlying plot functions; the
        dictionaries are copied and never modified
    palette_kws : dictionary
        keyword arguments for seaborn.color_palette

    Raises
    ------
    ValueError
        if `col_wrap` is given without `col`, or together with `row`

    """
    # TODO
    # - legend when fit_line is False
    # - wrap title when wide

    # First sort out the general figure layout
    if size is None:
        size = mpl.rcParams["figure.figsize"][1]

    if col is None and col_wrap is not None:
        raise ValueError("Need column facet variable for `col_wrap`")
    if row is not None and col_wrap is not None:
        raise ValueError("Cannot facet rows when using `col_wrap`")

    nrow = 1 if row is None else len(data[row].unique())
    ncol = 1 if col is None else len(data[col].unique())

    if col_wrap is not None:
        ncol = col_wrap
        # Explicit float so the ceiling is right under integer division too
        nrow = int(np.ceil(len(data[col].unique()) / float(col_wrap)))

    f, axes = plt.subplots(nrow, ncol, sharex=sharex, sharey=sharey,
                           figsize=(size * ncol, size * nrow))
    axes = np.atleast_2d(axes).reshape(nrow, ncol)

    # Build the facet masks from whether a facet variable was requested, not
    # from the grid shape: the level arrays are needed for the titles even
    # when a variable has a single level or the columns are wrapped at 1
    if row is None:
        row_masks = [np.repeat(True, len(data))]
    else:
        row_vals = np.sort(data[row].unique())
        row_masks = [data[row] == val for val in row_vals]

    if col is None:
        col_masks = [np.repeat(True, len(data))]
    else:
        col_vals = np.sort(data[col].unique())
        col_masks = [data[col] == val for val in col_vals]

    if x_partial is not None:
        if not isinstance(x_partial, list):
            x_partial = [x_partial]
    if y_partial is not None:
        if not isinstance(y_partial, list):
            y_partial = [y_partial]

    if palette_kws is None:
        palette_kws = {}

    # Sort out the plot colors
    color_factor = color
    if color_factor is None:
        hue_masks = [np.repeat(True, len(data))]
        colors = ["#222222"]
    else:
        hue_vals = np.sort(data[color_factor].unique())
        hue_masks = [data[color_factor] == val for val in hue_vals]
        colors = color_palette(palette, len(hue_masks), **palette_kws)

    # Default keyword arguments for plot components; work on copies so the
    # caller's dictionaries are left untouched by the pops below
    scatter_kws = {} if scatter_kws is None else dict(scatter_kws)
    line_kws = {} if line_kws is None else dict(line_kws)

    # Marker settings: point estimates default to larger markers than raw
    # scatter points, but an explicit value applies to either mode
    est_ms = scatter_kws.get("ms", 7)
    est_mew = scatter_kws.get("mew", 0)
    scatter_ms = scatter_kws.pop("ms", 4)
    scatter_mew = scatter_kws.pop("mew", 0)
    scatter_alpha = scatter_kws.pop("alpha", .77)

    # First walk through the facets and plot the scatters
    for row_i, row_mask in enumerate(row_masks):
        for col_j, col_mask in enumerate(col_masks):
            if col_wrap is not None:
                f_row = col_j // ncol
                f_col = col_j % ncol
            else:
                f_row, f_col = row_i, col_j
            ax = axes[f_row, f_col]
            if f_row + 1 == nrow:
                ax.set_xlabel(x)
            if f_col == 0:
                ax.set_ylabel(y)

            # Title the plot if we are faceting
            title = ""
            if row is not None:
                title += "%s = %s" % (row, row_vals[row_i])
            if row is not None and col is not None:
                title += " | "
            if col is not None:
                title += "%s = %s" % (col, col_vals[col_j])
            ax.set_title(title)

            for hue_k, hue_mask in enumerate(hue_masks):
                hue_color = colors[hue_k]
                data_ijk = data[row_mask & col_mask & hue_mask]

                if x_estimator is not None:
                    # Treat x as a factor: one point estimate per level
                    x_vals = data_ijk[x].unique()
                    y_vals = _lmplot_partial_out(data_ijk[y], data_ijk,
                                                 y_partial)

                    y_grouped = [np.array(y_vals[data_ijk[x] == v])
                                 for v in x_vals]

                    y_est = [x_estimator(y_i) for y_i in y_grouped]
                    y_boots = [moss.bootstrap(np.array(y_i),
                                              func=x_estimator,
                                              n_boot=n_boot)
                               for y_i in y_grouped]
                    ci_lims = [50 - x_ci / 2., 50 + x_ci / 2.]
                    y_ci = [moss.percentiles(y_i, ci_lims) for y_i in y_boots]
                    y_error = ci_to_errsize(np.transpose(y_ci), y_est)

                    ax.plot(x_vals, y_est, "o", mew=est_mew, ms=est_ms,
                            color=hue_color, **scatter_kws)
                    ax.errorbar(x_vals, y_est, y_error,
                                fmt=None, ecolor=hue_color)
                else:
                    x_ = _lmplot_partial_out(data_ijk[x], data_ijk, x_partial)
                    y_ = _lmplot_partial_out(data_ijk[y], data_ijk, y_partial)

                    # Add the noise out of place so the source columns (and
                    # their dtypes) are not touched
                    if x_jitter is not None:
                        x_ = x_ + np.random.uniform(-x_jitter, x_jitter,
                                                    x_.shape)
                    if y_jitter is not None:
                        y_ = y_ + np.random.uniform(-y_jitter, y_jitter,
                                                    y_.shape)
                    ax.plot(x_, y_, "o", color=hue_color,
                            alpha=scatter_alpha, mew=scatter_mew,
                            ms=scatter_ms, **scatter_kws)

    # Settle the axis limits before the regression pass reads them
    for ax_i in np.ravel(axes):
        ax_i.set_xmargin(.05)
        ax_i.autoscale_view()

    # Now walk through again and plot the regression estimate
    # and a confidence interval for the regression line
    if fit_reg:
        for row_i, row_mask in enumerate(row_masks):
            for col_j, col_mask in enumerate(col_masks):
                if col_wrap is not None:
                    f_row = col_j // ncol
                    f_col = col_j % ncol
                else:
                    f_row, f_col = row_i, col_j
                ax = axes[f_row, f_col]
                xlim = ax.get_xlim()

                for hue_k, hue_mask in enumerate(hue_masks):
                    hue_color = colors[hue_k]
                    data_ijk = data[row_mask & col_mask & hue_mask]
                    x_vals = np.array(data_ijk[x])
                    y_vals = np.array(data_ijk[y])
                    if not len(x_vals):
                        continue

                    # Sort out the limit of the fit
                    if truncate:
                        xx = np.linspace(x_vals.min(),
                                         x_vals.max(), 100)
                    else:
                        xx = np.linspace(xlim[0], xlim[1], 100)
                    xx_ = sm.add_constant(xx, prepend=True)

                    # Inner function to bootstrap the regression; it reads
                    # the prediction grid of the current facet
                    def _regress(x, y):
                        if logistic:
                            x_ = sm.add_constant(x, prepend=True)
                            fit = sm.GLM(y, x_,
                                         family=sm.families.Binomial()).fit()
                            reg = fit.predict(xx_)
                        else:
                            fit = np.polyfit(x, y, order)
                            reg = np.polyval(fit, xx)
                        return reg

                    # Remove nuisance variables with vector rejection
                    x_vals = _lmplot_partial_out(x_vals, data_ijk, x_partial)
                    y_vals = _lmplot_partial_out(y_vals, data_ijk, y_partial)

                    # Regression line confidence interval
                    if ci is not None:
                        ci_lims = [50 - ci / 2., 50 + ci / 2.]
                        boots = moss.bootstrap(x_vals, y_vals,
                                               func=_regress,
                                               n_boot=n_boot)
                        ci_band = moss.percentiles(boots, ci_lims, axis=0)
                        ax.fill_between(xx, *ci_band, color=hue_color,
                                        alpha=.15)

                    # Regression line
                    reg = _regress(x_vals, y_vals)
                    if color_factor is None:
                        label = ""
                    else:
                        label = hue_vals[hue_k]
                    ax.plot(xx, reg, color=hue_color,
                            label=str(label), **line_kws)
                    # Drawing the line must not change the settled limits
                    ax.set_xlim(xlim)

    # Plot the legend on the upper left facet and adjust the layout
    if color_factor is not None and color_factor not in [row, col]:
        axes[0, 0].legend(loc="best", title=color_factor)
    plt.tight_layout()
Ejemplo n.º 13
0
def tsplot(data, time=None, unit=None, condition=None, value=None,
           err_style="ci_band", ci=68, interpolate=True, color=None,
           estimator=np.mean, n_boot=5000, err_palette=None, err_kws=None,
           legend=True, ax=None, **kwargs):
    """Plot one or more timeseries with flexible representation of uncertainty.

    This function can take data specified either as a long-form (tidy)
    DataFrame or as an ndarray with dimensions for sampling unit, time, and
    (optionally) condition. The interpretation of some of the other parameters
    changes depending on the type of object passed as data.

    Parameters
    ----------
    data : DataFrame or ndarray
        Data for the plot. Should either be a "long form" dataframe or an
        array with dimensions (unit, time, condition). In both cases, the
        condition field/dimension is optional. The type of this argument
        determines the interpretation of the next few parameters.
    time : string or series-like
        Either the name of the field corresponding to time in the data
        DataFrame or x values for a plot when data is an array. If a Series,
        the name will be used to label the x axis.
    value : string
        Either the name of the field corresponding to the data values in
        the data DataFrame (i.e. the y coordinate) or a string that forms
        the y axis label when data is an array.
    unit : string
        Field in the data DataFrame identifying the sampling unit (e.g.
        subject, neuron, etc.). The error representation will collapse over
        units at each time/condition observation. This has no role when data
        is an array.
    condition : string or Series-like
        Either the name of the field identifying the condition an observation
        falls under in the data DataFrame, or a sequence of names with a length
        equal to the size of the third dimension of data. There will be a
        separate trace plotted for each condition. If condition is a Series
        with a name attribute, the name will form the title for the plot
        legend (unless legend is set to False).
    err_style : string or list of strings or None
        Names of ways to plot uncertainty across units from set of
        {ci_band, ci_bars, boot_traces, boot_kde, unit_traces, unit_points}.
        Can use one or more than one method.
    ci : float or list of floats in [0, 100]
        Confidence interval size(s). If a list, it will stack the error
        plots for each confidence interval. Only relevant for error styles
        with "ci" in the name.
    interpolate : boolean
        Whether to do a linear interpolation between each timepoint when
        plotting. The value of this parameter also determines the marker
        used for the main plot traces, unless marker is specified as a keyword
        argument.
    color : seaborn palette or matplotlib color name or dictionary
        Palette or color for the main plots and error representation (unless
        plotting by unit, which can be separately controlled with err_palette).
        If a dictionary, should map condition name to color spec.
    estimator : callable
        Function to determine central tendency and to pass to bootstrap
        must take an ``axis`` argument.
    n_boot : int
        Number of bootstrap iterations.
    err_palette: seaborn palette
        Palette name or list of colors used when plotting data for each unit.
    err_kws : dict, optional
        Keyword argument dictionary passed through to matplotlib function
        generating the error plot.
    legend : bool, optional
        If False, no legend is drawn. A legend is never drawn when there are
        no condition names.
    ax : axis object, optional
        Plot in given axis; if None uses the current axis.
    kwargs :
        Other keyword arguments are passed to main plot() call

    Returns
    -------
    ax : matplotlib axis
        axis with plot data

    Raises
    ------
    ValueError
        If ``color`` is a dictionary but no condition names are available, or
        if an error style name has no matching ``_plot_<name>`` function.

    """
    # Sort out default values for the parameters
    if ax is None:
        ax = plt.gca()

    if err_kws is None:
        err_kws = {}

    # Handle different types of input data
    if isinstance(data, pd.DataFrame):

        xlabel = time
        ylabel = value

        # Condition is optional
        if condition is None:
            if isinstance(color, dict):
                err = "Must have condition names if using color dict."
                raise ValueError(err)
            # Share the frame's index so groupby aligns the dummy key with
            # every row even when the index is not 0..n-1
            condition = pd.Series(np.ones(len(data)), index=data.index)
            legend = False
            legend_name = None
            n_cond = 1
        else:
            legend = True and legend
            legend_name = condition
            n_cond = len(data[condition].unique())

    else:
        data = np.asarray(data)

        # Data can be a timecourse from a single unit or
        # several observations in one condition
        if data.ndim == 1:
            data = data[np.newaxis, :, np.newaxis]
        elif data.ndim == 2:
            data = data[:, :, np.newaxis]
        n_unit, n_time, n_cond = data.shape

        # Units are experimental observations. Maybe subjects, or neurons.
        # With array input they are always numbered along the first axis,
        # whatever was passed as `unit`
        units = np.repeat(np.arange(n_unit), n_time * n_cond)
        unit = "unit"
        ylabel = None

        # Time forms the xaxis of the plot
        if time is None:
            times = np.arange(n_time)
        else:
            times = np.asarray(time)
        xlabel = None
        if hasattr(time, "name"):
            xlabel = time.name
        time = "time"
        times = np.tile(np.repeat(times, n_cond), n_unit)

        # Conditions split the timeseries plots
        legend_name = None
        if condition is None:
            conds = range(n_cond)
            legend = False
            if isinstance(color, dict):
                err = "Must have condition names if using color dict."
                raise ValueError(err)
        else:
            conds = np.asarray(condition)
            legend = True and legend
            if hasattr(condition, "name"):
                legend_name = condition.name
        condition = "cond"
        conds = np.tile(conds, n_unit * n_time)

        # Value forms the y value in the plot
        if value is None:
            ylabel = None
        else:
            ylabel = value
        value = "value"

        # Convert to long-form DataFrame
        data = pd.DataFrame(dict(value=data.ravel(),
                                 time=times,
                                 unit=units,
                                 cond=conds))

    # Set up the err_style and ci arguments for the loop below
    if isinstance(err_style, string_types):
        err_style = [err_style]
    elif err_style is None:
        err_style = []
    if not hasattr(ci, "__iter__"):
        ci = [ci]

    # Set up the color palette
    if color is None:
        colors = color_palette()
    elif isinstance(color, dict):
        colors = [color[c] for c in data[condition].unique()]
    else:
        try:
            colors = color_palette(color, n_cond)
        except ValueError:
            color = mpl.colors.colorConverter.to_rgb(color)
            colors = [color] * n_cond

    # Resolve the trace style once so an explicit marker or linestyle is
    # applied to every condition rather than only to the first one
    marker = kwargs.pop("marker", "" if interpolate else "o")
    linestyle = kwargs.pop("linestyle", "-" if interpolate else "")

    # Do a groupby with condition and plot each trace
    for c, (cond, df_c) in enumerate(data.groupby(condition, sort=False)):

        # Reshape to a (unit x time) table of values
        df_c = df_c.pivot(index=unit, columns=time, values=value)
        x = df_c.columns.values.astype(float)

        # Bootstrap the data for confidence intervals
        boot_data = moss.bootstrap(df_c.values, n_boot=n_boot,
                                   axis=0, func=estimator)
        cis = [moss.ci(boot_data, v, axis=0) for v in ci]
        central_data = estimator(df_c.values, axis=0)

        # Get the color for this condition, wrapping around in case there
        # are more conditions than palette entries
        color = colors[c % len(colors)]

        # Use subroutines to plot the uncertainty
        for style in err_style:

            # Allow for null style (only plot central tendency)
            if style is None:
                continue

            # Grab the function from the global environment
            try:
                plot_func = globals()["_plot_%s" % style]
            except KeyError:
                raise ValueError("%s is not a valid err_style" % style)

            # Possibly set up to plot each observation in a different color
            style_color = color
            if err_palette is not None and "unit" in style:
                style_color = color_palette(err_palette, len(df_c.values))

            # Pass all parameters to the error plotter as keyword args
            plot_kwargs = dict(ax=ax, x=x, data=df_c.values,
                               boot_data=boot_data,
                               central_data=central_data,
                               color=style_color, err_kws=err_kws)

            # Plot the error representation, possibly for multiple cis
            for ci_i in cis:
                plot_kwargs["ci"] = ci_i
                plot_func(**plot_kwargs)

        # Plot the central trace. An explicit `label` keyword is consumed by
        # the first trace only; later traces are labelled by condition
        label = kwargs.pop("label", cond if legend else "_nolegend_")
        ax.plot(x, central_data, color=color, label=label,
                marker=marker, linestyle=linestyle, **kwargs)

    # Pad the sides of the plot only when not interpolating; the step size
    # needs at least two timepoints
    ax.set_xlim(x.min(), x.max())
    if not interpolate and len(x) > 1:
        x_diff = x[1] - x[0]
        ax.set_xlim(x.min() - x_diff, x.max() + x_diff)

    # Add the plot labels
    if xlabel is not None:
        ax.set_xlabel(xlabel)
    if ylabel is not None:
        ax.set_ylabel(ylabel)
    if legend:
        ax.legend(loc=0, title=legend_name)

    return ax
Ejemplo n.º 14
0
def violin(vals,
           groupby=None,
           inner="box",
           color=None,
           positions=None,
           names=None,
           widths=.8,
           alpha=None,
           join_rm=False,
           kde_thresh=1e-2,
           inner_kws=None,
           ax=None,
           **kwargs):
    """Create a violin plot (a combination of boxplot and KDE plot).

    Parameters
    ----------
    vals : array or sequence of arrays
        data to plot
    groupby : grouping object
        if `vals` is a Series, this is used to group
    inner : box | stick | points
        plot quartiles or individual sample values inside violin
    color : mpl color, sequence of colors, or seaborn palette name
        inner violin colors
    positions : number or sequence of numbers
        position of first violin or positions of each violin
    widths : float
        width of each violin at maximum density
    alpha : float, optional
        transparancy of violin fill
    join_rm : boolean, optional
        if True, positions in the input arrays are treated as repeated
        measures and are joined with a line plot
    names : list of strings, optional
        names to plot on x axis, otherwise plots numbers
    kde_thresh : float, optional
        proportion of maximum at which to threshold the KDE curve
    inner_kws : dict, optional
        keyword arguments for inner plot; the dict is copied, so the
        caller's object is left untouched
    ax : matplotlib axis, optional
        axis to plot on, otherwise uses the current axis
    kwargs : accepted but not used by this function

    Returns
    -------
    ax : matplotlib axis
        axis with violin plot

    Raises
    ------
    ValueError
        if `vals` has more than 2 dimensions, or if `names` does not have
        one entry per violin

    """
    if ax is None:
        ax = plt.gca()

    # Work out the axis labels and pull the raw values out of pandas objects
    if isinstance(vals, pd.DataFrame):
        if names is None:
            names = vals.columns
        xlabel = vals.columns.name
        ylabel = None
        vals = vals.values

    elif isinstance(vals, pd.Series) and groupby is not None:
        # Groupers without a ``name`` (lists, arrays) just get no x label;
        # previously this left ``xlabel`` unbound and crashed further down
        xlabel = getattr(groupby, "name", None)
        if names is None:
            names = np.sort(pd.unique(groupby))
        ylabel = vals.name
        grouped_vals = pd.groupby(vals, groupby).values
        vals = grouped_vals.values
    else:
        xlabel = None
        ylabel = None

    # Coerce the input into a list with one 1D dataset per violin
    if hasattr(vals, 'shape'):
        if len(vals.shape) == 1:
            if hasattr(vals[0], 'shape'):
                vals = list(vals)
            else:
                vals = [vals]
        elif len(vals.shape) == 2:
            nr, nc = vals.shape
            if nr == 1:
                vals = [vals]
            elif nc == 1:
                vals = [vals.ravel()]
            else:
                # One violin per column
                vals = [vals[:, i] for i in range(nc)]
        else:
            raise ValueError("Input x can have no more than 2 dimensions")
    if not hasattr(vals[0], '__len__'):
        vals = [vals]

    vals = [np.asarray(a, float) for a in vals]

    # Resolve the fill colors. Strings are excluded explicitly because they
    # expose ``__iter__`` on Python 3 and would otherwise be read as a
    # sequence of single-character colors.
    if color is None:
        colors = husl_palette(len(vals), l=.7)
    elif hasattr(color, "__iter__") and not isinstance(color, (tuple, str)):
        colors = color
    else:
        try:
            color = mpl.colors.colorConverter.to_rgb(color)
            colors = [color for _ in vals]
        except ValueError:
            # Not a single color spec, so treat it as a palette name
            colors = color_palette(color, len(vals))

    colors = [mpl.colors.colorConverter.to_rgb(c) for c in colors]
    colors = [desaturate(c, .7) for c in colors]

    # Outline gray is derived from the darkest fill color
    light_vals = [colorsys.rgb_to_hls(*c)[1] for c in colors]
    lum = min(light_vals) * .6
    gray = (lum, lum, lum)

    # Copy so the ``pop`` calls below never alter the caller's dict
    inner_kws = {} if inner_kws is None else dict(inner_kws)

    if positions is None:
        positions = np.arange(1, len(vals) + 1)
    elif not hasattr(positions, "__iter__"):
        positions = np.arange(positions, len(vals) + positions)

    in_alpha = inner_kws.pop("alpha", .6 if inner == "points" else 1)
    in_alpha *= 1 if alpha is None else alpha
    in_color = inner_kws.pop("color", gray)
    in_marker = inner_kws.pop("marker", ".")
    in_lw = inner_kws.pop("lw", 1.5 if inner == "box" else .8)

    for i, a in enumerate(vals):
        x = positions[i]
        kde = stats.gaussian_kde(a)
        y = _kde_support(a, kde, 1000, kde_thresh)
        dens = kde(y)
        # Rescale so the widest point of the violin spans ``widths``
        scl = 1 / (dens.max() / (widths / 2))
        dens *= scl

        ax.fill_betweenx(y, x - dens, x + dens, alpha=alpha, color=colors[i])
        if inner == "box":
            # Quartile lines, clipped to the violin width at that height
            for quant in moss.percentiles(a, [25, 75]):
                q_x = kde(quant) * scl
                q_x = [x - q_x, x + q_x]
                ax.plot(q_x, [quant, quant],
                        color=in_color,
                        linestyle=":",
                        linewidth=in_lw,
                        **inner_kws)
            med = np.median(a)
            m_x = kde(med) * scl
            m_x = [x - m_x, x + m_x]
            ax.plot(m_x, [med, med],
                    color=in_color,
                    linestyle="--",
                    linewidth=in_lw,
                    **inner_kws)
        elif inner == "stick":
            x_vals = kde(a) * scl
            x_vals = [x - x_vals, x + x_vals]
            ax.plot(x_vals, [a, a],
                    color=in_color,
                    linewidth=in_lw,
                    alpha=in_alpha,
                    **inner_kws)
        elif inner == "points":
            x_vals = [x for _ in a]
            ax.plot(x_vals,
                    a,
                    in_marker,
                    color=in_color,
                    alpha=in_alpha,
                    mew=0,
                    **inner_kws)
        # Outline both sides of the violin
        for side in [-1, 1]:
            ax.plot((side * dens) + x, y, c=gray, linewidth=1.5)

    if join_rm:
        # Join at the violin positions so the lines stay aligned with the
        # violins when custom ``positions`` are given
        ax.plot(positions, vals, color=in_color, alpha=2. / 3)

    ax.set_xticks(positions)
    if names is not None:
        if len(vals) != len(names):
            raise ValueError("Length of names list must match nuber of bins")
        ax.set_xticklabels(names)
    ax.set_xlim(positions[0] - .5, positions[-1] + .5)

    if xlabel is not None:
        ax.set_xlabel(xlabel)
    if ylabel is not None:
        ax.set_ylabel(ylabel)

    ax.xaxis.grid(False)
    return ax
Ejemplo n.º 15
0
def boxplot(vals,
            groupby=None,
            names=None,
            join_rm=False,
            color=None,
            alpha=None,
            fliersize=3,
            linewidth=1.5,
            widths=.8,
            ax=None,
            **kwargs):
    """Wrapper for matplotlib boxplot that allows better color control.

    Parameters
    ----------
    vals : sequence of data containers
        data for plot
    groupby : grouping object
        if `vals` is a Series, this is used to group
    names : list of strings, optional
        names to plot on x axis, otherwise plots numbers
    join_rm : boolean, optional
        if True, positions in the input arrays are treated as repeated
        measures and are joined with a line plot
    color : mpl color, sequence of colors, or seaborn palette name
        inner box color
    alpha : float
        transparancy of the inner box color
    fliersize : float, optional
        markersize for the fliers
    linewidth : float, optional
        width for the box outlines and whiskers
    widths : float, optional
        box widths, passed through to matplotlib boxplot
    ax : matplotlib axis, optional
        will plot in axis, otherwise uses the current axis
    kwargs : additional keyword arguments to boxplot

    Returns
    -------
    ax : matplotlib axis
        axis where boxplot is plotted

    """
    if ax is None:
        ax = plt.gca()

    # Work out the axis labels and pull the raw values out of pandas objects
    if isinstance(vals, pd.DataFrame):
        if names is None:
            names = vals.columns
        xlabel = vals.columns.name
        vals = vals.values
        ylabel = None

    elif isinstance(vals, pd.Series) and groupby is not None:
        if names is None:
            names = np.sort(pd.unique(groupby))
        # Groupers without a ``name`` (lists, arrays) just get no x label;
        # previously this left ``xlabel`` unbound and crashed further down
        xlabel = getattr(groupby, "name", None)
        ylabel = vals.name
        grouped_vals = pd.groupby(vals, groupby).values
        vals = grouped_vals.values
    else:
        xlabel = None
        ylabel = None

    boxes = ax.boxplot(vals, patch_artist=True, widths=widths, **kwargs)

    # Size the palette by the boxes that were actually drawn. The length of
    # the transposed data is the number of observations for a list of
    # arrays, which gave every box nearly the same hue.
    n_boxes = len(boxes["boxes"])

    # Strings are excluded explicitly because they expose ``__iter__`` on
    # Python 3 and would otherwise be read as a sequence of colors
    if color is None:
        colors = husl_palette(n_boxes, l=.7)
    elif hasattr(color, "__iter__") and not isinstance(color, (tuple, str)):
        colors = color
    else:
        try:
            color = mpl.colors.colorConverter.to_rgb(color)
            colors = [color] * n_boxes
        except ValueError:
            # Not a single color spec, so treat it as a palette name
            colors = color_palette(color, n_boxes)

    colors = [mpl.colors.colorConverter.to_rgb(c) for c in colors]
    colors = [desaturate(c, .7) for c in colors]

    # Outline gray is derived from the darkest fill color
    light_vals = [colorsys.rgb_to_hls(*c)[1] for c in colors]
    lum = min(light_vals) * .6
    gray = (lum, lum, lum)

    for i, box in enumerate(boxes["boxes"]):
        box.set_color(colors[i])
        if alpha is not None:
            box.set_alpha(alpha)
        box.set_edgecolor(gray)
        box.set_linewidth(linewidth)
    for whisk in boxes["whiskers"]:
        whisk.set_color(gray)
        whisk.set_linewidth(linewidth)
        whisk.set_linestyle("-")
    for cap in boxes["caps"]:
        cap.set_color(gray)
        cap.set_linewidth(linewidth)
    for med in boxes["medians"]:
        med.set_color(gray)
        med.set_linewidth(linewidth)
    for fly in boxes["fliers"]:
        fly.set_color(gray)
        fly.set_marker("d")
        fly.set_markeredgecolor(gray)
        fly.set_markersize(fliersize)

    if join_rm:
        # Orient the data as (n_boxes, n_obs) so each observation becomes one
        # line across the boxes. matplotlib draws one box per column of a 2D
        # array but one box per item of a sequence, so the orientation is
        # taken from the number of boxes drawn; a square 2D array is
        # ambiguous and is resolved by the column rule.
        rm_vals = np.atleast_2d(vals)
        is_2d_array = hasattr(vals, "shape") and np.ndim(vals) == 2
        by_column = is_2d_array and rm_vals.shape[1] == n_boxes
        if rm_vals.shape[0] != n_boxes or by_column:
            rm_vals = rm_vals.T
        ax.plot(range(1, n_boxes + 1), rm_vals, color=gray, alpha=2. / 3)

    if names is not None:
        ax.set_xticklabels(names)
    if xlabel is not None:
        ax.set_xlabel(xlabel)
    if ylabel is not None:
        ax.set_ylabel(ylabel)

    ax.xaxis.grid(False)
    return ax
Ejemplo n.º 16
0
def violin(vals, groupby=None, inner="box", color=None, positions=None,
           names=None, widths=.8, alpha=None, join_rm=False, kde_thresh=1e-2,
           inner_kws=None, ax=None, **kwargs):
    """Create a violin plot (a combination of boxplot and KDE plot).

    Parameters
    ----------
    vals : array or sequence of arrays
        data to plot
    groupby : grouping object
        if `vals` is a Series, this is used to group
    inner : box | stick | points
        plot quartiles or individual sample values inside violin
    color : mpl color, sequence of colors, or seaborn palette name
        inner violin colors
    positions : number or sequence of numbers
        position of first violin or positions of each violin
    widths : float
        width of each violin at maximum density
    alpha : float, optional
        transparancy of violin fill
    join_rm : boolean, optional
        if True, positions in the input arrays are treated as repeated
        measures and are joined with a line plot
    names : list of strings, optional
        names to plot on x axis, otherwise plots numbers
    kde_thresh : float, optional
        proportion of maximum at which to threshold the KDE curve
    inner_kws : dict, optional
        keyword arguments for inner plot; the dict is copied, so the
        caller's object is left untouched
    ax : matplotlib axis, optional
        axis to plot on, otherwise uses the current axis
    kwargs : accepted but not used by this function

    Returns
    -------
    ax : matplotlib axis
        axis with violin plot

    Raises
    ------
    ValueError
        if `vals` has more than 2 dimensions, or if `names` does not have
        one entry per violin

    """
    if ax is None:
        ax = plt.gca()

    # Work out the axis labels and pull the raw values out of pandas objects
    if isinstance(vals, pd.DataFrame):
        if names is None:
            names = vals.columns
        xlabel = vals.columns.name
        ylabel = None
        vals = vals.values

    elif isinstance(vals, pd.Series) and groupby is not None:
        # Groupers without a ``name`` (lists, arrays) just get no x label;
        # previously this left ``xlabel`` unbound and crashed further down
        xlabel = getattr(groupby, "name", None)
        ylabel = vals.name
        grouped_vals = pd.groupby(vals, groupby).values
        if names is None:
            # Take labels from the grouped result so they match group order
            names = grouped_vals.index
        vals = grouped_vals.values
    else:
        xlabel = None
        ylabel = None

    # Coerce the input into a list with one 1D dataset per violin
    if hasattr(vals, 'shape'):
        if len(vals.shape) == 1:
            if hasattr(vals[0], 'shape'):
                vals = list(vals)
            else:
                vals = [vals]
        elif len(vals.shape) == 2:
            nr, nc = vals.shape
            if nr == 1:
                vals = [vals]
            elif nc == 1:
                vals = [vals.ravel()]
            else:
                # One violin per column
                vals = [vals[:, i] for i in range(nc)]
        else:
            raise ValueError("Input x can have no more than 2 dimensions")
    if not hasattr(vals[0], '__len__'):
        vals = [vals]

    vals = [np.asarray(a, float) for a in vals]

    # Resolve the fill colors. Strings are excluded explicitly because they
    # expose ``__iter__`` on Python 3 and would otherwise be read as a
    # sequence of single-character colors.
    if color is None:
        colors = husl_palette(len(vals), l=.7)
    elif hasattr(color, "__iter__") and not isinstance(color, (tuple, str)):
        colors = color
    else:
        try:
            color = mpl.colors.colorConverter.to_rgb(color)
            colors = [color for _ in vals]
        except ValueError:
            # Not a single color spec, so treat it as a palette name
            colors = color_palette(color, len(vals))

    colors = [mpl.colors.colorConverter.to_rgb(c) for c in colors]
    colors = [desaturate(c, .7) for c in colors]

    # Outline gray is derived from the darkest fill color
    light_vals = [colorsys.rgb_to_hls(*c)[1] for c in colors]
    lum = min(light_vals) * .6
    gray = (lum, lum, lum)

    # Copy so the ``pop`` calls below never alter the caller's dict
    inner_kws = {} if inner_kws is None else dict(inner_kws)

    if positions is None:
        positions = np.arange(1, len(vals) + 1)
    elif not hasattr(positions, "__iter__"):
        positions = np.arange(positions, len(vals) + positions)

    in_alpha = inner_kws.pop("alpha", .6 if inner == "points" else 1)
    in_alpha *= 1 if alpha is None else alpha
    in_color = inner_kws.pop("color", gray)
    in_marker = inner_kws.pop("marker", ".")
    in_lw = inner_kws.pop("lw", 1.5 if inner == "box" else .8)

    for i, a in enumerate(vals):
        x = positions[i]
        kde = stats.gaussian_kde(a)
        y = _kde_support(a, kde, 1000, kde_thresh)
        dens = kde(y)
        # Rescale so the widest point of the violin spans ``widths``
        scl = 1 / (dens.max() / (widths / 2))
        dens *= scl

        ax.fill_betweenx(y, x - dens, x + dens, alpha=alpha, color=colors[i])
        if inner == "box":
            # Quartile lines, clipped to the violin width at that height
            for quant in moss.percentiles(a, [25, 75]):
                q_x = kde(quant) * scl
                q_x = [x - q_x, x + q_x]
                ax.plot(q_x, [quant, quant], color=in_color,
                        linestyle=":", linewidth=in_lw, **inner_kws)
            med = np.median(a)
            m_x = kde(med) * scl
            m_x = [x - m_x, x + m_x]
            ax.plot(m_x, [med, med], color=in_color,
                    linestyle="--", linewidth=in_lw, **inner_kws)
        elif inner == "stick":
            x_vals = kde(a) * scl
            x_vals = [x - x_vals, x + x_vals]
            ax.plot(x_vals, [a, a], color=in_color,
                    linewidth=in_lw, alpha=in_alpha, **inner_kws)
        elif inner == "points":
            x_vals = [x for _ in a]
            ax.plot(x_vals, a, in_marker, color=in_color,
                    alpha=in_alpha, mew=0, **inner_kws)
        # Outline both sides of the violin
        for side in [-1, 1]:
            ax.plot((side * dens) + x, y, c=gray, linewidth=1.5)

    if join_rm:
        # Join at the violin positions so the lines stay aligned with the
        # violins when custom ``positions`` are given
        ax.plot(positions, vals, color=in_color, alpha=2. / 3)

    ax.set_xticks(positions)
    if names is not None:
        if len(vals) != len(names):
            raise ValueError("Length of names list must match nuber of bins")
        ax.set_xticklabels(names)
    ax.set_xlim(positions[0] - .5, positions[-1] + .5)

    if xlabel is not None:
        ax.set_xlabel(xlabel)
    if ylabel is not None:
        ax.set_ylabel(ylabel)

    ax.xaxis.grid(False)
    return ax
Ejemplo n.º 17
0
def tsplot(x, data, err_style="ci_band", ci=68, interpolate=True,
           estimator=np.mean, n_boot=10000, smooth=False,
           err_palette=None, ax=None, err_kws=None, **kwargs):
    """Plot timeseries from a set of observations.

    Parameters
    ----------
    x : n_tp array
        x values
    data : n_obs x n_tp array
        array of timeseries data where first axis is observations. other
        objects (e.g. DataFrames) are converted to an array if possible
    err_style : string or list of strings
        names of ways to plot uncertainty across observations from set of
        {ci_band, ci_bars, boot_traces, boot_kde, obs_traces, obs_points};
        each name is looked up as a module-level ``_plot_<name>`` function
    ci : number or list of numbers
        confidence interval size(s). if a list, it will stack the error
        plots for each confidence interval
    interpolate : boolean
        if True the central trace defaults to a line without markers,
        otherwise to "o" markers without a line; ``marker`` and
        ``linestyle`` in kwargs override these defaults
    estimator : callable
        function to determine central tendency and to pass to bootstrap
        must take an ``axis`` argument
    n_boot : int
        number of bootstrap iterations
    smooth : boolean
        whether to perform a smooth bootstrap (resample from KDE)
    err_palette : seaborn palette, optional
        if given, error styles with "obs" in their name are drawn with one
        color per observation from this palette
    ax : axis object, optional
        plot in given axis; if None uses the current axis
    err_kws : dict, optional
        keyword argument dictionary passed through to matplotlib
        function generating the error plot
    kwargs : further keyword arguments for main call to plot()

    Returns
    -------
    ax : matplotlib axis
        axis with plot data

    Raises
    ------
    ValueError
        if an entry of `err_style` has no matching ``_plot_<name>`` function

    """
    if ax is None:
        ax = plt.gca()

    if err_kws is None:
        err_kws = {}

    # Bootstrap the data for confidence intervals
    data = np.asarray(data)
    boot_data = moss.bootstrap(data, n_boot=n_boot, smooth=smooth,
                               axis=0, func=estimator)
    if not hasattr(ci, "__iter__"):
        ci = [ci]
    # Float division: under Python 2, ``w / 2`` truncated odd integer
    # widths (e.g. ci=95 produced the 3rd and 97th percentiles)
    ci_vals = [(50 - w / 2., 50 + w / 2.) for w in ci]
    cis = [moss.percentiles(boot_data, v, axis=0) for v in ci_vals]
    central_data = estimator(data, axis=0)

    # Plot the timeseries line to get its color, then take it off again
    line, = ax.plot(x, central_data, **kwargs)
    color = line.get_color()
    line.remove()
    kwargs.pop("color", None)

    # Use subroutines to plot the uncertainty. A lone style name must be
    # wrapped explicitly: strings expose ``__iter__`` on Python 3 and would
    # otherwise be walked character by character.
    if isinstance(err_style, str) or not hasattr(err_style, "__iter__"):
        err_style = [err_style]
    for style in err_style:

        # Grab the function from the global environment
        try:
            plot_func = globals()["_plot_%s" % style]
        except KeyError:
            raise ValueError("%s is not a valid err_style" % style)

        # Possibly set up to plot each observation in a different color
        per_obs_colors = err_palette is not None and "obs" in style
        if per_obs_colors:
            orig_color = color
            color = color_palette(err_palette, len(data), desat=.99)

        plot_kwargs = dict(ax=ax, x=x, data=data,
                           boot_data=boot_data,
                           central_data=central_data,
                           color=color, err_kws=err_kws)

        # Draw the error representation once per confidence interval
        for ci_i in cis:
            plot_kwargs["ci"] = ci_i
            plot_func(**plot_kwargs)

        # Restore the line color for later styles and the central trace
        if per_obs_colors:
            color = orig_color

    # Replot the central trace so it is prominent
    marker = kwargs.pop("marker", "" if interpolate else "o")
    linestyle = kwargs.pop("linestyle", "-" if interpolate else "")
    ax.plot(x, central_data, color=color,
            marker=marker, linestyle=linestyle, **kwargs)

    return ax
Ejemplo n.º 18
0
def boxplot(vals, groupby=None, names=None, join_rm=False, color=None,
            alpha=None, fliersize=3, linewidth=1.5, widths=.8, ax=None,
            **kwargs):
    """Wrapper for matplotlib boxplot that allows better color control.

    Parameters
    ----------
    vals : sequence of data containers
        data for plot
    groupby : grouping object
        if `vals` is a Series, this is used to group
    names : list of strings, optional
        names to plot on x axis, otherwise plots numbers
    join_rm : boolean, optional
        if True, positions in the input arrays are treated as repeated
        measures and are joined with a line plot
    color : mpl color, sequence of colors, or seaborn palette name
        inner box color
    alpha : float
        transparancy of the inner box color
    fliersize : float, optional
        markersize for the fliers
    linewidth : float, optional
        width for the box outlines and whiskers
    widths : float, optional
        box widths, passed through to matplotlib boxplot
    ax : matplotlib axis, optional
        will plot in axis, otherwise uses the current axis
    kwargs : additional keyword arguments to boxplot

    Returns
    -------
    ax : matplotlib axis
        axis where boxplot is plotted

    """
    if ax is None:
        ax = plt.gca()

    # Work out the axis labels and pull the raw values out of pandas objects
    if isinstance(vals, pd.DataFrame):
        if names is None:
            names = vals.columns
        xlabel = vals.columns.name
        vals = vals.values
        ylabel = None

    elif isinstance(vals, pd.Series) and groupby is not None:
        # Groupers without a ``name`` (lists, arrays) just get no x label;
        # previously this left ``xlabel`` unbound and crashed further down
        xlabel = getattr(groupby, "name", None)
        ylabel = vals.name
        grouped_vals = pd.groupby(vals, groupby).values
        if names is None:
            # Take labels from the grouped result so they follow the same
            # order as the boxes. ``pd.unique(groupby)`` is in order of
            # appearance and mislabelled the boxes whenever that differed
            # from the group order.
            names = grouped_vals.index
        vals = grouped_vals.values
    else:
        xlabel = None
        ylabel = None

    boxes = ax.boxplot(vals, patch_artist=True, widths=widths, **kwargs)

    # Size the palette by the boxes that were actually drawn. The length of
    # the transposed data is the number of observations for a list of
    # arrays, which gave every box nearly the same hue.
    n_boxes = len(boxes["boxes"])

    # Strings are excluded explicitly because they expose ``__iter__`` on
    # Python 3 and would otherwise be read as a sequence of colors
    if color is None:
        colors = husl_palette(n_boxes, l=.7)
    elif hasattr(color, "__iter__") and not isinstance(color, (tuple, str)):
        colors = color
    else:
        try:
            color = mpl.colors.colorConverter.to_rgb(color)
            colors = [color] * n_boxes
        except ValueError:
            # Not a single color spec, so treat it as a palette name
            colors = color_palette(color, n_boxes)

    colors = [mpl.colors.colorConverter.to_rgb(c) for c in colors]
    colors = [desaturate(c, .7) for c in colors]

    # Outline gray is derived from the darkest fill color
    light_vals = [colorsys.rgb_to_hls(*c)[1] for c in colors]
    lum = min(light_vals) * .6
    gray = (lum, lum, lum)

    for i, box in enumerate(boxes["boxes"]):
        box.set_color(colors[i])
        if alpha is not None:
            box.set_alpha(alpha)
        box.set_edgecolor(gray)
        box.set_linewidth(linewidth)
    for whisk in boxes["whiskers"]:
        whisk.set_color(gray)
        whisk.set_linewidth(linewidth)
        whisk.set_linestyle("-")
    for cap in boxes["caps"]:
        cap.set_color(gray)
        cap.set_linewidth(linewidth)
    for med in boxes["medians"]:
        med.set_color(gray)
        med.set_linewidth(linewidth)
    for fly in boxes["fliers"]:
        fly.set_color(gray)
        fly.set_marker("d")
        fly.set_markeredgecolor(gray)
        fly.set_markersize(fliersize)

    if join_rm:
        # Orient the data as (n_boxes, n_obs) so each observation becomes one
        # line across the boxes. matplotlib draws one box per column of a 2D
        # array but one box per item of a sequence, so the orientation is
        # taken from the number of boxes drawn; a square 2D array is
        # ambiguous and is resolved by the column rule.
        rm_vals = np.atleast_2d(vals)
        is_2d_array = hasattr(vals, "shape") and np.ndim(vals) == 2
        by_column = is_2d_array and rm_vals.shape[1] == n_boxes
        if rm_vals.shape[0] != n_boxes or by_column:
            rm_vals = rm_vals.T
        ax.plot(range(1, n_boxes + 1), rm_vals,
                color=gray, alpha=2. / 3)

    if names is not None:
        ax.set_xticklabels(names)
    if xlabel is not None:
        ax.set_xlabel(xlabel)
    if ylabel is not None:
        ax.set_ylabel(ylabel)

    ax.xaxis.grid(False)
    return ax
Ejemplo n.º 19
0
def lmplot(x,
           y,
           data,
           color=None,
           row=None,
           col=None,
           x_estimator=None,
           x_ci=95,
           fit_line=True,
           ci=95,
           truncate=False,
           sharex=True,
           sharey=True,
           palette="hls",
           size=None,
           scatter_kws=None,
           line_kws=None,
           palette_kws=None):
    """Plot a linear model from a DataFrame.

    Draws on a newly created figure and returns nothing.

    Parameters
    ----------
    x, y : strings
        column names in `data` DataFrame for x and y variables
    data : DataFrame
        source of data for the model
    color : string, optional
        DataFrame column name to group the model by color
    row, col : strings, optional
        DataFrame column names to make separate plot facets
    x_estimator : callable, optional
        Interpret X values as factor labels and use this function
        to plot the point estimate and bootstrapped CI
    x_ci : int optional
        size of confidence interval for x_estimator error bars
    fit_line : bool, optional
        if True fit a regression line by color/row/col and plot
    ci : int, optional
        confidence interval for the regression line; None skips the band
    truncate : bool, optional
        if True, only fit line from data min to data max
    sharex, sharey : bools, optional
        only relevant if faceting; passed to plt.subplots
    palette : seaborn color palette argument
        if using separate plots by color, draw with this color palette
    size : float, optional
        size (plots are square) for each plot facet
    {scatter, line}_kws : dictionary
        keyword arguments to pass to the underlying plot functions;
        the dictionaries are copied and never modified
    palette_kws : dictionary
        keyword arguments for seaborn.color_palette

    """
    # TODO
    # - position_{dodge, jitter}
    # - legend when fit_line is False
    # - truncate fit
    # - wrap title when wide
    # - wrap columns

    # First sort out the general figure layout
    if size is None:
        size = mpl.rcParams["figure.figsize"][1]

    nrow = 1 if row is None else len(data[row].unique())
    ncol = 1 if col is None else len(data[col].unique())

    fig, axes = plt.subplots(nrow,
                             ncol,
                             sharex=sharex,
                             sharey=sharey,
                             figsize=(size * ncol, size * nrow))
    axes = np.atleast_2d(axes).reshape(nrow, ncol)

    # Branch on whether a facet variable was given rather than on the number
    # of levels: a facet column with a single level still needs its level
    # values for the titles (testing ``nrow == 1`` left them undefined).
    if row is None:
        row_masks = [np.repeat(True, len(data))]
    else:
        row_vals = np.sort(data[row].unique())
        row_masks = [data[row] == val for val in row_vals]

    if col is None:
        col_masks = [np.repeat(True, len(data))]
    else:
        col_vals = np.sort(data[col].unique())
        col_masks = [data[col] == val for val in col_vals]

    if palette_kws is None:
        palette_kws = {}

    # Sort out the plot colors
    color_factor = color
    if color_factor is None:
        hue_masks = [np.repeat(True, len(data))]
        colors = ["#222222"]
    else:
        hue_vals = np.sort(data[color_factor].unique())
        hue_masks = [data[color_factor] == val for val in hue_vals]
        colors = color_palette(palette, len(hue_masks), **palette_kws)

    # Default keyword arguments for plot components. Work on copies so the
    # caller's dictionaries are left untouched.
    scatter_kws = {} if scatter_kws is None else dict(scatter_kws)
    line_kws = {} if line_kws is None else dict(line_kws)

    # Resolve the marker settings once, before the loops. Popping them per
    # subset applied user values to the first subset only, because the keys
    # were gone afterwards.
    ms = scatter_kws.pop("ms", 4 if x_estimator is None else 7)
    mew = scatter_kws.pop("mew", 0)

    # First walk through the facets and plot the scatters
    for row_i, row_mask in enumerate(row_masks):
        for col_j, col_mask in enumerate(col_masks):
            ax = axes[row_i, col_j]
            if not sharex or (row_i + 1 == len(row_masks)):
                ax.set_xlabel(x)
            if not sharey or col_j == 0:
                ax.set_ylabel(y)

            # Title the plot if we are faceting
            title = ""
            if row is not None:
                title += "%s = %s" % (row, row_vals[row_i])
            if row is not None and col is not None:
                title += " | "
            if col is not None:
                title += "%s = %s" % (col, col_vals[col_j])
            ax.set_title(title)

            for hue_k, hue_mask in enumerate(hue_masks):
                hue_color = colors[hue_k]
                data_ijk = data[row_mask & col_mask & hue_mask]

                if x_estimator is not None:
                    # Treat x as a factor: one point estimate with a
                    # bootstrapped interval per unique x value
                    x_vals = data_ijk[x].unique()
                    y_grouped = [
                        np.array(data_ijk[y][data_ijk[x] == v]) for v in x_vals
                    ]
                    y_est = [x_estimator(y_i) for y_i in y_grouped]
                    y_boots = [
                        moss.bootstrap(np.array(y_i), func=x_estimator)
                        for y_i in y_grouped
                    ]
                    ci_lims = [50 - x_ci / 2., 50 + x_ci / 2.]
                    y_ci = [moss.percentiles(y_i, ci_lims) for y_i in y_boots]
                    y_error = ci_to_errsize(np.transpose(y_ci), y_est)

                    ax.plot(x_vals,
                            y_est,
                            "o",
                            mew=mew,
                            ms=ms,
                            color=hue_color,
                            **scatter_kws)
                    # NOTE(review): newer matplotlib releases document
                    # fmt="none" for marker-less error bars; confirm against
                    # the supported matplotlib version before changing.
                    ax.errorbar(x_vals, y_est, y_error, fmt=None,
                                ecolor=hue_color)
                else:
                    ax.plot(data_ijk[x],
                            data_ijk[y],
                            "o",
                            color=hue_color,
                            mew=mew,
                            ms=ms,
                            **scatter_kws)

    for ax_i in np.ravel(axes):
        ax_i.set_xmargin(.05)
        ax_i.autoscale_view()

    # Now walk through again and plot the regression estimate
    # and a confidence interval for the regression line
    if fit_line:
        for row_i, row_mask in enumerate(row_masks):
            for col_j, col_mask in enumerate(col_masks):
                ax = axes[row_i, col_j]
                xlim = ax.get_xlim()

                for hue_k, hue_mask in enumerate(hue_masks):
                    hue_color = colors[hue_k]
                    data_ijk = data[row_mask & col_mask & hue_mask]
                    x_vals = np.array(data_ijk[x])
                    y_vals = np.array(data_ijk[y])

                    # Sort out the limit of the fit
                    if truncate:
                        xx = np.linspace(x_vals.min(), x_vals.max(), 100)
                    else:
                        xx = np.linspace(xlim[0], xlim[1], 100)

                    # Inner function to bootstrap the regression; it is
                    # called right away, so closing over ``xx`` is safe
                    def _bootstrap_reg(x, y):
                        fit = np.polyfit(x, y, 1)
                        return np.polyval(fit, xx)

                    # Regression line confidence interval
                    if ci is not None:
                        ci_lims = [50 - ci / 2., 50 + ci / 2.]
                        boots = moss.bootstrap(x_vals,
                                               y_vals,
                                               func=_bootstrap_reg)
                        ci_band = moss.percentiles(boots, ci_lims, axis=0)
                        ax.fill_between(xx, *ci_band, color=hue_color,
                                        alpha=.15)

                    fit = np.polyfit(x_vals, y_vals, 1)
                    reg = np.polyval(fit, xx)
                    if color_factor is None:
                        label = ""
                    else:
                        label = hue_vals[hue_k]
                    ax.plot(xx, reg, color=hue_color, label=str(label),
                            **line_kws)
                    # Keep the limits found after drawing the scatter
                    ax.set_xlim(xlim)

    # Plot the legend on the upper left facet and adjust the layout
    if color_factor is not None:
        axes[0, 0].legend(loc="best", title=color_factor)
    plt.tight_layout()
Ejemplo n.º 20
0
def coefplot(formula, data, groupby=None, intercept=False, ci=95,
             palette="husl"):
    """Plot the coefficients from a linear model.

    Fits an OLS model with ``sf.ols`` (one model overall, or one per group
    when ``groupby`` is given) and draws every coefficient as a point with a
    vertical bar spanning its confidence interval.

    Parameters
    ----------
    formula : string
        patsy formula for ols model
    data : dataframe
        data for the plot; formula terms must appear in columns
    groupby : grouping object, optional
        object to group data with to fit conditional models
    intercept : bool, optional
        if False, strips the intercept term before plotting
    ci : float, optional
        size of confidence intervals
    palette : seaborn color palette, optional
        palette for the horizontal plots

    """
    # Float literals keep these true divisions even when ``/`` on two ints
    # would truncate (e.g. ci=95 must give alpha=0.05, not 1).
    alpha = 1 - ci / 100.
    if groupby is None:
        # Fit once and read both the estimates and their intervals from it
        model = sf.ols(formula, data).fit()
        coefs = model.params
        cis = model.conf_int(alpha)
    else:
        grouped = data.groupby(groupby)
        # Transposed so that rows are terms and columns are groups
        coefs = grouped.apply(lambda d: sf.ols(formula, d).fit().params).T
        # Rows are looked up below by (group, term)
        cis = grouped.apply(lambda d: sf.ols(formula, d).fit().conf_int(alpha))

    # Possibly ignore the intercept
    # (positional slice: presumably the intercept is the first fitted term)
    if not intercept:
        coefs = coefs.iloc[1:]

    n_terms = len(coefs)

    # Figure size helpers, scaled from the default matplotlib figure size
    w, h = mpl.rcParams["figure.figsize"]

    def hsize(n):
        return n * (h / 2)

    def wsize(n):
        return n * (w / (4 * (n / 5.)))

    # Plot separately depending on groupby
    if groupby is None:
        colors = itertools.cycle(color_palette(palette, n_terms))
        f, ax = plt.subplots(1, 1, figsize=(wsize(n_terms), hsize(1)))
        for i, term in enumerate(coefs.index):
            color = next(colors)
            low, high = cis.loc[term]
            ax.plot([i, i], [low, high], c=color,
                    solid_capstyle="round", lw=2.5)
            ax.plot(i, coefs.loc[term], "o", c=color, ms=8)
        ax.set_xlim(-.5, n_terms - .5)
        ax.axhline(0, ls="--", c="dimgray")
        ax.set_xticks(range(n_terms))
        ax.set_xticklabels(coefs.index)

    else:
        n_groups = len(coefs.columns)
        f, axes = plt.subplots(n_terms, 1, sharex=True,
                               figsize=(wsize(n_groups), hsize(n_terms)))
        # plt.subplots returns a bare Axes rather than an array for one row
        if n_terms == 1:
            axes = [axes]
        colors = itertools.cycle(color_palette(palette, n_groups))
        for ax, term in zip(axes, coefs.index):
            for i, group in enumerate(coefs.columns):
                color = next(colors)
                # Explicit column slice so the tuple is read as a row key
                low, high = cis.loc[(group, term), :]
                ax.plot([i, i], [low, high], c=color,
                        solid_capstyle="round", lw=2.5)
                ax.plot(i, coefs.loc[term, group], "o", c=color, ms=8)
            ax.set_xlim(-.5, n_groups - .5)
            ax.axhline(0, ls="--", c="dimgray")
            ax.set_title(term)
        # Only the bottom axis (the last one visited) gets the tick labels
        ax.set_xlabel(groupby)
        ax.set_xticks(range(n_groups))
        ax.set_xticklabels(coefs.columns)
Ejemplo n.º 21
0
def lmplot(x, y, data, color=None, row=None, col=None, col_wrap=None,
           x_estimator=None, x_ci=95, x_bins=None, n_boot=5000, fit_reg=True,
           order=1, ci=95, logistic=False, truncate=False,
           x_partial=None, y_partial=None, x_jitter=None, y_jitter=None,
           sharex=True, sharey=True, palette="husl", size=None,
           scatter_kws=None, line_kws=None, palette_kws=None):
    """Plot a linear model with faceting, color binning, and other options.

    The figure is built in two passes over the row/col/color subsets of
    ``data``: the first draws the observations (or point estimates with
    bootstrapped error bars when ``x_estimator`` is in effect), the second
    draws the fitted regression and its bootstrapped confidence band.

    Parameters
    ----------
    x, y : strings
        Column names in `data` DataFrame for x and y variables.
    data : DataFrame
        Source of data for the model.
    color : string, optional
        DataFrame column name to group the model by color.
    row, col : strings, optional
        DataFrame column names to make separate plot facets.
    col_wrap : int, optional
        Wrap col variable at this width - cannot be used with row facet.
    x_estimator : callable, optional
        Interpret X values as factor labels and use this function
        to plot the point estimate and bootstrapped CI.
    x_ci : int optional
        Size of confidence interval for x_estimator error bars.
    x_bins : sequence of floats, optional
        Bin the x variable with these values. Implies that x_estimator is
        mean, unless otherwise provided.
    n_boot : int, optional
        Number of bootstrap iterations to perform.
    fit_reg : bool, optional
        If True fit a regression model by color/row/col and plot.
    order : int, optional
        Order of the regression polynomial to fit.
    ci : int, optional
        Confidence interval for the regression line.
    logistic : bool, optional
        Fit the regression line with logistic regression.
    truncate : bool, optional
        If True, only fit line from data min to data max.
    {x, y}_partial : string or list of strings, optional
        Regress these variables out of the factors before plotting.
    {x, y}_jitter : float, optional
        Parameters for uniformly distributed random noise added to positions.
    sharex, sharey : bools, optional
        Only relevant if faceting; passed to plt.subplots.
    palette : seaborn color palette argument
        If using separate plots by color, draw with this color palette.
    size : float, optional
        Size (plots are square) for each plot facet.
    {scatter, line}_kws : dictionary
        Keyword arguments to pass to the underlying plot functions.
        The dictionaries passed in are not modified.
    palette_kws : dictionary
        Keyword arguments for seaborn.color_palette.

    Raises
    ------
    ValueError
        If `col_wrap` is given without `col`, or together with `row`.

    """
    # TODO
    # - legend when fit_reg is False

    # First sort out the general figure layout
    if size is None:
        size = mpl.rcParams["figure.figsize"][1]

    if col is None and col_wrap is not None:
        raise ValueError("Need column facet variable for `col_wrap`")
    if row is not None and col_wrap is not None:
        raise ValueError("Cannot facet rows when using `col_wrap`")

    # Facet levels are resolved whenever the facet variable is given, even
    # if it only has one level, so the facet titles can always be built.
    row_vals = None if row is None else np.sort(data[row].unique())
    col_vals = None if col is None else np.sort(data[col].unique())

    nrow = 1 if row is None else len(row_vals)
    ncol = 1 if col is None else len(col_vals)

    if col_wrap is not None:
        ncol = col_wrap
        # float() keeps the ceiling correct under integer division
        nrow = int(np.ceil(len(col_vals) / float(col_wrap)))

    f, axes = plt.subplots(nrow, ncol, sharex=sharex, sharey=sharey,
                           figsize=(size * ncol, size * nrow))
    axes = np.atleast_2d(axes).reshape(nrow, ncol)

    # One boolean mask per facet level; a single all-True mask if unfaceted
    if row is None:
        row_masks = [np.repeat(True, len(data))]
    else:
        row_masks = [data[row] == val for val in row_vals]

    if col is None:
        col_masks = [np.repeat(True, len(data))]
    else:
        col_masks = [data[col] == val for val in col_vals]

    if x_bins is not None:
        x_estimator = np.mean if x_estimator is None else x_estimator
        # Column vector, as needed for the cdist call below
        x_bins = np.c_[x_bins]

    if x_partial is not None:
        if not isinstance(x_partial, list):
            x_partial = [x_partial]
    if y_partial is not None:
        if not isinstance(y_partial, list):
            y_partial = [y_partial]

    if palette_kws is None:
        palette_kws = {}

    # Sort out the plot colors
    color_factor = color
    if color is None:
        hue_masks = [np.repeat(True, len(data))]
        colors = ["#222222"]
    else:
        hue_vals = np.sort(data[color].unique())
        hue_masks = [data[color] == val for val in hue_vals]
        colors = color_palette(palette, len(hue_masks), **palette_kws)

    # Default keyword arguments for plot components; scatter_kws is copied
    # because entries are popped from it below
    scatter_kws = {} if scatter_kws is None else dict(scatter_kws)
    if line_kws is None:
        line_kws = {}

    def _facet_index(row_i, col_j):
        """Map a (row mask, col mask) index pair to a position in `axes`."""
        if col_wrap is not None:
            return col_j // ncol, col_j % ncol
        return row_i, col_j

    def _remove_confounds(vals, frame, confounds):
        """Regress the mean-centered `confounds` columns out of `vals`."""
        if confounds is None:
            return vals
        for var in confounds:
            conf = frame[var] - frame[var].mean()
            center = vals.mean()
            vals = moss.vector_reject(vals - center, conf) + center
        return vals

    # Marker settings: a user-supplied "ms" wins in both drawing modes,
    # otherwise raw observations use 4 and point estimates use 7
    user_ms = scatter_kws.pop("ms", None)
    scatter_ms = 4 if user_ms is None else user_ms
    estimate_ms = 7 if user_ms is None else user_ms
    scatter_mew = scatter_kws.pop("mew", 0)
    scatter_alpha = scatter_kws.pop("alpha", .77)

    # First walk through the facets and plot the scatters
    for row_i, row_mask in enumerate(row_masks):
        for col_j, col_mask in enumerate(col_masks):
            f_row, f_col = _facet_index(row_i, col_j)
            ax = axes[f_row, f_col]
            # Axis labels only on the bottom row and the left column
            if f_row + 1 == nrow:
                ax.set_xlabel(x)
            if f_col == 0:
                ax.set_ylabel(y)

            # Title the plot if we are faceting
            title = ""
            if row is not None:
                title += "%s = %s" % (row, row_vals[row_i])
            if row is not None and col is not None:
                title += " | "
            if col is not None:
                title += "%s = %s" % (col, col_vals[col_j])
            # Small facets get a two-line title
            if size < 3:
                title = title.replace(" | ", "\n")
            ax.set_title(title)

            for hue_k, hue_mask in enumerate(hue_masks):
                color = colors[hue_k]
                data_ijk = data[row_mask & col_mask & hue_mask]

                if x_estimator is not None:
                    if x_bins is None:
                        x_vals = data_ijk[x].unique()
                        x_data = data_ijk[x]
                    else:
                        # Assign every observation to its nearest bin value
                        dist = distance.cdist(np.c_[data_ijk[x]], x_bins)
                        x_vals = x_bins.ravel()
                        x_data = x_bins[np.argmin(dist, axis=1)].ravel()

                    y_vals = _remove_confounds(data_ijk[y], data_ijk,
                                               y_partial)

                    y_grouped = [np.array(y_vals[x_data == v])
                                 for v in x_vals]

                    y_est = [x_estimator(y_i) for y_i in y_grouped]
                    y_boots = [moss.bootstrap(np.array(y_i),
                                              func=x_estimator,
                                              n_boot=n_boot)
                               for y_i in y_grouped]
                    ci_lims = [50 - x_ci / 2., 50 + x_ci / 2.]
                    y_ci = [moss.percentiles(y_i, ci_lims) for y_i in y_boots]
                    y_error = ci_to_errsize(np.transpose(y_ci), y_est)

                    ax.plot(x_vals, y_est, "o", mew=scatter_mew,
                            ms=estimate_ms, color=color, **scatter_kws)
                    # NOTE(review): newer matplotlib releases appear to want
                    # fmt="none" here rather than None - confirm against the
                    # matplotlib version this file supports.
                    ax.errorbar(x_vals, y_est, y_error,
                                fmt=None, ecolor=color)
                else:
                    x_ = _remove_confounds(data_ijk[x], data_ijk, x_partial)
                    y_ = _remove_confounds(data_ijk[y], data_ijk, y_partial)

                    # Jitter builds new arrays instead of updating in place
                    if x_jitter is not None:
                        x_ = x_ + np.random.uniform(-x_jitter, x_jitter,
                                                    x_.shape)
                    if y_jitter is not None:
                        y_ = y_ + np.random.uniform(-y_jitter, y_jitter,
                                                    y_.shape)
                    ax.plot(x_, y_, "o", color=color, alpha=scatter_alpha,
                            mew=scatter_mew, ms=scatter_ms, **scatter_kws)

    for ax_i in np.ravel(axes):
        ax_i.set_xmargin(.05)
        ax_i.autoscale_view()

    # Now walk through again and plot the regression estimate
    # and a confidence interval for the regression line
    if fit_reg:
        for row_i, row_mask in enumerate(row_masks):
            for col_j, col_mask in enumerate(col_masks):
                f_row, f_col = _facet_index(row_i, col_j)
                ax = axes[f_row, f_col]
                # Remembered so the fitted line cannot widen the x limits
                xlim = ax.get_xlim()

                for hue_k, hue_mask in enumerate(hue_masks):
                    color = colors[hue_k]
                    data_ijk = data[row_mask & col_mask & hue_mask]
                    x_vals = np.array(data_ijk[x])
                    y_vals = np.array(data_ijk[y])
                    # Nothing to fit for an empty facet/color subset
                    if not len(x_vals):
                        continue

                    # Sort out the limit of the fit
                    if truncate:
                        xx = np.linspace(x_vals.min(),
                                         x_vals.max(), 100)
                    else:
                        xx = np.linspace(xlim[0], xlim[1], 100)
                    xx_ = sm.add_constant(xx, prepend=True)

                    # Inner function to bootstrap the regression
                    def _regress(x, y):
                        if logistic:
                            x_ = sm.add_constant(x, prepend=True)
                            fit = sm.GLM(y, x_,
                                         family=sm.families.Binomial()).fit()
                            reg = fit.predict(xx_)
                        else:
                            fit = np.polyfit(x, y, order)
                            reg = np.polyval(fit, xx)
                        return reg

                    # Remove nuisance variables with vector rejection
                    x_vals = _remove_confounds(x_vals, data_ijk, x_partial)
                    y_vals = _remove_confounds(y_vals, data_ijk, y_partial)

                    # Regression line confidence interval
                    if ci is not None:
                        ci_lims = [50 - ci / 2., 50 + ci / 2.]
                        boots = moss.bootstrap(x_vals, y_vals,
                                               func=_regress,
                                               n_boot=n_boot)
                        ci_band = moss.percentiles(boots, ci_lims, axis=0)
                        ax.fill_between(xx, *ci_band, color=color, alpha=.15)

                    # Regression line
                    reg = _regress(x_vals, y_vals)
                    if color_factor is None:
                        label = ""
                    else:
                        label = hue_vals[hue_k]
                    ax.plot(xx, reg, color=color,
                            label=str(label), **line_kws)
                    ax.set_xlim(xlim)

    # Plot the legend on the upper left facet and adjust the layout
    # (skipped when the color variable is already one of the facet variables)
    if color_factor is not None and color_factor not in [row, col]:
        axes[0, 0].legend(loc="best", title=color_factor)
    plt.tight_layout()
Ejemplo n.º 22
0
def tsplot(data,
           time=None,
           unit=None,
           condition=None,
           value=None,
           err_style="ci_band",
           ci=68,
           interpolate=True,
           color=None,
           estimator=np.mean,
           n_boot=5000,
           err_palette=None,
           err_kws=None,
           legend=True,
           ax=None,
           **kwargs):
    """Plot one or more timeseries with flexible representation of uncertainty.

    This function can take data specified either as a long-form (tidy)
    DataFrame or as an ndarray with dimensions for sampling unit, time, and
    (optionally) condition. The interpretation of some of the other parameters
    changes depending on the type of object passed as data.

    Parameters
    ----------
    data : DataFrame or ndarray
        Data for the plot. Should either be a "long form" dataframe or an
        array with dimensions (unit, time, condition). In both cases, the
        condition field/dimension is optional. The type of this argument
        determines the interpretation of the next few parameters.
    time : string or series-like
        Either the name of the field corresponding to time in the data
        DataFrame or x values for a plot when data is an array. If a Series,
        the name will be used to label the x axis.
    value : string
        Either the name of the field corresponding to the data values in
        the data DataFrame (i.e. the y coordinate) or a string that forms
        the y axis label when data is an array.
    unit : string
        Field in the data DataFrame identifying the sampling unit (e.g.
        subject, neuron, etc.). The error representation will collapse over
        units at each time/condition observation. This has no role when data
        is an array.
    condition : string or Series-like
        Either the name of the field identifying the condition an observation
        falls under in the data DataFrame, or a sequence of names with a length
        equal to the size of the third dimension of data. There will be a
        separate trace plotted for each condition. If condition is a Series
        with a name attribute, the name will form the title for the plot
        legend (unless legend is set to False).
    err_style : string or list of strings or None
        Names of ways to plot uncertainty across units from set of
        {ci_band, ci_bars, boot_traces, boot_kde, unit_traces, unit_points}.
        Can use one or more than one method.
    ci : float or list of floats in [0, 100]
        Confidence interval size(s). If a list, it will stack the error
        plots for each confidence interval. Only relevant for error styles
        with "ci" in the name.
    interpolate : boolean
        Whether to do a linear interpolation between each timepoint when
        plotting. The value of this parameter also determines the marker
        used for the main plot traces, unless marker is specified as a keyword
        argument.
    color : seaborn palette or matplotlib color name or dictionary
        Palette or color for the main plots and error representation (unless
        plotting by unit, which can be separately controlled with err_palette).
        If a dictionary, should map condition name to color spec.
    estimator : callable
        Function to determine central tendency and to pass to bootstrap
        must take an ``axis`` argument.
    n_boot : int
        Number of bootstrap iterations.
    err_palette: seaborn palette
        Palette name or list of colors used when plotting data for each unit.
    err_kws : dict, optional
        Keyword argument dictionary passed through to matplotlib function
        generating the error plot,
    legend : bool, optional
        Whether to draw a legend; only honoured when conditions are named.
    ax : axis object, optional
        Plot in given axis; if None uses the current axis (``plt.gca()``)
    kwargs :
        Other keyword arguments are passed to main plot() call. ``marker``,
        ``linestyle`` and ``label`` are applied to every condition trace.

    Returns
    -------
    ax : matplotlib axis
        axis with plot data

    Raises
    ------
    ValueError
        If ``color`` is a dictionary but no condition names are available,
        or if an entry of ``err_style`` does not name a known error plotter.

    """
    # Sort out default values for the parameters
    if ax is None:
        ax = plt.gca()

    if err_kws is None:
        err_kws = {}

    # Handle case where data is a long-form DataFrame
    if isinstance(data, pd.DataFrame):

        xlabel = time
        ylabel = value

        # Condition is optional
        if condition is None:
            if isinstance(color, dict):
                err = "Must have condition names if using color dict."
                raise ValueError(err)
            # Constant grouping key; shares the frame's index so that the
            # groupby below keeps every row whatever the index labels are
            condition = pd.Series(np.ones(len(data)), index=data.index)
            legend = False
            legend_name = None
            n_cond = 1
        else:
            legend_name = condition
            n_cond = len(data[condition].unique())

    # Otherwise treat data as an array and convert it to long form
    else:
        data = np.asarray(data)

        # Data can be a timecourse from a single unit or
        # several observations in one condition
        if data.ndim == 1:
            data = data[np.newaxis, :, np.newaxis]
        elif data.ndim == 2:
            data = data[:, :, np.newaxis]
        n_unit, n_time, n_cond = data.shape

        # Units are experimental observations. Maybe subjects, or neurons.
        # The ``unit`` argument has no role for array input, so units are
        # always numbered along the first axis.
        units = np.repeat(np.arange(n_unit), n_time * n_cond)
        unit = "unit"

        # Time forms the xaxis of the plot
        if time is None:
            times = np.arange(n_time)
        else:
            times = np.asarray(time)
        xlabel = None
        if hasattr(time, "name"):
            xlabel = time.name
        time = "time"
        times = np.tile(np.repeat(times, n_cond), n_unit)

        # Conditions split the timeseries plots
        legend_name = None
        if condition is None:
            conds = range(n_cond)
            legend = False
            if isinstance(color, dict):
                err = "Must have condition names if using color dict."
                raise ValueError(err)
        else:
            conds = np.asarray(condition)
            if hasattr(condition, "name"):
                legend_name = condition.name
        condition = "cond"
        conds = np.tile(conds, n_unit * n_time)

        # Value forms the y value in the plot
        ylabel = value
        value = "value"

        # Convert to long-form DataFrame; the repeat/tile patterns above
        # follow the order in which data.ravel() walks (unit, time, cond)
        data = pd.DataFrame(
            dict(value=data.ravel(), time=times, unit=units, cond=conds))

    # Set up the err_style and ci arguments for the loop below.
    # Strings are tested explicitly: they are iterable on Python 3 and would
    # otherwise be walked character by character.
    if err_style is None:
        err_style = []
    elif isinstance(err_style, str) or not hasattr(err_style, "__iter__"):
        err_style = [err_style]
    if not hasattr(ci, "__iter__"):
        ci = [ci]

    # Set up the color palette
    if color is None:
        colors = color_palette()
    elif isinstance(color, dict):
        colors = [color[c] for c in data[condition].unique()]
    else:
        try:
            colors = color_palette(color, n_cond)
        except ValueError:
            # Not a palette name; treat it as a single matplotlib color
            color = mpl.colors.colorConverter.to_rgb(color)
            colors = [color] * n_cond

    # Pull the trace styling out of kwargs once, so that user-supplied values
    # apply to every condition rather than only the first one plotted
    marker = kwargs.pop("marker", "" if interpolate else "o")
    linestyle = kwargs.pop("linestyle", "-" if interpolate else "")
    user_label = kwargs.pop("label", None)

    # Do a groupby with condition and plot each trace
    x = None
    for c, (cond, df_c) in enumerate(data.groupby(condition, sort=False)):

        # Wide form: one row per unit, one column per time point
        df_c = df_c.pivot(index=unit, columns=time, values=value)
        x = df_c.columns.values.astype(float)

        # Bootstrap the data for confidence intervals
        boot_data = moss.bootstrap(df_c.values,
                                   n_boot=n_boot,
                                   axis=0,
                                   func=estimator)
        cis = [moss.ci(boot_data, v, axis=0) for v in ci]
        central_data = estimator(df_c.values, axis=0)

        # Get the color for this condition, cycling if the palette is
        # shorter than the number of conditions
        trace_color = colors[c % len(colors)]

        # Use subroutines to plot the uncertainty
        for style in err_style:

            # Allow for null style (only plot central tendency)
            if style is None:
                continue

            # Grab the function from the global environment
            try:
                plot_func = globals()["_plot_%s" % style]
            except KeyError:
                raise ValueError("%s is not a valid err_style" % style)

            # Possibly set up to plot each observation in a different color
            if err_palette is not None and "unit" in style:
                err_color = color_palette(err_palette, len(df_c.values))
            else:
                err_color = trace_color

            # Pass all parameters to the error plotter as keyword args
            plot_kwargs = dict(ax=ax,
                               x=x,
                               data=df_c.values,
                               boot_data=boot_data,
                               central_data=central_data,
                               color=err_color,
                               err_kws=err_kws)

            # Plot the error representation, possibly for multiple cis
            for ci_i in cis:
                plot_kwargs["ci"] = ci_i
                plot_func(**plot_kwargs)

        # Plot the central trace
        if user_label is not None:
            label = user_label
        else:
            label = cond if legend else "_nolegend_"
        ax.plot(x,
                central_data,
                color=trace_color,
                label=label,
                marker=marker,
                linestyle=linestyle,
                **kwargs)

    # Pad the sides of the plot only when not interpolating
    # (limits come from the time points of the last condition plotted)
    if x is not None:
        ax.set_xlim(x.min(), x.max())
        # The padding is one time step, which needs at least two time points
        if not interpolate and len(x) > 1:
            x_diff = x[1] - x[0]
            ax.set_xlim(x.min() - x_diff, x.max() + x_diff)

    # Add the plot labels
    if xlabel is not None:
        ax.set_xlabel(xlabel)
    if ylabel is not None:
        ax.set_ylabel(ylabel)
    if legend:
        ax.legend(loc=0, title=legend_name)

    return ax
Ejemplo n.º 23
0
def lmplot(x, y, data, color=None, row=None, col=None,
           x_estimator=None, x_ci=95,
           fit_line=True, ci=95, truncate=False,
           sharex=True, sharey=True, palette="hls", size=None,
           scatter_kws=None, line_kws=None, palette_kws=None):
    """Plot a linear model from a DataFrame.

    A new figure is created with one facet per (row, col) level. Each facet
    gets a scatter (or point estimate with error bars) per color level and,
    optionally, a first-order polynomial fit with a bootstrapped confidence
    band. The function draws onto the new figure and does not return a value.

    Parameters
    ----------
    x, y : strings
        column names in `data` DataFrame for x and y variables
    data : DataFrame
        source of data for the model
    color : string, optional
        DataFrame column name to group the model by color
    row, col : strings, optional
        DataFrame column names to make separate plot facets
    x_estimator : callable, optional
        Interpret X values as factor labels and use this function
        to plot the point estimate and bootstrapped CI
    x_ci : int optional
        size of confidence interval for x_estimator error bars
    fit_line : bool, optional
        if True fit a regression line by color/row/col and plot
    ci : int, optional
        confidence interval for the regression line; None skips the band
    truncate : bool, optional
        if True, only fit line from data min to data max
    sharex, sharey : bools, optional
        only relevant if faceting; passed to plt.subplots
    palette : seaborn color palette argument
        if using separate plots by color, draw with this color palette
    size : float, optional
        size (plots are square) for each plot facet; defaults to the
        height entry of the "figure.figsize" rcParam
    {scatter, line}_kws : dictionary
        keyword arguments to pass to the underlying plot functions;
        the dictionaries passed in are not modified
    palette_kws : dictionary
        keyword arguments for seaborn.color_palette

    """
    # TODO
    # - position_{dodge, jitter}
    # - legend when fit_line is False
    # - truncate fit
    # - wrap title when wide
    # - wrap columns

    # First sort out the general figure layout
    if size is None:
        size = mpl.rcParams["figure.figsize"][1]

    # Build the facet masks. Branch on whether a facet variable was given
    # (not on how many levels it has) so that `row_vals` / `col_vals` are
    # always defined when the facet titles need them.
    everything = np.repeat(True, len(data))
    if row is None:
        row_vals, row_masks = None, [everything]
    else:
        row_vals = np.sort(data[row].unique())
        row_masks = [data[row] == val for val in row_vals]
    if col is None:
        col_vals, col_masks = None, [everything]
    else:
        col_vals = np.sort(data[col].unique())
        col_masks = [data[col] == val for val in col_vals]
    nrow, ncol = len(row_masks), len(col_masks)

    f, axes = plt.subplots(nrow, ncol, sharex=sharex, sharey=sharey,
                           figsize=(size * ncol, size * nrow))
    # plt.subplots squeezes singleton dimensions; restore a 2D grid
    axes = np.atleast_2d(axes).reshape(nrow, ncol)

    if palette_kws is None:
        palette_kws = {}

    # Sort out the plot colors
    color_factor = color
    if color_factor is None:
        hue_vals = None
        hue_masks = [everything]
        colors = ["#222222"]
    else:
        hue_vals = np.sort(data[color_factor].unique())
        hue_masks = [data[color_factor] == val for val in hue_vals]
        colors = color_palette(palette, len(hue_masks), **palette_kws)

    # Default keyword arguments for plot components. Work on copies and
    # extract the marker settings once, so that user-supplied values apply
    # to every facet/hue and the caller's dictionaries are left untouched.
    scatter_kws = {} if scatter_kws is None else dict(scatter_kws)
    line_kws = {} if line_kws is None else dict(line_kws)
    ms = scatter_kws.pop("ms", 4 if x_estimator is None else 7)
    mew = scatter_kws.pop("mew", 0)

    # Percentile limits for the point-estimate error bars (loop invariant)
    x_ci_lims = [50 - x_ci / 2., 50 + x_ci / 2.]

    # First walk through the facets and plot the scatters
    for row_i, row_mask in enumerate(row_masks):
        for col_j, col_mask in enumerate(col_masks):
            ax = axes[row_i, col_j]
            # With shared axes only label the bottom row / left column
            if not sharex or (row_i + 1 == len(row_masks)):
                ax.set_xlabel(x)
            if not sharey or col_j == 0:
                ax.set_ylabel(y)

            # Title the plot if we are faceting
            title_parts = []
            if row is not None:
                title_parts.append("%s = %s" % (row, row_vals[row_i]))
            if col is not None:
                title_parts.append("%s = %s" % (col, col_vals[col_j]))
            ax.set_title(" | ".join(title_parts))

            for hue_k, hue_mask in enumerate(hue_masks):
                hue_color = colors[hue_k]
                data_ijk = data[row_mask & col_mask & hue_mask]
                if len(data_ijk) == 0:
                    # Nothing to draw for this row/col/color combination
                    continue

                if x_estimator is not None:
                    x_vals = data_ijk[x].unique()
                    y_grouped = [np.array(data_ijk[y][data_ijk[x] == v])
                                 for v in x_vals]
                    y_est = [x_estimator(y_i) for y_i in y_grouped]
                    y_boots = [moss.bootstrap(np.array(y_i), func=x_estimator)
                               for y_i in y_grouped]
                    y_ci = [moss.percentiles(y_i, x_ci_lims)
                            for y_i in y_boots]
                    y_error = ci_to_errsize(np.transpose(y_ci), y_est)

                    ax.plot(x_vals, y_est, "o", mew=mew, ms=ms,
                            color=hue_color, **scatter_kws)
                    # NOTE(review): newer matplotlib releases document
                    # fmt="none" for marker-less error bars; confirm which
                    # spelling the supported matplotlib version expects.
                    ax.errorbar(x_vals, y_est, y_error,
                                fmt=None, ecolor=hue_color)
                else:
                    ax.plot(data_ijk[x], data_ijk[y], "o",
                            color=hue_color, mew=mew, ms=ms, **scatter_kws)

    for ax_i in np.ravel(axes):
        ax_i.set_xmargin(.05)
        ax_i.autoscale_view()

    # Now walk through again and plot the regression estimate
    # and a confidence interval for the regression line
    if fit_line:
        for row_i, row_mask in enumerate(row_masks):
            for col_j, col_mask in enumerate(col_masks):
                ax = axes[row_i, col_j]
                # Remember the scatter limits; they are restored after each
                # line is drawn so the fit does not rescale the facet
                xlim = ax.get_xlim()

                for hue_k, hue_mask in enumerate(hue_masks):
                    hue_color = colors[hue_k]
                    data_ijk = data[row_mask & col_mask & hue_mask]
                    if len(data_ijk) == 0:
                        # No observations to fit in this combination
                        continue
                    x_vals = np.array(data_ijk[x])
                    y_vals = np.array(data_ijk[y])

                    # Sort out the limit of the fit
                    if truncate:
                        xx = np.linspace(x_vals.min(),
                                         x_vals.max(), 100)
                    else:
                        xx = np.linspace(xlim[0], xlim[1], 100)

                    # Inner function to bootstrap the regression; `xx` is
                    # bound as a default so each definition keeps its grid
                    def _bootstrap_reg(x, y, xx=xx):
                        fit = np.polyfit(x, y, 1)
                        return np.polyval(fit, xx)

                    # Regression line confidence interval
                    if ci is not None:
                        ci_lims = [50 - ci / 2., 50 + ci / 2.]
                        boots = moss.bootstrap(x_vals, y_vals,
                                               func=_bootstrap_reg)
                        ci_band = moss.percentiles(boots, ci_lims, axis=0)
                        ax.fill_between(xx, *ci_band,
                                        color=hue_color, alpha=.15)

                    fit = np.polyfit(x_vals, y_vals, 1)
                    reg = np.polyval(fit, xx)
                    if color_factor is None:
                        label = ""
                    else:
                        label = hue_vals[hue_k]
                    ax.plot(xx, reg, color=hue_color,
                            label=str(label), **line_kws)
                    ax.set_xlim(xlim)

    # Plot the legend on the upper left facet and adjust the layout
    if color_factor is not None:
        axes[0, 0].legend(loc="best", title=color_factor)
    plt.tight_layout()