def _bivariate_kdeplot(x, y, filled, kernel, bw, gridsize, cut, clip, axlabel,
                       ax, **kwargs):
    """Draw a bivariate kernel density estimate as contours on ``ax``.

    The density grid is requested from ``_statsmodels_bivariate_kde``; when
    that call raises ``ImportError`` the same arguments are handed to
    ``_scipy_bivariate_kde`` instead.

    Parameters
    ----------
    x, y : data vectors
        forwarded unchanged to the KDE helper; if they carry a ``name``
        attribute it is used for the axis label
    filled : bool
        use ``ax.contourf`` when true, ``ax.contour`` otherwise
    kernel : object
        not referenced anywhere in this function
    bw, gridsize, cut : forwarded unchanged to the KDE helper
    clip : None, one (low, high) pair, or one pair per dimension
        ``None`` becomes an unbounded pair for both dimensions and a
        one-dimensional value is duplicated for both dimensions
    axlabel : bool
        when true, label the axes from ``x.name`` / ``y.name`` if present
    ax : matplotlib axis
        axis to draw on
    kwargs : keyword arguments
        ``n_levels`` (default 10) and ``cmap`` are consumed here, the rest is
        passed to the contour call

    Returns
    -------
    ax : matplotlib axis
        the axis that was drawn on

    """
    # Bring ``clip`` into the form [x_limits, y_limits]
    if clip is None:
        unbounded = (-np.inf, np.inf)
        clip = [unbounded, unbounded]
    elif np.ndim(clip) == 1:
        clip = [clip, clip]

    # Evaluate the density, preferring the statsmodels implementation
    kde_args = (x, y, bw, gridsize, cut, clip)
    try:
        grid = _statsmodels_bivariate_kde(*kde_args)
    except ImportError:
        grid = _scipy_bivariate_kde(*kde_args)
    xx, yy, z = grid

    # Resolve the contour options
    n_levels = kwargs.pop("n_levels", 10)
    default_cmap = "BuGn" if filled else "BuGn_d"
    cmap = kwargs.pop("cmap", default_cmap)
    if isinstance(cmap, str) and cmap.endswith("_d"):
        # "_d" names are blended from dark gray into the reversed palette
        reversed_name = cmap.replace("_d", "_r")
        dark_pal = ["#333333"] + list(color_palette(reversed_name, 2))
        cmap = blend_palette(dark_pal, as_cmap=True)

    if filled:
        draw_contours = ax.contourf
    else:
        draw_contours = ax.contour
    draw_contours(xx, yy, z, n_levels, cmap=cmap, **kwargs)

    # Axis labels come from the inputs' ``name`` attributes
    if hasattr(x, "name") and axlabel:
        ax.set_xlabel(x.name)
    if hasattr(y, "name") and axlabel:
        ax.set_ylabel(y.name)

    return ax
def set_color_palette(name, n_colors=6, desat=None):
    """Install a palette as the matplotlib color cycle.

    The palette is built by ``utils.color_palette`` and written to the
    ``axes.color_cycle`` rc parameter; its first color also becomes the
    ``patch.facecolor`` rc parameter.

    Parameters
    ----------
    name : hls | husl | matplotlib colormap | seaborn color palette
        palette name
    n_colors : int
        only relevant for hls or matplotlib palettes
    desat : float
        desaturation factor for each color

    """
    palette = utils.color_palette(name, n_colors, desat)
    rc = mpl.rcParams
    rc["axes.color_cycle"] = palette
    rc["patch.facecolor"] = palette[0]
def _bivariate_kde(x, y, filled, kernel, bw, gridsize, cut, clip, axlabel,
                   ax, **kwargs):
    """Fit a bivariate KDE with statsmodels and draw it as contours.

    Parameters
    ----------
    x, y : data vectors
        the two variables; a ``name`` attribute, when present, is used for
        the axis label
    filled : bool
        use ``ax.contourf`` when true, ``ax.contour`` otherwise
    kernel : object
        not referenced anywhere in this function
    bw : string, scalar, or pair
        a string selects ``sm.nonparametric.bandwidths.bw_<name>`` and is
        evaluated on each variable; a scalar is used for both variables;
        anything else is passed to ``KDEMultivariate`` as given
    gridsize, cut : forwarded to ``_kde_support`` for each dimension
    clip : None, one (low, high) pair, or one pair per dimension
        ``None`` becomes an unbounded pair for both dimensions and a
        one-dimensional value is duplicated for both dimensions
    axlabel : bool
        when true, label the axes from ``x.name`` / ``y.name`` if present
    ax : matplotlib axis
        axis to draw on
    kwargs : keyword arguments
        ``n_levels`` (default 10) and ``cmap`` are consumed here, the rest is
        passed to the contour call

    Returns
    -------
    ax : matplotlib axis
        the axis that was drawn on

    """
    # Bring ``clip`` into the form [x_limits, y_limits]
    if clip is None:
        unbounded = (-np.inf, np.inf)
        clip = [unbounded, unbounded]
    elif np.ndim(clip) == 1:
        clip = [clip, clip]

    # Resolve the bandwidth into one entry per variable
    if isinstance(bw, str):
        bw_rule = getattr(sm.nonparametric.bandwidths, "bw_" + bw)
        bw = [bw_rule(x), bw_rule(y)]
    elif np.isscalar(bw):
        bw = [bw, bw]

    # Fit the estimator and evaluate it on a regular grid
    estimator = sm.nonparametric.KDEMultivariate([x, y], "cc", bw)
    x_support = _kde_support(x, estimator.bw[0], gridsize, cut, clip[0])
    y_support = _kde_support(y, estimator.bw[1], gridsize, cut, clip[1])
    xx, yy = np.meshgrid(x_support, y_support)
    flat_grid = [xx.ravel(), yy.ravel()]
    z = estimator.pdf(flat_grid).reshape(xx.shape)

    # Resolve the contour options
    n_levels = kwargs.pop("n_levels", 10)
    default_cmap = "BuGn" if filled else "BuGn_d"
    cmap = kwargs.pop("cmap", default_cmap)
    if isinstance(cmap, str) and cmap.endswith("_d"):
        # "_d" names are blended from dark gray into the reversed palette
        reversed_name = cmap.replace("_d", "_r")
        dark_pal = ["#333333"] + list(color_palette(reversed_name, 2))
        cmap = blend_palette(dark_pal, as_cmap=True)

    if filled:
        draw_contours = ax.contourf
    else:
        draw_contours = ax.contour
    draw_contours(xx, yy, z, n_levels, cmap=cmap, **kwargs)

    # Axis labels come from the inputs' ``name`` attributes
    if hasattr(x, "name") and axlabel:
        ax.set_xlabel(x.name)
    if hasattr(y, "name") and axlabel:
        ax.set_ylabel(y.name)

    return ax
def _box_colors(vals, color):
    """Choose patch colors and a matching line gray for box/violin plots.

    Parameters
    ----------
    vals : sequence
        one entry per box; only its length / iteration count is used
    color : None, matplotlib color, or palette specification
        ``None`` selects a husl palette; a value accepted by matplotlib's
        ``to_rgb`` is repeated for every entry; anything that makes
        ``to_rgb`` raise ``ValueError`` is passed to ``color_palette``

    Returns
    -------
    colors : list of rgb tuples
        the chosen colors after desaturation by .7
    gray : tuple
        gray level equal to .6 times the lowest lightness among ``colors``

    """
    to_rgb = mpl.colors.colorConverter.to_rgb

    if color is None:
        palette = husl_palette(len(vals), l=.7)
    else:
        try:
            single = to_rgb(color)
            palette = [single for _ in vals]
        except ValueError:
            palette = color_palette(color, len(vals))

    # Patches cover a lot of area, so tone the colors down a little
    rgb_colors = [to_rgb(entry) for entry in palette]
    colors = [desaturate(rgb, .7) for rgb in rgb_colors]

    # Lines are drawn in a gray darker than the darkest patch
    lightness = min(colorsys.rgb_to_hls(*rgb)[1] for rgb in colors) * .6
    gray = (lightness, lightness, lightness)

    return colors, gray
def set_color_palette(name, n_colors=8, desat=None, h=.01, l=.6, s=.65):
    """Install a palette as the matplotlib color cycle.

    The palette is built by ``utils.color_palette`` and written to the
    ``axes.color_cycle`` rc parameter; its first color also becomes the
    ``patch.facecolor`` rc parameter.

    Parameters
    ----------
    name : hls | matplotlib colormap | seaborn color palette
        palette name
    n_colors : int
        only relevant for hls or matplotlib palettes
    desat : float
        desaturation factor for each color
    h : float
        first hue for hls spokes
    l : float
        lightness of hls spokes
    s : float
        saturation of hls spokes

    """
    palette = utils.color_palette(name, n_colors, desat, h, l, s)
    rc = mpl.rcParams
    rc["axes.color_cycle"] = palette
    rc["patch.facecolor"] = palette[0]
def tsplot(x, data, err_style=["ci_band"], ci=68, interpolate=True,
           estimator=np.mean, n_boot=10000, smooth=False,
           err_palette=None, ax=None, **kwargs):
    """Plot timeseries from a set of observations.

    Parameters
    ----------
    x : n_tp array
        x values
    data : n_obs x n_tp array
        array of timeseries data where first axis is e.g. subjects
    err_style : string or list of strings
        names of ways to plot uncertainty across observations from set of
        {ci_band, ci_bars, boot_traces, book_kde, obs_traces, obs_points};
        each name is resolved to the module-level function ``_plot_<name>``
    ci : number or list of numbers
        confidence interval size(s). if a list, it will stack the error
        plots for each confidence interval
    interpolate : boolean
        sets the defaults for the central trace: a line without markers
        when true, "o" markers without a line when false
    estimator : callable
        function to determine central tendency and to pass to bootstrap
        must take an ``axis`` argument
    n_boot : int
        number of bootstrap iterations
    smooth : boolean
        whether to perform a smooth bootstrap (resample from KDE)
    err_palette : palette specification, optional
        when given, error styles with "obs" in their name are drawn with
        ``color_palette(err_palette, len(data), desat=.99)`` instead of the
        color of the central trace
    ax : axis object, optional
        plot in given axis; if None creates a new figure
    kwargs : further keyword arguments for main call to plot()

    Returns
    -------
    ax : matplotlib axis
        axis with plot data

    Raises
    ------
    ValueError
        if an entry of ``err_style`` has no matching ``_plot_<name>``
        function

    """
    if ax is None:
        ax = plt.subplot(111)

    # A bare string would otherwise be iterated character by character.
    # (The list default above is never mutated, so sharing it is harmless.)
    if isinstance(err_style, str):
        err_style = [err_style]

    # Bootstrap the data for confidence intervals
    boot_data = moss.bootstrap(data, n_boot=n_boot, smooth=smooth,
                               axis=0, func=estimator)

    ci_widths = ci if hasattr(ci, "__iter__") else [ci]
    # Halve with a float literal: with integer widths Python 2 would floor
    # ``w / 2`` and e.g. turn a 95% interval into the (3, 97) percentiles.
    ci_bounds = [(50 - w / 2., 50 + w / 2.) for w in ci_widths]
    # A distinct loop name keeps the ``ci`` parameter from being clobbered
    # by the comprehension variable on Python 2.
    cis = [moss.percentiles(boot_data, bounds, axis=0)
           for bounds in ci_bounds]
    central_data = estimator(data, axis=0)

    # Draw the central line once only to learn which color matplotlib
    # assigns to it, then remove it so the error plots end up underneath
    line, = ax.plot(x, central_data, **kwargs)
    color = line.get_color()
    line.remove()
    kwargs.pop("color", None)

    # Use subroutines to plot the uncertainty
    for style in err_style:

        # Grab the function from the global environment
        try:
            plot_func = globals()["_plot_%s" % style]
        except KeyError:
            raise ValueError("%s is not a valid err_style" % style)

        # Possibly plot each observation in a different color
        if err_palette is not None and "obs" in style:
            style_color = color_palette(err_palette, len(data), desat=.99)
        else:
            style_color = color

        plot_kwargs = dict(ax=ax, x=x, data=data,
                           boot_data=boot_data,
                           central_data=central_data,
                           color=style_color)

        for ci_i in cis:
            plot_kwargs["ci"] = ci_i
            plot_func(**plot_kwargs)

    # Replot the central trace so it is prominent
    marker = kwargs.pop("marker", "" if interpolate else "o")
    linestyle = kwargs.pop("linestyle", "-" if interpolate else "")
    ax.plot(x, central_data, color=color, marker=marker,
            linestyle=linestyle, **kwargs)

    return ax
def coefplot(formula, data, groupby=None, intercept=False, ci=95,
             palette="husl"):
    """Plot the coefficients from a linear model.

    Without ``groupby`` a single axis shows one point and interval per model
    term. With ``groupby`` one model is fit per group and a column of axes is
    drawn, one axis per term, with one point and interval per group.

    Parameters
    ----------
    formula : string
        patsy formula for ols model
    data : dataframe
        data for the plot; formula terms must appear in columns
    groupby : grouping object, optional
        object to group data with to fit conditional models
    intercept : bool, optional
        if False, strips the first coefficient before plotting
        NOTE(review): the strip is positional, so a formula without an
        intercept loses its first real term - confirm with callers
    ci : float, optional
        size of confidence intervals
    palette : seaborn color palette, optional
        palette for the horizonal plots

    """
    # Float literals throughout: under Python 2 integer division
    # ``ci / 100`` is 0 for any ci below 100, which silently asks for a
    # 0% interval.
    alpha = 1 - ci / 100.

    if groupby is None:
        # Fit once and read both results from the same fit
        fit = sf.ols(formula, data).fit()
        coefs = fit.params
        cis = fit.conf_int(alpha)
    else:
        grouped = data.groupby(groupby)
        # Transpose so that rows are terms and columns are groups
        coefs = grouped.apply(lambda d: sf.ols(formula, d).fit().params).T
        cis = grouped.apply(lambda d: sf.ols(formula, d).fit().conf_int(alpha))

    # Possibly ignore the intercept
    if not intercept:
        coefs = coefs.iloc[1:]

    n_terms = len(coefs)

    # Figure size is derived from the default figure size
    fig_w, fig_h = mpl.rcParams["figure.figsize"]

    def hsize(n):
        return n * (fig_h / 2.)

    def wsize(n):
        # ``n / 5.`` must not floor to 0 (ZeroDivisionError for n < 5)
        return n * (fig_w / (4 * (n / 5.)))

    # Plot separately depending on groupby
    if groupby is None:
        colors = itertools.cycle(color_palette(palette, n_terms))
        f, ax = plt.subplots(1, 1, figsize=(wsize(n_terms), hsize(1)))
        for i, term in enumerate(coefs.index):
            color = next(colors)
            low, high = cis.loc[term]
            ax.plot([i, i], [low, high], c=color,
                    solid_capstyle="round", lw=2.5)
            ax.plot(i, coefs.loc[term], "o", c=color, ms=8)
        ax.set_xlim(-.5, n_terms - .5)
        ax.axhline(0, ls="--", c="dimgray")
        ax.set_xticks(range(n_terms))
        ax.set_xticklabels(coefs.index)
    else:
        n_groups = len(coefs.columns)
        f, axes = plt.subplots(n_terms, 1, sharex=True,
                               figsize=(wsize(n_groups), hsize(n_terms)))
        # plt.subplots returns a bare axis, not an array, for a 1x1 grid
        if n_terms == 1:
            axes = [axes]
        colors = itertools.cycle(color_palette(palette, n_groups))
        for ax, term in zip(axes, coefs.index):
            for i, group in enumerate(coefs.columns):
                color = next(colors)
                low, high = cis.loc[(group, term)]
                ax.plot([i, i], [low, high], c=color,
                        solid_capstyle="round", lw=2.5)
                ax.plot(i, coefs.loc[term, group], "o", c=color, ms=8)
            ax.set_xlim(-.5, n_groups - .5)
            ax.axhline(0, ls="--", c="dimgray")
            ax.set_title(term)
        # The x axis is shared, so only the bottom axis gets labels
        ax.set_xlabel(groupby)
        ax.set_xticks(range(n_groups))
        ax.set_xticklabels(coefs.columns)
def _lmplot_regress_out(vals, frame, confounds):
    """Return ``vals`` with each confound column of ``frame`` rejected."""
    if confounds is None:
        return vals
    for var in confounds:
        conf = frame[var]
        # Center without ``-=`` so the frame's column is left untouched and
        # integer columns are promoted instead of failing or truncating
        conf = conf - conf.mean()
        center = vals.mean()
        vals = moss.vector_reject(vals - center, conf) + center
    return vals


def lmplot(x, y, data, color=None, row=None, col=None, col_wrap=None,
           x_estimator=None, x_ci=95, n_boot=5000, fit_reg=True,
           order=1, ci=95, logistic=False, truncate=False,
           x_partial=None, y_partial=None, x_jitter=None, y_jitter=None,
           sharex=True, sharey=True, palette="husl", size=None,
           scatter_kws=None, line_kws=None, palette_kws=None):
    """Plot a linear model from a DataFrame.

    Parameters
    ----------
    x, y : strings
        column names in `data` DataFrame for x and y variables
    data : DataFrame
        source of data for the model
    color : string, optional
        DataFrame column name to group the model by color
    row, col : strings, optional
        DataFrame column names to make separate plot facets
    col_wrap : int, optional
        wrap col variable at this width - cannot be used with row facet
    x_estimator : callable, optional
        Interpret X values as factor labels and use this function
        to plot the point estimate and bootstrapped CI
    x_ci : int optional
        size of confidence interval for x_estimator error bars
    n_boot : int, optional
        number of bootstrap iterations to perform
    fit_reg : bool, optional
        if True fit a regression model by color/row/col and plot
    order : int, optional
        order of the regression polynomial to fit (default = 1)
    ci : int, optional
        confidence interval for the regression line; None skips the band
    logistic : bool, optional
        fit the regression line with logistic regression
    truncate : bool, optional
        if True, only fit line from data min to data max
    {x, y}_partial : string or list of strings, optional
        regress these variables out of the factors before plotting
    {x, y}_jitter : float, optional
        parameters for uniformly distributed random noise added to positions
    sharex, sharey : bools, optional
        only relevant if faceting; passed to plt.subplots
    palette : seaborn color palette argument
        if using separate plots by color, draw with this color palette
    size : float, optional
        size (plots are square) for each plot facet
    {scatter, line}_kws : dictionary
        keyword arguments to pass to the underlying plot functions; the
        ``ms``, ``mew`` and ``alpha`` entries of ``scatter_kws`` are read
        here (defaults 4 - or 7 for estimator points -, 0 and .77) and the
        dictionaries passed in are not modified
    palette_kws : dictionary
        keyword arguments for seaborn.color_palette

    Raises
    ------
    ValueError
        if ``col_wrap`` is given without ``col`` or together with ``row``

    """
    # TODO
    # - legend when fit_line is False
    # - wrap title when wide

    # First sort out the general figure layout
    if size is None:
        size = mpl.rcParams["figure.figsize"][1]

    if col is None and col_wrap is not None:
        raise ValueError("Need column facet variable for `col_wrap`")
    if row is not None and col_wrap is not None:
        raise ValueError("Cannot facet rows when using `col_wrap`")

    nrow = 1 if row is None else len(data[row].unique())
    ncol = 1 if col is None else len(data[col].unique())

    if col_wrap is not None:
        ncol = col_wrap
        # float() keeps Python 2 from flooring before the ceil, which would
        # allocate too few rows for the wrapped facets
        nrow = int(np.ceil(len(data[col].unique()) / float(col_wrap)))

    f, axes = plt.subplots(nrow, ncol, sharex=sharex, sharey=sharey,
                           figsize=(size * ncol, size * nrow))
    axes = np.atleast_2d(axes).reshape(nrow, ncol)

    # Build the facet masks from whether a facet variable was given, not
    # from the grid shape: a variable with one level (or col_wrap == 1)
    # still needs its level values for the facet titles below.
    everything = np.repeat(True, len(data))
    if row is None:
        row_masks = [everything]
    else:
        row_vals = np.sort(data[row].unique())
        row_masks = [data[row] == val for val in row_vals]

    if col is None:
        col_masks = [everything]
    else:
        col_vals = np.sort(data[col].unique())
        col_masks = [data[col] == val for val in col_vals]

    if x_partial is not None and not isinstance(x_partial, list):
        x_partial = [x_partial]
    if y_partial is not None and not isinstance(y_partial, list):
        y_partial = [y_partial]

    if palette_kws is None:
        palette_kws = {}

    # Sort out the plot colors
    color_factor = color
    if color_factor is None:
        hue_masks = [everything]
        colors = ["#222222"]
    else:
        hue_vals = np.sort(data[color_factor].unique())
        hue_masks = [data[color_factor] == val for val in hue_vals]
        colors = color_palette(palette, len(hue_masks), **palette_kws)

    # Work on copies so the caller's dictionaries are not emptied by pop()
    scatter_kws = {} if scatter_kws is None else dict(scatter_kws)
    line_kws = {} if line_kws is None else dict(line_kws)

    # Read the marker options once; an explicit ``ms`` applies to both the
    # raw scatter and the estimator points, only the defaults differ
    user_ms = scatter_kws.pop("ms", None)
    scatter_ms = 4 if user_ms is None else user_ms
    estimator_ms = 7 if user_ms is None else user_ms
    scatter_mew = scatter_kws.pop("mew", 0)
    scatter_alpha = scatter_kws.pop("alpha", .77)

    def facet_index(row_i, col_j):
        # With wrapping, the column levels fill the grid row by row
        if col_wrap is not None:
            return col_j // ncol, col_j % ncol
        return row_i, col_j

    # First walk through the facets and plot the scatters
    for row_i, row_mask in enumerate(row_masks):
        for col_j, col_mask in enumerate(col_masks):
            f_row, f_col = facet_index(row_i, col_j)
            ax = axes[f_row, f_col]
            if f_row + 1 == nrow:
                ax.set_xlabel(x)
            if f_col == 0:
                ax.set_ylabel(y)

            # Title the plot if we are faceting
            title = ""
            if row is not None:
                title += "%s = %s" % (row, row_vals[row_i])
            if row is not None and col is not None:
                title += " | "
            if col is not None:
                title += "%s = %s" % (col, col_vals[col_j])
            ax.set_title(title)

            for hue_k, hue_mask in enumerate(hue_masks):
                hue_color = colors[hue_k]
                data_ijk = data[row_mask & col_mask & hue_mask]

                if x_estimator is not None:
                    # Treat x as a factor: one estimate with a bootstrapped
                    # interval per unique x value
                    x_vals = data_ijk[x].unique()
                    y_vals = _lmplot_regress_out(data_ijk[y], data_ijk,
                                                 y_partial)

                    y_grouped = [np.array(y_vals[data_ijk[x] == v])
                                 for v in x_vals]

                    y_est = [x_estimator(y_i) for y_i in y_grouped]
                    y_boots = [moss.bootstrap(np.array(y_i),
                                              func=x_estimator,
                                              n_boot=n_boot)
                               for y_i in y_grouped]
                    ci_lims = [50 - x_ci / 2., 50 + x_ci / 2.]
                    y_ci = [moss.percentiles(y_i, ci_lims)
                            for y_i in y_boots]
                    y_error = ci_to_errsize(np.transpose(y_ci), y_est)

                    ax.plot(x_vals, y_est, "o", mew=scatter_mew,
                            ms=estimator_ms, color=hue_color, **scatter_kws)
                    ax.errorbar(x_vals, y_est, y_error,
                                fmt=None, ecolor=hue_color)
                else:
                    x_ = _lmplot_regress_out(data_ijk[x], data_ijk,
                                             x_partial)
                    y_ = _lmplot_regress_out(data_ijk[y], data_ijk,
                                             y_partial)

                    # Add the noise out of place so integer columns are
                    # promoted to float rather than failing or truncating
                    if x_jitter is not None:
                        x_ = x_ + np.random.uniform(-x_jitter, x_jitter,
                                                    x_.shape)
                    if y_jitter is not None:
                        y_ = y_ + np.random.uniform(-y_jitter, y_jitter,
                                                    y_.shape)
                    ax.plot(x_, y_, "o", color=hue_color,
                            alpha=scatter_alpha, mew=scatter_mew,
                            ms=scatter_ms, **scatter_kws)

    for ax_i in np.ravel(axes):
        ax_i.set_xmargin(.05)
        ax_i.autoscale_view()

    # Now walk through again and plot the regression estimate
    # and a confidence interval for the regression line
    if fit_reg:
        for row_i, row_mask in enumerate(row_masks):
            for col_j, col_mask in enumerate(col_masks):
                f_row, f_col = facet_index(row_i, col_j)
                ax = axes[f_row, f_col]
                xlim = ax.get_xlim()

                for hue_k, hue_mask in enumerate(hue_masks):
                    hue_color = colors[hue_k]
                    data_ijk = data[row_mask & col_mask & hue_mask]
                    x_vals = np.array(data_ijk[x])
                    y_vals = np.array(data_ijk[y])
                    if not len(x_vals):
                        continue

                    # Sort out the limit of the fit
                    if truncate:
                        xx = np.linspace(x_vals.min(), x_vals.max(), 100)
                    else:
                        xx = np.linspace(xlim[0], xlim[1], 100)
                    xx_ = sm.add_constant(xx, prepend=True)

                    # Inner function to bootstrap the regression; it
                    # closes over the prediction grid of this facet/hue
                    def _regress(x, y):
                        if logistic:
                            x_ = sm.add_constant(x, prepend=True)
                            fit = sm.GLM(y, x_,
                                         family=sm.families.Binomial()).fit()
                            reg = fit.predict(xx_)
                        else:
                            fit = np.polyfit(x, y, order)
                            reg = np.polyval(fit, xx)
                        return reg

                    # Remove nuisance variables with vector rejection
                    x_vals = _lmplot_regress_out(x_vals, data_ijk, x_partial)
                    y_vals = _lmplot_regress_out(y_vals, data_ijk, y_partial)

                    # Regression line confidence interval
                    if ci is not None:
                        ci_lims = [50 - ci / 2., 50 + ci / 2.]
                        boots = moss.bootstrap(x_vals, y_vals,
                                               func=_regress,
                                               n_boot=n_boot)
                        ci_band = moss.percentiles(boots, ci_lims, axis=0)
                        ax.fill_between(xx, *ci_band, color=hue_color,
                                        alpha=.15)

                    # Regression line
                    reg = _regress(x_vals, y_vals)
                    if color_factor is None:
                        label = ""
                    else:
                        label = hue_vals[hue_k]
                    ax.plot(xx, reg, color=hue_color,
                            label=str(label), **line_kws)
                    # Drawing the line must not change the x limits
                    ax.set_xlim(xlim)

    # Plot the legend on the upper left facet and adjust the layout
    if color_factor is not None and color_factor not in [row, col]:
        axes[0, 0].legend(loc="best", title=color_factor)
    plt.tight_layout()
def tsplot(data, time=None, unit=None, condition=None, value=None,
           err_style="ci_band", ci=68, interpolate=True, color=None,
           estimator=np.mean, n_boot=5000, err_palette=None, err_kws=None,
           legend=True, ax=None, **kwargs):
    """Plot one or more timeseries with flexible representation of uncertainty.

    This function can take data specified either as a long-form (tidy)
    DataFrame or as an ndarray with dimensions for sampling unit, time, and
    (optionally) condition. The interpretation of some of the other parameters
    changes depending on the type of object passed as data.

    Parameters
    ----------
    data : DataFrame or ndarray
        Data for the plot. Should either be a "long form" dataframe or an
        array with dimensions (unit, time, condition). In both cases, the
        condition field/dimension is optional. The type of this argument
        determines the interpretation of the next few parameters.
    time : string or series-like
        Either the name of the field corresponding to time in the data
        DataFrame or x values for a plot when data is an array. If a Series,
        the name will be used to label the x axis.
    value : string
        Either the name of the field corresponding to the data values in
        the data DataFrame (i.e. the y coordinate) or a string that forms
        the y axis label when data is an array.
    unit : string
        Field in the data DataFrame identifying the sampling unit (e.g.
        subject, neuron, etc.). The error representation will collapse over
        units at each time/condition observation. This has no role when data
        is an array.
    condition : string or Series-like
        Either the name of the field identifying the condition an observation
        falls under in the data DataFrame, or a sequence of names with a
        length equal to the size of the third dimension of data. There will
        be a separate trace plotted for each condition. If condition is a
        Series with a name attribute, the name will form the title for the
        plot legend (unless legend is set to False).
    err_style : string or list of strings or None
        Names of ways to plot uncertainty across units from set of
        {ci_band, ci_bars, boot_traces, book_kde, unit_traces, unit_points}.
        Can use one or more than one method. Each name is resolved to the
        module-level function ``_plot_<name>``.
    ci : float or list of floats in [0, 100]
        Confidence interval size(s). If a list, it will stack the error
        plots for each confidence interval. Only relevant for error styles
        with "ci" in the name.
    interpolate : boolean
        Whether to do a linear interpolation between each timepoint when
        plotting. The value of this parameter also determines the marker
        used for the main plot traces, unless marker is specified as a keyword
        argument.
    color : seaborn palette or matplotlib color name or dictionary
        Palette or color for the main plots and error representation (unless
        plotting by unit, which can be separately controlled with err_palette).
        If a dictionary, should map condition name to color spec.
    estimator : callable
        Function to determine central tendency and to pass to bootstrap
        must take an ``axis`` argument.
    n_boot : int
        Number of bootstrap iterations.
    err_palette: seaborn palette
        Palette name or list of colors used when plotting data for each unit.
    err_kws : dict, optional
        Keyword argument dictionary passed through to matplotlib
        function generating the error plot,
    legend : boolean
        Whether to draw a legend; it is switched off when there is no
        condition information.
    ax : axis object, optional
        Plot in given axis; if None uses the current axis (``plt.gca()``)
    kwargs :
        Other keyword arguments are passed to main plot() call

    Returns
    -------
    ax : matplotlib axis
        axis with plot data

    Raises
    ------
    ValueError
        If ``color`` is a dictionary but array data comes without condition
        names, or if an entry of ``err_style`` has no matching plot function.

    """
    # Sort out default values for the parameters
    if ax is None:
        ax = plt.gca()

    if err_kws is None:
        err_kws = {}

    # Handle different types of input data
    if isinstance(data, pd.DataFrame):

        xlabel = time
        ylabel = value

        # Condition is optional
        if condition is None:
            # Group everything together. The dummy key shares the frame's
            # index so the groupby below lines up for any index, not just
            # the default 0..n-1 one.
            condition = pd.Series(np.ones(len(data)), index=data.index)
            legend = False
            legend_name = None
            n_cond = 1
        else:
            legend = True and legend
            legend_name = condition
            n_cond = len(data[condition].unique())

    else:
        data = np.asarray(data)

        # Data can be a timecourse from a single unit or
        # several observations in one condition
        if data.ndim == 1:
            data = data[np.newaxis, :, np.newaxis]
        elif data.ndim == 2:
            data = data[:, :, np.newaxis]
        n_unit, n_time, n_cond = data.shape

        # Units are experimental observations. Maybe subjects, or neurons.
        # ``unit`` has no role for array input, so the ids are always
        # generated (they used to be left undefined when ``unit`` was given).
        units = np.repeat(np.arange(n_unit), n_time * n_cond)
        unit = "unit"

        # Time forms the xaxis of the plot
        if time is None:
            times = np.arange(n_time)
        else:
            times = np.asarray(time)
        xlabel = getattr(time, "name", None)
        time = "time"
        times = np.tile(np.repeat(times, n_cond), n_unit)

        # Conditions split the timeseries plots
        legend_name = None
        if condition is None:
            conds = range(n_cond)
            legend = False
            if isinstance(color, dict):
                err = "Must have condition names if using color dict."
                raise ValueError(err)
        else:
            conds = np.asarray(condition)
            legend = True and legend
            legend_name = getattr(condition, "name", None)
        condition = "cond"
        conds = np.tile(conds, n_unit * n_time)

        # Value forms the y value in the plot
        ylabel = value
        value = "value"

        # Convert to long-form DataFrame; the repeat/tile patterns above
        # follow the order in which ravel() walks (unit, time, condition)
        data = pd.DataFrame(dict(value=data.ravel(),
                                 time=times,
                                 unit=units,
                                 cond=conds))

    # Set up the err_style and ci arguments for the loop below
    if isinstance(err_style, string_types):
        err_style = [err_style]
    elif err_style is None:
        err_style = []
    if not hasattr(ci, "__iter__"):
        ci = [ci]

    # Set up the color palette
    if color is None:
        colors = color_palette()
    elif isinstance(color, dict):
        colors = [color[c] for c in data[condition].unique()]
    else:
        try:
            colors = color_palette(color, n_cond)
        except ValueError:
            color = mpl.colors.colorConverter.to_rgb(color)
            colors = [color] * n_cond

    # Read these once, before the loop: popping inside the loop applied a
    # user-supplied marker/linestyle to the first condition only
    marker = kwargs.pop("marker", "" if interpolate else "o")
    linestyle = kwargs.pop("linestyle", "-" if interpolate else "")

    # Do a groupby with condition and plot each trace
    for c, (cond, df_c) in enumerate(data.groupby(condition, sort=False)):

        df_c = df_c.pivot(index=unit, columns=time, values=value)
        # Builtin float: the ``np.float`` alias no longer exists in NumPy
        x = df_c.columns.values.astype(float)

        # Bootstrap the data for confidence intervals
        boot_data = moss.bootstrap(df_c.values, n_boot=n_boot,
                                   axis=0, func=estimator)
        cis = [moss.ci(boot_data, v, axis=0) for v in ci]
        central_data = estimator(df_c.values, axis=0)

        # Get the color for this condition; wrap around when the palette
        # has fewer entries than there are conditions
        trace_color = colors[c % len(colors)]

        # Use subroutines to plot the uncertainty
        for style in err_style:

            # Allow for null style (only plot central tendency)
            if style is None:
                continue

            # Grab the function from the global environment
            try:
                plot_func = globals()["_plot_%s" % style]
            except KeyError:
                raise ValueError("%s is not a valid err_style" % style)

            # Possibly plot each observation in a different color
            if err_palette is not None and "unit" in style:
                style_color = color_palette(err_palette, len(df_c.values))
            else:
                style_color = trace_color

            # Pass all parameters to the error plotter as keyword args
            plot_kwargs = dict(ax=ax, x=x, data=df_c.values,
                               boot_data=boot_data,
                               central_data=central_data,
                               color=style_color, err_kws=err_kws)

            # Plot the error representation, possibly for multiple cis
            for ci_i in cis:
                plot_kwargs["ci"] = ci_i
                plot_func(**plot_kwargs)

        # Plot the central trace
        label = kwargs.pop("label", cond if legend else "_nolegend_")
        ax.plot(x, central_data, color=trace_color, label=label,
                marker=marker, linestyle=linestyle, **kwargs)

    # Pad the sides of the plot only when not interpolating
    ax.set_xlim(x.min(), x.max())
    if not interpolate:
        # One time step of padding; fall back to 1 when there is only a
        # single time point and therefore no step to measure
        x_diff = x[1] - x[0] if len(x) > 1 else 1
        ax.set_xlim(x.min() - x_diff, x.max() + x_diff)

    # Add the plot labels
    if xlabel is not None:
        ax.set_xlabel(xlabel)
    if ylabel is not None:
        ax.set_ylabel(ylabel)
    if legend:
        ax.legend(loc=0, title=legend_name)

    return ax
def violin(vals, groupby=None, inner="box", color=None, positions=None,
           names=None, widths=.8, alpha=None, join_rm=False, kde_thresh=1e-2,
           inner_kws=None, ax=None, **kwargs):
    """Create a violin plot (a combination of boxplot and KDE plot).

    Parameters
    ----------
    vals : array or sequence of arrays
        data to plot
    groupby : grouping object
        if `vals` is a Series, this is used to group
    inner : box | stick | sticks | points
        plot quartiles or individual sample values inside violin
    color : mpl color, sequence of colors, or seaborn palette name
        inner violin colors
    positions : number or sequence of numbers
        position of first violin or positions of each violin
    widths : float
        width of each violin at maximum density
    alpha : float, optional
        transparancy of violin fill
    join_rm : boolean, optional
        if True, positions in the input arrays are treated as repeated
        measures and are joined with a line plot
    names : list of strings, optional
        names to plot on x axis, otherwise plots numbers
    kde_thresh : float, optional
        proportion of maximum at which to threshold the KDE curve
    inner_kws : dict, optional
        keyword arugments for inner plot; ``alpha``, ``color``, ``marker``
        and ``lw`` are read here and the dictionary passed in is not modified
    ax : matplotlib axis, optional
        axis to plot on, otherwise uses the current axis
    kwargs : additional keyword arguments
        accepted but not used by this function

    Returns
    -------
    ax : matplotlib axis
        axis with violin plot

    Raises
    ------
    ValueError
        if an array input has more than two dimensions or if ``names`` does
        not have one entry per violin

    """
    if ax is None:
        ax = plt.gca()

    # Sort out the data and the axis labels by input type
    if isinstance(vals, pd.DataFrame):
        if names is None:
            names = vals.columns
        xlabel = vals.columns.name
        ylabel = None
        vals = vals.values

    elif isinstance(vals, pd.Series) and groupby is not None:
        # getattr with a default: a grouping object without a ``name``
        # used to leave ``xlabel`` undefined
        xlabel = getattr(groupby, "name", None)
        ylabel = vals.name
        # Iterate the groups instead of relying on ``pd.groupby(...).values``
        group_keys = []
        group_arrays = []
        for key, group in vals.groupby(groupby):
            group_keys.append(key)
            group_arrays.append(group.values)
        if names is None:
            names = group_keys
        vals = group_arrays
    else:
        xlabel = None
        ylabel = None

    # Turn array input into a list with one vector per violin
    if hasattr(vals, 'shape'):
        if len(vals.shape) == 1:
            if hasattr(vals[0], 'shape'):
                vals = list(vals)
            else:
                vals = [vals]
        elif len(vals.shape) == 2:
            nr, nc = vals.shape
            if nr == 1:
                vals = [vals]
            elif nc == 1:
                vals = [vals.ravel()]
            else:
                vals = [vals[:, i] for i in range(nc)]
        else:
            raise ValueError("Input x can have no more than 2 dimensions")
    if not hasattr(vals[0], '__len__'):
        vals = [vals]

    vals = [np.asarray(a, float) for a in vals]

    # Sort out the violin colors
    to_rgb = mpl.colors.colorConverter.to_rgb
    if color is None:
        colors = husl_palette(len(vals), l=.7)
    # Strings are iterable on Python 3 but name a single color or palette
    elif hasattr(color, "__iter__") and not isinstance(color, (tuple, str)):
        colors = color
    else:
        try:
            color = to_rgb(color)
            colors = [color for _ in vals]
        except ValueError:
            colors = color_palette(color, len(vals))

    # Desaturate a bit because these are patches
    colors = [to_rgb(c) for c in colors]
    colors = [desaturate(c, .7) for c in colors]

    # Outlines use a gray darker than the darkest fill
    lightness = min(colorsys.rgb_to_hls(*c)[1] for c in colors) * .6
    gray = (lightness, lightness, lightness)

    # Copy so that the pops below leave the caller's dictionary intact
    inner_kws = {} if inner_kws is None else dict(inner_kws)

    if positions is None:
        positions = np.arange(1, len(vals) + 1)
    elif not hasattr(positions, "__iter__"):
        positions = np.arange(positions, len(vals) + positions)

    in_alpha = inner_kws.pop("alpha", .6 if inner == "points" else 1)
    in_alpha = in_alpha * (1 if alpha is None else alpha)
    in_color = inner_kws.pop("color", gray)
    in_marker = inner_kws.pop("marker", ".")
    in_lw = inner_kws.pop("lw", 1.5 if inner == "box" else .8)

    for i, a in enumerate(vals):
        x = positions[i]
        kde = stats.gaussian_kde(a)
        y = _kde_support(a, kde, 1000, kde_thresh)
        dens = kde(y)
        # Scale so the widest point spans ``widths``; the float literal
        # keeps an integer ``widths`` from being floored on Python 2
        scl = 1 / (dens.max() / (widths / 2.))
        dens *= scl

        ax.fill_betweenx(y, x - dens, x + dens, alpha=alpha, color=colors[i])
        if inner == "box":
            # Quartile and median lines reach exactly to the violin edge
            for quant in moss.percentiles(a, [25, 75]):
                q_x = kde(quant) * scl
                q_x = [x - q_x, x + q_x]
                ax.plot(q_x, [quant, quant], color=in_color,
                        linestyle=":", linewidth=in_lw, **inner_kws)
            med = np.median(a)
            m_x = kde(med) * scl
            m_x = [x - m_x, x + m_x]
            ax.plot(m_x, [med, med], color=in_color,
                    linestyle="--", linewidth=in_lw, **inner_kws)
        elif inner in ("stick", "sticks"):
            # The docstring has always advertised "sticks"; accept both
            x_vals = kde(a) * scl
            x_vals = [x - x_vals, x + x_vals]
            ax.plot(x_vals, [a, a], color=in_color,
                    linewidth=in_lw, alpha=in_alpha, **inner_kws)
        elif inner == "points":
            x_vals = [x for _ in a]
            ax.plot(x_vals, a, in_marker, color=in_color,
                    alpha=in_alpha, mew=0, **inner_kws)
        for side in [-1, 1]:
            ax.plot((side * dens) + x, y, c=gray, linewidth=1.5)

    if join_rm:
        # Join at the violins' actual positions, not at 1..n
        ax.plot(positions, vals, color=in_color, alpha=2. / 3)

    ax.set_xticks(positions)
    if names is not None:
        if len(vals) != len(names):
            raise ValueError("Length of names list must match nuber of bins")
        ax.set_xticklabels(names)
    ax.set_xlim(positions[0] - .5, positions[-1] + .5)

    if xlabel is not None:
        ax.set_xlabel(xlabel)
    if ylabel is not None:
        ax.set_ylabel(ylabel)

    ax.xaxis.grid(False)
    return ax
def boxplot(vals, groupby=None, names=None, join_rm=False, color=None,
            alpha=None, fliersize=3, linewidth=1.5, widths=.8, ax=None,
            **kwargs):
    """Wrapper for matplotlib boxplot that allows better color control.

    Parameters
    ----------
    vals : sequence of data containers
        data for plot
    groupby : grouping object
        if `vals` is a Series, this is used to group
    names : list of strings, optional
        names to plot on x axis, otherwise plots numbers
    join_rm : boolean, optional
        if True, positions in the input arrays are treated as repeated
        measures and are joined with a line plot
    color : mpl color, sequence of colors, or seaborn palette name
        inner box color
    alpha : float
        transparancy of the inner box color
    fliersize : float, optional
        markersize for the fliers
    linewidth : float, optional
        width for the box outlines and whiskers
    widths : float, optional
        passed to matplotlib's boxplot as ``widths``
    ax : matplotlib axis, optional
        will plot in axis, or uses the current axis
    kwargs : additional keyword arguments to boxplot

    Returns
    -------
    ax : matplotlib axis
        axis where boxplot is plotted

    """
    if ax is None:
        ax = plt.gca()

    # Sort out the data and the axis labels by input type
    if isinstance(vals, pd.DataFrame):
        if names is None:
            names = vals.columns
        xlabel = vals.columns.name
        vals = vals.values
        ylabel = None

    elif isinstance(vals, pd.Series) and groupby is not None:
        # getattr with a default: a grouping object without a ``name``
        # used to leave ``xlabel`` undefined
        xlabel = getattr(groupby, "name", None)
        ylabel = vals.name
        # Iterate the groups instead of relying on ``pd.groupby(...).values``
        group_keys = []
        group_arrays = []
        for key, group in vals.groupby(groupby):
            group_keys.append(key)
            group_arrays.append(group.values)
        if names is None:
            names = group_keys
        vals = group_arrays
    else:
        xlabel = None
        ylabel = None

    boxes = ax.boxplot(vals, patch_artist=True, widths=widths, **kwargs)

    # Count the boxes matplotlib actually drew. Deriving the number from the
    # shape of ``vals`` gives the number of observations for list input,
    # which yields near-identical hues or too few colors.
    n_boxes = len(boxes["boxes"])

    # Sort out the box colors
    to_rgb = mpl.colors.colorConverter.to_rgb
    if color is None:
        colors = husl_palette(n_boxes, l=.7)
    # Strings are iterable on Python 3 but name a single color or palette
    elif hasattr(color, "__iter__") and not isinstance(color, (tuple, str)):
        colors = color
    else:
        try:
            color = to_rgb(color)
            colors = [color] * n_boxes
        except ValueError:
            colors = color_palette(color, n_boxes)

    # Desaturate a bit because these are patches
    colors = [to_rgb(c) for c in colors]
    colors = [desaturate(c, .7) for c in colors]

    # Lines use a gray darker than the darkest box
    lightness = min(colorsys.rgb_to_hls(*c)[1] for c in colors) * .6
    gray = (lightness, lightness, lightness)

    for i, box in enumerate(boxes["boxes"]):
        box.set_color(colors[i])
        if alpha is not None:
            box.set_alpha(alpha)
        box.set_edgecolor(gray)
        box.set_linewidth(linewidth)
    for whisk in boxes["whiskers"]:
        whisk.set_color(gray)
        whisk.set_linewidth(linewidth)
        whisk.set_linestyle("-")
    for cap in boxes["caps"]:
        cap.set_color(gray)
        cap.set_linewidth(linewidth)
    for med in boxes["medians"]:
        med.set_color(gray)
        med.set_linewidth(linewidth)
    for fly in boxes["fliers"]:
        fly.set_color(gray)
        fly.set_marker("d")
        fly.set_markeredgecolor(gray)
        fly.set_markersize(fliersize)

    if join_rm:
        # Arrange the data as (box, observation) so that every observation
        # becomes one line running across the boxes. A 2-D array holds one
        # box per column; a sequence of vectors holds one box per entry.
        if hasattr(vals, "shape") and len(vals.shape) == 2:
            rm_vals = np.asarray(vals).T
        else:
            rm_vals = np.atleast_2d(np.asarray(list(vals)))
        if rm_vals.shape[0] != n_boxes:
            rm_vals = rm_vals.T
        ax.plot(range(1, n_boxes + 1), rm_vals,
                color=gray, alpha=2. / 3)

    if names is not None:
        ax.set_xticklabels(names)
    if xlabel is not None:
        ax.set_xlabel(xlabel)
    if ylabel is not None:
        ax.set_ylabel(ylabel)

    ax.xaxis.grid(False)
    return ax
def violin(vals, groupby=None, inner="box", color=None, positions=None,
           names=None, widths=.8, alpha=None, join_rm=False, kde_thresh=1e-2,
           inner_kws=None, ax=None, **kwargs):
    """Create a violin plot (a combination of boxplot and KDE plot).

    Parameters
    ----------
    vals : array or sequence of arrays
        data to plot
    groupby : grouping object
        if `vals` is a Series, this is used to group
    inner : box | stick | sticks | points
        plot quartiles or individual sample values inside violin
        ("stick" and "sticks" are equivalent)
    color : mpl color, sequence of colors, or seaborn palette name
        inner violin colors
    positions : number or sequence of numbers
        position of first violin or positions of each violin
    names : list of strings, optional
        names to plot on x axis, otherwise plots numbers
    widths : float
        width of each violin at maximum density
    alpha : float, optional
        transparency of violin fill
    join_rm : boolean, optional
        if True, positions in the input arrays are treated as repeated
        measures and are joined with a line plot
    kde_thresh : float, optional
        proportion of maximum at which to threshold the KDE curve
    inner_kws : dict, optional
        keyword arguments for inner plot; "alpha", "color", "marker" and
        "lw" are interpreted here, the rest is passed to ``ax.plot``
    ax : matplotlib axis, optional
        axis to plot on, otherwise creates new one
    kwargs :
        accepted but not used by this function

    Returns
    -------
    ax : matplotlib axis
        axis with violin plot

    Raises
    ------
    ValueError
        if `vals` has more than two dimensions or `names` does not have one
        entry per violin

    """
    if ax is None:
        ax = plt.gca()

    if isinstance(vals, pd.DataFrame):
        if names is None:
            names = vals.columns
        # An unnamed column index has ``name`` None -> no x label
        xlabel = vals.columns.name
        ylabel = None
        vals = vals.values

    elif isinstance(vals, pd.Series) and groupby is not None:
        # A plain list/array grouper has no ``name``; previously this left
        # ``xlabel`` unbound and crashed when labelling the axes below.
        xlabel = getattr(groupby, "name", None)
        ylabel = vals.name
        # NOTE(review): relies on the legacy ``pd.groupby`` function and on
        # attribute access over groups -- confirm against the pandas version
        # this module targets.
        grouped_vals = pd.groupby(vals, groupby).values
        if names is None:
            names = grouped_vals.index
        vals = grouped_vals.values
    else:
        xlabel = None
        ylabel = None

    # Normalize the input to a list with one 1-d float array per violin
    if hasattr(vals, 'shape'):
        if len(vals.shape) == 1:
            if hasattr(vals[0], 'shape'):
                vals = list(vals)
            else:
                vals = [vals]
        elif len(vals.shape) == 2:
            nr, nc = vals.shape
            if nr == 1:
                vals = [vals]
            elif nc == 1:
                vals = [vals.ravel()]
            else:
                # ``range`` works on both Python 2 and 3 (was ``xrange``)
                vals = [vals[:, i] for i in range(nc)]
        else:
            raise ValueError("Input x can have no more than 2 dimensions")
    if not hasattr(vals[0], '__len__'):
        vals = [vals]

    vals = [np.asarray(a, float) for a in vals]

    # Resolve the `color` argument into one color per violin
    if color is None:
        colors = husl_palette(len(vals), l=.7)
    elif (hasattr(color, "__iter__")
          and not isinstance(color, (tuple, str))):
        # Strings are iterable on Python 3 but name a single color/palette
        colors = color
    else:
        try:
            color = mpl.colors.colorConverter.to_rgb(color)
            colors = [color for _ in vals]
        except ValueError:
            # Not a matplotlib color spec; treat it as a palette name
            colors = color_palette(color, len(vals))

    colors = [mpl.colors.colorConverter.to_rgb(c) for c in colors]
    colors = [desaturate(c, .7) for c in colors]

    # Outline gray is derived from the darkest violin color
    light_vals = [colorsys.rgb_to_hls(*c)[1] for c in colors]
    lum = min(light_vals) * .6
    gray = (lum, lum, lum)

    # Work on a copy so the pops below never modify the caller's dict
    inner_kws = {} if inner_kws is None else dict(inner_kws)

    if positions is None:
        positions = np.arange(1, len(vals) + 1)
    elif not hasattr(positions, "__iter__"):
        positions = np.arange(positions, len(vals) + positions)

    in_alpha = inner_kws.pop("alpha", .6 if inner == "points" else 1)
    in_alpha *= 1 if alpha is None else alpha
    in_color = inner_kws.pop("color", gray)
    in_marker = inner_kws.pop("marker", ".")
    in_lw = inner_kws.pop("lw", 1.5 if inner == "box" else .8)

    for i, a in enumerate(vals):
        x = positions[i]
        kde = stats.gaussian_kde(a)
        y = _kde_support(a, kde, 1000, kde_thresh)
        dens = kde(y)
        # Scale so the widest point of the violin spans `widths`
        scl = 1 / (dens.max() / (widths / 2))
        dens *= scl

        ax.fill_betweenx(y, x - dens, x + dens, alpha=alpha, color=colors[i])

        if inner == "box":
            # Dotted lines at the quartiles, dashed line at the median,
            # each clipped to the violin width at that value
            for quant in moss.percentiles(a, [25, 75]):
                q_x = kde(quant) * scl
                q_x = [x - q_x, x + q_x]
                ax.plot(q_x, [quant, quant], color=in_color,
                        linestyle=":", linewidth=in_lw, **inner_kws)
            med = np.median(a)
            m_x = kde(med) * scl
            m_x = [x - m_x, x + m_x]
            ax.plot(m_x, [med, med], color=in_color,
                    linestyle="--", linewidth=in_lw, **inner_kws)
        elif inner in ("stick", "sticks"):
            # The docstring has always advertised "sticks"; accept both
            x_vals = kde(a) * scl
            x_vals = [x - x_vals, x + x_vals]
            ax.plot(x_vals, [a, a], color=in_color,
                    linewidth=in_lw, alpha=in_alpha, **inner_kws)
        elif inner == "points":
            x_vals = [x for _ in a]
            ax.plot(x_vals, a, in_marker, color=in_color,
                    alpha=in_alpha, mew=0, **inner_kws)
        for side in [-1, 1]:
            ax.plot((side * dens) + x, y, c=gray, linewidth=1.5)

    if join_rm:
        # Join at the actual violin positions (identical to 1..n by default)
        ax.plot(positions, vals, color=in_color, alpha=2. / 3)

    ax.set_xticks(positions)
    if names is not None:
        if len(vals) != len(names):
            raise ValueError("Length of names list must match nuber of bins")
        ax.set_xticklabels(names)
    ax.set_xlim(positions[0] - .5, positions[-1] + .5)

    if xlabel is not None:
        ax.set_xlabel(xlabel)
    if ylabel is not None:
        ax.set_ylabel(ylabel)

    ax.xaxis.grid(False)
    return ax
def tsplot(x, data, err_style="ci_band", ci=68, interpolate=True,
           estimator=np.mean, n_boot=10000, smooth=False,
           err_palette=None, ax=None, err_kws=None, **kwargs):
    """Plot timeseries from a set of observations.

    Parameters
    ----------
    x : n_tp array
        x values
    data : n_obs x n_tp array
        array of timeseries data where first axis is observations. other
        objects (e.g. DataFrames) are converted to an array if possible
    err_style : string or list of strings
        names of ways to plot uncertainty across observations from set of
        {ci_band, ci_bars, boot_traces, boot_kde, obs_traces, obs_points};
        each name is resolved to a module-level ``_plot_<name>`` function
    ci : int or list of ints
        confidence interval size(s). if a list, it will stack the error
        plots for each confidence interval
    interpolate : boolean
        if True the central trace is drawn as a line without markers,
        otherwise as "o" markers without a line (unless ``marker`` or
        ``linestyle`` are given in kwargs)
    estimator : callable
        function to determine central tendency and to pass to bootstrap
        must take an ``axis`` argument
    n_boot : int
        number of bootstrap iterations
    smooth : boolean
        whether to perform a smooth bootstrap (resample from KDE)
    err_palette : seaborn palette, optional
        if given, error styles with "obs" in their name get one color per
        observation drawn from this palette
    ax : axis object, optional
        plot in given axis; if None creates a new figure
    err_kws : dict, optional
        keyword argument dictionary passed through to matplotlib
        function generating the error plot
    kwargs : further keyword arguments for main call to plot()

    Returns
    -------
    ax : matplotlib axis
        axis with plot data

    Raises
    ------
    ValueError
        if an entry of `err_style` does not name a known error plotter

    """
    if ax is None:
        ax = plt.gca()

    if err_kws is None:
        err_kws = {}

    # Bootstrap the data for confidence intervals
    data = np.asarray(data)
    boot_data = moss.bootstrap(data, n_boot=n_boot, smooth=smooth,
                               axis=0, func=estimator)
    if not hasattr(ci, "__iter__"):
        ci = [ci]
    # Float division keeps odd integer widths (e.g. 95) symmetric even when
    # Python 2 integer division is in effect
    ci_vals = [(50 - w / 2., 50 + w / 2.) for w in ci]
    cis = [moss.percentiles(boot_data, v, axis=0) for v in ci_vals]
    central_data = estimator(data, axis=0)

    # Plot the timeseries line to get its color
    line, = ax.plot(x, central_data, **kwargs)
    color = line.get_color()
    line.remove()
    kwargs.pop("color", None)

    # A bare string is iterable on Python 3; wrap it so it is treated as a
    # single style name rather than a sequence of characters
    if isinstance(err_style, str) or not hasattr(err_style, "__iter__"):
        err_style = [err_style]

    # Use subroutines to plot the uncertainty
    for style in err_style:

        # Grab the function from the global environment
        try:
            plot_func = globals()["_plot_%s" % style]
        except KeyError:
            raise ValueError("%s is not a valid err_style" % style)

        # Possibly set up to plot each observation in a different color
        per_obs_colors = err_palette is not None and "obs" in style
        if per_obs_colors:
            orig_color = color
            color = color_palette(err_palette, len(data), desat=.99)

        plot_kwargs = dict(ax=ax, x=x, data=data,
                           boot_data=boot_data,
                           central_data=central_data,
                           color=color, err_kws=err_kws)

        for ci_i in cis:
            plot_kwargs["ci"] = ci_i
            plot_func(**plot_kwargs)

        # Restore the single trace color for later styles / the main line
        if per_obs_colors:
            color = orig_color

    # Replot the central trace so it is prominent
    marker = kwargs.pop("marker", "" if interpolate else "o")
    linestyle = kwargs.pop("linestyle", "-" if interpolate else "")
    ax.plot(x, central_data, color=color,
            marker=marker, linestyle=linestyle, **kwargs)

    return ax
def boxplot(vals, groupby=None, names=None, join_rm=False, color=None,
            alpha=None, fliersize=3, linewidth=1.5, widths=.8, ax=None,
            **kwargs):
    """Wrapper for matplotlib boxplot that allows better color control.

    Parameters
    ----------
    vals : sequence of data containers
        data for plot
    groupby : grouping object
        if `vals` is a Series, this is used to group
    names : list of strings, optional
        names to plot on x axis, otherwise plots numbers (or, for grouped
        Series input, the group keys)
    join_rm : boolean, optional
        if True, positions in the input arrays are treated as repeated
        measures and are joined with a line plot
    color : mpl color, sequence of colors, or seaborn palette name
        inner box color
    alpha : float
        transparency of the inner box color
    fliersize : float, optional
        markersize for the fliers
    linewidth : float, optional
        width for the box outlines and whiskers
    widths : float, optional
        forwarded to matplotlib's boxplot as ``widths``
    ax : matplotlib axis, optional
        will plot in axis, or create new figure axis
    kwargs : additional keyword arguments to boxplot

    Returns
    -------
    ax : matplotlib axis
        axis where boxplot is plotted

    """
    if ax is None:
        ax = plt.gca()

    if isinstance(vals, pd.DataFrame):
        if names is None:
            names = vals.columns
        # An unnamed column index has ``name`` None -> no x label
        xlabel = vals.columns.name
        vals = vals.values
        ylabel = None

    elif isinstance(vals, pd.Series) and groupby is not None:
        # A plain list/array grouper has no ``name``; previously this left
        # ``xlabel`` unbound and crashed when labelling the axes below.
        xlabel = getattr(groupby, "name", None)
        ylabel = vals.name
        # NOTE(review): relies on the legacy ``pd.groupby`` function and on
        # attribute access over groups -- confirm against the pandas version
        # this module targets.
        grouped_vals = pd.groupby(vals, groupby).values
        if names is None:
            # Take the labels from the grouped result itself so they line up
            # with the boxes. The previous default, ``pd.unique(groupby)``,
            # is in order of first appearance and made this branch dead.
            names = grouped_vals.index
        vals = grouped_vals.values
    else:
        xlabel = None
        ylabel = None

    boxes = ax.boxplot(vals, patch_artist=True, widths=widths, **kwargs)
    vals = np.atleast_2d(vals).T

    # Resolve the `color` argument into one color per entry of `vals`
    if color is None:
        colors = husl_palette(len(vals), l=.7)
    elif (hasattr(color, "__iter__")
          and not isinstance(color, (tuple, str))):
        # Strings are iterable on Python 3 but name a single color/palette
        colors = color
    else:
        try:
            color = mpl.colors.colorConverter.to_rgb(color)
            colors = [color for _ in vals]
        except ValueError:
            # Not a matplotlib color spec; treat it as a palette name
            colors = color_palette(color, len(vals))

    colors = [mpl.colors.colorConverter.to_rgb(c) for c in colors]
    colors = [desaturate(c, .7) for c in colors]

    # Outline gray is derived from the darkest box color
    light_vals = [colorsys.rgb_to_hls(*c)[1] for c in colors]
    lum = min(light_vals) * .6
    gray = (lum, lum, lum)

    for i, box in enumerate(boxes["boxes"]):
        box.set_color(colors[i])
        if alpha is not None:
            box.set_alpha(alpha)
        box.set_edgecolor(gray)
        box.set_linewidth(linewidth)
    for whisk in boxes["whiskers"]:
        whisk.set_color(gray)
        whisk.set_linewidth(linewidth)
        whisk.set_linestyle("-")
    for cap in boxes["caps"]:
        cap.set_color(gray)
        cap.set_linewidth(linewidth)
    for med in boxes["medians"]:
        med.set_color(gray)
        med.set_linewidth(linewidth)
    for fly in boxes["fliers"]:
        fly.set_color(gray)
        fly.set_marker("d")
        fly.set_markeredgecolor(gray)
        fly.set_markersize(fliersize)

    if join_rm:
        ax.plot(range(1, len(vals.T) + 1), vals.T,
                color=gray, alpha=2. / 3)

    if names is not None:
        ax.set_xticklabels(names)
    if xlabel is not None:
        ax.set_xlabel(xlabel)
    if ylabel is not None:
        ax.set_ylabel(ylabel)

    ax.xaxis.grid(False)
    return ax
def lmplot(x, y, data, color=None, row=None, col=None, x_estimator=None,
           x_ci=95, fit_line=True, ci=95, truncate=False, sharex=True,
           sharey=True, palette="hls", size=None, scatter_kws=None,
           line_kws=None, palette_kws=None):
    """Plot a linear model from a DataFrame.

    Parameters
    ----------
    x, y : strings
        column names in `data` DataFrame for x and y variables
    data : DataFrame
        source of data for the model
    color : string, optional
        DataFrame column name to group the model by color
    row, col : strings, optional
        DataFrame column names to make separate plot facets
    x_estimator : callable, optional
        Interpret X values as factor labels and use this function
        to plot the point estimate and bootstrapped CI
    x_ci : int optional
        size of confidence interval for x_estimator error bars
    fit_line : bool, optional
        if True fit a regression line by color/row/col and plot
    ci : int, optional
        confidence interval for the regression line; None skips the band
    truncate : bool, optional
        if True, only fit line from data min to data max
    sharex, sharey : bools, optional
        only relevant if faceting; passed to plt.subplots
    palette : seaborn color palette argument
        if using separate plots by color, draw with this color palette
    size : float, optional
        size (plots are square) for each plot facet
    {scatter, line}_kws : dictionary
        keyword arguments to pass to the underlying plot functions;
        "ms" and "mew" in `scatter_kws` default to 4 (7 when using
        `x_estimator`) and 0
    palette_kws : dictionary
        keyword arguments for seaborn.color_palette

    """
    # TODO
    # - position_{dodge, jitter}
    # - legend when fit_line is False
    # - truncate fit
    # - wrap title when wide
    # - wrap columns

    # First sort out the general figure layout
    if size is None:
        size = mpl.rcParams["figure.figsize"][1]

    nrow = 1 if row is None else len(data[row].unique())
    ncol = 1 if col is None else len(data[col].unique())

    f, axes = plt.subplots(nrow, ncol, sharex=sharex, sharey=sharey,
                           figsize=(size * ncol, size * nrow))
    axes = np.atleast_2d(axes).reshape(nrow, ncol)

    # Build the facet masks from whether a facet variable was given, not
    # from the grid size: a facet variable with a single level must still
    # define its level values because the titles below use them.
    if row is None:
        row_masks = [np.repeat(True, len(data))]
    else:
        row_vals = np.sort(data[row].unique())
        row_masks = [data[row] == val for val in row_vals]

    if col is None:
        col_masks = [np.repeat(True, len(data))]
    else:
        col_vals = np.sort(data[col].unique())
        col_masks = [data[col] == val for val in col_vals]

    if palette_kws is None:
        palette_kws = {}

    # Sort out the plot colors
    color_factor = color
    if color is None:
        hue_masks = [np.repeat(True, len(data))]
        colors = ["#222222"]
    else:
        hue_vals = np.sort(data[color].unique())
        hue_masks = [data[color] == val for val in hue_vals]
        colors = color_palette(palette, len(hue_masks), **palette_kws)

    # Default keyword arguments for plot components. The marker settings
    # are extracted once, from a copy, so that every facet/hue subset gets
    # the same values and the caller's dictionary is left untouched.
    scatter_kws = {} if scatter_kws is None else dict(scatter_kws)
    if line_kws is None:
        line_kws = {}
    ms = scatter_kws.pop("ms", 4 if x_estimator is None else 7)
    mew = scatter_kws.pop("mew", 0)

    # First walk through the facets and plot the scatters
    for row_i, row_mask in enumerate(row_masks):
        for col_j, col_mask in enumerate(col_masks):
            ax = axes[row_i, col_j]
            if not sharex or (row_i + 1 == len(row_masks)):
                ax.set_xlabel(x)
            if not sharey or col_j == 0:
                ax.set_ylabel(y)

            # Title the plot if we are faceting
            title = ""
            if row is not None:
                title += "%s = %s" % (row, row_vals[row_i])
            if row is not None and col is not None:
                title += " | "
            if col is not None:
                title += "%s = %s" % (col, col_vals[col_j])
            ax.set_title(title)

            for hue_k, hue_mask in enumerate(hue_masks):
                hue_color = colors[hue_k]
                data_ijk = data[row_mask & col_mask & hue_mask]

                if x_estimator is not None:
                    # Point estimate and bootstrapped CI at each x level
                    x_vals = data_ijk[x].unique()
                    y_grouped = [np.array(data_ijk[y][data_ijk[x] == v])
                                 for v in x_vals]
                    y_est = [x_estimator(y_i) for y_i in y_grouped]
                    y_boots = [moss.bootstrap(np.array(y_i),
                                              func=x_estimator)
                               for y_i in y_grouped]
                    ci_lims = [50 - x_ci / 2., 50 + x_ci / 2.]
                    y_ci = [moss.percentiles(y_i, ci_lims)
                            for y_i in y_boots]
                    y_error = ci_to_errsize(np.transpose(y_ci), y_est)

                    ax.plot(x_vals, y_est, "o", mew=mew, ms=ms,
                            color=hue_color, **scatter_kws)
                    ax.errorbar(x_vals, y_est, y_error,
                                fmt=None, ecolor=hue_color)
                else:
                    ax.plot(data_ijk[x], data_ijk[y], "o",
                            color=hue_color, mew=mew, ms=ms, **scatter_kws)

    for ax_i in np.ravel(axes):
        ax_i.set_xmargin(.05)
        ax_i.autoscale_view()

    # Now walk through again and plot the regression estimate
    # and a confidence interval for the regression line
    if fit_line:
        for row_i, row_mask in enumerate(row_masks):
            for col_j, col_mask in enumerate(col_masks):
                ax = axes[row_i, col_j]
                xlim = ax.get_xlim()

                for hue_k, hue_mask in enumerate(hue_masks):
                    hue_color = colors[hue_k]
                    data_ijk = data[row_mask & col_mask & hue_mask]
                    x_vals = np.array(data_ijk[x])
                    y_vals = np.array(data_ijk[y])

                    # Nothing to fit for an empty facet/hue combination
                    if not len(x_vals):
                        continue

                    # Sort out the limit of the fit
                    if truncate:
                        xx = np.linspace(x_vals.min(), x_vals.max(), 100)
                    else:
                        xx = np.linspace(xlim[0], xlim[1], 100)

                    # Inner function to bootstrap the regression
                    def _bootstrap_reg(x, y):
                        fit = np.polyfit(x, y, 1)
                        return np.polyval(fit, xx)

                    # Regression line confidence interval
                    if ci is not None:
                        ci_lims = [50 - ci / 2., 50 + ci / 2.]
                        boots = moss.bootstrap(x_vals, y_vals,
                                               func=_bootstrap_reg)
                        ci_band = moss.percentiles(boots, ci_lims, axis=0)
                        ax.fill_between(xx, *ci_band,
                                        color=hue_color, alpha=.15)

                    fit = np.polyfit(x_vals, y_vals, 1)
                    reg = np.polyval(fit, xx)
                    if color_factor is None:
                        label = ""
                    else:
                        label = hue_vals[hue_k]
                    ax.plot(xx, reg, color=hue_color,
                            label=str(label), **line_kws)
                    ax.set_xlim(xlim)

    # Plot the legend on the upper left facet and adjust the layout
    if color_factor is not None:
        axes[0, 0].legend(loc="best", title=color_factor)
    plt.tight_layout()
def coefplot(formula, data, groupby=None, intercept=False, ci=95,
             palette="husl"):
    """Plot the coefficients from a linear model.

    Parameters
    ----------
    formula : string
        patsy formula for ols model
    data : dataframe
        data for the plot; formula terms must appear in columns
    groupby : grouping object, optional
        object to group data with to fit conditional models
    intercept : bool, optional
        if False, strips the intercept term before plotting
    ci : float, optional
        size of confidence intervals
    palette : seaborn color palette, optional
        palette for the horizontal plots

    """
    # Float literal so that an integer `ci` (e.g. 95) does not collapse to
    # alpha == 1 when Python 2 integer division is in effect
    alpha = 1 - ci / 100.
    if groupby is None:
        # Fit once and read both the estimates and the intervals from it
        fit = sf.ols(formula, data).fit()
        coefs = fit.params
        cis = fit.conf_int(alpha)
    else:
        grouped = data.groupby(groupby)
        coefs = grouped.apply(lambda d: sf.ols(formula, d).fit().params).T
        cis = grouped.apply(lambda d: sf.ols(formula, d).fit().conf_int(alpha))

    # Possibly ignore the intercept
    # NOTE(review): assumes the intercept is the first coefficient -- confirm
    # for formulas that reorder or drop it
    if not intercept:
        coefs = coefs.iloc[1:]
    n_terms = len(coefs)

    # Plot separately depending on groupby
    w, h = mpl.rcParams["figure.figsize"]

    def hsize(n):
        return n * (h / 2)

    def wsize(n):
        # ``5.`` keeps this in floating point; with integer division the
        # inner quotient is 0 for n < 5 and the expression divides by zero
        return n * (w / (4 * (n / 5.)))

    if groupby is None:
        colors = itertools.cycle(color_palette(palette, n_terms))
        f, ax = plt.subplots(1, 1, figsize=(wsize(n_terms), hsize(1)))
        for i, term in enumerate(coefs.index):
            term_color = next(colors)
            low, high = cis.loc[term]
            ax.plot([i, i], [low, high], c=term_color,
                    solid_capstyle="round", lw=2.5)
            ax.plot(i, coefs.loc[term], "o", c=term_color, ms=8)
        ax.set_xlim(-.5, n_terms - .5)
        ax.axhline(0, ls="--", c="dimgray")
        ax.set_xticks(range(n_terms))
        ax.set_xticklabels(coefs.index)

    else:
        n_groups = len(coefs.columns)
        f, axes = plt.subplots(n_terms, 1, sharex=True,
                               figsize=(wsize(n_groups), hsize(n_terms)))
        # plt.subplots returns a bare axes object for a single subplot
        if n_terms == 1:
            axes = [axes]
        colors = itertools.cycle(color_palette(palette, n_groups))
        for ax, term in zip(axes, coefs.index):
            for i, group in enumerate(coefs.columns):
                group_color = next(colors)
                low, high = cis.loc[(group, term)]
                ax.plot([i, i], [low, high], c=group_color,
                        solid_capstyle="round", lw=2.5)
                ax.plot(i, coefs.loc[term, group], "o", c=group_color, ms=8)
            ax.set_xlim(-.5, n_groups - .5)
            ax.axhline(0, ls="--", c="dimgray")
            ax.set_title(term)
        ax.set_xlabel(groupby)
        ax.set_xticks(range(n_groups))
        ax.set_xticklabels(coefs.columns)
def lmplot(x, y, data, color=None, row=None, col=None, col_wrap=None,
           x_estimator=None, x_ci=95, x_bins=None, n_boot=5000, fit_reg=True,
           order=1, ci=95, logistic=False, truncate=False,
           x_partial=None, y_partial=None, x_jitter=None, y_jitter=None,
           sharex=True, sharey=True, palette="husl", size=None,
           scatter_kws=None, line_kws=None, palette_kws=None):
    """Plot a linear model with faceting, color binning, and other options.

    Parameters
    ----------
    x, y : strings
        Column names in `data` DataFrame for x and y variables.
    data : DataFrame
        Source of data for the model.
    color : string, optional
        DataFrame column name to group the model by color.
    row, col : strings, optional
        DataFrame column names to make separate plot facets.
    col_wrap : int, optional
        Wrap col variable at this width - cannot be used with row facet.
    x_estimator : callable, optional
        Interpret X values as factor labels and use this function
        to plot the point estimate and bootstrapped CI.
    x_ci : int optional
        Size of confidence interval for x_estimator error bars.
    x_bins : sequence of floats, optional
        Bin the x variable with these values. Implies that x_estimator is
        mean, unless otherwise provided.
    n_boot : int, optional
        Number of bootstrap iterations to perform.
    fit_reg : bool, optional
        If True fit a regression model by color/row/col and plot.
    order : int, optional
        Order of the regression polynomial to fit.
    ci : int, optional
        Confidence interval for the regression line; None skips the band.
    logistic : bool, optional
        Fit the regression line with logistic regression.
    truncate : bool, optional
        If True, only fit line from data min to data max.
    {x, y}_partial : string or list of strings, optional
        Regress these variables out of the factors before plotting.
    {x, y}_jitter : float, optional
        Parameters for uniformly distributed random noise added to positions.
    sharex, sharey : bools, optional
        Only relevant if faceting; passed to plt.subplots.
    palette : seaborn color palette argument
        If using separate plots by color, draw with this color palette.
    size : float, optional
        Size (plots are square) for each plot facet.
    {scatter, line}_kws : dictionary
        Keyword arguments to pass to the underlying plot functions. In
        `scatter_kws`, "ms" defaults to 4 (7 when an x estimator is used),
        "mew" to 0 and "alpha" (raw scatter only) to .77.
    palette_kws : dictionary
        Keyword arguments for seaborn.color_palette.

    Raises
    ------
    ValueError
        If `col_wrap` is given without `col`, or together with `row`.

    """
    # TODO
    # - legend when fit_line is False

    def _partial_out(vals, confounds, frame):
        """Remove each variable in `confounds` from `vals` by vector rejection.

        Nothing is modified in place, so integer-typed columns are fine.
        """
        for var in confounds:
            conf = frame[var] - frame[var].mean()
            center = vals.mean()
            vals = moss.vector_reject(vals - center, conf) + center
        return vals

    # First sort out the general figure layout
    if size is None:
        size = mpl.rcParams["figure.figsize"][1]

    if col is None and col_wrap is not None:
        raise ValueError("Need column facet variable for `col_wrap`")
    if row is not None and col_wrap is not None:
        raise ValueError("Cannot facet rows when using `col_wrap`")

    nrow = 1 if row is None else len(data[row].unique())
    ncol = 1 if col is None else len(data[col].unique())

    if col_wrap is not None:
        ncol = col_wrap
        # float() guards against Python 2 integer division, which would
        # floor the quotient and drop the last (partial) row of facets
        nrow = int(np.ceil(len(data[col].unique()) / float(col_wrap)))

    f, axes = plt.subplots(nrow, ncol, sharex=sharex, sharey=sharey,
                           figsize=(size * ncol, size * nrow))
    axes = np.atleast_2d(axes).reshape(nrow, ncol)

    # Build the facet masks from whether a facet variable was given, not
    # from the grid shape: with ``col_wrap=1`` or a single-level facet the
    # grid has one column/row but the level values are still needed for
    # the titles (and, with col_wrap, for one facet per level).
    if row is None:
        row_masks = [np.repeat(True, len(data))]
    else:
        row_vals = np.sort(data[row].unique())
        row_masks = [data[row] == val for val in row_vals]

    if col is None:
        col_masks = [np.repeat(True, len(data))]
    else:
        col_vals = np.sort(data[col].unique())
        col_masks = [data[col] == val for val in col_vals]

    if x_bins is not None:
        x_estimator = np.mean if x_estimator is None else x_estimator
        x_bins = np.c_[x_bins]

    if x_partial is not None and not isinstance(x_partial, list):
        x_partial = [x_partial]
    if y_partial is not None and not isinstance(y_partial, list):
        y_partial = [y_partial]

    if palette_kws is None:
        palette_kws = {}

    # Sort out the plot colors
    color_factor = color
    if color is None:
        hue_masks = [np.repeat(True, len(data))]
        colors = ["#222222"]
    else:
        hue_vals = np.sort(data[color].unique())
        hue_masks = [data[color] == val for val in hue_vals]
        colors = color_palette(palette, len(hue_masks), **palette_kws)

    # Default keyword arguments for plot components. Marker settings are
    # extracted once, from a copy: user-supplied "ms"/"mew" then reach the
    # estimator branch too, and the caller's dictionary is not modified.
    scatter_kws = {} if scatter_kws is None else dict(scatter_kws)
    if line_kws is None:
        line_kws = {}
    scatter_ms = scatter_kws.pop("ms", 4 if x_estimator is None else 7)
    scatter_mew = scatter_kws.pop("mew", 0)
    scatter_alpha = scatter_kws.pop("alpha", .77)

    # First walk through the facets and plot the scatters
    for row_i, row_mask in enumerate(row_masks):
        for col_j, col_mask in enumerate(col_masks):
            if col_wrap is not None:
                f_row = col_j // ncol
                f_col = col_j % ncol
            else:
                f_row, f_col = row_i, col_j
            ax = axes[f_row, f_col]

            if f_row + 1 == nrow:
                ax.set_xlabel(x)
            if f_col == 0:
                ax.set_ylabel(y)

            # Title the plot if we are faceting
            title = ""
            if row is not None:
                title += "%s = %s" % (row, row_vals[row_i])
            if row is not None and col is not None:
                title += " | "
            if col is not None:
                title += "%s = %s" % (col, col_vals[col_j])
            if size < 3:
                title = title.replace(" | ", "\n")
            ax.set_title(title)

            for hue_k, hue_mask in enumerate(hue_masks):
                hue_color = colors[hue_k]
                data_ijk = data[row_mask & col_mask & hue_mask]

                if x_estimator is not None:
                    if x_bins is None:
                        x_vals = data_ijk[x].unique()
                        x_data = data_ijk[x]
                    else:
                        # Assign every observation to its nearest bin value
                        dist = distance.cdist(np.c_[data_ijk[x]], x_bins)
                        x_vals = x_bins.ravel()
                        x_data = x_bins[np.argmin(dist, axis=1)].ravel()

                    y_vals = data_ijk[y]
                    if y_partial is not None:
                        y_vals = _partial_out(y_vals, y_partial, data_ijk)

                    y_grouped = [np.array(y_vals[x_data == v])
                                 for v in x_vals]
                    y_est = [x_estimator(y_i) for y_i in y_grouped]
                    y_boots = [moss.bootstrap(np.array(y_i),
                                              func=x_estimator,
                                              n_boot=n_boot)
                               for y_i in y_grouped]
                    ci_lims = [50 - x_ci / 2., 50 + x_ci / 2.]
                    y_ci = [moss.percentiles(y_i, ci_lims)
                            for y_i in y_boots]
                    y_error = ci_to_errsize(np.transpose(y_ci), y_est)

                    ax.plot(x_vals, y_est, "o", mew=scatter_mew,
                            ms=scatter_ms, color=hue_color, **scatter_kws)
                    ax.errorbar(x_vals, y_est, y_error,
                                fmt=None, ecolor=hue_color)
                else:
                    x_ = data_ijk[x]
                    y_ = data_ijk[y]

                    if x_partial is not None:
                        x_ = _partial_out(x_, x_partial, data_ijk)
                    if y_partial is not None:
                        y_ = _partial_out(y_, y_partial, data_ijk)

                    if x_jitter is not None:
                        x_ = x_ + np.random.uniform(-x_jitter, x_jitter,
                                                    x_.shape)
                    if y_jitter is not None:
                        y_ = y_ + np.random.uniform(-y_jitter, y_jitter,
                                                    y_.shape)
                    ax.plot(x_, y_, "o", color=hue_color,
                            alpha=scatter_alpha, mew=scatter_mew,
                            ms=scatter_ms, **scatter_kws)

    for ax_i in np.ravel(axes):
        ax_i.set_xmargin(.05)
        ax_i.autoscale_view()

    # Now walk through again and plot the regression estimate
    # and a confidence interval for the regression line
    if fit_reg:
        for row_i, row_mask in enumerate(row_masks):
            for col_j, col_mask in enumerate(col_masks):
                if col_wrap is not None:
                    f_row = col_j // ncol
                    f_col = col_j % ncol
                else:
                    f_row, f_col = row_i, col_j
                ax = axes[f_row, f_col]
                xlim = ax.get_xlim()

                for hue_k, hue_mask in enumerate(hue_masks):
                    hue_color = colors[hue_k]
                    data_ijk = data[row_mask & col_mask & hue_mask]
                    x_vals = np.array(data_ijk[x])
                    y_vals = np.array(data_ijk[y])
                    if not len(x_vals):
                        continue

                    # Sort out the limit of the fit
                    if truncate:
                        xx = np.linspace(x_vals.min(), x_vals.max(), 100)
                    else:
                        xx = np.linspace(xlim[0], xlim[1], 100)
                    xx_ = sm.add_constant(xx, prepend=True)

                    # Inner function to bootstrap the regression
                    def _regress(x, y):
                        if logistic:
                            x_ = sm.add_constant(x, prepend=True)
                            fit = sm.GLM(y, x_,
                                         family=sm.families.Binomial()).fit()
                            reg = fit.predict(xx_)
                        else:
                            fit = np.polyfit(x, y, order)
                            reg = np.polyval(fit, xx)
                        return reg

                    # Remove nuisance variables with vector rejection
                    if x_partial is not None:
                        x_vals = _partial_out(x_vals, x_partial, data_ijk)
                    if y_partial is not None:
                        y_vals = _partial_out(y_vals, y_partial, data_ijk)

                    # Regression line confidence interval
                    if ci is not None:
                        ci_lims = [50 - ci / 2., 50 + ci / 2.]
                        boots = moss.bootstrap(x_vals, y_vals,
                                               func=_regress,
                                               n_boot=n_boot)
                        ci_band = moss.percentiles(boots, ci_lims, axis=0)
                        ax.fill_between(xx, *ci_band,
                                        color=hue_color, alpha=.15)

                    # Regression line
                    reg = _regress(x_vals, y_vals)
                    if color_factor is None:
                        label = ""
                    else:
                        label = hue_vals[hue_k]
                    ax.plot(xx, reg, color=hue_color,
                            label=str(label), **line_kws)
                    ax.set_xlim(xlim)

    # Plot the legend on the upper left facet and adjust the layout
    if color_factor is not None and color_factor not in [row, col]:
        axes[0, 0].legend(loc="best", title=color_factor)
    plt.tight_layout()
def tsplot(data, time=None, unit=None, condition=None, value=None,
           err_style="ci_band", ci=68, interpolate=True, color=None,
           estimator=np.mean, n_boot=5000, err_palette=None, err_kws=None,
           legend=True, ax=None, **kwargs):
    """Plot one or more timeseries with flexible representation of uncertainty.

    This function can take data specified either as a long-form (tidy)
    DataFrame or as an ndarray with dimensions for sampling unit, time, and
    (optionally) condition. The interpretation of some of the other parameters
    changes depending on the type of object passed as data.

    Parameters
    ----------
    data : DataFrame or ndarray
        Data for the plot. Should either be a "long form" dataframe or an
        array with dimensions (unit, time, condition). In both cases, the
        condition field/dimension is optional. The type of this argument
        determines the interpretation of the next few parameters.
    time : string or series-like
        Either the name of the field corresponding to time in the data
        DataFrame or x values for a plot when data is an array. If a Series,
        the name will be used to label the x axis.
    value : string
        Either the name of the field corresponding to the data values in
        the data DataFrame (i.e. the y coordinate) or a string that forms
        the y axis label when data is an array.
    unit : string
        Field in the data DataFrame identifying the sampling unit (e.g.
        subject, neuron, etc.). The error representation will collapse over
        units at each time/condition observation. This has no role when data
        is an array.
    condition : string or Series-like
        Either the name of the field identifying the condition an observation
        falls under in the data DataFrame, or a sequence of names with a
        length equal to the size of the third dimension of data. There will
        be a separate trace plotted for each condition. If condition is a
        Series with a name attribute, the name will form the title for the
        plot legend (unless legend is set to False).
    err_style : string or list of strings or None
        Names of ways to plot uncertainty across units from set of
        {ci_band, ci_bars, boot_traces, boot_kde, unit_traces, unit_points}.
        Can use one or more than one method. Each name is resolved to a
        module-level ``_plot_<name>`` function.
    ci : float or list of floats in [0, 100]
        Confidence interval size(s). If a list, it will stack the error
        plots for each confidence interval. Only relevant for error styles
        with "ci" in the name.
    interpolate : boolean
        Whether to do a linear interpolation between each timepoint when
        plotting. The value of this parameter also determines the marker
        used for the main plot traces, unless marker is specified as a keyword
        argument.
    color : seaborn palette or matplotlib color name or dictionary
        Palette or color for the main plots and error representation (unless
        plotting by unit, which can be separately controlled with err_palette).
        If a dictionary, should map condition name to color spec.
    estimator : callable
        Function to determine central tendency and to pass to bootstrap
        must take an ``axis`` argument.
    n_boot : int
        Number of bootstrap iterations.
    err_palette : seaborn palette
        Palette name or list of colors used when plotting data for each unit.
    err_kws : dict, optional
        Keyword argument dictionary passed through to matplotlib function
        generating the error plot.
    legend : bool, optional
        If False, no legend is drawn even when conditions are named.
    ax : axis object, optional
        Plot in given axis; if None creates a new figure
    kwargs :
        Other keyword arguments are passed to main plot() call; ``marker``,
        ``linestyle`` and ``label`` apply to every condition trace.

    Returns
    -------
    ax : matplotlib axis
        axis with plot data

    Raises
    ------
    ValueError
        If `color` is a dictionary but no condition names are available, or
        if an entry of `err_style` does not name a known error plotter.

    """
    # Sort out default values for the parameters
    if ax is None:
        ax = plt.gca()

    if err_kws is None:
        err_kws = {}

    if isinstance(data, pd.DataFrame):
        # Long-form input: the arguments name columns of the DataFrame
        xlabel = time
        ylabel = value

        # Condition is optional
        if condition is None:
            if isinstance(color, dict):
                err = "Must have condition names if using color dict."
                raise ValueError(err)
            # Share data's index so the groupby below aligns row for row
            condition = pd.Series(np.ones(len(data)), index=data.index)
            legend = False
            legend_name = None
            n_cond = 1
        else:
            legend_name = condition
            n_cond = len(data[condition].unique())

    else:
        # Array input: expand it to a long-form DataFrame
        data = np.asarray(data)

        # Data can be a timecourse from a single unit or
        # several observations in one condition
        if data.ndim == 1:
            data = data[np.newaxis, :, np.newaxis]
        elif data.ndim == 2:
            data = data[:, :, np.newaxis]
        n_unit, n_time, n_cond = data.shape

        # Units are experimental observations. Maybe subjects, or neurons.
        # `unit` has no role for array input, so the ids are always
        # generated here (a non-None `unit` used to leave `units` unbound).
        units = np.repeat(np.arange(n_unit), n_time * n_cond)
        unit = "unit"

        # Time forms the xaxis of the plot
        if time is None:
            times = np.arange(n_time)
        else:
            times = np.asarray(time)
        xlabel = getattr(time, "name", None)
        time = "time"
        times = np.tile(np.repeat(times, n_cond), n_unit)

        # Conditions split the timeseries plots
        if condition is None:
            conds = range(n_cond)
            legend = False
            legend_name = None
            if isinstance(color, dict):
                err = "Must have condition names if using color dict."
                raise ValueError(err)
        else:
            conds = np.asarray(condition)
            legend_name = getattr(condition, "name", None)
        condition = "cond"
        conds = np.tile(conds, n_unit * n_time)

        # Value forms the y value in the plot
        ylabel = value
        value = "value"

        # Convert to long-form DataFrame; the repeat/tile patterns above
        # follow the C-order layout of ``data.ravel()``
        data = pd.DataFrame(dict(value=data.ravel(),
                                 time=times,
                                 unit=units,
                                 cond=conds))

    # Set up the err_style and ci arguments for the loop below. None must
    # be tested first (it is not iterable), and a bare string is wrapped so
    # it is not iterated character by character on Python 3.
    if err_style is None:
        err_style = []
    elif isinstance(err_style, str) or not hasattr(err_style, "__iter__"):
        err_style = [err_style]
    if not hasattr(ci, "__iter__"):
        ci = [ci]

    # Set up the color palette
    if color is None:
        colors = color_palette()
    elif isinstance(color, dict):
        colors = [color[c] for c in data[condition].unique()]
    else:
        try:
            colors = color_palette(color, n_cond)
        except ValueError:
            # Not a palette; treat it as a single matplotlib color
            color = mpl.colors.colorConverter.to_rgb(color)
            colors = [color] * n_cond

    # Extract the trace styling once so that it applies to every condition
    # (popping inside the loop only affected the first trace)
    marker = kwargs.pop("marker", "" if interpolate else "o")
    linestyle = kwargs.pop("linestyle", "-" if interpolate else "")
    has_label = "label" in kwargs
    user_label = kwargs.pop("label", None)

    # Do a groupby with condition and plot each trace
    for c, (cond, df_c) in enumerate(data.groupby(condition, sort=False)):

        # Keywords keep this working where pivot's arguments are
        # keyword-only
        df_c = df_c.pivot(index=unit, columns=time, values=value)
        x = df_c.columns.values.astype(float)

        # Bootstrap the data for confidence intervals
        boot_data = moss.bootstrap(df_c.values, n_boot=n_boot,
                                   axis=0, func=estimator)
        cis = [moss.ci(boot_data, v, axis=0) for v in ci]
        central_data = estimator(df_c.values, axis=0)

        # Get the color for this condition, cycling if the palette is
        # shorter than the number of conditions
        trace_color = colors[c % len(colors)]

        # Use subroutines to plot the uncertainty
        for style in err_style:

            # Allow for null style (only plot central tendency)
            if style is None:
                continue

            # Grab the function from the global environment
            try:
                plot_func = globals()["_plot_%s" % style]
            except KeyError:
                raise ValueError("%s is not a valid err_style" % style)

            # Possibly set up to plot each observation in a different color
            if err_palette is not None and "unit" in style:
                err_color = color_palette(err_palette, len(df_c.values))
            else:
                err_color = trace_color

            # Pass all parameters to the error plotter as keyword args
            plot_kwargs = dict(ax=ax, x=x, data=df_c.values,
                               boot_data=boot_data,
                               central_data=central_data,
                               color=err_color, err_kws=err_kws)

            # Plot the error representation, possibly for multiple cis
            for ci_i in cis:
                plot_kwargs["ci"] = ci_i
                plot_func(**plot_kwargs)

        # Plot the central trace
        if has_label:
            label = user_label
        else:
            label = cond if legend else "_nolegend_"
        ax.plot(x, central_data, color=trace_color, label=label,
                marker=marker, linestyle=linestyle, **kwargs)

    # Pad the sides of the plot only when not interpolating; the padding
    # needs at least two timepoints to measure a step
    ax.set_xlim(x.min(), x.max())
    if not interpolate and len(x) > 1:
        x_diff = x[1] - x[0]
        ax.set_xlim(x.min() - x_diff, x.max() + x_diff)

    # Add the plot labels
    if xlabel is not None:
        ax.set_xlabel(xlabel)
    if ylabel is not None:
        ax.set_ylabel(ylabel)
    if legend:
        ax.legend(loc=0, title=legend_name)

    return ax
def lmplot(x, y, data, color=None, row=None, col=None,
           x_estimator=None, x_ci=95, fit_line=True, ci=95, truncate=False,
           sharex=True, sharey=True, palette="hls", size=None,
           scatter_kws=None, line_kws=None, palette_kws=None):
    """Plot a linear model from a DataFrame.

    A new figure is created with one facet per (row, col) combination. Each
    facet first gets the observations (or per-x point estimates with
    bootstrapped error bars), then optionally a first-order polynomial fit
    with a bootstrapped confidence band. Nothing is returned.

    Parameters
    ----------
    x, y : strings
        column names in `data` DataFrame for x and y variables
    data : DataFrame
        source of data for the model
    color : string, optional
        DataFrame column name to group the model by color
    row, col : strings, optional
        DataFrame column names to make separate plot facets; facets are
        ordered by the sorted unique values of the column
    x_estimator : callable, optional
        Interpret X values as factor labels and use this function
        to plot the point estimate and bootstrapped CI
    x_ci : int optional
        size of confidence interval for x_estimator error bars
    fit_line : bool, optional
        if True fit a regression line by color/row/col and plot
    ci : int, optional
        confidence interval for the regression line; None disables the band
    truncate : bool, optional
        if True, only fit line from data min to data max
    sharex, sharey : bools, optional
        only relevant if faceting; passed to plt.subplots
    palette : seaborn color palette argument
        if using separate plots by color, draw with this color palette
    size : float, optional
        size (plots are square) for each plot facet; defaults to the
        height entry of the "figure.figsize" rc parameter
    {scatter, line}_kws : dictionary
        keyword arguments to pass to the underlying plot functions; the
        dictionaries are copied, never modified in place
    palette_kws : dictionary
        keyword arguments for seaborn.color_palette

    """
    # TODO
    # - position_{dodge, jitter}
    # - legend when fit_line is False
    # - truncate fit
    # - wrap title when wide
    # - wrap columns

    # First sort out the general figure layout
    if size is None:
        size = mpl.rcParams["figure.figsize"][1]

    # Mask selecting every observation, used for any grouping variable
    # that was not requested
    everything = np.repeat(True, len(data))

    # Branch on whether the facet variable was *given*, not on how many
    # levels it has: a facet column with a single unique value still needs
    # its values for the facet title.
    if row is None:
        row_vals, row_masks = None, [everything]
    else:
        row_vals = np.sort(data[row].unique())
        row_masks = [data[row] == val for val in row_vals]

    if col is None:
        col_vals, col_masks = None, [everything]
    else:
        col_vals = np.sort(data[col].unique())
        col_masks = [data[col] == val for val in col_vals]

    nrow, ncol = len(row_masks), len(col_masks)

    f, axes = plt.subplots(nrow, ncol, sharex=sharex, sharey=sharey,
                           figsize=(size * ncol, size * nrow))
    axes = np.atleast_2d(axes).reshape(nrow, ncol)

    if palette_kws is None:
        palette_kws = {}

    # Sort out the plot colors
    if color is None:
        hue_vals = None
        hue_masks = [everything]
        colors = ["#222222"]
    else:
        hue_vals = np.sort(data[color].unique())
        hue_masks = [data[color] == val for val in hue_vals]
        colors = color_palette(palette, len(hue_masks), **palette_kws)

    # Default keyword arguments for plot components. Work on copies so the
    # caller's dictionaries are left untouched.
    scatter_kws = {} if scatter_kws is None else dict(scatter_kws)
    line_kws = {} if line_kws is None else dict(line_kws)

    # Resolve the marker size/edge width once so that a user-supplied value
    # applies to every facet and hue level, not only the first one plotted.
    ms = scatter_kws.pop("ms", 4 if x_estimator is None else 7)
    mew = scatter_kws.pop("mew", 0)

    # Percentile limits are loop invariant
    x_ci_lims = [50 - x_ci / 2., 50 + x_ci / 2.]

    # First walk through the facets and plot the scatters
    for row_i, row_mask in enumerate(row_masks):
        for col_j, col_mask in enumerate(col_masks):
            ax = axes[row_i, col_j]

            # With shared axes only label the outer facets
            if not sharex or (row_i + 1 == len(row_masks)):
                ax.set_xlabel(x)
            if not sharey or col_j == 0:
                ax.set_ylabel(y)

            # Title the plot if we are faceting
            title_parts = []
            if row is not None:
                title_parts.append("%s = %s" % (row, row_vals[row_i]))
            if col is not None:
                title_parts.append("%s = %s" % (col, col_vals[col_j]))
            ax.set_title(" | ".join(title_parts))

            for hue_k, hue_mask in enumerate(hue_masks):
                hue_color = colors[hue_k]
                data_ijk = data[row_mask & col_mask & hue_mask]

                # Nothing to draw for an empty factor combination
                if not len(data_ijk):
                    continue

                if x_estimator is not None:
                    # Treat each distinct x value as a factor level and plot
                    # the estimate of y with a bootstrapped error bar
                    x_vals = data_ijk[x].unique()
                    y_grouped = [np.array(data_ijk[y][data_ijk[x] == v])
                                 for v in x_vals]
                    y_est = [x_estimator(y_i) for y_i in y_grouped]
                    y_boots = [moss.bootstrap(y_i, func=x_estimator)
                               for y_i in y_grouped]
                    y_ci = [moss.percentiles(y_i, x_ci_lims)
                            for y_i in y_boots]
                    y_error = ci_to_errsize(np.transpose(y_ci), y_est)

                    ax.plot(x_vals, y_est, "o", mew=mew, ms=ms,
                            color=hue_color, **scatter_kws)
                    ax.errorbar(x_vals, y_est, y_error,
                                fmt=None, ecolor=hue_color)
                else:
                    ax.plot(data_ijk[x], data_ijk[y], "o",
                            color=hue_color, mew=mew, ms=ms, **scatter_kws)

    # Settle the axis limits before the fit pass reads them
    for ax_i in np.ravel(axes):
        ax_i.set_xmargin(.05)
        ax_i.autoscale_view()

    # Now walk through again and plot the regression estimate
    # and a confidence interval for the regression line
    if fit_line:
        if ci is not None:
            ci_lims = [50 - ci / 2., 50 + ci / 2.]

        for row_i, row_mask in enumerate(row_masks):
            for col_j, col_mask in enumerate(col_masks):
                ax = axes[row_i, col_j]
                xlim = ax.get_xlim()

                for hue_k, hue_mask in enumerate(hue_masks):
                    hue_color = colors[hue_k]
                    data_ijk = data[row_mask & col_mask & hue_mask]

                    # A regression cannot be fit to an empty subset
                    if not len(data_ijk):
                        continue

                    x_vals = np.array(data_ijk[x])
                    y_vals = np.array(data_ijk[y])

                    # Sort out the limit of the fit
                    if truncate:
                        xx = np.linspace(x_vals.min(), x_vals.max(), 100)
                    else:
                        xx = np.linspace(xlim[0], xlim[1], 100)

                    # Inner function to bootstrap the regression; it is
                    # used immediately, so closing over `xx` is safe
                    def _bootstrap_reg(x, y):
                        fit = np.polyfit(x, y, 1)
                        return np.polyval(fit, xx)

                    # Regression line confidence interval
                    if ci is not None:
                        boots = moss.bootstrap(x_vals, y_vals,
                                               func=_bootstrap_reg)
                        ci_band = moss.percentiles(boots, ci_lims, axis=0)
                        ax.fill_between(xx, *ci_band,
                                        color=hue_color, alpha=.15)

                    fit = np.polyfit(x_vals, y_vals, 1)
                    reg = np.polyval(fit, xx)
                    label = "" if color is None else hue_vals[hue_k]
                    ax.plot(xx, reg, color=hue_color,
                            label=str(label), **line_kws)

                # Restore the limits chosen after the scatter pass
                ax.set_xlim(xlim)

    # Plot the legend on the upper left facet and adjust the layout
    if color is not None:
        axes[0, 0].legend(loc="best", title=color)
    plt.tight_layout()