def write_mask_report(mask_file, orig_file, mean_file):
    """Write pngs with the mask and mean images.

    Two slice mosaics are saved in the current working directory:
    ``functional_mask.png`` (the mask drawn over the original image) and
    ``mean_func.png`` (the mean image with a translucent "hot" overlay).

    Parameters
    ----------
    mask_file : str
        Image file holding the mask; voxels equal to 0 are set to NaN
        before plotting.
    orig_file : str
        Image file drawn in gray underneath the mask overlay.
    mean_file : str
        Mean image file; the length of its last axis sets the slice count.

    Returns
    -------
    list of str
        Absolute paths ``[mask_png, mean_png]``.

    """
    mean_vol = nib.load(mean_file).get_data()
    orig_vol = nib.load(orig_file).get_data()
    mask_vol = nib.load(mask_file).get_data().astype(float)
    # Presumably NaN keeps out-of-mask voxels from being painted by the overlay
    mask_vol[mask_vol == 0] = np.nan

    # Eight panels per row; slices that do not fill a complete row are
    # dropped, split between the two ends of the volume
    n_col = 8
    n_slices = mean_vol.shape[-1]
    n_row = n_slices // n_col
    first = n_slices % n_col // 2
    fig_kws = dict(figsize=(10, 1.375 * n_row), facecolor="k")
    png_kws = dict(dpi=100, bbox_inches="tight",
                   facecolor="k", edgecolor="k")

    # Functional mask mosaic
    fig, axes = plt.subplots(n_row, n_col, **fig_kws)
    gray_kws = dict(cmap="gray", vmin=0, vmax=moss.percentiles(orig_vol, 98))
    mask_cmap = mpl.colors.ListedColormap(["MediumSpringGreen"])
    for k, panel in enumerate(axes.ravel()):
        z = first + k
        panel.imshow(orig_vol[..., z].T, **gray_kws)
        panel.imshow(mask_vol[..., z].T, alpha=.6, cmap=mask_cmap)
        panel.set_xticks([])
        panel.set_yticks([])
    fig.subplots_adjust(hspace=1e-5, wspace=1e-5)
    mask_png = os.path.abspath("functional_mask.png")
    fig.savefig(mask_png, **png_kws)
    plt.close(fig)

    # Mean functional mosaic
    fig, axes = plt.subplots(n_row, n_col, **fig_kws)
    mean_lims = dict(vmin=0, vmax=moss.percentiles(mean_vol, 98))
    for k, panel in enumerate(axes.ravel()):
        plane = mean_vol[..., first + k].T
        panel.imshow(plane, cmap="gray", **mean_lims)
        panel.imshow(plane, cmap="hot", alpha=.6, **mean_lims)
        panel.set_xticks([])
        panel.set_yticks([])
    fig.subplots_adjust(hspace=1e-5, wspace=1e-5)
    mean_png = os.path.abspath("mean_func.png")
    fig.savefig(mean_png, **png_kws)
    plt.close(fig)

    return [mask_png, mean_png]
def realign_report(target_file, realign_params, displace_params):
    """Create files summarizing the motion correction.

    Parameters
    ----------
    target_file : str
        Image file plotted as the example functional mosaic.
    realign_params : str
        Text file read with ``np.loadtxt``; its six columns are labelled as
        three rotations followed by three translations.
    displace_params : sequence of two str
        Text files with the absolute and the relative displacement, in
        that order.

    Returns
    -------
    files : list of str
        Absolute paths ``[realignment_params.csv, realignment_plots.png,
        example_func.png]`` written in the current working directory.
    motion_file : str
        The csv path again, returned on its own.

    """
    # Tabulate the six motion parameters and both displacement traces
    dims = ["x", "y", "z"]
    rot_cols = ["rot_" + dim for dim in dims]
    trans_cols = ["trans_" + dim for dim in dims]
    motion = pd.DataFrame(np.loadtxt(realign_params),
                          columns=rot_cols + trans_cols)

    abs_file, rel_file = displace_params
    motion["displace_abs"] = np.loadtxt(abs_file)
    # Relative values are aligned to rows 1..end; row 0 is then filled with 0
    rel_index = motion.index[1:]
    motion["displace_rel"] = pd.Series(np.loadtxt(rel_file), index=rel_index)
    motion.loc[0, "displace_rel"] = 0

    motion_file = os.path.abspath("realignment_params.csv")
    motion.to_csv(motion_file, index=False)

    # Rotation and translation traces on two stacked axes
    seaborn.set()
    seaborn.set_color_palette("husl", 3)
    fig, (ax_rot, ax_trans) = plt.subplots(2, 1, figsize=(8, 3.75),
                                           sharex=True)
    zero_line = dict(c="#444444", ls="--", zorder=1)
    ax_rot.plot(motion[rot_cols] * 100)
    ax_rot.axhline(0, **zero_line)
    ax_trans.plot(motion[trans_cols])
    ax_trans.axhline(0, **zero_line)
    ax_rot.set_xlim(0, len(motion) - 1)
    ax_rot.set_ylabel(r"Rotations (rad $\times$ 100)")
    ax_trans.set_ylabel("Translations (mm)")
    plt.tight_layout()
    plot_file = os.path.abspath("realignment_plots.png")
    fig.savefig(plot_file, dpi=100, bbox_inches="tight")
    plt.close(fig)

    # Mosaic of the target volume, eight panels per row
    volume = nib.load(target_file).get_data()
    n_col = 8
    n_slices = volume.shape[-1]
    n_row = n_slices // n_col
    first = n_slices % n_col // 2
    fig, axes = plt.subplots(n_row, n_col, figsize=(10, 1.375 * n_row),
                             facecolor="k")
    gray_kws = dict(cmap="gray", vmin=0, vmax=moss.percentiles(volume, 99))
    for k, panel in enumerate(axes.ravel()):
        panel.imshow(volume[..., first + k].T, **gray_kws)
        panel.set_xticks([])
        panel.set_yticks([])
    fig.subplots_adjust(hspace=1e-5, wspace=1e-5)
    example_png = os.path.abspath("example_func.png")
    fig.savefig(example_png, dpi=100, bbox_inches="tight",
                facecolor="k", edgecolor="k")
    plt.close(fig)

    return [motion_file, plot_file, example_png], motion_file
def write_coreg_plot(subject_id, in_file):
    """Plot the wm surface edges on the mean functional.

    Parameters
    ----------
    subject_id : str
        Subject directory name under ``$SUBJECTS_DIR``; the white matter
        volume is read from ``<subject_id>/mri/wm.mgz``.
    in_file : str
        Functional image to draw in gray. It is indexed with the same voxel
        indices as the white matter volume, so it is assumed to be on the
        same grid -- TODO confirm against the caller.

    Returns
    -------
    str
        Absolute path of ``func2anat.png`` in the current working directory.

    Raises
    ------
    KeyError
        If the ``SUBJECTS_DIR`` environment variable is not set.

    """
    bold = nib.load(in_file).get_data()

    # Load the white matter volume from recon-all
    subj_dir = os.environ["SUBJECTS_DIR"]
    wm_file = os.path.join(subj_dir, subject_id, "mri/wm.mgz")
    wm = nib.load(wm_file).get_data()

    # Find the limits of the data
    # note that FS conformed space is not (x, y, z): the in-plane extents
    # come from array axes 0 and 2, and slices are taken along axis 1
    xdata = np.flatnonzero(bold.any(axis=1).any(axis=1))
    ydata = np.flatnonzero(bold.any(axis=0).any(axis=0))
    zdata = np.flatnonzero(bold.any(axis=0).any(axis=1))
    # Upper bounds are inclusive indices, so add 1 to keep the last row and
    # column that contain data (and to avoid an empty crop when the data
    # spans a single index)
    xslice = slice(xdata.min(), xdata.max() + 1)
    yslice = slice(ydata.min(), ydata.max() + 1)
    # The slice range is trimmed by fixed margins at both ends
    zmin, zmax = zdata.min() + 10, zdata.max() - 25

    # Figure out the plot parameters: every third slice, eight per row
    n_slices = (zmax - zmin) // 3
    n_row, n_col = n_slices // 8, 8
    start = n_slices % n_col // 2 + zmin
    figsize = (10, 1.375 * n_row)
    slices = (start + np.arange(zmax - zmin))[::3][:n_slices]

    # Draw the slices and save
    vmin, vmax = 0, moss.percentiles(bold, 99)
    f, axes = plt.subplots(n_row, n_col, figsize=figsize, facecolor="k")
    cmap = mpl.colors.ListedColormap(["#C41E3A"])
    # Panels are filled from the last axis backwards
    for z, ax in zip(slices, reversed(axes.ravel())):
        ax.imshow(np.flipud(bold[xslice, z, yslice].T),
                  cmap="gray", vmin=vmin, vmax=vmax)
        try:
            ax.contour(np.flipud(wm[xslice, z, yslice].T),
                       linewidths=.5, cmap=cmap)
        except ValueError:
            # Best effort: a slice whose contour cannot be drawn is shown
            # without the white matter outline
            pass
        ax.set_xticks([])
        ax.set_yticks([])

    out_file = os.path.abspath("func2anat.png")
    # Save through the figure handle instead of pyplot's current figure
    f.savefig(out_file, dpi=100, bbox_inches="tight",
              facecolor="k", edgecolor="k")
    plt.close(f)
    return out_file
def lmplot(ax):
    """Draw a demo linear-model plot on `ax`.

    Simulates 80 noisy points from a seeded random state, then draws the
    scatter, a first-order polynomial fit, and a shaded band between the
    2.5th and 97.5th percentiles of 100 bootstrapped fits.

    Parameters
    ----------
    ax : matplotlib axis
        Axis to draw on; its title is set to ``"lmplot()"``.

    """
    n_obs = 80
    dark = "#222222"

    # Fixed seed so the figure is reproducible; draw order matters
    rng = np.random.RandomState(5)
    x = rng.normal(4, 1, n_obs)
    noise = rng.normal(0, 3, n_obs)
    y = 2 + 1.5 * x + noise
    ax.plot(x, y, "o", c=dark, alpha=.8)

    grid = np.linspace(1 + 1e-9, 7 - 1e-9, 100)

    def predict(x_sample, y_sample):
        """Evaluate a straight-line fit to the sample on the shared grid."""
        coefs = np.polyfit(x_sample, y_sample, 1)
        return np.polyval(coefs, grid)

    ax.plot(grid, predict(x, y), c=dark)

    boot_fits = moss.bootstrap(x, y, func=predict, n_boot=100)
    lower, upper = moss.percentiles(boot_fits, [2.5, 97.5], 0)
    ax.fill_between(grid, lower, upper, alpha=.15, color=dark)
    ax.set_title("lmplot()")
def _ts_kde(ax, x, data, color, **kwargs):
    """Upsample over time and plot a KDE of the bootstrap distribution.

    Parameters
    ----------
    ax : matplotlib axis
        Axis the density image is drawn on.
    x : array
        Positions of the observations along the x axis.
    data : array
        Values interpolated against `x`; assumed to be shaped
        (n_boot, len(x)) -- TODO confirm against the caller.
    color : matplotlib color
        Color of the image; the scaled density is used as its alpha channel.
    kwargs : ignored
        Accepted but not used by this function.

    """
    y_min, y_max = moss.percentiles(data, [1, 99])
    y_vals = np.linspace(y_min, y_max, 100)

    # Linearly interpolate the data onto 100 evenly spaced x positions
    upsampler = interpolate.interp1d(x, data)
    data_upsample = upsampler(np.linspace(x.min(), x.max(), 100))

    # One density estimate per upsampled position. Use the public
    # ``stats.gaussian_kde`` name rather than the deprecated ``stats.kde``
    # submodule.
    kde_data = [stats.gaussian_kde(pt_data)(y_vals)
                for pt_data in data_upsample.T]
    kde_data = np.transpose(kde_data)

    # Build an RGBA image: constant color, density as opacity
    rgb = mpl.colors.ColorConverter().to_rgb(color)
    img = np.zeros((kde_data.shape[0], kde_data.shape[1], 4))
    img[:, :, :3] = rgb
    # Scale each column so its peak density is fully opaque
    kde_data /= kde_data.max(axis=0)
    kde_data[kde_data > 1] = 1
    img[:, :, 3] = kde_data
    ax.imshow(img, interpolation="spline16", zorder=1,
              extent=(x.min(), x.max(), y_min, y_max),
              aspect="auto", origin="lower")
def refine_mask(timeseries, mask_file):
    """Improve brain mask by thresholding and dilating masked timeseries.

    Parameters
    ----------
    timeseries : str
        Timeseries image file; the last axis is treated as time.
    mask_file : str
        Existing mask image; only its affine and header are reused for the
        refined mask.

    Returns
    -------
    timeseries : str
        Absolute path of ``timeseries_masked.nii.gz``.
    mask_file : str
        Absolute path of ``functional_mask.nii.gz``.
    mean_file : str
        Absolute path of ``mean_func.nii.gz``.

    All three files are written in the current working directory.

    """
    ts_img = nib.load(timeseries)
    ts_data = ts_img.get_data()
    mask_img = nib.load(mask_file)

    # Find a robust 10% threshold and apply it to the timeseries.
    # The threshold sits 10% of the way from the 2nd to the 98th
    # percentile, i.e. 10% of the robust *range* (rmax - rmin).
    rmin, rmax = moss.percentiles(ts_data, [2, 98])
    thresh = rmin + 0.1 * (rmax - rmin)
    ts_data[ts_data < thresh] = 0
    # A voxel stays in the mask only if it survives at every timepoint
    ts_min = ts_data.min(axis=-1)
    mask = ts_min > 0

    # Dilate the resulting mask by one voxel
    dilator = sp.ndimage.generate_binary_structure(3, 3)
    mask = sp.ndimage.binary_dilation(mask, dilator)

    # Mask the timeseries and save it
    ts_data[~mask] = 0
    timeseries = os.path.abspath("timeseries_masked.nii.gz")
    new_ts = nib.Nifti1Image(ts_data,
                             ts_img.get_affine(),
                             ts_img.get_header())
    new_ts.to_filename(timeseries)

    # Save the mask image
    mask_file = os.path.abspath("functional_mask.nii.gz")
    new_mask = nib.Nifti1Image(mask,
                               mask_img.get_affine(),
                               mask_img.get_header())
    new_mask.to_filename(mask_file)

    # Make a new mean functional image and save it
    mean_file = os.path.abspath("mean_func.nii.gz")
    new_mean = nib.Nifti1Image(ts_data.mean(axis=-1),
                               ts_img.get_affine(),
                               ts_img.get_header())
    new_mean.to_filename(mean_file)

    return timeseries, mask_file, mean_file
def violin(vals, groupby=None, inner="box", color=None, positions=None,
           names=None, widths=.8, alpha=None, join_rm=False, kde_thresh=1e-2,
           inner_kws=None, ax=None, **kwargs):
    """Create a violin plot (a combination of boxplot and KDE plot).

    Parameters
    ----------
    vals : array or sequence of arrays
        data to plot
    groupby : grouping object
        if `vals` is a Series, this is used to group
    inner : box | stick | sticks | points
        plot quartiles or individual sample values inside violin
        ("stick" and "sticks" are equivalent)
    color : mpl color, sequence of colors, or seaborn palette name
        inner violin colors
    positions : number or sequence of numbers
        position of first violin or positions of each violin
    widths : float
        width of each violin at maximum density
    alpha : float, optional
        transparency of violin fill
    join_rm : boolean, optional
        if True, positions in the input arrays are treated as repeated
        measures and are joined with a line plot
    names : list of strings, optional
        names to plot on x axis, otherwise plots numbers
    kde_thresh : float, optional
        proportion of maximum at which to threshold the KDE curve
    inner_kws : dict, optional
        keyword arguments for inner plot; the dict is copied, not modified
    ax : matplotlib axis, optional
        axis to plot on, otherwise uses the current axis
    kwargs : ignored
        accepted but not used by this function

    Returns
    -------
    ax : matplotlib axis
        axis with violin plot

    Raises
    ------
    ValueError
        if `vals` has more than 2 dimensions or `names` does not match the
        number of violins

    """
    if ax is None:
        ax = plt.gca()

    # Pull axis labels and names out of pandas inputs
    xlabel = None
    ylabel = None
    if isinstance(vals, pd.DataFrame):
        if names is None:
            names = vals.columns
        if vals.columns.name is not None:
            xlabel = vals.columns.name
        vals = vals.values
    elif isinstance(vals, pd.Series) and groupby is not None:
        # A grouper without a name (e.g. a plain list) leaves the label unset
        xlabel = getattr(groupby, "name", None)
        if names is None:
            names = np.sort(pd.unique(groupby))
        ylabel = vals.name
        grouped_vals = pd.groupby(vals, groupby).values
        vals = grouped_vals.values

    # Coerce the input into a list with one 1-d array per violin
    if hasattr(vals, 'shape'):
        if len(vals.shape) == 1:
            if hasattr(vals[0], 'shape'):
                vals = list(vals)
            else:
                vals = [vals]
        elif len(vals.shape) == 2:
            nr, nc = vals.shape
            if nr == 1:
                vals = [vals]
            elif nc == 1:
                vals = [vals.ravel()]
            else:
                vals = [vals[:, i] for i in range(nc)]
        else:
            raise ValueError("Input x can have no more than 2 dimensions")
    if not hasattr(vals[0], '__len__'):
        vals = [vals]

    vals = [np.asarray(a, float) for a in vals]

    # Sort out the violin colors
    if color is None:
        colors = husl_palette(len(vals), l=.7)
    else:
        # Strings are excluded explicitly: on Python 3 they define __iter__
        # but name a single color or a palette, not a sequence of colors
        is_sequence = (hasattr(color, "__iter__")
                       and not isinstance(color, (tuple, str)))
        if is_sequence:
            colors = color
        else:
            try:
                color = mpl.colors.colorConverter.to_rgb(color)
                colors = [color for _ in vals]
            except ValueError:
                colors = color_palette(color, len(vals))

    colors = [mpl.colors.colorConverter.to_rgb(c) for c in colors]
    colors = [desaturate(c, .7) for c in colors]

    # Outline/inner gray is derived from the darkest violin color
    light_vals = [colorsys.rgb_to_hls(*c)[1] for c in colors]
    l = min(light_vals) * .6
    gray = (l, l, l)

    # Copy so the pops below do not alter the caller's dict
    inner_kws = {} if inner_kws is None else dict(inner_kws)

    if positions is None:
        positions = np.arange(1, len(vals) + 1)
    elif not hasattr(positions, "__iter__"):
        positions = np.arange(positions, len(vals) + positions)

    in_alpha = inner_kws.pop("alpha", .6 if inner == "points" else 1)
    in_alpha *= 1 if alpha is None else alpha
    in_color = inner_kws.pop("color", gray)
    in_marker = inner_kws.pop("marker", ".")
    in_lw = inner_kws.pop("lw", 1.5 if inner == "box" else .8)

    for i, a in enumerate(vals):
        x = positions[i]
        kde = stats.gaussian_kde(a)
        y = _kde_support(a, kde, 1000, kde_thresh)
        dens = kde(y)
        # Rescale so the widest point of the violin spans `widths`
        scl = 1 / (dens.max() / (widths / 2))
        dens *= scl

        ax.fill_betweenx(y, x - dens, x + dens, alpha=alpha, color=colors[i])
        if inner == "box":
            for quant in moss.percentiles(a, [25, 75]):
                q_x = kde(quant) * scl
                q_x = [x - q_x, x + q_x]
                ax.plot(q_x, [quant, quant], color=in_color,
                        linestyle=":", linewidth=in_lw, **inner_kws)
            med = np.median(a)
            m_x = kde(med) * scl
            m_x = [x - m_x, x + m_x]
            ax.plot(m_x, [med, med], color=in_color,
                    linestyle="--", linewidth=in_lw, **inner_kws)
        elif inner in ("stick", "sticks"):
            x_vals = kde(a) * scl
            x_vals = [x - x_vals, x + x_vals]
            ax.plot(x_vals, [a, a], color=in_color,
                    linewidth=in_lw, alpha=in_alpha, **inner_kws)
        elif inner == "points":
            x_vals = [x for _ in a]
            ax.plot(x_vals, a, in_marker, color=in_color,
                    alpha=in_alpha, mew=0, **inner_kws)
        for side in [-1, 1]:
            ax.plot((side * dens) + x, y, c=gray, linewidth=1.5)

    if join_rm:
        # Join the measures at the positions where the violins were drawn
        ax.plot(positions, vals, color=in_color, alpha=2. / 3)

    ax.set_xticks(positions)
    if names is not None:
        if len(vals) != len(names):
            raise ValueError("Length of names list must match nuber of bins")
        ax.set_xticklabels(names)
    ax.set_xlim(positions[0] - .5, positions[-1] + .5)

    if xlabel is not None:
        ax.set_xlabel(xlabel)
    if ylabel is not None:
        ax.set_ylabel(ylabel)

    ax.xaxis.grid(False)
    return ax
def _lm_partial_out(vals, frame, confounds):
    """Remove each `frame` column named in `confounds` from `vals`, keeping its mean."""
    if confounds is None:
        return vals
    for var in confounds:
        conf = frame[var]
        # Center without writing back into the DataFrame column
        conf = conf - conf.mean()
        center = vals.mean()
        vals = moss.vector_reject(vals - center, conf) + center
    return vals


def lmplot(x, y, data, color=None, row=None, col=None, col_wrap=None,
           x_estimator=None, x_ci=95, n_boot=5000, fit_reg=True,
           order=1, ci=95, logistic=False, truncate=False,
           x_partial=None, y_partial=None, x_jitter=None, y_jitter=None,
           sharex=True, sharey=True, palette="husl", size=None,
           scatter_kws=None, line_kws=None, palette_kws=None):
    """Plot a linear model from a DataFrame.

    Parameters
    ----------
    x, y : strings
        column names in `data` DataFrame for x and y variables
    data : DataFrame
        source of data for the model
    color : string, optional
        DataFrame column name to group the model by color
    row, col : strings, optional
        DataFrame column names to make separate plot facets
    col_wrap : int, optional
        wrap col variable at this width - cannot be used with row facet
    x_estimator : callable, optional
        Interpret X values as factor labels and use this function
        to plot the point estimate and bootstrapped CI
    x_ci : int optional
        size of confidence interval for x_estimator error bars
    n_boot : int, optional
        number of bootstrap iterations to perform
    fit_reg : bool, optional
        if True fit a regression model by color/row/col and plot
    order : int, optional
        order of the regression polynomial to fit (default = 1)
    ci : int, optional
        confidence interval for the regression line
    logistic : bool, optional
        fit the regression line with logistic regression
    truncate : bool, optional
        if True, only fit line from data min to data max
    {x, y}_partial : string or list of strings, optional
        regress these variables out of the factors before plotting
    {x, y}_jitter : float, optional
        parameters for uniformly distributed random noise added to positions
    sharex, sharey : bools, optional
        only relevant if faceting; passed to plt.subplots
    palette : seaborn color palette argument
        if using separate plots by color, draw with this color palette
    size : float, optional
        size (plots are square) for each plot facet
    {scatter, line}_kws : dictionary
        keyword arguments to pass to the underlying plot functions;
        the dictionaries are copied, not modified
    palette_kws : dictionary
        keyword arguments for seaborn.color_palette

    Raises
    ------
    ValueError
        if `col_wrap` is given without `col`, or together with `row`

    """
    # TODO
    # - legend when fit_reg is False
    # - wrap title when wide

    # First sort out the general figure layout
    if size is None:
        size = mpl.rcParams["figure.figsize"][1]

    if col is None and col_wrap is not None:
        raise ValueError("Need column facet variable for `col_wrap`")
    if row is not None and col_wrap is not None:
        raise ValueError("Cannot facet rows when using `col_wrap`")

    nrow = 1 if row is None else len(data[row].unique())
    ncol = 1 if col is None else len(data[col].unique())

    if col_wrap is not None:
        ncol = col_wrap
        # float() keeps this a true division on Python 2 so that a partly
        # filled final row is still counted
        nrow = int(np.ceil(len(data[col].unique()) / float(col_wrap)))

    f, axes = plt.subplots(nrow, ncol, sharex=sharex, sharey=sharey,
                           figsize=(size * ncol, size * nrow))
    axes = np.atleast_2d(axes).reshape(nrow, ncol)

    # Facet masks depend on whether a facet variable was given (not on the
    # grid shape), so the level values used in titles are always defined
    if row is None:
        row_masks = [np.repeat(True, len(data))]
    else:
        row_vals = np.sort(data[row].unique())
        row_masks = [data[row] == val for val in row_vals]

    if col is None:
        col_masks = [np.repeat(True, len(data))]
    else:
        col_vals = np.sort(data[col].unique())
        col_masks = [data[col] == val for val in col_vals]

    if x_partial is not None and not isinstance(x_partial, list):
        x_partial = [x_partial]
    if y_partial is not None and not isinstance(y_partial, list):
        y_partial = [y_partial]

    if palette_kws is None:
        palette_kws = {}

    # Sort out the plot colors; `color` is rebound per hue level below, so
    # remember the name of the grouping column
    color_factor = color
    if color is None:
        hue_masks = [np.repeat(True, len(data))]
        colors = ["#222222"]
    else:
        hue_vals = np.sort(data[color].unique())
        hue_masks = [data[color] == val for val in hue_vals]
        colors = color_palette(palette, len(hue_masks), **palette_kws)

    # Default keyword arguments for plot components (copied so the pops
    # below do not alter the caller's dictionaries)
    scatter_kws = {} if scatter_kws is None else dict(scatter_kws)
    line_kws = {} if line_kws is None else dict(line_kws)

    # Marker size defaults differ between raw scatters (4) and point
    # estimates (7); a user-supplied `ms` or `mew` applies to both
    user_ms = scatter_kws.pop("ms", None)
    scatter_ms = 4 if user_ms is None else user_ms
    estimate_ms = 7 if user_ms is None else user_ms
    scatter_mew = scatter_kws.pop("mew", 0)
    scatter_alpha = scatter_kws.pop("alpha", .77)

    # First walk through the facets and plot the scatters
    for row_i, row_mask in enumerate(row_masks):
        for col_j, col_mask in enumerate(col_masks):
            if col_wrap is not None:
                f_row = col_j // ncol
                f_col = col_j % ncol
            else:
                f_row, f_col = row_i, col_j
            ax = axes[f_row, f_col]
            if f_row + 1 == nrow:
                ax.set_xlabel(x)
            if f_col == 0:
                ax.set_ylabel(y)

            # Title the plot if we are faceting
            title = ""
            if row is not None:
                title += "%s = %s" % (row, row_vals[row_i])
            if row is not None and col is not None:
                title += " | "
            if col is not None:
                title += "%s = %s" % (col, col_vals[col_j])
            ax.set_title(title)

            for hue_k, hue_mask in enumerate(hue_masks):
                color = colors[hue_k]
                data_ijk = data[row_mask & col_mask & hue_mask]

                if x_estimator is not None:
                    x_vals = data_ijk[x].unique()
                    y_vals = _lm_partial_out(data_ijk[y], data_ijk,
                                             y_partial)

                    y_grouped = [np.array(y_vals[data_ijk[x] == v])
                                 for v in x_vals]
                    y_est = [x_estimator(y_i) for y_i in y_grouped]
                    y_boots = [moss.bootstrap(np.array(y_i),
                                              func=x_estimator,
                                              n_boot=n_boot)
                               for y_i in y_grouped]
                    ci_lims = [50 - x_ci / 2., 50 + x_ci / 2.]
                    y_ci = [moss.percentiles(y_i, ci_lims)
                            for y_i in y_boots]
                    y_error = ci_to_errsize(np.transpose(y_ci), y_est)

                    ax.plot(x_vals, y_est, "o", mew=scatter_mew,
                            ms=estimate_ms, color=color, **scatter_kws)
                    ax.errorbar(x_vals, y_est, y_error,
                                fmt=None, ecolor=color)
                else:
                    x_ = _lm_partial_out(data_ijk[x], data_ijk, x_partial)
                    y_ = _lm_partial_out(data_ijk[y], data_ijk, y_partial)

                    # Jitter only moves the drawn points; the regression
                    # below is fit to freshly selected data
                    if x_jitter is not None:
                        x_ = x_ + np.random.uniform(-x_jitter, x_jitter,
                                                    x_.shape)
                    if y_jitter is not None:
                        y_ = y_ + np.random.uniform(-y_jitter, y_jitter,
                                                    y_.shape)
                    ax.plot(x_, y_, "o", color=color, alpha=scatter_alpha,
                            mew=scatter_mew, ms=scatter_ms, **scatter_kws)

    for ax_i in np.ravel(axes):
        ax_i.set_xmargin(.05)
        ax_i.autoscale_view()

    # Now walk through again and plot the regression estimate
    # and a confidence interval for the regression line
    if fit_reg:
        for row_i, row_mask in enumerate(row_masks):
            for col_j, col_mask in enumerate(col_masks):
                if col_wrap is not None:
                    f_row = col_j // ncol
                    f_col = col_j % ncol
                else:
                    f_row, f_col = row_i, col_j
                ax = axes[f_row, f_col]
                xlim = ax.get_xlim()

                for hue_k, hue_mask in enumerate(hue_masks):
                    color = colors[hue_k]
                    data_ijk = data[row_mask & col_mask & hue_mask]
                    x_vals = np.array(data_ijk[x])
                    y_vals = np.array(data_ijk[y])
                    if not len(x_vals):
                        continue

                    # Sort out the limit of the fit
                    if truncate:
                        xx = np.linspace(x_vals.min(), x_vals.max(), 100)
                    else:
                        xx = np.linspace(xlim[0], xlim[1], 100)
                    xx_ = sm.add_constant(xx, prepend=True)

                    # Inner function to bootstrap the regression; it closes
                    # over the prediction grid of this facet/hue and is
                    # only used within this iteration
                    def _regress(x, y):
                        if logistic:
                            x_ = sm.add_constant(x, prepend=True)
                            fit = sm.GLM(y, x_,
                                         family=sm.families.Binomial()).fit()
                            reg = fit.predict(xx_)
                        else:
                            fit = np.polyfit(x, y, order)
                            reg = np.polyval(fit, xx)
                        return reg

                    # Remove nuisance variables with vector rejection
                    x_vals = _lm_partial_out(x_vals, data_ijk, x_partial)
                    y_vals = _lm_partial_out(y_vals, data_ijk, y_partial)

                    # Regression line confidence interval
                    if ci is not None:
                        ci_lims = [50 - ci / 2., 50 + ci / 2.]
                        boots = moss.bootstrap(x_vals, y_vals,
                                               func=_regress,
                                               n_boot=n_boot)
                        ci_band = moss.percentiles(boots, ci_lims, axis=0)
                        ax.fill_between(xx, *ci_band, color=color, alpha=.15)

                    # Regression line
                    reg = _regress(x_vals, y_vals)
                    if color_factor is None:
                        label = ""
                    else:
                        label = hue_vals[hue_k]
                    ax.plot(xx, reg, color=color,
                            label=str(label), **line_kws)
                    # Restore the limits found after the scatter pass
                    ax.set_xlim(xlim)

    # Plot the legend on the upper left facet and adjust the layout
    if color_factor is not None and color_factor not in [row, col]:
        axes[0, 0].legend(loc="best", title=color_factor)
    plt.tight_layout()
def violin(vals, inner="box", position=None, widths=.5, join_rm=False,
           names=None, ax=None, **kwargs):
    """Create a violin plot (a combination of boxplot and KDE plot).

    Parameters
    ----------
    vals : array or sequence of arrays
        data to plot
    inner : box | stick | sticks | points
        plot quartiles or individual sample values inside violin
        ("stick" and "sticks" are equivalent)
    position : number or sequence of numbers
        position of first violin or positions of each violin
    widths : float
        width of each violin at maximum density
    join_rm : boolean, optional
        if True, positions in the input arrays are treated as repeated
        measures and are joined with a line plot
    names : list of strings, optional
        names to plot on x axis, otherwise plots numbers
    ax : matplotlib axis, optional
        axis to plot on, otherwise creates new one
    kwargs : additional keyword arguments
        passed to a throwaway ``ax.plot`` call whose line color is then
        used for the violins

    Returns
    -------
    ax : matplotlib axis
        axis with violin plot

    Raises
    ------
    ValueError
        if `vals` has more than 2 dimensions or `names` does not match the
        number of violins

    """
    if ax is None:
        ax = plt.subplot(111)

    # Coerce the input into a list with one 1-d array per violin
    if hasattr(vals, 'shape'):
        if len(vals.shape) == 1:
            if hasattr(vals[0], 'shape'):
                vals = list(vals)
            else:
                vals = [vals]
        elif len(vals.shape) == 2:
            nr, nc = vals.shape
            if nr == 1:
                vals = [vals]
            elif nc == 1:
                vals = [vals.ravel()]
            else:
                vals = [vals[:, i] for i in range(nc)]
        else:
            raise ValueError("Input x can have no more than 2 dimensions")
    if not hasattr(vals[0], '__len__'):
        vals = [vals]

    vals = [np.asarray(a, float) for a in vals]

    # Draw and immediately remove a dummy line to learn which color
    # matplotlib assigns given the kwargs
    line, = ax.plot(vals[0].mean(), vals[0].mean(), **kwargs)
    color = line.get_color()
    line.remove()

    gray = "#555555"

    if position is None:
        position = np.arange(1, len(vals) + 1)
    elif not hasattr(position, "__iter__"):
        position = np.arange(position, len(vals) + position)

    for i, a in enumerate(vals):
        x = position[i]
        kde = stats.gaussian_kde(a)
        y = _kde_support(a, kde, 1000)
        dens = kde(y)
        # Rescale so the widest point of the violin spans `widths`
        scl = 1 / (dens.max() / (widths / 2))
        dens *= scl

        ax.fill_betweenx(y, x - dens, x + dens, alpha=.7, color=color)
        if inner == "box":
            for quant in moss.percentiles(a, [25, 75]):
                q_x = kde(quant) * scl
                q_x = [x - q_x, x + q_x]
                ax.plot(q_x, [quant, quant], gray,
                        linestyle=":", linewidth=1.5)
            med = np.median(a)
            m_x = kde(med) * scl
            m_x = [x - m_x, x + m_x]
            ax.plot(m_x, [med, med], gray,
                    linestyle="--", linewidth=1.2)
        elif inner in ("stick", "sticks"):
            x_vals = kde(a) * scl
            x_vals = [x - x_vals, x + x_vals]
            ax.plot(x_vals, [a, a], gray, linewidth=.7, alpha=.7)
        elif inner == "points":
            # `_` avoids clobbering the loop index on Python 2, where list
            # comprehension variables leak into the enclosing scope
            x_vals = [x for _ in a]
            ax.plot(x_vals, a, "o", color=gray, alpha=.3)
        for side in [-1, 1]:
            ax.plot((side * dens) + x, y, gray, linewidth=1)

    if join_rm:
        # Join the measures at the positions where the violins were drawn
        ax.plot(position, vals, color=color, alpha=2. / 3)

    ax.set_xticks(position)
    if names is not None:
        if len(vals) != len(names):
            raise ValueError("Length of names list must match nuber of bins")
        ax.set_xticklabels(names)
    ax.set_xlim(position[0] - .5, position[-1] + .5)

    return ax
def violinplot(vals, groupby=None, inner="box", color=None, positions=None,
               names=None, order=None, kernel="gau", bw="scott", widths=.8,
               alpha=None, join_rm=False, gridsize=100, cut=3,
               inner_kws=None, ax=None, **kwargs):
    """Create a violin plot (a combination of boxplot and kernel density plot).

    Parameters
    ----------
    vals : DataFrame, Series, 2D array, or list of vectors.
        Data for plot. DataFrames and 2D arrays are assumed to be "wide" with
        each column mapping to a box. Lists of data are assumed to have one
        element per box.  Can also provide one long Series in conjunction with
        a grouping element as the `groupby` parameter to reshape the data into
        several violins. Otherwise 1D data will produce a single violin.
    groupby : grouping object
        If `vals` is a Series, this is used to group into boxes by calling
        pd.groupby(vals, groupby).
    inner : box | stick | sticks | points
        Plot quartiles or individual sample values inside violin
        ("stick" and "sticks" are equivalent).
    color : mpl color, sequence of colors, or seaborn palette name
        Inner violin colors
    positions : number or sequence of numbers
        Position of first violin or positions of each violin.
    names : list of strings, optional
        Names to plot on x axis; otherwise plots numbers. This will override
        names inferred from Pandas inputs.
    order : list of strings, optional
        If vals is a Pandas object with name information, you can control the
        order of the plot by providing the violin names in your preferred
        order.
    kernel : {'gau' | 'cos' | 'biw' | 'epa' | 'tri' | 'triw' }
        Code for shape of kernel to fit with.
    bw : {'scott' | 'silverman' | scalar}
        Name of reference method to determine kernel size, or size as a
        scalar.
    widths : float
        Width of each violin at maximum density.
    alpha : float, optional
        Transparency of violin fill.
    join_rm : boolean, optional
        If True, positions in the input arrays are treated as repeated
        measures and are joined with a line plot.
    gridsize : int
        Number of discrete gridpoints to evaluate the density on.
    cut : scalar
        Draw the estimate to cut * bw from the extreme data points.
    inner_kws : dict, optional
        Keyword arguments for inner plot. The dict is copied, not modified.
    ax : matplotlib axis, optional
        Axis to plot on, otherwise grab current axis.
    kwargs : additional parameters
        Only ``lw`` / ``linewidth`` is read (width of the violin outline,
        default 1.5); other entries are currently ignored.

    Returns
    -------
    ax : matplotlib axis
        Axis with violin plot.

    Raises
    ------
    ValueError
        If `names` does not match the number of violins.

    """
    if ax is None:
        ax = plt.gca()

    # Reshape and find labels for the plot
    vals, xlabel, ylabel, names = _box_reshape(vals, groupby, names, order)

    # Sort out the plot colors
    colors, gray = _box_colors(vals, color)

    # Initialize the kwarg dict for the inner plot (copied so the pops
    # below do not alter the caller's dict)
    inner_kws = {} if inner_kws is None else dict(inner_kws)
    in_alpha = inner_kws.pop("alpha", .6 if inner == "points" else 1)
    in_alpha *= 1 if alpha is None else alpha
    in_color = inner_kws.pop("color", gray)
    in_marker = inner_kws.pop("marker", ".")
    in_lw = inner_kws.pop("lw", 1.5 if inner == "box" else .8)

    # Find where the violins are going
    if positions is None:
        positions = np.arange(1, len(vals) + 1)
    elif not hasattr(positions, "__iter__"):
        positions = np.arange(positions, len(vals) + positions)

    # Set the default linewidth if not provided in kwargs
    lw = kwargs.get("lw", kwargs.get("linewidth", 1.5))

    # Iterate over the variables
    for i, a in enumerate(vals):

        # Fit the KDE; the FFT path is requested only for the gaussian kernel
        x = positions[i]
        kde = sm.nonparametric.KDEUnivariate(a)
        fft = kernel == "gau"
        kde.fit(bw=bw, kernel=kernel, gridsize=gridsize, cut=cut, fft=fft)
        y, dens = kde.support, kde.density
        # Rescale so the widest point of the violin spans `widths`
        scl = 1 / (dens.max() / (widths / 2))
        dens *= scl

        # Draw the violin
        ax.fill_betweenx(y, x - dens, x + dens, alpha=alpha, color=colors[i])
        if inner == "box":
            for quant in moss.percentiles(a, [25, 75]):
                q_x = kde.evaluate(quant) * scl
                q_x = [x - q_x, x + q_x]
                ax.plot(q_x, [quant, quant], color=in_color,
                        linestyle=":", linewidth=in_lw, **inner_kws)
            med = np.median(a)
            m_x = kde.evaluate(med) * scl
            m_x = [x - m_x, x + m_x]
            ax.plot(m_x, [med, med], color=in_color,
                    linestyle="--", linewidth=in_lw, **inner_kws)
        elif inner in ("stick", "sticks"):
            x_vals = kde.evaluate(a) * scl
            x_vals = [x - x_vals, x + x_vals]
            ax.plot(x_vals, [a, a], color=in_color,
                    linewidth=in_lw, alpha=in_alpha, **inner_kws)
        elif inner == "points":
            x_vals = [x for _ in a]
            ax.plot(x_vals, a, in_marker, color=in_color,
                    alpha=in_alpha, mew=0, **inner_kws)
        for side in [-1, 1]:
            ax.plot((side * dens) + x, y, c=gray, lw=lw)

    # Draw the repeated measure bridges at the violin positions
    if join_rm:
        ax.plot(positions, vals, color=in_color, alpha=2. / 3)

    # Add in semantic labels
    ax.set_xticks(positions)
    if names is not None:
        if len(vals) != len(names):
            raise ValueError("Length of names list must match nuber of bins")
        ax.set_xticklabels(names)
    ax.set_xlim(positions[0] - .5, positions[-1] + .5)

    if xlabel is not None:
        ax.set_xlabel(xlabel)
    if ylabel is not None:
        ax.set_ylabel(ylabel)

    ax.xaxis.grid(False)
    return ax
def fixedfx_report(space, anatomy, zstat_files, r2_files, masks):
    """Plot the resulting data.

    Parameters
    ----------
    space : str
        ``"mni"`` plots every second slice with a fixed start offset of 4;
        any other value plots every slice with no offset.
    anatomy : str
        Background image file drawn in gray on every panel.
    zstat_files : list of str
        Z-statistic images; each is saved as ``<contrast>/zstat1.png``, where
        ``<contrast>`` is the name of the directory containing the file.
    r2_files : list of str
        R2 images. They are paired with two colormaps, so at most the first
        two files are plotted.
    masks : list of str
        Mask images, labelled "Run 1", "Run 2", ... in the overlap figure.

    Returns
    -------
    list of str
        Absolute paths of ``mask_overlap.png``, then one png per plotted R2
        image, then one directory per z-statistic image.

    All outputs are written relative to the current working directory.

    """
    sns.set()
    bg = nib.load(anatomy).get_data()
    mask_data = [nib.load(f).get_data() for f in masks]
    # Shade (value 1) voxels missing from at least one mask; NaN elsewhere
    # and wherever the background is below its 5th percentile
    mask = np.where(np.all(mask_data, axis=0), np.nan, 1)
    mask[bg < moss.percentiles(bg, 5)] = np.nan

    # Find the plot parameters
    xdata = np.flatnonzero(bg.any(axis=1).any(axis=1))
    xslice = slice(xdata.min(), xdata.max() + 1)
    ydata = np.flatnonzero(bg.any(axis=0).any(axis=1))
    yslice = slice(ydata.min(), ydata.max() + 1)
    zextent = np.flatnonzero(bg.any(axis=0).any(axis=0))
    zmin, zmax = zextent.min(), zextent.max()

    step = 2 if space == "mni" else 1
    offset = 4 if space == "mni" else 0

    n_slices = (zmax - zmin) // step
    n_row, n_col = n_slices // 8, 8
    start = n_slices % n_col // step + zmin + offset
    figsize = (10, 1.375 * n_row)
    slices = (start + np.arange(zmax - zmin))[::step][:n_slices]
    pltkws = dict(nrows=int(n_row), ncols=int(n_col),
                  figsize=figsize, facecolor="k")
    pngkws = dict(dpi=100, bbox_inches="tight",
                  facecolor="k", edgecolor="k")

    vmin, vmax = 0, moss.percentiles(bg, 95)
    mask_cmap = mpl.colors.ListedColormap(["#160016"])

    report = []

    def add_colorbar(f, cmap, low, high, left, width, fmt):
        """Draw a horizontal colorbar strip with its two end labels."""
        cbar = np.outer(np.arange(0, 1, .01), np.ones(10))
        cbar_ax = f.add_axes([left, 0, width, .03])
        cbar_ax.imshow(cbar.T, aspect="auto", cmap=cmap)
        cbar_ax.axis("off")
        f.text(left - .01, .018, fmt % low, ha="right", va="center",
               color="white", size=13, weight="demibold")
        f.text(left + width + .01, .018, fmt % high, ha="left",
               va="center", color="white", size=13, weight="demibold")

    # Plot the mask edges
    f, axes = plt.subplots(**pltkws)
    mask_colors = sns.husl_palette(len(mask_data))
    mask_colors.reverse()
    cmaps = [mpl.colors.ListedColormap([c]) for c in mask_colors]
    for i, ax in zip(slices, axes.ravel()):
        ax.imshow(bg[xslice, yslice, i].T, cmap="gray", vmin=vmin, vmax=vmax)
        for j, m in enumerate(mask_data):
            # Only contour slices that actually contain mask voxels
            if m[xslice, yslice, i].any():
                ax.contour(m[xslice, yslice, i].T,
                           cmap=cmaps[j], linewidths=.75)
        ax.axis("off")

    # Spread the run labels around the horizontal center of the figure
    text_min = max(.15, .5 - len(mask_data) * .05)
    text_max = min(.85, .5 + len(mask_data) * .05)
    text_pos = np.linspace(text_min, text_max, len(mask_data))
    for i, color in enumerate(mask_colors):
        f.text(text_pos[i], .03, "Run %d" % (i + 1), color=color,
               size=11, weight="demibold", ha="center", va="center")

    mask_png = op.abspath("mask_overlap.png")
    # Save through the figure handle instead of pyplot's current figure
    f.savefig(mask_png, **pngkws)
    report.append(mask_png)
    plt.close(f)

    # Now plot the R2 images
    for fname, cmap in zip(r2_files, ["GnBu_r", "YlGn_r"]):
        f, axes = plt.subplots(**pltkws)
        r2data = nib.load(fname).get_data()
        rmax = r2data[~np.isnan(r2data)].max()
        r2data[bg == 0] = np.nan
        for i, ax in zip(slices, axes.ravel()):
            ax.imshow(bg[xslice, yslice, i].T,
                      cmap="gray", vmin=vmin, vmax=vmax)
            ax.imshow(r2data[xslice, yslice, i].T, cmap=cmap,
                      vmin=0, vmax=rmax, alpha=.8)
            ax.imshow(mask[xslice, yslice, i].T, alpha=.5,
                      cmap=mask_cmap, interpolation="nearest")
            ax.axis("off")
        savename = op.abspath(op.basename(fname).replace(".nii.gz", ".png"))
        add_colorbar(f, cmap, 0, rmax, .35, .3, "%.2f")
        f.savefig(savename, **pngkws)
        report.append(savename)
        plt.close(f)

    # Finally plot each zstat image
    for fname in zstat_files:
        zstat = nib.load(fname).get_data()
        zpos = zstat.copy()
        zneg = zstat.copy()
        # Show only values beyond +/- 2.3
        zpos[zstat < 2.3] = np.nan
        zneg[zstat > -2.3] = np.nan
        zlow = 2.3
        zhigh = max(np.abs(zstat).max(), 3.71)
        f, axes = plt.subplots(**pltkws)
        for i, ax in zip(slices, axes.ravel()):
            ax.imshow(bg[xslice, yslice, i].T, cmap="gray",
                      vmin=vmin, vmax=vmax)
            ax.imshow(zpos[xslice, yslice, i].T, cmap="Reds_r",
                      vmin=zlow, vmax=zhigh)
            ax.imshow(zneg[xslice, yslice, i].T, cmap="Blues",
                      vmin=-zhigh, vmax=-zlow)
            ax.imshow(mask[xslice, yslice, i].T, alpha=.5,
                      cmap=mask_cmap, interpolation="nearest")
            ax.axis("off")
        add_colorbar(f, "Blues", -zhigh, -zlow, .18, .23, "%.1f")
        add_colorbar(f, "Reds_r", zlow, zhigh, .59, .23, "%.1f")

        # The contrast is named after the directory holding the zstat file
        contrast = op.basename(op.dirname(fname))
        # Tolerate an existing directory so the report can be regenerated
        if not op.isdir(contrast):
            os.mkdir(contrast)
        savename = op.join(contrast, "zstat1.png")
        f.savefig(savename, **pngkws)
        report.append(op.abspath(contrast))
        plt.close(f)

    return report
def regplot(x, y, data=None, corr_func=stats.pearsonr, func_name=None,
            xlabel="", ylabel="", ci=95, size=None, annotloc=None, color=None,
            reg_kws=None, scatter_kws=None, dist_kws=None, text_kws=None):
    """Scatterplot with regression line, marginals, and correlation value.

    Parameters
    ----------
    x : sequence or string
        Independent variable.
    y : sequence or string
        Dependent variable.
    data : dataframe, optional
        If dataframe is given, x, and y are interpreted as string keys
        for selecting to dataframe column names.
    corr_func : callable, optional
        Correlation function; expected to take two arrays and return a
        numeric or (statistic, pval) tuple.
    func_name : string, optional
        Use in lieu of function name for fit statistic annotation.
    xlabel, ylabel : string, optional
        Axis label names if inputs are not Pandas objects or to override.
    ci : int or None
        Confidence interval for the regression estimate.
    size: int
        Figure size (will be a square; only need one int).
    annotloc : two or three tuple
        Kept for interface compatibility; this implementation places the
        fit annotation with a legend at ``loc="best"`` and does not read it.
    color : matplotlib color scheme
        Color of everything but the regression line; can be overridden by
        passing `color` to subfunc kwargs.
    {reg, scatter, dist, text}_kws: dicts
        Further keyword arguments for the constituent plots. The
        dictionaries passed in are copied, never modified.

    """
    # Interpret inputs
    if data is not None:
        if not xlabel:
            xlabel = x
        if not ylabel:
            ylabel = y
        x = data[x].values
        y = data[y].values
    else:
        if hasattr(x, "name") and not xlabel:
            if x.name is not None:
                xlabel = x.name
        if hasattr(y, "name") and not ylabel:
            if y.name is not None:
                ylabel = y.name
        x = np.asarray(x)
        y = np.asarray(y)

    # Work on private copies so that popping/adding keys below never leaks
    # back into dictionaries the caller may reuse for another plot
    scatter_kws = {} if scatter_kws is None else dict(scatter_kws)
    dist_kws = {} if dist_kws is None else dict(dist_kws)
    reg_kws = {} if reg_kws is None else dict(reg_kws)
    text_kws = {} if text_kws is None else dict(text_kws)

    # Set up the figure and axes
    size = mpl.rcParams["figure.figsize"][1] if size is None else size
    fig = plt.figure(figsize=(size, size))
    ax_scatter = fig.add_axes([0.05, 0.05, 0.75, 0.75])
    ax_x_marg = fig.add_axes([0.05, 0.82, 0.75, 0.13])
    ax_y_marg = fig.add_axes([0.82, 0.05, 0.13, 0.75])

    # Plot the scatter
    if color is not None and "color" not in scatter_kws:
        scatter_kws.update(color=color)
    marker = scatter_kws.pop("markerstyle", "o")
    mew = scatter_kws.pop("mew", 0)
    # Default alpha decays with the number of points, floored at .1
    alpha_maker = stats.norm(0, 100)
    alpha = alpha_maker.pdf(len(x)) / alpha_maker.pdf(0)
    alpha = max(alpha, .1)
    alpha = scatter_kws.pop("alpha", alpha)
    ax_scatter.plot(x, y, marker, alpha=alpha, mew=mew, **scatter_kws)
    ax_scatter.set_xlabel(xlabel)
    ax_scatter.set_ylabel(ylabel)

    # Marginal plots using our distplot function
    if color is not None and "color" not in dist_kws:
        dist_kws.update(color=color)
    dist_kws["axlabel"] = False
    distplot(x, ax=ax_x_marg, **dist_kws)
    distplot(y, ax=ax_y_marg, vertical=True, **dist_kws)
    for ax in [ax_x_marg, ax_y_marg]:
        ax.set_xticklabels([])
        ax.set_yticklabels([])

    # Regression line plot
    xlim = ax_scatter.get_xlim()
    a, b = np.polyfit(x, y, 1)
    reg_color = reg_kws.pop("color", "#222222")
    yhat = np.polyval([a, b], xlim)
    ax_scatter.plot(xlim, yhat, color=reg_color, **reg_kws)

    # This is a hack to get the annotation to work: an invisible line
    # serves as the legend handle that carries the fit message
    reg = ax_scatter.plot(xlim, yhat, lw=0)

    # Bootstrapped regression standard error
    if ci is not None:
        xx = np.linspace(xlim[0], xlim[1], 100)

        def _bootstrap_reg(x, y):
            fit = np.polyfit(x, y, 1)
            return np.polyval(fit, xx)

        boots = moss.bootstrap(x, y, func=_bootstrap_reg)
        ci_lims = [50 - ci / 2., 50 + ci / 2.]
        ci_band = moss.percentiles(boots, ci_lims, axis=0)
        ax_scatter.fill_between(xx, *ci_band, color=reg_color, alpha=.15)
        ax_scatter.set_xlim(xlim)

    # Calculate a fit statistic and p value
    if func_name is None:
        func_name = corr_func.__name__
    out = corr_func(x, y)
    try:
        s, p = out
        msg = "%s: %.2g (p=%.2g%s)" % (func_name, s, p, moss.sig_stars(p))
    except TypeError:
        # Scalar result: reuse it rather than evaluating corr_func again
        s = out
        msg = "%s: %.3f" % (func_name, s)

    ax_scatter.legend(reg, [msg], loc="best", prop=text_kws)

    # Set the axes on the marginal plots
    ax_x_marg.set_xlim(ax_scatter.get_xlim())
    ax_x_marg.set_yticks([])
    ax_y_marg.set_ylim(ax_scatter.get_ylim())
    ax_y_marg.set_xticks([])
def lmplot(x, y, data, color=None, row=None, col=None, x_estimator=None,
           x_ci=95, fit_line=True, ci=95, truncate=False, sharex=True,
           sharey=True, palette="hls", size=None, scatter_kws=None,
           line_kws=None, palette_kws=None):
    """Plot a linear model from a DataFrame.

    Parameters
    ----------
    x, y : strings
        column names in `data` DataFrame for x and y variables
    data : DataFrame
        source of data for the model
    color : string, optional
        DataFrame column name to group the model by color
    row, col : strings, optional
        DataFrame column names to make separate plot facets
    x_estimator : callable, optional
        Interpret X values as factor labels and use this function
        to plot the point estimate and bootstrapped CI
    x_ci : int optional
        size of confidence interval for x_estimator error bars
    fit_line : bool, optional
        if True fit a regression line by color/row/col and plot
    ci : int, optional
        confidence interval for the regression line
    truncate : bool, optional
        if True, only fit line from data min to data max
    sharex, sharey : bools, optional
        only relevant if faceting; passed to plt.subplots
    palette : seaborn color palette argument
        if using separate plots by color, draw with this color palette
    size : float, optional
        size (plots are square) for each plot facet
    {scatter, line}_kws : dictionary
        keyword arguments to pass to the underlying plot functions;
        the dictionaries passed in are not modified
    palette_kws : dictionary
        keyword arguments for seaborn.color_palette

    """
    # TODO
    # - position_{dodge, jitter}
    # - legend when fit_line is False
    # - truncate fit
    # - wrap title when wide
    # - wrap columns

    # First sort out the general figure layout
    if size is None:
        size = mpl.rcParams["figure.figsize"][1]

    nrow = 1 if row is None else len(data[row].unique())
    ncol = 1 if col is None else len(data[col].unique())

    f, axes = plt.subplots(nrow, ncol, sharex=sharex, sharey=sharey,
                           figsize=(size * ncol, size * nrow))
    axes = np.atleast_2d(axes).reshape(nrow, ncol)

    # Build the facet masks. The level values are computed whenever a facet
    # variable is given (even with a single level) because the facet titles
    # below index into them.
    if row is None:
        row_masks = [np.repeat(True, len(data))]
    else:
        row_vals = np.sort(data[row].unique())
        row_masks = [data[row] == val for val in row_vals]
    if col is None:
        col_masks = [np.repeat(True, len(data))]
    else:
        col_vals = np.sort(data[col].unique())
        col_masks = [data[col] == val for val in col_vals]

    if palette_kws is None:
        palette_kws = {}

    # Sort out the plot colors
    if color is None:
        hue_masks = [np.repeat(True, len(data))]
        colors = ["#222222"]
    else:
        hue_vals = np.sort(data[color].unique())
        hue_masks = [data[color] == val for val in hue_vals]
        colors = color_palette(palette, len(hue_masks), **palette_kws)

    # Default keyword arguments for plot components; copy the scatter
    # dict because keys are popped from it below
    scatter_kws = {} if scatter_kws is None else dict(scatter_kws)
    if line_kws is None:
        line_kws = {}

    # Resolve marker size and edge width once, so that a user-supplied
    # value applies to every facet and hue level rather than only the first
    ms = scatter_kws.pop("ms", 4 if x_estimator is None else 7)
    mew = scatter_kws.pop("mew", 0)

    # First walk through the facets and plot the scatters
    for row_i, row_mask in enumerate(row_masks):
        for col_j, col_mask in enumerate(col_masks):
            ax = axes[row_i, col_j]
            if not sharex or (row_i + 1 == len(row_masks)):
                ax.set_xlabel(x)
            if not sharey or col_j == 0:
                ax.set_ylabel(y)

            # Title the plot if we are faceting
            title = ""
            if row is not None:
                title += "%s = %s" % (row, row_vals[row_i])
            if row is not None and col is not None:
                title += " | "
            if col is not None:
                title += "%s = %s" % (col, col_vals[col_j])
            ax.set_title(title)

            for hue_k, hue_mask in enumerate(hue_masks):
                hue_color = colors[hue_k]
                data_ijk = data[row_mask & col_mask & hue_mask]

                if x_estimator is not None:
                    # Point estimate with a bootstrapped CI at each x level
                    x_vals = data_ijk[x].unique()
                    y_grouped = [np.array(data_ijk[y][data_ijk[x] == v])
                                 for v in x_vals]
                    y_est = [x_estimator(y_i) for y_i in y_grouped]
                    y_boots = [moss.bootstrap(np.array(y_i),
                                              func=x_estimator)
                               for y_i in y_grouped]
                    ci_lims = [50 - x_ci / 2., 50 + x_ci / 2.]
                    y_ci = [moss.percentiles(y_i, ci_lims)
                            for y_i in y_boots]
                    y_error = ci_to_errsize(np.transpose(y_ci), y_est)

                    ax.plot(x_vals, y_est, "o", mew=mew, ms=ms,
                            color=hue_color, **scatter_kws)
                    # NOTE(review): fmt=None relies on older matplotlib
                    # semantics (error bars only) - confirm on upgrade
                    ax.errorbar(x_vals, y_est, y_error,
                                fmt=None, ecolor=hue_color)
                else:
                    ax.plot(data_ijk[x], data_ijk[y], "o",
                            color=hue_color, mew=mew, ms=ms, **scatter_kws)

    for ax_i in np.ravel(axes):
        ax_i.set_xmargin(.05)
        ax_i.autoscale_view()

    # Now walk through again and plot the regression estimate
    # and a confidence interval for the regression line
    if fit_line:
        for row_i, row_mask in enumerate(row_masks):
            for col_j, col_mask in enumerate(col_masks):
                ax = axes[row_i, col_j]
                xlim = ax.get_xlim()

                for hue_k, hue_mask in enumerate(hue_masks):
                    hue_color = colors[hue_k]
                    data_ijk = data[row_mask & col_mask & hue_mask]
                    x_vals = np.array(data_ijk[x])
                    y_vals = np.array(data_ijk[y])

                    # Sort out the limit of the fit
                    if truncate:
                        xx = np.linspace(x_vals.min(),
                                         x_vals.max(), 100)
                    else:
                        xx = np.linspace(xlim[0], xlim[1], 100)

                    # Inner function to bootstrap the regression
                    def _bootstrap_reg(x, y):
                        fit = np.polyfit(x, y, 1)
                        return np.polyval(fit, xx)

                    # Regression line confidence interval
                    if ci is not None:
                        ci_lims = [50 - ci / 2., 50 + ci / 2.]
                        boots = moss.bootstrap(x_vals, y_vals,
                                               func=_bootstrap_reg)
                        ci_band = moss.percentiles(boots, ci_lims, axis=0)
                        ax.fill_between(xx, *ci_band,
                                        color=hue_color, alpha=.15)

                    fit = np.polyfit(x_vals, y_vals, 1)
                    reg = np.polyval(fit, xx)
                    label = "" if color is None else hue_vals[hue_k]
                    ax.plot(xx, reg, color=hue_color,
                            label=str(label), **line_kws)
                ax.set_xlim(xlim)

    # Plot the legend on the upper left facet and adjust the layout
    if color is not None:
        axes[0, 0].legend(loc="best", title=color)
    plt.tight_layout()
def regplot(x, y, data=None, corr_func=stats.pearsonr, xlabel="", ylabel="",
            ci=95, size=None, annotloc=None, color=None, reg_kws=None,
            scatter_kws=None, dist_kws=None, text_kws=None):
    """Scatterplot with regression line, marginals, and correlation value.

    Parameters
    ----------
    x : sequence
        independent variables
    y : sequence
        dependent variables
    data : dataframe, optional
        if dataframe is given, x, and y are interpreted as
        string keys mapping to dataframe column names
    corr_func : callable, optional
        correlation function; expected to take two arrays
        and return a (statistic, pval) tuple
    xlabel, ylabel : string, optional
        label names; when `data` is given and a label is empty, the
        corresponding column key is used
    ci : int or None
        confidence interval for the regression line
    size: int
        figure size (will be a square; only need one int)
    annotloc : two or three tuple
        (xpos, ypos [, horizontalalignment])
    color : matplotlib color scheme
        color of everything but the regression line
        overridden by passing `color` to subfunc kwargs
    {reg, scatter, dist, text}_kws: dicts
        further keyword arguments for the constituent plots; the
        dictionaries passed in are copied, never modified

    """
    # Interpret inputs; only fall back to the column keys for labels the
    # caller did not set explicitly
    if data is not None:
        if not xlabel:
            xlabel = x
        if not ylabel:
            ylabel = y
        x = np.array(data[x])
        y = np.array(data[y])

    # Private copies so popped/added keys never leak back to the caller
    scatter_kws = {} if scatter_kws is None else dict(scatter_kws)
    dist_kws = {} if dist_kws is None else dict(dist_kws)
    reg_kws = {} if reg_kws is None else dict(reg_kws)
    text_kws = {} if text_kws is None else dict(text_kws)

    # Set up the figure and axes
    size = 6 if size is None else size
    fig = plt.figure(figsize=(size, size))
    ax_scatter = fig.add_axes([0.05, 0.05, 0.75, 0.75])
    ax_x_marg = fig.add_axes([0.05, 0.82, 0.75, 0.13])
    ax_y_marg = fig.add_axes([0.82, 0.05, 0.13, 0.75])

    # Plot the scatter
    if color is not None and "color" not in scatter_kws:
        scatter_kws.update(color=color)
    marker = scatter_kws.pop("markerstyle", "o")
    # Default alpha decays with the number of points, floored at .1
    alpha_maker = stats.norm(0, 100)
    alpha = alpha_maker.pdf(len(x)) / alpha_maker.pdf(0)
    alpha = max(alpha, .1)
    alpha = scatter_kws.pop("alpha", alpha)
    ax_scatter.plot(x, y, marker, alpha=alpha, mew=0, **scatter_kws)
    ax_scatter.set_xlabel(xlabel)
    ax_scatter.set_ylabel(ylabel)

    # Marginal plots using our distplot function
    if color is not None and "color" not in dist_kws:
        dist_kws.update(color=color)
    if "legend" not in dist_kws:
        dist_kws["legend"] = False
    distplot(x, ax=ax_x_marg, **dist_kws)
    distplot(y, ax=ax_y_marg, vertical=True, **dist_kws)
    for ax in [ax_x_marg, ax_y_marg]:
        ax.set_xticklabels([])
        ax.set_yticklabels([])

    # Regression line plot
    xlim = ax_scatter.get_xlim()
    a, b = np.polyfit(x, y, 1)
    reg_color = reg_kws.pop("color", "#222222")
    ax_scatter.plot(xlim, np.polyval([a, b], xlim),
                    color=reg_color, **reg_kws)

    # Bootstrapped regression standard error
    if ci is not None:
        xx = np.linspace(xlim[0], xlim[1], 100)

        def _bootstrap_reg(x, y):
            fit = np.polyfit(x, y, 1)
            return np.polyval(fit, xx)

        boots = moss.bootstrap(x, y, func=_bootstrap_reg)
        ci_lims = [50 - ci / 2., 50 + ci / 2.]
        ci_band = moss.percentiles(boots, ci_lims, axis=0)
        ax_scatter.fill_between(xx, *ci_band, color=reg_color, alpha=.15)
        ax_scatter.set_xlim(xlim)

    # Calculate a correlation statistic and p value
    r, p = corr_func(x, y)
    msg = "%s: %.3f (p=%.3g%s)" % (corr_func.__name__, r, p,
                                   moss.sig_stars(p))
    if annotloc is None:
        # Put the annotation in the top corner the cloud is less likely
        # to occupy, based on the sign of the statistic
        xmin, xmax = xlim
        x_range = xmax - xmin
        if r < 0:
            xloc, align = xmax - x_range * .02, "right"
        else:
            xloc, align = xmin + x_range * .02, "left"
        ymin, ymax = ax_scatter.get_ylim()
        y_range = ymax - ymin
        yloc = ymax - y_range * .02
    else:
        if len(annotloc) == 3:
            xloc, yloc, align = annotloc
        else:
            xloc, yloc = annotloc
            align = "left"
    ax_scatter.text(xloc, yloc, msg, ha=align, va="top", **text_kws)

    # Set the axes on the marginal plots
    ax_x_marg.set_xlim(ax_scatter.get_xlim())
    ax_x_marg.set_yticks([])
    ax_y_marg.set_ylim(ax_scatter.get_ylim())
    ax_y_marg.set_xticks([])
def tsplot(x, data, err_style="ci_band", ci=68, interpolate=True,
           estimator=np.mean, n_boot=10000, smooth=False,
           err_palette=None, ax=None, err_kws=None, **kwargs):
    """Plot timeseries from a set of observations.

    Parameters
    ----------
    x : n_tp array
        x values
    data : n_obs x n_tp array
        array of timeseries data where first axis is observations. other
        objects (e.g. DataFrames) are converted to an array if possible
    err_style : string or list of strings
        names of ways to plot uncertainty across observations from set of
        {ci_band, ci_bars, boot_traces, boot_kde, obs_traces, obs_points}
    ci : int or list of ints
        confidence interval size(s). if a list, it will stack the error
        plots for each confidence interval
    interpolate : boolean
        if True the central trace is drawn as a line without markers,
        otherwise as unconnected "o" markers (both can be overridden with
        the `marker` / `linestyle` kwargs)
    estimator : callable
        function to determine central tendency and to pass to bootstrap
        must take an ``axis`` argument
    n_boot : int
        number of bootstrap iterations
    smooth : boolean
        whether to perform a smooth bootstrap (resample from KDE)
    err_palette : seaborn palette, optional
        if given, styles whose name contains "obs" draw each observation
        in its own color from this palette
    ax : axis object, optional
        plot in given axis; if None uses the current axis
    err_kws : dict, optional
        keyword argument dictionary passed through to matplotlib
        function generating the error plot
    kwargs : further keyword arguments for main call to plot()

    Returns
    -------
    ax : matplotlib axis
        axis with plot data

    Raises
    ------
    ValueError
        if an entry of `err_style` has no matching ``_plot_<style>``
        function in this module

    """
    if ax is None:
        ax = plt.gca()

    if err_kws is None:
        err_kws = {}

    # Bootstrap the data for confidence intervals
    data = np.asarray(data)
    boot_data = moss.bootstrap(data, n_boot=n_boot, smooth=smooth,
                               axis=0, func=estimator)
    if not hasattr(ci, "__iter__"):
        ci = [ci]
    # Divide by a float so odd integer widths are not truncated under
    # Python 2 integer division (95 must give 2.5/97.5, not 3/97)
    ci_vals = [(50 - w / 2., 50 + w / 2.) for w in ci]
    cis = [moss.percentiles(boot_data, v, axis=0) for v in ci_vals]
    central_data = estimator(data, axis=0)

    # Plot the timeseries line to get its color
    line, = ax.plot(x, central_data, **kwargs)
    color = line.get_color()
    line.remove()
    kwargs.pop("color", None)

    # Use subroutines to plot the uncertainty. Strings expose __iter__ on
    # Python 3, so test for them explicitly before the generic check.
    if isinstance(err_style, str) or not hasattr(err_style, "__iter__"):
        err_style = [err_style]
    for style in err_style:

        # Grab the function from the global environment
        try:
            plot_func = globals()["_plot_%s" % style]
        except KeyError:
            raise ValueError("%s is not a valid err_style" % style)

        # Possibly set up to plot each observation in a different color
        if err_palette is not None and "obs" in style:
            orig_color = color
            color = color_palette(err_palette, len(data), desat=.99)

        plot_kwargs = dict(ax=ax, x=x, data=data,
                           boot_data=boot_data,
                           central_data=central_data,
                           color=color, err_kws=err_kws)

        for ci_i in cis:
            plot_kwargs["ci"] = ci_i
            plot_func(**plot_kwargs)

        # Restore the single line color for the remaining styles
        if err_palette is not None and "obs" in style:
            color = orig_color

    # Replot the central trace so it is prominent
    marker = kwargs.pop("marker", "" if interpolate else "o")
    linestyle = kwargs.pop("linestyle", "-" if interpolate else "")
    ax.plot(x, central_data, color=color,
            marker=marker, linestyle=linestyle, **kwargs)

    return ax
# Bootstrap the OLS weights: refit the model on resampled observations and
# collect one row of weights per iteration.
# NOTE(review): iterations, n_obs, X, y, w, w_ols, colors and ols_fit are
# presumably defined earlier in this script - they are not visible here.
w_boot = zeros((iterations, len(w_ols)))
for i in xrange(iterations):

    # Get an index vector to sample with replacement
    samp = randint(0, n_obs, n_obs)

    # Resample data and model
    samp_X = X[samp, :]
    samp_y = y[samp]

    # Fit the model on this iteration
    w_boot[i, :] = ols_fit(samp_X, samp_y)

# Get the 95% confidence interval for each weight (across each column in
# the design matrix)
w_ols_ci = moss.percentiles(w_boot, [2.5, 97.5], axis=0)

# The bar() function expects errorbar coordinates to be relative to bar height
# We have a convenience function in our plotting library to convert these
ebar_coords = seaborn.ci_to_errsize(w_ols_ci, w_ols)

# Plot the confidence intervals as error bars on our barplot from above
# NOTE(review): the hard-coded 5 presumably equals the number of weights
# in w / w_ols - confirm
bar(arange(5) + .1, w, .4, label="actual weights")
bar(arange(5) + .5, w_ols, .4, yerr=ebar_coords,
    color=colors[1], ecolor="gray", label="estimated weights")
def violin(vals, groupby=None, inner="box", color=None, positions=None,
           names=None, widths=.8, alpha=None, join_rm=False, kde_thresh=1e-2,
           inner_kws=None, ax=None, **kwargs):
    """Create a violin plot (a combination of boxplot and KDE plot).

    Parameters
    ----------
    vals : array or sequence of arrays
        data to plot
    groupby : grouping object
        if `vals` is a Series, this is used to group
    inner : box | stick | points
        plot quartiles or individual sample values inside violin
        ("sticks" is accepted as an alias for "stick")
    color : mpl color, sequence of colors, or seaborn palette name
        inner violin colors
    positions : number or sequence of numbers
        position of first violin or positions of each violin
    widths : float
        width of each violin at maximum density
    alpha : float, optional
        transparency of violin fill
    join_rm : boolean, optional
        if True, positions in the input arrays are treated as repeated
        measures and are joined with a line plot
    names : list of strings, optional
        names to plot on x axis, otherwise plots numbers
    kde_thresh : float, optional
        proportion of maximum at which to threshold the KDE curve
    inner_kws : dict, optional
        keyword arguments for inner plot; the dictionary passed in is
        copied, never modified
    ax : matplotlib axis, optional
        axis to plot on, otherwise uses the current axis

    Returns
    -------
    ax : matplotlib axis
        axis with violin plot

    Raises
    ------
    ValueError
        if `vals` has more than two dimensions, or if the length of
        `names` does not match the number of violins

    """
    if ax is None:
        ax = plt.gca()

    # Pull axis labels and tick names out of pandas inputs
    if isinstance(vals, pd.DataFrame):
        if names is None:
            names = vals.columns
        if vals.columns.name is not None:
            xlabel = vals.columns.name
        else:
            xlabel = None
        ylabel = None
        vals = vals.values

    elif isinstance(vals, pd.Series) and groupby is not None:
        # Fall back to no label when the grouper is unnamed instead of
        # leaving xlabel unbound
        xlabel = getattr(groupby, "name", None)
        ylabel = vals.name
        grouped_vals = pd.groupby(vals, groupby).values
        if names is None:
            names = grouped_vals.index
        vals = grouped_vals.values
    else:
        xlabel = None
        ylabel = None

    # Normalize the input to a list of 1d float arrays, one per violin
    if hasattr(vals, 'shape'):
        if len(vals.shape) == 1:
            if hasattr(vals[0], 'shape'):
                vals = list(vals)
            else:
                vals = [vals]
        elif len(vals.shape) == 2:
            nr, nc = vals.shape
            if nr == 1:
                vals = [vals]
            elif nc == 1:
                vals = [vals.ravel()]
            else:
                vals = [vals[:, i] for i in range(nc)]
        else:
            raise ValueError("Input x can have no more than 2 dimensions")
    if not hasattr(vals[0], '__len__'):
        vals = [vals]

    vals = [np.asarray(a, float) for a in vals]

    # Sort out the violin colors
    if color is None:
        colors = husl_palette(len(vals), l=.7)
    else:
        # Strings expose __iter__ on Python 3; they name a single color or
        # a palette, never a sequence of colors
        is_color_seq = (hasattr(color, "__iter__")
                        and not isinstance(color, (tuple, str)))
        if is_color_seq:
            colors = color
        else:
            try:
                color = mpl.colors.colorConverter.to_rgb(color)
                colors = [color for _ in vals]
            except ValueError:
                colors = color_palette(color, len(vals))

    colors = [mpl.colors.colorConverter.to_rgb(c) for c in colors]
    colors = [desaturate(c, .7) for c in colors]

    # Outline/inner gray is derived from the darkest violin color
    light_vals = [colorsys.rgb_to_hls(*c)[1] for c in colors]
    gray_level = min(light_vals) * .6
    gray = (gray_level, gray_level, gray_level)

    inner_kws = {} if inner_kws is None else dict(inner_kws)

    if positions is None:
        positions = np.arange(1, len(vals) + 1)
    elif not hasattr(positions, "__iter__"):
        positions = np.arange(positions, len(vals) + positions)

    in_alpha = inner_kws.pop("alpha", .6 if inner == "points" else 1)
    in_alpha *= 1 if alpha is None else alpha
    in_color = inner_kws.pop("color", gray)
    in_marker = inner_kws.pop("marker", ".")
    in_lw = inner_kws.pop("lw", 1.5 if inner == "box" else .8)

    for i, a in enumerate(vals):
        x = positions[i]
        kde = stats.gaussian_kde(a)
        y = _kde_support(a, kde, 1000, kde_thresh)
        dens = kde(y)
        # Rescale so the widest point of the violin spans `widths`
        scl = 1 / (dens.max() / (widths / 2))
        dens *= scl

        ax.fill_betweenx(y, x - dens, x + dens, alpha=alpha, color=colors[i])
        if inner == "box":
            for quant in moss.percentiles(a, [25, 75]):
                q_x = kde(quant) * scl
                q_x = [x - q_x, x + q_x]
                ax.plot(q_x, [quant, quant], color=in_color,
                        linestyle=":", linewidth=in_lw, **inner_kws)
            med = np.median(a)
            m_x = kde(med) * scl
            m_x = [x - m_x, x + m_x]
            ax.plot(m_x, [med, med], color=in_color,
                    linestyle="--", linewidth=in_lw, **inner_kws)
        elif inner in ("stick", "sticks"):
            x_vals = kde(a) * scl
            x_vals = [x - x_vals, x + x_vals]
            ax.plot(x_vals, [a, a], color=in_color,
                    linewidth=in_lw, alpha=in_alpha, **inner_kws)
        elif inner == "points":
            x_vals = [x for _ in a]
            ax.plot(x_vals, a, in_marker, color=in_color,
                    alpha=in_alpha, mew=0, **inner_kws)
        for side in [-1, 1]:
            ax.plot((side * dens) + x, y, c=gray, linewidth=1.5)

    if join_rm:
        # Draw the repeated-measures lines at the violin positions so they
        # line up with the violins when custom positions are given
        ax.plot(positions, vals, color=in_color, alpha=2. / 3)

    ax.set_xticks(positions)
    if names is not None:
        if len(vals) != len(names):
            raise ValueError("Length of names list must match nuber of bins")
        ax.set_xticklabels(names)
    ax.set_xlim(positions[0] - .5, positions[-1] + .5)

    if xlabel is not None:
        ax.set_xlabel(xlabel)
    if ylabel is not None:
        ax.set_ylabel(ylabel)

    ax.xaxis.grid(False)
    return ax
def regplot(x, y, data=None, corr_func=stats.pearsonr, func_name=None,
            xlabel="", ylabel="", ci=95, size=None, annotloc=None, color=None,
            reg_kws=None, scatter_kws=None, dist_kws=None, text_kws=None):
    """Scatterplot with regression line, marginals, and correlation value.

    Parameters
    ----------
    x : sequence or string
        Independent variable.
    y : sequence or string
        Dependent variable.
    data : dataframe, optional
        If dataframe is given, x, and y are interpreted as string keys
        for selecting to dataframe column names.
    corr_func : callable, optional
        Correlation function; expected to take two arrays and return a
        numeric or (statistic, pval) tuple.
    func_name : string, optional
        Use in lieu of function name for fit statistic annotation.
    xlabel, ylabel : string, optional
        Axis label names if inputs are not Pandas objects or to override.
    ci : int or None
        Confidence interval for the regression estimate.
    size: int
        Figure size (will be a square; only need one int).
    annotloc : two or three tuple
        Specified with (xpos, ypos [, horizontalalignment]).
    color : matplotlib color scheme
        Color of everything but the regression line; can be overridden by
        passing `color` to subfunc kwargs.
    {reg, scatter, dist, text}_kws: dicts
        Further keyword arguments for the constituent plots. The
        dictionaries passed in are copied, never modified.

    """
    # Interpret inputs
    if data is not None:
        if not xlabel:
            xlabel = x
        if not ylabel:
            ylabel = y
        x = data[x].values
        y = data[y].values
    else:
        if hasattr(x, "name") and not xlabel:
            if x.name is not None:
                xlabel = x.name
        if hasattr(y, "name") and not ylabel:
            if y.name is not None:
                ylabel = y.name
        x = np.asarray(x)
        y = np.asarray(y)

    # Private copies so popped/added keys never leak back to the caller
    scatter_kws = {} if scatter_kws is None else dict(scatter_kws)
    dist_kws = {} if dist_kws is None else dict(dist_kws)
    reg_kws = {} if reg_kws is None else dict(reg_kws)
    text_kws = {} if text_kws is None else dict(text_kws)

    # Set up the figure and axes
    size = mpl.rcParams["figure.figsize"][0] if size is None else size
    fig = plt.figure(figsize=(size, size))
    ax_scatter = fig.add_axes([0.05, 0.05, 0.75, 0.75])
    ax_x_marg = fig.add_axes([0.05, 0.82, 0.75, 0.13])
    ax_y_marg = fig.add_axes([0.82, 0.05, 0.13, 0.75])

    # Plot the scatter
    if color is not None and "color" not in scatter_kws:
        scatter_kws.update(color=color)
    marker = scatter_kws.pop("markerstyle", "o")
    # Default alpha decays with the number of points, floored at .1
    alpha_maker = stats.norm(0, 100)
    alpha = alpha_maker.pdf(len(x)) / alpha_maker.pdf(0)
    alpha = max(alpha, .1)
    alpha = scatter_kws.pop("alpha", alpha)
    ax_scatter.plot(x, y, marker, alpha=alpha, mew=0, **scatter_kws)
    ax_scatter.set_xlabel(xlabel)
    ax_scatter.set_ylabel(ylabel)

    # Marginal plots using our distplot function
    if color is not None and "color" not in dist_kws:
        dist_kws.update(color=color)
    dist_kws["xlabel"] = False
    distplot(x, ax=ax_x_marg, **dist_kws)
    distplot(y, ax=ax_y_marg, vertical=True, **dist_kws)
    for ax in [ax_x_marg, ax_y_marg]:
        ax.set_xticklabels([])
        ax.set_yticklabels([])

    # Regression line plot
    xlim = ax_scatter.get_xlim()
    a, b = np.polyfit(x, y, 1)
    reg_color = reg_kws.pop("color", "#222222")
    ax_scatter.plot(xlim, np.polyval([a, b], xlim),
                    color=reg_color, **reg_kws)

    # Bootstrapped regression standard error
    if ci is not None:
        xx = np.linspace(xlim[0], xlim[1], 100)

        def _bootstrap_reg(x, y):
            fit = np.polyfit(x, y, 1)
            return np.polyval(fit, xx)

        boots = moss.bootstrap(x, y, func=_bootstrap_reg)
        ci_lims = [50 - ci / 2., 50 + ci / 2.]
        ci_band = moss.percentiles(boots, ci_lims, axis=0)
        ax_scatter.fill_between(xx, *ci_band, color=reg_color, alpha=.15)
        ax_scatter.set_xlim(xlim)

    # Calculate a fit statistic and p value
    if func_name is None:
        func_name = corr_func.__name__
    out = corr_func(x, y)
    try:
        s, p = out
        msg = "%s: %.3f (p=%.3g%s)" % (func_name, s, p, moss.sig_stars(p))
    except TypeError:
        # Scalar result: reuse it rather than evaluating corr_func again
        s = out
        msg = "%s: %.3f" % (func_name, s)

    if annotloc is None:
        xmin, xmax = xlim
        x_range = xmax - xmin
        # Assume the fit statistic is correlation-esque for some
        # intuition on where the fit annotation should go
        if s < 0:
            xloc, align = xmax - x_range * .02, "right"
        else:
            xloc, align = xmin + x_range * .02, "left"
        ymin, ymax = ax_scatter.get_ylim()
        y_range = ymax - ymin
        yloc = ymax - y_range * .02
    else:
        if len(annotloc) == 3:
            xloc, yloc, align = annotloc
        else:
            xloc, yloc = annotloc
            align = "left"
    ax_scatter.text(xloc, yloc, msg, ha=align, va="top", **text_kws)

    # Set the axes on the marginal plots
    ax_x_marg.set_xlim(ax_scatter.get_xlim())
    ax_x_marg.set_yticks([])
    ax_y_marg.set_ylim(ax_scatter.get_ylim())
    ax_y_marg.set_xticks([])
def report_model(timeseries, sigmasquareds_file, zstat_files, r2_files):
    """Build the model report images, mostly from axial montages.

    Parameters
    ----------
    timeseries : filename
        4D image; its mean over the last axis is the montage background.
    sigmasquareds_file : filename
        Residual variance image, overlaid on the background.
    zstat_files : list of filenames
        Z statistic images; values above 2.3 and below -2.3 are overlaid.
    r2_files : list of filenames
        R squared images; at most three are plotted (one per colormap).

    Returns
    -------
    report : list of strings
        Absolute paths of the png files written to the current directory:
        sigmasquareds.png, zstat<N>.png for each zstat, then one png per
        plotted r2 file.

    """
    sns.set()

    # Load the timeseries, get a mean image
    ts_data = nib.load(timeseries).get_data()
    mean_data = ts_data.mean(axis=-1)
    mlow, mhigh = 0, moss.percentiles(mean_data, 98)

    # Get the plot params. OMG I need a general function for this
    n_slices = mean_data.shape[-1]
    n_row, n_col = n_slices // 8, 8
    # Skip leftover slices evenly so the montage is centered in z
    start = n_slices % n_col // 2
    figsize = (10, 1.4 * n_row)
    spkws = dict(nrows=n_row, ncols=n_col, figsize=figsize, facecolor="k")
    savekws = dict(dpi=100, bbox_inches="tight", facecolor="k", edgecolor="k")

    def add_colorbar(f, cmap, low, high, left, width, fmt):
        """Draw a horizontal colorbar strip with its two end labels."""
        cbar = np.outer(np.arange(0, 1, .01), np.ones(10))
        cbar_ax = f.add_axes([left, 0, width, .03])
        cbar_ax.imshow(cbar.T, aspect="auto", cmap=cmap)
        cbar_ax.axis("off")
        f.text(left - .01, .018, fmt % low, ha="right", va="center",
               color="white", size=13, weight="demibold")
        f.text(left + width + .01, .018, fmt % high, ha="left",
               va="center", color="white", size=13, weight="demibold")

    report = []

    # Plot the residual image (sigmasquareds)
    ss = nib.load(sigmasquareds_file).get_data()
    sslow, sshigh = moss.percentiles(ss, [2, 98])
    ss[mean_data == 0] = np.nan
    f, axes = plt.subplots(**spkws)
    for i, ax in enumerate(axes.ravel(), start):
        ax.imshow(mean_data[..., i].T, cmap="gray",
                  vmin=mlow, vmax=mhigh, interpolation="nearest")
        ax.imshow(ss[..., i].T, cmap="PuRd_r",
                  vmin=sslow, vmax=sshigh, alpha=.7)
        ax.axis("off")
    add_colorbar(f, "PuRd_r", sslow, sshigh, .35, .3, "%d")
    ss_png = op.abspath("sigmasquareds.png")
    f.savefig(ss_png, **savekws)
    plt.close(f)
    report.append(ss_png)

    # Now plot each zstat file
    for z_i, zname in enumerate(zstat_files, 1):
        zdata = nib.load(zname).get_data()
        pos = zdata.copy()
        pos[pos < 2.3] = np.nan
        neg = zdata.copy()
        neg[neg > -2.3] = np.nan
        zlow = 2.3
        zhigh = max(np.abs(zdata).max(), 3.71)
        f, axes = plt.subplots(**spkws)
        # Use the same slice offset as the other montages so every report
        # image shows the same axial slices
        for i, ax in enumerate(axes.ravel(), start):
            ax.imshow(mean_data[..., i].T, cmap="gray",
                      vmin=mlow, vmax=mhigh)
            ax.imshow(pos[..., i].T, cmap="Reds_r",
                      vmin=zlow, vmax=zhigh)
            ax.imshow(neg[..., i].T, cmap="Blues",
                      vmin=-zhigh, vmax=-zlow)
            ax.axis("off")
        add_colorbar(f, "Blues", -zhigh, -zlow, .15, .3, "%.1f")
        add_colorbar(f, "Reds_r", zlow, zhigh, .55, .3, "%.1f")

        fname = op.abspath("zstat%d.png" % z_i)
        f.savefig(fname, **savekws)
        plt.close(f)
        report.append(fname)

    # Now the r_2 files
    for rname, cmap in zip(r2_files, ["GnBu_r", "YlGn_r", "OrRd_r"]):
        data = nib.load(rname).get_data()
        rhigh = moss.percentiles(np.nan_to_num(data), 99)
        f, axes = plt.subplots(**spkws)
        for i, ax in enumerate(axes.ravel(), start):
            ax.imshow(mean_data[..., i].T, cmap="gray",
                      vmin=mlow, vmax=mhigh, interpolation="nearest")
            ax.imshow(data[..., i].T, cmap=cmap,
                      vmin=0, vmax=rhigh, alpha=.7)
            ax.axis("off")
        add_colorbar(f, cmap, 0, rhigh, .35, .3, "%.2f")

        fname = op.abspath(op.basename(rname).replace(".nii.gz", ".png"))
        f.savefig(fname, **savekws)
        plt.close(f)
        report.append(fname)

    return report
def tsplot(x, data, err_style=["ci_band"], ci=68, interpolate=True,
           estimator=np.mean, n_boot=10000, smooth=False,
           err_palette=None, ax=None, **kwargs):
    """Plot timeseries from a set of observations.

    Parameters
    ----------
    x : n_tp array
        x values
    data : n_obs x n_tp array
        array of timeseries data where first axis is e.g. subjects
    err_style : string or list of strings
        names of ways to plot uncertainty across observations from set of
        {ci_band, ci_bars, boot_traces, boot_kde, obs_traces, obs_points}
    ci : int or list of ints
        confidence interval size(s). if a list, it will stack the error
        plots for each confidence interval
    interpolate : boolean
        if True the central trace is drawn as a line without markers,
        otherwise as unconnected "o" markers (both can be overridden with
        the `marker` / `linestyle` kwargs)
    estimator : callable
        function to determine central tendency and to pass to bootstrap
        must take an ``axis`` argument
    n_boot : int
        number of bootstrap iterations
    smooth : boolean
        whether to perform a smooth bootstrap (resample from KDE)
    err_palette : seaborn palette, optional
        if given, styles whose name contains "obs" draw each observation
        in its own color from this palette
    ax : axis object, optional
        plot in given axis; if None creates a new figure
    kwargs : further keyword arguments for main call to plot()

    Returns
    -------
    ax : matplotlib axis
        axis with plot data

    Raises
    ------
    ValueError
        if an entry of `err_style` has no matching ``_plot_<style>``
        function in this module

    """
    if ax is None:
        ax = plt.subplot(111)

    # Bootstrap the data for confidence intervals
    boot_data = moss.bootstrap(data, n_boot=n_boot, smooth=smooth,
                               axis=0, func=estimator)
    if not hasattr(ci, "__iter__"):
        ci = [ci]
    # Divide by a float so odd integer widths are not truncated under
    # Python 2 integer division (95 must give 2.5/97.5, not 3/97)
    ci_vals = [(50 - w / 2., 50 + w / 2.) for w in ci]
    cis = [moss.percentiles(boot_data, lims, axis=0) for lims in ci_vals]
    central_data = estimator(data, axis=0)

    # Plot the timeseries line to get its color
    line, = ax.plot(x, central_data, **kwargs)
    color = line.get_color()
    line.remove()
    kwargs.pop("color", None)

    # Accept a bare style name as well as a list; the default list is
    # only read, never modified
    if isinstance(err_style, str):
        err_style = [err_style]

    # Use subroutines to plot the uncertainty
    for style in err_style:

        # Grab the function from the global environment
        try:
            plot_func = globals()["_plot_%s" % style]
        except KeyError:
            raise ValueError("%s is not a valid err_style" % style)

        # Possibly set up to plot each observation in a different color
        if err_palette is not None and "obs" in style:
            orig_color = color
            color = color_palette(err_palette, len(data), desat=.99)

        plot_kwargs = dict(ax=ax, x=x, data=data,
                           boot_data=boot_data,
                           central_data=central_data,
                           color=color)

        for ci_i in cis:
            plot_kwargs["ci"] = ci_i
            plot_func(**plot_kwargs)

        # Restore the single line color for the remaining styles
        if err_palette is not None and "obs" in style:
            color = orig_color

    # Replot the central trace so it is prominent
    marker = kwargs.pop("marker", "" if interpolate else "o")
    linestyle = kwargs.pop("linestyle", "-" if interpolate else "")
    ax.plot(x, central_data, color=color,
            marker=marker, linestyle=linestyle, **kwargs)

    return ax
def lmplot(x, y, data, color=None, row=None, col=None, col_wrap=None,
           x_estimator=None, x_ci=95, x_bins=None, n_boot=5000, fit_reg=True,
           order=1, ci=95, logistic=False, truncate=False,
           x_partial=None, y_partial=None, x_jitter=None, y_jitter=None,
           sharex=True, sharey=True, palette="husl", size=None,
           scatter_kws=None, line_kws=None, palette_kws=None):
    """Plot a linear model with faceting, color binning, and other options.

    Parameters
    ----------
    x, y : strings
        Column names in `data` DataFrame for x and y variables.
    data : DataFrame
        Source of data for the model.
    color : string, optional
        DataFrame column name to group the model by color.
    row, col : strings, optional
        DataFrame column names to make separate plot facets.
    col_wrap : int, optional
        Wrap col variable at this width - cannot be used with row facet.
    x_estimator : callable, optional
        Interpret X values as factor labels and use this function
        to plot the point estimate and bootstrapped CI.
    x_ci : int optional
        Size of confidence interval for x_estimator error bars.
    x_bins : sequence of floats, optional
        Bin the x variable with these values. Implies that x_estimator is
        mean, unless otherwise provided.
    n_boot : int, optional
        Number of bootstrap iterations to perform.
    fit_reg : bool, optional
        If True fit a regression model by color/row/col and plot.
    order : int, optional
        Order of the regression polynomial to fit.
    ci : int, optional
        Confidence interval for the regression line.
    logistic : bool, optional
        Fit the regression line with logistic regression.
    truncate : bool, optional
        If True, only fit line from data min to data max.
    {x, y}_partial : string or list of strings, optional
        Regress these variables out of the factors before plotting.
    {x, y}_jitter : float, optional
        Parameters for uniformly distributed random noise added to positions.
    sharex, sharey : bools, optional
        Only relevant if faceting; passed to plt.subplots.
    palette : seaborn color palette argument
        If using separate plots by color, draw with this color palette.
    size : float, optional
        Size (plots are square) for each plot facet.
    {scatter, line}_kws : dictionary
        Keyword arguments to pass to the underlying plot functions. The
        ``ms`` and ``mew`` entries of scatter_kws apply to both the raw
        scatter and the x_estimator points; ``alpha`` applies to the raw
        scatter only. The dictionaries are not modified.
    palette_kws : dictionary
        Keyword arguments for seaborn.color_palette.

    Raises
    ------
    ValueError
        If `col_wrap` is given without `col`, or together with `row`.

    """
    # TODO
    # - legend when fit_reg is False

    def _partial_out(vals, frame, confounds):
        """Remove each confound column of `frame` from `vals`, keeping its mean."""
        if confounds is None:
            return vals
        for var in confounds:
            conf = frame[var] - frame[var].mean()
            center = vals.mean()
            vals = moss.vector_reject(vals - center, conf) + center
        return vals

    # First sort out the general figure layout
    if size is None:
        size = mpl.rcParams["figure.figsize"][1]

    if col is None and col_wrap is not None:
        raise ValueError("Need column facet variable for `col_wrap`")
    if row is not None and col_wrap is not None:
        raise ValueError("Cannot facet rows when using `col_wrap`")

    nrow = 1 if row is None else len(data[row].unique())
    ncol = 1 if col is None else len(data[col].unique())

    if col_wrap is not None:
        ncol = col_wrap
        # Float division so the row count is not floored before the ceil
        nrow = int(np.ceil(len(data[col].unique()) / float(col_wrap)))

    f, axes = plt.subplots(nrow, ncol, sharex=sharex, sharey=sharey,
                           figsize=(size * ncol, size * nrow))
    axes = np.atleast_2d(axes).reshape(nrow, ncol)

    # Build the facet masks from whether a facet variable was given rather
    # than from the grid shape, so the level labels used in the titles exist
    # even for a single-level variable or a grid wrapped to one column
    if row is None:
        row_masks = [np.repeat(True, len(data))]
    else:
        row_vals = np.sort(data[row].unique())
        row_masks = [data[row] == val for val in row_vals]

    if col is None:
        col_masks = [np.repeat(True, len(data))]
    else:
        col_vals = np.sort(data[col].unique())
        col_masks = [data[col] == val for val in col_vals]

    if x_bins is not None:
        x_estimator = np.mean if x_estimator is None else x_estimator
        x_bins = np.c_[x_bins]

    if x_partial is not None and not isinstance(x_partial, list):
        x_partial = [x_partial]
    if y_partial is not None and not isinstance(y_partial, list):
        y_partial = [y_partial]

    if palette_kws is None:
        palette_kws = {}

    # Sort out the plot colors
    color_factor = color
    if color is None:
        hue_masks = [np.repeat(True, len(data))]
        colors = ["#222222"]
    else:
        hue_vals = np.sort(data[color].unique())
        hue_masks = [data[color] == val for val in hue_vals]
        colors = color_palette(palette, len(hue_masks), **palette_kws)

    # Default keyword arguments for plot components; work on a copy so the
    # caller's dictionary is left untouched by the pops below
    scatter_kws = {} if scatter_kws is None else dict(scatter_kws)
    if line_kws is None:
        line_kws = {}

    # Resolve marker settings once so every facet and hue level agrees
    user_ms = scatter_kws.pop("ms", None)
    scatter_ms = 4 if user_ms is None else user_ms
    estimator_ms = 7 if user_ms is None else user_ms
    scatter_mew = scatter_kws.pop("mew", 0)
    scatter_alpha = scatter_kws.pop("alpha", .77)

    # First walk through the facets and plot the scatters
    for row_i, row_mask in enumerate(row_masks):
        for col_j, col_mask in enumerate(col_masks):
            if col_wrap is not None:
                f_row = col_j // ncol
                f_col = col_j % ncol
            else:
                f_row, f_col = row_i, col_j

            ax = axes[f_row, f_col]
            if f_row + 1 == nrow:
                ax.set_xlabel(x)
            if f_col == 0:
                ax.set_ylabel(y)

            # Title the plot if we are faceting
            title = ""
            if row is not None:
                title += "%s = %s" % (row, row_vals[row_i])
            if row is not None and col is not None:
                title += " | "
            if col is not None:
                title += "%s = %s" % (col, col_vals[col_j])
            if size < 3:
                title = title.replace(" | ", "\n")
            ax.set_title(title)

            for hue_k, hue_mask in enumerate(hue_masks):
                hue_color = colors[hue_k]
                data_ijk = data[row_mask & col_mask & hue_mask]
                if not len(data_ijk):
                    # Nothing to draw for this facet/hue combination
                    continue

                if x_estimator is not None:
                    if x_bins is None:
                        x_vals = data_ijk[x].unique()
                        x_data = data_ijk[x]
                    else:
                        # Assign every observation to its nearest bin center
                        dist = distance.cdist(np.c_[data_ijk[x]], x_bins)
                        x_vals = x_bins.ravel()
                        x_data = x_bins[np.argmin(dist, axis=1)].ravel()

                    y_vals = _partial_out(data_ijk[y], data_ijk, y_partial)

                    y_grouped = [np.array(y_vals[x_data == v])
                                 for v in x_vals]
                    y_est = [x_estimator(y_i) for y_i in y_grouped]
                    y_boots = [moss.bootstrap(np.array(y_i),
                                              func=x_estimator,
                                              n_boot=n_boot)
                               for y_i in y_grouped]
                    ci_lims = [50 - x_ci / 2., 50 + x_ci / 2.]
                    y_ci = [moss.percentiles(y_i, ci_lims)
                            for y_i in y_boots]
                    y_error = ci_to_errsize(np.transpose(y_ci), y_est)

                    ax.plot(x_vals, y_est, "o", mew=scatter_mew,
                            ms=estimator_ms, color=hue_color, **scatter_kws)
                    # NOTE(review): newer matplotlib releases appear to
                    # expect fmt="none" here - confirm against the
                    # supported matplotlib version before changing
                    ax.errorbar(x_vals, y_est, y_error,
                                fmt=None, ecolor=hue_color)
                else:
                    x_ = _partial_out(data_ijk[x], data_ijk, x_partial)
                    y_ = _partial_out(data_ijk[y], data_ijk, y_partial)

                    if x_jitter is not None:
                        x_ = x_ + np.random.uniform(-x_jitter, x_jitter,
                                                    x_.shape)
                    if y_jitter is not None:
                        y_ = y_ + np.random.uniform(-y_jitter, y_jitter,
                                                    y_.shape)
                    ax.plot(x_, y_, "o", color=hue_color,
                            alpha=scatter_alpha, mew=scatter_mew,
                            ms=scatter_ms, **scatter_kws)

    for ax_i in np.ravel(axes):
        ax_i.set_xmargin(.05)
        ax_i.autoscale_view()

    # Now walk through again and plot the regression estimate
    # and a confidence interval for the regression line
    if fit_reg:
        for row_i, row_mask in enumerate(row_masks):
            for col_j, col_mask in enumerate(col_masks):
                if col_wrap is not None:
                    f_row = col_j // ncol
                    f_col = col_j % ncol
                else:
                    f_row, f_col = row_i, col_j
                ax = axes[f_row, f_col]
                xlim = ax.get_xlim()

                for hue_k, hue_mask in enumerate(hue_masks):
                    hue_color = colors[hue_k]
                    data_ijk = data[row_mask & col_mask & hue_mask]
                    x_vals = np.array(data_ijk[x])
                    y_vals = np.array(data_ijk[y])
                    if not len(x_vals):
                        continue

                    # Sort out the limit of the fit
                    if truncate:
                        xx = np.linspace(x_vals.min(),
                                         x_vals.max(), 100)
                    else:
                        xx = np.linspace(xlim[0], xlim[1], 100)
                    xx_ = sm.add_constant(xx, prepend=True)

                    # Inner function to bootstrap the regression
                    def _regress(x, y):
                        if logistic:
                            x_ = sm.add_constant(x, prepend=True)
                            fit = sm.GLM(y, x_,
                                         family=sm.families.Binomial()).fit()
                            reg = fit.predict(xx_)
                        else:
                            fit = np.polyfit(x, y, order)
                            reg = np.polyval(fit, xx)
                        return reg

                    # Remove nuisance variables with vector rejection
                    x_vals = _partial_out(x_vals, data_ijk, x_partial)
                    y_vals = _partial_out(y_vals, data_ijk, y_partial)

                    # Regression line confidence interval
                    if ci is not None:
                        ci_lims = [50 - ci / 2., 50 + ci / 2.]
                        boots = moss.bootstrap(x_vals, y_vals,
                                               func=_regress,
                                               n_boot=n_boot)
                        ci_band = moss.percentiles(boots, ci_lims, axis=0)
                        ax.fill_between(xx, *ci_band,
                                        color=hue_color, alpha=.15)

                    # Regression line
                    reg = _regress(x_vals, y_vals)
                    if color_factor is None:
                        label = ""
                    else:
                        label = hue_vals[hue_k]
                    ax.plot(xx, reg, color=hue_color,
                            label=str(label), **line_kws)
                    # Plotting the line must not change the scatter limits
                    ax.set_xlim(xlim)

    # Plot the legend on the upper left facet and adjust the layout
    if color_factor is not None and color_factor not in [row, col]:
        axes[0, 0].legend(loc="best", title=color_factor)
    plt.tight_layout()
from scipy.optimize import curve_fit

scatter(x_pop, y_pop, facecolor='none')
l, r = xlim()
xx = linspace(l, r, 200)

# One predicted curve per bootstrap iteration of the linear model; each
# entry of w_ols_boot is indexed as (slope term, intercept term)
y_model = [dot(xx, w[0]) + w[1] for w in w_ols_boot]

# Plot the linear prediction and transparent error bars
pcts = [16, 84]
plot(xx, median(y_model, axis=0))
lin_ci = moss.percentiles(y_model, pcts, axis=0)
fill_between(xx, *lin_ci, color=colors[0], alpha=.2)

# Now derive the nonlinear predictions and plot; each bootstrap entry is
# unpacked as the parameters of modelfunc
y_model_nlin = [modelfunc(xx, *w) for w in w_nonlin_boot]
plot(xx, median(y_model_nlin, axis=0))
nlin_ci = moss.percentiles(y_model_nlin, pcts, axis=0)
fill_between(xx, *nlin_ci, color=colors[1], alpha=.2)

# Compare the mean and median of the nonlinear bootstrap predictions
scatter(x_pop, y_pop, facecolor='none')
plot(xx, mean(y_model_nlin, 0), label="mean", color=colors[3])
plot(xx, median(y_model_nlin, 0), label="median", color=colors[4])
legend(loc="best")
ax = plt.subplot(gs[:2, 0])
plt.title("lmplot()")

# Simulate a noisy linear relationship with a fixed seed
n = 80
c = "#222222"
rs = np.random.RandomState(5)
x = rs.normal(4, 1, n)
y = 2 + 1.5 * x + rs.normal(0, 3, n)
ax.plot(x, y, "o", c=c, alpha=.8)

xx = np.linspace(1, 7, 100)


def lmpred(x, y):
    """Return the first-order polynomial fit of y on x evaluated on xx."""
    coefs = np.polyfit(x, y, 1)
    return np.polyval(coefs, xx)


yy = lmpred(x, y)
ax.plot(xx, yy, c=c)

# Bootstrap the fitted line and shade its 2.5-97.5 percentile band
boots = moss.bootstrap(x, y, func=lmpred, n_boot=100)
ci = moss.percentiles(boots, [2.5, 97.5], 0)
ax.fill_between(xx, *ci, alpha=.15, color=c)

# Timeseries plot
# ---------------
ax = plt.subplot(gs[:2, 1])
plt.title("tsplot()")

n = 20
t = 10
ax.set_xlim(0, t)
x = np.linspace(0, t, 100)
# One gamma density curve per shape parameter, stacked along the first axis
s = np.array([stats.gamma.pdf(x, a) for a in [3, 5, 7]])
# Insert a singleton middle axis between the curve and x-sample axes
d = s[:, None, :]
rs = np.random.RandomState(24)
def lmplot(x, y, data, color=None, row=None, col=None,
           x_estimator=None, x_ci=95,
           fit_line=True, ci=95, truncate=False,
           sharex=True, sharey=True, palette="hls", size=None,
           scatter_kws=None, line_kws=None, palette_kws=None):
    """Plot a linear model from a DataFrame.

    Parameters
    ----------
    x, y : strings
        column names in `data` DataFrame for x and y variables
    data : DataFrame
        source of data for the model
    color : string, optional
        DataFrame column name to group the model by color
    row, col : strings, optional
        DataFrame column names to make separate plot facets
    x_estimator : callable, optional
        Interpret X values as factor labels and use this function
        to plot the point estimate and bootstrapped CI
    x_ci : int optional
        size of confidence interval for x_estimator error bars
    fit_line : bool, optional
        if True fit a regression line by color/row/col and plot
    ci : int, optional
        confidence interval for the regression line
    truncate : bool, optional
        if True, only fit line from data min to data max
    sharex, sharey : bools, optional
        only relevant if faceting; passed to plt.subplots
    palette : seaborn color palette argument
        if using separate plots by color, draw with this color palette
    size : float, optional
        size (plots are square) for each plot facet
    {scatter, line}_kws : dictionary
        keyword arguments to pass to the underlying plot functions;
        the dictionaries are not modified
    palette_kws : dictionary
        keyword arguments for seaborn.color_palette

    """
    # TODO
    # - position_{dodge, jitter}
    # - legend when fit_line is False
    # - wrap title when wide
    # - wrap columns

    # First sort out the general figure layout
    if size is None:
        size = mpl.rcParams["figure.figsize"][1]

    nrow = 1 if row is None else len(data[row].unique())
    ncol = 1 if col is None else len(data[col].unique())

    f, axes = plt.subplots(nrow, ncol, sharex=sharex, sharey=sharey,
                           figsize=(size * ncol, size * nrow))
    axes = np.atleast_2d(axes).reshape(nrow, ncol)

    # Build the facet masks from whether a facet variable was given rather
    # than from the grid shape, so the level labels used in the titles are
    # defined even when the variable has a single level
    if row is None:
        row_masks = [np.repeat(True, len(data))]
    else:
        row_vals = np.sort(data[row].unique())
        row_masks = [data[row] == val for val in row_vals]

    if col is None:
        col_masks = [np.repeat(True, len(data))]
    else:
        col_vals = np.sort(data[col].unique())
        col_masks = [data[col] == val for val in col_vals]

    if palette_kws is None:
        palette_kws = {}

    # Sort out the plot colors
    color_factor = color
    if color is None:
        hue_masks = [np.repeat(True, len(data))]
        colors = ["#222222"]
    else:
        hue_vals = np.sort(data[color].unique())
        hue_masks = [data[color] == val for val in hue_vals]
        colors = color_palette(palette, len(hue_masks), **palette_kws)

    # Default keyword arguments for plot components; work on a copy so the
    # caller's dictionary is left untouched by the pops below
    scatter_kws = {} if scatter_kws is None else dict(scatter_kws)
    if line_kws is None:
        line_kws = {}

    # Resolve the marker settings once, before the loops, so that every
    # facet and hue level is drawn with the same size and edge width
    user_ms = scatter_kws.pop("ms", None)
    scatter_ms = 4 if user_ms is None else user_ms
    estimator_ms = 7 if user_ms is None else user_ms
    mew = scatter_kws.pop("mew", 0)

    # First walk through the facets and plot the scatters
    for row_i, row_mask in enumerate(row_masks):
        for col_j, col_mask in enumerate(col_masks):
            ax = axes[row_i, col_j]
            if not sharex or (row_i + 1 == len(row_masks)):
                ax.set_xlabel(x)
            if not sharey or col_j == 0:
                ax.set_ylabel(y)

            # Title the plot if we are faceting
            title = ""
            if row is not None:
                title += "%s = %s" % (row, row_vals[row_i])
            if row is not None and col is not None:
                title += " | "
            if col is not None:
                title += "%s = %s" % (col, col_vals[col_j])
            ax.set_title(title)

            for hue_k, hue_mask in enumerate(hue_masks):
                hue_color = colors[hue_k]
                data_ijk = data[row_mask & col_mask & hue_mask]
                if not len(data_ijk):
                    # Nothing to draw for this facet/hue combination
                    continue

                if x_estimator is not None:
                    x_vals = data_ijk[x].unique()
                    y_grouped = [np.array(data_ijk[y][data_ijk[x] == v])
                                 for v in x_vals]
                    y_est = [x_estimator(y_i) for y_i in y_grouped]
                    y_boots = [moss.bootstrap(np.array(y_i),
                                              func=x_estimator)
                               for y_i in y_grouped]
                    ci_lims = [50 - x_ci / 2., 50 + x_ci / 2.]
                    y_ci = [moss.percentiles(y_i, ci_lims)
                            for y_i in y_boots]
                    y_error = ci_to_errsize(np.transpose(y_ci), y_est)

                    ax.plot(x_vals, y_est, "o", mew=mew, ms=estimator_ms,
                            color=hue_color, **scatter_kws)
                    # NOTE(review): newer matplotlib releases appear to
                    # expect fmt="none" here - confirm against the
                    # supported matplotlib version before changing
                    ax.errorbar(x_vals, y_est, y_error,
                                fmt=None, ecolor=hue_color)
                else:
                    ax.plot(data_ijk[x], data_ijk[y], "o",
                            color=hue_color, mew=mew, ms=scatter_ms,
                            **scatter_kws)

    for ax_i in np.ravel(axes):
        ax_i.set_xmargin(.05)
        ax_i.autoscale_view()

    # Now walk through again and plot the regression estimate
    # and a confidence interval for the regression line
    if fit_line:
        for row_i, row_mask in enumerate(row_masks):
            for col_j, col_mask in enumerate(col_masks):
                ax = axes[row_i, col_j]
                xlim = ax.get_xlim()

                for hue_k, hue_mask in enumerate(hue_masks):
                    hue_color = colors[hue_k]
                    data_ijk = data[row_mask & col_mask & hue_mask]
                    x_vals = np.array(data_ijk[x])
                    y_vals = np.array(data_ijk[y])
                    if not len(x_vals):
                        # An empty cell has no range to span and no
                        # points to fit
                        continue

                    # Sort out the limit of the fit
                    if truncate:
                        xx = np.linspace(x_vals.min(),
                                         x_vals.max(), 100)
                    else:
                        xx = np.linspace(xlim[0], xlim[1], 100)

                    # Inner function to bootstrap the regression
                    def _bootstrap_reg(x, y):
                        fit = np.polyfit(x, y, 1)
                        return np.polyval(fit, xx)

                    # Regression line confidence interval
                    if ci is not None:
                        ci_lims = [50 - ci / 2., 50 + ci / 2.]
                        boots = moss.bootstrap(x_vals, y_vals,
                                               func=_bootstrap_reg)
                        ci_band = moss.percentiles(boots, ci_lims, axis=0)
                        ax.fill_between(xx, *ci_band,
                                        color=hue_color, alpha=.15)

                    # Regression line fit to the full cell
                    reg = _bootstrap_reg(x_vals, y_vals)
                    if color_factor is None:
                        label = ""
                    else:
                        label = hue_vals[hue_k]
                    ax.plot(xx, reg, color=hue_color,
                            label=str(label), **line_kws)
                    # Plotting the line must not change the scatter limits
                    ax.set_xlim(xlim)

    # Plot the legend on the upper left facet and adjust the layout
    if color_factor is not None:
        axes[0, 0].legend(loc="best", title=color_factor)
    plt.tight_layout()