Ejemplo n.º 1
0
def write_mask_report(mask_file, orig_file, mean_file):
    """Save PNG mosaics of the functional mask and of the mean image.

    Parameters
    ----------
    mask_file, orig_file, mean_file : paths
        Images readable by ``nib.load``. Slices are taken along the last
        axis of each array.

    Returns
    -------
    list of str
        Absolute paths of ``functional_mask.png`` and ``mean_func.png``,
        both resolved against the current working directory.

    """
    mean_data = nib.load(mean_file).get_data()
    orig_data = nib.load(orig_file).get_data()
    mask_data = nib.load(mask_file).get_data().astype(float)
    # Zeros become NaN, presumably so imshow only tints voxels in the mask
    mask_data[mask_data == 0] = np.nan

    # Eight slices per row; slices that do not fill a complete row are
    # dropped, split between the start and the end of the volume
    n_col = 8
    n_slices = mean_data.shape[-1]
    n_row = n_slices // n_col
    first = n_slices % n_col // 2
    mosaic_size = (10, 1.375 * n_row)
    save_kws = dict(dpi=100, bbox_inches="tight",
                    facecolor="k", edgecolor="k")

    # Mosaic of the original image with the mask drawn on top
    fig, axes = plt.subplots(n_row, n_col,
                             figsize=mosaic_size, facecolor="k")
    vmin = 0
    vmax = moss.percentiles(orig_data, 98)

    mask_cmap = mpl.colors.ListedColormap(["MediumSpringGreen"])
    for offset, ax in enumerate(axes.ravel()):
        k = first + offset
        ax.imshow(orig_data[..., k].T, cmap="gray", vmin=vmin, vmax=vmax)
        ax.imshow(mask_data[..., k].T, alpha=.6, cmap=mask_cmap)
        ax.set_xticks([])
        ax.set_yticks([])
    fig.subplots_adjust(hspace=1e-5, wspace=1e-5)
    mask_png = os.path.abspath("functional_mask.png")
    fig.savefig(mask_png, **save_kws)
    plt.close(fig)

    # Mosaic of the mean image: gray base with a translucent "hot" overlay
    fig, axes = plt.subplots(n_row, n_col,
                             figsize=mosaic_size, facecolor="k")
    vmin = 0
    vmax = moss.percentiles(mean_data, 98)

    for offset, ax in enumerate(axes.ravel()):
        k = first + offset
        mean_slice = mean_data[..., k].T
        ax.imshow(mean_slice, cmap="gray", vmin=vmin, vmax=vmax)
        ax.imshow(mean_slice, cmap="hot", alpha=.6, vmin=vmin, vmax=vmax)
        ax.set_xticks([])
        ax.set_yticks([])
    fig.subplots_adjust(hspace=1e-5, wspace=1e-5)
    mean_png = os.path.abspath("mean_func.png")
    fig.savefig(mean_png, **save_kws)
    plt.close(fig)

    return [mask_png, mean_png]
Ejemplo n.º 2
0
def realign_report(target_file, realign_params, displace_params):
    """Summarize motion correction as a CSV file and two PNG images.

    Parameters
    ----------
    target_file : path
        Image readable by ``nib.load``; drawn as a slice mosaic.
    realign_params : path
        Text file with six columns, labelled here as three rotations
        followed by three translations.
    displace_params : pair of paths
        Text files with the absolute and the relative displacements.

    Returns
    -------
    files : list of str
        Absolute paths of the CSV, the motion plot and the mosaic.
    motion_file : str
        Absolute path of the CSV file (also the first entry of ``files``).

    """
    # Build a DataFrame holding the 6 motion parameters
    dims = ["x", "y", "z"]
    rot = ["rot_" + dim for dim in dims]
    trans = ["trans_" + dim for dim in dims]
    params = np.loadtxt(realign_params)
    df = pd.DataFrame(params, columns=rot + trans)

    abs_file, rel_file = displace_params
    df["displace_abs"] = np.loadtxt(abs_file)
    # The relative values are aligned to every row but the first,
    # which is then set to zero explicitly
    rel_series = pd.Series(np.loadtxt(rel_file), index=df.index[1:])
    df["displace_rel"] = rel_series
    df.loc[0, "displace_rel"] = 0
    motion_file = os.path.abspath("realignment_params.csv")
    df.to_csv(motion_file, index=False)

    # Draw rotations (scaled by 100) above translations
    seaborn.set()
    seaborn.set_color_palette("husl", 3)
    fig, (ax_rot, ax_trans) = plt.subplots(2, 1, figsize=(8, 3.75),
                                           sharex=True)
    zero_line = dict(c="#444444", ls="--", zorder=1)
    ax_rot.plot(df[rot] * 100)
    ax_rot.axhline(0, **zero_line)
    ax_trans.plot(df[trans])
    ax_trans.axhline(0, **zero_line)
    ax_rot.set_xlim(0, len(df) - 1)

    ax_rot.set_ylabel(r"Rotations (rad $\times$ 100)")
    ax_trans.set_ylabel("Translations (mm)")
    plt.tight_layout()

    plot_file = os.path.abspath("realignment_plots.png")
    fig.savefig(plot_file, dpi=100, bbox_inches="tight")
    plt.close(fig)

    # Draw the target image as a mosaic with eight slices per row
    volume = nib.load(target_file).get_data()
    n_col = 8
    n_slices = volume.shape[-1]
    n_row = n_slices // n_col
    first = n_slices % n_col // 2
    fig, axes = plt.subplots(n_row, n_col,
                             figsize=(10, 1.375 * n_row), facecolor="k")

    vmin = 0
    vmax = moss.percentiles(volume, 99)
    for offset, ax in enumerate(axes.ravel()):
        ax.imshow(volume[..., first + offset].T,
                  cmap="gray", vmin=vmin, vmax=vmax)
        ax.set_xticks([])
        ax.set_yticks([])
    fig.subplots_adjust(hspace=1e-5, wspace=1e-5)
    example_png = os.path.abspath("example_func.png")
    fig.savefig(example_png, dpi=100, bbox_inches="tight",
                facecolor="k", edgecolor="k")
    plt.close(fig)

    return [motion_file, plot_file, example_png], motion_file
Ejemplo n.º 3
0
def write_coreg_plot(subject_id, in_file):
    """Plot the wm surface edges on the mean functional.

    Parameters
    ----------
    subject_id : str
        Name of the subject directory under ``$SUBJECTS_DIR``.
    in_file : str
        Path of the functional volume used as the background. It is
        indexed with the same voxel indices as ``mri/wm.mgz``, so it is
        presumably already resampled to the anatomical grid -- confirm
        against the caller.

    Returns
    -------
    out_file : str
        Absolute path of ``func2anat.png``, resolved against the current
        working directory.

    Raises
    ------
    KeyError
        If the ``SUBJECTS_DIR`` environment variable is not set.

    """
    bold = nib.load(in_file).get_data()

    # Load the white matter volume from recon-all
    subj_dir = os.environ["SUBJECTS_DIR"]
    wm_file = os.path.join(subj_dir, subject_id, "mri/wm.mgz")
    wm = nib.load(wm_file).get_data()

    # Find the limits of the data
    # note that FS conformed space is not (x, y, z): the "x" and "y" limits
    # below come from array axes 0 and 2, and slices are cut along axis 1
    xdata = np.flatnonzero(bold.any(axis=1).any(axis=1))
    xmin, xmax = xdata.min(), xdata.max()
    ydata = np.flatnonzero(bold.any(axis=0).any(axis=0))
    ymin, ymax = ydata.min(), ydata.max()
    zdata = np.flatnonzero(bold.any(axis=0).any(axis=1))
    # Trim the slice range by fixed margins at both ends
    zmin, zmax = zdata.min() + 10, zdata.max() - 25

    # Figure out the plot parameters (every third slice, eight per row)
    n_slices = (zmax - zmin) // 3
    n_row, n_col = n_slices // 8, 8
    start = n_slices % n_col // 2 + zmin
    figsize = (10, 1.375 * n_row)
    slices = (start + np.arange(zmax - zmin))[::3][:n_slices]

    # Draw the slices and save
    vmin, vmax = 0, moss.percentiles(bold, 99)
    f, axes = plt.subplots(n_row, n_col, figsize=figsize, facecolor="k")
    cmap = mpl.colors.ListedColormap(["#C41E3A"])
    # The axes are filled in reverse order
    for i, ax in enumerate(reversed(axes.ravel())):
        i = slices[i]
        ax.imshow(np.flipud(bold[xmin:xmax, i, ymin:ymax].T),
                  cmap="gray", vmin=vmin, vmax=vmax)
        try:
            ax.contour(np.flipud(wm[xmin:xmax, i, ymin:ymax].T),
                       linewidths=.5, cmap=cmap)
        except ValueError:
            # Best effort: the outline is skipped on slices where contour
            # fails (presumably slices without white matter)
            pass
        ax.set_xticks([])
        ax.set_yticks([])

    out_file = os.path.abspath("func2anat.png")
    # Save this figure explicitly instead of whichever one pyplot
    # currently considers active
    f.savefig(out_file, dpi=100, bbox_inches="tight",
              facecolor="k", edgecolor="k")
    plt.close(f)
    return out_file
Ejemplo n.º 4
0
def lmplot(ax):
    """Draw a simulated scatter with a bootstrapped linear fit on ``ax``.

    The data come from a fixed seed. The shaded band spans the 2.5 and
    97.5 percentiles of 100 bootstrapped fits evaluated on a fixed grid.
    """
    n_obs = 80
    dark = "#222222"
    rng = np.random.RandomState(5)
    x = rng.normal(4, 1, n_obs)
    y = 2 + 1.5 * x + rng.normal(0, 3, n_obs)
    ax.plot(x, y, "o", c=dark, alpha=.8)

    grid = np.linspace(1 + 1e-9, 7 - 1e-9, 100)

    def fit_on_grid(x_i, y_i):
        """Fit a first-order polynomial and evaluate it on the grid."""
        coefs = np.polyfit(x_i, y_i, 1)
        return np.polyval(coefs, grid)

    ax.plot(grid, fit_on_grid(x, y), c=dark)
    boots = moss.bootstrap(x, y, func=fit_on_grid, n_boot=100)
    band = moss.percentiles(boots, [2.5, 97.5], 0)
    ax.fill_between(grid, *band, alpha=.15, color=dark)
    ax.set_title("lmplot()")
Ejemplo n.º 5
0
def lmplot(ax):
    """Sketch a regression plot on ``ax`` using simulated data.

    Points are drawn from a seeded random state, a straight line is fit
    to them, and the 2.5-97.5 percentile band of 100 bootstrapped fits is
    shaded around the line.
    """
    color = "#222222"
    sample_size = 80
    state = np.random.RandomState(5)
    x = state.normal(4, 1, sample_size)
    noise = state.normal(0, 3, sample_size)
    y = 2 + 1.5 * x + noise
    ax.plot(x, y, "o", c=color, alpha=.8)

    xx = np.linspace(1 + 1e-9, 7 - 1e-9, 100)

    def predict(x_boot, y_boot):
        """Return the linear fit of the sample evaluated at ``xx``."""
        return np.polyval(np.polyfit(x_boot, y_boot, 1), xx)

    yy = predict(x, y)
    ax.plot(xx, yy, c=color)
    fits = moss.bootstrap(x, y, func=predict, n_boot=100)
    limits = moss.percentiles(fits, [2.5, 97.5], 0)
    ax.fill_between(xx, *limits, alpha=.15, color=color)
    ax.set_title("lmplot()")
Ejemplo n.º 6
0
def _ts_kde(ax, x, data, color, **kwargs):
    """Upsample over time and plot a KDE of the bootstrap distribution.

    Parameters
    ----------
    ax : matplotlib axis
        Axis the density image is drawn on.
    x : array
        Sample positions handed to ``interp1d``; its range also sets the
        horizontal extent of the image.
    data : array
        Values interpolated against ``x`` (presumably bootstrap samples
        by timepoints -- confirm against the caller).
    color : matplotlib color
        Color of the image; the density only modulates the alpha channel.
    kwargs : dict
        Accepted but not used.

    """
    kde_data = []
    y_min, y_max = moss.percentiles(data, [1, 99])
    y_vals = np.linspace(y_min, y_max, 100)
    upsampler = interpolate.interp1d(x, data)
    data_upsample = upsampler(np.linspace(x.min(), x.max(), 100))
    for pt_data in data_upsample.T:
        # Use the public name: the ``scipy.stats.kde`` submodule is a
        # deprecated namespace
        pt_kde = stats.gaussian_kde(pt_data)
        kde_data.append(pt_kde(y_vals))
    kde_data = np.transpose(kde_data)

    # Build an RGBA image of one flat color whose alpha is the density
    rgb = mpl.colors.ColorConverter().to_rgb(color)
    img = np.zeros((kde_data.shape[0], kde_data.shape[1], 4))
    img[:, :, :3] = rgb
    # Scale every column so that its peak density maps to full opacity
    kde_data /= kde_data.max(axis=0)
    kde_data[kde_data > 1] = 1
    img[:, :, 3] = kde_data
    ax.imshow(img, interpolation="spline16", zorder=1,
              extent=(x.min(), x.max(), y_min, y_max),
              aspect="auto", origin="lower")
Ejemplo n.º 7
0
def _ts_kde(ax, x, data, color, **kwargs):
    """Upsample over time and plot a KDE of the bootstrap distribution.

    Parameters
    ----------
    ax : matplotlib axis
        Target axis for the density image.
    x : array
        Positions passed to ``interp1d``; ``x.min()`` and ``x.max()``
        bound the image horizontally.
    data : array
        Values interpolated against ``x`` (presumably one row per
        bootstrap sample -- confirm against the caller).
    color : matplotlib color
        Fills the RGB channels; the alpha channel carries the density.
    kwargs : dict
        Accepted for signature compatibility; unused here.

    """
    y_min, y_max = moss.percentiles(data, [1, 99])
    y_vals = np.linspace(y_min, y_max, 100)
    upsampler = interpolate.interp1d(x, data)
    data_upsample = upsampler(np.linspace(x.min(), x.max(), 100))

    # One density curve per upsampled position. ``stats.gaussian_kde`` is
    # the public entry point; ``stats.kde`` is a deprecated namespace
    kde_data = []
    for pt_data in data_upsample.T:
        pt_kde = stats.gaussian_kde(pt_data)
        kde_data.append(pt_kde(y_vals))
    kde_data = np.transpose(kde_data)

    rgb = mpl.colors.ColorConverter().to_rgb(color)
    img = np.zeros((kde_data.shape[0], kde_data.shape[1], 4))
    img[:, :, :3] = rgb
    # Normalize each column to its own maximum before using it as alpha
    kde_data /= kde_data.max(axis=0)
    kde_data[kde_data > 1] = 1
    img[:, :, 3] = kde_data
    ax.imshow(img, interpolation="spline16", zorder=1,
              extent=(x.min(), x.max(), y_min, y_max),
              aspect="auto", origin="lower")
Ejemplo n.º 8
0
def refine_mask(timeseries, mask_file):
    """Improve brain mask by thresholding and dilating masked timeseries.

    Parameters
    ----------
    timeseries : str
        Path of the timeseries image; time is taken to be the last axis.
    mask_file : str
        Path of the existing mask image. Only its affine and header are
        reused for the new mask.

    Returns
    -------
    timeseries : str
        Absolute path of ``timeseries_masked.nii.gz``.
    mask_file : str
        Absolute path of ``functional_mask.nii.gz``.
    mean_file : str
        Absolute path of ``mean_func.nii.gz``.

    All three files are written to the current working directory.

    """
    ts_img = nib.load(timeseries)
    ts_data = ts_img.get_data()

    mask_img = nib.load(mask_file)

    # Find a robust 10% threshold and apply it to the timeseries.
    # The threshold sits 10% of the way up the robust (2nd to 98th
    # percentile) range, so the range is the difference of the limits.
    rmin, rmax = moss.percentiles(ts_data, [2, 98])
    thresh = rmin + 0.1 * (rmax - rmin)
    ts_data[ts_data < thresh] = 0
    # A voxel stays in the mask only if it is above threshold at every
    # timepoint
    ts_min = ts_data.min(axis=-1)
    mask = ts_min > 0

    # Dilate the resulting mask by one voxel
    dilator = sp.ndimage.generate_binary_structure(3, 3)
    mask = sp.ndimage.binary_dilation(mask, dilator)

    # Mask the timeseries and save it
    ts_data[~mask] = 0
    timeseries = os.path.abspath("timeseries_masked.nii.gz")
    new_ts = nib.Nifti1Image(ts_data,
                             ts_img.get_affine(),
                             ts_img.get_header())
    new_ts.to_filename(timeseries)

    # Save the mask image
    mask_file = os.path.abspath("functional_mask.nii.gz")
    new_mask = nib.Nifti1Image(mask,
                               mask_img.get_affine(),
                               mask_img.get_header())
    new_mask.to_filename(mask_file)

    # Make a new mean functional image and save it
    mean_file = os.path.abspath("mean_func.nii.gz")
    new_mean = nib.Nifti1Image(ts_data.mean(axis=-1),
                               ts_img.get_affine(),
                               ts_img.get_header())
    new_mean.to_filename(mean_file)

    return timeseries, mask_file, mean_file
Ejemplo n.º 9
0
def violin(vals,
           groupby=None,
           inner="box",
           color=None,
           positions=None,
           names=None,
           widths=.8,
           alpha=None,
           join_rm=False,
           kde_thresh=1e-2,
           inner_kws=None,
           ax=None,
           **kwargs):
    """Create a violin plot (a combination of boxplot and KDE plot).

    Parameters
    ----------
    vals : array or sequence of arrays
        data to plot
    groupby : grouping object
        if `vals` is a Series, this is used to group
    inner : box | stick | points
        plot quartiles or individual sample values inside violin
    color : mpl color, sequence of colors, or seaborn palette name
        inner violin colors
    positions : number or sequence of numbers
        position of first violin or positions of each violin
    widths : float
        width of each violin at maximum density
    alpha : float, optional
        transparency of violin fill
    join_rm : boolean, optional
        if True, positions in the input arrays are treated as repeated
        measures and are joined with a line plot
    names : list of strings, optional
        names to plot on x axis, otherwise plots numbers
    kde_thresh : float, optional
        proportion of maximum at which to threshold the KDE curve
    inner_kws : dict, optional
        keyword arguments for inner plot; the dictionary is copied, so
        the caller's object is left untouched
    ax : matplotlib axis, optional
        axis to plot on, otherwise uses the current axis
    kwargs : dict
        accepted but not used

    Returns
    -------
    ax : matplotlib axis
        axis with violin plot

    Raises
    ------
    ValueError
        if `vals` has more than 2 dimensions or if the length of `names`
        does not match the number of violins

    """
    if ax is None:
        ax = plt.gca()

    # Axis labels are only inferred from pandas inputs; defining them up
    # front keeps them bound when the grouper carries no name
    xlabel = None
    ylabel = None

    if isinstance(vals, pd.DataFrame):
        if names is None:
            names = vals.columns
        xlabel = vals.columns.name
        vals = vals.values

    elif isinstance(vals, pd.Series) and groupby is not None:
        if hasattr(groupby, "name"):
            xlabel = groupby.name
        if names is None:
            names = np.sort(pd.unique(groupby))
        ylabel = vals.name
        # One array per group, in sorted group order
        vals = [grp.values for _, grp in vals.groupby(groupby)]

    # Coerce the input into a list with one vector per violin
    if hasattr(vals, 'shape'):
        if len(vals.shape) == 1:
            if hasattr(vals[0], 'shape'):
                vals = list(vals)
            else:
                vals = [vals]
        elif len(vals.shape) == 2:
            nr, nc = vals.shape
            if nr == 1:
                vals = [vals]
            elif nc == 1:
                vals = [vals.ravel()]
            else:
                vals = [vals[:, i] for i in range(nc)]
        else:
            raise ValueError("Input x can have no more than 2 dimensions")
    if not hasattr(vals[0], '__len__'):
        vals = [vals]

    vals = [np.asarray(a, float) for a in vals]

    if color is None:
        colors = husl_palette(len(vals), l=.7)
    else:
        # Strings are iterable on Python 3 but name a single color or a
        # palette, so they must not be treated as a sequence of colors
        if (hasattr(color, "__iter__")
                and not isinstance(color, (tuple, str))):
            colors = color
        else:
            try:
                color = mpl.colors.colorConverter.to_rgb(color)
                colors = [color for _ in vals]
            except ValueError:
                colors = color_palette(color, len(vals))

    colors = [mpl.colors.colorConverter.to_rgb(c) for c in colors]
    colors = [desaturate(c, .7) for c in colors]

    # The outline gray is derived from the darkest violin color
    light_vals = [colorsys.rgb_to_hls(*c)[1] for c in colors]
    l = min(light_vals) * .6
    gray = (l, l, l)

    # Copy so that the pops below do not modify the caller's dictionary
    inner_kws = {} if inner_kws is None else dict(inner_kws)

    if positions is None:
        positions = np.arange(1, len(vals) + 1)
    elif not hasattr(positions, "__iter__"):
        positions = np.arange(positions, len(vals) + positions)

    in_alpha = inner_kws.pop("alpha", .6 if inner == "points" else 1)
    in_alpha *= 1 if alpha is None else alpha
    in_color = inner_kws.pop("color", gray)
    in_marker = inner_kws.pop("marker", ".")
    in_lw = inner_kws.pop("lw", 1.5 if inner == "box" else .8)

    for i, a in enumerate(vals):
        x = positions[i]
        kde = stats.gaussian_kde(a)
        y = _kde_support(a, kde, 1000, kde_thresh)
        dens = kde(y)
        # Rescale so the widest point of the violin spans `widths`
        scl = 1 / (dens.max() / (widths / 2))
        dens *= scl

        ax.fill_betweenx(y, x - dens, x + dens, alpha=alpha, color=colors[i])
        if inner == "box":
            for quant in moss.percentiles(a, [25, 75]):
                q_x = kde(quant) * scl
                q_x = [x - q_x, x + q_x]
                ax.plot(q_x, [quant, quant],
                        color=in_color,
                        linestyle=":",
                        linewidth=in_lw,
                        **inner_kws)
            med = np.median(a)
            m_x = kde(med) * scl
            m_x = [x - m_x, x + m_x]
            ax.plot(m_x, [med, med],
                    color=in_color,
                    linestyle="--",
                    linewidth=in_lw,
                    **inner_kws)
        elif inner == "stick":
            x_vals = kde(a) * scl
            x_vals = [x - x_vals, x + x_vals]
            ax.plot(x_vals, [a, a],
                    color=in_color,
                    linewidth=in_lw,
                    alpha=in_alpha,
                    **inner_kws)
        elif inner == "points":
            x_vals = [x for _ in a]
            ax.plot(x_vals,
                    a,
                    in_marker,
                    color=in_color,
                    alpha=in_alpha,
                    mew=0,
                    **inner_kws)
        for side in [-1, 1]:
            ax.plot((side * dens) + x, y, c=gray, linewidth=1.5)

    if join_rm:
        # Join the repeated measures at the violin positions so the lines
        # follow the violins when custom positions are used
        ax.plot(positions, vals, color=in_color, alpha=2. / 3)

    ax.set_xticks(positions)
    if names is not None:
        if len(vals) != len(names):
            raise ValueError("Length of names list must match nuber of bins")
        ax.set_xticklabels(names)
    ax.set_xlim(positions[0] - .5, positions[-1] + .5)

    if xlabel is not None:
        ax.set_xlabel(xlabel)
    if ylabel is not None:
        ax.set_ylabel(ylabel)

    ax.xaxis.grid(False)
    return ax
Ejemplo n.º 10
0
def _lmplot_partial_out(values, frame, confounds):
    """Remove each confound column of ``frame`` from ``values``.

    Every confound is mean-centered and passed, together with the
    centered ``values``, to ``moss.vector_reject``; the mean of ``values``
    is added back afterwards. Neither input is modified in place.
    """
    for var in confounds:
        conf = frame[var]
        conf = conf - conf.mean()
        center = values.mean()
        values = moss.vector_reject(values - center, conf) + center
    return values


def lmplot(x, y, data, color=None, row=None, col=None, col_wrap=None,
           x_estimator=None, x_ci=95, n_boot=5000, fit_reg=True,
           order=1, ci=95, logistic=False, truncate=False,
           x_partial=None, y_partial=None, x_jitter=None, y_jitter=None,
           sharex=True, sharey=True, palette="husl", size=None,
           scatter_kws=None, line_kws=None, palette_kws=None):
    """Plot a linear model from a DataFrame.

    Parameters
    ----------
    x, y : strings
        column names in `data` DataFrame for x and y variables
    data : DataFrame
        source of data for the model
    color : string, optional
        DataFrame column name to group the model by color
    row, col : strings, optional
        DataFrame column names to make separate plot facets
    col_wrap : int, optional
        wrap col variable at this width - cannot be used with row facet
    x_estimator : callable, optional
        Interpret X values as factor labels and use this function
        to plot the point estimate and bootstrapped CI
    x_ci : int, optional
        size of confidence interval for x_estimator error bars
    n_boot : int, optional
        number of bootstrap iterations to perform
    fit_reg : bool, optional
        if True fit a regression model by color/row/col and plot
    order : int, optional
        order of the regression polynomial to fit (default = 1)
    ci : int, optional
        confidence interval for the regression line
    logistic : bool, optional
        fit the regression line with logistic regression
    truncate : bool, optional
        if True, only fit line from data min to data max
    {x, y}_partial : string or list of strings, optional
        regress these variables out of the factors before plotting
    {x, y}_jitter : float, optional
        parameters for uniformly distributed random noise added to positions
    sharex, sharey : bools, optional
        only relevant if faceting; passed to plt.subplots
    palette : seaborn color palette argument
        if using separate plots by color, draw with this color palette
    size : float, optional
        size (plots are square) for each plot facet
    {scatter, line}_kws : dictionary
        keyword arguments to pass to the underlying plot functions;
        `scatter_kws` is copied, so the caller's dictionary is not
        modified. `ms` defaults to 4 for raw scatters and to 7 for
        `x_estimator` points, `mew` to 0 and `alpha` (raw scatters only)
        to .77
    palette_kws : dictionary
        keyword arguments for seaborn.color_palette

    Raises
    ------
    ValueError
        if `col_wrap` is given without `col` or together with `row`

    """
    # TODO
    # - legend when fit_reg is False
    # - wrap title when wide

    # First sort out the general figure layout
    if size is None:
        size = mpl.rcParams["figure.figsize"][1]

    if col is None and col_wrap is not None:
        raise ValueError("Need column facet variable for `col_wrap`")
    if row is not None and col_wrap is not None:
        raise ValueError("Cannot facet rows when using `col_wrap`")

    nrow = 1 if row is None else len(data[row].unique())
    ncol = 1 if col is None else len(data[col].unique())

    if col_wrap is not None:
        ncol = col_wrap
        # float() keeps this a true division under Python 2 as well, so
        # a partially filled last row is still counted
        nrow = int(np.ceil(len(data[col].unique()) / float(col_wrap)))

    f, axes = plt.subplots(nrow, ncol, sharex=sharex, sharey=sharey,
                           figsize=(size * ncol, size * nrow))
    axes = np.atleast_2d(axes).reshape(nrow, ncol)

    # Build the facet masks from whether a facet variable was given, not
    # from the grid shape: a variable with a single level (or a wrap width
    # of one) still needs its level values for the facet titles
    if row is None:
        row_masks = [np.repeat(True, len(data))]
    else:
        row_vals = np.sort(data[row].unique())
        row_masks = [data[row] == val for val in row_vals]

    if col is None:
        col_masks = [np.repeat(True, len(data))]
    else:
        col_vals = np.sort(data[col].unique())
        col_masks = [data[col] == val for val in col_vals]

    if x_partial is not None:
        if not isinstance(x_partial, list):
            x_partial = [x_partial]
    if y_partial is not None:
        if not isinstance(y_partial, list):
            y_partial = [y_partial]

    if palette_kws is None:
        palette_kws = {}

    # Sort out the plot colors
    color_factor = color
    if color is None:
        hue_masks = [np.repeat(True, len(data))]
        colors = ["#222222"]
    else:
        hue_vals = np.sort(data[color].unique())
        hue_masks = [data[color] == val for val in hue_vals]
        colors = color_palette(palette, len(hue_masks), **palette_kws)

    # Default keyword arguments for plot components; the scatter keywords
    # are copied because entries are popped from them below
    scatter_kws = {} if scatter_kws is None else dict(scatter_kws)
    if line_kws is None:
        line_kws = {}

    # Pull out the marker settings once so that user-supplied values reach
    # both the raw scatter and the estimator points
    user_ms = scatter_kws.pop("ms", None)
    scatter_ms = 4 if user_ms is None else user_ms
    estimator_ms = 7 if user_ms is None else user_ms
    scatter_mew = scatter_kws.pop("mew", 0)
    scatter_alpha = scatter_kws.pop("alpha", .77)

    # First walk through the facets and plot the scatters
    for row_i, row_mask in enumerate(row_masks):
        for col_j, col_mask in enumerate(col_masks):
            if col_wrap is not None:
                f_row = col_j // ncol
                f_col = col_j % ncol
            else:
                f_row, f_col = row_i, col_j
            ax = axes[f_row, f_col]
            if f_row + 1 == nrow:
                ax.set_xlabel(x)
            if f_col == 0:
                ax.set_ylabel(y)

            # Title the plot if we are faceting
            title = ""
            if row is not None:
                title += "%s = %s" % (row, row_vals[row_i])
            if row is not None and col is not None:
                title += " | "
            if col is not None:
                title += "%s = %s" % (col, col_vals[col_j])
            ax.set_title(title)

            for hue_k, hue_mask in enumerate(hue_masks):
                color = colors[hue_k]
                data_ijk = data[row_mask & col_mask & hue_mask]

                if x_estimator is not None:
                    x_vals = data_ijk[x].unique()
                    y_vals = data_ijk[y]

                    if y_partial is not None:
                        y_vals = _lmplot_partial_out(y_vals, data_ijk,
                                                     y_partial)

                    y_grouped = [np.array(y_vals[data_ijk[x] == v])
                                 for v in x_vals]

                    y_est = [x_estimator(y_i) for y_i in y_grouped]
                    y_boots = [moss.bootstrap(np.array(y_i),
                                              func=x_estimator,
                                              n_boot=n_boot)
                               for y_i in y_grouped]
                    ci_lims = [50 - x_ci / 2., 50 + x_ci / 2.]
                    y_ci = [moss.percentiles(y_i, ci_lims) for y_i in y_boots]
                    y_error = ci_to_errsize(np.transpose(y_ci), y_est)

                    ax.plot(x_vals, y_est, "o", mew=scatter_mew,
                            ms=estimator_ms, color=color, **scatter_kws)
                    # NOTE(review): newer matplotlib releases expect
                    # fmt="none" here -- confirm the supported version
                    ax.errorbar(x_vals, y_est, y_error,
                                fmt=None, ecolor=color)
                else:
                    x_ = data_ijk[x]
                    y_ = data_ijk[y]

                    if x_partial is not None:
                        x_ = _lmplot_partial_out(x_, data_ijk, x_partial)
                    if y_partial is not None:
                        y_ = _lmplot_partial_out(y_, data_ijk, y_partial)

                    # Jitter is added to copies so the facet data frame
                    # is not written to
                    if x_jitter is not None:
                        x_ = x_ + np.random.uniform(-x_jitter, x_jitter,
                                                    x_.shape)
                    if y_jitter is not None:
                        y_ = y_ + np.random.uniform(-y_jitter, y_jitter,
                                                    y_.shape)
                    ax.plot(x_, y_, "o", color=color, alpha=scatter_alpha,
                            mew=scatter_mew, ms=scatter_ms, **scatter_kws)

    for ax_i in np.ravel(axes):
        ax_i.set_xmargin(.05)
        ax_i.autoscale_view()

    # Now walk through again and plot the regression estimate
    # and a confidence interval for the regression line
    if fit_reg:
        for row_i, row_mask in enumerate(row_masks):
            for col_j, col_mask in enumerate(col_masks):
                if col_wrap is not None:
                    f_row = col_j // ncol
                    f_col = col_j % ncol
                else:
                    f_row, f_col = row_i, col_j
                ax = axes[f_row, f_col]
                xlim = ax.get_xlim()

                for hue_k, hue_mask in enumerate(hue_masks):
                    color = colors[hue_k]
                    data_ijk = data[row_mask & col_mask & hue_mask]
                    x_vals = np.array(data_ijk[x])
                    y_vals = np.array(data_ijk[y])
                    if not len(x_vals):
                        continue

                    # Sort out the limit of the fit
                    if truncate:
                        xx = np.linspace(x_vals.min(),
                                         x_vals.max(), 100)
                    else:
                        xx = np.linspace(xlim[0], xlim[1], 100)
                    xx_ = sm.add_constant(xx, prepend=True)

                    # Inner function to bootstrap the regression; it is
                    # only called within this iteration, so it always
                    # sees the current prediction grid
                    def _regress(x, y):
                        if logistic:
                            x_ = sm.add_constant(x, prepend=True)
                            fit = sm.GLM(y, x_,
                                         family=sm.families.Binomial()).fit()
                            reg = fit.predict(xx_)
                        else:
                            fit = np.polyfit(x, y, order)
                            reg = np.polyval(fit, xx)
                        return reg

                    # Remove nuisance variables with vector rejection
                    if x_partial is not None:
                        x_vals = _lmplot_partial_out(x_vals, data_ijk,
                                                     x_partial)
                    if y_partial is not None:
                        y_vals = _lmplot_partial_out(y_vals, data_ijk,
                                                     y_partial)

                    # Regression line confidence interval
                    if ci is not None:
                        ci_lims = [50 - ci / 2., 50 + ci / 2.]
                        boots = moss.bootstrap(x_vals, y_vals,
                                               func=_regress,
                                               n_boot=n_boot)
                        ci_band = moss.percentiles(boots, ci_lims, axis=0)
                        ax.fill_between(xx, *ci_band, color=color, alpha=.15)

                    # Regression line
                    reg = _regress(x_vals, y_vals)
                    if color_factor is None:
                        label = ""
                    else:
                        label = hue_vals[hue_k]
                    ax.plot(xx, reg, color=color,
                            label=str(label), **line_kws)
                    ax.set_xlim(xlim)

    # Plot the legend on the upper left facet and adjust the layout
    if color_factor is not None and color_factor not in [row, col]:
        axes[0, 0].legend(loc="best", title=color_factor)
    plt.tight_layout()
Ejemplo n.º 11
0
def violin(vals, inner="box", position=None, widths=.5, join_rm=False,
           names=None, ax=None, **kwargs):
    """Create a violin plot (a combination of boxplot and KDE plot).

    Parameters
    ----------
    vals : array or sequence of arrays
        data to plot
    inner : box | stick | points
        plot quartiles or individual sample values inside violin
    position : number or sequence of numbers
        position of first violin or positions of each violin
    widths : float
        width of each violin at maximum density
    join_rm : boolean, optional
        if True, positions in the input arrays are treated as repeated
        measures and are joined with a line plot
    names : list of strings, optional
        names to plot on x axis, otherwise plots numbers
    ax : matplotlib axis, optional
        axis to plot on, otherwise creates new one
    kwargs : dict
        passed to a throwaway ``ax.plot`` call whose only purpose is to
        pick the fill color

    Returns
    -------
    ax : matplotlib axis
        axis with violin plot

    Raises
    ------
    ValueError
        if `vals` has more than 2 dimensions or if the length of `names`
        does not match the number of violins

    """
    if ax is None:
        ax = plt.subplot(111)

    # Coerce the input into a list with one vector per violin
    if hasattr(vals, 'shape'):
        if len(vals.shape) == 1:
            if hasattr(vals[0], 'shape'):
                vals = list(vals)
            else:
                vals = [vals]
        elif len(vals.shape) == 2:
            nr, nc = vals.shape
            if nr == 1:
                vals = [vals]
            elif nc == 1:
                vals = [vals.ravel()]
            else:
                vals = [vals[:, i] for i in range(nc)]
        else:
            raise ValueError("Input x can have no more than 2 dimensions")
    if not hasattr(vals[0], '__len__'):
        vals = [vals]

    vals = [np.asarray(a, float) for a in vals]

    # Draw and immediately remove a dummy line to learn which color the
    # axis (or the caller's kwargs) would assign
    line, = ax.plot(vals[0].mean(), vals[0].mean(), **kwargs)
    color = line.get_color()
    line.remove()

    gray = "#555555"

    if position is None:
        position = np.arange(1, len(vals) + 1)
    elif not hasattr(position, "__iter__"):
        position = np.arange(position, len(vals) + position)
    for i, a in enumerate(vals):
        x = position[i]
        kde = stats.gaussian_kde(a)
        y = _kde_support(a, kde, 1000)
        dens = kde(y)
        # Rescale so the widest point of the violin spans `widths`
        scl = 1 / (dens.max() / (widths / 2))
        dens *= scl

        ax.fill_betweenx(y, x - dens, x + dens, alpha=.7, color=color)
        if inner == "box":
            for quant in moss.percentiles(a, [25, 75]):
                q_x = kde(quant) * scl
                q_x = [x - q_x, x + q_x]
                ax.plot(q_x, [quant, quant], gray,
                        linestyle=":", linewidth=1.5)
            med = np.median(a)
            m_x = kde(med) * scl
            m_x = [x - m_x, x + m_x]
            ax.plot(m_x, [med, med], gray,
                    linestyle="--", linewidth=1.2)
        elif inner == "stick":
            x_vals = kde(a) * scl
            x_vals = [x - x_vals, x + x_vals]
            ax.plot(x_vals, [a, a], gray, linewidth=.7, alpha=.7)
        elif inner == "points":
            # Use a throwaway name: under Python 2 the comprehension
            # variable leaks and would overwrite the loop index
            x_vals = [x for _ in a]
            ax.plot(x_vals, a, "o", color=gray, alpha=.3)
        for side in [-1, 1]:
            ax.plot((side * dens) + x, y, gray, linewidth=1)

    if join_rm:
        # Join the repeated measures at the violin positions so the lines
        # follow the violins when custom positions are used
        ax.plot(position, vals, color=color, alpha=2. / 3)

    ax.set_xticks(position)
    if names is not None:
        if len(vals) != len(names):
            raise ValueError("Length of names list must match nuber of bins")
        ax.set_xticklabels(names)
    ax.set_xlim(position[0] - .5, position[-1] + .5)

    return ax
Ejemplo n.º 12
0
def violinplot(vals, groupby=None, inner="box", color=None, positions=None,
               names=None, order=None, kernel="gau", bw="scott", widths=.8,
               alpha=None, join_rm=False, gridsize=100, cut=3, inner_kws=None,
               ax=None, **kwargs):
    """Create a violin plot (a combination of boxplot and kernel density plot).

    Parameters
    ----------
    vals : DataFrame, Series, 2D array, or list of vectors.
        Data for plot. DataFrames and 2D arrays are assumed to be "wide" with
        each column mapping to a violin. Lists of data are assumed to have
        one element per violin. Can also provide one long Series in
        conjunction with a grouping element as the `groupby` parameter to
        reshape the data into several violins. Otherwise 1D data will
        produce a single violin.
    groupby : grouping object
        Used to split a Series into violins; forwarded to `_box_reshape`.
    inner : {"box", "stick", "points"}
        What to draw inside each violin: quartiles and median ("box"), one
        line per observation ("stick"; "sticks" is accepted as an alias), or
        one marker per observation ("points"). Any other value draws only
        the violin itself.
    color : mpl color, sequence of colors, or seaborn palette name
        Color specification for the violin fill; resolved by `_box_colors`.
    positions : number or sequence of numbers
        Position of first violin or positions of each violin.
    names : list of strings, optional
        Names to plot on x axis; otherwise plots numbers. This will override
        names inferred from Pandas inputs.
    order : list of strings, optional
        If vals is a Pandas object with name information, you can control the
        order of the plot by providing the violin names in your preferred
        order.
    kernel : {'gau' | 'cos' | 'biw' | 'epa' | 'tri' | 'triw' }
        Code for shape of kernel to fit with.
    bw : {'scott' | 'silverman' | scalar}
        Name of reference method to determine kernel size, or size as a
        scalar.
    widths : float
        Width of each violin at maximum density.
    alpha : float, optional
        Transparency of violin fill.
    join_rm : boolean, optional
        If True, positions in the input arrays are treated as repeated
        measures and are joined with a line plot.
    gridsize : int
        Number of discrete gridpoints to evaluate the density on.
    cut : scalar
        Draw the estimate to cut * bw from the extreme data points.
    inner_kws : dict, optional
        Keyword arguments for the inner plot. The keys "alpha", "color",
        "marker" and "lw" are interpreted here; everything else is passed to
        ``ax.plot``. The dict itself is not modified.
    ax : matplotlib axis, optional
        Axis to plot on, otherwise grab current axis.
    kwargs : additional keyword arguments
        Only ``lw`` / ``linewidth`` is read; it sets the width of the violin
        outline (default 1.5).

    Returns
    -------
    ax : matplotlib axis
        Axis with violin plot.

    Raises
    ------
    ValueError
        If the number of names does not match the number of violins.

    """
    if ax is None:
        ax = plt.gca()

    # Reshape and find labels for the plot
    vals, xlabel, ylabel, names = _box_reshape(vals, groupby, names, order)

    # Validate before anything is drawn so a bad call leaves the axis clean
    if names is not None and len(vals) != len(names):
        raise ValueError("Length of names list must match nuber of bins")

    # Sort out the plot colors
    colors, gray = _box_colors(vals, color)

    # The docstring historically advertised "sticks"; treat it as "stick"
    if inner == "sticks":
        inner = "stick"

    # Work on a copy so the pops below do not empty the caller's dict
    inner_kws = {} if inner_kws is None else dict(inner_kws)
    in_alpha = inner_kws.pop("alpha", .6 if inner == "points" else 1)
    in_alpha *= 1 if alpha is None else alpha
    in_color = inner_kws.pop("color", gray)
    in_marker = inner_kws.pop("marker", ".")
    in_lw = inner_kws.pop("lw", 1.5 if inner == "box" else .8)

    # Find where the violins are going
    if positions is None:
        positions = np.arange(1, len(vals) + 1)
    elif not hasattr(positions, "__iter__"):
        positions = np.arange(positions, len(vals) + positions)

    # Outline width: honor either spelling, falling back to the default
    lw = kwargs.get("lw", kwargs.get("linewidth", 1.5))

    # Iterate over the variables
    for i, a in enumerate(vals):

        # Fit the KDE
        x = positions[i]
        kde = sm.nonparametric.KDEUnivariate(a)
        fft = kernel == "gau"
        kde.fit(bw=bw, kernel=kernel, gridsize=gridsize, cut=cut, fft=fft)
        y, dens = kde.support, kde.density

        # Rescale so the widest point of the violin spans `widths`
        scl = 1 / (dens.max() / (widths / 2))
        dens *= scl

        # Draw the violin
        ax.fill_betweenx(y, x - dens, x + dens, alpha=alpha, color=colors[i])
        if inner == "box":
            for quant in moss.percentiles(a, [25, 75]):
                q_x = kde.evaluate(quant) * scl
                q_x = [x - q_x, x + q_x]
                ax.plot(q_x, [quant, quant], color=in_color,
                        linestyle=":", linewidth=in_lw, **inner_kws)
            med = np.median(a)
            m_x = kde.evaluate(med) * scl
            m_x = [x - m_x, x + m_x]
            ax.plot(m_x, [med, med], color=in_color,
                    linestyle="--", linewidth=in_lw, **inner_kws)
        elif inner == "stick":
            x_vals = kde.evaluate(a) * scl
            x_vals = [x - x_vals, x + x_vals]
            ax.plot(x_vals, [a, a], color=in_color,
                    linewidth=in_lw, alpha=in_alpha, **inner_kws)
        elif inner == "points":
            x_vals = [x for _ in a]
            ax.plot(x_vals, a, in_marker, color=in_color,
                    alpha=in_alpha, mew=0, **inner_kws)
        for side in [-1, 1]:
            ax.plot((side * dens) + x, y, c=gray, lw=lw)

    # Draw the repeated measure bridges at the actual violin positions
    if join_rm:
        ax.plot(positions, vals, color=in_color, alpha=2. / 3)

    # Add in semantic labels
    ax.set_xticks(positions)
    if names is not None:
        ax.set_xticklabels(names)
    ax.set_xlim(positions[0] - .5, positions[-1] + .5)

    if xlabel is not None:
        ax.set_xlabel(xlabel)
    if ylabel is not None:
        ax.set_ylabel(ylabel)

    ax.xaxis.grid(False)
    return ax
Ejemplo n.º 13
0
def fixedfx_report(space, anatomy, zstat_files, r2_files, masks):
    """Write PNG mosaics summarizing the fixed effects results.

    Parameters
    ----------
    space : string
        When equal to "mni", every second slice is shown and the first
        slice is offset by 4; otherwise every slice is used.
    anatomy : string
        Path to the image used as the grayscale background.
    zstat_files : list of strings
        Paths to z-statistic images. The name of each file's parent
        directory is used as the contrast name.
    r2_files : list of strings
        Paths to R^2 images. They are paired with two colormaps, so at most
        the first two files are plotted.
    masks : list of strings
        Paths to the mask images, one per run.

    Returns
    -------
    report : list of strings
        Absolute paths of the mask overlap PNG, the R^2 PNGs, and one
        directory per contrast (each holding a ``zstat1.png``). Everything
        is written relative to the current working directory.

    """
    sns.set()
    bg = nib.load(anatomy).get_data()

    mask_data = [nib.load(f).get_data() for f in masks]
    # Shade (value 1) voxels that are missing from at least one mask;
    # voxels inside every mask and dark background voxels stay transparent
    mask = np.where(np.all(mask_data, axis=0), np.nan, 1)
    mask[bg < moss.percentiles(bg, 5)] = np.nan

    # Find the plot parameters: crop to the nonzero extent of the background
    xdata = np.flatnonzero(bg.any(axis=1).any(axis=1))
    xslice = slice(xdata.min(), xdata.max() + 1)
    ydata = np.flatnonzero(bg.any(axis=0).any(axis=1))
    yslice = slice(ydata.min(), ydata.max() + 1)
    z_extent = np.flatnonzero(bg.any(axis=0).any(axis=0))
    zmin, zmax = z_extent.min(), z_extent.max()

    step = 2 if space == "mni" else 1
    offset = 4 if space == "mni" else 0

    n_slices = (zmax - zmin) // step
    n_row, n_col = n_slices // 8, 8
    start = n_slices % n_col // step + zmin + offset
    figsize = (10, 1.375 * n_row)
    slices = (start + np.arange(zmax - zmin))[::step][:n_slices]
    pltkws = dict(nrows=int(n_row), ncols=int(n_col),
                  figsize=figsize, facecolor="k")
    pngkws = dict(dpi=100, bbox_inches="tight", facecolor="k", edgecolor="k")

    vmin, vmax = 0, moss.percentiles(bg, 95)
    mask_cmap = mpl.colors.ListedColormap(["#160016"])

    report = []

    def add_colorbar(f, cmap, low, high, left, width, fmt):
        """Draw a horizontal colorbar with end labels at the figure bottom."""
        cbar = np.outer(np.arange(0, 1, .01), np.ones(10))
        cbar_ax = f.add_axes([left, 0, width, .03])
        cbar_ax.imshow(cbar.T, aspect="auto", cmap=cmap)
        cbar_ax.axis("off")
        f.text(left - .01, .018, fmt % low, ha="right", va="center",
               color="white", size=13, weight="demibold")
        f.text(left + width + .01, .018, fmt % high, ha="left",
               va="center", color="white", size=13, weight="demibold")

    # Plot the mask edges
    f, axes = plt.subplots(**pltkws)
    mask_colors = sns.husl_palette(len(mask_data))
    mask_colors.reverse()
    cmaps = [mpl.colors.ListedColormap([c]) for c in mask_colors]
    for i, ax in zip(slices, axes.ravel()):
        ax.imshow(bg[xslice, yslice, i].T,
                  cmap="gray", vmin=vmin, vmax=vmax)
        for j, m in enumerate(mask_data):
            # Contouring an empty slice would fail, so skip those
            if m[xslice, yslice, i].any():
                ax.contour(m[xslice, yslice, i].T,
                           cmap=cmaps[j], linewidths=.75)
        ax.axis("off")
    text_min = max(.15, .5 - len(mask_data) * .05)
    text_max = min(.85, .5 + len(mask_data) * .05)
    text_pos = np.linspace(text_min, text_max, len(mask_data))
    for i, color in enumerate(mask_colors):
        f.text(text_pos[i], .03, "Run %d" % (i + 1), color=color,
               size=11, weight="demibold", ha="center", va="center")
    mask_png = op.abspath("mask_overlap.png")
    f.savefig(mask_png, **pngkws)
    report.append(mask_png)
    plt.close(f)

    # Now plot the R2 images
    for fname, cmap in zip(r2_files, ["GnBu_r", "YlGn_r"]):
        f, axes = plt.subplots(**pltkws)
        r2data = nib.load(fname).get_data()
        rmax = r2data[~np.isnan(r2data)].max()
        r2data[bg == 0] = np.nan
        for i, ax in zip(slices, axes.ravel()):
            ax.imshow(bg[xslice, yslice, i].T,
                      cmap="gray", vmin=vmin, vmax=vmax)
            ax.imshow(r2data[xslice, yslice, i].T, cmap=cmap,
                      vmin=0, vmax=rmax, alpha=.8)
            ax.imshow(mask[xslice, yslice, i].T, alpha=.5,
                      cmap=mask_cmap, interpolation="nearest")
            ax.axis("off")
        savename = op.abspath(op.basename(fname).replace(".nii.gz", ".png"))
        add_colorbar(f, cmap, 0, rmax, .35, .3, "%.2f")
        f.savefig(savename, **pngkws)
        report.append(savename)
        plt.close(f)

    # Finally plot each zstat image
    for fname in zstat_files:
        zstat = nib.load(fname).get_data()
        zpos = zstat.copy()
        zneg = zstat.copy()
        zpos[zstat < 2.3] = np.nan
        zneg[zstat > -2.3] = np.nan
        zlow = 2.3
        # nanmax keeps the color limits finite if the image contains NaNs
        zhigh = max(np.nanmax(np.abs(zstat)), 3.71)
        f, axes = plt.subplots(**pltkws)
        for i, ax in zip(slices, axes.ravel()):
            ax.imshow(bg[xslice, yslice, i].T, cmap="gray",
                      vmin=vmin, vmax=vmax)
            ax.imshow(zpos[xslice, yslice, i].T, cmap="Reds_r",
                      vmin=zlow, vmax=zhigh)
            ax.imshow(zneg[xslice, yslice, i].T, cmap="Blues",
                      vmin=-zhigh, vmax=-zlow)
            ax.imshow(mask[xslice, yslice, i].T, alpha=.5,
                      cmap=mask_cmap, interpolation="nearest")
            ax.axis("off")
        add_colorbar(f, "Blues", -zhigh, -zlow, .18, .23, "%.1f")
        add_colorbar(f, "Reds_r", zlow, zhigh, .59, .23, "%.1f")

        # The contrast name is the directory that holds the zstat file
        contrast = op.basename(op.dirname(fname))
        # Tolerate re-runs in a working directory that already has output
        if not op.isdir(contrast):
            os.makedirs(contrast)
        savename = op.join(contrast, "zstat1.png")
        f.savefig(savename, **pngkws)
        report.append(op.abspath(contrast))
        plt.close(f)

    return report
Ejemplo n.º 14
0
def regplot(x, y, data=None, corr_func=stats.pearsonr, func_name=None,
            xlabel="", ylabel="", ci=95, size=None, annotloc=None, color=None,
            reg_kws=None, scatter_kws=None, dist_kws=None, text_kws=None):
    """Scatterplot with regression line, marginals, and correlation value.

    A new square figure is created; nothing is returned.

    Parameters
    ----------
    x : sequence or string
        Independent variable.
    y : sequence or string
        Dependent variable.
    data : dataframe, optional
        If dataframe is given, x, and y are interpreted as string keys
        for selecting to dataframe column names.
    corr_func : callable, optional
        Correlation function; expected to take two arrays and return a
        numeric or (statistic, pval) tuple.
    func_name : string, optional
        Use in lieu of function name for fit statistic annotation.
    xlabel, ylabel : string, optional
        Axis label names if inputs are not Pandas objects or to override.
    ci : int or None
        Confidence interval for the regression estimate; None skips the
        bootstrapped band.
    size: int
        Figure size (will be a square; only need one int). Defaults to the
        height in the ``figure.figsize`` rc parameter.
    annotloc : two or three tuple
        Specified with (xpos, ypos [, horizontalalignment]). Currently not
        used; the annotation is placed with ``loc="best"``.
    color : matplotlib color scheme
        Color of everything but the regression line; can be overridden by
        passing `color` to subfunc kwargs.
    {reg, scatter, dist, text}_kws: dicts
        Further keyword arguments for the constituent plots. The dicts
        passed in are not modified.

    """
    # Interpret inputs
    if data is not None:
        if not xlabel:
            xlabel = x
        if not ylabel:
            ylabel = y
        x = data[x].values
        y = data[y].values
    else:
        if hasattr(x, "name") and not xlabel:
            if x.name is not None:
                xlabel = x.name
        if hasattr(y, "name") and not ylabel:
            if y.name is not None:
                ylabel = y.name
        x = np.asarray(x)
        y = np.asarray(y)

    # Copy the keyword dicts: they are popped from / updated below and the
    # caller may want to reuse them for another plot
    scatter_kws = {} if scatter_kws is None else dict(scatter_kws)
    dist_kws = {} if dist_kws is None else dict(dist_kws)
    reg_kws = {} if reg_kws is None else dict(reg_kws)
    if text_kws is None:
        text_kws = {}

    # Set up the figure and axes
    size = mpl.rcParams["figure.figsize"][1] if size is None else size
    fig = plt.figure(figsize=(size, size))
    ax_scatter = fig.add_axes([0.05, 0.05, 0.75, 0.75])
    ax_x_marg = fig.add_axes([0.05, 0.82, 0.75, 0.13])
    ax_y_marg = fig.add_axes([0.82, 0.05, 0.13, 0.75])

    # Plot the scatter
    if color is not None and "color" not in scatter_kws:
        scatter_kws.update(color=color)
    marker = scatter_kws.pop("markerstyle", "o")
    mew = scatter_kws.pop("mew", 0)
    # Default alpha decays with the number of points, with a floor of .1
    alpha_maker = stats.norm(0, 100)
    alpha = alpha_maker.pdf(len(x)) / alpha_maker.pdf(0)
    alpha = max(alpha, .1)
    alpha = scatter_kws.pop("alpha", alpha)
    ax_scatter.plot(x, y, marker, alpha=alpha, mew=mew, **scatter_kws)
    ax_scatter.set_xlabel(xlabel)
    ax_scatter.set_ylabel(ylabel)

    # Marginal plots using our distplot function
    if color is not None and "color" not in dist_kws:
        dist_kws.update(color=color)
    dist_kws["axlabel"] = False
    distplot(x, ax=ax_x_marg, **dist_kws)
    distplot(y, ax=ax_y_marg, vertical=True, **dist_kws)
    for ax in [ax_x_marg, ax_y_marg]:
        ax.set_xticklabels([])
        ax.set_yticklabels([])

    # Regression line plot
    xlim = ax_scatter.get_xlim()
    a, b = np.polyfit(x, y, 1)
    reg_color = reg_kws.pop("color", "#222222")
    yhat = np.polyval([a, b], xlim)
    ax_scatter.plot(xlim, yhat, color=reg_color, **reg_kws)

    # This is a hack to get the annotation to work: an invisible line
    # serves as the legend handle that carries the statistic text
    reg = ax_scatter.plot(xlim, yhat, lw=0)

    # Bootstrapped regression standard error
    if ci is not None:
        xx = np.linspace(xlim[0], xlim[1], 100)

        def _bootstrap_reg(x, y):
            fit = np.polyfit(x, y, 1)
            return np.polyval(fit, xx)

        boots = moss.bootstrap(x, y, func=_bootstrap_reg)
        ci_lims = [50 - ci / 2., 50 + ci / 2.]
        ci_band = moss.percentiles(boots, ci_lims, axis=0)
        ax_scatter.fill_between(xx, *ci_band, color=reg_color, alpha=.15)
        ax_scatter.set_xlim(xlim)

    # Calculate a fit statistic and p value
    if func_name is None:
        func_name = corr_func.__name__
    out = corr_func(x, y)
    try:
        s, p = out
    except TypeError:
        # A bare number came back; reuse it rather than recomputing
        msg = "%s: %.3f" % (func_name, out)
    else:
        msg = "%s: %.2g (p=%.2g%s)" % (func_name, s, p, moss.sig_stars(p))

    ax_scatter.legend(reg, [msg], loc="best", prop=text_kws)

    # Set the axes on the marginal plots
    ax_x_marg.set_xlim(ax_scatter.get_xlim())
    ax_x_marg.set_yticks([])
    ax_y_marg.set_ylim(ax_scatter.get_ylim())
    ax_y_marg.set_xticks([])
Ejemplo n.º 15
0
def violinplot(vals,
               groupby=None,
               inner="box",
               color=None,
               positions=None,
               names=None,
               order=None,
               kernel="gau",
               bw="scott",
               widths=.8,
               alpha=None,
               join_rm=False,
               gridsize=100,
               cut=3,
               inner_kws=None,
               ax=None,
               **kwargs):
    """Create a violin plot (a combination of boxplot and kernel density plot).

    Parameters
    ----------
    vals : DataFrame, Series, 2D array, or list of vectors.
        Data for plot. DataFrames and 2D arrays are assumed to be "wide" with
        each column mapping to a violin. Lists of data are assumed to have
        one element per violin. Can also provide one long Series in
        conjunction with a grouping element as the `groupby` parameter to
        reshape the data into several violins. Otherwise 1D data will
        produce a single violin.
    groupby : grouping object
        Used to split a Series into violins; forwarded to `_box_reshape`.
    inner : {"box", "stick", "points"}
        What to draw inside each violin: quartiles and median ("box"), one
        line per observation ("stick"; "sticks" is accepted as an alias), or
        one marker per observation ("points"). Any other value draws only
        the violin itself.
    color : mpl color, sequence of colors, or seaborn palette name
        Color specification for the violin fill; resolved by `_box_colors`.
    positions : number or sequence of numbers
        Position of first violin or positions of each violin.
    names : list of strings, optional
        Names to plot on x axis; otherwise plots numbers. This will override
        names inferred from Pandas inputs.
    order : list of strings, optional
        If vals is a Pandas object with name information, you can control the
        order of the plot by providing the violin names in your preferred
        order.
    kernel : {'gau' | 'cos' | 'biw' | 'epa' | 'tri' | 'triw' }
        Code for shape of kernel to fit with.
    bw : {'scott' | 'silverman' | scalar}
        Name of reference method to determine kernel size, or size as a
        scalar.
    widths : float
        Width of each violin at maximum density.
    alpha : float, optional
        Transparency of violin fill.
    join_rm : boolean, optional
        If True, positions in the input arrays are treated as repeated
        measures and are joined with a line plot.
    gridsize : int
        Number of discrete gridpoints to evaluate the density on.
    cut : scalar
        Draw the estimate to cut * bw from the extreme data points.
    inner_kws : dict, optional
        Keyword arguments for the inner plot. The keys "alpha", "color",
        "marker" and "lw" are interpreted here; everything else is passed to
        ``ax.plot``. The dict itself is not modified.
    ax : matplotlib axis, optional
        Axis to plot on, otherwise grab current axis.
    kwargs : additional keyword arguments
        Only ``lw`` / ``linewidth`` is read; it sets the width of the violin
        outline (default 1.5).

    Returns
    -------
    ax : matplotlib axis
        Axis with violin plot.

    Raises
    ------
    ValueError
        If the number of names does not match the number of violins.

    """
    if ax is None:
        ax = plt.gca()

    # Reshape and find labels for the plot
    vals, xlabel, ylabel, names = _box_reshape(vals, groupby, names, order)

    # Validate before anything is drawn so a bad call leaves the axis clean
    if names is not None and len(vals) != len(names):
        raise ValueError("Length of names list must match nuber of bins")

    # Sort out the plot colors
    colors, gray = _box_colors(vals, color)

    # The docstring historically advertised "sticks"; treat it as "stick"
    if inner == "sticks":
        inner = "stick"

    # Work on a copy so the pops below do not empty the caller's dict
    inner_kws = {} if inner_kws is None else dict(inner_kws)
    in_alpha = inner_kws.pop("alpha", .6 if inner == "points" else 1)
    in_alpha *= 1 if alpha is None else alpha
    in_color = inner_kws.pop("color", gray)
    in_marker = inner_kws.pop("marker", ".")
    in_lw = inner_kws.pop("lw", 1.5 if inner == "box" else .8)

    # Find where the violins are going
    if positions is None:
        positions = np.arange(1, len(vals) + 1)
    elif not hasattr(positions, "__iter__"):
        positions = np.arange(positions, len(vals) + positions)

    # Outline width: honor either spelling, falling back to the default
    lw = kwargs.get("lw", kwargs.get("linewidth", 1.5))

    # Iterate over the variables
    for i, a in enumerate(vals):

        # Fit the KDE
        x = positions[i]
        kde = sm.nonparametric.KDEUnivariate(a)
        fft = kernel == "gau"
        kde.fit(bw=bw, kernel=kernel, gridsize=gridsize, cut=cut, fft=fft)
        y, dens = kde.support, kde.density

        # Rescale so the widest point of the violin spans `widths`
        scl = 1 / (dens.max() / (widths / 2))
        dens *= scl

        # Draw the violin
        ax.fill_betweenx(y, x - dens, x + dens, alpha=alpha, color=colors[i])
        if inner == "box":
            for quant in moss.percentiles(a, [25, 75]):
                q_x = kde.evaluate(quant) * scl
                q_x = [x - q_x, x + q_x]
                ax.plot(q_x, [quant, quant],
                        color=in_color,
                        linestyle=":",
                        linewidth=in_lw,
                        **inner_kws)
            med = np.median(a)
            m_x = kde.evaluate(med) * scl
            m_x = [x - m_x, x + m_x]
            ax.plot(m_x, [med, med],
                    color=in_color,
                    linestyle="--",
                    linewidth=in_lw,
                    **inner_kws)
        elif inner == "stick":
            x_vals = kde.evaluate(a) * scl
            x_vals = [x - x_vals, x + x_vals]
            ax.plot(x_vals, [a, a],
                    color=in_color,
                    linewidth=in_lw,
                    alpha=in_alpha,
                    **inner_kws)
        elif inner == "points":
            x_vals = [x for _ in a]
            ax.plot(x_vals,
                    a,
                    in_marker,
                    color=in_color,
                    alpha=in_alpha,
                    mew=0,
                    **inner_kws)
        for side in [-1, 1]:
            ax.plot((side * dens) + x, y, c=gray, lw=lw)

    # Draw the repeated measure bridges at the actual violin positions
    if join_rm:
        ax.plot(positions, vals, color=in_color, alpha=2. / 3)

    # Add in semantic labels
    ax.set_xticks(positions)
    if names is not None:
        ax.set_xticklabels(names)
    ax.set_xlim(positions[0] - .5, positions[-1] + .5)

    if xlabel is not None:
        ax.set_xlabel(xlabel)
    if ylabel is not None:
        ax.set_ylabel(ylabel)

    ax.xaxis.grid(False)
    return ax
Ejemplo n.º 16
0
def lmplot(x,
           y,
           data,
           color=None,
           row=None,
           col=None,
           x_estimator=None,
           x_ci=95,
           fit_line=True,
           ci=95,
           truncate=False,
           sharex=True,
           sharey=True,
           palette="hls",
           size=None,
           scatter_kws=None,
           line_kws=None,
           palette_kws=None):
    """Plot a linear model from a DataFrame.

    A new figure is created with one facet per row/col level; nothing is
    returned.

    Parameters
    ----------
    x, y : strings
        column names in `data` DataFrame for x and y variables
    data : DataFrame
        source of data for the model
    color : string, optional
        DataFrame column name to group the model by color
    row, col : strings, optional
        DataFrame column names to make separate plot facets
    x_estimator : callable, optional
        Interpret X values as factor labels and use this function
        to plot the point estimate and bootstrapped CI
    x_ci : int optional
        size of confidence interval for x_estimator error bars
    fit_line : bool, optional
        if True fit a regression line by color/row/col and plot
    ci : int, optional
        confidence interval for the regression line; None skips the band
    truncate : bool, optional
        if True, only fit line from data min to data max
    sharex, sharey : bools, optional
        only relevant if faceting; passed to plt.subplots
    palette : seaborn color palette argument
        if using separate plots by color, draw with this color palette
    size : float, optional
        size (plots are square) for each plot facet
    {scatter, line}_kws : dictionary
        keyword arguments to pass to the underlying plot functions; "ms"
        and "mew" in `scatter_kws` default to 7 (4 without `x_estimator`)
        and 0. The dicts passed in are not modified.
    palette_kws : dictionary
        keyword arguments for seaborn.color_palette

    """
    # TODO
    # - position_{dodge, jitter}
    # - legend when fit_line is False
    # - truncate fit
    # - wrap title when wide
    # - wrap columns

    # First sort out the general figure layout
    if size is None:
        size = mpl.rcParams["figure.figsize"][1]

    # Build the facet masks. Branch on whether a facet variable was given
    # (not on the number of levels) so the level values used for the titles
    # exist even when the variable has a single unique value.
    if row is None:
        row_vals = None
        row_masks = [np.repeat(True, len(data))]
    else:
        row_vals = np.sort(data[row].unique())
        row_masks = [data[row] == val for val in row_vals]
    nrow = len(row_masks)

    if col is None:
        col_vals = None
        col_masks = [np.repeat(True, len(data))]
    else:
        col_vals = np.sort(data[col].unique())
        col_masks = [data[col] == val for val in col_vals]
    ncol = len(col_masks)

    f, axes = plt.subplots(nrow,
                           ncol,
                           sharex=sharex,
                           sharey=sharey,
                           figsize=(size * ncol, size * nrow))
    axes = np.atleast_2d(axes).reshape(nrow, ncol)

    if palette_kws is None:
        palette_kws = {}

    # Sort out the plot colors
    color_factor = color
    if color is None:
        hue_masks = [np.repeat(True, len(data))]
        colors = ["#222222"]
    else:
        hue_vals = np.sort(data[color].unique())
        hue_masks = [data[color] == val for val in hue_vals]
        colors = color_palette(palette, len(hue_masks), **palette_kws)

    # Default keyword arguments for plot components. The marker settings
    # are extracted once, from a copy, so that every facet and hue level
    # gets the same values and the caller's dict is left intact.
    scatter_kws = {} if scatter_kws is None else dict(scatter_kws)
    if line_kws is None:
        line_kws = {}
    ms = scatter_kws.pop("ms", 4 if x_estimator is None else 7)
    mew = scatter_kws.pop("mew", 0)

    # First walk through the facets and plot the scatters
    for row_i, row_mask in enumerate(row_masks):
        for col_j, col_mask in enumerate(col_masks):
            ax = axes[row_i, col_j]
            if not sharex or (row_i + 1 == len(row_masks)):
                ax.set_xlabel(x)
            if not sharey or col_j == 0:
                ax.set_ylabel(y)

            # Title the plot if we are faceting
            title = ""
            if row is not None:
                title += "%s = %s" % (row, row_vals[row_i])
            if row is not None and col is not None:
                title += " | "
            if col is not None:
                title += "%s = %s" % (col, col_vals[col_j])
            ax.set_title(title)

            for hue_k, hue_mask in enumerate(hue_masks):
                color = colors[hue_k]
                data_ijk = data[row_mask & col_mask & hue_mask]

                if x_estimator is not None:
                    # Collapse y at each unique x to a point estimate with
                    # a bootstrapped confidence interval
                    x_vals = data_ijk[x].unique()
                    y_grouped = [
                        np.array(data_ijk[y][data_ijk[x] == v]) for v in x_vals
                    ]
                    y_est = [x_estimator(y_i) for y_i in y_grouped]
                    y_boots = [
                        moss.bootstrap(np.array(y_i), func=x_estimator)
                        for y_i in y_grouped
                    ]
                    ci_lims = [50 - x_ci / 2., 50 + x_ci / 2.]
                    y_ci = [moss.percentiles(y_i, ci_lims) for y_i in y_boots]
                    y_error = ci_to_errsize(np.transpose(y_ci), y_est)

                    ax.plot(x_vals,
                            y_est,
                            "o",
                            mew=mew,
                            ms=ms,
                            color=color,
                            **scatter_kws)
                    ax.errorbar(x_vals, y_est, y_error, fmt=None, ecolor=color)
                else:
                    ax.plot(data_ijk[x],
                            data_ijk[y],
                            "o",
                            color=color,
                            mew=mew,
                            ms=ms,
                            **scatter_kws)

    for ax_i in np.ravel(axes):
        ax_i.set_xmargin(.05)
        ax_i.autoscale_view()

    # Now walk through again and plot the regression estimate
    # and a confidence interval for the regression line
    if fit_line:
        for row_i, row_mask in enumerate(row_masks):
            for col_j, col_mask in enumerate(col_masks):
                ax = axes[row_i, col_j]
                xlim = ax.get_xlim()

                for hue_k, hue_mask in enumerate(hue_masks):
                    color = colors[hue_k]
                    data_ijk = data[row_mask & col_mask & hue_mask]
                    x_vals = np.array(data_ijk[x])
                    y_vals = np.array(data_ijk[y])

                    # Sort out the limit of the fit
                    if truncate:
                        xx = np.linspace(x_vals.min(), x_vals.max(), 100)
                    else:
                        xx = np.linspace(xlim[0], xlim[1], 100)

                    # Inner function to bootstrap the regression
                    def _bootstrap_reg(x, y):
                        fit = np.polyfit(x, y, 1)
                        return np.polyval(fit, xx)

                    # Regression line confidence interval
                    if ci is not None:
                        ci_lims = [50 - ci / 2., 50 + ci / 2.]
                        boots = moss.bootstrap(x_vals,
                                               y_vals,
                                               func=_bootstrap_reg)
                        ci_band = moss.percentiles(boots, ci_lims, axis=0)
                        ax.fill_between(xx, *ci_band, color=color, alpha=.15)

                    fit = np.polyfit(x_vals, y_vals, 1)
                    reg = np.polyval(fit, xx)
                    if color_factor is None:
                        label = ""
                    else:
                        label = hue_vals[hue_k]
                    ax.plot(xx, reg, color=color, label=str(label), **line_kws)
                    # Drawing the line must not change the scatter limits
                    ax.set_xlim(xlim)

    # Plot the legend on the upper left facet and adjust the layout
    if color_factor is not None:
        axes[0, 0].legend(loc="best", title=color_factor)
    plt.tight_layout()
Ejemplo n.º 17
0
def violin(vals,
           inner="box",
           position=None,
           widths=.5,
           join_rm=False,
           names=None,
           ax=None,
           **kwargs):
    """Create a violin plot (a combination of boxplot and KDE plot).

    Parameters
    ----------
    vals : array or sequence of arrays
        data to plot; the columns of a 2D array become separate violins,
        and a single row or single column is treated as one vector
    inner : box | stick | points
        plot quartiles or individual sample values inside violin
        ("sticks" is accepted as an alias for "stick")
    position : number or sequence of numbers
        position of first violin or positions of each violin
    widths : float
        width of each violin at maximum density
    join_rm : boolean, optional
        if True, positions in the input arrays are treated as repeated
        measures and are joined with a line plot
    names : list of strings, optional
        names to plot on x axis, otherwise plots numbers
    ax : matplotlib axis, optional
        axis to plot on, otherwise uses ``plt.subplot(111)``
    kwargs : additional keyword arguments
        passed to a throwaway ``ax.plot`` call whose line color becomes the
        violin color

    Returns
    -------
    ax : matplotlib axis
        axis with violin plot

    Raises
    ------
    ValueError
        if `vals` has more than 2 dimensions or the number of names does
        not match the number of violins

    """
    if ax is None:
        ax = plt.subplot(111)

    # Normalize the input to a list with one 1D vector per violin
    if hasattr(vals, 'shape'):
        if len(vals.shape) == 1:
            if hasattr(vals[0], 'shape'):
                vals = list(vals)
            else:
                vals = [vals]
        elif len(vals.shape) == 2:
            nr, nc = vals.shape
            if nr == 1 or nc == 1:
                # A single row or column is one vector either way
                vals = [vals.ravel()]
            else:
                vals = [vals[:, j] for j in range(nc)]
        else:
            raise ValueError("Input x can have no more than 2 dimensions")
    if not hasattr(vals[0], '__len__'):
        vals = [vals]

    vals = [np.asarray(a, float) for a in vals]

    # Validate before anything is drawn so a bad call leaves the axis clean
    if names is not None and len(vals) != len(names):
        raise ValueError("Length of names list must match nuber of bins")

    # Draw and immediately remove a dummy line to learn which color the
    # axis (or the caller's kwargs) would assign
    line, = ax.plot(vals[0].mean(), vals[0].mean(), **kwargs)
    color = line.get_color()
    line.remove()

    gray = "#555555"

    if inner == "sticks":
        inner = "stick"

    if position is None:
        position = np.arange(1, len(vals) + 1)
    elif not hasattr(position, "__iter__"):
        position = np.arange(position, len(vals) + position)

    for i, a in enumerate(vals):
        x = position[i]
        kde = stats.gaussian_kde(a)
        y = _kde_support(a, kde, 1000)
        dens = kde(y)
        # Rescale so the widest point of the violin spans `widths`
        scl = 1 / (dens.max() / (widths / 2))
        dens *= scl

        ax.fill_betweenx(y, x - dens, x + dens, alpha=.7, color=color)
        if inner == "box":
            for quant in moss.percentiles(a, [25, 75]):
                q_x = kde(quant) * scl
                q_x = [x - q_x, x + q_x]
                ax.plot(q_x, [quant, quant],
                        gray,
                        linestyle=":",
                        linewidth=1.5)
            med = np.median(a)
            m_x = kde(med) * scl
            m_x = [x - m_x, x + m_x]
            ax.plot(m_x, [med, med], gray, linestyle="--", linewidth=1.2)
        elif inner == "stick":
            x_vals = kde(a) * scl
            x_vals = [x - x_vals, x + x_vals]
            ax.plot(x_vals, [a, a], gray, linewidth=.7, alpha=.7)
        elif inner == "points":
            x_vals = [x for _ in a]
            ax.plot(x_vals, a, "o", color=gray, alpha=.3)
        for side in [-1, 1]:
            ax.plot((side * dens) + x, y, gray, linewidth=1)

    # Join repeated measures at the actual violin positions
    if join_rm:
        ax.plot(position, vals, color=color, alpha=2. / 3)

    ax.set_xticks(position)
    if names is not None:
        ax.set_xticklabels(names)
    ax.set_xlim(position[0] - .5, position[-1] + .5)

    return ax
Ejemplo n.º 18
0
def regplot(x, y, data=None, corr_func=stats.pearsonr, xlabel="", ylabel="",
            ci=95, size=None, annotloc=None, color=None, reg_kws=None,
            scatter_kws=None, dist_kws=None, text_kws=None):
    """Scatterplot with regression line, marginals, and correlation value.

    A new square figure is created holding a main scatter axis plus a top
    and a right marginal axis, both drawn with ``distplot``.

    Parameters
    ----------
    x : sequence or string
        independent variable, or a column name when `data` is given
    y : sequence or string
        dependent variable, or a column name when `data` is given
    data : dataframe, optional
        if dataframe is given, x, and y are interpreted as
        string keys mapping to dataframe column names
    corr_func : callable, optional
        correlation function; expected to take two arrays
        and return a (statistic, pval) tuple
    xlabel, ylabel : string, optional
        label names; when `data` is given and a label is left empty,
        the corresponding column name is used as the label
    ci : int or None
        confidence interval for the regression line; None skips the
        bootstrapped band
    size: int
        figure size (will be a square; only need one int); defaults to 6
    annotloc : two or three tuple
        (xpos, ypos [, horizontalalignment])
    color : matplotlib color scheme
        color of everything but the regression line
        overridden by passing `color` to subfunc kwargs
    {reg, scatter, dist, text}_kws: dicts
        further keyword arguments for the constituent plots; they are
        copied internally, so the caller's dicts are never modified

    """
    # Interpret inputs
    if data is not None:
        # Column names double as axis labels unless a label was supplied
        if not xlabel:
            xlabel = x
        if not ylabel:
            ylabel = y
        x = np.array(data[x])
        y = np.array(data[y])

    # Work on private copies: entries are popped/added below and that must
    # not leak back into dicts owned (and possibly reused) by the caller
    scatter_kws = {} if scatter_kws is None else dict(scatter_kws)
    dist_kws = {} if dist_kws is None else dict(dist_kws)
    reg_kws = {} if reg_kws is None else dict(reg_kws)
    text_kws = {} if text_kws is None else dict(text_kws)

    # Set up the figure and axes
    size = 6 if size is None else size
    fig = plt.figure(figsize=(size, size))
    ax_scatter = fig.add_axes([0.05, 0.05, 0.75, 0.75])
    ax_x_marg = fig.add_axes([0.05, 0.82, 0.75, 0.13])
    ax_y_marg = fig.add_axes([0.82, 0.05, 0.13, 0.75])

    # Plot the scatter
    if color is not None and "color" not in scatter_kws:
        scatter_kws.update(color=color)
    marker = scatter_kws.pop("markerstyle", "o")
    # Default point alpha falls off with sample size along a Gaussian
    # (sd = 100 points) and is floored at .1
    alpha_maker = stats.norm(0, 100)
    alpha = alpha_maker.pdf(len(x)) / alpha_maker.pdf(0)
    alpha = max(alpha, .1)
    alpha = scatter_kws.pop("alpha", alpha)
    ax_scatter.plot(x, y, marker, alpha=alpha, mew=0, **scatter_kws)
    ax_scatter.set_xlabel(xlabel)
    ax_scatter.set_ylabel(ylabel)

    # Marginal plots using our distplot function
    if color is not None and "color" not in dist_kws:
        dist_kws.update(color=color)
    if "legend" not in dist_kws:
        dist_kws["legend"] = False
    distplot(x, ax=ax_x_marg, **dist_kws)
    distplot(y, ax=ax_y_marg, vertical=True, **dist_kws)
    for ax in [ax_x_marg, ax_y_marg]:
        ax.set_xticklabels([])
        ax.set_yticklabels([])

    # Regression line plot, spanning the x limits chosen by the scatter
    xlim = ax_scatter.get_xlim()
    a, b = np.polyfit(x, y, 1)
    reg_color = reg_kws.pop("color", "#222222")
    ax_scatter.plot(xlim, np.polyval([a, b], xlim),
                    color=reg_color, **reg_kws)

    # Bootstrapped regression standard error
    if ci is not None:
        xx = np.linspace(xlim[0], xlim[1], 100)

        def _bootstrap_reg(x, y):
            fit = np.polyfit(x, y, 1)
            return np.polyval(fit, xx)

        # NOTE(review): presumably moss.bootstrap resamples x and y jointly
        # and stacks the fitted lines along axis 0 -- confirm against moss
        boots = moss.bootstrap(x, y, func=_bootstrap_reg)
        ci_lims = [50 - ci / 2., 50 + ci / 2.]
        ci_band = moss.percentiles(boots, ci_lims, axis=0)
        ax_scatter.fill_between(xx, *ci_band, color=reg_color, alpha=.15)
        # Restore the limits captured before the band was drawn
        ax_scatter.set_xlim(xlim)

    # Calculate a correlation statistic and p value
    r, p = corr_func(x, y)
    msg = "%s: %.3f (p=%.3g%s)" % (corr_func.__name__, r, p, moss.sig_stars(p))
    if annotloc is None:
        xmin, xmax = xlim
        x_range = xmax - xmin
        # Negative slope leaves the top-right corner empty, positive the
        # top-left, so place the annotation accordingly
        if r < 0:
            xloc, align = xmax - x_range * .02, "right"
        else:
            xloc, align = xmin + x_range * .02, "left"
        ymin, ymax = ax_scatter.get_ylim()
        y_range = ymax - ymin
        yloc = ymax - y_range * .02
    else:
        if len(annotloc) == 3:
            xloc, yloc, align = annotloc
        else:
            xloc, yloc = annotloc
            align = "left"
    ax_scatter.text(xloc, yloc, msg, ha=align, va="top", **text_kws)

    # Set the axes on the marginal plots
    ax_x_marg.set_xlim(ax_scatter.get_xlim())
    ax_x_marg.set_yticks([])
    ax_y_marg.set_ylim(ax_scatter.get_ylim())
    ax_y_marg.set_xticks([])
Ejemplo n.º 19
0
def tsplot(x, data, err_style="ci_band", ci=68, interpolate=True,
           estimator=np.mean, n_boot=10000, smooth=False,
           err_palette=None, ax=None, err_kws=None, **kwargs):
    """Plot timeseries from a set of observations.

    Parameters
    ----------
    x : n_tp array
        x values
    data : n_obs x n_tp array
        array of timeseries data where first axis is observations. other
        objects (e.g. DataFrames) are converted to an array if possible
    err_style : string or list of strings
        names of ways to plot uncertainty across observations from set of
        {ci_band, ci_bars, boot_traces, boot_kde, obs_traces, obs_points};
        each name ``s`` is resolved to a module-level function ``_plot_<s>``
    ci : number or list of numbers
        confidence interval size(s) in percent. if a list, it will stack
        the error plots for each confidence interval
    interpolate : boolean
        if True the central trace is drawn as a line without markers,
        otherwise as "o" markers without a connecting line (unless
        ``marker``/``linestyle`` are given in kwargs)
    estimator : callable
        function to determine central tendency and to pass to bootstrap
        must take an ``axis`` argument
    n_boot : int
        number of bootstrap iterations
    smooth : boolean
        whether to perform a smooth bootstrap (resample from KDE)
    err_palette : palette specification, optional
        if given, styles whose name contains "obs" receive one color per
        observation (from ``color_palette``) instead of the line color
    ax : axis object, optional
        plot in given axis; if None uses the current axis (``plt.gca()``)
    err_kws : dict, optional
        keyword argument dictionary passed through to matplotlib
        function generating the error plot
    kwargs : further keyword arguments for main call to plot()

    Returns
    -------
    ax : matplotlib axis
        axis with plot data

    Raises
    ------
    ValueError
        if an entry of `err_style` has no matching ``_plot_<style>``
        function in the module globals

    """
    if ax is None:
        ax = plt.gca()

    if err_kws is None:
        err_kws = {}

    # Bootstrap the data for confidence intervals
    data = np.asarray(data)
    boot_data = moss.bootstrap(data, n_boot=n_boot, smooth=smooth,
                               axis=0, func=estimator)
    if not hasattr(ci, "__iter__"):
        ci = [ci]
    # Divide by a float: under Python 2 ``95 / 2`` truncates to 47, which
    # would silently turn a 95% interval into (3, 97) instead of (2.5, 97.5)
    ci_vals = [(50 - w / 2., 50 + w / 2.) for w in ci]
    cis = [moss.percentiles(boot_data, v, axis=0) for v in ci_vals]
    central_data = estimator(data, axis=0)

    # Plot the timeseries line to get its color
    line, = ax.plot(x, central_data, **kwargs)
    color = line.get_color()
    line.remove()
    # The resolved color is passed explicitly from here on
    kwargs.pop("color", None)

    # Use subroutines to plot the uncertainty
    # A bare string must be wrapped explicitly: Python 3 strings define
    # ``__iter__`` and would otherwise be walked character by character
    if isinstance(err_style, str) or not hasattr(err_style, "__iter__"):
        err_style = [err_style]
    for style in err_style:

        # Grab the function from the global environment
        try:
            plot_func = globals()["_plot_%s" % style]
        except KeyError:
            raise ValueError("%s is not a valid err_style" % style)

        # Possibly set up to plot each observation in a different color
        if err_palette is not None and "obs" in style:
            orig_color = color
            color = color_palette(err_palette, len(data), desat=.99)

        plot_kwargs = dict(ax=ax, x=x, data=data,
                           boot_data=boot_data,
                           central_data=central_data,
                           color=color, err_kws=err_kws)

        # One call per requested interval so the error plots stack
        for ci_i in cis:
            plot_kwargs["ci"] = ci_i
            plot_func(**plot_kwargs)

        # Restore the single line color for the remaining styles
        if err_palette is not None and "obs" in style:
            color = orig_color
    # Replot the central trace so it is prominent
    marker = kwargs.pop("marker", "" if interpolate else "o")
    linestyle = kwargs.pop("linestyle", "-" if interpolate else "")
    ax.plot(x, central_data, color=color,
            marker=marker, linestyle=linestyle, **kwargs)

    return ax
Ejemplo n.º 20
0
# Bootstrap the OLS weights: each row of ``w_boot`` holds the weights fit on
# one resampled dataset (one column per entry of ``w_ols``).
# NOTE(review): zeros/randint/arange/bar look like pylab-style star imports
# (numpy / matplotlib) -- confirm against the surrounding notebook.
w_boot = zeros((iterations, len(w_ols)))
for i in xrange(iterations):

    # Get an index vector to sample with replacement
    # (n_obs draws; presumably from [0, n_obs) if this is numpy's randint)
    samp = randint(0, n_obs, n_obs)

    # Resample data and model with the same row indices so that each
    # observation stays paired with its design-matrix row
    samp_X = X[samp, :]
    samp_y = y[samp]

    # Fit the model on this iteration
    w_boot[i, :] = ols_fit(samp_X, samp_y)

# Get the 95% confidence interval for each weight (across each column in
# the design matrix)
w_ols_ci = moss.percentiles(w_boot, [2.5, 97.5], axis=0)

# The bar() function expects errorbar coordinates to be relative to bar height
# We have a convenience function in our plotting library to convert these
ebar_coords = seaborn.ci_to_errsize(w_ols_ci, w_ols)

# Plot the confidence intervals as error bars on our barplot from above
# (bar positions are hard-coded for five weights; the two series are offset
# by .4, the bar width, so they sit side by side)
bar(arange(5) + .1, w, .4, label="actual weights")
bar(arange(5) + .5,
    w_ols,
    .4,
    yerr=ebar_coords,
    color=colors[1],
    ecolor="gray",
    label="estimated weights")
Ejemplo n.º 21
0
def violin(vals, groupby=None, inner="box", color=None, positions=None,
           names=None, widths=.8, alpha=None, join_rm=False, kde_thresh=1e-2,
           inner_kws=None, ax=None, **kwargs):
    """Create a violin plot (a combination of boxplot and KDE plot).

    Parameters
    ----------
    vals : array or sequence of arrays
        data to plot; a 2D array or DataFrame gives one violin per column
    groupby : grouping object
        if `vals` is a Series, this is used to group
    inner : box | stick | points
        plot quartiles or individual sample values inside violin
        ("sticks" is accepted as an alias for "stick"); any other value
        leaves the inside of the violin empty
    color : mpl color, sequence of colors, or seaborn palette name
        inner violin colors
    positions : number or sequence of numbers
        position of first violin or positions of each violin
    widths : float
        width of each violin at maximum density
    alpha : float, optional
        transparency of violin fill
    join_rm : boolean, optional
        if True, positions in the input arrays are treated as repeated
        measures and are joined with a line plot
    names : list of strings, optional
        names to plot on x axis, otherwise plots numbers
    kde_thresh : float, optional
        proportion of maximum at which to threshold the KDE curve
    inner_kws : dict, optional
        keyword arguments for inner plot; the dict is copied, so the
        caller's object is not modified
    ax : matplotlib axis, optional
        axis to plot on, otherwise uses the current axis (``plt.gca()``)
    kwargs : accepted for interface compatibility; currently unused

    Returns
    -------
    ax : matplotlib axis
        axis with violin plot

    Raises
    ------
    ValueError
        if `vals` has more than two dimensions or if the number of
        `names` does not match the number of violins

    """
    if ax is None:
        ax = plt.gca()

    # Default to unlabeled axes; pandas inputs may supply labels below.
    # Binding these up front guarantees they exist on every code path.
    xlabel = None
    ylabel = None

    if isinstance(vals, pd.DataFrame):
        if names is None:
            names = vals.columns
        xlabel = vals.columns.name
        vals = vals.values

    elif isinstance(vals, pd.Series) and groupby is not None:
        # Not every grouping object (e.g. a plain list) carries a name
        xlabel = getattr(groupby, "name", None)
        ylabel = vals.name
        # One (key, sub-Series) pair per group, in the groupby's key order
        grouped = list(vals.groupby(groupby))
        if names is None:
            names = [key for key, _ in grouped]
        vals = [group.values for _, group in grouped]

    # Normalize the input into a list with one 1D-like entry per violin
    if hasattr(vals, 'shape'):
        if len(vals.shape) == 1:
            if hasattr(vals[0], 'shape'):
                vals = list(vals)
            else:
                vals = [vals]
        elif len(vals.shape) == 2:
            nr, nc = vals.shape
            if nr == 1:
                vals = [vals]
            elif nc == 1:
                vals = [vals.ravel()]
            else:
                vals = [vals[:, i] for i in range(nc)]
        else:
            raise ValueError("Input x can have no more than 2 dimensions")
    if not hasattr(vals[0], '__len__'):
        vals = [vals]

    vals = [np.asarray(a, float) for a in vals]

    # Resolve `color` into one color per violin
    if color is None:
        colors = husl_palette(len(vals), l=.7)
    elif hasattr(color, "__iter__") and not isinstance(color, (tuple, str)):
        # A non-tuple, non-string iterable is taken as a list of colors;
        # strings are excluded because Python 3 strings are iterable too
        colors = color
    else:
        try:
            color = mpl.colors.colorConverter.to_rgb(color)
            colors = [color for _ in vals]
        except ValueError:
            # Not a single color spec, so treat it as a palette name
            colors = color_palette(color, len(vals))

    colors = [mpl.colors.colorConverter.to_rgb(c) for c in colors]
    colors = [desaturate(c, .7) for c in colors]

    # Outline/inner gray is tied to the darkest violin: 60% of the minimum
    # HLS lightness across the fill colors
    light_vals = [colorsys.rgb_to_hls(*c)[1] for c in colors]
    lum = min(light_vals) * .6
    gray = (lum, lum, lum)

    # Copy so the pops below do not empty a dict the caller may reuse
    inner_kws = {} if inner_kws is None else dict(inner_kws)

    if positions is None:
        positions = np.arange(1, len(vals) + 1)
    elif not hasattr(positions, "__iter__"):
        positions = np.arange(positions, len(vals) + positions)

    sticks = inner in ("stick", "sticks")

    in_alpha = inner_kws.pop("alpha", .6 if inner == "points" else 1)
    in_alpha *= 1 if alpha is None else alpha
    in_color = inner_kws.pop("color", gray)
    in_marker = inner_kws.pop("marker", ".")
    in_lw = inner_kws.pop("lw", 1.5 if inner == "box" else .8)

    for i, a in enumerate(vals):
        x = positions[i]
        kde = stats.gaussian_kde(a)
        # NOTE(review): _kde_support presumably returns the 1000-point grid
        # on which the density is evaluated, clipped using kde_thresh --
        # confirm against its definition
        y = _kde_support(a, kde, 1000, kde_thresh)
        dens = kde(y)
        # Rescale so the densest point reaches half of `widths` on each side
        scl = 1 / (dens.max() / (widths / 2))
        dens *= scl

        ax.fill_betweenx(y, x - dens, x + dens, alpha=alpha, color=colors[i])
        if inner == "box":
            # Quartile and median lines span the violin width at that value
            for quant in moss.percentiles(a, [25, 75]):
                q_x = kde(quant) * scl
                q_x = [x - q_x, x + q_x]
                ax.plot(q_x, [quant, quant], color=in_color,
                        linestyle=":", linewidth=in_lw, **inner_kws)
            med = np.median(a)
            m_x = kde(med) * scl
            m_x = [x - m_x, x + m_x]
            ax.plot(m_x, [med, med], color=in_color,
                    linestyle="--", linewidth=in_lw, **inner_kws)
        elif sticks:
            # One horizontal stick per observation
            x_vals = kde(a) * scl
            x_vals = [x - x_vals, x + x_vals]
            ax.plot(x_vals, [a, a], color=in_color,
                    linewidth=in_lw, alpha=in_alpha, **inner_kws)
        elif inner == "points":
            x_vals = [x for _ in a]
            ax.plot(x_vals, a, in_marker, color=in_color,
                    alpha=in_alpha, mew=0, **inner_kws)
        # Outline both edges of the violin
        for side in [-1, 1]:
            ax.plot((side * dens) + x, y, c=gray, linewidth=1.5)

    if join_rm:
        # Join repeated measures at the actual violin positions so the
        # lines stay aligned when custom `positions` are supplied
        ax.plot(positions, vals, color=in_color, alpha=2. / 3)

    ax.set_xticks(positions)
    if names is not None:
        if len(vals) != len(names):
            raise ValueError("Length of names list must match nuber of bins")
        ax.set_xticklabels(names)
    ax.set_xlim(positions[0] - .5, positions[-1] + .5)

    if xlabel is not None:
        ax.set_xlabel(xlabel)
    if ylabel is not None:
        ax.set_ylabel(ylabel)

    ax.xaxis.grid(False)
    return ax
Ejemplo n.º 22
0
def regplot(x, y, data=None, corr_func=stats.pearsonr, func_name=None,
            xlabel="", ylabel="", ci=95, size=None, annotloc=None, color=None,
            reg_kws=None, scatter_kws=None, dist_kws=None, text_kws=None):
    """Scatterplot with regression line, marginals, and correlation value.

    A new square figure is created holding a main scatter axis plus a top
    and a right marginal axis, both drawn with ``distplot``.

    Parameters
    ----------
    x : sequence or string
        Independent variable.
    y : sequence or string
        Dependent variable.
    data : dataframe, optional
        If dataframe is given, x, and y are interpreted as string keys
        for selecting to dataframe column names.
    corr_func : callable, optional
        Correlation function; expected to take two arrays and return a
        numeric or (statistic, pval) tuple.
    func_name : string, optional
        Use in lieu of function name for fit statistic annotation.
    xlabel, ylabel : string, optional
        Axis label names if inputs are not Pandas objects or to override.
    ci : int or None
        Confidence interval for the regression estimate; None skips the
        bootstrapped band.
    size: int
        Figure size (will be a square; only need one int). Defaults to the
        width in ``mpl.rcParams["figure.figsize"]``.
    annotloc : two or three tuple
        Specified with (xpos, ypos [, horizontalalignment]).
    color : matplotlib color scheme
        Color of everything but the regression line; can be overridden by
        passing `color` to subfunc kwargs.
    {reg, scatter, dist, text}_kws: dicts
        Further keyword arguments for the constituent plots. They are
        copied internally, so the caller's dicts are never modified.

    """
    # Interpret inputs
    if data is not None:
        if not xlabel:
            xlabel = x
        if not ylabel:
            ylabel = y
        x = data[x].values
        y = data[y].values
    else:
        # Fall back on the ``name`` of pandas-like inputs for the labels
        if hasattr(x, "name") and not xlabel:
            if x.name is not None:
                xlabel = x.name
        if hasattr(y, "name") and not ylabel:
            if y.name is not None:
                ylabel = y.name
        x = np.asarray(x)
        y = np.asarray(y)

    # Work on private copies: entries are popped/added below and that must
    # not leak back into dicts owned (and possibly reused) by the caller
    scatter_kws = {} if scatter_kws is None else dict(scatter_kws)
    dist_kws = {} if dist_kws is None else dict(dist_kws)
    reg_kws = {} if reg_kws is None else dict(reg_kws)
    text_kws = {} if text_kws is None else dict(text_kws)

    # Set up the figure and axes
    size = mpl.rcParams["figure.figsize"][0] if size is None else size
    fig = plt.figure(figsize=(size, size))
    ax_scatter = fig.add_axes([0.05, 0.05, 0.75, 0.75])
    ax_x_marg = fig.add_axes([0.05, 0.82, 0.75, 0.13])
    ax_y_marg = fig.add_axes([0.82, 0.05, 0.13, 0.75])

    # Plot the scatter
    if color is not None and "color" not in scatter_kws:
        scatter_kws.update(color=color)
    marker = scatter_kws.pop("markerstyle", "o")
    # Default point alpha falls off with sample size along a Gaussian
    # (sd = 100 points) and is floored at .1
    alpha_maker = stats.norm(0, 100)
    alpha = alpha_maker.pdf(len(x)) / alpha_maker.pdf(0)
    alpha = max(alpha, .1)
    alpha = scatter_kws.pop("alpha", alpha)
    ax_scatter.plot(x, y, marker, alpha=alpha, mew=0, **scatter_kws)
    ax_scatter.set_xlabel(xlabel)
    ax_scatter.set_ylabel(ylabel)

    # Marginal plots using our distplot function
    if color is not None and "color" not in dist_kws:
        dist_kws.update(color=color)
    # The marginals never carry their own axis label
    dist_kws["xlabel"] = False
    distplot(x, ax=ax_x_marg, **dist_kws)
    distplot(y, ax=ax_y_marg, vertical=True, **dist_kws)
    for ax in [ax_x_marg, ax_y_marg]:
        ax.set_xticklabels([])
        ax.set_yticklabels([])

    # Regression line plot, spanning the x limits chosen by the scatter
    xlim = ax_scatter.get_xlim()
    a, b = np.polyfit(x, y, 1)
    reg_color = reg_kws.pop("color", "#222222")
    ax_scatter.plot(xlim, np.polyval([a, b], xlim),
                    color=reg_color, **reg_kws)

    # Bootstrapped regression standard error
    if ci is not None:
        xx = np.linspace(xlim[0], xlim[1], 100)

        def _bootstrap_reg(x, y):
            fit = np.polyfit(x, y, 1)
            return np.polyval(fit, xx)

        # NOTE(review): presumably moss.bootstrap resamples x and y jointly
        # and stacks the fitted lines along axis 0 -- confirm against moss
        boots = moss.bootstrap(x, y, func=_bootstrap_reg)
        ci_lims = [50 - ci / 2., 50 + ci / 2.]
        ci_band = moss.percentiles(boots, ci_lims, axis=0)
        ax_scatter.fill_between(xx, *ci_band, color=reg_color, alpha=.15)
        # Restore the limits captured before the band was drawn
        ax_scatter.set_xlim(xlim)

    # Calculate a fit statistic and p value
    if func_name is None:
        func_name = corr_func.__name__
    out = corr_func(x, y)
    # Only the unpacking is guarded: a scalar result raises TypeError and is
    # reused directly instead of evaluating corr_func a second time
    try:
        s, p = out
    except TypeError:
        s = out
        has_pval = False
    else:
        has_pval = True
    if has_pval:
        msg = "%s: %.3f (p=%.3g%s)" % (func_name, s, p, moss.sig_stars(p))
    else:
        msg = "%s: %.3f" % (func_name, s)

    if annotloc is None:
        xmin, xmax = xlim
        x_range = xmax - xmin
        # Assume the fit statistic is correlation-esque for some
        # intuition on where the fit annotation should go
        if s < 0:
            xloc, align = xmax - x_range * .02, "right"
        else:
            xloc, align = xmin + x_range * .02, "left"
        ymin, ymax = ax_scatter.get_ylim()
        y_range = ymax - ymin
        yloc = ymax - y_range * .02
    else:
        if len(annotloc) == 3:
            xloc, yloc, align = annotloc
        else:
            xloc, yloc = annotloc
            align = "left"
    ax_scatter.text(xloc, yloc, msg, ha=align, va="top", **text_kws)

    # Set the axes on the marginal plots
    ax_x_marg.set_xlim(ax_scatter.get_xlim())
    ax_x_marg.set_yticks([])
    ax_y_marg.set_ylim(ax_scatter.get_ylim())
    ax_y_marg.set_xticks([])
Ejemplo n.º 23
0
Archivo: model.py Proyecto: toddt/lyman
def report_model(timeseries, sigmasquareds_file, zstat_files, r2_files):
    """Build the model report images, mostly from axial montages.

    Each montage overlays a statistic on the temporal mean of the
    timeseries, eight slices per row, and is saved as a PNG under the
    current working directory.

    Parameters
    ----------
    timeseries : string
        Path to the timeseries image; its mean over the last axis is the
        grayscale background of every montage.
    sigmasquareds_file : string
        Path to the residual variance image, written as
        ``sigmasquareds.png``.
    zstat_files : list of strings
        Paths to z-statistic images, written as ``zstat<N>.png`` with N
        counting from 1 in list order.
    r2_files : list of strings
        Paths to R^2 images; each output takes the input basename with
        ``.nii.gz`` replaced by ``.png``. The files are paired with three
        fixed colormaps, so at most three are plotted.

    Returns
    -------
    report : list of strings
        Absolute paths of the PNG files, in the order they were written.

    """
    sns.set()

    # Load the timeseries, get a mean image
    ts_data = nib.load(timeseries).get_data()
    mean_data = ts_data.mean(axis=-1)
    mlow, mhigh = 0, moss.percentiles(mean_data, 98)

    # Get the plot params. OMG I need a general function for this
    # Whole rows of n_col slices only; `start` skips half of the leftover
    # slices so the montage is centered in the slice stack
    n_slices = mean_data.shape[-1]
    n_row, n_col = n_slices // 8, 8
    start = n_slices % n_col // 2
    figsize = (10, 1.4 * n_row)
    spkws = dict(nrows=n_row, ncols=n_col, figsize=figsize, facecolor="k")
    savekws = dict(dpi=100, bbox_inches="tight", facecolor="k", edgecolor="k")

    def add_colorbar(f, cmap, low, high, left, width, fmt):
        """Draw a horizontal colorbar strip with its two end labels."""
        cbar = np.outer(np.arange(0, 1, .01), np.ones(10))
        cbar_ax = f.add_axes([left, 0, width, .03])
        cbar_ax.imshow(cbar.T, aspect="auto", cmap=cmap)
        cbar_ax.axis("off")
        f.text(left - .01, .018, fmt % low, ha="right", va="center",
               color="white", size=13, weight="demibold")
        f.text(left + width + .01, .018, fmt % high, ha="left",
               va="center", color="white", size=13, weight="demibold")

    report = []

    # Plot the residual image (sigmasquareds)
    ss = nib.load(sigmasquareds_file).get_data()
    # Color limits are taken before the background is masked out with NaN
    sslow, sshigh = moss.percentiles(ss, [2, 98])
    ss[mean_data == 0] = np.nan
    f, axes = plt.subplots(**spkws)
    for i, ax in enumerate(axes.ravel(), start):
        ax.imshow(mean_data[..., i].T, cmap="gray",
                  vmin=mlow, vmax=mhigh, interpolation="nearest")
        ax.imshow(ss[..., i].T, cmap="PuRd_r",
                  vmin=sslow, vmax=sshigh, alpha=.7)
        ax.axis("off")
    add_colorbar(f, "PuRd_r", sslow, sshigh, .35, .3, "%d")
    ss_png = op.abspath("sigmasquareds.png")
    f.savefig(ss_png, **savekws)
    plt.close(f)
    report.append(ss_png)

    # Now plot each zstat file
    for z_i, zname in enumerate(zstat_files, 1):
        zdata = nib.load(zname).get_data()
        # Show only |z| >= 2.3; sub-threshold voxels become transparent NaNs
        zlow = 2.3
        pos = zdata.copy()
        pos[pos < zlow] = np.nan
        neg = zdata.copy()
        neg[neg > -zlow] = np.nan
        # Keep the color scale from collapsing when no voxel exceeds 3.71
        zhigh = max(np.abs(zdata).max(), 3.71)
        f, axes = plt.subplots(**spkws)
        # Offset by `start` so these panels show the same slices as the
        # sigmasquareds and r2 montages
        for i, ax in enumerate(axes.ravel(), start):
            ax.imshow(mean_data[..., i].T, cmap="gray",
                      vmin=mlow, vmax=mhigh)
            ax.imshow(pos[..., i].T, cmap="Reds_r",
                      vmin=zlow, vmax=zhigh)
            ax.imshow(neg[..., i].T, cmap="Blues",
                      vmin=-zhigh, vmax=-zlow)
            ax.axis("off")
        add_colorbar(f, "Blues", -zhigh, -zlow, .15, .3, "%.1f")
        add_colorbar(f, "Reds_r", zlow, zhigh, .55, .3, "%.1f")

        fname = op.abspath("zstat%d.png" % z_i)
        f.savefig(fname, **savekws)
        plt.close(f)
        report.append(fname)

    # Now the r_2 files
    for rname, cmap in zip(r2_files, ["GnBu_r", "YlGn_r", "OrRd_r"]):
        data = nib.load(rname).get_data()
        # NaNs are zeroed only for choosing the upper color limit
        rhigh = moss.percentiles(np.nan_to_num(data), 99)
        f, axes = plt.subplots(**spkws)
        for i, ax in enumerate(axes.ravel(), start):
            ax.imshow(mean_data[..., i].T, cmap="gray",
                      vmin=mlow, vmax=mhigh, interpolation="nearest")
            ax.imshow(data[..., i].T, cmap=cmap, vmin=0, vmax=rhigh, alpha=.7)
            ax.axis("off")
        add_colorbar(f, cmap, 0, rhigh, .35, .3, "%.2f")

        fname = op.abspath(op.basename(rname).replace(".nii.gz", ".png"))
        f.savefig(fname, **savekws)
        plt.close(f)
        report.append(fname)

    return report
Ejemplo n.º 24
0
def tsplot(x,
           data,
           err_style=["ci_band"],
           ci=68,
           interpolate=True,
           estimator=np.mean,
           n_boot=10000,
           smooth=False,
           err_palette=None,
           ax=None,
           **kwargs):
    """Plot timeseries from a set of observations.

    Parameters
    ----------
    x : n_tp array
        x values
    data : n_obs x n_tp array
        array of timeseries data where first axis is e.g. subjects
    err_style : string or list of strings
        names of ways to plot uncertainty across observations from set of
        {ci_band, ci_bars, boot_traces, boot_kde, obs_traces, obs_points};
        each name ``s`` is resolved to a module-level function ``_plot_<s>``.
        The default list is only read, never modified.
    ci : number or list of numbers
        confidence interval size(s) in percent. if a list, it will stack
        the error plots for each confidence interval
    interpolate : boolean
        if True the central trace is drawn as a line without markers,
        otherwise as "o" markers without a connecting line (unless
        ``marker``/``linestyle`` are given in kwargs)
    estimator : callable
        function to determine central tendency and to pass to bootstrap
        must take an ``axis`` argument
    n_boot : int
        number of bootstrap iterations
    smooth : boolean
        whether to perform a smooth bootstrap (resample from KDE)
    err_palette : palette specification, optional
        if given, styles whose name contains "obs" receive one color per
        observation (from ``color_palette``) instead of the line color
    ax : axis object, optional
        plot in given axis; if None uses ``plt.subplot(111)``
    kwargs : further keyword arguments for main call to plot()

    Returns
    -------
    ax : matplotlib axis
        axis with plot data

    Raises
    ------
    ValueError
        if an entry of `err_style` has no matching ``_plot_<style>``
        function in the module globals

    """
    if ax is None:
        ax = plt.subplot(111)

    # Bootstrap the data for confidence intervals
    boot_data = moss.bootstrap(data,
                               n_boot=n_boot,
                               smooth=smooth,
                               axis=0,
                               func=estimator)
    if not hasattr(ci, "__iter__"):
        ci = [ci]
    # Divide by a float: under Python 2 ``95 / 2`` truncates to 47, which
    # would silently turn a 95% interval into (3, 97) instead of (2.5, 97.5)
    ci_vals = [(50 - w / 2., 50 + w / 2.) for w in ci]
    # Distinct loop name: Python 2 list comprehensions leak their variable
    # and would otherwise rebind `ci`
    cis = [moss.percentiles(boot_data, bounds, axis=0) for bounds in ci_vals]
    central_data = estimator(data, axis=0)

    # Plot the timeseries line to get its color
    line, = ax.plot(x, central_data, **kwargs)
    color = line.get_color()
    line.remove()
    # The resolved color is passed explicitly from here on
    kwargs.pop("color", None)

    # Use subroutines to plot the uncertainty
    # Accept a bare style name as well as a list; without this a string
    # would be iterated one character at a time
    if isinstance(err_style, str) or not hasattr(err_style, "__iter__"):
        err_style = [err_style]
    for style in err_style:

        # Grab the function from the global environment
        try:
            plot_func = globals()["_plot_%s" % style]
        except KeyError:
            raise ValueError("%s is not a valid err_style" % style)

        # Possibly set up to plot each observation in a different color
        if err_palette is not None and "obs" in style:
            orig_color = color
            color = color_palette(err_palette, len(data), desat=.99)

        plot_kwargs = dict(ax=ax,
                           x=x,
                           data=data,
                           boot_data=boot_data,
                           central_data=central_data,
                           color=color)

        # One call per requested interval so the error plots stack
        for ci_i in cis:
            plot_kwargs["ci"] = ci_i
            plot_func(**plot_kwargs)

        # Restore the single line color for the remaining styles
        if err_palette is not None and "obs" in style:
            color = orig_color
    # Replot the central trace so it is prominent
    marker = kwargs.pop("marker", "" if interpolate else "o")
    linestyle = kwargs.pop("linestyle", "-" if interpolate else "")
    ax.plot(x,
            central_data,
            color=color,
            marker=marker,
            linestyle=linestyle,
            **kwargs)

    return ax
Ejemplo n.º 25
0
def _partial_out(values, frame, confounds):
    """Regress confound columns out of ``values`` with vector rejection.

    For every name in ``confounds`` the matching column of ``frame`` is
    mean-centered and handed to ``moss.vector_reject`` together with the
    mean-centered ``values``; the mean of ``values`` is added back afterwards.
    Centering is done on temporaries, so ``frame`` is never written to.

    """
    for var in confounds:
        conf = frame[var] - frame[var].mean()
        center = values.mean()
        values = moss.vector_reject(values - center, conf) + center
    return values


def lmplot(x, y, data, color=None, row=None, col=None, col_wrap=None,
           x_estimator=None, x_ci=95, x_bins=None, n_boot=5000, fit_reg=True,
           order=1, ci=95, logistic=False, truncate=False,
           x_partial=None, y_partial=None, x_jitter=None, y_jitter=None,
           sharex=True, sharey=True, palette="husl", size=None,
           scatter_kws=None, line_kws=None, palette_kws=None):
    """Plot a linear model with faceting, color binning, and other options.

    A new figure is created with one axes per row/col facet. Nothing is
    returned; the plot is drawn as a side effect.

    Parameters
    ----------
    x, y : strings
        Column names in `data` DataFrame for x and y variables.
    data : DataFrame
        Source of data for the model.
    color : string, optional
        DataFrame column name to group the model by color.
    row, col : strings, optional
        DataFrame column names to make separate plot facets.
    col_wrap : int, optional
        Wrap col variable at this width - cannot be used with row facet.
    x_estimator : callable, optional
        Interpret X values as factor labels and use this function
        to plot the point estimate and bootstrapped CI.
    x_ci : int optional
        Size of confidence interval for x_estimator error bars.
    x_bins : sequence of floats, optional
        Bin the x variable with these values. Implies that x_estimator is
        mean, unless otherwise provided.
    n_boot : int, optional
        Number of bootstrap iterations to perform.
    fit_reg : bool, optional
        If True fit a regression model by color/row/col and plot.
    order : int, optional
        Order of the regression polynomial to fit.
    ci : int or None, optional
        Confidence interval for the regression line; None skips the band.
    logistic : bool, optional
        Fit the regression line with logistic regression.
    truncate : bool, optional
        If True, only fit line from data min to data max.
    {x, y}_partial : string or list of strings, optional
        Regress these variables out of the factors before plotting.
    {x, y}_jitter : float, optional
        Parameters for uniformly distributed random noise added to positions.
    sharex, sharey : bools, optional
        Only relevant if faceting; passed to plt.subplots.
    palette : seaborn color palette argument
        If using separate plots by color, draw with this color palette.
    size : float, optional
        Size (plots are square) for each plot facet.
    {scatter, line}_kws : dictionary
        Keyword arguments to pass to the underlying plot functions. The
        dictionaries are copied, so the caller's objects are left untouched.
        ``ms`` and ``mew`` in `scatter_kws` apply to both the raw scatter
        and the x_estimator point estimates; ``alpha`` only to the scatter.
    palette_kws : dictionary
        Keyword arguments for seaborn.color_palette.

    Raises
    ------
    ValueError
        If `col_wrap` is given without `col`, or together with `row`.

    """
    # TODO
    # - legend when fit_reg is False

    # First sort out the general figure layout
    if size is None:
        size = mpl.rcParams["figure.figsize"][1]

    if col is None and col_wrap is not None:
        raise ValueError("Need column facet variable for `col_wrap`")
    if row is not None and col_wrap is not None:
        raise ValueError("Cannot facet rows when using `col_wrap`")

    # Build the facet masks from whether a facet variable was requested,
    # not from how many levels it has: a variable with a single level must
    # still define its level values because they are used in the titles.
    everything = np.repeat(True, len(data))
    if row is None:
        row_vals, row_masks = None, [everything]
    else:
        row_vals = np.sort(data[row].unique())
        row_masks = [data[row] == val for val in row_vals]

    if col is None:
        col_vals, col_masks = None, [everything]
    else:
        col_vals = np.sort(data[col].unique())
        col_masks = [data[col] == val for val in col_vals]

    nrow, ncol = len(row_masks), len(col_masks)
    if col_wrap is not None:
        ncol = col_wrap
        # float() keeps this a true division regardless of interpreter
        nrow = int(np.ceil(len(col_masks) / float(col_wrap)))

    f, axes = plt.subplots(nrow, ncol, sharex=sharex, sharey=sharey,
                           figsize=(size * ncol, size * nrow))
    axes = np.atleast_2d(axes).reshape(nrow, ncol)

    def _facet_position(row_i, col_j):
        """Map facet indices onto the (possibly wrapped) axes grid."""
        if col_wrap is not None:
            return col_j // ncol, col_j % ncol
        return row_i, col_j

    if x_bins is not None:
        x_estimator = np.mean if x_estimator is None else x_estimator
        x_bins = np.c_[x_bins]

    if x_partial is not None and not isinstance(x_partial, list):
        x_partial = [x_partial]
    if y_partial is not None and not isinstance(y_partial, list):
        y_partial = [y_partial]

    if palette_kws is None:
        palette_kws = {}

    # Sort out the plot colors; `color` is reused below for the drawing
    # color of each group, so remember the name of the grouping column
    color_factor = color
    if color is None:
        hue_vals = None
        hue_masks = [everything]
        colors = ["#222222"]
    else:
        hue_vals = np.sort(data[color].unique())
        hue_masks = [data[color] == val for val in hue_vals]
        colors = color_palette(palette, len(hue_masks), **palette_kws)

    # Work on copies so the caller's keyword dictionaries are not mutated
    scatter_kws = {} if scatter_kws is None else dict(scatter_kws)
    line_kws = {} if line_kws is None else dict(line_kws)

    # Pull out the marker properties once; the defaults differ between the
    # raw scatter and the point estimates, but a user value wins for both
    user_ms = scatter_kws.pop("ms", None)
    scatter_ms = 4 if user_ms is None else user_ms
    estimate_ms = 7 if user_ms is None else user_ms
    marker_mew = scatter_kws.pop("mew", 0)
    scatter_alpha = scatter_kws.pop("alpha", .77)

    # First walk through the facets and plot the scatters
    for row_i, row_mask in enumerate(row_masks):
        for col_j, col_mask in enumerate(col_masks):
            f_row, f_col = _facet_position(row_i, col_j)
            ax = axes[f_row, f_col]
            if f_row + 1 == nrow:
                ax.set_xlabel(x)
            if f_col == 0:
                ax.set_ylabel(y)

            # Title the plot if we are faceting
            title = ""
            if row is not None:
                title += "%s = %s" % (row, row_vals[row_i])
            if row is not None and col is not None:
                title += " | "
            if col is not None:
                title += "%s = %s" % (col, col_vals[col_j])
            if size < 3:
                title = title.replace(" | ", "\n")
            ax.set_title(title)

            for hue_k, hue_mask in enumerate(hue_masks):
                color = colors[hue_k]
                data_ijk = data[row_mask & col_mask & hue_mask]
                if not len(data_ijk):
                    # Nothing to draw for this facet/color combination
                    continue

                if x_estimator is not None:
                    if x_bins is None:
                        x_vals = data_ijk[x].unique()
                        x_data = data_ijk[x]
                    else:
                        # Assign every observation to its nearest bin center
                        dist = distance.cdist(np.c_[data_ijk[x]], x_bins)
                        x_vals = x_bins.ravel()
                        x_data = x_bins[np.argmin(dist, axis=1)].ravel()

                    y_vals = data_ijk[y]
                    if y_partial is not None:
                        y_vals = _partial_out(y_vals, data_ijk, y_partial)

                    y_grouped = [np.array(y_vals[x_data == v])
                                 for v in x_vals]

                    y_est = [x_estimator(y_i) for y_i in y_grouped]
                    y_boots = [moss.bootstrap(np.array(y_i),
                                              func=x_estimator,
                                              n_boot=n_boot)
                               for y_i in y_grouped]
                    ci_lims = [50 - x_ci / 2., 50 + x_ci / 2.]
                    y_ci = [moss.percentiles(y_i, ci_lims) for y_i in y_boots]
                    y_error = ci_to_errsize(np.transpose(y_ci), y_est)

                    ax.plot(x_vals, y_est, "o", mew=marker_mew,
                            ms=estimate_ms, color=color, **scatter_kws)
                    # "none" is the documented way to draw error bars
                    # without markers or a connecting line
                    ax.errorbar(x_vals, y_est, y_error,
                                fmt="none", ecolor=color)
                else:
                    x_ = data_ijk[x]
                    y_ = data_ijk[y]

                    if x_partial is not None:
                        x_ = _partial_out(x_, data_ijk, x_partial)
                    if y_partial is not None:
                        y_ = _partial_out(y_, data_ijk, y_partial)

                    # Out-of-place addition so integer columns are upcast
                    # instead of being modified in place
                    if x_jitter is not None:
                        x_ = x_ + np.random.uniform(-x_jitter, x_jitter,
                                                    x_.shape)
                    if y_jitter is not None:
                        y_ = y_ + np.random.uniform(-y_jitter, y_jitter,
                                                    y_.shape)
                    ax.plot(x_, y_, "o", color=color, alpha=scatter_alpha,
                            mew=marker_mew, ms=scatter_ms, **scatter_kws)

    for ax_i in np.ravel(axes):
        ax_i.set_xmargin(.05)
        ax_i.autoscale_view()

    # Now walk through again and plot the regression estimate
    # and a confidence interval for the regression line
    if fit_reg:
        for row_i, row_mask in enumerate(row_masks):
            for col_j, col_mask in enumerate(col_masks):
                f_row, f_col = _facet_position(row_i, col_j)
                ax = axes[f_row, f_col]
                xlim = ax.get_xlim()

                for hue_k, hue_mask in enumerate(hue_masks):
                    color = colors[hue_k]
                    data_ijk = data[row_mask & col_mask & hue_mask]
                    x_vals = np.array(data_ijk[x])
                    y_vals = np.array(data_ijk[y])
                    if not len(x_vals):
                        continue

                    # Sort out the limit of the fit
                    if truncate:
                        xx = np.linspace(x_vals.min(),
                                         x_vals.max(), 100)
                    else:
                        xx = np.linspace(xlim[0], xlim[1], 100)
                    xx_ = sm.add_constant(xx, prepend=True)

                    # Inner function to bootstrap the regression; it closes
                    # over the prediction grid of the current group
                    def _regress(x, y):
                        if logistic:
                            x_ = sm.add_constant(x, prepend=True)
                            fit = sm.GLM(y, x_,
                                         family=sm.families.Binomial()).fit()
                            reg = fit.predict(xx_)
                        else:
                            fit = np.polyfit(x, y, order)
                            reg = np.polyval(fit, xx)
                        return reg

                    # Remove nuisance variables with vector rejection
                    if x_partial is not None:
                        x_vals = _partial_out(x_vals, data_ijk, x_partial)
                    if y_partial is not None:
                        y_vals = _partial_out(y_vals, data_ijk, y_partial)

                    # Regression line confidence interval
                    if ci is not None:
                        ci_lims = [50 - ci / 2., 50 + ci / 2.]
                        boots = moss.bootstrap(x_vals, y_vals,
                                               func=_regress,
                                               n_boot=n_boot)
                        ci_band = moss.percentiles(boots, ci_lims, axis=0)
                        ax.fill_between(xx, *ci_band, color=color, alpha=.15)

                    # Regression line
                    reg = _regress(x_vals, y_vals)
                    if color_factor is None:
                        label = ""
                    else:
                        label = hue_vals[hue_k]
                    ax.plot(xx, reg, color=color,
                            label=str(label), **line_kws)
                    # Drawing the fit must not change the scatter's limits
                    ax.set_xlim(xlim)

    # Plot the legend on the upper left facet and adjust the layout
    if color_factor is not None and color_factor not in [row, col]:
        axes[0, 0].legend(loc="best", title=color_factor)
    plt.tight_layout()
Ejemplo n.º 26
0
def regplot(x,
            y,
            data=None,
            corr_func=stats.pearsonr,
            xlabel="",
            ylabel="",
            ci=95,
            size=None,
            annotloc=None,
            color=None,
            reg_kws=None,
            scatter_kws=None,
            dist_kws=None,
            text_kws=None):
    """Scatterplot with regression line, marginals, and correlation value.

    A new square figure is created with a central scatter axes and two
    marginal distribution axes. Nothing is returned.

    Parameters
    ----------
    x : sequence
        independent variables
    y : sequence
        dependent variables
    data : dataframe, optional
        if dataframe is given, x, and y are interpreted as
        string keys mapping to dataframe column names
    corr_func : callable, optional
        correlation function; expected to take two arrays
        and return a (statistic, pval) tuple
    xlabel, ylabel : string, optional
        label names; when `data` is given and a label is left empty,
        the corresponding column name is used
    ci : int or None
        confidence interval for the regression line
    size: int
        figure size (will be a square; only need one int)
    annotloc : two or three tuple
        (xpos, ypos [, horizontalalignment])
    color : matplotlib color scheme
        color of everything but the regression line
        overridden by passing `color` to subfunc kwargs
    {reg, scatter, dist, text}_kws: dicts
        further keyword arguments for the constituent plots;
        the dictionaries are copied and never modified

    """
    # Interpret inputs
    if data is not None:
        # Column names are the default labels, but an explicit label wins
        xlabel = xlabel or x
        ylabel = ylabel or y
        x = np.array(data[x])
        y = np.array(data[y])

    # Copy the keyword dictionaries: defaults are filled in and entries
    # popped below, which must not leak back into the caller's objects
    scatter_kws = {} if scatter_kws is None else dict(scatter_kws)
    dist_kws = {} if dist_kws is None else dict(dist_kws)
    reg_kws = {} if reg_kws is None else dict(reg_kws)
    text_kws = {} if text_kws is None else dict(text_kws)

    # Set up the figure and axes
    size = 6 if size is None else size
    fig = plt.figure(figsize=(size, size))
    ax_scatter = fig.add_axes([0.05, 0.05, 0.75, 0.75])
    ax_x_marg = fig.add_axes([0.05, 0.82, 0.75, 0.13])
    ax_y_marg = fig.add_axes([0.82, 0.05, 0.13, 0.75])

    # Plot the scatter
    if color is not None:
        scatter_kws.setdefault("color", color)
    marker = scatter_kws.pop("markerstyle", "o")
    # Default opacity falls off with the number of points following a
    # normal curve with a scale of 100, and is floored at .1
    alpha_maker = stats.norm(0, 100)
    alpha = alpha_maker.pdf(len(x)) / alpha_maker.pdf(0)
    alpha = max(alpha, .1)
    alpha = scatter_kws.pop("alpha", alpha)
    ax_scatter.plot(x, y, marker, alpha=alpha, mew=0, **scatter_kws)
    ax_scatter.set_xlabel(xlabel)
    ax_scatter.set_ylabel(ylabel)

    # Marginal plots using our distplot function
    if color is not None:
        dist_kws.setdefault("color", color)
    dist_kws.setdefault("legend", False)
    distplot(x, ax=ax_x_marg, **dist_kws)
    distplot(y, ax=ax_y_marg, vertical=True, **dist_kws)
    for marg_ax in [ax_x_marg, ax_y_marg]:
        marg_ax.set_xticklabels([])
        marg_ax.set_yticklabels([])

    # Regression line plot
    xlim = ax_scatter.get_xlim()
    a, b = np.polyfit(x, y, 1)
    reg_color = reg_kws.pop("color", "#222222")
    ax_scatter.plot(xlim, np.polyval([a, b], xlim), color=reg_color, **reg_kws)

    # Bootstrapped regression standard error
    if ci is not None:
        xx = np.linspace(xlim[0], xlim[1], 100)

        def _bootstrap_reg(x, y):
            fit = np.polyfit(x, y, 1)
            return np.polyval(fit, xx)

        boots = moss.bootstrap(x, y, func=_bootstrap_reg)
        ci_lims = [50 - ci / 2., 50 + ci / 2.]
        ci_band = moss.percentiles(boots, ci_lims, axis=0)
        ax_scatter.fill_between(xx, *ci_band, color=reg_color, alpha=.15)
        # Keep the limits set by the scatter after drawing the band
        ax_scatter.set_xlim(xlim)

    # Calculate a correlation statistic and p value
    r, p = corr_func(x, y)
    msg = "%s: %.3f (p=%.3g%s)" % (corr_func.__name__, r, p, moss.sig_stars(p))
    if annotloc is None:
        # Put the text in the top corner chosen by the sign of the
        # statistic: top right when negative, top left otherwise
        xmin, xmax = xlim
        x_range = xmax - xmin
        if r < 0:
            xloc, align = xmax - x_range * .02, "right"
        else:
            xloc, align = xmin + x_range * .02, "left"
        ymin, ymax = ax_scatter.get_ylim()
        y_range = ymax - ymin
        yloc = ymax - y_range * .02
    else:
        if len(annotloc) == 3:
            xloc, yloc, align = annotloc
        else:
            xloc, yloc = annotloc
            align = "left"
    ax_scatter.text(xloc, yloc, msg, ha=align, va="top", **text_kws)

    # Set the axes on the marginal plots
    ax_x_marg.set_xlim(ax_scatter.get_xlim())
    ax_x_marg.set_yticks([])
    ax_y_marg.set_ylim(ax_scatter.get_ylim())
    ax_y_marg.set_xticks([])
Ejemplo n.º 27
0
from scipy.optimize import curve_fit

# Show the raw observations and build an x grid spanning the current limits
scatter(x_pop, y_pop, facecolor='none')
l, r = xlim()
xx = linspace(l, r, 200)

# One predicted curve per bootstrap iteration of the linear model; the first
# entry of each weight record multiplies x and the second is added to it
y_model = [dot(xx, w[0]) + w[1] for w in w_ols_boot]

# Median prediction with a transparent band between the 16th and 84th
# percentiles of the bootstrap curves
pcts = [16, 84]
lin_center = median(y_model, axis=0)
plot(xx, lin_center)
lin_ci = moss.percentiles(y_model, pcts, axis=0)
fill_between(xx, *lin_ci, color=colors[0], alpha=.2)

# Same treatment for the nonlinear model, evaluated through modelfunc
y_model_nlin = [modelfunc(xx, *w) for w in w_nonlin_boot]
nlin_center = median(y_model_nlin, axis=0)
plot(xx, nlin_center)
nlin_ci = moss.percentiles(y_model_nlin, pcts, axis=0)
fill_between(xx, *nlin_ci, color=colors[1], alpha=.2)

# Overlay the data again and compare mean against median nonlinear curves
scatter(x_pop, y_pop, facecolor='none')
plot(xx, mean(y_model_nlin, 0), label="mean", color=colors[3])
plot(xx, median(y_model_nlin, 0), label="median", color=colors[4])
legend(loc="best")
Ejemplo n.º 28
0
# Linear model panel
# ------------------
# NOTE(review): `gs` is defined outside this fragment; presumably a
# matplotlib GridSpec - confirm against the full script.

ax = plt.subplot(gs[:2, 0])
plt.title("lmplot()")

# Simulate n noisy observations of a linear relationship (fixed seed)
n = 80
c = "#222222"
rs = np.random.RandomState(5)
x = rs.normal(4, 1, n)
y = 2 + 1.5 * x + rs.normal(0, 3, n)
ax.plot(x, y, "o", c=c, alpha=.8)

# Fit a first-order polynomial and evaluate it on a fixed grid; the same
# function is handed to the bootstrap so every resample yields one curve
xx = np.linspace(1, 7, 100)
lmpred = lambda x, y: np.polyval(np.polyfit(x, y, 1), xx)
yy = lmpred(x, y)
ax.plot(xx, yy, c=c)
boots = moss.bootstrap(x, y, func=lmpred, n_boot=100)
# Band between the 2.5th and 97.5th percentiles of the bootstrap curves
ci = moss.percentiles(boots, [2.5, 97.5], 0)
ax.fill_between(xx, *ci, alpha=.15, color=c)

# Timeseries plot
# ---------------

ax = plt.subplot(gs[:2, 1])
plt.title("tsplot()")

n = 20
t = 10
ax.set_xlim(0, t)
x = np.linspace(0, t, 100)
# One gamma density curve per shape parameter, stacked row-wise
s = np.array([stats.gamma.pdf(x, a) for a in [3, 5, 7]])
# Insert a length-one middle axis between the curve and sample axes
d = s[:, np.newaxis, :]
rs = np.random.RandomState(24)
Ejemplo n.º 29
0
def lmplot(x, y, data, color=None, row=None, col=None,
           x_estimator=None, x_ci=95,
           fit_line=True, ci=95, truncate=False,
           sharex=True, sharey=True, palette="hls", size=None,
           scatter_kws=None, line_kws=None, palette_kws=None):
    """Plot a linear model from a DataFrame.

    A new figure is created with one axes per row/col facet. Nothing is
    returned; the plot is drawn as a side effect.

    Parameters
    ----------
    x, y : strings
        column names in `data` DataFrame for x and y variables
    data : DataFrame
        source of data for the model
    color : string, optional
        DataFrame column name to group the model by color
    row, col : strings, optional
        DataFrame column names to make separate plot facets
    x_estimator : callable, optional
        Interpret X values as factor labels and use this function
        to plot the point estimate and bootstrapped CI
    x_ci : int optional
        size of confidence interval for x_estimator error bars
    fit_line : bool, optional
        if True fit a regression line by color/row/col and plot
    ci : int or None, optional
        confidence interval for the regression line; None skips the band
    truncate : bool, optional
        if True, only fit line from data min to data max
    sharex, sharey : bools, optional
        only relevant if faceting; passed to plt.subplots
    palette : seaborn color palette argument
        if using separate plots by color, draw with this color palette
    size : float, optional
        size (plots are square) for each plot facet
    {scatter, line}_kws : dictionary
        keyword arguments to pass to the underlying plot functions;
        the dictionaries are copied, so the caller's objects are untouched
    palette_kws : dictionary
        keyword arguments for seaborn.color_palette

    """
    # TODO
    # - position_{dodge, jitter}
    # - legend when fit_line is False
    # - wrap title when wide
    # - wrap columns

    # First sort out the general figure layout
    if size is None:
        size = mpl.rcParams["figure.figsize"][1]

    # Build the facet masks from whether a facet variable was requested,
    # not from how many levels it has: a variable with a single level must
    # still define its level values because they are used in the titles.
    everything = np.repeat(True, len(data))
    if row is None:
        row_vals, row_masks = None, [everything]
    else:
        row_vals = np.sort(data[row].unique())
        row_masks = [data[row] == val for val in row_vals]

    if col is None:
        col_vals, col_masks = None, [everything]
    else:
        col_vals = np.sort(data[col].unique())
        col_masks = [data[col] == val for val in col_vals]

    nrow, ncol = len(row_masks), len(col_masks)

    f, axes = plt.subplots(nrow, ncol, sharex=sharex, sharey=sharey,
                           figsize=(size * ncol, size * nrow))
    axes = np.atleast_2d(axes).reshape(nrow, ncol)

    if palette_kws is None:
        palette_kws = {}

    # Sort out the plot colors; `color` is reused below for the drawing
    # color of each group, so remember the name of the grouping column
    color_factor = color
    if color is None:
        hue_vals = None
        hue_masks = [everything]
        colors = ["#222222"]
    else:
        hue_vals = np.sort(data[color].unique())
        hue_masks = [data[color] == val for val in hue_vals]
        colors = color_palette(palette, len(hue_masks), **palette_kws)

    # Work on copies so the caller's keyword dictionaries are not mutated
    scatter_kws = {} if scatter_kws is None else dict(scatter_kws)
    line_kws = {} if line_kws is None else dict(line_kws)

    # Resolve the marker properties once, before looping. Popping them
    # inside the loop would consume a user-supplied value on the first
    # group and silently fall back to the default for every later one.
    default_ms = 4 if x_estimator is None else 7
    ms = scatter_kws.pop("ms", default_ms)
    mew = scatter_kws.pop("mew", 0)

    # First walk through the facets and plot the scatters
    for row_i, row_mask in enumerate(row_masks):
        for col_j, col_mask in enumerate(col_masks):
            ax = axes[row_i, col_j]
            if not sharex or (row_i + 1 == len(row_masks)):
                ax.set_xlabel(x)
            if not sharey or col_j == 0:
                ax.set_ylabel(y)

            # Title the plot if we are faceting
            title = ""
            if row is not None:
                title += "%s = %s" % (row, row_vals[row_i])
            if row is not None and col is not None:
                title += " | "
            if col is not None:
                title += "%s = %s" % (col, col_vals[col_j])
            ax.set_title(title)

            for hue_k, hue_mask in enumerate(hue_masks):
                color = colors[hue_k]
                data_ijk = data[row_mask & col_mask & hue_mask]
                if not len(data_ijk):
                    # Nothing to draw for this facet/color combination
                    continue

                if x_estimator is not None:
                    x_vals = data_ijk[x].unique()
                    y_grouped = [np.array(data_ijk[y][data_ijk[x] == v])
                                 for v in x_vals]
                    y_est = [x_estimator(y_i) for y_i in y_grouped]
                    y_boots = [moss.bootstrap(np.array(y_i), func=x_estimator)
                               for y_i in y_grouped]
                    ci_lims = [50 - x_ci / 2., 50 + x_ci / 2.]
                    y_ci = [moss.percentiles(y_i, ci_lims) for y_i in y_boots]
                    y_error = ci_to_errsize(np.transpose(y_ci), y_est)

                    ax.plot(x_vals, y_est, "o", mew=mew, ms=ms,
                            color=color, **scatter_kws)
                    # "none" is the documented way to draw error bars
                    # without markers or a connecting line
                    ax.errorbar(x_vals, y_est, y_error,
                                fmt="none", ecolor=color)
                else:
                    ax.plot(data_ijk[x], data_ijk[y], "o",
                            color=color, mew=mew, ms=ms, **scatter_kws)

    for ax_i in np.ravel(axes):
        ax_i.set_xmargin(.05)
        ax_i.autoscale_view()

    # Now walk through again and plot the regression estimate
    # and a confidence interval for the regression line
    if fit_line:
        for row_i, row_mask in enumerate(row_masks):
            for col_j, col_mask in enumerate(col_masks):
                ax = axes[row_i, col_j]
                xlim = ax.get_xlim()

                for hue_k, hue_mask in enumerate(hue_masks):
                    color = colors[hue_k]
                    data_ijk = data[row_mask & col_mask & hue_mask]
                    x_vals = np.array(data_ijk[x])
                    y_vals = np.array(data_ijk[y])
                    if not len(x_vals):
                        # An empty group has no extent and cannot be fit
                        continue

                    # Sort out the limit of the fit
                    if truncate:
                        xx = np.linspace(x_vals.min(),
                                         x_vals.max(), 100)
                    else:
                        xx = np.linspace(xlim[0], xlim[1], 100)

                    # Inner function to bootstrap the regression; it closes
                    # over the prediction grid of the current group
                    def _bootstrap_reg(x, y):
                        fit = np.polyfit(x, y, 1)
                        return np.polyval(fit, xx)

                    # Regression line confidence interval
                    if ci is not None:
                        ci_lims = [50 - ci / 2., 50 + ci / 2.]
                        boots = moss.bootstrap(x_vals, y_vals,
                                               func=_bootstrap_reg)
                        ci_band = moss.percentiles(boots, ci_lims, axis=0)
                        ax.fill_between(xx, *ci_band, color=color, alpha=.15)

                    reg = _bootstrap_reg(x_vals, y_vals)
                    if color_factor is None:
                        label = ""
                    else:
                        label = hue_vals[hue_k]
                    ax.plot(xx, reg, color=color,
                            label=str(label), **line_kws)
                    # Drawing the fit must not change the scatter's limits
                    ax.set_xlim(xlim)

    # Plot the legend on the upper left facet and adjust the layout
    if color_factor is not None:
        axes[0, 0].legend(loc="best", title=color_factor)
    plt.tight_layout()