def subplot_fn(**kwargs):
    x = kwargs["x"]
    y = kwargs["y"]
    data = kwargs["data"]
    utils.assert_no_duplicates_in_condition(
        data, group_by=["DatasetName", "ModelName"])
    ax = plt.gca()
    kwargs["color"] = utils.assert_and_get_constant(data.family_color)
    ax = sns.regplot(ax=ax, **kwargs)
    plotter = RegressionPlotter(x, y, data=data)

    # Get regression parameters and show in plot:
    grid = np.linspace(data[x].min(), data[x].max(), 100)
    beta_plot, beta_boots = plotter.get_params(grid)
    beta_plot = np.array(beta_plot)
    beta_boots = np.array(beta_boots)
    intercept = 10 ** np.median(beta_boots[0, :])
    intercept_ci = 10 ** sns_utils.ci(beta_boots[0, :])
    slope = np.median(beta_boots[1, :])
    slope_ci = sns_utils.ci(beta_boots[1, :])
    s = (f"a = {intercept:1.2f} ({intercept_ci[0]:1.2f}, "
         f"{intercept_ci[1]:1.2f})\nk = {slope:1.2f} ({slope_ci[0]:1.2f}, "
         f"{slope_ci[1]:1.2f})")
    ax.text(0.04, 0.96, s, va="top", ha="left", transform=ax.transAxes,
            fontsize=4, color=(0.3, 0.3, 0.3),
            bbox=dict(facecolor="w", alpha=0.8, boxstyle="square,pad=0.1"))
Example #2
0
def bootstrapped_ci(x, func, n_boot, which_ci=95, axis=None):
    """
    Get the confidence interval (CI) of a metric using bootstrapping.

    Parameters
    ----------
    x : array-like
        a sample.
    func : callable (function object)
        the function that estimated the metric (for example np.mean, np.median, ...).
    n_boot : int
        number of sub-samples to use for the bootstrap estimate.
    which_ci : float, optional
        A number between 0 and 100 that defines the confidence interval.
        The default is 95, which means that there is 95% probability the metric
        will be inside the limits of the confidence interval.
    axis : int or None, optional
        Will pass axis to func as a keyword argument.
        The default is None.

    Returns
    -------
    TYPE
        DESCRIPTION.

    """
    from seaborn.algorithms import bootstrap
    from seaborn.utils import ci

    boot_distribution = bootstrap(x, func=func, n_boot=n_boot, axis=axis)

    return ci(boot_distribution, which=which_ci, axis=axis)
Example #3
0
		def bootstrapped_cis(vals):

			if len(vals) <= 1:
				return null_ci

			boots = bootstrap(vals, func=func, n_boot=n_boot, seed=seed)
			cis = utils.ci(boots, ci)
			return pd.Series(cis, ["low", "high"])
Example #4
0
    def estimate_statistic(self, estimator, ci, n_boot):

        if self.hue_names is None:
            statistic = []
            confint = []
        else:
            statistic = [[] for _ in self.plot_data]
            confint = [[] for _ in self.plot_data]

        for i, group_data in enumerate(self.plot_data):

            # Option 1: we have a single layer of grouping
            # --------------------------------------------

            if self.plot_hues is None:

                if self.plot_units is None:
                    stat_data = remove_na(group_data)
                    unit_data = None
                else:
                    unit_data = self.plot_units[i]
                    have = pd.notnull(np.c_[group_data, unit_data]).all(axis=1)
                    stat_data = group_data[have]
                    unit_data = unit_data[have]

                # Estimate a statistic from the vector of data
                if not stat_data.size:
                    statistic.append(np.nan)
                else:
                    statistic.append(estimator(stat_data))

                # Get a confidence interval for this estimate
                if ci is not None:

                    if stat_data.size < 2:
                        confint.append([np.nan, np.nan])
                        continue

                    if ci == "sd":

                        estimate = estimator(stat_data)
                        sd = np.std(stat_data)
                        confint.append((estimate - sd, estimate + sd))

                    elif ci == "range":
                        confint.append((np.min(stat_data), np.max(stat_data)))

                    else:

                        boots = bootstrap(stat_data,
                                          func=estimator,
                                          n_boot=n_boot,
                                          units=unit_data)
                        confint.append(utils.ci(boots, ci))

            # Option 2: we are grouping by a hue layer
            # ----------------------------------------

            else:
                for j, hue_level in enumerate(self.hue_names):

                    if not self.plot_hues[i].size:
                        statistic[i].append(np.nan)
                        if ci is not None:
                            confint[i].append((np.nan, np.nan))
                        continue

                    hue_mask = self.plot_hues[i] == hue_level
                    if self.plot_units is None:
                        stat_data = remove_na(group_data[hue_mask])
                        unit_data = None
                    else:
                        group_units = self.plot_units[i]
                        have = pd.notnull(np.c_[group_data,
                                                group_units]).all(axis=1)
                        stat_data = group_data[hue_mask & have]
                        unit_data = group_units[hue_mask & have]

                    # Estimate a statistic from the vector of data
                    if not stat_data.size:
                        statistic[i].append(np.nan)
                    else:
                        statistic[i].append(estimator(stat_data))

                    # Get a confidence interval for this estimate
                    if ci is not None:

                        if stat_data.size < 2:
                            confint[i].append([np.nan, np.nan])
                            continue

                        if ci == "sd":

                            estimate = estimator(stat_data)
                            sd = np.std(stat_data)
                            confint[i].append((estimate - sd, estimate + sd))

                        elif ci == "range":
                            confint[i].append(
                                (np.min(stat_data), np.max(stat_data)))

                        else:

                            boots = bootstrap(stat_data,
                                              func=estimator,
                                              n_boot=n_boot,
                                              units=unit_data)
                            confint[i].append(utils.ci(boots, ci))

        # Save the resulting values for plotting
        self.statistic = np.array(statistic)
        self.confint = np.array(confint)
Example #5
0
def tsplot(data,
           time=None,
           unit=None,
           condition=None,
           value=None,
           err_style="ci_band",
           ci=68,
           interpolate=True,
           color=None,
           estimator=np.mean,
           n_boot=5000,
           err_palette=None,
           err_kws=None,
           legend=True,
           ax=None,
           **kwargs):
    """Plot one or more timeseries with flexible representation of uncertainty.

    This function can take data specified either as a long-form (tidy)
    DataFrame or as an ndarray with dimensions for sampling unit, time, and
    (optionally) condition. The interpretation of some of the other parameters
    changes depending on the type of object passed as data.

    Parameters
    ----------
    data : DataFrame or ndarray
        Data for the plot. Should either be a "long form" dataframe or an
        array with dimensions (unit, time, condition). In both cases, the
        condition field/dimension is optional. The type of this argument
        determines the interpretation of the next few parameters.
    time : string or series-like
        Either the name of the field corresponding to time in the data
        DataFrame or x values for a plot when data is an array. If a Series,
        the name will be used to label the x axis.
    unit : string
        Field in the data DataFrame identifying the sampling unit (e.g.
        subject, neuron, etc.). The error representation will collapse over
        units at each time/condition observation. This has no role when data
        is an array.
    value : string
        Either the name of the field corresponding to the data values in
        the data DataFrame (i.e. the y coordinate) or a string that forms
        the y axis label when data is an array.
    condition : string or Series-like
        Either the name of the field identifying the condition an observation
        falls under in the data DataFrame, or a sequence of names with a length
        equal to the size of the third dimension of data. There will be a
        separate trace plotted for each condition. If condition is a Series
        with a name attribute, the name will form the title for the plot
        legend (unless legend is set to False).
    err_style : string or list of strings or None
        Names of ways to plot uncertainty across units from set of
        {ci_band, ci_bars, boot_traces, boot_kde, unit_traces, unit_points}.
        Can use one or more than one method.
    ci : float or list of floats in [0, 100]
        Confidence interaval size(s). If a list, it will stack the error
        plots for each confidence interval. Only relevant for error styles
        with "ci" in the name.
    interpolate : boolean
        Whether to do a linear interpolation between each timepoint when
        plotting. The value of this parameter also determines the marker
        used for the main plot traces, unless marker is specified as a keyword
        argument.
    color : seaborn palette or matplotlib color name or dictionary
        Palette or color for the main plots and error representation (unless
        plotting by unit, which can be separately controlled with err_palette).
        If a dictionary, should map condition name to color spec.
    estimator : callable
        Function to determine central tendency and to pass to bootstrap
        must take an ``axis`` argument.
    n_boot : int
        Number of bootstrap iterations.
    err_palette: seaborn palette
        Palette name or list of colors used when plotting data for each unit.
    err_kws : dict, optional
        Keyword argument dictionary passed through to matplotlib function
        generating the error plot,
    ax : axis object, optional
        Plot in given axis; if None creates a new figure
    kwargs :
        Other keyword arguments are passed to main plot() call

    Returns
    -------
    ax : matplotlib axis
        axis with plot data

    """
    # Sort out default values for the parameters
    if ax is None:
        ax = plt.gca()

    if err_kws is None:
        err_kws = {}

    # Handle different types of input data
    if isinstance(data, pd.DataFrame):

        xlabel = time
        ylabel = value

        # Condition is optional
        if condition is None:
            condition = pd.Series(np.ones(len(data)))
            legend = False
            legend_name = None
            n_cond = 1
        else:
            legend = True and legend
            legend_name = condition
            n_cond = len(data[condition].unique())

    else:
        data = np.asarray(data)

        # Data can be a timecourse from a single unit or
        # several observations in one condition
        if data.ndim == 1:
            data = data[np.newaxis, :, np.newaxis]
        elif data.ndim == 2:
            data = data[:, :, np.newaxis]
        n_unit, n_time, n_cond = data.shape

        # Units are experimental observations. Maybe subjects, or neurons
        if unit is None:
            units = np.arange(n_unit)
        unit = "unit"
        units = np.repeat(units, n_time * n_cond)
        ylabel = None

        # Time forms the xaxis of the plot
        if time is None:
            times = np.arange(n_time)
        else:
            times = np.asarray(time)
        xlabel = None
        if hasattr(time, "name"):
            xlabel = time.name
        time = "time"
        times = np.tile(np.repeat(times, n_cond), n_unit)

        # Conditions split the timeseries plots
        if condition is None:
            conds = list(range(n_cond))
            legend = False
            if isinstance(color, dict):
                err = "Must have condition names if using color dict."
                raise ValueError(err)
        else:
            conds = np.asarray(condition)
            legend = True and legend
            if hasattr(condition, "name"):
                legend_name = condition.name
            else:
                legend_name = None
        condition = "cond"
        conds = np.tile(conds, n_unit * n_time)

        # Value forms the y value in the plot
        if value is None:
            ylabel = None
        else:
            ylabel = value
        value = "value"

        # Convert to long-form DataFrame
        data = pd.DataFrame(
            dict(value=data.ravel(), time=times, unit=units, cond=conds))

    # Set up the err_style and ci arguments for teh loop below
    if isinstance(err_style, string_types):
        err_style = [err_style]
    elif err_style is None:
        err_style = []
    if not hasattr(ci, "__iter__"):
        ci = [ci]

    # Set up the color palette
    if color is None:
        current_palette = mpl.rcParams["axes.color_cycle"]
        if len(current_palette) < n_cond:
            colors = color_palette("husl", n_cond)
        else:
            colors = color_palette(n_colors=n_cond)
    elif isinstance(color, dict):
        colors = [color[c] for c in data[condition].unique()]
    else:
        try:
            colors = color_palette(color, n_cond)
        except ValueError:
            color = mpl.colors.colorConverter.to_rgb(color)
            colors = [color] * n_cond

    # Do a groupby with condition and plot each trace
    for c, (cond, df_c) in enumerate(data.groupby(condition, sort=False)):

        df_c = df_c.pivot(unit, time, value)
        x = df_c.columns.values.astype(np.float)
        # Bootstrap the data for confidence intervals
        #boot_data = algo.bootstrap(df_c.values, n_boot=n_boot,
        #                           axis=0, func=estimator)
        boot_data = df_c.values
        cis = [utils.ci(boot_data, v, axis=0) for v in ci]

        central_data = estimator(df_c.values, axis=0)

        # Get the color for this condition
        color = colors[c]

        # Use subroutines to plot the uncertainty
        for style in err_style:

            # Allow for null style (only plot central tendency)
            if style is None:
                continue

            # Grab the function from the global environment
            try:
                plot_func = globals()["_plot_%s" % style]
            except KeyError:
                raise ValueError("%s is not a valid err_style" % style)

            # Possibly set up to plot each observation in a different color
            if err_palette is not None and "unit" in style:
                orig_color = color
                color = color_palette(err_palette, len(df_c.values))

            # Pass all parameters to the error plotter as keyword args
            plot_kwargs = dict(ax=ax,
                               x=x,
                               data=df_c.values,
                               boot_data=boot_data,
                               central_data=central_data,
                               color=color,
                               err_kws=err_kws)

            # Plot the error representation, possibly for multiple cis
            for ci_i in cis:
                plot_kwargs["ci"] = ci_i
                plot_func(**plot_kwargs)

            if err_palette is not None and "unit" in style:
                color = orig_color

        # Plot the central trace
        kwargs.setdefault("marker", "" if interpolate else "o")
        ls = kwargs.pop("ls", "-" if interpolate else "")
        kwargs.setdefault("linestyle", ls)
        label = cond if legend else "_nolegend_"
        ax.plot(x, central_data, color=color, label=label, **kwargs)

    # Pad the sides of the plot only when not interpolating
    ax.set_xlim(x.min(), x.max())
    x_diff = x[1] - x[0]
    if not interpolate:
        ax.set_xlim(x.min() - x_diff, x.max() + x_diff)

    # Add the plot labels
    if xlabel is not None:
        ax.set_xlabel(xlabel)
    if ylabel is not None:
        ax.set_ylabel(ylabel)
    if legend:
        ax.legend(loc=0, title=legend_name)

    return ax
Example #6
0
def regplot(x,
            y,
            data=None,
            model=None,
            ci=95.,
            scatter_color=None,
            model_color='k',
            ax=None,
            scatter_kws={},
            regplot_kws={},
            cmap=None,
            cax=None,
            clabel=None,
            xlabel=False,
            ylabel=False,
            colorbar=False,
            **kwargs):
    if model is None:
        import statsmodels.api as sm
        model = sm.OLS
    from seaborn import utils
    from seaborn import algorithms as algo
    if ax is None:
        fig, ax = plt.subplots()
    if data is None:
        _x = x
        _y = y
    else:
        _x = data[x]
        _y = data[y]
    grid = np.linspace(_x.min(), _x.max(), 100)

    X = np.c_[np.ones(len(_x)), _x]
    G = np.c_[np.ones(len(grid)), grid]

    results = model(_y, X, **kwargs).fit()

    def reg_func(xx, yy):
        yhat = model(yy, xx, **kwargs).fit().predict(G)
        return yhat

    yhat = results.predict(G)
    yhat_boots = algo.bootstrap(X, _y, func=reg_func, n_boot=1000, units=None)
    err_bands = utils.ci(yhat_boots, ci, axis=0)
    ax.plot(grid, yhat, color=model_color, **regplot_kws)
    sc = ax.scatter(_x, _y, c=scatter_color, **scatter_kws)
    ax.fill_between(grid, *err_bands, facecolor=model_color, alpha=.15)
    if colorbar:
        cb = plt.colorbar(mappable=sc, cax=cax, ax=ax)
        cb.ax.yaxis.set_ticks_position('right')
        if clabel: cb.set_label(clabel)

    if xlabel:
        if isinstance(xlabel, str):
            ax.set_xlabel(xlabel)
        else:
            ax.set_xlabel(x)
    if ylabel:
        if isinstance(ylabel, str):
            ax.set_ylabel(ylabel)
        else:
            ax.set_ylabel(y)
    return results
Example #7
0
    for axis, attribute, title in zip(axs, attributes, titles):
        N = 6
        men = [
            df[df.hate == "hateful"], df[df.hate == "normal"],
            df[df.hate_neigh], df[df.normal_neigh], df[df.is_63_2 == True],
            df[df.is_63_2 == False]
        ]
        tmp = []
        medians, medians_ci = [], []
        averages, averages_ci = [], []

        for category in men:
            boots = bootstrap(category[attribute],
                              func=np.nanmean,
                              n_boot=1000)
            ci_tmp = ci(boots)
            average = (ci_tmp[0] + ci_tmp[1]) / 2
            ci_average = (ci_tmp[1] - ci_tmp[0]) / 2
            averages.append(average)
            averages_ci.append(ci_average)
            boots = bootstrap(category[attribute],
                              func=np.nanmedian,
                              n_boot=1000)
            ci_tmp = ci(boots)
            median = (ci_tmp[0] + ci_tmp[1]) / 2
            ci_median = (ci_tmp[1] - ci_tmp[0]) / 2
            medians.append(median)
            medians_ci.append(ci_median)

            tmp.append(category[attribute].values)