Beispiel #1
0
def distplot(a,
             bins=None,
             hist=True,
             kde=True,
             rug=False,
             fit=None,
             hist_kws=None,
             kde_kws=None,
             rug_kws=None,
             fit_kws=None,
             color=None,
             vertical=False,
             xlabel=None,
             ax=None):
    """Flexibly plot a distribution of observations.

    Parameters
    ----------
    a : (squeezable to) 1d array
        Observed data.
    bins : argument for matplotlib hist(), or None
        Specification of hist bins, or None to use the Freedman-Diaconis
        rule (falling back to a square-root rule when the IQR is zero).
    hist : bool, default True
        Whether to plot a (normed) histogram.
    kde : bool, default True
        Whether to plot a gaussian kernel density estimate.
    rug : bool, default False
        Whether to draw a rugplot on the support axis.
    fit : random variable object
        An object with a `fit` method, returning a tuple that can be passed
        to a `pdf` method as positional arguments following a grid of
        values to evaluate the pdf on.
    {hist, kde, rug, fit}_kws : dictionaries
        Keyword arguments for the underlying plotting functions. The
        dictionaries are copied, so the caller's objects are not modified.
        ``hist_kws`` may carry ``alpha`` (default 0.4) and ``color``;
        ``fit_kws`` may carry ``color`` (default "#282828"), ``npts``
        (default 1000) and ``support_thresh`` (default 1e-4).
    color : matplotlib color, optional
        Color to plot everything but the fitted curve in.
    vertical : bool, default False
        If True, observed values are on the y-axis.
    xlabel : string, False, or None
        Name for the x axis label. If None, will try to get it from
        ``a.name``; if False, do not set the x label. The label is applied
        to the x axis regardless of ``vertical``.
    ax : matplotlib axis, optional
        If provided, plot on this axis; otherwise use ``plt.gca()``.

    Returns
    -------
    ax : matplotlib axis
        The axis that was drawn on.

    """
    if ax is None:
        ax = plt.gca()

    # Intelligently label the axis. This has to happen before `a` is
    # converted to a plain ndarray below, which has no `name` attribute.
    label_x = bool(xlabel)
    if xlabel is None and hasattr(a, "name"):
        xlabel = a.name
        if xlabel is not None:
            label_x = True

    # Make a a 1-d array; atleast_1d keeps a single observation from
    # collapsing to a 0-d array, on which len() would fail.
    a = np.atleast_1d(np.asarray(a).squeeze())

    # Handle dictionary defaults. Copies are taken because the ``pop`` calls
    # below would otherwise strip keys from the caller's dictionaries.
    hist_kws = {} if hist_kws is None else dict(hist_kws)
    kde_kws = {} if kde_kws is None else dict(kde_kws)
    rug_kws = {} if rug_kws is None else dict(rug_kws)
    fit_kws = {} if fit_kws is None else dict(fit_kws)

    # Get the color from the current color cycle by drawing, then removing,
    # a throwaway point.
    if color is None:
        if vertical:
            line, = ax.plot(0, a.mean())
        else:
            line, = ax.plot(a.mean(), 0)
        color = line.get_color()
        line.remove()

    if hist:
        if bins is None:
            # From http://stats.stackexchange.com/questions/798/
            # ``1.0 / 3`` keeps the exponent fractional under Python 2.
            h = 2 * moss.iqr(a) * len(a) ** -(1.0 / 3)
            if h > 0:
                bins = int(np.ceil((a.max() - a.min()) / h))
            else:
                # Zero (or undefined) IQR: avoid dividing by zero and fall
                # back to a square-root rule with at least one bin.
                bins = max(1, int(np.sqrt(a.size)))
        hist_alpha = hist_kws.pop("alpha", 0.4)
        hist_color = hist_kws.pop("color", color)
        orientation = "horizontal" if vertical else "vertical"
        # NOTE(review): `normed` was replaced by `density` in newer
        # matplotlib releases -- confirm the targeted version before changing.
        ax.hist(a,
                bins,
                normed=True,
                color=hist_color,
                alpha=hist_alpha,
                orientation=orientation,
                **hist_kws)

    if kde:
        kde_color = kde_kws.pop("color", color)
        kdeplot(a, vertical=vertical, color=kde_color, ax=ax, **kde_kws)

    if rug:
        rug_color = rug_kws.pop("color", color)
        axis = "y" if vertical else "x"
        rugplot(a, axis=axis, color=rug_color, ax=ax, **rug_kws)

    if fit is not None:
        fit_color = fit_kws.pop("color", "#282828")
        npts = fit_kws.pop("npts", 1000)
        support_thresh = fit_kws.pop("support_thresh", 1e-4)
        params = fit.fit(a)

        def pdf(x):
            """Evaluate the fitted density on ``x``."""
            return fit.pdf(x, *params)

        x = _kde_support(a, pdf, npts, support_thresh)
        y = pdf(x)
        if vertical:
            x, y = y, x
        ax.plot(x, y, color=fit_color, **fit_kws)

    if label_x:
        ax.set_xlabel(xlabel)

    return ax
Beispiel #2
0
def _freedman_diaconis_bins(a):
    """Calculate number of hist bins using Freedman-Diaconis rule."""
    # From http://stats.stackexchange.com/questions/798/
    a = np.asarray(a)
    h = 2 * moss.iqr(a) / (len(a) ** (1 / 3))
    return np.ceil((a.max() - a.min()) / h)
Beispiel #3
0
def distplot(a, bins=None, hist=True, kde=True, rug=False, fit=None,
             hist_kws=None, kde_kws=None, rug_kws=None, fit_kws=None,
             color=None, vertical=False, legend=False, xlabel=None, ax=None):
    """Flexibly plot a distribution of observations.

    Parameters
    ----------
    a : (squeezable to) 1d array
        observed data
    bins : argument for matplotlib hist(), or None
        specification of bins or None to use Freedman-Diaconis rule
        (falling back to a square-root rule when the IQR is zero)
    hist : bool, default True
        whether to plot a (normed) histogram
    kde : bool, default True
        whether to plot a gaussian kernel density estimate
    rug : bool, default False
        whether to draw a rugplot on the support axis
    fit : random variable object
        object with `fit` method returning a tuple that can be
        passed to a `pdf` method as positional arguments following
        an array of values to evaluate the pdf at
    {hist, kde, rug, fit}_kws : dictionaries
        keyword arguments for underlying plotting functions; the
        dictionaries are copied, so the caller's objects are not modified.
        ``hist_kws`` may carry ``alpha`` (default 0.4) and ``color``;
        ``fit_kws`` may carry ``color`` (default "#282828"), ``npts``
        (default 1000) and ``support_thresh`` (default 1e-4). A ``label``
        entry in ``kde_kws`` / ``fit_kws`` is respected; otherwise the
        labels default to "kde" and ``fit.name``
    color : matplotlib color, optional
        color to plot everything but the fitted curve in
    vertical : bool, default False
        if True, observed values are on y-axis
    legend : bool, default False
        if True, add a legend to the plot with what the plotted lines are
    xlabel : string, False, or None
        name for the x axis label. if None, will try to get it from a.name
        if False, do not set the x label
    ax : matplotlib axis, optional
        if provided, plot on this axis; otherwise use ``plt.gca()``

    Returns
    -------
    ax : matplotlib axis
        the axis that was drawn on

    """
    if ax is None:
        ax = plt.gca()

    # Intelligently label the axis. This has to happen before `a` is
    # converted to a plain ndarray below, which has no `name` attribute.
    label_x = bool(xlabel)
    if xlabel is None and hasattr(a, "name"):
        xlabel = a.name
        if xlabel is not None:
            label_x = True

    # Make a a 1-d array; atleast_1d keeps a single observation from
    # collapsing to a 0-d array, on which len() would fail.
    a = np.atleast_1d(np.asarray(a).squeeze())

    # Handle dictionary defaults. Copies are taken because the ``pop`` and
    # label assignments below would otherwise alter the caller's dicts.
    hist_kws = {} if hist_kws is None else dict(hist_kws)
    kde_kws = {} if kde_kws is None else dict(kde_kws)
    rug_kws = {} if rug_kws is None else dict(rug_kws)
    fit_kws = {} if fit_kws is None else dict(fit_kws)

    # Get the color from the current color cycle by drawing, then removing,
    # a throwaway point.
    if color is None:
        if vertical:
            line, = ax.plot(0, a.mean())
        else:
            line, = ax.plot(a.mean(), 0)
        color = line.get_color()
        line.remove()

    if hist:
        if bins is None:
            # From http://stats.stackexchange.com/questions/798/
            # ``1.0 / 3`` keeps the exponent fractional under Python 2.
            h = 2 * moss.iqr(a) * len(a) ** -(1.0 / 3)
            if h > 0:
                bins = int(np.ceil((a.max() - a.min()) / h))
            else:
                # Zero (or undefined) IQR: avoid dividing by zero and fall
                # back to a square-root rule with at least one bin.
                bins = max(1, int(np.sqrt(a.size)))
        hist_alpha = hist_kws.pop("alpha", 0.4)
        hist_color = hist_kws.pop("color", color)
        orientation = "horizontal" if vertical else "vertical"
        # NOTE(review): `normed` was replaced by `density` in newer
        # matplotlib releases -- confirm the targeted version before changing.
        ax.hist(a, bins, normed=True, color=hist_color, alpha=hist_alpha,
                orientation=orientation, **hist_kws)

    if kde:
        kde_color = kde_kws.pop("color", color)
        # Only supply the legend label when the caller did not choose one
        kde_kws.setdefault("label", "kde")
        kdeplot(a, vertical=vertical, color=kde_color, ax=ax, **kde_kws)

    if rug:
        rug_color = rug_kws.pop("color", color)
        axis = "y" if vertical else "x"
        rugplot(a, axis=axis, color=rug_color, ax=ax, **rug_kws)

    if fit is not None:
        fit_color = fit_kws.pop("color", "#282828")
        npts = fit_kws.pop("npts", 1000)
        support_thresh = fit_kws.pop("support_thresh", 1e-4)
        params = fit.fit(a)

        def pdf(x):
            """Evaluate the fitted density on ``x``."""
            return fit.pdf(x, *params)

        x = _kde_support(a, pdf, npts, support_thresh)
        y = pdf(x)
        if vertical:
            x, y = y, x
        # Label with the distribution name unless the caller set a label;
        # objects without a `name` attribute simply get no label.
        fit_kws.setdefault("label", getattr(fit, "name", None))
        ax.plot(x, y, color=fit_color, **fit_kws)

    if legend:
        ax.legend(loc="best")

    if label_x:
        ax.set_xlabel(xlabel)

    return ax
Beispiel #4
0
def distplot(a, bins=None, hist=True, kde=True, rug=False, fit=None,
             hist_kws=None, kde_kws=None, rug_kws=None, fit_kws=None,
             color=None, vertical=False, axlabel=None, ax=None):
    """Flexibly plot a distribution of observations.

    Parameters
    ----------
    a : (squeezable to) 1d array
        Observed data.
    bins : argument for matplotlib hist(), or None
        Specification of hist bins, or None to use the Freedman-Diaconis
        rule (falling back to a square-root rule when the IQR is zero).
    hist : bool, default True
        Whether to plot a (normed) histogram.
    kde : bool, default True
        Whether to plot a gaussian kernel density estimate.
    rug : bool, default False
        Whether to draw a rugplot on the support axis.
    fit : random variable object
        An object with a `fit` method, returning a tuple that can be passed
        to a `pdf` method as positional arguments following a grid of
        values to evaluate the pdf on.
    {hist, kde, rug, fit}_kws : dictionaries
        Keyword arguments for the underlying plotting functions. The
        dictionaries are copied, so the caller's objects are not modified.
        ``hist_kws`` may carry ``alpha`` (default 0.4) and ``color``;
        ``fit_kws`` may carry ``color`` (default "#282828"), ``gridsize``
        (default 500), ``cut`` (default 3) and ``clip`` (default
        ``(-inf, inf)``).
    color : matplotlib color, optional
        Color to plot everything but the fitted curve in.
    vertical : bool, default False
        If True, observed values are on the y-axis.
    axlabel : string, False, or None
        Name for the support axis label. If None, will try to get it
        from ``a.name``; if False, do not set a label.
    ax : matplotlib axis, optional
        If provided, plot on this axis; otherwise use ``plt.gca()``.

    Returns
    -------
    ax : matplotlib axis
        The axis that was drawn on.

    """
    if ax is None:
        ax = plt.gca()

    # Intelligently label the support axis. This has to happen before `a`
    # is converted to a plain ndarray below, which has no `name` attribute.
    label_ax = bool(axlabel)
    if axlabel is None and hasattr(a, "name"):
        axlabel = a.name
        if axlabel is not None:
            label_ax = True

    # Make a a 1-d array; atleast_1d keeps a single observation from
    # collapsing to a 0-d array, on which len() would fail.
    a = np.atleast_1d(np.asarray(a).squeeze())

    # Handle dictionary defaults. Copies are taken because the ``pop`` calls
    # below would otherwise strip keys from the caller's dictionaries.
    hist_kws = {} if hist_kws is None else dict(hist_kws)
    kde_kws = {} if kde_kws is None else dict(kde_kws)
    rug_kws = {} if rug_kws is None else dict(rug_kws)
    fit_kws = {} if fit_kws is None else dict(fit_kws)

    # Get the color from the current color cycle by drawing, then removing,
    # a throwaway point.
    if color is None:
        if vertical:
            line, = ax.plot(0, a.mean())
        else:
            line, = ax.plot(a.mean(), 0)
        color = line.get_color()
        line.remove()

    if hist:
        if bins is None:
            # From http://stats.stackexchange.com/questions/798/
            # ``1.0 / 3`` keeps the exponent fractional under Python 2.
            h = 2 * moss.iqr(a) * len(a) ** -(1.0 / 3)
            if h > 0:
                bins = int(np.ceil((a.max() - a.min()) / h))
            else:
                # Zero (or undefined) IQR: avoid dividing by zero and fall
                # back to a square-root rule with at least one bin.
                bins = max(1, int(np.sqrt(a.size)))
        hist_alpha = hist_kws.pop("alpha", 0.4)
        hist_color = hist_kws.pop("color", color)
        orientation = "horizontal" if vertical else "vertical"
        # NOTE(review): `normed` was replaced by `density` in newer
        # matplotlib releases -- confirm the targeted version before changing.
        ax.hist(a, bins, normed=True, color=hist_color, alpha=hist_alpha,
                orientation=orientation, **hist_kws)

    if kde:
        kde_color = kde_kws.pop("color", color)
        kdeplot(a, vertical=vertical, color=kde_color, ax=ax, **kde_kws)

    if rug:
        rug_color = rug_kws.pop("color", color)
        axis = "y" if vertical else "x"
        rugplot(a, axis=axis, color=rug_color, ax=ax, **rug_kws)

    if fit is not None:
        fit_color = fit_kws.pop("color", "#282828")
        gridsize = fit_kws.pop("gridsize", 500)
        cut = fit_kws.pop("cut", 3)
        clip = fit_kws.pop("clip", (-np.inf, np.inf))
        # The evaluation grid is built from the Scott bandwidth of the data
        bw = sm.nonparametric.bandwidths.bw_scott(a)
        x = _kde_support(a, bw, gridsize, cut, clip)
        params = fit.fit(a)
        y = fit.pdf(x, *params)
        if vertical:
            x, y = y, x
        ax.plot(x, y, color=fit_color, **fit_kws)

    if label_ax:
        # The support axis is the y axis when the plot is vertical
        if vertical:
            ax.set_ylabel(axlabel)
        else:
            ax.set_xlabel(axlabel)

    return ax
Beispiel #5
0
def _freedman_diaconis_bins(a):
    """Calculate number of hist bins using Freedman-Diaconis rule."""
    # From http://stats.stackexchange.com/questions/798/
    a = np.asarray(a)
    h = 2 * moss.iqr(a) / (len(a)**(1 / 3))
    return np.ceil((a.max() - a.min()) / h)