Beispiel #1
1
def test_freedman_bin_width(N=10000, rseed=0):
    """Compare ``freedman_bin_width`` with a percentile-based reference.

    ``N`` normally distributed samples are drawn after seeding NumPy's global
    generator with ``rseed``.  The width returned by ``freedman_bin_width``
    must agree with ``2 * (q75 - q25) / N ** (1/3)``, where the quartiles
    come from ``np.percentile``.
    """
    np.random.seed(rseed)
    sample = np.random.normal(size=N)

    # The function under test is invoked inside the
    # catch_warnings(AstroMLDeprecationWarning) context.
    with catch_warnings(AstroMLDeprecationWarning):
        width = freedman_bin_width(sample)

    lower_quartile, upper_quartile = np.percentile(sample, [25, 75])
    interquartile_range = upper_quartile - lower_quartile
    expected_width = 2 * interquartile_range / N ** (1. / 3)

    assert_allclose(width, expected_width)
Beispiel #2
0
def hist(x, bins=10, range=None, *args, **kwargs):
    """Enhanced histogram

    This is a histogram function that enables the use of more sophisticated
    algorithms for determining bins.  Aside from the `bins` argument allowing
    a string specifying how bins are computed, the parameters are the same
    as pylab.hist().

    Parameters
    ----------
    x : array_like
        array of data to be histogrammed

    bins : int or list or array or str (optional)
        If bins is a string, then it must be one of:
        'blocks' : use bayesian blocks for dynamic bin widths
        'knuth' : use Knuth's rule to determine bins
        'scott' : use Scott's rule to determine bins
        'freedman' : use the Freedman-diaconis rule to determine bins
        ('knuths', 'scotts' and 'freedmans' are accepted as aliases.)
        Any non-string value is handed to ``ax.hist`` unchanged.

    range : tuple or None (optional)
        the minimum and maximum range for the histogram.  If not specified,
        it will be (x.min(), x.max())

    ax : Axes instance (optional)
        specify the Axes on which to draw the histogram.  If not specified
        (or None), then the current active axes will be used.

    *args :
        accepted for signature compatibility; not used.

    **kwargs :
        other keyword arguments are described in pylab.hist().

    Returns
    -------
    Whatever ``ax.hist(x, bins, range, **kwargs)`` returns.

    Raises
    ------
    ValueError
        If `bins` is a string that is not one of the recognized codes.
    """
    x = np.asarray(x)

    ax = kwargs.pop('ax', None)
    if ax is None:
        ax = plt.gca()

    # Only a string can select a bin-finding rule.  Checking the type first
    # matters: testing an ndarray of bin edges for membership in a list of
    # strings compares the array with each string, which newer NumPy turns
    # into an elementwise result whose truth value is ambiguous.
    if isinstance(bins, str):
        knuth_codes = ('knuth', 'knuths')
        scott_codes = ('scott', 'scotts')
        freedman_codes = ('freedman', 'freedmans')
        recognized = ('blocks',) + knuth_codes + scott_codes + freedman_codes
        if bins not in recognized:
            raise ValueError("unrecognized bin code: '%s'" % bins)

        # if range is specified, we need to truncate the data for
        # the bin-finding routines
        if range is not None:
            x = x[(x >= range[0]) & (x <= range[1])]

        if bins == 'blocks':
            bins = bayesian_blocks(x)
        elif bins in knuth_codes:
            _, bins = knuth_bin_width(x, True)
        elif bins in scott_codes:
            _, bins = scotts_bin_width(x, True)
        else:
            _, bins = freedman_bin_width(x, True)

    return ax.hist(x, bins, range, **kwargs)
Beispiel #3
0
def hist(x, bins=10, range=None, *args, **kwargs):
    """Enhanced histogram

    This is a histogram function that enables the use of more sophisticated
    algorithms for determining bins.  Aside from the `bins` argument allowing
    a string specifying how bins are computed, the parameters are the same
    as pylab.hist().

    Parameters
    ----------
    x : array_like
        array of data to be histogrammed

    bins : int or list or array or str (optional)
        If bins is a string, then it must be one of:
        'blocks' : use bayesian blocks for dynamic bin widths
        'knuth' : use Knuth's rule to determine bins
        'scott' : use Scott's rule to determine bins
        'freedman' : use the Freedman-diaconis rule to determine bins
        ('knuths', 'scotts' and 'freedmans' are accepted as aliases.)
        Any non-string value is forwarded to ``ax.hist`` as given.

    range : tuple or None (optional)
        the minimum and maximum range for the histogram.  If not specified,
        it will be (x.min(), x.max())

    ax : Axes instance (optional)
        specify the Axes on which to draw the histogram.  If not specified
        (or None), then the current active axes will be used.

    *args :
        accepted for signature compatibility; not used.

    **kwargs :
        other keyword arguments are described in pylab.hist().

    Returns
    -------
    The return value of ``ax.hist(x, bins, range, **kwargs)``.

    Raises
    ------
    ValueError
        If `bins` is a string other than the codes listed above.
    """
    x = np.asarray(x)

    ax = kwargs.pop('ax', None)
    if ax is None:
        ax = plt.gca()

    if not isinstance(bins, str):
        # Integers, lists and arrays of edges go straight to matplotlib.
        # They must not be tested with ``in [...strings...]``: for an ndarray
        # that comparison is elementwise and its truth value is ambiguous.
        return ax.hist(x, bins, range, **kwargs)

    knuth_codes = ('knuth', 'knuths')
    scott_codes = ('scott', 'scotts')
    freedman_codes = ('freedman', 'freedmans')
    if bins not in ('blocks',) + knuth_codes + scott_codes + freedman_codes:
        raise ValueError("unrecognized bin code: '%s'" % bins)

    # if range is specified, we need to truncate the data for
    # the bin-finding routines
    if range is not None:
        x = x[(x >= range[0]) & (x <= range[1])]

    if bins == 'blocks':
        bins = bayesian_blocks(x)
    elif bins in knuth_codes:
        _, bins = knuth_bin_width(x, True)
    elif bins in scott_codes:
        _, bins = scotts_bin_width(x, True)
    else:
        _, bins = freedman_bin_width(x, True)

    return ax.hist(x, bins, range, **kwargs)
Beispiel #4
0
def test_freedman_bin_width(N=10000, rseed=0):
    """Verify ``freedman_bin_width`` on seeded standard-normal data.

    The expected value is twice the interquartile range (25th to 75th
    percentile via ``np.percentile``) divided by the cube root of ``N``.
    """
    np.random.seed(rseed)
    data = np.random.normal(size=N)

    # Call the function under test inside the
    # catch_warnings(AstroMLDeprecationWarning) context.
    with catch_warnings(AstroMLDeprecationWarning):
        computed = freedman_bin_width(data)

    quartiles = np.percentile(data, [25, 75])
    q25 = quartiles[0]
    q75 = quartiles[1]
    reference = 2 * (q75 - q25) / N ** (1. / 3)

    assert_allclose(computed, reference)
    def _size_bins(self,hist,bin_tool,**kwargs):
        """Wrapper for astroML routines to choose optimal bin widths."""

        if bin_tool == 'freedman':
            _,bins = density_estimation.freedman_bin_width(hist,return_bins=True)
        elif bin_tool == 'scotts':
            _,bins = density_estimation.scotts_bin_width(hist,return_bins=True)
        elif bin_tool == 'knuth':
            _,bins = density_estimation.knuth_bin_width(hist,return_bins=True, disp=False)
        elif bin_tool == 'blocks':
            bins = density_estimation.bayesian_blocks(hist,**kwargs)
        elif type(bin_tool) == type(int()) or type(bin_tool) == type(np.int64()) or type(bin_tool) == type(np.int32()):
            bins=bin_tool
        else:
            self.logger.warning("Unrecognized bin_tool option. Using Freedman-Diaconis rule.")
            _,bins = density_estimation.freedman_bin_width(hist,return_bins=True)

        return bins
Beispiel #6
0
def test_freedman_bin_width(N=10000, rseed=0):
    """Check ``freedman_bin_width`` against order statistics of the sample.

    The reference width is ``2 * (x75 - x25) / N ** (1/3)``, where ``x25``
    and ``x75`` are the sorted-sample elements at positions ``N // 4 - 1``
    and ``(3 * N) // 4 - 1``.
    """
    np.random.seed(rseed)
    sample = np.random.normal(size=N)
    width = freedman_bin_width(sample)

    # order[k] is the index in `sample` of its k-th smallest value.
    order = np.argsort(sample)
    lower = sample[order[N // 4 - 1]]
    upper = sample[order[(3 * N) // 4 - 1]]

    expected = 2 * (upper - lower) / N ** (1. / 3)
    assert_allclose(width, expected)
Beispiel #7
0
def test_freedman_bin_width(N=10000, rseed=0):
    """Check ``freedman_bin_width`` against order statistics of the sample.

    Draws ``N`` standard-normal samples (NumPy's global generator seeded with
    ``rseed``) and compares the returned width with
    ``2 * (x75 - x25) / N ** (1/3)``, where ``x25`` and ``x75`` are the
    sorted-sample elements at positions ``N // 4 - 1`` and
    ``(3 * N) // 4 - 1``.
    """
    np.random.seed(rseed)
    X = np.random.normal(size=N)
    delta = freedman_bin_width(X)

    indices = np.argsort(X)
    # Floor division keeps the positions integral: under Python 3 true
    # division ``N / 4`` yields a float, which is not a valid array index.
    i25 = indices[N // 4 - 1]
    i75 = indices[(3 * N) // 4 - 1]

    assert_allclose(delta, 2 * (X[i75] - X[i25]) / N ** (1. / 3))
def hist(x,
         bins=10,
         fitness='events',
         gamma=None,
         p0=0.05,
         errorbars=None,
         suppress_zero=False,
         *args,
         **kwargs):
    """Enhanced histogram, based on `hist` function from astroML.

    This is a histogram function that enables the use of more sophisticated
    algorithms for determining bins.  Aside from the `bins` argument allowing
    a string specifying how bins are computed, additional scaling, errorbar,
    and plotting methods are introduced.  All other kwargs can be used as in
    `pylab.hist()`.

    Parameters
    ----------
    x : array_like
        Array of data to be histogrammed

    bins : int or list or array or str (optional)
        If bins is a string, then it must be one of:
        'blocks' : use bayesian blocks for dynamic bin widths
        'knuth' : use Knuth's rule to determine bins
        'scott' : use Scott's rule to determine bins
        'freedman' : use the Freedman-diaconis rule to determine bins
        ('block', 'knuths', 'scotts' and 'freedmans' are accepted aliases.)

    fitness : str
        Param used for Bayesian Blocks binning.

    gamma: Number
        Param used for Bayesian Blocks binning, ignored if `p0` is present

    p0 : Number
        Fake rate value for Bayesian Blocks binning, supersedes `gamma`

    errorbars : bool (optional)
        If true, error bars built from `poisson_error` are drawn on the bins.

    suppress_zero : bool (optional)
        Passed as the second argument to `poisson_error` for every bin.

    ax : Axes instance (optional)
        Specify the Axes on which to draw the histogram.  If not specified,
        then the current active axes will be used.

    scale : Number or str (optional)
        If Number, all bin contents are multiplied by the given value.
        If str:
            'binwidth' : every bin content is divided by the bin width.
        Any other value triggers a warning and is ignored.  Scaling is also
        ignored (with a warning) for stacked histograms.

    *args :
        Accepted for signature compatibility; not used.

    **kwargs :
        Overloaded kwargs variants:
            histtype:
                'marker' / 'markers' : plot the bin contents as markers,
                centered on the bin centers.  If this method is chosen, all
                additional kwargs for `pylab.plot()` can be used.
            normed / density:
                Either name requests a normalized histogram.
            range:
                (min, max) of the histogram; also used to truncate the data
                before a string-selected bin rule is evaluated.

        Other keyword arguments are described in `pylab.hist()`.

    Returns
    -------
    bin_content : array or list
        The (scaled, if scaling was applied) bin contents.
    bins : array
        The bin edges.
    vis_objects :
        The objects returned by `ax.hist` or `ax.plot`; None when markers
        are drawn with error bars.

    Raises
    ------
    ValueError
        If `bins` is a string that is not a recognized bin code.
    """
    # do initial checks and set-up for overloaded arguments
    x = np.asarray(x)
    bins_is_code = isinstance(bins, str)

    if bins_is_code and "weights" in kwargs:
        warnings.warn(
            "weights argument is not supported for this binning method: it will be ignored."
        )
        kwargs.pop('weights')

    if 'ax' in kwargs:
        ax = kwargs.pop('ax')
    else:
        ax = plt.gca()

    hrange = kwargs.pop('range', None)

    # Only a string can select a bin rule.  The type check comes first because
    # testing an ndarray of edges for membership in a list of strings is an
    # elementwise comparison whose truth value is ambiguous.
    if bins_is_code:
        block_codes = ('block', 'blocks')
        knuth_codes = ('knuth', 'knuths')
        scott_codes = ('scott', 'scotts')
        freedman_codes = ('freedman', 'freedmans')
        if bins not in block_codes + knuth_codes + scott_codes + freedman_codes:
            raise ValueError("unrecognized bin code: '%s'" % bins)

        # if range is specified, we need to truncate the data for
        # the bin-finding routines
        if hrange is not None:
            x = x[(x >= hrange[0]) & (x <= hrange[1])]

        if bins in block_codes:
            bins = bayesian_blocks(t=x, fitness=fitness, p0=p0, gamma=gamma)
        elif bins in knuth_codes:
            _, bins = knuth_bin_width(x, True, disp=False)
        elif bins in scott_codes:
            _, bins = scotts_bin_width(x, True)
        else:
            _, bins = freedman_bin_width(x, True)

    scale = kwargs.pop('scale', None)
    if scale and kwargs.get('stacked'):
        warnings.warn(
            "scaling is not currently supported for stacked histograms: scaling will be ignored."
        )
        scale = None
    # Validate the scale once, before anything is drawn, so the scaling code
    # below only ever sees a Number or 'binwidth'.
    scale_by_width = isinstance(scale, str) and scale == 'binwidth'
    if scale and not (scale_by_width or isinstance(scale, Number)):
        warnings.warn(
            "scale argument value `%s` not supported: it will be ignored." %
            (scale, ))
        scale = None

    if kwargs.get('histtype') in ('marker', 'markers'):
        marker = kwargs.pop('histtype')
    else:
        marker = None

    # `normed` is the historical spelling; `density` is what current
    # matplotlib and numpy understand.  Accept either from the caller.
    normed = kwargs.pop('normed', False)
    density = kwargs.pop('density', False)
    normed = bool(normed or density)

    # generate histogram-like object
    vis_objects = None
    vis_objects_err = None
    if marker:
        markerstyle = kwargs.pop('marker', '.')
        linestyle = kwargs.pop('linestyle', '')
        bin_content, bins = np.histogram(x, bins, density=normed, range=hrange)
    else:
        bin_content, bins, vis_objects = ax.hist(x,
                                                 bins,
                                                 range=hrange,
                                                 density=normed,
                                                 **kwargs)

    bin_content = np.asarray(bin_content, dtype=float)
    if normed:
        # The Poisson errors are derived from the raw (un-normalized) counts.
        bin_content_raw, _ = np.histogram(x,
                                          bins,
                                          density=False,
                                          range=hrange)
        bin_content_raw = np.asarray(bin_content_raw)
    else:
        bin_content_raw = bin_content

    width = (bins[1:] - bins[:-1])
    bin_centers = bins[:-1] + width * 0.5

    err_low = np.asarray(
        [poisson_error(bc, suppress_zero)[0] for bc in bin_content_raw])
    err_hi = np.asarray(
        [poisson_error(bc, suppress_zero)[1] for bc in bin_content_raw])
    # Bring the raw-count errors onto the same scale as the plotted contents.
    # NOTE(review): empty bins give 0/0 = NaN here, so their errors become
    # NaN -- confirm this is intended.
    err_scale = bin_content / bin_content_raw
    err_low = err_low * err_scale
    err_hi = err_hi * err_scale
    bin_error = [err_low, err_hi]

    if marker:
        if errorbars:
            vis_objects_err = ax.errorbar(bin_centers,
                                          bin_content,
                                          linestyle=linestyle,
                                          marker=markerstyle,
                                          yerr=bin_error,
                                          **kwargs)
        else:
            vis_objects = ax.plot(bin_centers,
                                  bin_content,
                                  linestyle=linestyle,
                                  marker=markerstyle,
                                  **kwargs)
    elif errorbars:
        # Draw the error bars in the colour of the histogram itself.
        vis_color = vis_objects[0].get_facecolor()
        if kwargs.pop('histtype', None) == 'step':
            vis_color = vis_objects[0].get_edgecolor()
        # These were meant for ax.hist only; errorbar gets fixed values.
        for key in ('color', 'weights', 'label', 'linewidth'):
            kwargs.pop(key, None)
        vis_objects_err = ax.errorbar(bin_centers,
                                      bin_content,
                                      linestyle='',
                                      marker='.',
                                      yerr=bin_error,
                                      linewidth=2,
                                      color=vis_color,
                                      **kwargs)

    # perform any scaling if necessary, including redrawing of the scaled
    # objects
    bin_content_out = bin_content
    if scale:

        def rescale(value, i):
            # Apply the requested scaling to a quantity belonging to bin i.
            if scale_by_width:
                return value / (bins[i + 1] - bins[i])
            return value * scale

        bin_content_scaled = []
        if vis_objects is not None:
            if isinstance(vis_objects[0], matplotlib.patches.Rectangle):
                # Bar-type histogram: one Rectangle per bin.
                for i, bc in enumerate(bin_content):
                    bin_content_scaled.append(rescale(bc, i))
                    vis_objects[i].set_height(
                        rescale(vis_objects[i].get_height(), i))

            elif isinstance(vis_objects[0], matplotlib.patches.Polygon):
                # Step-type histogram: a single outline whose vertices
                # 2*i + 1 and 2*i + 2 carry the height of bin i.
                xy = vis_objects[0].get_xy()
                for i, bc in enumerate(bin_content):
                    bin_content_scaled.append(rescale(bc, i))
                    xy[2 * i + 1, 1] = bin_content_scaled[i]
                    xy[2 * i + 2, 1] = bin_content_scaled[i]
                vis_objects[0].set_xy(xy)
            # NOTE(review): any other artist type (e.g. the Line2D objects of
            # a marker plot without error bars) is left unscaled.

        if vis_objects_err is not None:
            if len(bin_content_scaled) != len(bin_content):
                bin_content_scaled = [
                    rescale(bc, i) for i, bc in enumerate(bin_content)
                ]
            for i in range(len(bin_content)):
                bin_error[0][i] = rescale(bin_error[0][i], i)
                bin_error[1][i] = rescale(bin_error[1][i], i)
            bin_content_scaled = np.asarray(bin_content_scaled)

            data_line, caplines, barlinecols = vis_objects_err
            data_line.set_ydata(bin_content_scaled)
            # Cap lines only exist when a non-zero capsize is in effect.
            if len(caplines) >= 2:
                caplines[0].set_ydata(bin_content_scaled - bin_error[0])
                caplines[1].set_ydata(bin_content_scaled + bin_error[1])
            tmplines = barlinecols[0].get_segments()
            for i, bc in enumerate(bin_content_scaled):
                tmplines[i][0][1] = bc - bin_error[0][i]
                tmplines[i][1][1] = bc + bin_error[1][i]
            barlinecols[0].set_segments(tmplines)

        # Report scaled contents only if something was actually rescaled;
        # otherwise the returned values match what is drawn.
        if len(bin_content_scaled) == len(bin_content):
            bin_content_out = bin_content_scaled

        ax.relim()
        ax.autoscale_view(False, False, True)

    return bin_content_out, bins, vis_objects