Exemple #1
0
def find_histogram_params(num_buckets, min_value, max_value, multiplier):
    """
    Compute the parameters to use for the histogram. Using the provided
    arguments, ensure that the generated histogram encapsulates the desired range.

    The bounds are scaled by ``multiplier`` before the bucket size and start
    offset are derived, so both of those results are expressed in scaled units.

    :param int num_buckets: The number of buckets the histogram should contain.
        The returned bucket count is recomputed and may differ from this value.
    :param float min_value: The minimum value allowed to be in the histogram inclusive.
        May be None when there isn't sufficient data.
    :param float max_value: The maximum value allowed to be in the histogram inclusive.
        May be None when there isn't sufficient data.
    :param int multiplier: The multiplier we should use to preserve the desired precision.
    :returns: ``HistogramParams(num_buckets, bucket_size, start_offset, multiplier)``.
        When either bound is None the bucket size is 1 and ``num_buckets`` is
        returned unchanged.
    """
    import math

    scaled_min = 0 if min_value is None else multiplier * min_value
    scaled_max = 0 if max_value is None else multiplier * max_value

    # finding the bounds might result in None if there isn't sufficient data;
    # in that case just align the first bin with the (scaled) minimum value
    if min_value is None or max_value is None:
        return HistogramParams(num_buckets, 1, int(scaled_min), multiplier)

    bucket_size = nice_int((scaled_max - scaled_min) / float(num_buckets))

    # a zero-width bucket would make the divisions below fail
    if bucket_size == 0:
        bucket_size = 1

    # Adjust the first bin to a nice value. This must round *down* (floor)
    # rather than truncate toward zero: for a negative minimum, truncation
    # would place the first bucket above the minimum and exclude it.
    start_offset = int(math.floor(scaled_min / bucket_size)) * bucket_size

    # Sometimes the max value lies on the bucket boundary, and since the end
    # of the bucket is exclusive, it gets excluded. To account for that, we
    # increase the width of the buckets to cover the max value.
    if start_offset + num_buckets * bucket_size <= scaled_max:
        bucket_size = nice_int(bucket_size + 1)

    # compute the bin for max value and adjust the number of buckets accordingly
    # to minimize unnecessary empty bins at the tail
    last_bin = int(
        (scaled_max - start_offset) / bucket_size) * bucket_size + start_offset
    num_buckets = (last_bin - start_offset) // bucket_size + 1

    return HistogramParams(num_buckets, bucket_size, start_offset, multiplier)
Exemple #2
0
def test_nice_int():
    """
    Check nice_int against a table of integer ranges.

    Every integer in the half-open range ``[first, end)`` must map to the
    listed value, and the negated integer must map to the negated value.
    """
    # (first input, one past the last input, expected result)
    rounding_table = [
        (0, 1, 0),
        (1, 2, 1),
        (2, 3, 2),
        (3, 6, 5),
        (6, 11, 10),
        (11, 21, 20),
        (21, 26, 25),
        (26, 51, 50),
        (51, 101, 100),
        (101, 121, 120),
        (121, 201, 200),
        (201, 251, 250),
        (251, 501, 500),
        (501, 751, 750),
        (751, 1001, 1000),
        (1001, 1201, 1200),
        (1201, 2001, 2000),
        (2001, 2501, 2500),
        (2501, 5001, 5000),
        (5001, 7501, 7500),
        (7501, 10001, 10000),
        (10001, 12001, 12000),
        (12001, 20001, 20000),
        (20001, 25001, 25000),
        (25001, 50001, 50000),
        (50001, 75001, 75000),
        (75001, 100001, 100000),
    ]

    failure_template = "{} was rounded to {}, not {}"

    for first, end, nice_value in rounding_table:
        for candidate in range(first, end):
            # positive input first, then its mirror image
            for given, wanted in ((candidate, nice_value), (-candidate, -nice_value)):
                actual = nice_int(given)
                assert actual == wanted, failure_template.format(
                    given, actual, wanted
                )