def find_histogram_params(num_buckets, min_value, max_value, multiplier):
    """
    Derive the bucket count, bucket width and starting offset for a histogram.

    The bounds are first scaled by ``multiplier``; a missing bound is treated
    as 0. If either bound is missing, the requested bucket count is kept with
    a bucket width of 1. Otherwise the width is chosen with ``nice_int`` so
    that the requested number of buckets spans the scaled range, the first
    bucket is aligned to a multiple of that width, and the bucket count is
    trimmed so the last bucket is the one containing the scaled maximum.

    :param int num_buckets: The number of buckets requested for the histogram.
    :param float min_value: The smallest value the histogram must include,
        or None when it could not be determined.
    :param float max_value: The largest value the histogram must include,
        or None when it could not be determined.
    :param int multiplier: The factor applied to the bounds to preserve the
        desired precision.
    :return: ``HistogramParams`` built from, in order, the bucket count, the
        bucket width, the start offset and the multiplier.
    """
    lower = multiplier * min_value if min_value is not None else 0
    upper = multiplier * max_value if max_value is not None else 0

    # Bounds can be None when there isn't sufficient data; fall back to
    # unit-width buckets starting at the (truncated) scaled minimum.
    if min_value is None or max_value is None:
        return HistogramParams(num_buckets, 1, int(lower), multiplier)

    raw_width = nice_int((upper - lower) / float(num_buckets))
    # A zero width would divide by zero below, so use at least 1.
    width = 1 if raw_width == 0 else raw_width

    # Snap the first bucket edge to a multiple of the bucket width.
    # NOTE(review): int() truncates toward zero, so for a negative scaled
    # minimum this edge lands above the minimum -- confirm negative bounds
    # are out of scope for callers.
    first_edge = int(lower / width) * width

    # Bucket ends are exclusive, so a maximum that sits exactly on (or past)
    # the final edge would be dropped. Widen the buckets to cover it.
    if upper >= first_edge + num_buckets * width:
        width = nice_int(width + 1)

    # Locate the bucket holding the maximum and keep only the buckets up to
    # and including it, so the tail has no unnecessary empty bins.
    buckets_before_max = int((upper - first_edge) / width)
    last_edge = buckets_before_max * width + first_edge
    bucket_count = (last_edge - first_edge) // width + 1

    return HistogramParams(bucket_count, width, first_edge, multiplier)
def test_nice_int():
    """
    Check ``nice_int`` over contiguous integer ranges and their negations.

    Each table row is ``(start, stop, expected)``: every integer ``x`` in
    ``range(start, stop)`` must map to ``expected``, and ``-x`` must map to
    ``-expected``.
    """
    expectations = [
        (0, 1, 0),
        (1, 2, 1),
        (2, 3, 2),
        (3, 6, 5),
        (6, 11, 10),
        (11, 21, 20),
        (21, 26, 25),
        (26, 51, 50),
        (51, 101, 100),
        (101, 121, 120),
        (121, 201, 200),
        (201, 251, 250),
        (251, 501, 500),
        (501, 751, 750),
        (751, 1001, 1000),
        (1001, 1201, 1200),
        (1201, 2001, 2000),
        (2001, 2501, 2500),
        (2501, 5001, 5000),
        (5001, 7501, 7500),
        (7501, 10001, 10000),
        (10001, 12001, 12000),
        (12001, 20001, 20000),
        (20001, 25001, 25000),
        (25001, 50001, 50000),
        (50001, 75001, 75000),
        (75001, 100001, 100000),
    ]

    def check(value, wanted):
        # Single place for the comparison so both signs share one message.
        rounded = nice_int(value)
        assert rounded == wanted, "{} was rounded to {}, not {}".format(
            value, rounded, wanted
        )

    for lo, hi, wanted in expectations:
        for value in range(lo, hi):
            # Positive input first, then its mirror image.
            check(value, wanted)
            check(-value, -wanted)