def test_translate_to_histogram_with_max_input(self):
    """INT64_MAX must land in the last histogram bucket (offset 57), count 1."""
    counter = DataflowDistributionCounter()
    counter.add_input(INT64_MAX)
    histogram = Mock(firstBucketOffset=None, bucketCounts=None)
    counter.translate_to_histogram(histogram)
    # assertEquals is a deprecated alias removed in Python 3.12; use assertEqual.
    self.assertEqual(histogram.firstBucketOffset, 57)
    self.assertEqual(histogram.bucketCounts, [1])
def test_calculate_bucket_index_within_max_long(self):
    """Values one below each 1/2/5 * 10^k bucket boundary map to the prior bucket.

    Walks every boundary up to INT64_MAX and checks calculate_bucket_index
    assigns (boundary - 1) to the bucket just before the boundary's own.
    """
    counter = DataflowDistributionCounter()
    bucket = 1
    power_of_ten = 1
    while power_of_ten <= INT64_MAX:
        for multiplier in [1, 2, 5]:
            value = multiplier * power_of_ten
            actual_bucket = counter.calculate_bucket_index(value - 1)
            # assertEquals is a deprecated alias removed in Python 3.12.
            self.assertEqual(actual_bucket, bucket - 1)
            bucket += 1
        power_of_ten *= 10
def test_calculate_bucket_index_within_max_long(self):
    """Values just below each 1/2/5 * 10^k boundary fall into the prior bucket."""
    counter = DataflowDistributionCounter()
    expected_bucket = 1
    scale = 1
    while scale <= INT64_MAX:
        for factor in (1, 2, 5):
            boundary = factor * scale
            self.assertEqual(
                counter.calculate_bucket_index(boundary - 1), expected_bucket - 1)
            expected_bucket += 1
        scale *= 10
def test_add_input(self):
    """add_input must accumulate sum/count/min/max and fill histogram buckets."""
    counter = DataflowDistributionCounter()
    expected_buckets = [1, 3, 0, 0, 0, 0, 0, 0, 1, 1]
    expected_sum = 1510
    expected_first_bucket_index = 1
    expected_count = 6
    expected_min = 1
    expected_max = 1000
    for element in [1, 500, 2, 3, 1000, 4]:
        counter.add_input(element)
    histogram = Mock(firstBucketOffset=None, bucketCounts=None)
    counter.translate_to_histogram(histogram)
    # assertEquals is a deprecated alias removed in Python 3.12; use assertEqual.
    self.assertEqual(counter.sum, expected_sum)
    self.assertEqual(counter.count, expected_count)
    self.assertEqual(counter.min, expected_min)
    self.assertEqual(counter.max, expected_max)
    self.assertEqual(histogram.firstBucketOffset, expected_first_bucket_index)
    self.assertEqual(histogram.bucketCounts, expected_buckets)
def run_benchmark(num_runs=100, num_input=10000, seed=None):
    """Benchmark the per-element update cost of DataflowDistributionCounter.

    Args:
      num_runs: number of timed repetitions.
      num_input: number of random input values fed to each run.
      seed: RNG seed; defaults to the current time at call time.
    """
    # Resolve the default at call time: `seed=time.time()` in the signature
    # would be evaluated once at import and reused for every call.
    if seed is None:
        seed = time.time()
    total_time = 0
    random.seed(seed)
    lower_bound = 0
    # sys.maxint was removed in Python 3; sys.maxsize is the equivalent bound.
    upper_bound = sys.maxsize
    inputs = generate_input_values(num_input, lower_bound, upper_bound)
    from apache_beam.transforms import DataflowDistributionCounter
    print("Number of runs:", num_runs)
    print("Input size:", num_input)
    print("Input sequence from %d to %d" % (lower_bound, upper_bound))
    print("Random seed:", seed)
    for i in range(num_runs):
        counter = DataflowDistributionCounter()
        start = time.time()
        counter.add_inputs_for_test(inputs)
        time_cost = time.time() - start
        print("Run %d: Total time cost %g sec" % (i + 1, time_cost))
        total_time += time_cost / num_input
    print("Per element update time cost:", total_time / num_runs)
def run_benchmark(num_runs=100, num_input=10000, seed=time.time()):
    """Time DataflowDistributionCounter.add_inputs_for_test over random input.

    Prints per-run timings and the average per-element update cost.
    """
    random.seed(seed)
    lower_bound, upper_bound = 0, sys.maxsize
    inputs = generate_input_values(num_input, lower_bound, upper_bound)
    from apache_beam.transforms import DataflowDistributionCounter
    print("Number of runs:", num_runs)
    print("Input size:", num_input)
    print("Input sequence from %d to %d" % (lower_bound, upper_bound))
    print("Random seed:", seed)
    accumulated = 0
    for run_index in range(num_runs):
        counter = DataflowDistributionCounter()
        started_at = time.time()
        counter.add_inputs_for_test(inputs)
        elapsed = time.time() - started_at
        print("Run %d: Total time cost %g sec" % (run_index + 1, elapsed))
        accumulated += elapsed / num_input
    print("Per element update time cost:", accumulated / num_runs)
def test_translate_distribution_counter(self):
    """translate_distribution must copy stats and histogram into the proto."""
    counter = DataflowDistributionCounter()
    counter.add_input(1)
    counter.add_input(3)
    proto = dataflow.CounterUpdate()
    apiclient.translate_distribution(counter, proto)
    expected_histogram = mock.Mock(firstBucketOffset=None, bucketCounts=None)
    counter.translate_to_histogram(expected_histogram)
    distribution = proto.distribution
    self.assertEqual(distribution.min.lowBits, counter.min)
    self.assertEqual(distribution.max.lowBits, counter.max)
    self.assertEqual(distribution.sum.lowBits, counter.sum)
    self.assertEqual(distribution.count.lowBits, counter.count)
    self.assertEqual(
        distribution.histogram.bucketCounts, expected_histogram.bucketCounts)
    self.assertEqual(
        distribution.histogram.firstBucketOffset,
        expected_histogram.firstBucketOffset)
def test_translate_to_histogram_with_max_input(self):
    """The largest int64 value must translate to bucket offset 57, count 1."""
    distribution = DataflowDistributionCounter()
    distribution.add_input(INT64_MAX)
    result = Mock(firstBucketOffset=None, bucketCounts=None)
    distribution.translate_to_histogram(result)
    self.assertEqual(result.firstBucketOffset, 57)
    self.assertEqual(result.bucketCounts, [1])
def test_add_input(self):
    """Feeding a fixed sequence yields the expected stats and bucket counts."""
    counter = DataflowDistributionCounter()
    for value in (1, 500, 2, 3, 1000, 4):
        counter.add_input(value)
    histogram = Mock(firstBucketOffset=None, bucketCounts=None)
    counter.translate_to_histogram(histogram)
    self.assertEqual(counter.sum, 1510)
    self.assertEqual(counter.count, 6)
    self.assertEqual(counter.min, 1)
    self.assertEqual(counter.max, 1000)
    self.assertEqual(histogram.firstBucketOffset, 1)
    self.assertEqual(histogram.bucketCounts, [1, 3, 0, 0, 0, 0, 0, 0, 1, 1])
def test_translate_distribution_using_dataflow_distribution_counter(self):
    """Counter stats and histogram must round-trip into the CounterUpdate proto."""
    counter = DataflowDistributionCounter()
    counter.add_input(1)
    counter.add_input(3)
    proto = dataflow.CounterUpdate()
    apiclient.translate_distribution(counter, proto)
    expected_histogram = mock.Mock(firstBucketOffset=None, bucketCounts=None)
    counter.translate_to_histogram(expected_histogram)
    distribution = proto.distribution
    self.assertEqual(distribution.min.lowBits, counter.min)
    self.assertEqual(distribution.max.lowBits, counter.max)
    self.assertEqual(distribution.sum.lowBits, counter.sum)
    self.assertEqual(distribution.count.lowBits, counter.count)
    self.assertEqual(
        distribution.histogram.bucketCounts, expected_histogram.bucketCounts)
    self.assertEqual(
        distribution.histogram.firstBucketOffset,
        expected_histogram.firstBucketOffset)
def test_calculate_bucket_index_with_input_0(self):
    """An input of 0 must map to bucket index 0."""
    counter = DataflowDistributionCounter()
    index = counter.calculate_bucket_index(0)
    # assertEquals is a deprecated alias removed in Python 3.12; use assertEqual.
    self.assertEqual(index, 0)
def test_calculate_bucket_index_with_input_0(self):
    """A zero input must be assigned to the first bucket (index 0)."""
    distribution_counter = DataflowDistributionCounter()
    self.assertEqual(distribution_counter.calculate_bucket_index(0), 0)