def test_translate_to_histogram_with_max_input(self):
    """The largest int64 value must land alone in the final bucket (offset 57)."""
    dist = DataflowDistributionCounter()
    dist.add_input(INT64_MAX)
    result = Mock(firstBucketOffset=None, bucketCounts=None)
    dist.translate_to_histogram(result)
    self.assertEqual(result.firstBucketOffset, 57)
    self.assertEqual(result.bucketCounts, [1])
 def test_calculate_bucket_index_within_max_long(self):
     """Every 1/2/5 * 10^k boundary up to INT64_MAX maps value-1 to the
     bucket just below the boundary's own bucket."""
     counter = DataflowDistributionCounter()
     expected_index = 0
     power = 1
     while power <= INT64_MAX:
         for boundary in (power, 2 * power, 5 * power):
             self.assertEqual(
                 counter.calculate_bucket_index(boundary - 1), expected_index)
             expected_index += 1
         power *= 10
 def test_add_input(self):
   counter = DataflowDistributionCounter()
   expected_buckets = [1, 3, 0, 0, 0, 0, 0, 0, 1, 1]
   expected_sum = 1510
   expected_first_bucket_index = 1
   expected_count = 6
   expected_min = 1
   expected_max = 1000
   for element in [1, 500, 2, 3, 1000, 4]:
     counter.add_input(element)
   histogram = Mock(firstBucketOffset=None, bucketCounts=None)
   counter.translate_to_histogram(histogram)
   self.assertEqual(counter.sum, expected_sum)
   self.assertEqual(counter.count, expected_count)
   self.assertEqual(counter.min, expected_min)
   self.assertEqual(counter.max, expected_max)
   self.assertEqual(histogram.firstBucketOffset, expected_first_bucket_index)
   self.assertEqual(histogram.bucketCounts, expected_buckets)
# --- Example 4 ---
def run_benchmark(num_runs=100, num_input=10000, seed=None):
    """Benchmark per-element update cost of DataflowDistributionCounter.

    Args:
      num_runs: Number of timed repetitions to average over.
      num_input: Number of random values fed to the counter per run.
      seed: RNG seed; defaults to the current time *at call time*. (The old
        default ``seed=time.time()`` was evaluated once at import, so every
        call silently shared the same seed.)
    """
    if seed is None:
        seed = time.time()
    total_time = 0
    random.seed(seed)
    lower_bound = 0
    # sys.maxint was removed in Python 3; sys.maxsize is the equivalent bound.
    upper_bound = sys.maxsize
    inputs = generate_input_values(num_input, lower_bound, upper_bound)
    from apache_beam.transforms import DataflowDistributionCounter
    print("Number of runs:", num_runs)
    print("Input size:", num_input)
    print("Input sequence from %d to %d" % (lower_bound, upper_bound))
    print("Random seed:", seed)
    for i in range(num_runs):
        counter = DataflowDistributionCounter()
        start = time.time()
        counter.add_inputs_for_test(inputs)
        time_cost = time.time() - start
        print("Run %d: Total time cost %g sec" % (i + 1, time_cost))
        # Normalize each run to per-element cost before averaging over runs.
        total_time += time_cost / num_input
    print("Per element update time cost:", total_time / num_runs)
# --- Example 5 ---
 def test_translate_distribution_using_dataflow_distribution_counter(self):
     """translate_distribution mirrors the counter's stats and histogram
     into the CounterUpdate proto."""
     source = DataflowDistributionCounter()
     for element in (1, 3):
         source.add_input(element)
     proto = dataflow.CounterUpdate()
     apiclient.translate_distribution(source, proto)
     expected_histogram = mock.Mock(firstBucketOffset=None, bucketCounts=None)
     source.translate_to_histogram(expected_histogram)
     distribution = proto.distribution
     self.assertEqual(distribution.min.lowBits, source.min)
     self.assertEqual(distribution.max.lowBits, source.max)
     self.assertEqual(distribution.sum.lowBits, source.sum)
     self.assertEqual(distribution.count.lowBits, source.count)
     self.assertEqual(
         distribution.histogram.bucketCounts, expected_histogram.bucketCounts)
     self.assertEqual(
         distribution.histogram.firstBucketOffset,
         expected_histogram.firstBucketOffset)
 def test_calculate_bucket_index_with_input_0(self):
     """A zero value falls into the very first bucket (index 0)."""
     counter = DataflowDistributionCounter()
     self.assertEqual(counter.calculate_bucket_index(0), 0)