コード例 #1
0
  def test_quantiles_merge_accumulators(self):
    """Merge many small accumulators and check the extracted quantiles.

    The input is split into `num_accumulators` shards; each shard is fed into
    its own accumulator (one unweighted, one weighted), the accumulators are
    merged, and the resulting quantiles are compared with known values.
    """
    # max_num_elements is deliberately small so that buffers get collapsed and
    # interpolation kicks in. With these settings buffer_size=125 and
    # num_buffers=4, i.e. only half of the input values may be retained.
    num_accumulators = 100
    num_quantiles = 5
    eps = 0.01
    max_num_elements = 1000
    plain_fn = ApproximateQuantilesCombineFn.create(
        num_quantiles, eps, max_num_elements)
    weighted_fn = ApproximateQuantilesCombineFn.create(
        num_quantiles, eps, max_num_elements, weighted=True)

    data = list(range(1000))
    weights = list(reversed(range(1000)))
    step = math.ceil(len(data) / num_accumulators)

    plain_parts = []
    weighted_parts = []
    for i in range(num_accumulators):
      shard = slice(i * step, (i + 1) * step)
      plain_acc = plain_fn.create_accumulator()
      weighted_acc = weighted_fn.create_accumulator()
      # Each pair is (element, weight); the unweighted fn only sees the
      # element, the weighted fn receives the whole pair.
      for pair in zip(data[shard], weights[shard]):
        plain_acc = plain_fn.add_input(plain_acc, pair[0])
        weighted_acc = weighted_fn.add_input(weighted_acc, pair)
      plain_parts.append(plain_acc)
      weighted_parts.append(weighted_acc)

    merged_plain = plain_fn.merge_accumulators(plain_parts)
    merged_weighted = weighted_fn.merge_accumulators(weighted_parts)
    quantiles = plain_fn.extract_output(merged_plain)
    quantiles_weighted = weighted_fn.extract_output(merged_weighted)

    # The achieved accuracy is actually far better than eps; this only checks
    # that the minimal guaranteed accuracy is met.
    tolerance = max_num_elements * eps
    expectations = (
        (quantiles, [0, 249, 499, 749, 999]),
        (quantiles_weighted, [0, 133, 292, 499, 999]),
    )
    for observed, reference in expectations:
      for q, actual_q in zip(observed, reference):
        self.assertAlmostEqual(q, actual_q, delta=tolerance)
コード例 #2
0
ファイル: stats_test.py プロジェクト: ostrokach/beam
 def test_correctness(self, epsilon, maxInputSize, *args):
     """
     Verify that buffers are correct according to the two constraint equations.

     With b = number of buffers, k = buffer size, N = maxInputSize and
     e = epsilon, the checked constraints are:

       (b-2) * 2^(b-2) + 1/2 <= e * N
       k * 2^(b-1) >= N

     Any extra positional arguments are accepted and ignored (presumably so
     the same parameter rows can be shared with other tests -- confirm
     against the caller).
     """
     combine_fn = ApproximateQuantilesCombineFn.create(
         num_quantiles=10, max_num_elements=maxInputSize, epsilon=epsilon)
     b = combine_fn._num_buffers
     k = combine_fn._buffer_size
     n = maxInputSize
     self.assertLessEqual((b - 2) * (1 << (b - 2)) + 0.5, (epsilon * n),
                          '(b-2)2^(b-2) + 1/2 <= eN')
     # The capacity constraint is k * 2^(b-1), not (2k)^(b-1): only the power
     # of two is exponentiated. The latter is a far looser bound and would let
     # an undersized configuration slip through.
     self.assertGreaterEqual(k * (1 << (b - 1)), n, 'k2^(b-1) >= N')
コード例 #3
0
  def test_efficiency(
      self, epsilon, maxInputSize, expectedNumBuffers, expectedBufferSize):
    """
    Check that the buffer configuration chosen for the given epsilon and
    maximum input size equals the expected reference table values.
    """
    combine_fn = ApproximateQuantilesCombineFn.create(
        epsilon=epsilon, max_num_elements=maxInputSize, num_quantiles=10)

    # (expected value, attribute holding the actual value, failure message)
    checks = (
        (expectedNumBuffers, '_num_buffers', "Number of buffers"),
        (expectedBufferSize, '_buffer_size', "Buffer size"),
    )
    for expected, attribute, label in checks:
      self.assertEqual(expected, getattr(combine_fn, attribute), label)