예제 #1
0
    def test_tuple_combine_fn_batched_merge(self):
        """Check that a batched tuple merge stays within the accumulator budget.

        Accumulators are produced lazily by a generator and tracked through a
        class-level counter. The test fails if an accumulator is ever
        constructed while that counter already exceeds the configured limit.
        """
        fn_count = 10
        accumulator_budget = 30
        tuples_to_merge = 20
        # Accumulator tuples that fit in the budget, less one slot that is
        # reserved for the merge result.
        batch_size = accumulator_budget // fn_count - 1

        class CountedAccumulator:
            # Number of accumulators currently counted as live.
            count = 0
            # Set once an accumulator is created while over the budget.
            oom = False

            def __init__(self):
                if CountedAccumulator.count <= accumulator_budget:
                    CountedAccumulator.count += 1
                    return
                CountedAccumulator.oom = True

        class CountedAccumulatorCombineFn(beam.CombineFn):
            def create_accumulator(self):
                return CountedAccumulator()

            def merge_accumulators(self, accumulators):
                # The merge result adds one live accumulator and every merged
                # input is released.
                released = sum(1 for _ in accumulators)
                CountedAccumulator.count += 1 - released

        component_fns = [
            CountedAccumulatorCombineFn() for _ in range(fn_count)]
        combine_fn = combine.TupleCombineFn(
            *component_fns, merge_accumulators_batch_size=batch_size)
        lazy_accumulators = (
            combine_fn.create_accumulator() for _ in range(tuples_to_merge))
        combine_fn.merge_accumulators(lazy_accumulators)
        assert not CountedAccumulator.oom
예제 #2
0
 def test_tuple_combine_fn(self):
     """Check that TupleCombineFn combines each tuple position separately.

     The three positions of the input tuples are paired with max,
     MeanCombineFn and sum respectively, and a single result tuple is
     expected.
     """
     rows = [('a', 100, 0.0), ('b', 10, -1), ('c', 1, 100)]
     per_position_fn = combine.TupleCombineFn(
         max, combine.MeanCombineFn(), sum)
     with TestPipeline() as pipeline:
         combined = (
             pipeline
             | Create(rows)
             | beam.CombineGlobally(per_position_fn).without_defaults())
         assert_that(combined, equal_to([('c', 111.0 / 3, 99.0)]))
예제 #3
0
 def test_tuple_combine_fn_without_defaults(self):
     """Check TupleCombineFn.with_common_input() on scalar inputs.

     min, MeanCombineFn and max are combined over the same input elements
     and the result is expected to be one (min, mean, max) tuple.
     """
     common_input_fn = combine.TupleCombineFn(
         min, combine.MeanCombineFn(), max).with_common_input()
     with TestPipeline() as pipeline:
         numbers = pipeline | Create([1, 1, 2, 3])
         combined = numbers | beam.CombineGlobally(
             common_input_fn).without_defaults()
         assert_that(combined, equal_to([(1, 7.0 / 4, 3)]))
예제 #4
0
 def test_basic_combiners_display_data(self):
   """Check the display data of CombineGlobally wrapping a TupleCombineFn.

   Two items are expected, in any order: 'combine_fn' matching the
   TupleCombineFn class and 'combiners' matching the list of component
   combiner names rendered as a string.
   """
   tuple_fn = combine.TupleCombineFn(max, combine.MeanCombineFn(), sum)
   display_data = DisplayData.create_from(beam.CombineGlobally(tuple_fn))
   combine_fn_item = DisplayDataItemMatcher(
       'combine_fn', combine.TupleCombineFn)
   combiners_item = DisplayDataItemMatcher(
       'combiners', "['max', 'MeanCombineFn', 'sum']")
   hc.assert_that(
       display_data.items,
       hc.contains_inanyorder(combine_fn_item, combiners_item))
예제 #5
0
 def test_empty_tuple_combine_fn(self):
     """Check that a TupleCombineFn with no combiners yields one empty tuple."""
     empty_fn = combine.TupleCombineFn()
     with TestPipeline() as pipeline:
         combined = (
             pipeline
             | Create([(), (), ()])
             | beam.CombineGlobally(empty_fn))
         assert_that(combined, equal_to([()]))