def test_top_shorthands(self):
    """Verify the Largest/Smallest shorthands both globally and per key."""
    with TestPipeline() as pipeline:
        values = [6, 3, 1, 1, 9, 1, 5, 2, 0, 6]

        # Global combines over the raw values.
        unkeyed = pipeline | 'start' >> Create(values)
        largest_global = unkeyed | 'top' >> beam.CombineGlobally(
            combine.Largest(5))
        smallest_global = unkeyed | 'bot' >> beam.CombineGlobally(
            combine.Smallest(4))
        assert_that(
            largest_global, equal_to([[9, 6, 6, 5, 3]]), label='assert:top')
        assert_that(
            smallest_global, equal_to([[0, 1, 1, 1]]), label='assert:bot')

        # The same combines, applied per key on a single-key collection.
        keyed = pipeline | 'start-perkey' >> Create(
            [('a', value) for value in values])
        largest_per_key = keyed | 'top-perkey' >> beam.CombinePerKey(
            combine.Largest(5))
        smallest_per_key = keyed | 'bot-perkey' >> beam.CombinePerKey(
            combine.Smallest(4))
        assert_that(
            largest_per_key, equal_to([('a', [9, 6, 6, 5, 3])]), label='ktop')
        assert_that(
            smallest_per_key, equal_to([('a', [0, 1, 1, 1])]), label='kbot')
def test_combine_per_key_top_display_data(self):
    """Check the display data emitted by CombinePerKey-wrapped Top combiners.

    Each Top-style combine fn (Largest, Smallest, TopCombineFn) should expose
    its combine_fn class, its element count ``n``, and its comparator name.
    """
    def individual_test_per_key_dd(combine_fn):
        # Wrap the combine fn and verify all expected display items appear,
        # in any order.
        transform = beam.CombinePerKey(combine_fn)
        dd = DisplayData.create_from(transform)
        expected_items = [
            DisplayDataItemMatcher('combine_fn', combine_fn.__class__),
            DisplayDataItemMatcher('n', combine_fn._n),
            DisplayDataItemMatcher('compare', combine_fn._compare.__name__),
        ]
        hc.assert_that(dd.items, hc.contains_inanyorder(*expected_items))

    individual_test_per_key_dd(combine.Largest(5))
    individual_test_per_key_dd(combine.Smallest(3))
    # Fixed: the original repeated combine.Largest(5) as a fourth call,
    # which exactly duplicated the first case and verified nothing new.
    individual_test_per_key_dd(combine.TopCombineFn(8))
def expand(self, pcoll):
    """Apply three distinct per-key combiners and assert their outputs.

    These CombinePerKey stages will be packed if and only if
    translations.pack_combiners is enabled in the TestPipeline runner.
    """
    mean_result = pcoll | 'mean-perkey' >> combiners.Mean.PerKey()
    count_result = pcoll | 'count-perkey' >> combiners.Count.PerKey()
    largest_result = pcoll | 'largest-perkey' >> core.CombinePerKey(
        combiners.Largest(1))

    assert_that(
        mean_result, equal_to([('a', 3.4)]), label='assert-mean-perkey')
    assert_that(
        count_result, equal_to([('a', 10)]), label='assert-count-perkey')
    assert_that(
        largest_result, equal_to([('a', [9])]), label='assert-largest-perkey')
def expand(self, pcoll):
    """Apply three distinct global combiners and assert their outputs.

    These CombineGlobally stages will be packed if and only if
    translations.eliminate_common_key_with_void and
    translations.pack_combiners are enabled in the TestPipeline runner.
    """
    mean_result = pcoll | 'mean-globally' >> combiners.Mean.Globally()
    count_result = pcoll | 'count-globally' >> combiners.Count.Globally()
    largest_result = pcoll | 'largest-globally' >> core.CombineGlobally(
        combiners.Largest(1))

    assert_that(mean_result, equal_to([3.4]), label='assert-mean-globally')
    assert_that(count_result, equal_to([10]), label='assert-count-globally')
    assert_that(
        largest_result, equal_to([[9]]), label='assert-largest-globally')
def test_optimize_multiple_combine_globally(self):
    """Round-trip a pipeline with several global combines through the
    pack_combiners optimization and rehydrate it via from_runner_api."""
    pipeline = beam.Pipeline()
    source = pipeline | Create([6, 3, 1, 1, 9, 1, 5, 2, 0, 6])
    _ = source | 'mean-globally' >> combiners.Mean.Globally()
    _ = source | 'count-globally' >> combiners.Count.Globally()
    _ = source | 'largest-globally' >> core.CombineGlobally(
        combiners.Largest(1))

    optimized_pipeline_proto = translations.optimize_pipeline(
        pipeline.to_runner_api(),
        [translations.pack_combiners],
        known_runner_urns=frozenset(),
        partial=True)

    # Tests that Pipeline.from_runner_api() does not throw an exception.
    beam.Pipeline.from_runner_api(
        optimized_pipeline_proto,
        runners.DirectRunner(),
        pipeline_options.PipelineOptions())
def expand(self, pcoll):
    """Apply three per-key combiners, discarding their outputs."""
    stages = (
        ('mean-perkey', combiners.Mean.PerKey()),
        ('count-perkey', combiners.Count.PerKey()),
        ('largest-perkey', core.CombinePerKey(combiners.Largest(1))),
    )
    for label, transform in stages:
        _ = pcoll | label >> transform
def expand(self, pcoll):
    """Apply three global combiners, discarding their outputs."""
    stages = (
        ('mean-globally', combiners.Mean.Globally()),
        ('count-globally', combiners.Count.Globally()),
        ('largest-globally', core.CombineGlobally(combiners.Largest(1))),
    )
    for label, transform in stages:
        _ = pcoll | label >> transform