  def test_pack_combiners_with_missing_environment_capability(self):
    class MultipleCombines(beam.PTransform):
      def expand(self, pcoll):
        _ = pcoll | 'mean-perkey' >> combiners.Mean.PerKey()
        _ = pcoll | 'count-perkey' >> combiners.Count.PerKey()
        _ = pcoll | 'largest-perkey' >> core.CombinePerKey(combiners.Largest(1))

    pipeline = beam.Pipeline()
    vals = [6, 3, 1, 1, 9, 1, 5, 2, 0, 6]
    _ = pipeline | Create([('a', x) for x in vals]) | MultipleCombines()
    environment = environments.DockerEnvironment(capabilities=())
    pipeline_proto = pipeline.to_runner_api(default_environment=environment)
    _, stages = translations.create_and_optimize_stages(
        pipeline_proto, [translations.pack_combiners],
        known_runner_urns=frozenset())
    combine_per_key_stages = []
    for stage in stages:
      for transform in stage.transforms:
        if transform.spec.urn == common_urns.composites.COMBINE_PER_KEY.urn:
          combine_per_key_stages.append(stage)
    # Combiner packing should be skipped because the environment is missing
    # the beam:combinefn:packed_python:v1 capability.
    self.assertEqual(len(combine_per_key_stages), 3)
    for combine_per_key_stage in combine_per_key_stages:
      self.assertNotIn('Packed', combine_per_key_stage.name)
      self.assertNotIn(
          'Packed', combine_per_key_stage.transforms[0].unique_name)
  def create_stages(
      self,
      pipeline_proto  # type: beam_runner_api_pb2.Pipeline
  ):
    # type: (...) -> Tuple[translations.TransformContext, List[translations.Stage]]
    return translations.create_and_optimize_stages(
        copy.deepcopy(pipeline_proto),
        phases=[
            translations.annotate_downstream_side_inputs,
            translations.fix_side_input_pcoll_coders,
            translations.lift_combiners,
            translations.expand_sdf,
            translations.expand_gbk,
            translations.sink_flattens,
            translations.greedily_fuse,
            translations.read_to_impulse,
            translations.impulse_to_input,
            translations.sort_stages,
            translations.setup_timer_mapping,
            translations.populate_data_channel_coders,
        ],
        known_runner_urns=frozenset([
            common_urns.primitives.FLATTEN.urn,
            common_urns.primitives.GROUP_BY_KEY.urn
        ]),
        use_state_iterables=self._use_state_iterables)
  def test_pack_combiners(self):
    class MultipleCombines(beam.PTransform):
      def expand(self, pcoll):
        _ = pcoll | 'mean-perkey' >> combiners.Mean.PerKey()
        _ = pcoll | 'count-perkey' >> combiners.Count.PerKey()
        _ = pcoll | 'largest-perkey' >> core.CombinePerKey(combiners.Largest(1))

    pipeline = beam.Pipeline()
    vals = [6, 3, 1, 1, 9, 1, 5, 2, 0, 6]
    _ = (
        pipeline
        | Create([('a', x) for x in vals])
        | 'multiple-combines' >> MultipleCombines())
    environment = environments.DockerEnvironment.from_options(
        pipeline_options.PortableOptions(sdk_location='container'))
    pipeline_proto = pipeline.to_runner_api(default_environment=environment)
    _, stages = translations.create_and_optimize_stages(
        pipeline_proto, [translations.pack_combiners],
        known_runner_urns=frozenset())
    combine_per_key_stages = []
    for stage in stages:
      for transform in stage.transforms:
        if transform.spec.urn == common_urns.composites.COMBINE_PER_KEY.urn:
          combine_per_key_stages.append(stage)
    # The three sibling CombinePerKey transforms should be packed into a
    # single CombinePerKey stage named after the enclosing composite.
    self.assertEqual(len(combine_per_key_stages), 1)
    self.assertIn('Packed', combine_per_key_stages[0].name)
    self.assertIn(
        'Packed', combine_per_key_stages[0].transforms[0].unique_name)
    self.assertIn('multiple-combines', combine_per_key_stages[0].parent)
    self.assertNotIn('-perkey', combine_per_key_stages[0].parent)
  def test_pack_global_combiners(self):
    class MultipleCombines(beam.PTransform):
      def expand(self, pcoll):
        _ = pcoll | 'mean-globally' >> combiners.Mean.Globally()
        _ = pcoll | 'count-globally' >> combiners.Count.Globally()

    pipeline = beam.Pipeline()
    vals = [6, 3, 1, 1, 9, 1, 5, 2, 0, 6]
    _ = pipeline | Create(vals) | 'multiple-combines' >> MultipleCombines()
    environment = environments.DockerEnvironment.from_options(
        pipeline_options.PortableOptions(sdk_location='container'))
    pipeline_proto = pipeline.to_runner_api(default_environment=environment)
    _, stages = translations.create_and_optimize_stages(
        pipeline_proto,
        [
            translations.eliminate_common_key_with_none,
            translations.pack_combiners,
        ],
        known_runner_urns=frozenset())
    # The KeyWithVoid stages introduced by the global combines should be
    # deduplicated into a single stage under the enclosing composite.
    key_with_void_stages = [
        stage for stage in stages if 'KeyWithVoid' in stage.name
    ]
    self.assertEqual(len(key_with_void_stages), 1)
    self.assertIn('multiple-combines', key_with_void_stages[0].parent)
    self.assertNotIn('-globally', key_with_void_stages[0].parent)

    combine_per_key_stages = []
    for stage in stages:
      for transform in stage.transforms:
        if transform.spec.urn == common_urns.composites.COMBINE_PER_KEY.urn:
          combine_per_key_stages.append(stage)
    self.assertEqual(len(combine_per_key_stages), 1)
    self.assertIn('/Pack', combine_per_key_stages[0].name)
    self.assertIn('multiple-combines', combine_per_key_stages[0].parent)
    self.assertNotIn('-globally', combine_per_key_stages[0].parent)
  def test_eliminate_common_key_with_void(self):
    pipeline = beam.Pipeline()
    pcoll = pipeline | 'Start' >> beam.Create([1, 2, 3])
    _ = pcoll | 'TestKeyWithNoneA' >> beam.ParDo(core._KeyWithNone())
    _ = pcoll | 'TestKeyWithNoneB' >> beam.ParDo(core._KeyWithNone())
    pipeline_proto = pipeline.to_runner_api()
    _, stages = translations.create_and_optimize_stages(
        pipeline_proto, [translations.eliminate_common_key_with_none],
        known_runner_urns=frozenset())
    key_with_none_stages = [
        stage for stage in stages if 'TestKeyWithNone' in stage.name
    ]
    self.assertEqual(len(key_with_none_stages), 1)
  def test_eliminate_common_key_with_void(self):
    class MultipleKeyWithNone(beam.PTransform):
      def expand(self, pcoll):
        _ = pcoll | 'key-with-none-a' >> beam.ParDo(core._KeyWithNone())
        _ = pcoll | 'key-with-none-b' >> beam.ParDo(core._KeyWithNone())
        _ = pcoll | 'key-with-none-c' >> beam.ParDo(core._KeyWithNone())

    pipeline = beam.Pipeline()
    _ = (
        pipeline
        | beam.Create([1, 2, 3])
        | 'multiple-key-with-none' >> MultipleKeyWithNone())
    pipeline_proto = pipeline.to_runner_api()
    _, stages = translations.create_and_optimize_stages(
        pipeline_proto, [translations.eliminate_common_key_with_none],
        known_runner_urns=frozenset())
    key_with_none_stages = [
        stage for stage in stages if 'key-with-none' in stage.name
    ]
    self.assertEqual(len(key_with_none_stages), 1)
    self.assertIn('multiple-key-with-none', key_with_none_stages[0].parent)
  def test_pack_combiners(self):
    pipeline = beam.Pipeline()
    vals = [6, 3, 1, 1, 9, 1, 5, 2, 0, 6]
    pcoll = pipeline | 'start-perkey' >> Create([('a', x) for x in vals])
    _ = pcoll | 'mean-perkey' >> combiners.Mean.PerKey()
    _ = pcoll | 'count-perkey' >> combiners.Count.PerKey()
    environment = environments.DockerEnvironment.from_options(
        pipeline_options.PortableOptions(sdk_location='container'))
    pipeline_proto = pipeline.to_runner_api(default_environment=environment)
    _, stages = translations.create_and_optimize_stages(
        pipeline_proto, [translations.pack_combiners],
        known_runner_urns=frozenset())
    combine_per_key_stages = []
    for stage in stages:
      for transform in stage.transforms:
        if transform.spec.urn == common_urns.composites.COMBINE_PER_KEY.urn:
          combine_per_key_stages.append(stage)
    self.assertEqual(len(combine_per_key_stages), 1)
    self.assertIn('/Pack', combine_per_key_stages[0].name)