Example #1
  def test_pack_combiners_with_missing_environment_capability(self):
    class MultipleCombines(beam.PTransform):
      def expand(self, pcoll):
        _ = pcoll | 'mean-perkey' >> combiners.Mean.PerKey()
        _ = pcoll | 'count-perkey' >> combiners.Count.PerKey()
        _ = pcoll | 'largest-perkey' >> core.CombinePerKey(combiners.Largest(1))

    pipeline = beam.Pipeline()
    vals = [6, 3, 1, 1, 9, 1, 5, 2, 0, 6]
    _ = pipeline | Create([('a', x) for x in vals]) | MultipleCombines()
    environment = environments.DockerEnvironment(capabilities=())
    pipeline_proto = pipeline.to_runner_api(default_environment=environment)
    _, stages = translations.create_and_optimize_stages(
        pipeline_proto, [translations.pack_combiners],
        known_runner_urns=frozenset())
    combine_per_key_stages = []
    for stage in stages:
      for transform in stage.transforms:
        if transform.spec.urn == common_urns.composites.COMBINE_PER_KEY.urn:
          combine_per_key_stages.append(stage)
    # Combiner packing should be skipped because the environment is missing
    # the beam:combinefn:packed_python:v1 capability.
    self.assertEqual(len(combine_per_key_stages), 3)
    for combine_per_key_stage in combine_per_key_stages:
      self.assertNotIn('Packed', combine_per_key_stage.name)
      self.assertNotIn(
          'Packed', combine_per_key_stage.transforms[0].unique_name)
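
The snippets on this page are test methods lifted out of Beam's portability translation tests, so they omit their imports. A plausible import block for them, assuming a recent apache_beam module layout (the exact paths are my assumption and may differ in older SDKs):

# Imports assumed by the examples on this page.
import copy
from typing import List, Tuple

import apache_beam as beam
from apache_beam import Create
from apache_beam.options import pipeline_options
from apache_beam.portability import common_urns
from apache_beam.portability.api import beam_runner_api_pb2
from apache_beam.runners.portability.fn_api_runner import translations
from apache_beam.transforms import combiners
from apache_beam.transforms import core
from apache_beam.transforms import environments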
Example #2
  def create_stages(
      self,
      pipeline_proto  # type: beam_runner_api_pb2.Pipeline
  ):
    # type: (...) -> Tuple[translations.TransformContext, List[translations.Stage]]
    return translations.create_and_optimize_stages(
        copy.deepcopy(pipeline_proto),
        phases=[
            translations.annotate_downstream_side_inputs,
            translations.fix_side_input_pcoll_coders,
            translations.lift_combiners,
            translations.expand_sdf,
            translations.expand_gbk,
            translations.sink_flattens,
            translations.greedily_fuse,
            translations.read_to_impulse,
            translations.impulse_to_input,
            translations.sort_stages,
            translations.setup_timer_mapping,
            translations.populate_data_channel_coders,
        ],
        known_runner_urns=frozenset([
            common_urns.primitives.FLATTEN.urn,
            common_urns.primitives.GROUP_BY_KEY.urn
        ]),
        use_state_iterables=self._use_state_iterables)
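
A minimal sketch of exercising this helper (the `runner` object below is a hypothetical stand-in for whatever instance defines create_stages and its _use_state_iterables attribute):

# Hypothetical usage: optimize a trivial pipeline and inspect the stages.
pipeline = beam.Pipeline()
_ = pipeline | beam.Create([1, 2, 3]) | beam.Map(lambda x: x + 1)
context, stages = runner.create_stages(pipeline.to_runner_api())
for stage in stages:
  print(stage.name, [t.spec.urn for t in stage.transforms])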
Example #3
  def test_pack_combiners(self):
    class MultipleCombines(beam.PTransform):
      def expand(self, pcoll):
        _ = pcoll | 'mean-perkey' >> combiners.Mean.PerKey()
        _ = pcoll | 'count-perkey' >> combiners.Count.PerKey()
        _ = pcoll | 'largest-perkey' >> core.CombinePerKey(combiners.Largest(1))

    pipeline = beam.Pipeline()
    vals = [6, 3, 1, 1, 9, 1, 5, 2, 0, 6]
    _ = pipeline | Create([('a', x) for x in vals
                           ]) | 'multiple-combines' >> MultipleCombines()
    environment = environments.DockerEnvironment.from_options(
        pipeline_options.PortableOptions(sdk_location='container'))
    pipeline_proto = pipeline.to_runner_api(default_environment=environment)
    _, stages = translations.create_and_optimize_stages(
        pipeline_proto, [translations.pack_combiners],
        known_runner_urns=frozenset())
    combine_per_key_stages = []
    for stage in stages:
      for transform in stage.transforms:
        if transform.spec.urn == common_urns.composites.COMBINE_PER_KEY.urn:
          combine_per_key_stages.append(stage)
    self.assertEqual(len(combine_per_key_stages), 1)
    self.assertIn('Packed', combine_per_key_stages[0].name)
    self.assertIn('Packed', combine_per_key_stages[0].transforms[0].unique_name)
    self.assertIn('multiple-combines', combine_per_key_stages[0].parent)
    self.assertNotIn('-perkey', combine_per_key_stages[0].parent)
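
For contrast with Example #1: packing succeeds here because DockerEnvironment.from_options yields an environment carrying the SDK's declared capabilities. A minimal sketch of an environment that advertises the packed-combiner capability explicitly (the URN is the one named in Example #1's comment):

# Sketch: an environment whose capabilities include the packed-combiner URN,
# so translations.pack_combiners will not be skipped for it.
environment = environments.DockerEnvironment(
    capabilities=('beam:combinefn:packed_python:v1', ))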
Example #4
    def test_pack_global_combiners(self):
        class MultipleCombines(beam.PTransform):
            def expand(self, pcoll):
                _ = pcoll | 'mean-globally' >> combiners.Mean.Globally()
                _ = pcoll | 'count-globally' >> combiners.Count.Globally()

        pipeline = beam.Pipeline()
        vals = [6, 3, 1, 1, 9, 1, 5, 2, 0, 6]
        _ = pipeline | Create(vals) | 'multiple-combines' >> MultipleCombines()
        environment = environments.DockerEnvironment.from_options(
            pipeline_options.PortableOptions(sdk_location='container'))
        pipeline_proto = pipeline.to_runner_api(
            default_environment=environment)
        _, stages = translations.create_and_optimize_stages(
            pipeline_proto, [
                translations.eliminate_common_key_with_none,
                translations.pack_combiners,
            ],
            known_runner_urns=frozenset())
        key_with_void_stages = [
            stage for stage in stages if 'KeyWithVoid' in stage.name
        ]
        self.assertEqual(len(key_with_void_stages), 1)
        self.assertIn('multiple-combines', key_with_void_stages[0].parent)
        self.assertNotIn('-globally', key_with_void_stages[0].parent)

        combine_per_key_stages = []
        for stage in stages:
            for transform in stage.transforms:
                if transform.spec.urn == common_urns.composites.COMBINE_PER_KEY.urn:
                    combine_per_key_stages.append(stage)
        self.assertEqual(len(combine_per_key_stages), 1)
        self.assertIn('/Pack', combine_per_key_stages[0].name)
        self.assertIn('multiple-combines', combine_per_key_stages[0].parent)
        self.assertNotIn('-globally', combine_per_key_stages[0].parent)
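
Each global combine expands to a key-with-void step followed by a per-key combine, which is why eliminate_common_key_with_none can deduplicate the shared keying step before pack_combiners runs. Roughly, the keying step does the following (the Map below is an illustration, not Beam's actual transform):

# Illustrative only: a global combine first keys every element with None,
# combines per key, and then drops the key again.
keyed = pcoll | 'KeyWithVoid' >> beam.Map(lambda v: (None, v))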
Example #5
    def test_eliminate_common_key_with_void(self):
        pipeline = beam.Pipeline()
        pcoll = pipeline | 'Start' >> beam.Create([1, 2, 3])
        _ = pcoll | 'TestKeyWithNoneA' >> beam.ParDo(core._KeyWithNone())
        _ = pcoll | 'TestKeyWithNoneB' >> beam.ParDo(core._KeyWithNone())

        pipeline_proto = pipeline.to_runner_api()
        _, stages = translations.create_and_optimize_stages(
            pipeline_proto, [translations.eliminate_common_key_with_none],
            known_runner_urns=frozenset())
        key_with_none_stages = [
            stage for stage in stages if 'TestKeyWithNone' in stage.name
        ]
        self.assertEqual(len(key_with_none_stages), 1)
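
core._KeyWithNone is the DoFn behind that keying step; a rough, illustrative equivalent (not Beam's source) is:

# Illustrative sketch of what core._KeyWithNone does: pair each element
# with a None key so it can feed a per-key combine.
class _KeyWithNone(beam.DoFn):
  def process(self, v):
    yield None, v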
Example #6
  def test_eliminate_common_key_with_void(self):
    class MultipleKeyWithNone(beam.PTransform):
      def expand(self, pcoll):
        _ = pcoll | 'key-with-none-a' >> beam.ParDo(core._KeyWithNone())
        _ = pcoll | 'key-with-none-b' >> beam.ParDo(core._KeyWithNone())
        _ = pcoll | 'key-with-none-c' >> beam.ParDo(core._KeyWithNone())

    pipeline = beam.Pipeline()
    _ = pipeline | beam.Create(
        [1, 2, 3]) | 'multiple-key-with-none' >> MultipleKeyWithNone()
    pipeline_proto = pipeline.to_runner_api()
    _, stages = translations.create_and_optimize_stages(
        pipeline_proto, [translations._eliminate_common_key_with_none],
        known_runner_urns=frozenset())
    key_with_none_stages = [
        stage for stage in stages if 'key-with-none' in stage.name
    ]
    self.assertEqual(len(key_with_none_stages), 1)
    self.assertIn('multiple-key-with-none', key_with_none_stages[0].parent)
Example #7
  def test_pack_combiners(self):
    pipeline = beam.Pipeline()
    vals = [6, 3, 1, 1, 9, 1, 5, 2, 0, 6]
    pcoll = pipeline | 'start-perkey' >> Create([('a', x) for x in vals])
    _ = pcoll | 'mean-perkey' >> combiners.Mean.PerKey()
    _ = pcoll | 'count-perkey' >> combiners.Count.PerKey()

    environment = environments.DockerEnvironment.from_options(
        pipeline_options.PortableOptions(sdk_location='container'))
    pipeline_proto = pipeline.to_runner_api(default_environment=environment)
    _, stages = translations.create_and_optimize_stages(
        pipeline_proto, [translations.pack_combiners],
        known_runner_urns=frozenset())
    combine_per_key_stages = []
    for stage in stages:
      for transform in stage.transforms:
        if transform.spec.urn == common_urns.composites.COMBINE_PER_KEY.urn:
          combine_per_key_stages.append(stage)
    self.assertEqual(len(combine_per_key_stages), 1)
    self.assertIn('/Pack', combine_per_key_stages[0].name)
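
Combiner packing only rewrites the pipeline proto; the packed combines still produce the same per-key results. A sketch of checking that end to end on the default (direct) runner, using the vals list from the example above (34 / 10 = 3.4 for the mean, 10 for the count):

# Sketch: run the same combines end to end and assert on their outputs.
from apache_beam.testing.util import assert_that, equal_to

with beam.Pipeline() as p:
  pcoll = p | Create([('a', x) for x in vals])
  assert_that(
      pcoll | combiners.Mean.PerKey(),
      equal_to([('a', 3.4)]),
      label='check-mean')
  assert_that(
      pcoll | combiners.Count.PerKey(),
      equal_to([('a', 10)]),
      label='check-count')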