Example #1
 def test_default_capabilities(self):
     environment = DockerEnvironment.from_options(
         PortableOptions(sdk_location='container'))
     context = pipeline_context.PipelineContext()
     proto = environment.to_runner_api(context)
     self.assertEqual(set(proto.capabilities),
                      set(environments.python_sdk_capabilities()))
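Note: the test above checks that a DockerEnvironment's runner API proto advertises the full set of Python SDK capabilities. As a minimal sketch (assuming a local Apache Beam installation and that these import paths match your SDK version), the same proto can also be decoded to inspect the container image it points at:

from apache_beam.options.pipeline_options import PortableOptions
from apache_beam.portability.api import beam_runner_api_pb2
from apache_beam.runners import pipeline_context
from apache_beam.transforms import environments
from apache_beam.transforms.environments import DockerEnvironment

environment = DockerEnvironment.from_options(
    PortableOptions(sdk_location='container'))
proto = environment.to_runner_api(pipeline_context.PipelineContext())

# The capabilities field carries the same URNs as python_sdk_capabilities().
print(sorted(proto.capabilities) == sorted(environments.python_sdk_capabilities()))

# The payload is a serialized DockerPayload holding the container image name.
payload = beam_runner_api_pb2.DockerPayload.FromString(proto.payload)
print(payload.container_image)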
Example #2
 def test_sdk_capabilities(self):
   sdk_capabilities = environments.python_sdk_capabilities()
   self.assertIn(common_urns.coders.LENGTH_PREFIX.urn, sdk_capabilities)
   self.assertIn(common_urns.protocols.WORKER_STATUS.urn, sdk_capabilities)
   self.assertIn(
       common_urns.sdf_components.TRUNCATE_SIZED_RESTRICTION.urn,
       sdk_capabilities)
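python_sdk_capabilities() returns a list of capability URN strings, and the test only spot-checks a few well-known entries. A quick sketch for listing everything the SDK advertises (the exact set varies by Beam version):

from apache_beam.portability import common_urns
from apache_beam.transforms import environments

sdk_capabilities = environments.python_sdk_capabilities()

# Print every advertised capability URN, one per line.
for urn in sorted(sdk_capabilities):
  print(urn)

# Membership checks work the same way as in the test.
print(common_urns.protocols.WORKER_STATUS.urn in sdk_capabilities)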
Example #3
    def test_conditionally_packed_combiners(self):
        class RecursiveCombine(beam.PTransform):
            def __init__(self, labels):
                self._labels = labels

            def expand(self, pcoll):
                base = pcoll | 'Sum' >> beam.CombineGlobally(sum)
                if self._labels:
                    rest = pcoll | self._labels[0] >> RecursiveCombine(
                        self._labels[1:])
                    return (base, rest) | beam.Flatten()
                else:
                    return base

            def annotations(self):
                if len(self._labels) == 2:
                    return {python_urns.APPLY_COMBINER_PACKING: b''}
                else:
                    return {}

        # Verify the results are as expected.
        with TestPipeline() as pipeline:
            result = (
                pipeline
                | beam.Create([1, 2, 3])
                | RecursiveCombine('ABCD'))
            assert_that(result, equal_to([6, 6, 6, 6, 6]))

        # Verify the optimization is as expected.
        proto = pipeline.to_runner_api(
            default_environment=environments.EmbeddedPythonEnvironment(
                capabilities=environments.python_sdk_capabilities()))
        optimized = translations.optimize_pipeline(
            proto,
            phases=[translations.pack_combiners],
            known_runner_urns=frozenset(),
            partial=True)
        optimized_stage_names = sorted(
            t.unique_name for t in optimized.components.transforms.values())
        self.assertIn('RecursiveCombine/Sum/CombinePerKey',
                      optimized_stage_names)
        self.assertIn('RecursiveCombine/A/Sum/CombinePerKey',
                      optimized_stage_names)
        self.assertNotIn('RecursiveCombine/A/B/Sum/CombinePerKey',
                         optimized_stage_names)
        self.assertIn(
            'RecursiveCombine/A/B/Packed[Sum_CombinePerKey, '
            'C_Sum_CombinePerKey, C_D_Sum_CombinePerKey]/Pack',
            optimized_stage_names)
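The packing decision hinges on PTransform.annotations(): RecursiveCombine only returns the APPLY_COMBINER_PACKING annotation at the depth where len(self._labels) == 2, so only the combiners nested under 'RecursiveCombine/A/B' are fused by the pack_combiners phase. A minimal sketch of the annotation hook in isolation (assuming the same python_urns constant; an empty payload appears to request packing with no size limit):

import apache_beam as beam
from apache_beam.portability import python_urns


class PackedSums(beam.PTransform):
  def annotations(self):
    # Request combiner packing for combiners applied inside this transform.
    return {python_urns.APPLY_COMBINER_PACKING: b''}

  def expand(self, pcoll):
    # Two sibling CombineGlobally transforms that pack_combiners may fuse.
    return (
        pcoll | 'SumA' >> beam.CombineGlobally(sum),
        pcoll | 'SumB' >> beam.CombineGlobally(sum)) | beam.Flatten()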
Example #4
 def test_environment_override_translation(self):
   self.default_properties.append('--experiments=beam_fn_api')
   self.default_properties.append('--worker_harness_container_image=FOO')
   remote_runner = DataflowRunner()
   with Pipeline(remote_runner,
                 options=PipelineOptions(self.default_properties)) as p:
     (  # pylint: disable=expression-not-assigned
         p | ptransform.Create([1, 2, 3])
         | 'Do' >> ptransform.FlatMap(lambda x: [(x, x)])
         | ptransform.GroupByKey())
   self.assertEqual(
       list(remote_runner.proto_pipeline.components.environments.values()),
       [
           beam_runner_api_pb2.Environment(
               urn=common_urns.environments.DOCKER.urn,
               payload=beam_runner_api_pb2.DockerPayload(
                   container_image='FOO').SerializeToString(),
               capabilities=environments.python_sdk_capabilities())
       ])
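The expected environment is a DOCKER environment whose payload is a serialized DockerPayload proto carrying the overridden image name ('FOO' is just the test's placeholder). A small sketch of building and decoding that payload directly with the standard protobuf API:

from apache_beam.portability import common_urns
from apache_beam.portability.api import beam_runner_api_pb2

payload = beam_runner_api_pb2.DockerPayload(container_image='FOO')
env = beam_runner_api_pb2.Environment(
    urn=common_urns.environments.DOCKER.urn,
    payload=payload.SerializeToString())

# Round-trip the payload to recover the container image.
decoded = beam_runner_api_pb2.DockerPayload.FromString(env.payload)
assert decoded.container_image == 'FOO'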
Example #5
  def test_run_packable_combine_limit(self):
    class MultipleLargeCombines(beam.PTransform):
      def annotations(self):
        # Limit to at most 2 combiners per packed combiner.
        return {python_urns.APPLY_COMBINER_PACKING: b'2'}

      def expand(self, pcoll):
        assert_that(
            pcoll | 'min-1-globally' >> core.CombineGlobally(min),
            equal_to([-1]),
            label='assert-min-1-globally')
        assert_that(
            pcoll | 'min-2-globally' >> core.CombineGlobally(min),
            equal_to([-1]),
            label='assert-min-2-globally')
        assert_that(
            pcoll | 'min-3-globally' >> core.CombineGlobally(min),
            equal_to([-1]),
            label='assert-min-3-globally')

    class MultipleSmallCombines(beam.PTransform):
      def annotations(self):
        # Limit to at most 4 combiners per packed combiner.
        return {python_urns.APPLY_COMBINER_PACKING: b'4'}

      def expand(self, pcoll):
        assert_that(
            pcoll | 'min-4-globally' >> core.CombineGlobally(min),
            equal_to([-1]),
            label='assert-min-4-globally')
        assert_that(
            pcoll | 'min-5-globally' >> core.CombineGlobally(min),
            equal_to([-1]),
            label='assert-min-5-globally')

    with TestPipeline() as pipeline:
      vals = [6, 3, 1, -1, 9, 1, 5, 2, 0, 6]
      pcoll = pipeline | Create(vals)
      _ = pcoll | 'multiple-large-combines' >> MultipleLargeCombines()
      _ = pcoll | 'multiple-small-combines' >> MultipleSmallCombines()

    proto = pipeline.to_runner_api(
        default_environment=environments.EmbeddedPythonEnvironment(
            capabilities=environments.python_sdk_capabilities()))
    optimized = translations.optimize_pipeline(
        proto,
        phases=[translations.pack_combiners],
        known_runner_urns=frozenset(),
        partial=True)
    optimized_stage_names = [
        t.unique_name for t in optimized.components.transforms.values()
    ]
    self.assertIn(
        'multiple-large-combines/Packed[min-1-globally_CombinePerKey, '
        'min-2-globally_CombinePerKey]/Pack',
        optimized_stage_names)
    self.assertIn(
        'Packed[multiple-large-combines_min-3-globally_CombinePerKey, '
        'multiple-small-combines_min-4-globally_CombinePerKey]/Pack',
        optimized_stage_names)
    self.assertIn(
        'multiple-small-combines/min-5-globally/CombinePerKey',
        optimized_stage_names)
    self.assertNotIn(
        'multiple-large-combines/min-1-globally/CombinePerKey',
        optimized_stage_names)
    self.assertNotIn(
        'multiple-large-combines/min-2-globally/CombinePerKey',
        optimized_stage_names)
    self.assertNotIn(
        'multiple-large-combines/min-3-globally/CombinePerKey',
        optimized_stage_names)
    self.assertNotIn(
        'multiple-small-combines/min-4-globally/CombinePerKey',
        optimized_stage_names)
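The APPLY_COMBINER_PACKING payload is interpreted as a packing limit: b'2' caps each pack at two constituents, so min-1 and min-2 fuse inside MultipleLargeCombines, min-3 lands in a second pack together with min-4 from the sibling transform, and min-5 is left unpacked, exactly as the assertions above check. A tiny sketch of the two annotation forms seen in these tests (the empty payload, as in Example #3, appears to mean packing with no limit):

from apache_beam.portability import python_urns

# The payload is the limit encoded as ASCII bytes; empty bytes request
# packing without a cap on the number of fused combiners.
limit_two = {python_urns.APPLY_COMBINER_PACKING: b'2'}
unlimited = {python_urns.APPLY_COMBINER_PACKING: b''}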