Ejemplo n.º 1
0
  def test_flexrs_cost(self):
    # Passing --flexrs_goal COST_OPTIMIZED must surface on the environment
    # proto as the FLEXRS_COST_OPTIMIZED scheduling goal.
    options = PipelineOptions([
        '--flexrs_goal', 'COST_OPTIMIZED',
        '--temp_location', 'gs://any-location/temp',
    ])

    environment = apiclient.Environment(
        [],  # packages
        options,
        '2.0.0',  # any environment version
        FAKE_PIPELINE_URL)

    goal_enum = dataflow.Environment.FlexResourceSchedulingGoalValueValuesEnum
    self.assertEqual(
        environment.proto.flexResourceSchedulingGoal,
        goal_enum.FLEXRS_COST_OPTIMIZED)
Ejemplo n.º 2
0
    def test_set_subnetwork(self):
        # The value given to --subnetwork must be copied unchanged onto the
        # first worker pool of the environment proto.
        subnetwork = '/regions/MY/subnetworks/SUBNETWORK'
        options = PipelineOptions([
            '--subnetwork', subnetwork,
            '--temp_location', 'gs://any-location/temp',
        ])

        environment = apiclient.Environment(
            [],  # packages
            options,
            '2.0.0',  # any environment version
            FAKE_PIPELINE_URL)

        first_pool = environment.proto.workerPools[0]
        self.assertEqual(first_pool.subnetwork, subnetwork)
Ejemplo n.º 3
0
 def test_harness_override_uses_base_version_in_rc_releases(self):
     # A streaming environment is expected to list a
     # runner_harness_container_image experiment that points at the 2.2.0
     # harness image.
     # NOTE(review): nothing in this block pins the SDK version to 2.2.0;
     # presumably a patch applied outside the method does - confirm.
     options = PipelineOptions(
         ['--temp_location', 'gs://any-location/temp', '--streaming'])
     expected_experiment = (
         'runner_harness_container_image=' +
         names.DATAFLOW_CONTAINER_IMAGE_REPOSITORY + '/harness:2.2.0')

     environment = apiclient.Environment(
         [],  # packages
         options,
         '2.0.0',  # any environment version
         FAKE_PIPELINE_URL)

     self.assertIn(expected_experiment, environment.proto.experiments)
Ejemplo n.º 4
0
 def test_harness_override_absent_in_released_sdks_with_runner_v2(self):
     # With the use_runner_v2 experiment on a streaming pipeline, no
     # experiment may carry a runner_harness_container_image override.
     options = PipelineOptions([
         '--temp_location', 'gs://any-location/temp', '--streaming',
         '--experiments=use_runner_v2'
     ])

     environment = apiclient.Environment(
         [],  # packages
         options,
         '2.0.0',  # any environment version
         FAKE_PIPELINE_URL)

     # An unset/empty experiments field means there is nothing to check.
     for entry in environment.proto.experiments or []:
         self.assertNotIn('runner_harness_container_image=', entry)
Ejemplo n.º 5
0
    def test_sdk_harness_container_images_get_set(self):
        # Builds a pipeline proto with a default Docker environment plus a
        # manually added one, then checks the SDK harness container images
        # recorded on the first worker pool.
        options = PipelineOptions([
            '--experiments=beam_fn_api', '--experiments=use_unified_worker',
            '--temp_location', 'gs://any-location/temp'
        ])

        pipeline = Pipeline(options=options)
        pipeline | Create([1, 2, 3]) | ParDo(DoFn())  # pylint:disable=expression-not-assigned

        proto_pipeline, _ = pipeline.to_runner_api(
            return_context=True,
            default_environment=DockerEnvironment(
                container_image='test_default_image'))

        # Environments are added by hand because Dataflow only populates
        # 'sdkHarnessContainerImages' when at least two environments exist.
        docker_payload = beam_runner_api_pb2.DockerPayload(
            container_image='dummy_image')
        extra_environment = beam_runner_api_pb2.Environment(
            urn=common_urns.environments.DOCKER.urn,
            payload=docker_payload.SerializeToString())
        components = proto_pipeline.components
        components.environments['dummy_env_id'].CopyFrom(extra_environment)
        components.transforms['dummy_transform_id'].CopyFrom(
            beam_runner_api_pb2.PTransform(environment_id='dummy_env_id'))

        image_overrides = {
            '.*dummy.*': 'dummy_image',
            '.*test.*': 'test_default_image'
        }
        environment = apiclient.Environment(
            [],  # packages
            options,
            '2.0.0',  # any environment version
            FAKE_PIPELINE_URL,
            proto_pipeline,
            _sdk_image_overrides=image_overrides)
        harness_images = (
            environment.proto.workerPools[0].sdkHarnessContainerImages)

        # A third entry is expected here: the actual default container image
        # for Dataflow differs from the 'test_default_image' supplied above.
        self.assertEqual(3, len(harness_images))

        # The container image should be overridden by a Dataflow specific URL.
        first_image = harness_images[0].containerImage
        self.assertTrue(
            str.startswith(
                first_image, 'gcr.io/cloud-dataflow/v1beta3/python'))
Ejemplo n.º 6
0
    def test_pinned_worker_harness_image_tag_used_in_dev_sdk(self):
        # For a streaming and for a batch pipeline, the worker harness image
        # must be the repository path for the running Python major version,
        # tagged with the matching container-version constant from `names`.
        running_python3 = sys.version_info[0] == 3

        # streaming, fnapi pipeline.
        streaming_options = PipelineOptions(
            ['--temp_location', 'gs://any-location/temp', '--streaming'])
        streaming_env = apiclient.Environment(
            [],  # packages
            streaming_options,
            '2.0.0',  # any environment version
            FAKE_PIPELINE_URL)
        fnapi_path = '/python3-fnapi:' if running_python3 else '/python-fnapi:'
        self.assertEqual(
            streaming_env.proto.workerPools[0].workerHarnessContainerImage,
            (names.DATAFLOW_CONTAINER_IMAGE_REPOSITORY + fnapi_path +
             names.BEAM_FNAPI_CONTAINER_VERSION))

        # batch, legacy pipeline.
        batch_options = PipelineOptions(
            ['--temp_location', 'gs://any-location/temp'])
        batch_env = apiclient.Environment(
            [],  # packages
            batch_options,
            '2.0.0',  # any environment version
            FAKE_PIPELINE_URL)
        legacy_path = '/python3:' if running_python3 else '/python:'
        self.assertEqual(
            batch_env.proto.workerPools[0].workerHarnessContainerImage,
            (names.DATAFLOW_CONTAINER_IMAGE_REPOSITORY + legacy_path +
             names.BEAM_CONTAINER_VERSION))
Ejemplo n.º 7
0
    def test_enable_hot_key_logging(self):
        # debugOptions must stay unset by default and must equal
        # DebugOptions(enableHotKeyLogging=True) once the flag is passed.
        temp_location_args = ('--temp_location', 'gs://any-location/temp')

        # Default case: the flag is absent.
        default_env = apiclient.Environment(
            [],  # packages
            PipelineOptions(list(temp_location_args)),
            '2.0.0',  # any environment version
            FAKE_PIPELINE_URL)
        self.assertIsNone(default_env.proto.debugOptions)

        # Explicit case: the flag is given.
        flagged_options = PipelineOptions(
            ['--enable_hot_key_logging'] + list(temp_location_args))
        flagged_env = apiclient.Environment(
            [],  # packages
            flagged_options,
            '2.0.0',  # any environment version
            FAKE_PIPELINE_URL)
        expected_debug_options = dataflow.DebugOptions(
            enableHotKeyLogging=True)
        self.assertEqual(
            flagged_env.proto.debugOptions, expected_debug_options)
Ejemplo n.º 8
0
 def test_worker_harness_override_takes_precedence_over_sdk_defaults(self):
   # An explicit --worker_harness_container_image must be reported verbatim
   # as the worker harness image, for streaming and for batch pipelines.
   expected_image = 'some:image'
   mode_flags = (
       ['--streaming'],  # streaming, fnapi pipeline.
       [],  # batch, legacy pipeline.
   )
   for flags in mode_flags:
     options = PipelineOptions(
         ['--temp_location', 'gs://any-location/temp'] + flags +
         ['--worker_harness_container_image=some:image'])
     environment = apiclient.Environment(
         [],  # packages
         options,
         '2.0.0',  # any environment version
         FAKE_PIPELINE_URL)
     self.assertEqual(
         environment.proto.workerPools[0].workerHarnessContainerImage,
         expected_image)
Ejemplo n.º 9
0
 def test_harness_override_present_in_dataflow_distributions(self):
     # With pkg_resources.get_distribution patched to return a distribution
     # of version 2.2.0, a streaming environment must list the 2.2.0 runner
     # harness image among its experiments.
     options = PipelineOptions(
         ['--temp_location', 'gs://any-location/temp', '--streaming'])
     expected_experiment = (
         'runner_harness_container_image=' +
         dependency.DATAFLOW_CONTAINER_IMAGE_REPOSITORY + '/harness:2.2.0')
     fake_distribution = pkg_resources.Distribution(version='2.2.0')
     patch_target = (
         'apache_beam.runners.dataflow.internal.dependency.pkg_resources'
         '.get_distribution')

     with mock.patch(
             patch_target, mock.MagicMock(return_value=fake_distribution)):
         environment = apiclient.Environment([], options, '2.2.0')
         self.assertIn(expected_experiment, environment.proto.experiments)
Ejemplo n.º 10
0
    def test_worker_harness_image_tag_matches_released_sdk_version(self):
        # The worker harness image must carry the 2.2.0 tag for a streaming
        # pipeline (python-fnapi image) and a batch pipeline (python image).
        # NOTE(review): nothing in this block pins the SDK version to 2.2.0;
        # presumably a patch applied outside the method does - confirm.
        repository = names.DATAFLOW_CONTAINER_IMAGE_REPOSITORY

        # streaming, fnapi pipeline.
        streaming_options = PipelineOptions(
            ['--temp_location', 'gs://any-location/temp', '--streaming'])
        streaming_env = apiclient.Environment(
            [],  # packages
            streaming_options,
            '2.0.0',  # any environment version
            FAKE_PIPELINE_URL)
        streaming_pool = streaming_env.proto.workerPools[0]
        self.assertEqual(
            streaming_pool.workerHarnessContainerImage,
            repository + '/python-fnapi:2.2.0')

        # batch, legacy pipeline.
        batch_options = PipelineOptions(
            ['--temp_location', 'gs://any-location/temp'])
        batch_env = apiclient.Environment(
            [],  # packages
            batch_options,
            '2.0.0',  # any environment version
            FAKE_PIPELINE_URL)
        batch_pool = batch_env.proto.workerPools[0]
        self.assertEqual(
            batch_pool.workerHarnessContainerImage,
            repository + '/python:2.2.0')
Ejemplo n.º 11
0
 def test_harness_override_absent_in_unreleased_sdk(self):
     # When pkg_resources.get_distribution raises DistributionNotFound, no
     # experiment of a streaming environment may contain a
     # runner_harness_container_image override.
     options = PipelineOptions(
         ['--temp_location', 'gs://any-location/temp', '--streaming'])
     patch_target = (
         'apache_beam.runners.dataflow.internal.dependency.pkg_resources'
         '.get_distribution')
     not_found = mock.Mock(side_effect=pkg_resources.DistributionNotFound())

     with mock.patch(patch_target, not_found):
         environment = apiclient.Environment(
             [],  # packages
             options,
             '2.0.0')  # any environment version
         # An unset/empty experiments field means there is nothing to check.
         for entry in environment.proto.experiments or []:
             self.assertNotIn('runner_harness_container_image=', entry)
Ejemplo n.º 12
0
 def test_harness_override_present_in_beam_releases(self):
     # Even when pkg_resources.get_distribution raises DistributionNotFound,
     # a streaming environment is expected to list the 2.2.0 runner harness
     # image among its experiments.
     # NOTE(review): nothing in this block pins the SDK version to 2.2.0;
     # presumably a patch applied outside the method does - confirm.
     options = PipelineOptions(
         ['--temp_location', 'gs://any-location/temp', '--streaming'])
     expected_experiment = (
         'runner_harness_container_image=' +
         dependency.DATAFLOW_CONTAINER_IMAGE_REPOSITORY + '/harness:2.2.0')
     patch_target = (
         'apache_beam.runners.dataflow.internal.dependency.pkg_resources'
         '.get_distribution')
     not_found = mock.Mock(side_effect=pkg_resources.DistributionNotFound())

     with mock.patch(patch_target, not_found):
         environment = apiclient.Environment(
             [],  # packages
             options,
             '2.0.0')  # any environment version
         self.assertIn(expected_experiment, environment.proto.experiments)
Ejemplo n.º 13
0
  def test_default_environment_get_set(self):
    # Supplies 'test_default_image' as the default Docker environment, adds a
    # 'dummy_image' environment by hand, and checks that the first worker pool
    # reports two SDK harness images, one of which is 'test_default_image'.
    options = PipelineOptions([
        '--experiments=beam_fn_api',
        '--experiments=use_unified_worker',
        '--temp_location',
        'gs://any-location/temp'
    ])

    pipeline = Pipeline(options=options)
    pipeline | Create([1, 2, 3]) | ParDo(DoFn())  # pylint:disable=expression-not-assigned

    proto_pipeline, _ = pipeline.to_runner_api(
        return_context=True,
        default_environment=DockerEnvironment(
            container_image='test_default_image'))

    # Register a second environment and a transform that references it.
    docker_payload = beam_runner_api_pb2.DockerPayload(
        container_image='dummy_image')
    extra_environment = beam_runner_api_pb2.Environment(
        urn=common_urns.environments.DOCKER.urn,
        payload=docker_payload.SerializeToString())
    components = proto_pipeline.components
    components.environments['dummy_env_id'].CopyFrom(extra_environment)
    components.transforms['dummy_transform_id'].CopyFrom(
        beam_runner_api_pb2.PTransform(environment_id='dummy_env_id'))

    image_overrides = {
        '.*dummy.*': 'dummy_image', '.*test.*': 'test_default_image'
    }
    environment = apiclient.Environment(
        [],  # packages
        options,
        '2.0.0',  # any environment version
        FAKE_PIPELINE_URL,
        proto_pipeline,
        _sdk_image_overrides=image_overrides)
    harness_images = environment.proto.workerPools[0].sdkHarnessContainerImages

    self.assertEqual(2, len(harness_images))

    reported_images = [entry.containerImage for entry in harness_images]
    self.assertIn('test_default_image', reported_images)
Ejemplo n.º 14
0
 def test_harness_override_custom_in_released_sdks(self):
     # A user-supplied runner_harness_container_image experiment must be the
     # only such experiment on the environment, with its value unchanged.
     options = PipelineOptions([
         '--temp_location', 'gs://any-location/temp', '--streaming',
         '--experiments=runner_harness_container_image=fake_image'
     ])

     environment = apiclient.Environment(
         [],  # packages
         options,
         '2.0.0',  # any environment version
         FAKE_PIPELINE_URL)

     experiments = environment.proto.experiments
     harness_overrides = [
         entry for entry in experiments
         if entry.startswith('runner_harness_container_image=')
     ]
     self.assertEqual(1, len(harness_overrides))
     self.assertIn('runner_harness_container_image=fake_image', experiments)
Ejemplo n.º 15
0
 def test_get_python_sdk_name(self):
   # _get_python_sdk_name() is expected to return the Python 3.5 SDK label
   # for an environment built with the beam_fn_api and
   # use_multiple_sdk_containers experiments.
   # NOTE(review): the expected label hard-codes Python 3.5 and nothing in
   # this block pins the interpreter version - presumably it is patched
   # outside the method; confirm.
   options = PipelineOptions([
       '--project', 'test_project',
       '--job_name', 'test_job_name',
       '--temp_location', 'gs://test-location/temp',
       '--experiments', 'beam_fn_api',
       '--experiments', 'use_multiple_sdk_containers',
   ])
   sdk_environment = apiclient.Environment(
       [], options, 1, FAKE_PIPELINE_URL)

   sdk_name = sdk_environment._get_python_sdk_name()
   self.assertEqual('Apache Beam Python 3.5 SDK', sdk_name)
Ejemplo n.º 16
0
 def test_default_ip_configuration(self):
     # With no IP-related flag given, the first worker pool must leave
     # ipConfiguration unset.
     pipeline_options = PipelineOptions(
         ['--temp_location', 'gs://any-location/temp'])
     env = apiclient.Environment(
         [],  # packages
         pipeline_options,
         '2.0.0')  # any environment version
     # assertIsNone checks identity with None (assertEqual only checks
     # equality), reports a clearer failure message, and matches the
     # debugOptions check in test_enable_hot_key_logging.
     self.assertIsNone(env.proto.workerPools[0].ipConfiguration)