def test_validate_template_location(self):
  runner = MockRunners.OtherRunner()
  options = PipelineOptions([
      '--template_location',
      'abc',
  ])
  validator = PipelineOptionsValidator(options, runner)
  errors = validator.validate()
  self.assertFalse(errors)
def test_zone_and_worker_zone_mutually_exclusive(self):
  runner = MockRunners.DataflowRunner()
  options = PipelineOptions([
      '--zone',
      'us-east1-b',
      '--worker_zone',
      'us-east1-c',
  ])
  validator = PipelineOptionsValidator(options, runner)
  errors = validator.validate()
  self.assertTrue(errors)
def test_dataflow_job_file_and_template_location_mutually_exclusive(self):
  runner = MockRunners.OtherRunner()
  options = PipelineOptions([
      '--template_location', 'abc', '--dataflow_job_file', 'def'
  ])
  validator = PipelineOptionsValidator(options, runner)
  errors = validator.validate()
  self.assertTrue(errors)
def test_missing_required_options(self):
  options = PipelineOptions([''])
  runner = MockRunners.DataflowRunner()
  validator = PipelineOptionsValidator(options, runner)
  errors = validator.validate()
  self.assertEqual(
      self.check_errors_for_arguments(
          errors, ['project', 'staging_location', 'temp_location']),
      [])
def test_experiment_region_and_worker_region_mutually_exclusive(self):
  runner = MockRunners.DataflowRunner()
  options = PipelineOptions([
      '--experiments',
      'worker_region=us-west1',
      '--worker_region',
      'us-east1',
  ])
  validator = PipelineOptionsValidator(options, runner)
  errors = validator.validate()
  self.assertTrue(errors)
def get_validator(matcher):
  options = [
      '--project=example:example',
      '--job_name=job',
      '--staging_location=gs://foo/bar',
      '--temp_location=gs://foo/bar',
  ]
  if matcher:
    options.append('%s=%s' % ('--on_success_matcher', matcher.decode()))
  pipeline_options = PipelineOptions(options)
  runner = MockRunners.TestDataflowRunner()
  return PipelineOptionsValidator(pipeline_options, runner)
def get_validator(job_name):
  options = [
      '--project=example:example',
      '--staging_location=gs://foo/bar',
      '--temp_location=gs://foo/bar'
  ]
  if job_name is not None:
    options.append('--job_name=' + job_name)
  pipeline_options = PipelineOptions(options)
  runner = MockRunners.DataflowRunner()
  validator = PipelineOptionsValidator(pipeline_options, runner)
  return validator
def test_region_optional_for_non_service_runner(self):
  runner = MockRunners.DataflowRunner()
  # Remove default region for this test.
  runner.get_default_gcp_region = lambda: None
  options = PipelineOptions([
      '--project=example:example',
      '--temp_location=gs://foo/bar',
      '--dataflow_endpoint=http://localhost:20281',
  ])
  validator = PipelineOptionsValidator(options, runner)
  errors = validator.validate()
  self.assertEqual(len(errors), 0)
def test_missing_required_options(self):
  options = PipelineOptions([''])
  runner = MockRunners.DataflowRunner()
  # Remove default region for this test.
  runner.get_default_gcp_region = lambda: None
  validator = PipelineOptionsValidator(options, runner)
  errors = validator.validate()
  self.assertEqual(
      self.check_errors_for_arguments(
          errors,
          ['project', 'staging_location', 'temp_location', 'region']),
      [])
def test_num_workers_can_equal_max_num_workers(self):
  runner = MockRunners.DataflowRunner()
  options = PipelineOptions([
      '--num_workers=42',
      '--max_num_workers=42',
      '--worker_region=us-east1',
      '--project=example:example',
      '--temp_location=gs://foo/bar',
  ])
  validator = PipelineOptionsValidator(options, runner)
  errors = validator.validate()
  self.assertEqual(len(errors), 0)
def test_zone_alias_worker_zone(self):
  runner = MockRunners.DataflowRunner()
  options = PipelineOptions([
      '--zone=us-east1-b',
      '--project=example:example',
      '--temp_location=gs://foo/bar',
  ])
  validator = PipelineOptionsValidator(options, runner)
  errors = validator.validate()
  self.assertEqual(len(errors), 0)
  self.assertIsNone(options.view_as(WorkerOptions).zone)
  self.assertEqual(options.view_as(WorkerOptions).worker_zone, 'us-east1-b')
def test_worker_harness_sdk_container_image_mutually_exclusive(self):
  runner = MockRunners.DataflowRunner()
  options = PipelineOptions([
      '--worker_harness_container_image=WORKER',
      '--sdk_container_image=SDK_ONLY',
      '--project=example:example',
      '--temp_location=gs://foo/bar',
  ])
  validator = PipelineOptionsValidator(options, runner)
  errors = validator.validate()
  self.assertEqual(len(errors), 1)
  self.assertIn('sdk_container_image', errors[0])
  self.assertIn('worker_harness_container_image', errors[0])
def test_max_num_workers_is_positive(self):
  runner = MockRunners.DataflowRunner()
  options = PipelineOptions([
      '--max_num_workers=-1',
      '--worker_region=us-east1',
      '--project=example:example',
      '--temp_location=gs://foo/bar',
  ])
  validator = PipelineOptionsValidator(options, runner)
  errors = validator.validate()
  self.assertEqual(len(errors), 1)
  self.assertIn('max_num_workers', errors[0])
  self.assertIn('-1', errors[0])
def test_transform_name_mapping_without_update(self):
  options = [
      '--project=example:example',
      '--staging_location=gs://foo/bar',
      '--temp_location=gs://foo/bar',
      '--transform_name_mapping={\"fromPardo\":\"toPardo\"}'
  ]
  pipeline_options = PipelineOptions(options)
  runner = MockRunners.DataflowRunner()
  validator = PipelineOptionsValidator(pipeline_options, runner)
  errors = validator.validate()
  assert_that(
      errors,
      only_contains(
          contains_string(
              'Transform name mapping option is only useful when '
              '--update and --streaming is specified')))
def get_validator(temp_location, staging_location):
  options = ['--project=example:example', '--job_name=job']
  if temp_location is not None:
    options.append('--temp_location=' + temp_location)
  if staging_location is not None:
    options.append('--staging_location=' + staging_location)
  pipeline_options = PipelineOptions(options)
  runner = MockRunners.DataflowRunner()
  validator = PipelineOptionsValidator(pipeline_options, runner)
  return validator
def test_transform_name_mapping_invalid_format(self):
  options = [
      '--project=example:example',
      '--staging_location=gs://foo/bar',
      '--temp_location=gs://foo/bar',
      '--update',
      '--job_name=test',
      '--streaming',
      '--transform_name_mapping={\"fromPardo\":123}'
  ]
  pipeline_options = PipelineOptions(options)
  runner = MockRunners.DataflowRunner()
  validator = PipelineOptionsValidator(pipeline_options, runner)
  errors = validator.validate()
  assert_that(
      errors,
      only_contains(
          contains_string('Invalid transform name mapping format.')))
def test_worker_region_and_worker_zone_mutually_exclusive(self):
  runner = MockRunners.DataflowRunner()
  options = PipelineOptions([
      '--worker_region',
      'us-east1',
      '--worker_zone',
      'us-east1-b',
      '--project=example:example',
      '--temp_location=gs://foo/bar',
  ])
  validator = PipelineOptionsValidator(options, runner)
  errors = validator.validate()
  self.assertEqual(len(errors), 1)
  self.assertIn('worker_region', errors[0])
  self.assertIn('worker_zone', errors[0])
def test_alias_worker_harness_sdk_container_image(self):
  runner = MockRunners.DataflowRunner()
  test_image = "WORKER_HARNESS"
  options = PipelineOptions([
      '--worker_harness_container_image=%s' % test_image,
      '--project=example:example',
      '--temp_location=gs://foo/bar',
  ])
  validator = PipelineOptionsValidator(options, runner)
  errors = validator.validate()
  self.assertEqual(len(errors), 0)
  self.assertEqual(
      options.view_as(WorkerOptions).worker_harness_container_image,
      test_image)
  self.assertEqual(
      options.view_as(WorkerOptions).sdk_container_image, test_image)
def test_local_runner(self):
  runner = MockRunners.OtherRunner()
  options = PipelineOptions([])
  validator = PipelineOptionsValidator(options, runner)
  errors = validator.validate()
  self.assertEqual(len(errors), 0)
def test_validate_dataflow_job_file(self):
  runner = MockRunners.OtherRunner()
  options = PipelineOptions(['--dataflow_job_file', 'abc'])
  validator = PipelineOptionsValidator(options, runner)
  errors = validator.validate()
  self.assertFalse(errors)
def __init__(self, runner=None, options=None, argv=None):
  """Initialize a pipeline object.

  Args:
    runner (~apache_beam.runners.runner.PipelineRunner): An object of
      type :class:`~apache_beam.runners.runner.PipelineRunner` that will be
      used to execute the pipeline. For registered runners, the runner name
      can be specified, otherwise a runner object must be supplied.
    options (~apache_beam.options.pipeline_options.PipelineOptions): A
      configured
      :class:`~apache_beam.options.pipeline_options.PipelineOptions` object
      containing arguments that should be used for running the Beam job.
    argv (List[str]): a list of arguments (such as :data:`sys.argv`) to be
      used for building a
      :class:`~apache_beam.options.pipeline_options.PipelineOptions` object.
      This will only be used if argument **options** is :data:`None`.

  Raises:
    ~exceptions.ValueError: if either the runner or options argument is not
      of the expected type.
  """
  if options is not None:
    if isinstance(options, PipelineOptions):
      self._options = options
    else:
      raise ValueError(
          'Parameter options, if specified, must be of type PipelineOptions. '
          'Received : %r' % options)
  elif argv is not None:
    if isinstance(argv, list):
      self._options = PipelineOptions(argv)
    else:
      raise ValueError(
          'Parameter argv, if specified, must be a list. Received : %r' % argv)
  else:
    self._options = PipelineOptions([])

  FileSystems.set_options(self._options)

  if runner is None:
    runner = self._options.view_as(StandardOptions).runner
    if runner is None:
      runner = StandardOptions.DEFAULT_RUNNER
      logging.info(('Missing pipeline option (runner). Executing pipeline '
                    'using the default runner: %s.'), runner)

  if isinstance(runner, str):
    runner = create_runner(runner)
  elif not isinstance(runner, PipelineRunner):
    raise TypeError('Runner must be a PipelineRunner object or the '
                    'name of a registered runner.')

  # Validate pipeline options
  errors = PipelineOptionsValidator(self._options, runner).validate()
  if errors:
    raise ValueError(
        'Pipeline has validations errors: \n' + '\n'.join(errors))

  # Default runner to be used.
  self.runner = runner
  # Stack of transforms generated by nested apply() calls. The stack will
  # contain a root node as an enclosing (parent) node for top transforms.
  self.transforms_stack = [AppliedPTransform(None, None, '', None)]
  # Set of transform labels (full labels) applied to the pipeline.
  # If a transform is applied and the full label is already in the set
  # then the transform will have to be cloned with a new label.
  self.applied_labels = set()
def __init__(self, runner=None, options=None, argv=None):
  # type: (Optional[Union[str, PipelineRunner]], Optional[PipelineOptions], Optional[List[str]]) -> None

  """Initialize a pipeline object.

  Args:
    runner (~apache_beam.runners.runner.PipelineRunner): An object of
      type :class:`~apache_beam.runners.runner.PipelineRunner` that will be
      used to execute the pipeline. For registered runners, the runner name
      can be specified, otherwise a runner object must be supplied.
    options (~apache_beam.options.pipeline_options.PipelineOptions): A
      configured
      :class:`~apache_beam.options.pipeline_options.PipelineOptions` object
      containing arguments that should be used for running the Beam job.
    argv (List[str]): a list of arguments (such as :data:`sys.argv`) to be
      used for building a
      :class:`~apache_beam.options.pipeline_options.PipelineOptions` object.
      This will only be used if argument **options** is :data:`None`.

  Raises:
    ValueError: if either the runner or options argument is not of the
      expected type.
  """
  # Initializing logging configuration in case the user did not set it up.
  logging.basicConfig()

  if options is not None:
    if isinstance(options, PipelineOptions):
      self._options = options
    else:
      raise ValueError(
          'Parameter options, if specified, must be of type PipelineOptions. '
          'Received : %r' % options)
  elif argv is not None:
    if isinstance(argv, list):
      self._options = PipelineOptions(argv)
    else:
      raise ValueError(
          'Parameter argv, if specified, must be a list. Received : %r' % argv)
  else:
    self._options = PipelineOptions([])

  FileSystems.set_options(self._options)

  if runner is None:
    runner = self._options.view_as(StandardOptions).runner
    if runner is None:
      runner = StandardOptions.DEFAULT_RUNNER
      logging.info(('Missing pipeline option (runner). Executing pipeline '
                    'using the default runner: %s.'), runner)

  if isinstance(runner, str):
    runner = create_runner(runner)
  elif not isinstance(runner, PipelineRunner):
    raise TypeError('Runner %s is not a PipelineRunner object or the '
                    'name of a registered runner.' % runner)

  # Validate pipeline options
  errors = PipelineOptionsValidator(self._options, runner).validate()
  if errors:
    raise ValueError(
        'Pipeline has validations errors: \n' + '\n'.join(errors))

  # set default experiments for portable runners
  # (needs to occur prior to pipeline construction)
  if runner.is_fnapi_compatible():
    experiments = (self._options.view_as(DebugOptions).experiments or [])
    if 'beam_fn_api' not in experiments:
      experiments.append('beam_fn_api')
    self._options.view_as(DebugOptions).experiments = experiments

  # Default runner to be used.
  self.runner = runner
  # Stack of transforms generated by nested apply() calls. The stack will
  # contain a root node as an enclosing (parent) node for top transforms.
  self.transforms_stack = [AppliedPTransform(None, None, '', None)]
  # Set of transform labels (full labels) applied to the pipeline.
  # If a transform is applied and the full label is already in the set
  # then the transform will have to be cloned with a new label.
  self.applied_labels = set()  # type: Set[str]
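For context, a minimal usage sketch of the constructor shown above: the runner may be given as a registered runner name or a PipelineRunner instance, options as a PipelineOptions object, and any errors reported by PipelineOptionsValidator surface as a ValueError at construction time. The 'DirectRunner' name and the flag value below are illustrative assumptions, not taken from the source above.

import apache_beam as beam
from apache_beam.options.pipeline_options import PipelineOptions

# Build options from argv-style flags; the value here is a placeholder.
opts = PipelineOptions(['--job_name=example-job'])

# 'DirectRunner' is a registered runner name, so a string is accepted;
# passing a PipelineRunner instance would work as well. Invalid options
# raise ValueError via PipelineOptionsValidator inside __init__.
pipeline = beam.Pipeline(runner='DirectRunner', options=opts)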
def test_type_check_additional_unrecognized_feature(self):
  runner = MockRunners.OtherRunner()
  options = PipelineOptions(['--type_check_additional=all,dfgdf'])
  validator = PipelineOptionsValidator(options, runner)
  errors = validator.validate()
  self.assertTrue(errors)