コード例 #1
0
 def test_create_runner(self):
   """Verify Dataflow runner names resolve to the matching runner classes."""
   # assertIsInstance reports the actual type on failure, unlike
   # assertTrue(isinstance(...)).
   self.assertIsInstance(create_runner('DataflowRunner'), DataflowRunner)
   self.assertIsInstance(
       create_runner('TestDataflowRunner'), TestDataflowRunner)
コード例 #2
0
 def test_create_runner(self):
   """Verify direct runner names resolve to the matching runner classes."""
   # assertIsInstance reports the actual type on failure, unlike
   # assertTrue(isinstance(...)).
   self.assertIsInstance(create_runner('DirectRunner'), DirectRunner)
   self.assertIsInstance(
       create_runner('TestDirectRunner'), TestDirectRunner)
コード例 #3
0
ファイル: runner_test.py プロジェクト: fernando-wizeline/beam
 def test_create_runner_shorthand(self):
     """Verify runner-name matching is case-insensitive and accepts 'direct'."""
     # assertIsInstance reports the actual type on failure, unlike
     # assertTrue(isinstance(...)).
     self.assertIsInstance(create_runner('DiReCtRuNnEr'), DirectRunner)
     self.assertIsInstance(create_runner('directrunner'), DirectRunner)
     self.assertIsInstance(create_runner('direct'), DirectRunner)
     self.assertIsInstance(create_runner('DiReCt'), DirectRunner)
     self.assertIsInstance(create_runner('Direct'), DirectRunner)
コード例 #4
0
 def test_create_runner(self):
     """Verify known runner names resolve and unknown names raise ValueError."""
     # assertIsInstance reports the actual type on failure, unlike
     # assertTrue(isinstance(...)).
     self.assertIsInstance(create_runner('DirectRunner'), DirectRunner)
     self.assertIsInstance(create_runner('DataflowRunner'), DataflowRunner)
     self.assertIsInstance(
         create_runner('TestDataflowRunner'), TestDataflowRunner)
     self.assertRaises(ValueError, create_runner, 'xyz')
コード例 #5
0
 def test_create_runner_shorthand(self):
   """Verify runner-name matching is case-insensitive and accepts 'direct'."""
   # assertIsInstance reports the actual type on failure, unlike
   # assertTrue(isinstance(...)).
   self.assertIsInstance(create_runner('DiReCtRuNnEr'), DirectRunner)
   self.assertIsInstance(create_runner('directrunner'), DirectRunner)
   self.assertIsInstance(create_runner('direct'), DirectRunner)
   self.assertIsInstance(create_runner('DiReCt'), DirectRunner)
   self.assertIsInstance(create_runner('Direct'), DirectRunner)
コード例 #6
0
 def test_create_runner(self):
   """Verify runner names resolve; Dataflow checks need the GCP apiclient."""
   # assertIsInstance reports the actual type on failure, unlike
   # assertTrue(isinstance(...)).
   self.assertIsInstance(create_runner('DirectRunner'), DirectRunner)
   # Dataflow runners are only usable when the GCP apiclient imported; the two
   # identical guards are merged into one.
   if apiclient is not None:
     self.assertIsInstance(create_runner('DataflowRunner'), DataflowRunner)
     self.assertIsInstance(
         create_runner('TestDataflowRunner'), TestDataflowRunner)
   self.assertRaises(ValueError, create_runner, 'xyz')
コード例 #7
0
 def test_create_runner(self):
     """Verify runner names, including deprecated aliases, resolve correctly."""
     # assertIsInstance reports the actual type on failure, unlike
     # assertTrue(isinstance(...)).
     self.assertIsInstance(create_runner('DirectRunner'), DirectRunner)
     self.assertIsInstance(create_runner('DataflowRunner'), DataflowRunner)
     self.assertIsInstance(
         create_runner('BlockingDataflowRunner'), DataflowRunner)
     self.assertIsInstance(
         create_runner('TestDataflowRunner'), TestDataflowRunner)
     self.assertRaises(ValueError, create_runner, 'xyz')
     # TODO(BEAM-1185): Remove when all references to PipelineRunners are gone.
     self.assertIsInstance(create_runner('DirectPipelineRunner'), DirectRunner)
     self.assertIsInstance(
         create_runner('DataflowPipelineRunner'), DataflowRunner)
     self.assertIsInstance(
         create_runner('BlockingDataflowPipelineRunner'), DataflowRunner)
コード例 #8
0
  def __init__(self, runner=None, options=None, argv=None):
    """Initialize a pipeline object.

    Args:
      runner (~apache_beam.runners.runner.PipelineRunner): An object of
        type :class:`~apache_beam.runners.runner.PipelineRunner` that will be
        used to execute the pipeline. For registered runners, the runner name
        can be specified, otherwise a runner object must be supplied.
      options (~apache_beam.options.pipeline_options.PipelineOptions):
        A configured
        :class:`~apache_beam.options.pipeline_options.PipelineOptions` object
        containing arguments that should be used for running the Beam job.
      argv (List[str]): a list of arguments (such as :data:`sys.argv`)
        to be used for building a
        :class:`~apache_beam.options.pipeline_options.PipelineOptions` object.
        This will only be used if argument **options** is :data:`None`.

    Raises:
      ~exceptions.ValueError: if either the runner or options argument is not
        of the expected type.
    """
    if options is not None:
      if isinstance(options, PipelineOptions):
        self._options = options
      else:
        # BUG FIX: '%r' was previously passed as a second positional argument
        # to ValueError instead of being interpolated into the message.
        raise ValueError(
            'Parameter options, if specified, must be of type PipelineOptions. '
            'Received : %r' % options)
    elif argv is not None:
      if isinstance(argv, list):
        self._options = PipelineOptions(argv)
      else:
        # BUG FIX: interpolate argv into the message (it was a stray second
        # exception argument before).
        raise ValueError(
            'Parameter argv, if specified, must be a list. Received : %r'
            % argv)
    else:
      self._options = PipelineOptions([])

    # Propagate the resolved options to the filesystem layer before any I/O.
    FileSystems.set_options(self._options)

    # Fall back to the runner named in the options, then to the default.
    if runner is None:
      runner = self._options.view_as(StandardOptions).runner
      if runner is None:
        runner = StandardOptions.DEFAULT_RUNNER
        logging.info(('Missing pipeline option (runner). Executing pipeline '
                      'using the default runner: %s.'), runner)

    # Accept either a registered runner name or a PipelineRunner instance.
    if isinstance(runner, str):
      runner = create_runner(runner)
    elif not isinstance(runner, PipelineRunner):
      raise TypeError('Runner must be a PipelineRunner object or the '
                      'name of a registered runner.')

    # Validate pipeline options
    errors = PipelineOptionsValidator(self._options, runner).validate()
    if errors:
      raise ValueError(
          'Pipeline has validations errors: \n' + '\n'.join(errors))

    # Default runner to be used.
    self.runner = runner
    # Stack of transforms generated by nested apply() calls. The stack will
    # contain a root node as an enclosing (parent) node for top transforms.
    self.transforms_stack = [AppliedPTransform(None, None, '', None)]
    # Set of transform labels (full labels) applied to the pipeline.
    # If a transform is applied and the full label is already in the set
    # then the transform will have to be cloned with a new label.
    self.applied_labels = set()
コード例 #9
0
ファイル: runner_test.py プロジェクト: aaltay/incubator-beam
 def test_create_runner(self):
   """Verify a known runner name resolves and an unknown name raises."""
   # assertIsInstance reports the actual type on failure, unlike
   # assertTrue(isinstance(...)).
   self.assertIsInstance(create_runner('DirectRunner'), DirectRunner)
   self.assertRaises(ValueError, create_runner, 'xyz')
コード例 #10
0
  def __init__(self, runner=None, options=None, argv=None):
    """Initialize a pipeline object.

    Args:
      runner: An object of type 'PipelineRunner' that will be used to execute
        the pipeline. For registered runners, the runner name can be specified,
        otherwise a runner object must be supplied.
      options: A configured 'PipelineOptions' object containing arguments
        that should be used for running the Dataflow job.
      argv: a list of arguments (such as sys.argv) to be used for building a
        'PipelineOptions' object. This will only be used if argument 'options'
        is None.

    Raises:
      ValueError: if either the runner or options argument is not of the
      expected type.
    """

    if options is not None:
      if isinstance(options, PipelineOptions):
        self.options = options
      else:
        # BUG FIX: '%r' was previously passed as a second positional argument
        # to ValueError instead of being interpolated into the message.
        raise ValueError(
            'Parameter options, if specified, must be of type PipelineOptions. '
            'Received : %r' % options)
    elif argv is not None:
      if isinstance(argv, list):
        self.options = PipelineOptions(argv)
      else:
        # BUG FIX: interpolate argv into the message (it was a stray second
        # exception argument before).
        raise ValueError(
            'Parameter argv, if specified, must be a list. Received : %r'
            % argv)
    else:
      self.options = PipelineOptions([])

    # Fall back to the runner named in the options, then to the default.
    if runner is None:
      runner = self.options.view_as(StandardOptions).runner
      if runner is None:
        runner = StandardOptions.DEFAULT_RUNNER
        logging.info(('Missing pipeline option (runner). Executing pipeline '
                      'using the default runner: %s.'), runner)

    # Accept either a registered runner name or a PipelineRunner instance.
    if isinstance(runner, str):
      runner = create_runner(runner)
    elif not isinstance(runner, PipelineRunner):
      raise TypeError('Runner must be a PipelineRunner object or the '
                      'name of a registered runner.')

    # Validate pipeline options
    errors = PipelineOptionsValidator(self.options, runner).validate()
    if errors:
      raise ValueError(
          'Pipeline has validations errors: \n' + '\n'.join(errors))

    # Default runner to be used.
    self.runner = runner
    # Stack of transforms generated by nested apply() calls. The stack will
    # contain a root node as an enclosing (parent) node for top transforms.
    self.transforms_stack = [AppliedPTransform(None, None, '', None)]
    # Set of transform labels (full labels) applied to the pipeline.
    # If a transform is applied and the full label is already in the set
    # then the transform will have to be cloned with a new label.
    self.applied_labels = set()
    # Store cache of views created from PCollections.  For reference, see
    # pvalue._cache_view().
    self._view_cache = {}
コード例 #11
0
ファイル: pipeline.py プロジェクト: scosenza/beam
    def __init__(self, runner=None, options=None, argv=None):
        # type: (Optional[Union[str, PipelineRunner]], Optional[PipelineOptions], Optional[List[str]]) -> None
        """Initialize a pipeline object.

        Args:
          runner: A :class:`~apache_beam.runners.runner.PipelineRunner`
            instance, or the name of a registered runner, that will be used
            to execute the pipeline.
          options: A configured
            :class:`~apache_beam.options.pipeline_options.PipelineOptions`
            object holding the arguments for running the Beam job.
          argv: A list of arguments (such as :data:`sys.argv`) from which a
            :class:`~apache_beam.options.pipeline_options.PipelineOptions`
            object is built; only consulted when **options** is :data:`None`.

        Raises:
          ValueError: if either the runner or options argument is not of the
            expected type.
        """
        # Initializing logging configuration in case the user did not set it up.
        logging.basicConfig()

        # Resolve the pipeline options from `options`, then `argv`, then an
        # empty default, rejecting wrongly-typed arguments up front.
        if options is not None:
            if not isinstance(options, PipelineOptions):
                raise ValueError(
                    'Parameter options, if specified, must be of type PipelineOptions. '
                    'Received : %r' % options)
            self._options = options
        elif argv is not None:
            if not isinstance(argv, list):
                raise ValueError(
                    'Parameter argv, if specified, must be a list. Received : %r'
                    % argv)
            self._options = PipelineOptions(argv)
        else:
            self._options = PipelineOptions([])

        FileSystems.set_options(self._options)

        # No runner argument: fall back to the runner named in the options,
        # then to the default runner.
        if runner is None:
            runner = self._options.view_as(StandardOptions).runner
        if runner is None:
            runner = StandardOptions.DEFAULT_RUNNER
            logging.info(
                ('Missing pipeline option (runner). Executing pipeline '
                 'using the default runner: %s.'), runner)

        # A registered runner name is resolved to an instance; any other
        # non-PipelineRunner value is rejected.
        if isinstance(runner, str):
            runner = create_runner(runner)
        elif not isinstance(runner, PipelineRunner):
            raise TypeError('Runner %s is not a PipelineRunner object or the '
                            'name of a registered runner.' % runner)

        # Reject option combinations the chosen runner cannot work with.
        errors = PipelineOptionsValidator(self._options, runner).validate()
        if errors:
            raise ValueError('Pipeline has validations errors: \n' +
                             '\n'.join(errors))

        # set default experiments for portable runners
        # (needs to occur prior to pipeline construction)
        if runner.is_fnapi_compatible():
            experiments = (self._options.view_as(DebugOptions).experiments
                           or [])
            if 'beam_fn_api' not in experiments:
                experiments.append('beam_fn_api')
                self._options.view_as(DebugOptions).experiments = experiments

        # Default runner to be used.
        self.runner = runner
        # Nested apply() calls push onto this stack; the root node encloses
        # every top-level transform.
        self.transforms_stack = [AppliedPTransform(None, None, '', None)]
        # Full labels already applied; applying a duplicate label forces the
        # transform to be cloned under a fresh label.
        self.applied_labels = set()  # type: Set[str]
コード例 #12
0
  def __init__(self, runner=None, options=None, argv=None):
    """Initialize a pipeline object.

    Args:
      runner (~apache_beam.runners.runner.PipelineRunner): A
        :class:`~apache_beam.runners.runner.PipelineRunner` instance, or the
        name of a registered runner, that will be used to execute the
        pipeline.
      options (~apache_beam.options.pipeline_options.PipelineOptions):
        A configured
        :class:`~apache_beam.options.pipeline_options.PipelineOptions` object
        holding the arguments for running the Beam job.
      argv (List[str]): a list of arguments (such as :data:`sys.argv`) from
        which a
        :class:`~apache_beam.options.pipeline_options.PipelineOptions` object
        is built; only consulted when **options** is :data:`None`.

    Raises:
      ~exceptions.ValueError: if either the runner or options argument is not
        of the expected type.
    """
    # Resolve the pipeline options from `options`, then `argv`, then an
    # empty default, rejecting wrongly-typed arguments up front.
    if options is not None:
      if not isinstance(options, PipelineOptions):
        raise ValueError(
            'Parameter options, if specified, must be of type PipelineOptions. '
            'Received : %r' % options)
      self._options = options
    elif argv is not None:
      if not isinstance(argv, list):
        raise ValueError(
            'Parameter argv, if specified, must be a list. Received : %r'
            % argv)
      self._options = PipelineOptions(argv)
    else:
      self._options = PipelineOptions([])

    FileSystems.set_options(self._options)

    # No runner argument: fall back to the runner named in the options, then
    # to the default runner.
    if runner is None:
      runner = self._options.view_as(StandardOptions).runner
    if runner is None:
      runner = StandardOptions.DEFAULT_RUNNER
      logging.info(('Missing pipeline option (runner). Executing pipeline '
                    'using the default runner: %s.'), runner)

    # A registered runner name is resolved to an instance; any other
    # non-PipelineRunner value is rejected.
    if isinstance(runner, str):
      runner = create_runner(runner)
    elif not isinstance(runner, PipelineRunner):
      raise TypeError('Runner must be a PipelineRunner object or the '
                      'name of a registered runner.')

    # Reject option combinations the chosen runner cannot work with.
    errors = PipelineOptionsValidator(self._options, runner).validate()
    if errors:
      raise ValueError(
          'Pipeline has validations errors: \n' + '\n'.join(errors))

    # set default experiments for portable runner
    # (needs to occur prior to pipeline construction)
    if self._options.view_as(StandardOptions).runner == 'PortableRunner':
      experiments = self._options.view_as(DebugOptions).experiments or []
      if 'beam_fn_api' not in experiments:
        experiments.append('beam_fn_api')
        self._options.view_as(DebugOptions).experiments = experiments

    # Default runner to be used.
    self.runner = runner
    # Nested apply() calls push onto this stack; the root node encloses every
    # top-level transform.
    self.transforms_stack = [AppliedPTransform(None, None, '', None)]
    # Full labels already applied; applying a duplicate label forces the
    # transform to be cloned under a fresh label.
    self.applied_labels = set()