def test_create_runner(self):
    """Check that Dataflow runner names resolve to the matching classes."""
    # assertIsInstance reports the actual object on failure, unlike
    # assertTrue(isinstance(...)), which only says "False is not true".
    self.assertIsInstance(create_runner('DataflowRunner'), DataflowRunner)
    self.assertIsInstance(
        create_runner('TestDataflowRunner'), TestDataflowRunner)
def test_create_runner(self):
    """Check that direct runner names resolve to the matching classes."""
    # assertIsInstance reports the actual object on failure, unlike
    # assertTrue(isinstance(...)), which only says "False is not true".
    self.assertIsInstance(create_runner('DirectRunner'), DirectRunner)
    self.assertIsInstance(create_runner('TestDirectRunner'), TestDirectRunner)
def test_create_runner_shorthand(self):
    """Check that case variants and the short name yield a DirectRunner."""
    spellings = ('DiReCtRuNnEr', 'directrunner', 'direct', 'DiReCt', 'Direct')
    for spelling in spellings:
        # The spelling is passed as the failure message so a broken variant
        # is identifiable without reading line numbers.
        self.assertIsInstance(create_runner(spelling), DirectRunner, spelling)
def test_create_runner(self):
    """Check known runner names resolve and an unknown name is rejected."""
    # assertIsInstance reports the actual object on failure, unlike
    # assertTrue(isinstance(...)), which only says "False is not true".
    self.assertIsInstance(create_runner('DirectRunner'), DirectRunner)
    self.assertIsInstance(create_runner('DataflowRunner'), DataflowRunner)
    self.assertIsInstance(
        create_runner('TestDataflowRunner'), TestDataflowRunner)
    self.assertRaises(ValueError, create_runner, 'xyz')
def test_create_runner_shorthand(self):
    """Check that case variants and the short name yield a DirectRunner."""
    spellings = ('DiReCtRuNnEr', 'directrunner', 'direct', 'DiReCt', 'Direct')
    for spelling in spellings:
        # The spelling is passed as the failure message so a broken variant
        # is identifiable without reading line numbers.
        self.assertIsInstance(create_runner(spelling), DirectRunner, spelling)
def test_create_runner(self):
    """Check known runner names resolve and an unknown name is rejected.

    The Dataflow runner checks only run when ``apiclient`` is not None.
    """
    self.assertIsInstance(create_runner('DirectRunner'), DirectRunner)
    # NOTE(review): apiclient is presumably None when the optional Dataflow
    # dependencies failed to import -- confirm against the module's imports.
    # The two identical guards of the original are merged into one.
    if apiclient is not None:
        self.assertIsInstance(create_runner('DataflowRunner'), DataflowRunner)
        self.assertIsInstance(
            create_runner('TestDataflowRunner'), TestDataflowRunner)
    self.assertRaises(ValueError, create_runner, 'xyz')
def test_create_runner(self):
    """Check current and legacy runner names resolve to runner classes.

    An unknown name must raise ValueError.
    """
    # assertIsInstance reports the actual object on failure, unlike
    # assertTrue(isinstance(...)), which only says "False is not true".
    self.assertIsInstance(create_runner('DirectRunner'), DirectRunner)
    self.assertIsInstance(create_runner('DataflowRunner'), DataflowRunner)
    self.assertIsInstance(
        create_runner('BlockingDataflowRunner'), DataflowRunner)
    self.assertIsInstance(
        create_runner('TestDataflowRunner'), TestDataflowRunner)
    self.assertRaises(ValueError, create_runner, 'xyz')

    # TODO(BEAM-1185): Remove when all references to PipelineRunners are gone.
    self.assertIsInstance(create_runner('DirectPipelineRunner'), DirectRunner)
    self.assertIsInstance(
        create_runner('DataflowPipelineRunner'), DataflowRunner)
    self.assertIsInstance(
        create_runner('BlockingDataflowPipelineRunner'), DataflowRunner)
def __init__(self, runner=None, options=None, argv=None):
    """Initialize a pipeline object.

    Args:
      runner (~apache_beam.runners.runner.PipelineRunner): An object of type
        :class:`~apache_beam.runners.runner.PipelineRunner` that will be used
        to execute the pipeline. For registered runners, the runner name can
        be specified, otherwise a runner object must be supplied.
      options (~apache_beam.options.pipeline_options.PipelineOptions):
        A configured
        :class:`~apache_beam.options.pipeline_options.PipelineOptions` object
        containing arguments that should be used for running the Beam job.
      argv (List[str]): a list of arguments (such as :data:`sys.argv`)
        to be used for building a
        :class:`~apache_beam.options.pipeline_options.PipelineOptions` object.
        This will only be used if argument **options** is :data:`None`.

    Raises:
      ~exceptions.ValueError: if **options** or **argv** is not of the
        expected type, or if validating the pipeline options reports errors.
      ~exceptions.TypeError: if **runner** is neither a string nor a
        :class:`~apache_beam.runners.runner.PipelineRunner`.
    """
    if options is not None:
        if not isinstance(options, PipelineOptions):
            # Interpolate the offending value into the message; wrapping it
            # in a 1-tuple keeps formatting safe when the value is a tuple.
            raise ValueError(
                'Parameter options, if specified, must be of type '
                'PipelineOptions. Received : %r' % (options,))
        self._options = options
    elif argv is not None:
        if not isinstance(argv, list):
            raise ValueError(
                'Parameter argv, if specified, must be a list. Received : %r'
                % (argv,))
        self._options = PipelineOptions(argv)
    else:
        self._options = PipelineOptions([])

    FileSystems.set_options(self._options)

    # Runner resolution order: explicit argument, then the pipeline options,
    # then the default runner.
    if runner is None:
        runner = self._options.view_as(StandardOptions).runner
        if runner is None:
            runner = StandardOptions.DEFAULT_RUNNER
            logging.info(('Missing pipeline option (runner). Executing pipeline '
                          'using the default runner: %s.'), runner)

    if isinstance(runner, str):
        runner = create_runner(runner)
    elif not isinstance(runner, PipelineRunner):
        raise TypeError('Runner must be a PipelineRunner object or the '
                        'name of a registered runner.')

    # Validate pipeline options
    errors = PipelineOptionsValidator(self._options, runner).validate()
    if errors:
        raise ValueError(
            'Pipeline has validations errors: \n' + '\n'.join(errors))

    # Default runner to be used.
    self.runner = runner
    # Stack of transforms generated by nested apply() calls. The stack will
    # contain a root node as an enclosing (parent) node for top transforms.
    self.transforms_stack = [AppliedPTransform(None, None, '', None)]
    # Set of transform labels (full labels) applied to the pipeline.
    # If a transform is applied and the full label is already in the set
    # then the transform will have to be cloned with a new label.
    self.applied_labels = set()
def test_create_runner(self):
    """Check 'DirectRunner' resolves and an unknown name is rejected."""
    # assertIsInstance reports the actual object on failure, unlike
    # assertTrue(isinstance(...)), which only says "False is not true".
    self.assertIsInstance(create_runner('DirectRunner'), DirectRunner)
    self.assertRaises(ValueError, create_runner, 'xyz')
def __init__(self, runner=None, options=None, argv=None):
    """Initialize a pipeline object.

    Args:
      runner: An object of type 'PipelineRunner' that will be used to execute
        the pipeline. For registered runners, the runner name can be
        specified, otherwise a runner object must be supplied.
      options: A configured 'PipelineOptions' object containing arguments
        that should be used for running the Dataflow job.
      argv: a list of arguments (such as sys.argv) to be used for building a
        'PipelineOptions' object. This will only be used if argument
        'options' is None.

    Raises:
      ValueError: if the options or argv argument is not of the expected
        type, or if validating the pipeline options reports errors.
      TypeError: if the runner argument is neither a string nor a
        'PipelineRunner'.
    """
    if options is not None:
        if not isinstance(options, PipelineOptions):
            # Interpolate the offending value into the message; wrapping it
            # in a 1-tuple keeps formatting safe when the value is a tuple.
            raise ValueError(
                'Parameter options, if specified, must be of type '
                'PipelineOptions. Received : %r' % (options,))
        self.options = options
    elif argv is not None:
        if not isinstance(argv, list):
            raise ValueError(
                'Parameter argv, if specified, must be a list. Received : %r'
                % (argv,))
        self.options = PipelineOptions(argv)
    else:
        self.options = PipelineOptions([])

    # Runner resolution order: explicit argument, then the pipeline options,
    # then the default runner.
    if runner is None:
        runner = self.options.view_as(StandardOptions).runner
        if runner is None:
            runner = StandardOptions.DEFAULT_RUNNER
            logging.info(('Missing pipeline option (runner). Executing pipeline '
                          'using the default runner: %s.'), runner)

    if isinstance(runner, str):
        runner = create_runner(runner)
    elif not isinstance(runner, PipelineRunner):
        raise TypeError('Runner must be a PipelineRunner object or the '
                        'name of a registered runner.')

    # Validate pipeline options
    errors = PipelineOptionsValidator(self.options, runner).validate()
    if errors:
        raise ValueError(
            'Pipeline has validations errors: \n' + '\n'.join(errors))

    # Default runner to be used.
    self.runner = runner
    # Stack of transforms generated by nested apply() calls. The stack will
    # contain a root node as an enclosing (parent) node for top transforms.
    self.transforms_stack = [AppliedPTransform(None, None, '', None)]
    # Set of transform labels (full labels) applied to the pipeline.
    # If a transform is applied and the full label is already in the set
    # then the transform will have to be cloned with a new label.
    self.applied_labels = set()
    # Store cache of views created from PCollections. For reference, see
    # pvalue._cache_view().
    self._view_cache = {}
def __init__(self, runner=None, options=None, argv=None):
    # type: (Optional[Union[str, PipelineRunner]], Optional[PipelineOptions], Optional[List[str]]) -> None
    """Initialize a pipeline object.

    Args:
      runner (~apache_beam.runners.runner.PipelineRunner): An object of type
        :class:`~apache_beam.runners.runner.PipelineRunner` that will be used
        to execute the pipeline. For registered runners, the runner name can
        be specified, otherwise a runner object must be supplied.
      options (~apache_beam.options.pipeline_options.PipelineOptions):
        A configured
        :class:`~apache_beam.options.pipeline_options.PipelineOptions` object
        containing arguments that should be used for running the Beam job.
      argv (List[str]): a list of arguments (such as :data:`sys.argv`)
        to be used for building a
        :class:`~apache_beam.options.pipeline_options.PipelineOptions` object.
        This will only be used if argument **options** is :data:`None`.

    Raises:
      ValueError: if **options** or **argv** is not of the expected type, or
        if validating the pipeline options reports errors.
      TypeError: if **runner** is neither a string nor a
        :class:`~apache_beam.runners.runner.PipelineRunner`.
    """
    # Initializing logging configuration in case the user did not set it up.
    logging.basicConfig()

    if options is not None:
        if not isinstance(options, PipelineOptions):
            # The 1-tuple matters: with a bare `% options`, a tuple value
            # would be unpacked as format arguments and raise a formatting
            # TypeError instead of this ValueError.
            raise ValueError(
                'Parameter options, if specified, must be of type '
                'PipelineOptions. Received : %r' % (options,))
        self._options = options
    elif argv is not None:
        if not isinstance(argv, list):
            # A tuple is a likely wrong type here, hence the 1-tuple.
            raise ValueError(
                'Parameter argv, if specified, must be a list. Received : %r'
                % (argv,))
        self._options = PipelineOptions(argv)
    else:
        self._options = PipelineOptions([])

    FileSystems.set_options(self._options)

    # Runner resolution order: explicit argument, then the pipeline options,
    # then the default runner.
    if runner is None:
        runner = self._options.view_as(StandardOptions).runner
        if runner is None:
            runner = StandardOptions.DEFAULT_RUNNER
            logging.info(
                ('Missing pipeline option (runner). Executing pipeline '
                 'using the default runner: %s.'),
                runner)

    if isinstance(runner, str):
        runner = create_runner(runner)
    elif not isinstance(runner, PipelineRunner):
        raise TypeError('Runner %s is not a PipelineRunner object or the '
                        'name of a registered runner.' % (runner,))

    # Validate pipeline options
    errors = PipelineOptionsValidator(self._options, runner).validate()
    if errors:
        raise ValueError('Pipeline has validations errors: \n' +
                         '\n'.join(errors))

    # set default experiments for portable runners
    # (needs to occur prior to pipeline construction)
    if runner.is_fnapi_compatible():
        experiments = (self._options.view_as(DebugOptions).experiments or [])
        if 'beam_fn_api' not in experiments:
            experiments.append('beam_fn_api')
            self._options.view_as(DebugOptions).experiments = experiments

    # Default runner to be used.
    self.runner = runner
    # Stack of transforms generated by nested apply() calls. The stack will
    # contain a root node as an enclosing (parent) node for top transforms.
    self.transforms_stack = [AppliedPTransform(None, None, '', None)]
    # Set of transform labels (full labels) applied to the pipeline.
    # If a transform is applied and the full label is already in the set
    # then the transform will have to be cloned with a new label.
    self.applied_labels = set()  # type: Set[str]
def __init__(self, runner=None, options=None, argv=None):
    """Initialize a pipeline object.

    Args:
      runner (~apache_beam.runners.runner.PipelineRunner): An object of type
        :class:`~apache_beam.runners.runner.PipelineRunner` that will be used
        to execute the pipeline. For registered runners, the runner name can
        be specified, otherwise a runner object must be supplied.
      options (~apache_beam.options.pipeline_options.PipelineOptions):
        A configured
        :class:`~apache_beam.options.pipeline_options.PipelineOptions` object
        containing arguments that should be used for running the Beam job.
      argv (List[str]): a list of arguments (such as :data:`sys.argv`)
        to be used for building a
        :class:`~apache_beam.options.pipeline_options.PipelineOptions` object.
        This will only be used if argument **options** is :data:`None`.

    Raises:
      ~exceptions.ValueError: if **options** or **argv** is not of the
        expected type, or if validating the pipeline options reports errors.
      ~exceptions.TypeError: if **runner** is neither a string nor a
        :class:`~apache_beam.runners.runner.PipelineRunner`.
    """
    if options is not None:
        if not isinstance(options, PipelineOptions):
            # The 1-tuple matters: with a bare `% options`, a tuple value
            # would be unpacked as format arguments and raise a formatting
            # TypeError instead of this ValueError.
            raise ValueError(
                'Parameter options, if specified, must be of type '
                'PipelineOptions. Received : %r' % (options,))
        self._options = options
    elif argv is not None:
        if not isinstance(argv, list):
            # A tuple is a likely wrong type here, hence the 1-tuple.
            raise ValueError(
                'Parameter argv, if specified, must be a list. Received : %r'
                % (argv,))
        self._options = PipelineOptions(argv)
    else:
        self._options = PipelineOptions([])

    FileSystems.set_options(self._options)

    # Runner resolution order: explicit argument, then the pipeline options,
    # then the default runner.
    if runner is None:
        runner = self._options.view_as(StandardOptions).runner
        if runner is None:
            runner = StandardOptions.DEFAULT_RUNNER
            logging.info(('Missing pipeline option (runner). Executing pipeline '
                          'using the default runner: %s.'), runner)

    if isinstance(runner, str):
        runner = create_runner(runner)
    elif not isinstance(runner, PipelineRunner):
        raise TypeError('Runner must be a PipelineRunner object or the '
                        'name of a registered runner.')

    # Validate pipeline options
    errors = PipelineOptionsValidator(self._options, runner).validate()
    if errors:
        raise ValueError(
            'Pipeline has validations errors: \n' + '\n'.join(errors))

    # set default experiments for portable runner
    # (needs to occur prior to pipeline construction)
    # This compares the runner *option* string, not the resolved runner
    # object, so it only fires when the option is exactly 'PortableRunner'.
    if self._options.view_as(StandardOptions).runner == 'PortableRunner':
        experiments = (self._options.view_as(DebugOptions).experiments or [])
        if 'beam_fn_api' not in experiments:
            experiments.append('beam_fn_api')
            self._options.view_as(DebugOptions).experiments = experiments

    # Default runner to be used.
    self.runner = runner
    # Stack of transforms generated by nested apply() calls. The stack will
    # contain a root node as an enclosing (parent) node for top transforms.
    self.transforms_stack = [AppliedPTransform(None, None, '', None)]
    # Set of transform labels (full labels) applied to the pipeline.
    # If a transform is applied and the full label is already in the set
    # then the transform will have to be cloned with a new label.
    self.applied_labels = set()