def test_set_runtime_option(self):
    """Check value providers before and after runtime options are set.

    Options are declared with and without defaults. Only the positional one
    is supplied at construction time; the remaining ones are expected to be
    inaccessible until ``RuntimeValueProvider.set_runtime_options`` is called.
    """
    # Define ValueProvider options, with and without default values.
    class UserDefinedOptions1(PipelineOptions):
        @classmethod
        def _add_argparse_args(cls, parser):
            # Keyword option; this is the one set at runtime below.
            parser.add_value_provider_argument(
                '--vp_arg',
                help='This keyword argument is a value provider')
            # Not set; has a short form and an int default.
            parser.add_value_provider_argument(
                '-v', '--vp_arg2',
                default=123,
                type=int)
            # Not set; has a dash in its name and a str default.
            parser.add_value_provider_argument(
                '--vp-arg3',
                default='123',
                type=str)
            # Not set and no default.
            parser.add_value_provider_argument(
                '--vp_arg4',
                type=float)
            # Positional option set at construction time; its default and the
            # runtime value are expected to be ignored.
            parser.add_value_provider_argument(
                'vp_pos_arg',
                help='This positional argument is a value provider',
                type=float,
                default=5.4)

    # Graph-construction time: only the positional value is supplied.
    # (Options not provided here become of the type RuntimeValueProvider.)
    opts = UserDefinedOptions1(['1.2'])
    for deferred_name in ('vp_arg', 'vp_arg2', 'vp_arg3', 'vp_arg4'):
        self.assertFalse(getattr(opts, deferred_name).is_accessible())
    self.assertTrue(opts.vp_pos_arg.is_accessible())

    # Job-execution time: options not provided here fall back to their
    # default, if they have one.
    runtime_values = {'vp_arg': 'abc', 'vp_pos_arg': '3.2'}
    RuntimeValueProvider.set_runtime_options(None, runtime_values)

    expected_values = (
        ('vp_arg', 'abc'),
        ('vp_arg2', 123),
        ('vp_arg3', '123'),
    )
    for resolved_name, expected in expected_values:
        self.assertTrue(getattr(opts, resolved_name).is_accessible())
        self.assertEqual(getattr(opts, resolved_name).get(), expected)

    # No runtime value and no default: accessible, but resolves to None.
    self.assertTrue(opts.vp_arg4.is_accessible())
    self.assertIsNone(opts.vp_arg4.get())

    # The construction-time positional value wins over the runtime '3.2'.
    self.assertTrue(opts.vp_pos_arg.is_accessible())
    self.assertEqual(opts.vp_pos_arg.get(), 1.2)
def run(self, pipeline):
    """Run ``pipeline`` with the direct runner.

    Args:
      pipeline: the pipeline to execute; it is visited to collect its
        transforms and its ``options`` are handed to the evaluation context.

    Returns:
      A DirectPipelineResult wrapping the started executor. When
      ``self._cache`` is truthy (eager mode) the call blocks until execution
      has finished before returning.
    """
    # TODO: Move imports to top. Pipeline <-> Runner dependency cause problems
    # with resolving imports when they are at top.
    # pylint: disable=wrong-import-position
    from apache_beam.runners.direct.consumer_tracking_pipeline_visitor import (
        ConsumerTrackingPipelineVisitor)
    from apache_beam.runners.direct.evaluation_context import EvaluationContext
    from apache_beam.runners.direct.executor import Executor
    from apache_beam.runners.direct.transform_evaluator import (
        TransformEvaluatorRegistry)

    MetricsEnvironment.set_metrics_supported(True)
    logging.info('Running pipeline with DirectRunner.')

    tracker = ConsumerTrackingPipelineVisitor()
    self.consumer_tracking_visitor = tracker
    pipeline.visit(group_by_key_input_visitor())
    pipeline.visit(tracker)

    direct_options = pipeline.options.view_as(DirectOptions)
    bundle_factory = BundleFactory(
        stacked=direct_options.direct_runner_use_stacked_bundle)
    context = EvaluationContext(
        pipeline.options,
        bundle_factory,
        tracker.root_transforms,
        tracker.value_to_consumers,
        tracker.step_names,
        tracker.views)
    context.use_pvalue_cache(self._cache)

    registry = TransformEvaluatorRegistry(context)
    executor = Executor(tracker.value_to_consumers, registry, context)

    # Register (empty) runtime options for this pipeline's options id before
    # the executor starts.
    if pipeline.options:
        RuntimeValueProvider.set_runtime_options(
            pipeline.options._options_id, {})

    # Starting the executor is a non-blocking call: execution proceeds in
    # background threads and this returns immediately.
    executor.start(tracker.root_transforms)
    result = DirectPipelineResult(executor, context)

    if not self._cache:
        return result

    # We are running in eager mode, block until the pipeline execution
    # completes in order to have full results in the cache.
    result.wait_until_finish()
    self._cache.finalize()

    # Unset runtime options after the pipeline finishes.
    # TODO: Move this to a post finish hook and clean for all cases.
    if pipeline.options:
        RuntimeValueProvider.unset_runtime_options(
            pipeline.options._options_id)
    return result
def test_set_runtime_option(self):
    """Check value providers before and after runtime options are set.

    Options are declared with and without defaults. Only the positional one
    is supplied at construction time; the others are expected to be
    inaccessible until ``RuntimeValueProvider.set_runtime_options`` is called,
    after which they resolve to the runtime value, their default, or None.
    """
    # define ValueProvider options, with and without default values
    class UserDefinedOptions1(PipelineOptions):
        @classmethod
        def _add_argparse_args(cls, parser):
            parser.add_value_provider_argument(
                '--vp_arg',
                help='This keyword argument is a value provider')  # set at runtime

            parser.add_value_provider_argument(  # not set, had default int
                '-v', '--vp_arg2',  # with short form
                default=123,
                type=int)

            parser.add_value_provider_argument(  # not set, had default str
                '--vp-arg3',  # with dash in name
                default='123',
                type=str)

            parser.add_value_provider_argument(  # not set and no default
                '--vp_arg4',
                type=float)

            parser.add_value_provider_argument(  # positional argument set
                'vp_pos_arg',  # default & runtime ignored
                help='This positional argument is a value provider',
                type=float,
                default=5.4)

    # provide values at graph-construction time
    # (options not provided here become of the type RuntimeValueProvider)
    options = UserDefinedOptions1(['1.2'])
    self.assertFalse(options.vp_arg.is_accessible())
    self.assertFalse(options.vp_arg2.is_accessible())
    self.assertFalse(options.vp_arg3.is_accessible())
    self.assertFalse(options.vp_arg4.is_accessible())
    self.assertTrue(options.vp_pos_arg.is_accessible())

    # provide values at job-execution time
    # (options not provided here will use their default, if they have one)
    # set_runtime_options takes an options id followed by the options dict,
    # as at the other call sites in this code; None is used as the id here.
    RuntimeValueProvider.set_runtime_options(
        None, {'vp_arg': 'abc', 'vp_pos_arg': '3.2'})
    self.assertTrue(options.vp_arg.is_accessible())
    self.assertEqual(options.vp_arg.get(), 'abc')
    self.assertTrue(options.vp_arg2.is_accessible())
    self.assertEqual(options.vp_arg2.get(), 123)
    self.assertTrue(options.vp_arg3.is_accessible())
    self.assertEqual(options.vp_arg3.get(), '123')
    self.assertTrue(options.vp_arg4.is_accessible())
    self.assertIsNone(options.vp_arg4.get())
    # The construction-time positional value wins over the runtime '3.2'.
    self.assertTrue(options.vp_pos_arg.is_accessible())
    self.assertEqual(options.vp_pos_arg.get(), 1.2)
def run(self, pipeline):
    """Run ``pipeline`` with the direct runner.

    Args:
      pipeline: the pipeline to execute; it is visited to collect its
        transforms and its ``options`` are handed to the evaluation context.

    Returns:
      A DirectPipelineResult wrapping the started executor. When
      ``self._cache`` is truthy (eager mode) the call blocks until execution
      has finished before returning.
    """
    # TODO: Move imports to top. Pipeline <-> Runner dependency cause problems
    # with resolving imports when they are at top.
    # pylint: disable=wrong-import-position
    from apache_beam.runners.direct.consumer_tracking_pipeline_visitor import (
        ConsumerTrackingPipelineVisitor)
    from apache_beam.runners.direct.evaluation_context import EvaluationContext
    from apache_beam.runners.direct.executor import Executor
    from apache_beam.runners.direct.transform_evaluator import (
        TransformEvaluatorRegistry)

    MetricsEnvironment.set_metrics_supported(True)
    logging.info('Running pipeline with DirectRunner.')

    visitor = ConsumerTrackingPipelineVisitor()
    self.visitor = visitor
    pipeline.visit(visitor)

    direct_options = pipeline.options.view_as(DirectOptions)
    bundle_factory = BundleFactory(
        stacked=direct_options.direct_runner_use_stacked_bundle)
    context = EvaluationContext(
        pipeline.options,
        bundle_factory,
        visitor.root_transforms,
        visitor.value_to_consumers,
        visitor.step_names,
        visitor.views)
    context.use_pvalue_cache(self._cache)

    registry = TransformEvaluatorRegistry(context)
    executor = Executor(visitor.value_to_consumers, registry, context)

    # Register (empty) runtime options for this pipeline's options id before
    # the executor starts.
    if pipeline.options:
        RuntimeValueProvider.set_runtime_options(
            pipeline.options._options_id, {})

    # Starting the executor is a non-blocking call: execution proceeds in
    # background threads and this returns immediately.
    executor.start(visitor.root_transforms)
    result = DirectPipelineResult(executor, context)

    if not self._cache:
        return result

    # We are running in eager mode, block until the pipeline execution
    # completes in order to have full results in the cache.
    result.wait_until_finish()
    self._cache.finalize()

    # Unset runtime options after the pipeline finishes.
    # TODO: Move this to a post finish hook and clean for all cases.
    if pipeline.options:
        RuntimeValueProvider.unset_runtime_options(
            pipeline.options._options_id)
    return result
def test_value_provider_options(self):
    """Check provider types after parsing flags and after keyword overrides."""
    class UserOptions(PipelineOptions):
        @classmethod
        def _add_argparse_args(cls, parser):
            parser.add_value_provider_argument(
                '--vp_arg',
                help='This flag is a value provider')
            parser.add_value_provider_argument(
                '--vp_arg2', default=1, type=int)
            parser.add_argument('--non_vp_arg', default=1, type=int)

    # A value-provider flag given on the command line is static; one that is
    # not given becomes a runtime value provider. Plain arguments keep their
    # ordinary parsed type.
    parsed = UserOptions(['--vp_arg', 'hello'])
    self.assertIsInstance(parsed.vp_arg, StaticValueProvider)
    self.assertIsInstance(parsed.vp_arg2, RuntimeValueProvider)
    self.assertIsInstance(parsed.non_vp_arg, int)

    # Keyword arguments overwrite whatever the parser would have produced,
    # regardless of how the option was declared.
    static_bye = StaticValueProvider(value_type=str, value='bye')
    runtime_foo = RuntimeValueProvider(
        option_name='foo', value_type=int, default_value=10)
    overridden = UserOptions(
        vp_arg=5, vp_arg2=static_bye, non_vp_arg=runtime_foo)

    self.assertEqual(overridden.vp_arg, 5)
    self.assertTrue(
        overridden.vp_arg2.is_accessible(),
        '%s is not accessible' % overridden.vp_arg2)
    self.assertEqual(overridden.vp_arg2.get(), 'bye')

    # The runtime provider is expected to be inaccessible here, and reading
    # it should raise.
    self.assertFalse(overridden.non_vp_arg.is_accessible())
    with self.assertRaises(RuntimeError):
        overridden.non_vp_arg.get()
def add_value_provider_argument(self, *args, **kwargs):
    """Register an argument whose parsed value is a value provider.

    The argument may be a keyword flag or a positional. At runtime, even
    positional arguments will need to be supplied in the key/value form.

    Args:
      *args: a positional name, or one or more flag strings of which at
        least one must start with '--'. Forwarded to ``add_argument``.
      **kwargs: ``add_argument`` keywords. 'type' and 'default' are replaced
        by value-provider wrappers; 'nargs' is set to '?' for positionals
        that do not specify it.
    """
    assert args != () and len(args[0]) >= 1

    leading = args[0]
    if leading[0] == '-':
        # Keyword arguments like [--kw_arg, -k, -w] or [--kw-arg]: the option
        # name comes from the first '--' flag, with every '--' removed.
        long_flags = [flag for flag in args if flag[:2] == '--']
        option_name = long_flags[0].replace('--', '')
    else:
        # Positional argument ['pos_arg']: the name is used as is.
        option_name = leading
        # make them optionally templated
        if kwargs.get('nargs') is None:
            kwargs['nargs'] = '?'

    # The caller's type and default are captured first, then replaced so
    # that add_argument parses into a StaticValueProvider and falls back to
    # a RuntimeValueProvider carrying the original default.
    value_type = kwargs.get('type') or str
    default_value = kwargs.get('default')

    kwargs['type'] = _static_value_provider_of(value_type)
    kwargs['default'] = RuntimeValueProvider(
        option_name=option_name,
        value_type=value_type,
        default_value=default_value)

    # have add_argument do most of the work
    self.add_argument(*args, **kwargs)
def test_options_id(self):
    """Runtime options set for one options id must not affect another."""
    class Opt1(PipelineOptions):
        @classmethod
        def _add_argparse_args(cls, parser):
            parser.add_value_provider_argument('--arg1')

    class Opt2(PipelineOptions):
        @classmethod
        def _add_argparse_args(cls, parser):
            parser.add_value_provider_argument('--arg2')

    first = Opt1()
    second = Opt2()

    # Nothing has been supplied yet, so neither provider is accessible.
    self.assertFalse(first.arg1.is_accessible())
    self.assertFalse(second.arg2.is_accessible())

    # Supply a runtime value under the first instance's options id only.
    first_id = first.arg1.options_id
    RuntimeValueProvider.set_runtime_options(first_id, {'arg1': 'val1'})

    self.assertTrue(first.arg1.is_accessible())
    self.assertFalse(second.arg2.is_accessible())
def test_string_or_value_provider_only(self):
    """FileBasedSource accepts a str or a ValueProvider pattern, nothing else.

    A str pattern is expected to be exposed through ``_pattern.value``;
    static and runtime value providers are expected to be stored as is; any
    other type (here an int) must raise TypeError.
    """
    # Imported locally because the file's import block is not visible here.
    import os

    # Close the handle right away and remove the file when the test ends;
    # previously the open handle and the file on disk were both leaked.
    with tempfile.NamedTemporaryFile(delete=False) as temp_file:
        str_file_pattern = temp_file.name
    self.addCleanup(os.remove, str_file_pattern)

    self.assertEqual(str_file_pattern,
                     FileBasedSource(str_file_pattern)._pattern.value)

    static_vp_file_pattern = StaticValueProvider(value_type=str,
                                                 value=str_file_pattern)
    self.assertEqual(static_vp_file_pattern,
                     FileBasedSource(static_vp_file_pattern)._pattern)

    runtime_vp_file_pattern = RuntimeValueProvider(
        option_name='arg',
        value_type=str,
        default_value=str_file_pattern)
    self.assertEqual(runtime_vp_file_pattern,
                     FileBasedSource(runtime_vp_file_pattern)._pattern)

    invalid_file_pattern = 123
    with self.assertRaises(TypeError):
        FileBasedSource(invalid_file_pattern)
def process(self, an_int):
    """Log the string option's value, obtained in two different ways.

    Args:
      an_int: the incoming element; it is not used by this method.
    """
    # First way: read the value from the provider held on this instance.
    provided = self.string_vp.get()
    logging.info('The string_value is %s' % provided)

    # Another option (where you don't need to pass the value at all) is to
    # look the option up by name on RuntimeValueProvider.
    looked_up = RuntimeValueProvider.get_value('string_value', str, '')
    logging.info('The string value is %s' % looked_up)
def test_runtime_value_provider_to(self):
    """A runtime value provider with no runtime options converts to JSON null."""
    # Clear any runtime options so the provider below has nothing to read.
    RuntimeValueProvider.runtime_options = None
    # NOTE(review): other call sites pass option_name/value_type/default_value
    # by keyword in that order, so 123 and int may be swapped here - confirm
    # against the constructor before changing.
    rvp = RuntimeValueProvider('arg', 123, int)
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(JsonValue(is_null=True), to_json_value(rvp))