コード例 #1
0
    def test_set_runtime_option(self):
        """Check ValueProvider options before and after runtime values are set."""
        # Define ValueProvider options, with and without default values.
        class UserDefinedOptions1(PipelineOptions):
            @classmethod
            def _add_argparse_args(cls, parser):
                # Keyword argument; supplied at runtime below.
                parser.add_value_provider_argument(
                    '--vp_arg',
                    help='This keyword argument is a value provider')

                # Has a short form; never set, so its int default is expected.
                parser.add_value_provider_argument(
                    '-v', '--vp_arg2', type=int, default=123)

                # Dash in the name; never set, so its str default is expected.
                parser.add_value_provider_argument(
                    '--vp-arg3', type=str, default='123')

                # Never set and declared without a default.
                parser.add_value_provider_argument('--vp_arg4', type=float)

                # Positional argument, set at construction time; the
                # assertions below expect its default & runtime value to be
                # ignored.
                parser.add_value_provider_argument(
                    'vp_pos_arg',
                    type=float,
                    default=5.4,
                    help='This positional argument is a value provider')

        # Provide values at graph-construction time
        # (options not provided here become of the type RuntimeValueProvider).
        options = UserDefinedOptions1(['1.2'])
        for deferred_name in ('vp_arg', 'vp_arg2', 'vp_arg3', 'vp_arg4'):
            self.assertFalse(getattr(options, deferred_name).is_accessible())
        self.assertTrue(options.vp_pos_arg.is_accessible())

        # Provide values at job-execution time
        # (options not provided here will use their default, if they have one).
        runtime_values = {'vp_arg': 'abc', 'vp_pos_arg': '3.2'}
        RuntimeValueProvider.set_runtime_options(None, runtime_values)

        expected_after_runtime = (
            ('vp_arg', 'abc'),
            ('vp_arg2', 123),
            ('vp_arg3', '123'),
        )
        for option_name, expected in expected_after_runtime:
            self.assertTrue(getattr(options, option_name).is_accessible())
            self.assertEqual(getattr(options, option_name).get(), expected)

        # No default and no runtime value: accessible, but resolves to None.
        self.assertTrue(options.vp_arg4.is_accessible())
        self.assertIsNone(options.vp_arg4.get())

        # The construction-time value wins over both default and runtime value.
        self.assertTrue(options.vp_pos_arg.is_accessible())
        self.assertEqual(options.vp_pos_arg.get(), 1.2)
コード例 #2
0
ファイル: direct_runner.py プロジェクト: nmvk/beam
  def run(self, pipeline):
    """Execute the entire pipeline and return a DirectPipelineResult.

    Args:
      pipeline: the pipeline to execute; it is visited to collect consumer
        information and its options are used to configure the execution.

    Returns:
      A DirectPipelineResult wrapping the started executor. When this runner
      has a cache (eager mode) the call blocks until the pipeline finishes.
    """

    # TODO: Move imports to top. Pipeline <-> Runner dependency cause problems
    # with resolving imports when they are at top.
    # pylint: disable=wrong-import-position
    from apache_beam.runners.direct.consumer_tracking_pipeline_visitor import \
      ConsumerTrackingPipelineVisitor
    from apache_beam.runners.direct.evaluation_context import EvaluationContext
    from apache_beam.runners.direct.executor import Executor
    from apache_beam.runners.direct.transform_evaluator import \
      TransformEvaluatorRegistry

    MetricsEnvironment.set_metrics_supported(True)
    logging.info('Running pipeline with DirectRunner.')
    self.consumer_tracking_visitor = ConsumerTrackingPipelineVisitor()
    pipeline.visit(group_by_key_input_visitor())
    pipeline.visit(self.consumer_tracking_visitor)

    evaluation_context = EvaluationContext(
        pipeline.options,
        BundleFactory(stacked=pipeline.options.view_as(DirectOptions)
                      .direct_runner_use_stacked_bundle),
        self.consumer_tracking_visitor.root_transforms,
        self.consumer_tracking_visitor.value_to_consumers,
        self.consumer_tracking_visitor.step_names,
        self.consumer_tracking_visitor.views)

    evaluation_context.use_pvalue_cache(self._cache)

    executor = Executor(self.consumer_tracking_visitor.value_to_consumers,
                        TransformEvaluatorRegistry(evaluation_context),
                        evaluation_context)
    # Start the executor. This is a non-blocking call, it will start the
    # execution in background threads and return.

    if pipeline.options:
      RuntimeValueProvider.set_runtime_options(pipeline.options._options_id, {})
    executor.start(self.consumer_tracking_visitor.root_transforms)
    result = DirectPipelineResult(executor, evaluation_context)

    if self._cache:
      try:
        # We are running in eager mode, block until the pipeline execution
        # completes in order to have full results in the cache.
        result.wait_until_finish()
        self._cache.finalize()
      finally:
        # Unset runtime options after the pipeline finishes, including when
        # waiting or finalizing raises, so they do not leak past a failed run.
        # TODO: Move this to a post finish hook and clean for all cases.
        if pipeline.options:
          RuntimeValueProvider.unset_runtime_options(
              pipeline.options._options_id)

    return result
コード例 #3
0
  def test_set_runtime_option(self):
    """Verify accessibility and values of ValueProvider options over time."""
    # Define ValueProvider options, with and without default values.
    class UserDefinedOptions1(PipelineOptions):
      @classmethod
      def _add_argparse_args(cls, parser):
        register = parser.add_value_provider_argument

        # Keyword argument that is set at runtime.
        register('--vp_arg',
                 help='This keyword argument is a value provider')

        # Short form available; not set, has an int default.
        register('-v', '--vp_arg2', type=int, default=123)

        # Dash in the name; not set, has a str default.
        register('--vp-arg3', type=str, default='123')

        # Not set and no default.
        register('--vp_arg4', type=float)

        # Positional argument that is set at construction time; the
        # assertions below expect its default & runtime value to be ignored.
        register('vp_pos_arg',
                 help='This positional argument is a value provider',
                 type=float,
                 default=5.4)

    # Provide values at graph-construction time
    # (options not provided here become of the type RuntimeValueProvider).
    options = UserDefinedOptions1(['1.2'])
    for pending in ('vp_arg', 'vp_arg2', 'vp_arg3', 'vp_arg4'):
      self.assertFalse(getattr(options, pending).is_accessible())
    self.assertTrue(options.vp_pos_arg.is_accessible())

    # Provide values at job-execution time
    # (options not provided here will use their default, if they have one).
    supplied_at_runtime = {'vp_arg': 'abc', 'vp_pos_arg': '3.2'}
    RuntimeValueProvider.set_runtime_options(supplied_at_runtime)

    for name, value in (('vp_arg', 'abc'), ('vp_arg2', 123),
                        ('vp_arg3', '123')):
      self.assertTrue(getattr(options, name).is_accessible())
      self.assertEqual(getattr(options, name).get(), value)

    # Without a default or a runtime value the provider resolves to None.
    self.assertTrue(options.vp_arg4.is_accessible())
    self.assertIsNone(options.vp_arg4.get())

    # The value given at construction time is the one that is returned.
    self.assertTrue(options.vp_pos_arg.is_accessible())
    self.assertEqual(options.vp_pos_arg.get(), 1.2)
コード例 #4
0
  def run(self, pipeline):
    """Execute the entire pipeline and return a DirectPipelineResult.

    Args:
      pipeline: the pipeline to execute; it is visited to collect consumer
        information and its options are used to configure the execution.

    Returns:
      A DirectPipelineResult wrapping the started executor. When this runner
      has a cache (eager mode) the call blocks until the pipeline finishes.
    """

    # TODO: Move imports to top. Pipeline <-> Runner dependency cause problems
    # with resolving imports when they are at top.
    # pylint: disable=wrong-import-position
    from apache_beam.runners.direct.consumer_tracking_pipeline_visitor import \
      ConsumerTrackingPipelineVisitor
    from apache_beam.runners.direct.evaluation_context import EvaluationContext
    from apache_beam.runners.direct.executor import Executor
    from apache_beam.runners.direct.transform_evaluator import \
      TransformEvaluatorRegistry

    MetricsEnvironment.set_metrics_supported(True)
    logging.info('Running pipeline with DirectRunner.')
    self.visitor = ConsumerTrackingPipelineVisitor()
    pipeline.visit(self.visitor)

    evaluation_context = EvaluationContext(
        pipeline.options,
        BundleFactory(stacked=pipeline.options.view_as(DirectOptions)
                      .direct_runner_use_stacked_bundle),
        self.visitor.root_transforms,
        self.visitor.value_to_consumers,
        self.visitor.step_names,
        self.visitor.views)

    evaluation_context.use_pvalue_cache(self._cache)

    executor = Executor(self.visitor.value_to_consumers,
                        TransformEvaluatorRegistry(evaluation_context),
                        evaluation_context)
    # Start the executor. This is a non-blocking call, it will start the
    # execution in background threads and return.

    if pipeline.options:
      RuntimeValueProvider.set_runtime_options(pipeline.options._options_id, {})
    executor.start(self.visitor.root_transforms)
    result = DirectPipelineResult(executor, evaluation_context)

    if self._cache:
      try:
        # We are running in eager mode, block until the pipeline execution
        # completes in order to have full results in the cache.
        result.wait_until_finish()
        self._cache.finalize()
      finally:
        # Unset runtime options after the pipeline finishes, including when
        # waiting or finalizing raises, so they do not leak past a failed run.
        # TODO: Move this to a post finish hook and clean for all cases.
        if pipeline.options:
          RuntimeValueProvider.unset_runtime_options(
              pipeline.options._options_id)

    return result
コード例 #5
0
    def test_value_provider_options(self):
        """Check provider types after parsing flags and after keyword overrides."""
        class UserOptions(PipelineOptions):
            @classmethod
            def _add_argparse_args(cls, parser):
                parser.add_value_provider_argument(
                    '--vp_arg',
                    help='This flag is a value provider')
                parser.add_value_provider_argument(
                    '--vp_arg2', type=int, default=1)
                # Plain argparse argument, registered without a value provider.
                parser.add_argument('--non_vp_arg', type=int, default=1)

        # Provide values: if not provided, the option becomes of the type
        # runtime vp.
        parsed = UserOptions(['--vp_arg', 'hello'])
        self.assertIsInstance(parsed.vp_arg, StaticValueProvider)
        self.assertIsInstance(parsed.vp_arg2, RuntimeValueProvider)
        self.assertIsInstance(parsed.non_vp_arg, int)

        # Values can be overwritten through keyword arguments.
        static_override = StaticValueProvider(value_type=str, value='bye')
        runtime_override = RuntimeValueProvider(
            option_name='foo', value_type=int, default_value=10)
        overridden = UserOptions(
            vp_arg=5,
            vp_arg2=static_override,
            non_vp_arg=runtime_override)

        self.assertEqual(overridden.vp_arg, 5)
        self.assertTrue(
            overridden.vp_arg2.is_accessible(),
            '%s is not accessible' % overridden.vp_arg2)
        self.assertEqual(overridden.vp_arg2.get(), 'bye')
        self.assertFalse(overridden.non_vp_arg.is_accessible())

        # Reading a provider that is not accessible must fail.
        with self.assertRaises(RuntimeError):
            overridden.non_vp_arg.get()
コード例 #6
0
    def add_value_provider_argument(self, *args, **kwargs):
        """Register an argument whose value is handled through value providers.

        ValueProvider arguments can be either of type keyword or positional.
        At runtime, even positional arguments will need to be supplied in the
        key/value form.

        Args:
          *args: the argument name or flags, forwarded to ``add_argument``.
            Either a single positional name ('pos_arg') or flags of which at
            least one is in the long '--name' form.
          **kwargs: keyword arguments forwarded to ``add_argument``. ``type``
            and ``default`` are replaced before forwarding (see below), and
            ``nargs`` is set to '?' for positional arguments that do not
            specify it.

        Raises:
          IndexError: if only flags are given and none starts with '--'.
        """
        # Extract the option name from positional argument ['pos_arg']
        assert args != () and len(args[0]) >= 1
        if args[0][0] != '-':
            option_name = args[0]
            if kwargs.get('nargs') is None:  # make them optionally templated
                kwargs['nargs'] = '?'
        else:
            # or keyword arguments like [--kw_arg, -k, -w] or [--kw-arg].
            # Only the leading '--' is stripped; a '--' occurring inside the
            # name (e.g. '--a--b') stays part of the option name.
            option_name = [flag[2:] for flag in args if flag[:2] == '--'][0]

        # reassign the type to make room for using
        # StaticValueProvider as the type for add_argument
        value_type = kwargs.get('type') or str
        kwargs['type'] = _static_value_provider_of(value_type)

        # reassign default to default_value to make room for using
        # RuntimeValueProvider as the default for add_argument
        default_value = kwargs.get('default')
        kwargs['default'] = RuntimeValueProvider(option_name=option_name,
                                                 value_type=value_type,
                                                 default_value=default_value)

        # have add_argument do most of the work
        self.add_argument(*args, **kwargs)
コード例 #7
0
  def test_options_id(self):
    """Runtime options set for one options id must not affect another."""
    class Opt1(PipelineOptions):
      @classmethod
      def _add_argparse_args(cls, parser):
        parser.add_value_provider_argument('--arg1')

    class Opt2(PipelineOptions):
      @classmethod
      def _add_argparse_args(cls, parser):
        parser.add_value_provider_argument('--arg2')

    first_options, second_options = Opt1(), Opt2()

    # Nothing has been supplied yet, so neither provider is accessible.
    self.assertFalse(first_options.arg1.is_accessible())
    self.assertFalse(second_options.arg2.is_accessible())

    # Supply a runtime value under the options id of the first object only.
    target_id = first_options.arg1.options_id
    RuntimeValueProvider.set_runtime_options(target_id, {'arg1': 'val1'})

    self.assertTrue(first_options.arg1.is_accessible())
    self.assertFalse(second_options.arg2.is_accessible())
コード例 #8
0
ファイル: filebasedsource_test.py プロジェクト: ssisk/beam
    def test_string_or_value_provider_only(self):
        """FileBasedSource accepts a str or ValueProvider pattern, nothing else."""
        # Local import so this block does not depend on the module's imports.
        import os

        # Close the handle right away; delete=False keeps the file on disk so
        # its name can be used as a file pattern for the rest of the test.
        with tempfile.NamedTemporaryFile(delete=False) as temp_file:
            str_file_pattern = temp_file.name

        try:
            # A plain string pattern is exposed through `_pattern.value`.
            self.assertEqual(str_file_pattern,
                             FileBasedSource(str_file_pattern)._pattern.value)

            # ValueProvider patterns are compared against `_pattern` directly.
            static_vp_file_pattern = StaticValueProvider(
                value_type=str, value=str_file_pattern)
            self.assertEqual(static_vp_file_pattern,
                             FileBasedSource(static_vp_file_pattern)._pattern)

            runtime_vp_file_pattern = RuntimeValueProvider(
                option_name='arg',
                value_type=str,
                default_value=str_file_pattern)
            self.assertEqual(runtime_vp_file_pattern,
                             FileBasedSource(runtime_vp_file_pattern)._pattern)

            # Anything that is neither a string nor a ValueProvider is rejected.
            invalid_file_pattern = 123
            with self.assertRaises(TypeError):
                FileBasedSource(invalid_file_pattern)
        finally:
            # Remove the temporary file so the test leaves nothing behind.
            os.remove(str_file_pattern)
コード例 #9
0
 def process(self, an_int):
   """Log the string value, read in two alternative ways.

   Args:
     an_int: the element being processed; not used by this method.
   """
   # Pass the value as a logging argument so the message is only formatted
   # when the record is actually emitted.
   logging.info('The string_value is %s', self.string_vp.get())
   # Another option (where you don't need to pass the value at all) is:
   logging.info('The string value is %s',
                RuntimeValueProvider.get_value('string_value', str, ''))
コード例 #10
0
ファイル: json_value_test.py プロジェクト: wileeam/beam
 def test_runtime_value_provider_to(self):
     """A RuntimeValueProvider with no runtime options converts to JSON null."""
     # Clear the class-level runtime options before building the provider.
     RuntimeValueProvider.runtime_options = None
     # NOTE(review): keyword call sites pass value_type before default_value;
     # if that is also the positional order, 123 and int are swapped here --
     # confirm against the RuntimeValueProvider signature.
     rvp = RuntimeValueProvider('arg', 123, int)
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed in Python 3.12.
     self.assertEqual(JsonValue(is_null=True), to_json_value(rvp))