예제 #1
0
  def run_pipeline(self, pipeline):
    use_fnapi_runner = True

    # Streaming mode is not yet supported on the FnApiRunner.
    if pipeline._options.view_as(StandardOptions).streaming:
      use_fnapi_runner = False

    from apache_beam.pipeline import PipelineVisitor
    from apache_beam.runners.common import DoFnSignature
    from apache_beam.runners.dataflow.native_io.iobase import NativeSource
    from apache_beam.runners.dataflow.native_io.iobase import _NativeWrite
    from apache_beam.testing.test_stream import TestStream

    class _FnApiRunnerSupportVisitor(PipelineVisitor):
      """Visitor determining if a Pipeline can be run on the FnApiRunner."""

      def __init__(self):
        self.supported_by_fnapi_runner = True

      def visit_transform(self, applied_ptransform):
        transform = applied_ptransform.transform
        # The FnApiRunner does not support streaming execution.
        if isinstance(transform, TestStream):
          self.supported_by_fnapi_runner = False
        # The FnApiRunner does not support reads from NativeSources.
        if (isinstance(transform, beam.io.Read) and
            isinstance(transform.source, NativeSource)):
          self.supported_by_fnapi_runner = False
        # The FnApiRunner does not support the use of _NativeWrites.
        if isinstance(transform, _NativeWrite):
          self.supported_by_fnapi_runner = False
        if isinstance(transform, beam.ParDo):
          dofn = transform.dofn
          # The FnApiRunner does not support execution of SplittableDoFns.
          if DoFnSignature(dofn).is_splittable_dofn():
            self.supported_by_fnapi_runner = False
          # The FnApiRunner does not support execution of CombineFns with
          # deferred side inputs.
          if isinstance(dofn, CombineValuesDoFn):
            args, kwargs = transform.raw_side_inputs
            args_to_check = itertools.chain(args,
                                            kwargs.values())
            if any(isinstance(arg, ArgumentPlaceholder)
                   for arg in args_to_check):
              self.supported_by_fnapi_runner = False

    # Check whether all transforms used in the pipeline are supported by the
    # FnApiRunner.
    visitor = _FnApiRunnerSupportVisitor()
    pipeline.visit(visitor)
    if not visitor.supported_by_fnapi_runner:
      use_fnapi_runner = False

    if use_fnapi_runner:
      from apache_beam.runners.portability.fn_api_runner import FnApiRunner
      runner = FnApiRunner()
    else:
      runner = BundleBasedDirectRunner()

    return runner.run_pipeline(pipeline)
예제 #2
0
  def run_pipeline(self, pipeline):
    use_fnapi_runner = True

    # Streaming mode is not yet supported on the FnApiRunner.
    if pipeline.options.view_as(StandardOptions).streaming:
      use_fnapi_runner = False

    from apache_beam.pipeline import PipelineVisitor
    from apache_beam.runners.common import DoFnSignature
    from apache_beam.runners.dataflow.native_io.iobase import NativeSource
    from apache_beam.runners.dataflow.native_io.iobase import _NativeWrite
    from apache_beam.testing.test_stream import TestStream

    class _FnApiRunnerSupportVisitor(PipelineVisitor):
      """Visitor determining if a Pipeline can be run on the FnApiRunner."""

      def __init__(self):
        self.supported_by_fnapi_runner = True

      def visit_transform(self, applied_ptransform):
        transform = applied_ptransform.transform
        # The FnApiRunner does not support streaming execution.
        if isinstance(transform, TestStream):
          self.supported_by_fnapi_runner = False
        # The FnApiRunner does not support reads from NativeSources.
        if (isinstance(transform, beam.io.Read) and
            isinstance(transform.source, NativeSource)):
          self.supported_by_fnapi_runner = False
        # The FnApiRunner does not support the use of _NativeWrites.
        if isinstance(transform, _NativeWrite):
          self.supported_by_fnapi_runner = False
        if isinstance(transform, beam.ParDo):
          dofn = transform.dofn
          # The FnApiRunner does not support execution of SplittableDoFns.
          if DoFnSignature(dofn).is_splittable_dofn():
            self.supported_by_fnapi_runner = False
          # The FnApiRunner does not support execution of CombineFns with
          # deferred side inputs.
          if isinstance(dofn, CombineValuesDoFn):
            args, kwargs = transform.raw_side_inputs
            args_to_check = itertools.chain(args,
                                            kwargs.values())
            if any(isinstance(arg, ArgumentPlaceholder)
                   for arg in args_to_check):
              self.supported_by_fnapi_runner = False

    # Check whether all transforms used in the pipeline are supported by the
    # FnApiRunner.
    visitor = _FnApiRunnerSupportVisitor()
    pipeline.visit(visitor)
    if not visitor.supported_by_fnapi_runner:
      use_fnapi_runner = False

    if use_fnapi_runner:
      from apache_beam.runners.portability.fn_api_runner import FnApiRunner
      runner = FnApiRunner()
    else:
      runner = BundleBasedDirectRunner()

    return runner.run_pipeline(pipeline)
예제 #3
0
  def run_pipeline(self, pipeline, options):

    from apache_beam.pipeline import PipelineVisitor
    from apache_beam.runners.dataflow.native_io.iobase import NativeSource
    from apache_beam.runners.dataflow.native_io.iobase import _NativeWrite
    from apache_beam.testing.test_stream import TestStream

    class _FnApiRunnerSupportVisitor(PipelineVisitor):
      """Visitor determining if a Pipeline can be run on the FnApiRunner."""

      def accept(self, pipeline):
        self.supported_by_fnapi_runner = True
        pipeline.visit(self)
        return self.supported_by_fnapi_runner

      def visit_transform(self, applied_ptransform):
        transform = applied_ptransform.transform
        # The FnApiRunner does not support streaming execution.
        if isinstance(transform, TestStream):
          self.supported_by_fnapi_runner = False
        # The FnApiRunner does not support reads from NativeSources.
        if (isinstance(transform, beam.io.Read) and
            isinstance(transform.source, NativeSource)):
          self.supported_by_fnapi_runner = False
        # The FnApiRunner does not support the use of _NativeWrites.
        if isinstance(transform, _NativeWrite):
          self.supported_by_fnapi_runner = False
        if isinstance(transform, beam.ParDo):
          dofn = transform.dofn
          # The FnApiRunner does not support execution of CombineFns with
          # deferred side inputs.
          if isinstance(dofn, CombineValuesDoFn):
            args, kwargs = transform.raw_side_inputs
            args_to_check = itertools.chain(args,
                                            kwargs.values())
            if any(isinstance(arg, ArgumentPlaceholder)
                   for arg in args_to_check):
              self.supported_by_fnapi_runner = False

    # Check whether all transforms used in the pipeline are supported by the
    # FnApiRunner, and the pipeline was not meant to be run as streaming.
    use_fnapi_runner = (
        _FnApiRunnerSupportVisitor().accept(pipeline)
        and not options.view_as(StandardOptions).streaming)

    # Also ensure grpc is available.
    try:
      # pylint: disable=unused-variable
      import grpc
    except ImportError:
      use_fnapi_runner = False

    if use_fnapi_runner:
      from apache_beam.runners.portability.fn_api_runner import FnApiRunner
      runner = FnApiRunner()
    else:
      runner = BundleBasedDirectRunner()

    return runner.run_pipeline(pipeline, options)
예제 #4
0
  def run_pipeline(self, pipeline, options):

    from apache_beam.pipeline import PipelineVisitor
    from apache_beam.runners.dataflow.native_io.iobase import NativeSource
    from apache_beam.runners.dataflow.native_io.iobase import _NativeWrite
    from apache_beam.testing.test_stream import TestStream

    class _FnApiRunnerSupportVisitor(PipelineVisitor):
      """Visitor determining if a Pipeline can be run on the FnApiRunner."""

      def accept(self, pipeline):
        self.supported_by_fnapi_runner = True
        pipeline.visit(self)
        return self.supported_by_fnapi_runner

      def visit_transform(self, applied_ptransform):
        transform = applied_ptransform.transform
        # The FnApiRunner does not support streaming execution.
        if isinstance(transform, TestStream):
          self.supported_by_fnapi_runner = False
        # The FnApiRunner does not support reads from NativeSources.
        if (isinstance(transform, beam.io.Read) and
            isinstance(transform.source, NativeSource)):
          self.supported_by_fnapi_runner = False
        # The FnApiRunner does not support the use of _NativeWrites.
        if isinstance(transform, _NativeWrite):
          self.supported_by_fnapi_runner = False
        if isinstance(transform, beam.ParDo):
          dofn = transform.dofn
          # The FnApiRunner does not support execution of CombineFns with
          # deferred side inputs.
          if isinstance(dofn, CombineValuesDoFn):
            args, kwargs = transform.raw_side_inputs
            args_to_check = itertools.chain(args,
                                            kwargs.values())
            if any(isinstance(arg, ArgumentPlaceholder)
                   for arg in args_to_check):
              self.supported_by_fnapi_runner = False

    # Check whether all transforms used in the pipeline are supported by the
    # FnApiRunner, and the pipeline was not meant to be run as streaming.
    use_fnapi_runner = (
        _FnApiRunnerSupportVisitor().accept(pipeline))

    # Also ensure grpc is available.
    try:
      # pylint: disable=unused-import
      import grpc
    except ImportError:
      use_fnapi_runner = False

    if use_fnapi_runner:
      from apache_beam.runners.portability.fn_api_runner import FnApiRunner
      runner = FnApiRunner()
    else:
      runner = BundleBasedDirectRunner()

    return runner.run_pipeline(pipeline, options)
 def _pipeline_runner():
     with beam.Pipeline(runner=FnApiRunner()) as p:
         for i in range(NUM_PARALLEL_STAGES):
             _build_serial_stages(p, NUM_SERIAL_STAGES, size, i)
예제 #6
0
def create_fn_api_runner():
  from apache_beam.runners.portability.fn_api_runner import FnApiRunner
  return FnApiRunner()
예제 #7
0
def run():
    with beam.Pipeline(runner=FnApiRunner()) as p:
        (p | beam.Create(get_tweets.run())
         | beam.ParDo(Geolocate())
         | beam.Map(print))
예제 #8
0
def create_fn_api_runner():
    # pylint: disable=import-outside-toplevel
    from apache_beam.runners.portability.fn_api_runner import FnApiRunner
    return FnApiRunner()