Example #1
0
    def run_pipeline(self, pipeline, options):
        """Execute *pipeline*, preferring the FnApiRunner when every transform
        in the pipeline is supported by it; otherwise fall back to the
        BundleBasedDirectRunner.
        """
        from apache_beam.pipeline import PipelineVisitor
        from apache_beam.runners.dataflow.native_io.iobase import NativeSource
        from apache_beam.runners.dataflow.native_io.iobase import _NativeWrite
        from apache_beam.testing.test_stream import TestStream

        class _FnApiRunnerSupportVisitor(PipelineVisitor):
            """Visitor determining if a Pipeline can be run on the FnApiRunner."""

            def accept(self, pipeline):
                # Assume supported until a visited transform proves otherwise.
                self.supported_by_fnapi_runner = True
                pipeline.visit(self)
                return self.supported_by_fnapi_runner

            def visit_transform(self, applied_ptransform):
                transform = applied_ptransform.transform
                unsupported = False
                # Streaming execution (TestStream) is not supported.
                if isinstance(transform, TestStream):
                    unsupported = True
                # Reads from NativeSources are not supported.
                if (isinstance(transform, beam.io.Read)
                        and isinstance(transform.source, NativeSource)):
                    unsupported = True
                # _NativeWrites are not supported.
                if isinstance(transform, _NativeWrite):
                    unsupported = True
                if isinstance(transform, beam.ParDo):
                    fn = transform.dofn
                    # CombineFns with deferred (placeholder) side inputs are
                    # not supported.
                    if isinstance(fn, CombineValuesDoFn):
                        args, kwargs = transform.raw_side_inputs
                        side_inputs = itertools.chain(args, kwargs.values())
                        if any(isinstance(side, ArgumentPlaceholder)
                               for side in side_inputs):
                            unsupported = True
                    # Real-time timers in stateful DoFns are not supported.
                    if userstate.is_stateful_dofn(fn):
                        _, timer_specs = userstate.get_dofn_specs(fn)
                        if any(spec.time_domain == TimeDomain.REAL_TIME
                               for spec in timer_specs):
                            unsupported = True
                if unsupported:
                    self.supported_by_fnapi_runner = False

        # Check whether all transforms used in the pipeline are supported by
        # the FnApiRunner, and the pipeline was not meant to be run streaming.
        if _FnApiRunnerSupportVisitor().accept(pipeline):
            from apache_beam.portability.api import beam_provision_api_pb2
            from apache_beam.runners.portability.fn_api_runner import fn_runner
            from apache_beam.runners.portability.portable_runner import JobServiceHandle
            encoded_options = JobServiceHandle.encode_pipeline_options(
                options.get_all_options())
            provision_info = fn_runner.ExtendedProvisionInfo(
                beam_provision_api_pb2.ProvisionInfo(
                    pipeline_options=encoded_options))
            runner = fn_runner.FnApiRunner(provision_info=provision_info)
        else:
            runner = BundleBasedDirectRunner()

        return runner.run_pipeline(pipeline, options)
Example #2
0
 def _run_job(self):
     """Run the pipeline on the FnApiRunner, tracking job state.

     Transitions the job to RUNNING, executes the pipeline proto, and on
     success records the result and marks the job DONE. Any failure marks
     the job FAILED, logs the exception, and re-raises.
     """
     self.set_state(beam_job_api_pb2.JobState.RUNNING)
     with JobLogHandler(self._log_queues):
         try:
             result = fn_runner.FnApiRunner(
                 provision_info=self._provision_info).run_via_runner_api(
                     self._pipeline_proto)
             _LOGGER.info('Successfully completed job.')
             self.set_state(beam_job_api_pb2.JobState.DONE)
             self.result = result
         except:  # pylint: disable=bare-except
             # logger.exception already records the active traceback; the
             # previous extra call passed the `traceback` module object as
             # the message, which logged a useless module repr.
             _LOGGER.exception('Error running pipeline.')
             self.set_state(beam_job_api_pb2.JobState.FAILED)
             raise
Example #3
0
 def _run_job(self):
   """Run the pipeline and report progress or failure via the log queues."""
   self.set_state(beam_job_api_pb2.JobState.RUNNING)
   with JobLogHandler(self._log_queues) as log_handler:
     self._update_dependencies()
     try:
       start_time = time.time()
       runner = fn_runner.FnApiRunner(provision_info=self._provision_info)
       result = runner.run_via_runner_api(self._pipeline_proto)
       elapsed = time.time() - start_time
       _LOGGER.info('Successfully completed job in %s seconds.', elapsed)
       self.set_state(beam_job_api_pb2.JobState.DONE)
       self.result = result
     except:  # pylint: disable=bare-except
       # Surface the full traceback to job-message subscribers before
       # flipping the job state to FAILED and re-raising.
       error_message = beam_job_api_pb2.JobMessage(
           message_id=log_handler._next_id(),
           time=time.strftime('%Y-%m-%d %H:%M:%S.'),
           importance=beam_job_api_pb2.JobMessage.JOB_MESSAGE_ERROR,
           message_text=traceback.format_exc())
       self._log_queues.put(error_message)
       _LOGGER.exception('Error running pipeline.')
       self.set_state(beam_job_api_pb2.JobState.FAILED)
       raise
Example #4
0
 def _invoke_runner(self):
     """Mark the job RUNNING and execute the pipeline via the FnApiRunner."""
     self.set_state(beam_job_api_pb2.JobState.RUNNING)
     runner = fn_runner.FnApiRunner(provision_info=self._provision_info)
     return runner.run_via_runner_api(
         self._pipeline_proto, self.pipeline_options())