Example #1
 def _start_local_runner_subprocess_job_service(cls):
   cls._maybe_kill_subprocess()
   # TODO(robertwb): Consider letting the subprocess pick one and
   # communicate it back...
   port = cls._pick_unused_port()
   logging.info('Starting server on port %d.', port)
   cls._subprocess = subprocess.Popen(cls._subprocess_command(port))
   address = 'localhost:%d' % port
   job_service = beam_job_api_pb2_grpc.JobServiceStub(
       GRPCChannelFactory.insecure_channel(address))
   logging.info('Waiting for server to be ready...')
   start = time.time()
   timeout = 30
   while True:
     time.sleep(0.1)
     if cls._subprocess.poll() is not None:
       raise RuntimeError(
           'Subprocess terminated unexpectedly with exit code %d.' %
           cls._subprocess.returncode)
     elif time.time() - start > timeout:
       raise RuntimeError(
           'Pipeline timed out waiting for job service subprocess.')
     else:
       try:
         job_service.GetState(
             beam_job_api_pb2.GetJobStateRequest(job_id='[fake]'))
         break
       except grpc.RpcError as exn:
         if exn.code() != grpc.StatusCode.UNAVAILABLE:
           # We were able to contact the service for our fake state request.
           break
   logging.info('Server ready.')
   return address
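
The readiness loop above generalizes to any gRPC service: issue a cheap probe RPC, treat UNAVAILABLE as "not up yet", and treat any other status (even an error for the fake job id) as proof the server is answering. A minimal standalone sketch of that pattern, assuming only grpcio; the probe callable is illustrative and not part of the Beam API:

import time

import grpc

def wait_until_serving(probe, timeout=30, poll_interval=0.1):
  # probe is a zero-argument callable that issues one RPC.
  deadline = time.time() + timeout
  while True:
    try:
      probe()
      return  # The RPC succeeded; the server is serving.
    except grpc.RpcError as exn:
      if exn.code() != grpc.StatusCode.UNAVAILABLE:
        return  # The server answered with a real status code.
    if time.time() > deadline:
      raise RuntimeError('Timed out waiting for server.')
    time.sleep(poll_interval)
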
Example #2
    def __init__(
            self,
            control_address,
            worker_count,
            credentials=None,
            worker_id=None,
            # Caching is disabled by default
            state_cache_size=0,
            profiler_factory=None):
        self._alive = True
        self._worker_count = worker_count
        self._worker_index = 0
        self._worker_id = worker_id
        self._state_cache = StateCache(state_cache_size)
        if credentials is None:
            logging.info('Creating insecure control channel for %s.',
                         control_address)
            self._control_channel = GRPCChannelFactory.insecure_channel(
                control_address)
        else:
            logging.info('Creating secure control channel for %s.',
                         control_address)
            self._control_channel = GRPCChannelFactory.secure_channel(
                control_address, credentials)
        grpc.channel_ready_future(self._control_channel).result(timeout=60)
        logging.info('Control channel established.')

        self._control_channel = grpc.intercept_channel(
            self._control_channel, WorkerIdInterceptor(self._worker_id))
        self._data_channel_factory = data_plane.GrpcClientDataChannelFactory(
            credentials, self._worker_id)
        self._state_handler_factory = GrpcStateHandlerFactory(
            self._state_cache, credentials)
        self._profiler_factory = profiler_factory
        self._fns = {}
        # BundleProcessor cache across all workers.
        self._bundle_processor_cache = BundleProcessorCache(
            state_handler_factory=self._state_handler_factory,
            data_channel_factory=self._data_channel_factory,
            fns=self._fns)
        # workers for process/finalize bundle.
        self.workers = queue.Queue()
        # one worker for progress/split request.
        self.progress_worker = SdkWorker(
            self._bundle_processor_cache,
            profiler_factory=self._profiler_factory)
        # one thread is enough for getting the progress report.
        # Assumption:
        # Progress report generation should not do IO or wait on other resources.
        #  Without wait, having multiple threads will not improve performance and
        #  will only add complexity.
        self._progress_thread_pool = futures.ThreadPoolExecutor(max_workers=1)
        # finalize and process share one thread pool.
        self._process_thread_pool = futures.ThreadPoolExecutor(
            max_workers=self._worker_count)
        self._responses = queue.Queue()
        self._process_bundle_queue = queue.Queue()
        self._unscheduled_process_bundle = {}
        logging.info('Initializing SDKHarness with %s workers.',
                     self._worker_count)
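
Nearly every constructor in this listing repeats the same credentials branch: an insecure channel when credentials is None, a secure one otherwise, with matching log lines. Factored into a helper it would look roughly like this; make_channel is a hypothetical name, not part of the Beam or grpcio API:

import logging

import grpc

def make_channel(address, credentials=None, options=None):
  # Mirrors the branch used throughout these examples.
  if credentials is None:
    logging.info('Creating insecure channel for %s.', address)
    return grpc.insecure_channel(address, options=options)
  logging.info('Creating secure channel for %s.', address)
  return grpc.secure_channel(address, credentials, options=options)
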
Example #3
 def create_state_handler(self, api_service_descriptor):
   if not api_service_descriptor:
     return self._throwing_state_handler
   url = api_service_descriptor.url
   if url not in self._state_handler_cache:
     with self._lock:
       if url not in self._state_handler_cache:
         # Options to have no limits (-1) on the size of the messages
         # received or sent over the data plane. The actual buffer size is
         # controlled in a layer above.
         options = [('grpc.max_receive_message_length', -1),
                    ('grpc.max_send_message_length', -1)]
         if self._credentials is None:
           logging.info('Creating insecure state channel for %s.', url)
           grpc_channel = GRPCChannelFactory.insecure_channel(
               url, options=options)
         else:
           logging.info('Creating secure state channel for %s.', url)
           grpc_channel = GRPCChannelFactory.secure_channel(
               url, self._credentials, options=options)
         logging.info('State channel established.')
         # Add workerId to the grpc channel
         grpc_channel = grpc.intercept_channel(grpc_channel,
                                               WorkerIdInterceptor())
         self._state_handler_cache[url] = GrpcStateHandler(
             beam_fn_api_pb2_grpc.BeamFnStateStub(grpc_channel))
   return self._state_handler_cache[url]
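
Examples #3, #4, #6, #7, and #8 all rely on the same double-checked locking idiom: an unsynchronized read of the cache first, then the lock, then a second membership check before the expensive channel construction, so that concurrent callers for the same URL create at most one channel. Stripped of the gRPC details, a sketch of the idiom (the make factory stands in for channel creation):

import threading

class HandlerCache(object):
  def __init__(self, make):
    self._make = make  # factory, called at most once per key
    self._cache = {}
    self._lock = threading.Lock()

  def get(self, key):
    # Fast path: no lock needed when the entry already exists.
    if key not in self._cache:
      with self._lock:
        # Re-check under the lock: another thread may have created
        # the entry between the first check and acquiring the lock.
        if key not in self._cache:
          self._cache[key] = self._make(key)
    return self._cache[key]
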
Example #4
    def create_data_channel(self, remote_grpc_port):
        url = remote_grpc_port.api_service_descriptor.url
        if url not in self._data_channel_cache:
            with self._lock:
                if url not in self._data_channel_cache:
                    logging.info('Creating client data channel for %s', url)
                    # Options to have no limits (-1) on the size of the messages
                    # received or sent over the data plane. The actual buffer size
                    # is controlled in a layer above.
                    channel_options = [("grpc.max_receive_message_length", -1),
                                       ("grpc.max_send_message_length", -1)]
                    grpc_channel = None
                    if self._credentials is None:
                        grpc_channel = GRPCChannelFactory.insecure_channel(
                            url, options=channel_options)
                    else:
                        grpc_channel = GRPCChannelFactory.secure_channel(
                            url, self._credentials, options=channel_options)
                    # Add workerId to the grpc channel
                    grpc_channel = grpc.intercept_channel(
                        grpc_channel, WorkerIdInterceptor(self._worker_id))
                    self._data_channel_cache[url] = GrpcClientDataChannel(
                        beam_fn_api_pb2_grpc.BeamFnDataStub(grpc_channel))

        return self._data_channel_cache[url]
Example #5
    def __init__(self,
                 status_address,
                 bundle_process_cache=None,
                 enable_heap_dump=False,
                 log_lull_timeout_ns=DEFAULT_LOG_LULL_TIMEOUT_NS):
        """Initialize FnApiWorkerStatusHandler.

    Args:
      status_address: The URL Runner uses to host the WorkerStatus server.
      bundle_process_cache: The BundleProcessor cache dict from sdk worker.
    """
        self._alive = True
        self._bundle_process_cache = bundle_process_cache
        ch = GRPCChannelFactory.insecure_channel(status_address)
        grpc.channel_ready_future(ch).result(timeout=60)
        self._status_channel = grpc.intercept_channel(ch,
                                                      WorkerIdInterceptor())
        self._status_stub = beam_fn_api_pb2_grpc.BeamFnWorkerStatusStub(
            self._status_channel)
        self._responses = queue.Queue()
        self.log_lull_timeout_ns = log_lull_timeout_ns
        self._last_full_thread_dump_secs = 0.0
        self._last_lull_logged_secs = 0.0
        self._server = threading.Thread(target=lambda: self._serve(),
                                        name='fn_api_status_handler')
        self._server.daemon = True
        self._enable_heap_dump = enable_heap_dump
        self._server.start()
        self._lull_logger = threading.Thread(
            target=lambda: self._log_lull_in_bundle_processor(
                self._bundle_process_cache),
            name='lull_operation_logger')
        self._lull_logger.daemon = True
        self._lull_logger.start()
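
Both background loops in Example #5 run on daemon threads, so a worker that never performs a clean shutdown cannot be kept alive by the status handler. A condensed sketch of that start-a-daemon-consumer pattern, with a sentinel to unblock the queue on shutdown; the names here are illustrative:

import queue
import threading

class BackgroundHandler(object):
  def __init__(self):
    self._responses = queue.Queue()
    self._server = threading.Thread(target=self._serve, name='handler')
    self._server.daemon = True  # do not keep the process alive
    self._server.start()

  def _serve(self):
    while True:
      item = self._responses.get()
      if item is None:  # sentinel: shut down
        break
      # ... handle item ...

  def stop(self):
    self._responses.put(None)
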
Example #6
  def create_data_channel(self, remote_grpc_port):
    url = remote_grpc_port.api_service_descriptor.url
    if url not in self._data_channel_cache:
      with self._lock:
        if url not in self._data_channel_cache:
          logging.info('Creating channel for %s', url)
          # Options to have no limits (-1) on the size of the messages
          # received or sent over the data plane. The actual buffer size
          # is controlled in a layer above.
          channel_options = [("grpc.max_receive_message_length", -1),
                             ("grpc.max_send_message_length", -1)]
          grpc_channel = None
          if self._credentials is None:
            grpc_channel = GRPCChannelFactory.insecure_channel(
                url, options=channel_options)
          else:
            grpc_channel = GRPCChannelFactory.secure_channel(
                url, self._credentials, options=channel_options)
          # Add workerId to the grpc channel
          grpc_channel = grpc.intercept_channel(grpc_channel,
                                                WorkerIdInterceptor())
          self._data_channel_cache[url] = GrpcClientDataChannel(
              beam_fn_api_pb2_grpc.BeamFnDataStub(grpc_channel))

    return self._data_channel_cache[url]
Example #7
  def create_data_channel_from_url(self, url):
    # type: (str) -> Optional[GrpcClientDataChannel]
    if not url:
      return None
    if url not in self._data_channel_cache:
      with self._lock:
        if url not in self._data_channel_cache:
          _LOGGER.info('Creating client data channel for %s', url)
          # Options to have no limits (-1) on the size of the messages
          # received or sent over the data plane. The actual buffer size
          # is controlled in a layer above.
          channel_options = [("grpc.max_receive_message_length", -1),
                             ("grpc.max_send_message_length", -1)]
          grpc_channel = None
          if self._credentials is None:
            grpc_channel = GRPCChannelFactory.insecure_channel(
                url, options=channel_options)
          else:
            grpc_channel = GRPCChannelFactory.secure_channel(
                url, self._credentials, options=channel_options)
          # Add workerId to the grpc channel
          grpc_channel = grpc.intercept_channel(
              grpc_channel, WorkerIdInterceptor(self._worker_id))
          self._data_channel_cache[url] = GrpcClientDataChannel(
              beam_fn_api_pb2_grpc.BeamFnDataStub(grpc_channel),
              self._data_buffer_time_limit_ms)

    return self._data_channel_cache[url]
Example #8
 def create_state_handler(self, api_service_descriptor):
     if not api_service_descriptor:
         return self._throwing_state_handler
     url = api_service_descriptor.url
     if url not in self._state_handler_cache:
         with self._lock:
             if url not in self._state_handler_cache:
                 # Options to have no limits (-1) on the size of the messages
                 # received or sent over the data plane. The actual buffer size is
                 # controlled in a layer above.
                 options = [('grpc.max_receive_message_length', -1),
                            ('grpc.max_send_message_length', -1)]
                 if self._credentials is None:
                     _LOGGER.info('Creating insecure state channel for %s.',
                                  url)
                     grpc_channel = GRPCChannelFactory.insecure_channel(
                         url, options=options)
                 else:
                     _LOGGER.info('Creating secure state channel for %s.',
                                  url)
                     grpc_channel = GRPCChannelFactory.secure_channel(
                         url, self._credentials, options=options)
                 _LOGGER.info('State channel established.')
                 # Add workerId to the grpc channel
                 grpc_channel = grpc.intercept_channel(
                     grpc_channel, WorkerIdInterceptor())
                 self._state_handler_cache[url] = CachingStateHandler(
                     self._state_cache,
                     GrpcStateHandler(
                         beam_fn_api_pb2_grpc.BeamFnStateStub(
                             grpc_channel)))
     return self._state_handler_cache[url]
Example #9
 def _start_local_runner_subprocess_job_service(cls):
     cls._maybe_kill_subprocess()
     # TODO(robertwb): Consider letting the subprocess pick one and
     # communicate it back...
     # pylint: disable=unbalanced-tuple-unpacking
     job_port, expansion_port = cls._pick_unused_ports(num_ports=2)
     _LOGGER.info('Starting server on port %d.', job_port)
     cls._subprocess = subprocess.Popen(
         cls._subprocess_command(job_port, expansion_port))
     address = 'localhost:%d' % job_port
     job_service = beam_job_api_pb2_grpc.JobServiceStub(
         GRPCChannelFactory.insecure_channel(address))
     _LOGGER.info('Waiting for server to be ready...')
     start = time.time()
     timeout = 30
     while True:
         time.sleep(0.1)
         if cls._subprocess.poll() is not None:
             raise RuntimeError(
                 'Subprocess terminated unexpectedly with exit code %d.' %
                 cls._subprocess.returncode)
         elif time.time() - start > timeout:
             raise RuntimeError(
                 'Pipeline timed out waiting for job service subprocess.')
         else:
             try:
                 job_service.GetState(
                     beam_job_api_pb2.GetJobStateRequest(job_id='[fake]'))
                 break
             except grpc.RpcError as exn:
                 if exn.code() != grpc.StatusCode.UNAVAILABLE:
                     # We were able to contact the service for our fake state request.
                     break
     _LOGGER.info('Server ready.')
     return address
Example #10
  def __init__(self,
               control_address,  # type: str
               credentials=None,
               worker_id=None,  # type: Optional[str]
               # Caching is disabled by default
               state_cache_size=0,
               # time-based data buffering is disabled by default
               data_buffer_time_limit_ms=0,
               profiler_factory=None,  # type: Optional[Callable[..., Profile]]
               status_address=None,  # type: Optional[str]
               ):
    self._alive = True
    self._worker_index = 0
    self._worker_id = worker_id
    self._state_cache = StateCache(state_cache_size)
    if credentials is None:
      _LOGGER.info('Creating insecure control channel for %s.', control_address)
      self._control_channel = GRPCChannelFactory.insecure_channel(
          control_address)
    else:
      _LOGGER.info('Creating secure control channel for %s.', control_address)
      self._control_channel = GRPCChannelFactory.secure_channel(
          control_address, credentials)
    grpc.channel_ready_future(self._control_channel).result(timeout=60)
    _LOGGER.info('Control channel established.')

    self._control_channel = grpc.intercept_channel(
        self._control_channel, WorkerIdInterceptor(self._worker_id))
    self._data_channel_factory = data_plane.GrpcClientDataChannelFactory(
        credentials, self._worker_id, data_buffer_time_limit_ms)
    self._state_handler_factory = GrpcStateHandlerFactory(
        self._state_cache, credentials)
    self._profiler_factory = profiler_factory
    self._fns = {}  # type: Dict[str, beam_fn_api_pb2.ProcessBundleDescriptor]
    # BundleProcessor cache across all workers.
    self._bundle_processor_cache = BundleProcessorCache(
        state_handler_factory=self._state_handler_factory,
        data_channel_factory=self._data_channel_factory,
        fns=self._fns)

    if status_address:
      try:
        self._status_handler = FnApiWorkerStatusHandler(
            status_address, self._bundle_processor_cache)
      except Exception:
        traceback_string = traceback.format_exc()
        _LOGGER.warning(
            'Error creating worker status request handler, '
            'skipping status report. Trace back: %s' % traceback_string)
    else:
      self._status_handler = None

    # TODO(BEAM-8998) use common UnboundedThreadPoolExecutor to process bundle
    #  progress once dataflow runner's excessive progress polling is removed.
    self._report_progress_executor = futures.ThreadPoolExecutor(max_workers=1)
    self._worker_thread_pool = UnboundedThreadPoolExecutor()
    self._responses = queue.Queue(
    )  # type: queue.Queue[beam_fn_api_pb2.InstructionResponse]
    _LOGGER.info('Initializing SDKHarness with unbounded number of workers.')
Example #11
 def start_worker(self):
   stub = beam_fn_api_pb2_grpc.BeamFnExternalWorkerPoolStub(
       GRPCChannelFactory.insecure_channel(
           self._external_payload.endpoint.url))
   response = stub.NotifyRunnerAvailable(
       beam_fn_api_pb2.NotifyRunnerAvailableRequest(
           control_endpoint=endpoints_pb2.ApiServiceDescriptor(
               url=self.control_address),
           params=self._external_payload.params))
   if response.error:
     raise RuntimeError("Error starting worker: %s" % response.error)
Example #12
 def start_worker(self):
   stub = beam_fn_api_pb2_grpc.BeamFnExternalWorkerPoolStub(
       GRPCChannelFactory.insecure_channel(
           self._external_payload.endpoint.url))
   response = stub.NotifyRunnerAvailable(
       beam_fn_api_pb2.NotifyRunnerAvailableRequest(
           worker_id='worker_%s' % uuid.uuid4(),
           control_endpoint=endpoints_pb2.ApiServiceDescriptor(
               url=self.control_address),
           logging_endpoint=self.logging_api_service_descriptor(),
           params=self._external_payload.params))
   if response.error:
     raise RuntimeError("Error starting worker: %s" % response.error)
Example #13
    def __init__(self, log_service_descriptor):
        super(FnApiLogRecordHandler, self).__init__()

        self._alive = True
        self._dropped_logs = 0
        self._log_entry_queue = queue.Queue(maxsize=self._QUEUE_SIZE)

        ch = GRPCChannelFactory.insecure_channel(log_service_descriptor.url)
        # Make sure the channel is ready to avoid [BEAM-4649]
        grpc.channel_ready_future(ch).result(timeout=60)
        self._log_channel = grpc.intercept_channel(ch, WorkerIdInterceptor())
        self._reader = threading.Thread(
            target=lambda: self._read_log_control_messages(),
            name='read_log_control_messages')
        self._reader.daemon = True
        self._reader.start()
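
The grpc.channel_ready_future(ch).result(timeout=60) call shared by several constructors here blocks until the channel reaches the READY state and raises grpc.FutureTimeoutError if the endpoint never comes up, which is what the BEAM-4649 comment is guarding against. A small sketch of guarding channel creation with it; connect is a hypothetical helper, not Beam API:

import grpc

def connect(address, timeout=60):
  channel = grpc.insecure_channel(address)
  try:
    # Block until the channel is READY rather than queueing RPCs
    # against an endpoint that never connects.
    grpc.channel_ready_future(channel).result(timeout=timeout)
  except grpc.FutureTimeoutError:
    channel.close()
    raise RuntimeError(
        'Failed to connect to %s within %d seconds.' % (address, timeout))
  return channel
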
Example #14
    def __init__(
            self,
            control_address,
            credentials=None,
            worker_id=None,
            # Caching is disabled by default
            state_cache_size=0,
            profiler_factory=None):
        self._alive = True
        self._worker_index = 0
        self._worker_id = worker_id
        self._state_cache = StateCache(state_cache_size)
        if credentials is None:
            _LOGGER.info('Creating insecure control channel for %s.',
                         control_address)
            self._control_channel = GRPCChannelFactory.insecure_channel(
                control_address)
        else:
            _LOGGER.info('Creating secure control channel for %s.',
                         control_address)
            self._control_channel = GRPCChannelFactory.secure_channel(
                control_address, credentials)
        grpc.channel_ready_future(self._control_channel).result(timeout=60)
        _LOGGER.info('Control channel established.')

        self._control_channel = grpc.intercept_channel(
            self._control_channel, WorkerIdInterceptor(self._worker_id))
        self._data_channel_factory = data_plane.GrpcClientDataChannelFactory(
            credentials, self._worker_id)
        self._state_handler_factory = GrpcStateHandlerFactory(
            self._state_cache, credentials)
        self._profiler_factory = profiler_factory
        self._fns = {}
        # BundleProcessor cache across all workers.
        self._bundle_processor_cache = BundleProcessorCache(
            state_handler_factory=self._state_handler_factory,
            data_channel_factory=self._data_channel_factory,
            fns=self._fns)

        # TODO(BEAM-8998) use common UnboundedThreadPoolExecutor to process bundle
        #  progress once dataflow runner's excessive progress polling is removed.
        self._report_progress_executor = futures.ThreadPoolExecutor(
            max_workers=1)
        self._worker_thread_pool = UnboundedThreadPoolExecutor()
        self._responses = queue.Queue()
        _LOGGER.info(
            'Initializing SDKHarness with unbounded number of workers.')
Example #15
 def start_worker(self):
     # type: () -> None
     stub = beam_fn_api_pb2_grpc.BeamFnExternalWorkerPoolStub(
         GRPCChannelFactory.insecure_channel(
             self._external_payload.endpoint.url))
     control_descriptor = endpoints_pb2.ApiServiceDescriptor(
         url=self.control_address)
     response = stub.StartWorker(
         beam_fn_api_pb2.StartWorkerRequest(
             worker_id=self.worker_id,
             control_endpoint=control_descriptor,
             artifact_endpoint=control_descriptor,
             provision_endpoint=control_descriptor,
             logging_endpoint=self.logging_api_service_descriptor(),
             params=self._external_payload.params))
     if response.error:
         raise RuntimeError("Error starting worker: %s" % response.error)
Example #16
    def __init__(self,
                 control_address,
                 worker_count,
                 credentials=None,
                 worker_id=None,
                 profiler_factory=None):
        self._alive = True
        self._worker_count = worker_count
        self._worker_index = 0
        self._worker_id = worker_id
        if credentials is None:
            logging.info('Creating insecure control channel for %s.',
                         control_address)
            self._control_channel = GRPCChannelFactory.insecure_channel(
                control_address)
        else:
            logging.info('Creating secure control channel for %s.',
                         control_address)
            self._control_channel = GRPCChannelFactory.secure_channel(
                control_address, credentials)
        grpc.channel_ready_future(self._control_channel).result(timeout=60)
        logging.info('Control channel established.')

        self._control_channel = grpc.intercept_channel(
            self._control_channel, WorkerIdInterceptor(self._worker_id))
        self._data_channel_factory = data_plane.GrpcClientDataChannelFactory(
            credentials)
        self._state_handler_factory = GrpcStateHandlerFactory(credentials)
        self._profiler_factory = profiler_factory
        self.workers = queue.Queue()
        # one thread is enough for getting the progress report.
        # Assumption:
        # Progress report generation should not do IO or wait on other resources.
        #  Without wait, having multiple threads will not improve performance and
        #  will only add complexity.
        self._progress_thread_pool = futures.ThreadPoolExecutor(max_workers=1)
        self._process_thread_pool = futures.ThreadPoolExecutor(
            max_workers=self._worker_count)
        self._instruction_id_vs_worker = {}
        self._fns = {}
        self._responses = queue.Queue()
        self._process_bundle_queue = queue.Queue()
        self._unscheduled_process_bundle = {}
        logging.info('Initializing SDKHarness with %s workers.',
                     self._worker_count)
Example #17
    def __init__(self, log_service_descriptor):
        # type: (endpoints_pb2.ApiServiceDescriptor) -> None
        super().__init__()

        self._alive = True
        self._dropped_logs = 0
        self._log_entry_queue = queue.Queue(
            maxsize=self._QUEUE_SIZE
        )  # type: queue.Queue[Union[beam_fn_api_pb2.LogEntry, Sentinel]]

        ch = GRPCChannelFactory.insecure_channel(log_service_descriptor.url)
        # Make sure the channel is ready to avoid [BEAM-4649]
        grpc.channel_ready_future(ch).result(timeout=60)
        self._log_channel = grpc.intercept_channel(ch, WorkerIdInterceptor())
        self._reader = threading.Thread(
            target=lambda: self._read_log_control_messages(),
            name='read_log_control_messages')
        self._reader.daemon = True
        self._reader.start()
Example #18
  def __init__(self, status_address, bundle_process_cache=None):
    """Initialize FnApiWorkerStatusHandler.

    Args:
      status_address: The URL Runner uses to host the WorkerStatus server.
      bundle_process_cache: The BundleProcessor cache dict from sdk worker.
    """
    self._alive = True
    self._bundle_process_cache = bundle_process_cache
    ch = GRPCChannelFactory.insecure_channel(status_address)
    grpc.channel_ready_future(ch).result(timeout=60)
    self._status_channel = grpc.intercept_channel(ch, WorkerIdInterceptor())
    self._status_stub = beam_fn_api_pb2_grpc.BeamFnWorkerStatusStub(
        self._status_channel)
    self._responses = queue.Queue()
    self._server = threading.Thread(
        target=lambda: self._serve(), name='fn_api_status_handler')
    self._server.daemon = True
    self._server.start()
Example #19
  def __init__(self,
               control_address,  # type: str
               credentials=None,
               worker_id=None,  # type: Optional[str]
               # Caching is disabled by default
               state_cache_size=0,
               profiler_factory=None  # type: Optional[Callable[..., Profile]]
              ):
    self._alive = True
    self._worker_index = 0
    self._worker_id = worker_id
    self._state_cache = StateCache(state_cache_size)
    if credentials is None:
      _LOGGER.info('Creating insecure control channel for %s.', control_address)
      self._control_channel = GRPCChannelFactory.insecure_channel(
          control_address)
    else:
      _LOGGER.info('Creating secure control channel for %s.', control_address)
      self._control_channel = GRPCChannelFactory.secure_channel(
          control_address, credentials)
    grpc.channel_ready_future(self._control_channel).result(timeout=60)
    _LOGGER.info('Control channel established.')

    self._control_channel = grpc.intercept_channel(
        self._control_channel, WorkerIdInterceptor(self._worker_id))
    self._data_channel_factory = data_plane.GrpcClientDataChannelFactory(
        credentials, self._worker_id)
    self._state_handler_factory = GrpcStateHandlerFactory(self._state_cache,
                                                          credentials)
    self._profiler_factory = profiler_factory
    self._fns = {}  # type: Dict[str, beam_fn_api_pb2.ProcessBundleDescriptor]
    # BundleProcessor cache across all workers.
    self._bundle_processor_cache = BundleProcessorCache(
        state_handler_factory=self._state_handler_factory,
        data_channel_factory=self._data_channel_factory,
        fns=self._fns)
    self._worker_thread_pool = UnboundedThreadPoolExecutor()
    self._responses = queue.Queue()  # type: queue.Queue[beam_fn_api_pb2.InstructionResponse]
    _LOGGER.info('Initializing SDKHarness with unbounded number of workers.')
Example #20
  def __init__(
      self, control_address, worker_count, credentials=None, worker_id=None,
      profiler_factory=None):
    self._alive = True
    self._worker_count = worker_count
    self._worker_index = 0
    self._worker_id = worker_id
    if credentials is None:
      logging.info('Creating insecure control channel for %s.', control_address)
      self._control_channel = GRPCChannelFactory.insecure_channel(
          control_address)
    else:
      logging.info('Creating secure control channel for %s.', control_address)
      self._control_channel = GRPCChannelFactory.secure_channel(
          control_address, credentials)
    grpc.channel_ready_future(self._control_channel).result(timeout=60)
    logging.info('Control channel established.')

    self._control_channel = grpc.intercept_channel(
        self._control_channel, WorkerIdInterceptor(self._worker_id))
    self._data_channel_factory = data_plane.GrpcClientDataChannelFactory(
        credentials)
    self._state_handler_factory = GrpcStateHandlerFactory(credentials)
    self._profiler_factory = profiler_factory
    self.workers = queue.Queue()
    # one thread is enough for getting the progress report.
    # Assumption:
    # Progress report generation should not do IO or wait on other resources.
    #  Without wait, having multiple threads will not improve performance and
    #  will only add complexity.
    self._progress_thread_pool = futures.ThreadPoolExecutor(max_workers=1)
    self._process_thread_pool = futures.ThreadPoolExecutor(
        max_workers=self._worker_count)
    self._instruction_id_vs_worker = {}
    self._fns = {}
    self._responses = queue.Queue()
    self._process_bundle_queue = queue.Queue()
    self._unscheduled_process_bundle = {}
    logging.info('Initializing SDKHarness with %s workers.', self._worker_count)
Example #21
    def run_pipeline(self, pipeline, options):
        portable_options = options.view_as(PortableOptions)
        job_endpoint = portable_options.job_endpoint

        # TODO: https://issues.apache.org/jira/browse/BEAM-5525
        # portable runner specific default
        if options.view_as(SetupOptions).sdk_location == 'default':
            options.view_as(SetupOptions).sdk_location = 'container'

        if not job_endpoint:
            # TODO Provide a way to specify a container Docker URL
            # https://issues.apache.org/jira/browse/BEAM-6328
            docker = DockerizedJobServer()
            job_endpoint = docker.start()

        # This is needed as we start a worker server if one is requested
        # but none is provided.
        if portable_options.environment_type == 'LOOPBACK':
            portable_options.environment_config, server = (
                BeamFnExternalWorkerPoolServicer.start(
                    sdk_worker_main._get_worker_count(options)))
            cleanup_callbacks = [functools.partial(server.stop, 1)]
        else:
            cleanup_callbacks = []

        proto_pipeline = pipeline.to_runner_api(
            default_environment=PortableRunner._create_environment(
                portable_options))

        # Some runners won't detect the GroupByKey transform unless it has no
        # subtransforms.  Remove all sub-transforms until BEAM-4605 is resolved.
        for _, transform_proto in list(
                proto_pipeline.components.transforms.items()):
            if transform_proto.spec.urn == common_urns.primitives.GROUP_BY_KEY.urn:
                for sub_transform in transform_proto.subtransforms:
                    del proto_pipeline.components.transforms[sub_transform]
                del transform_proto.subtransforms[:]

        # Preemptively apply combiner lifting, until all runners support it.
        # This optimization is idempotent.
        if not options.view_as(StandardOptions).streaming:
            stages = list(
                fn_api_runner_transforms.leaf_transform_stages(
                    proto_pipeline.root_transform_ids,
                    proto_pipeline.components))
            stages = fn_api_runner_transforms.lift_combiners(
                stages,
                fn_api_runner_transforms.TransformContext(
                    proto_pipeline.components))
            proto_pipeline = fn_api_runner_transforms.with_stages(
                proto_pipeline, stages)

        # TODO: Define URNs for options.
        # convert int values: https://issues.apache.org/jira/browse/BEAM-5509
        p_options = {
            'beam:option:' + k + ':v1': (str(v) if type(v) == int else v)
            for k, v in options.get_all_options().items() if v is not None
        }

        channel = GRPCChannelFactory.insecure_channel(job_endpoint)
        grpc.channel_ready_future(channel).result()
        job_service = beam_job_api_pb2_grpc.JobServiceStub(channel)

        # Sends the PrepareRequest but retries in case the channel is not ready
        def send_prepare_request(max_retries=5):
            num_retries = 0
            while True:
                try:
                    # This reports channel is READY but connections may fail
                    # Seems to be only an issue on Mac with port forwardings
                    grpc.channel_ready_future(channel).result()
                    return job_service.Prepare(
                        beam_job_api_pb2.PrepareJobRequest(
                            job_name='job',
                            pipeline=proto_pipeline,
                            pipeline_options=job_utils.dict_to_struct(
                                p_options)))
                except grpc._channel._Rendezvous as e:
                    num_retries += 1
                    if num_retries > max_retries:
                        raise e

        prepare_response = send_prepare_request()
        if prepare_response.artifact_staging_endpoint.url:
            stager = portable_stager.PortableStager(
                GRPCChannelFactory.insecure_channel(
                    prepare_response.artifact_staging_endpoint.url),
                prepare_response.staging_session_token)
            retrieval_token, _ = stager.stage_job_resources(
                options, staging_location='')
        else:
            retrieval_token = None
        run_response = job_service.Run(
            beam_job_api_pb2.RunJobRequest(
                preparation_id=prepare_response.preparation_id,
                retrieval_token=retrieval_token))
        return PipelineResult(job_service, run_response.job_id,
                              cleanup_callbacks)
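
The send_prepare_request helper above retries on grpc._channel._Rendezvous, a private grpcio type; its public base class grpc.RpcError covers the same failures in current grpcio releases. The bounded-retry shape, reduced to a generic sketch (call is any zero-argument RPC callable):

import grpc

def call_with_retries(call, max_retries=5):
  attempts = 0
  while True:
    try:
      return call()
    except grpc.RpcError:
      attempts += 1
      if attempts > max_retries:
        raise
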
Example #22
  def __init__(
      self,
      control_address,  # type: str
      credentials=None,  # type: Optional[grpc.ChannelCredentials]
      worker_id=None,  # type: Optional[str]
      # Caching is disabled by default
      state_cache_size=0,  # type: int
      # time-based data buffering is disabled by default
      data_buffer_time_limit_ms=0,  # type: int
      profiler_factory=None,  # type: Optional[Callable[..., Profile]]
      status_address=None,  # type: Optional[str]
      # Heap dump through status api is disabled by default
      enable_heap_dump=False,  # type: bool
  ):
    # type: (...) -> None
    self._alive = True
    self._worker_index = 0
    self._worker_id = worker_id
    self._state_cache = StateCache(state_cache_size)
    options = [('grpc.max_receive_message_length', -1),
               ('grpc.max_send_message_length', -1)]
    if credentials is None:
      _LOGGER.info('Creating insecure control channel for %s.', control_address)
      self._control_channel = GRPCChannelFactory.insecure_channel(
          control_address, options=options)
    else:
      _LOGGER.info('Creating secure control channel for %s.', control_address)
      self._control_channel = GRPCChannelFactory.secure_channel(
          control_address, credentials, options=options)
    grpc.channel_ready_future(self._control_channel).result(timeout=60)
    _LOGGER.info('Control channel established.')

    self._control_channel = grpc.intercept_channel(
        self._control_channel, WorkerIdInterceptor(self._worker_id))
    self._data_channel_factory = data_plane.GrpcClientDataChannelFactory(
        credentials, self._worker_id, data_buffer_time_limit_ms)
    self._state_handler_factory = GrpcStateHandlerFactory(
        self._state_cache, credentials)
    self._profiler_factory = profiler_factory

    def default_factory(id):
      # type: (str) -> beam_fn_api_pb2.ProcessBundleDescriptor
      return self._control_stub.GetProcessBundleDescriptor(
          beam_fn_api_pb2.GetProcessBundleDescriptorRequest(
              process_bundle_descriptor_id=id))

    self._fns = KeyedDefaultDict(default_factory)
    # BundleProcessor cache across all workers.
    self._bundle_processor_cache = BundleProcessorCache(
        state_handler_factory=self._state_handler_factory,
        data_channel_factory=self._data_channel_factory,
        fns=self._fns)

    if status_address:
      try:
        self._status_handler = FnApiWorkerStatusHandler(
            status_address, self._bundle_processor_cache,
            enable_heap_dump)  # type: Optional[FnApiWorkerStatusHandler]
      except Exception:
        traceback_string = traceback.format_exc()
        _LOGGER.warning(
            'Error creating worker status request handler, '
            'skipping status report. Trace back: %s' % traceback_string)
    else:
      self._status_handler = None

    # TODO(BEAM-8998) use common
    # thread_pool_executor.shared_unbounded_instance() to process bundle
    # progress once dataflow runner's excessive progress polling is removed.
    self._report_progress_executor = futures.ThreadPoolExecutor(max_workers=1)
    self._worker_thread_pool = thread_pool_executor.shared_unbounded_instance()
    self._responses = queue.Queue(
    )  # type: queue.Queue[Union[beam_fn_api_pb2.InstructionResponse, Sentinel]]
    _LOGGER.info('Initializing SDKHarness with unbounded number of workers.')
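
Unlike the plain self._fns = {} dict in the earlier constructors, Example #22 uses a KeyedDefaultDict so an unknown ProcessBundleDescriptor id is fetched lazily over the control channel. The essential behavior is a dict whose __missing__ hook passes the key to a factory and caches the result; this sketch mirrors those semantics rather than Beam's actual class:

class KeyedDefaultDict(dict):
  # Like collections.defaultdict, except the factory receives the
  # missing key as an argument.
  def __init__(self, factory):
    super().__init__()
    self._factory = factory

  def __missing__(self, key):
    value = self._factory(key)
    self[key] = value
    return value

With the default_factory defined above, a lookup such as fns['some-descriptor-id'] (the id is illustrative) would issue one GetProcessBundleDescriptor call and serve the cached descriptor on later lookups.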