Ejemplo n.º 1
0
class BeamTimeBasedOutputStream(create_OutputStream):
    """Output-stream wrapper that flushes the wrapped stream on a timer.

    A ``PeriodicThread`` (constructed with ``1`` as its first argument,
    presumably an interval in seconds -- confirm against ``PeriodicThread``)
    repeatedly calls :meth:`notify_flush`, which only raises a flag. The
    actual ``flush()`` of the wrapped stream happens inside the next
    :meth:`write` call, so flushing is always performed by the writing
    thread rather than by the timer thread.

    The wrapped stream must be supplied through :meth:`reset_output_stream`
    before :meth:`write` is called; until then it is ``None``.
    """

    def __init__(self):
        # Zero-argument super() is required here: the one-argument form
        # ``super(Cls)`` yields an unbound proxy, and calling ``__init__`` on
        # it does not run the base-class initializer.
        super().__init__()
        self._flush_event = False
        # Set every attribute before the timer thread starts so the thread
        # never observes a partially initialised object.
        self._output_stream = None
        self._periodic_flusher = PeriodicThread(1, self.notify_flush)
        self._periodic_flusher.daemon = True
        self._periodic_flusher.start()

    def write(self, b: bytes):
        """Write ``b`` to the wrapped stream, flushing it if a flush is due."""
        self._output_stream.write(b)
        if self._flush_event:
            self._output_stream.flush()
            self._flush_event = False

    def reset_output_stream(self, output_stream: create_OutputStream):
        """Replace the wrapped stream that subsequent writes are sent to."""
        self._output_stream = output_stream

    def notify_flush(self):
        """Request that the next :meth:`write` flushes the wrapped stream."""
        self._flush_event = True

    def close(self):
        """Cancel the periodic flusher; safe to call more than once."""
        if self._periodic_flusher:
            self._periodic_flusher.cancel()
            self._periodic_flusher = None
Ejemplo n.º 2
0
class BundleProcessorCache(object):
    """A cache for ``BundleProcessor``s.

    ``BundleProcessor`` objects are cached by the id of their
    ``beam_fn_api_pb2.ProcessBundleDescriptor``.

    Attributes:
      fns (dict): A dictionary that maps bundle descriptor IDs to instances of
        ``beam_fn_api_pb2.ProcessBundleDescriptor``.
      state_handler_factory (``StateHandlerFactory``): Used to create state
        handlers to be used by a ``bundle_processor.BundleProcessor`` during
        processing.
      data_channel_factory (``data_plane.DataChannelFactory``)
      active_bundle_processors (dict): A dictionary, indexed by instruction
        IDs, containing ``(bundle descriptor id, BundleProcessor)`` tuples for
        processors that are currently processing the corresponding
        instruction.
      cached_bundle_processors (dict): A dictionary, indexed by bundle
        descriptor id, of lists of cached ``bundle_processor.BundleProcessor``
        that are not currently performing processing.
      last_access_times (dict): Maps a bundle descriptor id to the
        ``time.time()`` value recorded by the most recent ``release``.
      periodic_shutdown: The background ``PeriodicThread`` that shuts down
        idle cached processors, or ``None`` once ``shutdown`` has run.
    """
    # Class-level default so ``shutdown`` is safe even if the periodic thread
    # was never scheduled on this instance.
    periodic_shutdown = None  # type: Optional[PeriodicThread]

    def __init__(
        self,
        state_handler_factory,  # type: StateHandlerFactory
        data_channel_factory,  # type: data_plane.DataChannelFactory
        fns  # type: Mapping[str, beam_fn_api_pb2.ProcessBundleDescriptor]
    ):
        self.fns = fns
        self.state_handler_factory = state_handler_factory
        self.data_channel_factory = data_channel_factory
        self.active_bundle_processors = {
        }  # type: Dict[str, Tuple[str, bundle_processor.BundleProcessor]]
        self.cached_bundle_processors = collections.defaultdict(
            list
        )  # type: DefaultDict[str, List[bundle_processor.BundleProcessor]]
        self.last_access_times = collections.defaultdict(
            float)  # type: DefaultDict[str, float]
        self._schedule_periodic_shutdown()

    def register(self, bundle_descriptor):
        # type: (beam_fn_api_pb2.ProcessBundleDescriptor) -> None
        """Register a ``beam_fn_api_pb2.ProcessBundleDescriptor`` by its id."""
        self.fns[bundle_descriptor.id] = bundle_descriptor

    def get(self, instruction_id, bundle_descriptor_id):
        # type: (str, str) -> bundle_processor.BundleProcessor
        """Return the requested ``BundleProcessor``, creating it if necessary.

        Moves the ``BundleProcessor`` from the inactive to the active cache.
        Raises ``KeyError`` if a new processor is needed and
        ``bundle_descriptor_id`` has not been registered in ``fns``.
        """
        try:
            # pop() is threadsafe
            processor = self.cached_bundle_processors[
                bundle_descriptor_id].pop()
        except IndexError:
            # Nothing cached for this descriptor: build a fresh processor.
            descriptor = self.fns[bundle_descriptor_id]
            processor = bundle_processor.BundleProcessor(
                descriptor,
                self.state_handler_factory.create_state_handler(
                    descriptor.state_api_service_descriptor),
                self.data_channel_factory)
        self.active_bundle_processors[
            instruction_id] = bundle_descriptor_id, processor
        return processor

    def lookup(self, instruction_id):
        # type: (str) -> Optional[bundle_processor.BundleProcessor]
        """Return the active ``BundleProcessor`` for ``instruction_id``.

        Returns ``None`` when no processor is active for that instruction.
        """
        return self.active_bundle_processors.get(instruction_id,
                                                 (None, None))[-1]

    def discard(self, instruction_id):
        # type: (str) -> None
        """Remove the ``BundleProcessor`` from the cache and shut it down.

        Raises ``KeyError`` if ``instruction_id`` is not active.
        """
        # Remove the entry first so a failing shutdown() cannot leave a
        # half-shut-down processor registered as active.
        _, processor = self.active_bundle_processors.pop(instruction_id)
        processor.shutdown()

    def release(self, instruction_id):
        # type: (str) -> None
        """Release the requested ``BundleProcessor``.

        Resets the ``BundleProcessor`` and moves it from the active to the
        inactive cache. Raises ``KeyError`` if ``instruction_id`` is not
        active.
        """
        descriptor_id, processor = self.active_bundle_processors.pop(
            instruction_id)
        processor.reset()
        self.last_access_times[descriptor_id] = time.time()
        self.cached_bundle_processors[descriptor_id].append(processor)

    def shutdown(self):
        """Shutdown all ``BundleProcessor``s in the cache.

        Stops the periodic shutdown thread first, then shuts down every
        active processor followed by every cached processor.
        """
        if self.periodic_shutdown:
            self.periodic_shutdown.cancel()
            self.periodic_shutdown.join()
            self.periodic_shutdown = None

        # Iterate over a snapshot of the keys: discard() removes entries, and
        # mutating a dict while iterating it raises RuntimeError.
        for instruction_id in list(self.active_bundle_processors):
            self.discard(instruction_id)
        for cached_bundle_processors in list(
                self.cached_bundle_processors.values()):
            BundleProcessorCache._shutdown_cached_bundle_processors(
                cached_bundle_processors)

    def _schedule_periodic_shutdown(self):
        """Start the daemon thread that shuts down idle cached processors."""
        def shutdown_inactive_bundle_processors():
            # Work on a snapshot: release() may add descriptor ids from
            # another thread while this loop is running.
            for descriptor_id, last_access_time in list(
                    self.last_access_times.items()):
                if (time.time() - last_access_time >
                        DEFAULT_BUNDLE_PROCESSOR_CACHE_SHUTDOWN_THRESHOLD_S):
                    BundleProcessorCache._shutdown_cached_bundle_processors(
                        self.cached_bundle_processors[descriptor_id])

        self.periodic_shutdown = PeriodicThread(
            DEFAULT_BUNDLE_PROCESSOR_CACHE_SHUTDOWN_THRESHOLD_S,
            shutdown_inactive_bundle_processors)
        self.periodic_shutdown.daemon = True
        self.periodic_shutdown.start()

    @staticmethod
    def _shutdown_cached_bundle_processors(cached_bundle_processors):
        """Pop and shut down every processor in the given list, in place."""
        try:
            while True:
                # pop() is threadsafe
                processor = cached_bundle_processors.pop()
                processor.shutdown()
        except IndexError:
            # The list is empty: everything has been shut down.
            pass
Ejemplo n.º 3
0
class BundleProcessorCache(object):
  """A cache for ``BundleProcessor``s.

  ``BundleProcessor`` objects are cached by the id of their
  ``beam_fn_api_pb2.ProcessBundleDescriptor``.

  Attributes:
    fns (dict): A dictionary that maps bundle descriptor IDs to instances of
      ``beam_fn_api_pb2.ProcessBundleDescriptor``.
    state_handler_factory (``StateHandlerFactory``): Used to create state
      handlers to be used by a ``bundle_processor.BundleProcessor`` during
      processing.
    data_channel_factory (``data_plane.DataChannelFactory``)
    known_not_running_instruction_ids (OrderedDict): Instruction IDs that have
      been announced via ``activate`` or finished via ``release`` and have no
      active processor; bounded by ``MAX_KNOWN_NOT_RUNNING_INSTRUCTIONS``.
    failed_instruction_ids (OrderedDict): Instruction IDs that were discarded;
      bounded by ``MAX_FAILED_INSTRUCTIONS``.
    active_bundle_processors (dict): A dictionary, indexed by instruction IDs,
      containing ``(bundle descriptor id, BundleProcessor)`` tuples for
      processors that are currently processing the corresponding instruction.
    cached_bundle_processors (dict): A dictionary, indexed by bundle
      descriptor id, of lists of cached ``bundle_processor.BundleProcessor``
      that are not currently performing processing.
    last_access_times (dict): Maps a bundle descriptor id to the
      ``time.time()`` value recorded by the most recent ``release``.
  """
  periodic_shutdown = None  # type: Optional[PeriodicThread]

  def __init__(
      self,
      state_handler_factory,  # type: StateHandlerFactory
      data_channel_factory,  # type: data_plane.DataChannelFactory
      fns  # type: MutableMapping[str, beam_fn_api_pb2.ProcessBundleDescriptor]
  ):
    # type: (...) -> None
    self.fns = fns
    self.state_handler_factory = state_handler_factory
    self.data_channel_factory = data_channel_factory
    self.known_not_running_instruction_ids = collections.OrderedDict(
    )  # type: collections.OrderedDict[str, bool]
    self.failed_instruction_ids = collections.OrderedDict(
    )  # type: collections.OrderedDict[str, bool]
    self.active_bundle_processors = {
    }  # type: Dict[str, Tuple[str, bundle_processor.BundleProcessor]]
    self.cached_bundle_processors = collections.defaultdict(
        list)  # type: DefaultDict[str, List[bundle_processor.BundleProcessor]]
    self.last_access_times = collections.defaultdict(
        float)  # type: DefaultDict[str, float]
    # The lock must exist before the background thread is started, because
    # that thread acquires it.
    self._lock = threading.Lock()
    self._schedule_periodic_shutdown()

  def register(self, bundle_descriptor):
    # type: (beam_fn_api_pb2.ProcessBundleDescriptor) -> None

    """Register a ``beam_fn_api_pb2.ProcessBundleDescriptor`` by its id."""
    self.fns[bundle_descriptor.id] = bundle_descriptor

  def activate(self, instruction_id):
    # type: (str) -> None

    """Makes the ``instruction_id`` known to the bundle processor.

    Allows ``lookup`` to return ``None``. Necessary if ``lookup`` can occur
    before ``get``.
    """
    with self._lock:
      self.known_not_running_instruction_ids[instruction_id] = True

  def get(self, instruction_id, bundle_descriptor_id):
    # type: (str, str) -> bundle_processor.BundleProcessor

    """
    Return the requested ``BundleProcessor``, creating it if necessary.

    Moves the ``BundleProcessor`` from the inactive to the active cache.
    Raises ``KeyError`` if a new processor is needed and
    ``bundle_descriptor_id`` has not been registered in ``fns``.
    """
    with self._lock:
      try:
        # pop() is threadsafe
        processor = self.cached_bundle_processors[bundle_descriptor_id].pop()
      except IndexError:
        processor = None
      if processor is not None:
        self._mark_active_locked(
            instruction_id, bundle_descriptor_id, processor)
        return processor

    # Make sure we instantiate the processor while not holding the lock.
    descriptor = self.fns[bundle_descriptor_id]
    processor = bundle_processor.BundleProcessor(
        descriptor,
        self.state_handler_factory.create_state_handler(
            descriptor.state_api_service_descriptor),
        self.data_channel_factory)
    with self._lock:
      self._mark_active_locked(instruction_id, bundle_descriptor_id, processor)
    return processor

  def _mark_active_locked(self, instruction_id, bundle_descriptor_id,
                          processor):
    # type: (str, str, bundle_processor.BundleProcessor) -> None

    """Record ``processor`` as active; the caller must hold ``self._lock``."""
    self.active_bundle_processors[
        instruction_id] = bundle_descriptor_id, processor
    # The instruction may have not been pre-registered before execution
    # since activate() may have never been invoked, hence the default.
    self.known_not_running_instruction_ids.pop(instruction_id, None)

  def lookup(self, instruction_id):
    # type: (str) -> Optional[bundle_processor.BundleProcessor]

    """
    Return the requested ``BundleProcessor`` from the cache.

    Will return ``None`` if the BundleProcessor is known but not yet ready. Will
    raise an error if the ``instruction_id`` is not known or has been discarded.
    """
    with self._lock:
      if instruction_id in self.failed_instruction_ids:
        raise RuntimeError(
            'Bundle processing associated with %s has failed. '
            'Check prior failing response for details.' % instruction_id)
      processor = self.active_bundle_processors.get(
          instruction_id, (None, None))[-1]
      if processor:
        return processor
      if instruction_id in self.known_not_running_instruction_ids:
        return None
      raise RuntimeError('Unknown process bundle id %s.' % instruction_id)

  def discard(self, instruction_id):
    # type: (str) -> None

    """
    Marks the instruction id as failed shutting down the ``BundleProcessor``.

    Raises ``KeyError`` if ``instruction_id`` has no active processor (the id
    is still recorded as failed in that case).
    """
    with self._lock:
      self.failed_instruction_ids[instruction_id] = True
      # Evict the oldest entries to keep the failure record bounded.
      while len(self.failed_instruction_ids) > MAX_FAILED_INSTRUCTIONS:
        self.failed_instruction_ids.popitem(last=False)
      _, processor = self.active_bundle_processors.pop(instruction_id)

    # Perform the shutdown while not holding the lock.
    processor.shutdown()

  def release(self, instruction_id):
    # type: (str) -> None

    """
    Release the requested ``BundleProcessor``.

    Resets the ``BundleProcessor`` and moves it from the active to the
    inactive cache.
    """
    with self._lock:
      self.known_not_running_instruction_ids[instruction_id] = True
      # Evict the oldest entries to keep the record bounded.
      while len(self.known_not_running_instruction_ids
                ) > MAX_KNOWN_NOT_RUNNING_INSTRUCTIONS:
        self.known_not_running_instruction_ids.popitem(last=False)
      descriptor_id, processor = (
          self.active_bundle_processors.pop(instruction_id))

    # Make sure that we reset the processor while not holding the lock.
    processor.reset()
    with self._lock:
      self.last_access_times[descriptor_id] = time.time()
      self.cached_bundle_processors[descriptor_id].append(processor)

  def shutdown(self):
    # type: () -> None

    """
    Shutdown all ``BundleProcessor``s in the cache.
    """
    if self.periodic_shutdown:
      self.periodic_shutdown.cancel()
      self.periodic_shutdown.join()
      self.periodic_shutdown = None

    # Snapshot under the lock; the shutdown calls themselves run without it
    # (discard() acquires the lock on its own).
    with self._lock:
      active_ids = list(self.active_bundle_processors)
      cached_lists = list(self.cached_bundle_processors.values())
    for instruction_id in active_ids:
      self.discard(instruction_id)
    for cached_bundle_processors in cached_lists:
      BundleProcessorCache._shutdown_cached_bundle_processors(
          cached_bundle_processors)

  def _schedule_periodic_shutdown(self):
    # type: () -> None

    """Start the daemon thread that shuts down idle cached processors."""
    def shutdown_inactive_bundle_processors():
      # type: () -> None
      # release() mutates these dicts under the lock, so take the snapshot
      # under the same lock and do the (slow) shutdowns outside of it.
      with self._lock:
        idle_lists = [
            self.cached_bundle_processors[descriptor_id]
            for descriptor_id, last_access_time in
            self.last_access_times.items()
            if (time.time() - last_access_time >
                DEFAULT_BUNDLE_PROCESSOR_CACHE_SHUTDOWN_THRESHOLD_S)
        ]
      for idle_processors in idle_lists:
        BundleProcessorCache._shutdown_cached_bundle_processors(
            idle_processors)

    self.periodic_shutdown = PeriodicThread(
        DEFAULT_BUNDLE_PROCESSOR_CACHE_SHUTDOWN_THRESHOLD_S,
        shutdown_inactive_bundle_processors)
    self.periodic_shutdown.daemon = True
    self.periodic_shutdown.start()

  @staticmethod
  def _shutdown_cached_bundle_processors(cached_bundle_processors):
    # type: (List[bundle_processor.BundleProcessor]) -> None

    """Pop and shut down every processor in the given list, in place."""
    try:
      while True:
        # pop() is threadsafe
        processor = cached_bundle_processors.pop()
        processor.shutdown()
    except IndexError:
      # The list is empty: everything has been shut down.
      pass