class BeamTimeBasedOutputStream(create_OutputStream):
  """An output-stream wrapper that flushes at most once per second.

  A daemon ``PeriodicThread`` sets a flag every second; the first ``write``
  after the flag is set also flushes the underlying stream. This bounds the
  latency of buffered data without paying for a flush on every write.
  """
  def __init__(self):
    # type: () -> None
    # BUG FIX: the original called super(BeamTimeBasedOutputStream).__init__()
    # without passing self. That initializes the *unbound* super proxy object
    # rather than this instance, silently skipping base-class initialization.
    super().__init__()
    self._flush_event = False
    self._periodic_flusher = PeriodicThread(1, self.notify_flush)
    self._periodic_flusher.daemon = True
    self._periodic_flusher.start()
    # The wrapped stream; must be set via reset_output_stream() before write().
    self._output_stream = None

  def write(self, b: bytes):
    # type: (bytes) -> None

    """Write ``b`` to the wrapped stream, flushing if a flush is due."""
    self._output_stream.write(b)
    if self._flush_event:
      self._output_stream.flush()
      self._flush_event = False

  def reset_output_stream(self, output_stream: create_OutputStream):
    # type: (create_OutputStream) -> None

    """Point this wrapper at a new underlying output stream."""
    self._output_stream = output_stream

  def notify_flush(self):
    # type: () -> None

    """Invoked by the periodic thread: request a flush on the next write."""
    self._flush_event = True

  def close(self):
    # type: () -> None

    """Stop the periodic flusher thread. Safe to call more than once."""
    if self._periodic_flusher:
      self._periodic_flusher.cancel()
      self._periodic_flusher = None
class BundleProcessorCache(object):
  """A cache for ``BundleProcessor``s.

  ``BundleProcessor`` objects are cached by the id of their
  ``beam_fn_api_pb2.ProcessBundleDescriptor``.

  Attributes:
    fns (dict): A dictionary that maps bundle descriptor IDs to instances of
      ``beam_fn_api_pb2.ProcessBundleDescriptor``.
    state_handler_factory (``StateHandlerFactory``): Used to create state
      handlers to be used by a ``bundle_processor.BundleProcessor`` during
      processing.
    data_channel_factory (``data_plane.DataChannelFactory``)
    active_bundle_processors (dict): A dictionary, indexed by instruction IDs,
      containing ``bundle_processor.BundleProcessor`` objects that are
      currently active processing the corresponding instruction.
    cached_bundle_processors (dict): A dictionary, indexed by bundle processor
      id, of cached ``bundle_processor.BundleProcessor`` that are not
      currently performing processing.
  """
  def __init__(
      self,
      state_handler_factory,  # type: StateHandlerFactory
      data_channel_factory,  # type: data_plane.DataChannelFactory
      fns  # type: Mapping[str, beam_fn_api_pb2.ProcessBundleDescriptor]
  ):
    self.fns = fns
    self.state_handler_factory = state_handler_factory
    self.data_channel_factory = data_channel_factory
    self.active_bundle_processors = {
    }  # type: Dict[str, Tuple[str, bundle_processor.BundleProcessor]]
    self.cached_bundle_processors = collections.defaultdict(
        list)  # type: DefaultDict[str, List[bundle_processor.BundleProcessor]]
    # Tracks when each descriptor's processors were last released, so the
    # periodic thread can shut down processors idle past the threshold.
    self.last_access_times = collections.defaultdict(
        float)  # type: DefaultDict[str, float]
    self._schedule_periodic_shutdown()

  def register(self, bundle_descriptor):
    # type: (beam_fn_api_pb2.ProcessBundleDescriptor) -> None

    """Register a ``beam_fn_api_pb2.ProcessBundleDescriptor`` by its id."""
    self.fns[bundle_descriptor.id] = bundle_descriptor

  def get(self, instruction_id, bundle_descriptor_id):
    # type: (str, str) -> bundle_processor.BundleProcessor

    """
    Return the requested ``BundleProcessor``, creating it if necessary.

    Moves the ``BundleProcessor`` from the inactive to the active cache.
    """
    try:
      # pop() is threadsafe
      processor = self.cached_bundle_processors[bundle_descriptor_id].pop()
    except IndexError:
      # Cache miss: build a fresh processor for this descriptor.
      processor = bundle_processor.BundleProcessor(
          self.fns[bundle_descriptor_id],
          self.state_handler_factory.create_state_handler(
              self.fns[bundle_descriptor_id].state_api_service_descriptor),
          self.data_channel_factory)
    self.active_bundle_processors[
        instruction_id] = bundle_descriptor_id, processor
    return processor

  def lookup(self, instruction_id):
    # type: (str) -> Optional[bundle_processor.BundleProcessor]

    """
    Return the requested ``BundleProcessor`` from the cache, or None.
    """
    return self.active_bundle_processors.get(instruction_id,
                                             (None, None))[-1]

  def discard(self, instruction_id):
    # type: (str) -> None

    """
    Remove the ``BundleProcessor`` from the cache, shutting it down.
    """
    self.active_bundle_processors[instruction_id][1].shutdown()
    del self.active_bundle_processors[instruction_id]

  def release(self, instruction_id):
    # type: (str) -> None

    """
    Release the requested ``BundleProcessor``.

    Resets the ``BundleProcessor`` and moves it from the active to the
    inactive cache.
    """
    descriptor_id, processor = self.active_bundle_processors.pop(
        instruction_id)
    processor.reset()
    self.last_access_times[descriptor_id] = time.time()
    self.cached_bundle_processors[descriptor_id].append(processor)

  def shutdown(self):
    """
    Shutdown all ``BundleProcessor``s in the cache.
    """
    if self.periodic_shutdown:
      self.periodic_shutdown.cancel()
      self.periodic_shutdown.join()
      self.periodic_shutdown = None

    # BUG FIX: iterate over a snapshot of the keys. The original deleted
    # entries from active_bundle_processors while iterating the dict itself,
    # which raises "RuntimeError: dictionary changed size during iteration".
    for instruction_id in list(self.active_bundle_processors.keys()):
      self.active_bundle_processors[instruction_id][1].shutdown()
      del self.active_bundle_processors[instruction_id]

    for cached_bundle_processors in self.cached_bundle_processors.values():
      BundleProcessorCache._shutdown_cached_bundle_processors(
          cached_bundle_processors)

  def _schedule_periodic_shutdown(self):
    # Start a daemon thread that periodically shuts down cached processors
    # that have been idle longer than the threshold.
    def shutdown_inactive_bundle_processors():
      # BUG FIX: iterate over a snapshot; release() (called from other
      # threads) inserts new keys into this defaultdict, and mutating a dict
      # while iterating it raises RuntimeError.
      for descriptor_id, last_access_time in list(
          self.last_access_times.items()):
        if (time.time() - last_access_time >
            DEFAULT_BUNDLE_PROCESSOR_CACHE_SHUTDOWN_THRESHOLD_S):
          BundleProcessorCache._shutdown_cached_bundle_processors(
              self.cached_bundle_processors[descriptor_id])

    self.periodic_shutdown = PeriodicThread(
        DEFAULT_BUNDLE_PROCESSOR_CACHE_SHUTDOWN_THRESHOLD_S,
        shutdown_inactive_bundle_processors)
    self.periodic_shutdown.daemon = True
    self.periodic_shutdown.start()

  @staticmethod
  def _shutdown_cached_bundle_processors(cached_bundle_processors):
    # Drain the list and shut each processor down; concurrent consumers are
    # tolerated because list.pop() is threadsafe.
    try:
      while True:
        # pop() is threadsafe
        bundle_processor = cached_bundle_processors.pop()
        bundle_processor.shutdown()
    except IndexError:
      pass
class BundleProcessorCache(object):
  """A cache for ``BundleProcessor``s.

  ``BundleProcessor`` objects are cached by the id of their
  ``beam_fn_api_pb2.ProcessBundleDescriptor``.

  Attributes:
    fns (dict): A dictionary that maps bundle descriptor IDs to instances of
      ``beam_fn_api_pb2.ProcessBundleDescriptor``.
    state_handler_factory (``StateHandlerFactory``): Used to create state
      handlers to be used by a ``bundle_processor.BundleProcessor`` during
      processing.
    data_channel_factory (``data_plane.DataChannelFactory``)
    active_bundle_processors (dict): A dictionary, indexed by instruction IDs,
      containing ``bundle_processor.BundleProcessor`` objects that are
      currently active processing the corresponding instruction.
    cached_bundle_processors (dict): A dictionary, indexed by bundle processor
      id, of cached ``bundle_processor.BundleProcessor`` that are not
      currently performing processing.
  """
  periodic_shutdown = None  # type: Optional[PeriodicThread]

  def __init__(
      self,
      state_handler_factory,  # type: StateHandlerFactory
      data_channel_factory,  # type: data_plane.DataChannelFactory
      fns  # type: MutableMapping[str, beam_fn_api_pb2.ProcessBundleDescriptor]
  ):
    # type: (...) -> None
    self.fns = fns
    self.state_handler_factory = state_handler_factory
    self.data_channel_factory = data_channel_factory
    # Bounded (see release()) set of instruction ids that are known but not
    # currently running; lets lookup() distinguish "not started yet" from
    # "unknown id".
    self.known_not_running_instruction_ids = collections.OrderedDict(
    )  # type: collections.OrderedDict[str, bool]
    # Bounded (see discard()) set of instruction ids whose processing failed.
    self.failed_instruction_ids = collections.OrderedDict(
    )  # type: collections.OrderedDict[str, bool]
    self.active_bundle_processors = {
    }  # type: Dict[str, Tuple[str, bundle_processor.BundleProcessor]]
    self.cached_bundle_processors = collections.defaultdict(
        list)  # type: DefaultDict[str, List[bundle_processor.BundleProcessor]]
    # Tracks when each descriptor's processors were last released, so the
    # periodic thread can shut down processors idle past the threshold.
    self.last_access_times = collections.defaultdict(
        float)  # type: DefaultDict[str, float]
    self._schedule_periodic_shutdown()
    self._lock = threading.Lock()

  def register(self, bundle_descriptor):
    # type: (beam_fn_api_pb2.ProcessBundleDescriptor) -> None

    """Register a ``beam_fn_api_pb2.ProcessBundleDescriptor`` by its id."""
    self.fns[bundle_descriptor.id] = bundle_descriptor

  def activate(self, instruction_id):
    # type: (str) -> None

    """Makes the ``instruction_id`` known to the bundle processor.

    Allows ``lookup`` to return ``None``. Necessary if ``lookup`` can occur
    before ``get``.
    """
    with self._lock:
      self.known_not_running_instruction_ids[instruction_id] = True

  def get(self, instruction_id, bundle_descriptor_id):
    # type: (str, str) -> bundle_processor.BundleProcessor

    """
    Return the requested ``BundleProcessor``, creating it if necessary.

    Moves the ``BundleProcessor`` from the inactive to the active cache.
    """
    with self._lock:
      try:
        # pop() is threadsafe
        processor = self.cached_bundle_processors[bundle_descriptor_id].pop()
        self.active_bundle_processors[
            instruction_id] = bundle_descriptor_id, processor
        try:
          del self.known_not_running_instruction_ids[instruction_id]
        except KeyError:
          # The instruction may have not been pre-registered before execution
          # since activate() may have never been invoked
          pass
        return processor
      except IndexError:
        pass

    # Make sure we instantiate the processor while not holding the lock.
    processor = bundle_processor.BundleProcessor(
        self.fns[bundle_descriptor_id],
        self.state_handler_factory.create_state_handler(
            self.fns[bundle_descriptor_id].state_api_service_descriptor),
        self.data_channel_factory)
    with self._lock:
      self.active_bundle_processors[
          instruction_id] = bundle_descriptor_id, processor
      try:
        del self.known_not_running_instruction_ids[instruction_id]
      except KeyError:
        # The instruction may have not been pre-registered before execution
        # since activate() may have never been invoked
        pass
    return processor

  def lookup(self, instruction_id):
    # type: (str) -> Optional[bundle_processor.BundleProcessor]

    """
    Return the requested ``BundleProcessor`` from the cache.

    Will return ``None`` if the BundleProcessor is known but not yet ready.
    Will raise an error if the ``instruction_id`` is not known or has been
    discarded.
    """
    with self._lock:
      if instruction_id in self.failed_instruction_ids:
        raise RuntimeError(
            'Bundle processing associated with %s has failed. '
            'Check prior failing response for details.' % instruction_id)
      processor = self.active_bundle_processors.get(
          instruction_id, (None, None))[-1]
      if processor:
        return processor
      if instruction_id in self.known_not_running_instruction_ids:
        return None
      raise RuntimeError('Unknown process bundle id %s.' % instruction_id)

  def discard(self, instruction_id):
    # type: (str) -> None

    """
    Marks the instruction id as failed shutting down the ``BundleProcessor``.
    """
    with self._lock:
      self.failed_instruction_ids[instruction_id] = True
      # Bound the failed-id history, evicting oldest entries first.
      while len(self.failed_instruction_ids) > MAX_FAILED_INSTRUCTIONS:
        self.failed_instruction_ids.popitem(last=False)
      processor = self.active_bundle_processors[instruction_id][1]
      del self.active_bundle_processors[instruction_id]

    # Perform the shutdown while not holding the lock.
    processor.shutdown()

  def release(self, instruction_id):
    # type: (str) -> None

    """
    Release the requested ``BundleProcessor``.

    Resets the ``BundleProcessor`` and moves it from the active to the
    inactive cache.
    """
    with self._lock:
      self.known_not_running_instruction_ids[instruction_id] = True
      # Bound the known-id history, evicting oldest entries first.
      while len(self.known_not_running_instruction_ids
                ) > MAX_KNOWN_NOT_RUNNING_INSTRUCTIONS:
        self.known_not_running_instruction_ids.popitem(last=False)

      descriptor_id, processor = (
          self.active_bundle_processors.pop(instruction_id))

    # Make sure that we reset the processor while not holding the lock.
    processor.reset()
    with self._lock:
      self.last_access_times[descriptor_id] = time.time()
      self.cached_bundle_processors[descriptor_id].append(processor)

  def shutdown(self):
    # type: () -> None

    """
    Shutdown all ``BundleProcessor``s in the cache.
    """
    if self.periodic_shutdown:
      self.periodic_shutdown.cancel()
      self.periodic_shutdown.join()
      self.periodic_shutdown = None

    for instruction_id in list(self.active_bundle_processors.keys()):
      self.discard(instruction_id)
    for cached_bundle_processors in self.cached_bundle_processors.values():
      BundleProcessorCache._shutdown_cached_bundle_processors(
          cached_bundle_processors)

  def _schedule_periodic_shutdown(self):
    # type: () -> None

    # Start a daemon thread that periodically shuts down cached processors
    # that have been idle longer than the threshold.
    def shutdown_inactive_bundle_processors():
      # type: () -> None

      # BUG FIX: iterate over a copy. release() inserts new keys into
      # last_access_times (a defaultdict) from other threads without this
      # iteration holding the lock, and mutating a dict while iterating it
      # raises "RuntimeError: dictionary changed size during iteration".
      for descriptor_id, last_access_time in self.last_access_times.copy(
      ).items():
        if (time.time() - last_access_time >
            DEFAULT_BUNDLE_PROCESSOR_CACHE_SHUTDOWN_THRESHOLD_S):
          BundleProcessorCache._shutdown_cached_bundle_processors(
              self.cached_bundle_processors[descriptor_id])

    self.periodic_shutdown = PeriodicThread(
        DEFAULT_BUNDLE_PROCESSOR_CACHE_SHUTDOWN_THRESHOLD_S,
        shutdown_inactive_bundle_processors)
    self.periodic_shutdown.daemon = True
    self.periodic_shutdown.start()

  @staticmethod
  def _shutdown_cached_bundle_processors(cached_bundle_processors):
    # type: (List[bundle_processor.BundleProcessor]) -> None

    # Drain the list and shut each processor down; concurrent consumers are
    # tolerated because list.pop() is threadsafe.
    try:
      while True:
        # pop() is threadsafe
        bundle_processor = cached_bundle_processors.pop()
        bundle_processor.shutdown()
    except IndexError:
      pass