def __init__(self, input_queue: InternalQueue, output_queue: InternalQueue):
    """
    Store both queues, reset the per-input bookkeeping, and set up the
    context, the async RPC endpoints and the print log handler.

    :param input_queue: queue passed to the parent initializer and stored
        as ``_input_queue``.
    :param output_queue: queue stored as ``_output_queue`` and handed to
        both the async RPC server and the async RPC client.
    """
    super().__init__(self.__class__.__name__, queue=input_queue)
    self._input_queue: InternalQueue = input_queue
    self._output_queue: InternalQueue = output_queue

    # no operator and no input is set at construction time
    self._operator: Optional[Operator] = None
    self._current_input_tuple: Optional[Union[Tuple, InputExhausted]] = None
    self._current_input_link: Optional[LinkIdentity] = None
    self._current_input_tuple_iter: Optional[
        Iterator[Union[Tuple, InputExhausted]]] = None
    self._input_links: List[LinkIdentity] = []
    self._input_link_map: MutableMapping[LinkIdentity, int] = {}

    # the RPC server and client share the same context and output queue
    self.context = Context(self)
    self._async_rpc_server = AsyncRPCServer(output_queue,
                                            context=self.context)
    self._async_rpc_client = AsyncRPCClient(output_queue,
                                            context=self.context)

    def forward_print(msg):
        # wrap the message in a PythonPrintV2 command for CONTROLLER
        return self._async_rpc_client.send(
            ActorVirtualIdentity(name="CONTROLLER"),
            set_one_of(ControlCommandV2, PythonPrintV2(message=msg)))

    self._print_log_handler = PrintLogHandler(forward_print)
    logger.add(self._print_log_handler, level='PRINT', filter="operators")
def report_exception(self) -> None:
    """
    Send the traceback of the exception currently being handled to the
    controller as a LocalOperatorExceptionV2 control command.
    """
    # flush the buffered console prints before reporting
    self._print_log_handler.flush()

    # a negative limit keeps only the last abs(limit) traceback entries,
    # so limit=-1 reports just the final one
    exception_report = LocalOperatorExceptionV2(
        message=traceback.format_exc(limit=-1))

    self._async_rpc_client.send(
        ActorVirtualIdentity(name="CONTROLLER"),
        set_one_of(ControlCommandV2, exception_report))
def test_network_receiver_can_receive_data_messages_end_of_upstream(
        self, data_payload, output_queue, input_queue,
        network_receiver_thread, network_sender_thread):
    """
    An EndOfUpstream data element put on input_queue must come out of
    output_queue with an equal payload and the same tag.
    """
    network_receiver_thread.start()
    network_sender_thread.start()

    sender_id = ActorVirtualIdentity(name="test")
    sent = DataElement(tag=sender_id, payload=EndOfUpstream())
    input_queue.put(sent)

    received: DataElement = output_queue.get()
    assert received.payload == EndOfUpstream()
    assert received.tag == sender_id
def test_network_receiver_can_receive_data_messages(
        self, data_payload, output_queue, input_queue,
        network_receiver_thread, network_sender_thread):
    """
    A data element carrying data_payload put on input_queue must come out
    of output_queue with a frame of the same length and the same tag.
    """
    network_receiver_thread.start()
    network_sender_thread.start()

    sender_id = ActorVirtualIdentity(name="test")
    sent = DataElement(tag=sender_id, payload=data_payload)
    input_queue.put(sent)

    received: DataElement = output_queue.get()
    # only the frame lengths are compared, not the frame contents
    assert len(received.payload.frame) == len(data_payload.frame)
    assert received.tag == sender_id
def test_network_receiver_can_receive_control_messages(
        self, data_payload, output_queue, input_queue,
        network_receiver_thread, network_sender_thread):
    """
    A control element put on input_queue must come out of output_queue
    with an equal payload and the same tag.
    """
    network_receiver_thread.start()
    network_sender_thread.start()

    sender_id = ActorVirtualIdentity(name="test")
    control_payload = set_one_of(ControlPayloadV2, ControlInvocationV2())
    sent = ControlElement(tag=sender_id, payload=control_payload)
    input_queue.put(sent)

    received: ControlElement = output_queue.get()
    assert received.payload == control_payload
    assert received.tag == sender_id
def complete(self) -> None:
    """
    Finish the DataProcessor: flush pending prints, close the operator,
    move the worker state to COMPLETED, and notify the controller.
    """
    # flush the buffered console prints
    self._print_log_handler.flush()
    self._operator.close()
    self.context.state_manager.transit_to(WorkerState.COMPLETED)

    # tell the controller that this worker has finished executing
    completion_notice = set_one_of(ControlCommandV2,
                                   WorkerExecutionCompletedV2())
    self._async_rpc_client.send(
        ActorVirtualIdentity(name="CONTROLLER"), completion_notice)
class BatchToTupleConverter:
    """
    Turn data payloads received from registered senders into a stream of
    tuples, InputExhausted signals and markers.
    """

    # sender identity that triggers the special source-operator path
    SOURCE_STARTER = ActorVirtualIdentity("SOURCE_STARTER")

    def __init__(self):
        # sender -> the link it was registered on
        self._input_map: Dict[ActorVirtualIdentity, LinkIdentity] = {}
        # link -> senders on that link that have not ended yet
        self._upstream_map: defaultdict[
            LinkIdentity, Set[ActorVirtualIdentity]] = defaultdict(set)
        # link of the most recently processed payload, None before the first
        self._current_link: Optional[LinkIdentity] = None

    def register_input(self, identifier: ActorVirtualIdentity,
                       input_: LinkIdentity) -> None:
        """
        Record that sender ``identifier`` delivers data over link ``input_``.

        :param identifier: identity of the sending actor.
        :param input_: link the sender belongs to.
        """
        self._upstream_map[input_].add(identifier)
        self._input_map[identifier] = input_

    def process_data_payload(
            self, from_: ActorVirtualIdentity, payload: DataPayload
    ) -> Iterator[Union[Tuple, InputExhausted, Marker]]:
        """
        Lazily convert one payload into tuples and/or markers.

        :param from_: identity of the sender of the payload.
        :param payload: an InputDataFrame or an EndOfUpstream.
        :return: an iterator that yields a SenderChangeMarker when the link
            differs from the previous payload's link, then one Tuple per
            row of an InputDataFrame, or InputExhausted / EndOfAllMarker
            as senders and links finish.
        :raises KeyError: during iteration, if ``from_`` was never
            registered.
        :raises NotImplementedError: during iteration, for any other
            payload type.
        """
        # special case used to yield for source op
        if from_ == BatchToTupleConverter.SOURCE_STARTER:
            yield InputExhausted()
            yield EndOfAllMarker()
            return

        link = self._input_map[from_]
        link_changed = (self._current_link is None
                        or self._current_link != link)
        if link_changed:
            self._current_link = link
            yield SenderChangeMarker(link)

        if isinstance(payload, InputDataFrame):
            frame = payload.frame
            for field_accessor in ArrowTableTupleProvider(frame):
                # every column name is mapped to this row's accessor
                yield Tuple({
                    name: field_accessor
                    for name in frame.column_names
                })
            return

        if isinstance(payload, EndOfUpstream):
            pending_senders = self._upstream_map[link]
            pending_senders.remove(from_)
            if not pending_senders:
                # last sender on this link has ended
                del self._upstream_map[link]
                yield InputExhausted()
            if not self._upstream_map:
                # no link has any sender left
                yield EndOfAllMarker()
            return

        raise NotImplementedError()
def mock_receiver_actor(self):
    """Return an ActorVirtualIdentity built from "receiver"."""
    receiver = ActorVirtualIdentity("receiver")
    return receiver
def mock_controller(self):
    """Return an ActorVirtualIdentity built from "CONTROLLER"."""
    controller = ActorVirtualIdentity("CONTROLLER")
    return controller
def mock_sender_actor(self):
    """Return an ActorVirtualIdentity built from "sender"."""
    sender = ActorVirtualIdentity("sender")
    return sender