def _process_with_component(
        self, selector: Selector, component: PipelineComponent,
        raw_job: ProcessJob):
    """Apply ``component`` to each pack the selector yields for a job.

    Dispatch depends on the component's type:

    * ``Caster``: the job's pack is replaced by ``component.cast(pack)``.
    * ``BaseBatchProcessor`` / ``BaseProcessor``: the pack is tagged with
      the component name and handed to ``component.process``.
    * ``Evaluator`` (that is not a batch processor): the pack is tagged
      and passed to ``component.consume_next`` together with the entry of
      ``self._predict_to_gold`` stored under the job id.

    Components of any other type are not invoked. In every case
    ``pack.add_all_remaining_entries()`` is called afterwards.

    Args:
        selector: Provides ``select(pack)``, whose results are iterated.
        component: The pipeline component to run.
        raw_job: The job whose pack is selected from (and possibly
            replaced when the component is a caster).

    Raises:
        ProcessExecutionException: If a ``ValueError`` is raised while
            handling a pack; the original error is chained as the cause.
    """
    # Everything except casters follows "tag the pack, then act on it".
    tagging_types = (BaseBatchProcessor, Evaluator, BaseProcessor)

    for selected in selector.select(raw_job.pack):
        try:
            if isinstance(component, Caster):
                # Swap the job's pack for the casted version.
                raw_job.alter_pack(component.cast(selected))
            elif isinstance(component, tagging_types):
                selected.set_control_component(component.name)
                # Batch processors take precedence over evaluators, which
                # in turn take precedence over plain processors.
                acts_as_evaluator = (
                    isinstance(component, Evaluator)
                    and not isinstance(component, BaseBatchProcessor)
                )
                if acts_as_evaluator:
                    component.consume_next(
                        selected, self._predict_to_gold[raw_job.id])
                else:
                    component.process(selected)

            # Once the component has run, flush any entries that have not
            # been added to the index yet.
            selected.add_all_remaining_entries()
        except ValueError as err:
            raise ProcessExecutionException(
                f'Exception occurred when running {component.name}'
            ) from err
def __next__(self) -> ProcessJob:
    """Return the next job to work on.

    If the process manager already points at a queue (its
    ``current_queue_index`` is not ``-1``), the job at that queue's
    unprocessed index in ``current_queue`` is returned. Otherwise a new
    pack is pulled from the data iterator, wrapped in a ``ProcessJob``,
    placed on queue 0, and returned; the manager's queue and processor
    indices are reset to 0.

    When the data iterator runs out, a ``ProcessJob(None, True)`` is
    enqueued and returned once (presumably an end-of-stream marker job;
    confirm against ``ProcessJob``), and the following call that needs
    new data raises ``StopIteration``.

    Returns:
        The job selected or created as described above.

    Raises:
        StopIteration: If no queue is active and the data source was
            already found to be exhausted.
    """
    manager = self.__process_manager

    if manager.current_queue_index != -1:
        # A queue is active: hand back its current unprocessed job.
        q_index = manager.current_queue_index
        u_index = manager.unprocessed_queue_indices[q_index]
        return manager.current_queue[u_index]

    if self.__data_exhausted:
        # Both the buffer is empty and the data input is exhausted.
        raise StopIteration

    # Only the iterator call is guarded, so a StopIteration escaping from
    # job construction, gold-pack copying or enqueueing is never mistaken
    # for the end of the input data.
    try:
        job_pack = next(self.__data_iter)
    except StopIteration:
        self.__data_exhausted = True
        job = ProcessJob(None, True)
    else:
        job = ProcessJob(job_pack, False)
        if len(self.__pipeline.evaluator_indices) > 0:
            # Evaluators are present: register a view of the incoming
            # pack as the gold pack for this job id.
            gold_copy = job_pack.view()
            self.__pipeline.add_gold_packs({job.id: gold_copy})

    manager.add_to_queue(queue_index=0, job=job)
    manager.current_queue_index = 0
    manager.current_processor_index = 0
    return job
def add_to_queue(self, queue_index: int, job: ProcessJob): if queue_index > len(self._queues): raise ValueError(f"Queue number {queue_index} exceeds queue " f"size {len(self._queues)}") else: # change the job status job.set_status(ProcessJobStatus.UNPROCESSED) self._queues[queue_index].append(job)
def add_to_queue(self, queue_index: int, job: ProcessJob): """ Add a job to a particular queue. Args: queue_index: The queue that the job is to be added. job: The job to be added. Returns: """ if queue_index > len(self._queues): raise ValueError(f"Queue number {queue_index} exceeds queue " f"size {len(self._queues)}") else: # When a job is added to a queue, it will be # consider as unprocessed. job.set_status(ProcessJobStatus.UNPROCESSED) self._queues[queue_index].append(job)