예제 #1
0
 def _process_with_component(
         self, selector: Selector, component: PipelineComponent,
         raw_job: ProcessJob):
     """
     Run ``component`` on every pack the selector picks from the job.

     Args:
         selector: Provides ``select``, which is called with the pack of
             ``raw_job`` and yields the packs to work on.
         component: The pipeline component to apply. How it is invoked
             depends on its type (see ``handle_pack`` below).
         raw_job: The job being processed. Its pack is replaced when the
             component is a ``Caster``.

     Raises:
         ProcessExecutionException: If a ``ValueError`` is raised while
             applying the component to a pack or while adding the pack's
             remaining entries.
     """

     def handle_pack(current_pack):
         # The order of these checks matters: the first matching type
         # decides how the component is invoked.
         if isinstance(component, Caster):
             # A caster produces a new pack that replaces the job's pack.
             casted_pack = component.cast(current_pack)
             raw_job.alter_pack(casted_pack)
             return

         if isinstance(component, BaseBatchProcessor):
             current_pack.set_control_component(component.name)
             component.process(current_pack)
             return

         if isinstance(component, Evaluator):
             current_pack.set_control_component(component.name)
             component.consume_next(
                 current_pack, self._predict_to_gold[raw_job.id]
             )
             return

         if isinstance(component, BaseProcessor):
             # Any remaining processor is treated as a pack-level
             # (streaming) processor.
             current_pack.set_control_component(component.name)
             component.process(current_pack)

     for selected_pack in selector.select(raw_job.pack):
         try:
             handle_pack(selected_pack)
             # Once the component has run, make sure every entry it
             # created is added into the index.
             selected_pack.add_all_remaining_entries()
         except ValueError as e:
             raise ProcessExecutionException(
                 f'Exception occurred when running '
                 f'{component.name}') from e
예제 #2
0
    def __next__(self) -> ProcessJob:
        """
        Return the next job to be worked on.

        If the process manager's ``current_queue_index`` is not ``-1``, the
        job at the current unprocessed position of the current queue is
        returned. Otherwise a new pack is pulled from the data iterator,
        wrapped in a ``ProcessJob``, appended to queue 0 and returned. When
        the data iterator is exhausted, a job built as
        ``ProcessJob(None, True)`` is queued and returned once (presumably
        an end-of-data marker -- confirm against ``ProcessJob``).

        Returns:
            The job to process next.

        Raises:
            StopIteration: If ``current_queue_index`` is ``-1`` and the data
                source has already been exhausted.
        """
        manager = self.__process_manager

        if manager.current_queue_index != -1:
            # There is still buffered work: hand out the job at the current
            # unprocessed position of the active queue.
            q_index = manager.current_queue_index
            u_index = manager.unprocessed_queue_indices[q_index]
            return manager.current_queue[u_index]

        if self.__data_exhausted:
            # Both the buffer is empty and the data input is exhausted.
            raise StopIteration

        # Only the call to ``next`` may legitimately signal exhaustion, so it
        # is the only statement guarded here. A StopIteration raised by any
        # of the later steps must not be mistaken for the end of the data.
        try:
            job_pack = next(self.__data_iter)
        except StopIteration:
            self.__data_exhausted = True
            job = ProcessJob(None, True)
        else:
            job = ProcessJob(job_pack, False)

            if len(self.__pipeline.evaluator_indices) > 0:
                # Evaluators are present: register a view of the incoming
                # pack under the job id as its gold pack.
                gold_copy = job_pack.view()
                self.__pipeline.add_gold_packs({job.id: gold_copy})

        # Both kinds of job enter the first queue and reset the manager's
        # position to the start.
        manager.add_to_queue(queue_index=0, job=job)
        manager.current_queue_index = 0
        manager.current_processor_index = 0
        return job
예제 #3
0
 def add_to_queue(self, queue_index: int, job: ProcessJob):
     """
     Add a job to a particular queue and mark it as unprocessed.

     Args:
         queue_index: Index into ``self._queues`` of the queue that
             receives the job.
         job: The job to be added. Its status is set to
             ``ProcessJobStatus.UNPROCESSED`` before it is appended.

     Raises:
         ValueError: If ``queue_index`` is not smaller than the number of
             queues. The job is left untouched in that case.
     """
     # Valid positions end at ``len(self._queues) - 1``, so an index equal
     # to the length is already out of range and must be rejected here
     # rather than failing later with an IndexError.
     if queue_index >= len(self._queues):
         raise ValueError(f"Queue number {queue_index} exceeds queue "
                          f"size {len(self._queues)}")

     # A job that enters a queue always starts out as unprocessed.
     job.set_status(ProcessJobStatus.UNPROCESSED)
     self._queues[queue_index].append(job)
예제 #4
0
    def add_to_queue(self, queue_index: int, job: ProcessJob):
        """
        Add a job to a particular queue and mark it as unprocessed.

        Args:
            queue_index: Index into ``self._queues`` of the queue that the
                job is to be added to.
            job: The job to be added. Its status is set to
                ``ProcessJobStatus.UNPROCESSED`` before it is appended.

        Raises:
            ValueError: If ``queue_index`` is not smaller than the number
                of queues. The job is left untouched in that case.
        """
        # Valid positions end at ``len(self._queues) - 1``, so an index
        # equal to the length is already out of range and must be rejected
        # here rather than failing later with an IndexError.
        if queue_index >= len(self._queues):
            raise ValueError(f"Queue number {queue_index} exceeds queue "
                             f"size {len(self._queues)}")

        # When a job is added to a queue, it is considered unprocessed.
        job.set_status(ProcessJobStatus.UNPROCESSED)
        self._queues[queue_index].append(job)