def _process_with_component(
        self, selector: Selector, component: PipelineComponent,
        raw_job: ProcessJob):
    """
    Run a single pipeline component over every pack the selector picks
    out of the job's pack.

    The action taken depends on the component type, checked in this
    order: ``Caster``, ``BaseBatchProcessor``, ``Evaluator``,
    ``BaseProcessor``. A component matching none of them performs no
    action, but the pack's remaining entries are still added.

    Args:
        selector: Its ``select`` method yields the packs to work on from
            ``raw_job.pack``.
        component: The pipeline component to apply.
        raw_job: The job being processed; a ``Caster`` replaces its pack
            through ``alter_pack``.

    Raises:
        ProcessExecutionException: If a ``ValueError`` is raised while
            the component handles a pack (chained to the original error).
    """

    def apply_to(target):
        # Guard-clause dispatch; the order of the checks is significant
        # because a component may be an instance of several of these types.
        if isinstance(component, Caster):
            # The job pack is swapped for the casted version.
            raw_job.alter_pack(component.cast(target))
            return

        if isinstance(component, BaseBatchProcessor):
            target.set_control_component(component.name)
            component.process(target)
            return

        if isinstance(component, Evaluator):
            target.set_control_component(component.name)
            component.consume_next(
                target, self._predict_to_gold[raw_job.id]
            )
            return

        if isinstance(component, BaseProcessor):
            # Should be BasePackProcessor: every remaining processor is
            # treated as a streaming processor.
            target.set_control_component(component.name)
            component.process(target)

    for selected_pack in selector.select(raw_job.pack):
        try:
            apply_to(selected_pack)
            # Once the component has acted, make sure all pending entries
            # end up in the index.
            selected_pack.add_all_remaining_entries()
        except ValueError as err:
            raise ProcessExecutionException(
                f'Exception occurred when running '
                f'{component.name}') from err
def add(self, component: PipelineComponent,
        config: Optional[Union[Config, Dict[str, Any]]] = None,
        selector: Optional[Selector] = None):
    """
    Append a component to the end of the pipeline.

    Args:
        component: The component to add. Readers are rejected here.
        config: Custom configuration handed to the component's
            ``make_configs``. Defaults to ``None``.
        selector: The selector stored alongside the component. When
            ``None``, a fresh ``DummySelector`` is stored instead.

    Raises:
        ProcessFlowException: If ``component`` is a ``BaseReader``.
    """
    # Validate before touching any pipeline state: previously the name was
    # written into `_processors_index` even when the reader was rejected,
    # leaving a stale entry behind.
    if isinstance(component, BaseReader):
        raise ProcessFlowException("Reader need to be set via set_reader()")

    component.assign_manager(self._proc_mgr, self._pack_manager)

    # Build the configuration before registering anything so that a failure
    # in `make_configs` cannot leave the parallel lists out of sync.
    component_config = component.make_configs(config)

    position = len(self.components)
    self._processors_index[component.name] = position

    if isinstance(component, Evaluator):
        # This will ask the job to keep a copy of the gold standard.
        self.evaluator_indices.append(position)

    self._components.append(component)
    self.processor_configs.append(component_config)
    self._selectors.append(
        DummySelector() if selector is None else selector)
def add(
        self, component: PipelineComponent,
        config: Optional[Union[Config, Dict[str, Any]]] = None,
        selector: Optional[Selector] = None,
) -> "Pipeline":
    """
    Adds a pipeline component to the pipeline. The pipeline components
    will form a chain based on the insertion order. The customized
    `config` and `selector` (:class:`~forte.data.selector.Selector`)
    will be associated with this particular component. If the `config`
    or the `selector` is not provided, the default ones will be used.

    Here, note that the same component instance can be added multiple
    times to the pipeline. In such cases, the instance will only be
    set up at the first insertion (i.e. its `initialize` function will
    only be called once). The subsequent insertion of the same component
    instance will not change the behavior nor the states of the instance.
    Thus, a different `config` cannot be provided (should be `None`) when
    added the second time, otherwise a `ProcessorConfigError` will be
    thrown. In the case where one wants them to behave differently, a
    different instance should be used.

    Args:
        component (PipelineComponent): The component to be inserted
            next to the pipeline.
        config (Union[Config, Dict[str, Any]]): The custom configuration
            to be used for the added component. Default None, which
            means the `default_configs()` of the component will be used.
        selector (Selector): The selector used to pick the
            corresponding data pack to be consumed by the component.
            Default None, which means the whole pack will be used.

    Returns:
        The pipeline itself, which enables one to chain the creation of
        the pipeline, i.e., you can do:

        .. code-block:: python

            Pipeline().set_reader(your_reader()).add(
                your_processor()).add(another_processor())

    Raises:
        ProcessFlowException: If `component` is a `BaseReader`.
        ProcessorConfigError: If the same component instance is added
            again together with a non-`None` `config`.
    """
    if isinstance(component, BaseReader):
        raise ProcessFlowException(
            "Reader need to be set via set_reader()")

    # All validation happens before any state is mutated. Previously the
    # evaluator index was recorded before the duplicate check could raise,
    # leaving a dangling index that would point at the next added component.
    first_insertion = component not in self.__component_set
    if not first_insertion and config is not None:
        raise ProcessorConfigError(
            f"The same instance of a component named {component.name} "
            f" has already been added to"
            f" the pipeline, we do not accept a different configuration"
            f" for it. If you would like to use a differently"
            f" configured component, please create another instance."
            f" If you intend to re-use the component instance, please"
            f" do not provide the `config` (or provide a `None`).")

    # Resolve the config up front so a failure in `make_configs` cannot
    # leave the component list and the config list with different lengths.
    # For a re-used instance a `None` placeholder keeps the config list
    # aligned with the component list; that placeholder should not be used.
    component_config = (
        component.make_configs(config) if first_insertion else None)

    if isinstance(component, Evaluator):
        # This will ask the job to keep a copy of the gold standard.
        self.evaluator_indices.append(len(self.components))

    self._components.append(component)
    if first_insertion:
        self.__component_set.add(component)
    self.component_configs.append(component_config)

    self._selectors.append(
        self.__default_selector if selector is None else selector)

    return self