Ejemplo n.º 1
0
    def add_processor(self,
                      processor: BaseProcessor,
                      config: Optional[HParams] = None,
                      selector: Optional[Selector] = None):
        """Register ``processor`` as the next step of the pipeline.

        Args:
            processor: The processor to add. It is indexed under its
                ``component_name``.
            config: The configuration stored alongside the processor. It is
                stored as given, including ``None``.
            selector: The selector stored alongside the processor. When
                ``None``, a newly created ``DummySelector`` is stored.
        """
        # The slot is read from `self.processors` before the processor is
        # appended, so it is the index the processor is about to occupy.
        slot = len(self.processors)
        self._processors_index[processor.component_name] = slot

        self._processors.append(processor)
        self.processor_configs.append(config)
        self._selectors.append(
            DummySelector() if selector is None else selector)
Ejemplo n.º 2
0
    def add_processor(self, processor: BaseProcessor,
                      config: Optional[Union[HParams, Dict[str, Any]]] = None,
                      selector: Optional[Selector] = None):
        """Append ``processor`` with its config and selector.

        Args:
            processor: The processor to add. It is indexed under its
                ``component_name``.
            config: Configuration for the processor. When ``None``, the
                result of ``processor.default_configs()`` is used in its
                place. In both cases the stored value is
                ``HParams(config, processor.default_configs())``.
            selector: The selector stored alongside the processor. When
                ``None``, a newly created ``DummySelector`` is stored.
        """
        # The slot is read from `self.processors` before the append below.
        slot = len(self.processors)
        self._processors_index[processor.component_name] = slot
        self._processors.append(processor)

        # Fall back to the processor's defaults when no config was given,
        # then wrap the result together with a fresh copy of the defaults.
        supplied = processor.default_configs() if config is None else config
        wrapped = HParams(supplied, processor.default_configs())
        self.processor_configs.append(wrapped)

        self._selectors.append(
            DummySelector() if selector is None else selector)
Ejemplo n.º 3
0
    def add(self, component: PipelineComponent,
            config: Optional[Union[Config, Dict[str, Any]]] = None,
            selector: Optional[Selector] = None):
        """Add a non-reader component to the end of the pipeline.

        Args:
            component: The component to add. It is indexed under its
                ``name`` and is handed the pipeline's managers through
                ``assign_manager``.
            config: Passed to ``component.make_configs``; the value it
                returns is what gets stored for this component.
            selector: The selector stored alongside the component. When
                ``None``, a newly created ``DummySelector`` is stored.

        Raises:
            ProcessFlowException: If ``component`` is a ``BaseReader``. No
                pipeline state is modified in that case.
        """
        # Reject readers before touching any pipeline state, so a failed call
        # does not leave a stale entry in the name -> index map.
        if isinstance(component, BaseReader):
            raise ProcessFlowException(
                "Reader need to be set via set_reader()")

        # Index this component will occupy once it is appended below.
        position = len(self.components)
        self._processors_index[component.name] = position

        if isinstance(component, Evaluator):
            # This will ask the job to keep a copy of the gold standard.
            self.evaluator_indices.append(position)

        component.assign_manager(self._proc_mgr, self._pack_manager)
        self._components.append(component)
        self.processor_configs.append(component.make_configs(config))

        self._selectors.append(
            DummySelector() if selector is None else selector)
Ejemplo n.º 4
0
 def __init__(self):
     """Run the parent initializer, then attach a default selector.

     ``self.selector`` is set to a newly created ``DummySelector``.
     """
     super().__init__()
     default_selector = DummySelector()
     self.selector = default_selector
Ejemplo n.º 5
0
    def __init__(
        self,
        resource: Optional[Resources] = None,
        ontology_file: Optional[str] = None,
        enforce_consistency: bool = False,
        do_init_type_check: bool = False,
    ):
        r"""Create an empty, uninitialized pipeline.

        Args:
            resource: The ``Resources`` object, which is a global registry used
                in the pipeline. Objects defined as ``Resources`` will be
                passed on to the processors in the pipeline for
                initialization. A new ``Resources`` is created when ``None``.
            ontology_file: The path to the input ontology specification file,
                which should be a json file, and it should have all the entries
                inside with no import as key. When ``None``, the
                ``base_ontology.json`` resource of the
                ``forte.ontology_specs`` package is used.
            enforce_consistency: This boolean determines whether the
                pipeline will check the content expectations specified in each
                pipeline component. Each component will check whether the input
                pack contains the expected data via checking the meta-data,
                and throws a
                :class:`~forte.common.exception.ExpectedEntryNotFound` if it
                fails. When ``enforce_consistency`` is ``True``, all the
                pipeline components check whether the input datapack record
                matches the expected types and attributes, provided the
                function ``expected_types_and_attributes`` is implemented for
                the processor. For example, processor A requires entry type
                ``ft.onto.base_ontology.Sentence``, and processor B produces
                this type in the output datapack, so the ``record`` function
                of processor B writes the record of this type in the datapack
                and processor A implements ``expected_types_and_attributes``
                to add this type. Then when the pipeline runs with
                `enforce_consistency=True`, processor A checks whether this
                type exists in the record of the output of the previous
                pipeline component.
            do_init_type_check: Determine whether to check records types and
                attributes during pipeline initialization. Default to `False`.
                If this boolean is set to `True`, each component in the
                pipeline will be validated by comparing its
                ``expected_types_and_attributes`` with the accumulated
                ``records`` from all the downstream components.

        Raises:
            OSError: If the ontology specification file cannot be opened.
            ValueError: If the ontology specification file is not valid
                JSON (``json.JSONDecodeError`` is a ``ValueError``).
        """
        self._reader: BaseReader
        self._reader_config: Optional[Config] = None

        # These variables define the units in the pipeline; they should be
        # of the same length.
        self._components: List[PipelineComponent] = []
        self._selectors: List[Selector] = []
        self._configs: List[Optional[Config]] = []

        # Maintain a set of the pipeline components to quickly check whether
        # a component is already there.
        self.__component_set: Set[PipelineComponent] = set()

        # Will be initialized at `initialize` because the number of
        # processors is unknown until then.
        self._proc_mgr: ProcessManager = None  # type: ignore

        self.evaluator_indices: List[int] = []

        # Needed for the evaluator.
        self._predict_to_gold: Dict[int, PackType] = {}

        self.resource = Resources() if resource is None else resource

        if ontology_file is None:
            # `resources.path` is a context manager, so the path it yields is
            # only relied upon inside the `with` block: the spec is read
            # before the context exits (an extracted temporary copy, if any,
            # could be removed afterwards).
            with resources.path("forte.ontology_specs",
                                "base_ontology.json") as data_path:
                ontology_file = str(data_path)
                with open(ontology_file, "r", encoding="utf-8") as f:
                    spec_dict = json.load(f)
        else:
            # JSON text is read as UTF-8 rather than the platform default.
            with open(ontology_file, "r", encoding="utf-8") as f:
                spec_dict = json.load(f)

        # Only reached when the spec was loaded successfully.
        self.resource.update(onto_specs_path=ontology_file)
        self.resource.update(onto_specs_dict=spec_dict)

        # The flag indicating whether this pipeline is initialized.
        self._initialized: bool = False
        # The flag indicating whether we want to enforce type consistency
        # between the processors.
        self._check_type_consistency: bool = enforce_consistency

        # Create one copy of the dummy selector to reduce class creation.
        self.__default_selector: Selector = DummySelector()

        # Needed for time profiling of the pipeline.
        self._enable_profiling: bool = False
        self._profiler: List[float] = []

        # Indicates whether to do type checking during pipeline
        # initialization.
        self._do_init_type_check: bool = do_init_type_check
Ejemplo n.º 6
0
 def __init__(self):
     """Set the component's name and attach a default selector.

     ``self.component_name`` is the value returned by
     ``get_full_module_name(self)``; ``self.selector`` is a newly created
     ``DummySelector``.
     """
     full_name = get_full_module_name(self)
     self.component_name = full_name
     default_selector = DummySelector()
     self.selector = default_selector