예제 #1
0
    def initialize(self, resources: Resources, configs: Config):
        """Prepare the data request and the batcher from ``configs``.

        On the first call every entry of ``configs`` is copied into
        ``self._request``; the ``feature_scheme`` entry is parsed with
        ``parse_feature_extractors`` and stored under ``"schemes"``. The
        batcher's ``context_type`` is then set to the one in the request,
        ``do_eval`` is recorded, the parent ``initialize`` runs, and every
        scheme of type ``DATA_INPUT`` is added to the batcher.

        Args:
            resources: Forwarded unchanged to the parent ``initialize``.
            configs: Configuration of this component; ``batcher`` and
                ``do_eval`` are read from it here.

        Raises:
            ProcessorConfigError: If the batcher config carries a
                ``context_type`` that differs from the one in the request.
        """
        # `_request_ready` ensures the feature scheme is parsed only once,
        # however many times `initialize` ends up being called.
        if not self._request_ready:
            for name, setting in configs.items():
                if name != "feature_scheme":
                    self._request[name] = setting
                    continue
                self._request["schemes"] = parse_feature_extractors(
                    configs.feature_scheme)
            self._request_ready = True

        # The batcher must work on the same context type as the predictor,
        # so an explicitly configured value has to agree with the request.
        batcher_config = configs.batcher
        batcher_context = configs["batcher"].get("context_type", None)
        conflicting = (
            batcher_context is not None
            and batcher_context != self._request["context_type"])
        if conflicting:
            message = (
                "The 'context_type' configuration value should be the same "
                "for the processor and the batcher, now for the processor the "
                f"value is {self._request['context_type']} and for the "
                f"batcher the value is {batcher_context}. It is also fine if "
                "this value for batch config is left empty.")
            raise ProcessorConfigError(message)
        batcher_config.context_type = self._request["context_type"]
        self.do_eval = configs.do_eval

        # The parent call comes after the batcher config has been completed,
        # because it needs that config to be loaded.
        super().initialize(resources, configs)

        # Only input features are handed to the batcher.
        for tag, scheme in self._request["schemes"].items():
            if scheme["type"] != extractor_utils.DATA_INPUT:
                continue
            self.batcher.add_feature_scheme(tag, scheme)  # type: ignore
예제 #2
0
def parse_feature_extractors(scheme_configs: Config) -> Dict[str, Any]:
    """Turn feature scheme configurations into feature requests.

    Every entry of ``scheme_configs`` maps a tag to a scheme configuration.
    A scheme must contain an ``extractor`` field and a ``type`` field whose
    value is ``"data_input"`` or ``"data_output"``; it may contain a
    ``converter`` field. The extractor configuration may name a
    ``vocab_path`` from which a pickled vocabulary is loaded.

    Args:
        scheme_configs: Mapping from tag to scheme configuration.

    Returns:
        A dictionary mapping each tag to a dictionary with the keys
        ``"type"`` (``DATA_INPUT`` or ``DATA_OUTPUT``), ``"extractor"``
        (the initialized extractor instance) and ``"converter"`` (the given
        converter, or ``Converter({})`` when none is configured).

    Raises:
        AssertionError: If a scheme lacks ``extractor`` or ``type``, or if
            ``type`` is not one of the two accepted values.
        RuntimeError: If the configured extractor or converter class does
            not produce an instance of ``BaseExtractor`` / ``Converter``.
    """
    valid_types = ("data_input", "data_output")
    feature_requests: Dict[str, Any] = {}

    for tag, scheme_config in scheme_configs.items():
        # Raised explicitly rather than with `assert` so that the checks are
        # not stripped under `python -O`; the exception type and messages
        # stay the same for existing callers.
        if "extractor" not in scheme_config:
            raise AssertionError(
                "Field not found for data request scheme: `extractor`"
            )
        if "type" not in scheme_config:
            raise AssertionError(
                "Field not found for data request scheme: `type`"
            )
        scheme_type = scheme_config["type"]
        if scheme_type not in valid_types:
            raise AssertionError(
                "Type field must be either data_input or data_output."
            )

        request: Dict[str, Any] = {
            "type": DATA_INPUT if scheme_type == "data_input" else DATA_OUTPUT
        }

        extractor_config = scheme_config["extractor"]
        extractor_class = get_class(extractor_config["class_name"])
        extractor: BaseExtractor = extractor_class()
        if not isinstance(extractor, BaseExtractor):
            # A single formatted string, so the message does not render as
            # a tuple of arguments.
            raise RuntimeError(f"Invalid extractor: {extractor_config}")

        extractor.initialize(config=extractor_config["config"])

        # Load vocab from disk if provided.
        # SECURITY: unpickling can execute arbitrary code, so `vocab_path`
        # must only ever point to a trusted file.
        if "vocab_path" in extractor_config:
            with open(extractor_config["vocab_path"], "rb") as vocab_file:
                extractor.vocab = pickle.load(vocab_file)

        request["extractor"] = extractor

        if "converter" not in scheme_config:
            # Create default converter if there is no given converter.
            request["converter"] = Converter({})
        else:
            converter_config = scheme_config["converter"]
            converter_class = get_class(converter_config["class_name"])
            converter: Converter = converter_class()
            if not isinstance(converter, Converter):
                raise RuntimeError(f"Invalid converter: {converter_config}")
            request["converter"] = converter

        feature_requests[tag] = request

    return feature_requests