def initialize(self, resources: Resources, configs: Config):
    """Prepare the predictor: build the data request, align the batcher's
    context type with the predictor's, and register input feature schemes
    with the batcher.
    """
    # Build `_request` only once; `_request_ready` guards against
    # re-parsing the feature scheme on repeated `initialize` calls.
    if not self._request_ready:
        for cfg_name, cfg_value in configs.items():
            if cfg_name == "feature_scheme":
                self._request["schemes"] = parse_feature_extractors(
                    configs.feature_scheme)
            else:
                self._request[cfg_name] = cfg_value
        self._request_ready = True

    batcher_config = configs.batcher
    # The batcher must use the same context type as the predictor; an
    # unset batcher value simply inherits the predictor's.
    batcher_context = configs["batcher"].get("context_type", None)
    if (batcher_context is not None
            and batcher_context != self._request["context_type"]):
        raise ProcessorConfigError(
            "The 'context_type' configuration value should be the same "
            "for the processor and the batcher, now for the processor the "
            f"value is {self._request['context_type']} and for the "
            f"batcher the value is {batcher_context}. It is also fine if "
            f"this value for batch config is left empty.")
    batcher_config.context_type = self._request["context_type"]

    self.do_eval = configs.do_eval

    # Parent initialization comes after the batcher config has been
    # adjusted above, since it loads the batcher configuration.
    super().initialize(resources, configs)

    # Hand every input-side scheme over to the batcher.
    for tag, scheme in self._request["schemes"].items():
        if scheme["type"] == extractor_utils.DATA_INPUT:
            self.batcher.add_feature_scheme(tag, scheme)  # type: ignore
def parse_feature_extractors(scheme_configs: Config) -> Dict[str, Any]: feature_requests: Dict[str, Any] = {} for tag, scheme_config in scheme_configs.items(): assert ( "extractor" in scheme_config ), "Field not found for data request scheme: `extractor`" assert ( "type" in scheme_config ), "Field not found for data request scheme: `type`" assert scheme_config["type"] in [ "data_input", "data_output", ], "Type field must be either data_input or data_output." feature_requests[tag] = {} if scheme_config["type"] == "data_input": feature_requests[tag]["type"] = DATA_INPUT elif scheme_config["type"] == "data_output": feature_requests[tag]["type"] = DATA_OUTPUT extractor_class = get_class(scheme_config["extractor"]["class_name"]) extractor: BaseExtractor = extractor_class() if not isinstance(extractor, BaseExtractor): raise RuntimeError( "Invalid extractor: ", scheme_config["extractor"] ) extractor.initialize(config=scheme_config["extractor"]["config"]) # Load vocab from disk if provided. if "vocab_path" in scheme_config["extractor"]: with open( scheme_config["extractor"]["vocab_path"], "rb" ) as vocab_file: extractor.vocab = pickle.load(vocab_file) feature_requests[tag]["extractor"] = extractor if "converter" not in scheme_config: # Create default converter if there is no given converter feature_requests[tag]["converter"] = Converter({}) else: converter_class = get_class( scheme_config["converter"]["class_name"] ) converter: Converter = converter_class() if not isinstance(converter, Converter): raise RuntimeError( "Invalid converter: ", scheme_config["converter"] ) feature_requests[tag]["converter"] = converter return feature_requests