def process_nlp_text_immediate(
    text: str,
    processor: ServerProcessor,
    username: str = "",
    password: str = "",
) -> JsonObjectType:
    """
    Run a single piece of text through the given processor straight away
    and hand back whatever the underlying processing function returns.

    Args:
        text:
            the text to run the NLP over
        processor:
            the NLP processor to use (a :class:`ServerProcessor`)
        username:
            username in use; only passed on for GATE processors
        password:
            plaintext password; only passed on for GATE processors

    Returns:
        the result of :func:`process_nlp_gate` when the processor's
        ``proctype`` is ``PROCTYPE_GATE``, otherwise the result of
        :func:`process_nlp_internal`
    """
    # GATE processors are dispatched (with credentials) to the GATE handler.
    if processor.proctype == PROCTYPE_GATE:
        return process_nlp_gate(text, processor, username, password)

    # Any other processor type runs internally; make sure it has a parser
    # before use.
    if not processor.parser:
        processor.set_parser()
    return process_nlp_internal(text=text, processor=processor)
def __init__(self, nlprp_request: JsonObjectType) -> None:
    """
    Unpack the arguments of an NLPRP request into attributes.

    Args:
        nlprp_request:
            dictionary from the (entire) JSON NLPRP request

    Raises:
        :exc:`NlprpError` for malformed requests
    """
    self.nlprp_request = nlprp_request
    args = json_get_toplevel_args(nlprp_request)

    # Resolve every requested processor once, up front, so that iterating
    # over them for each document is fast.
    processor_specs = json_get_array(
        args, NKeys.PROCESSORS, required=True
    )
    self.processors = [
        ServerProcessor.get_processor_nlprp(spec)
        for spec in processor_specs
    ]

    # Should the request be queued?
    self.queue = json_get_bool(args, NKeys.QUEUE, default=False)

    # Job ID supplied by the client (empty string if none).
    self.client_job_id = json_get_str(
        args, NKeys.CLIENT_JOB_ID, default=""
    )

    # Should the reply include the source text?
    self.include_text = json_get_bool(args, NKeys.INCLUDE_TEXT)

    # The content itself: a list of objects, each with text and metadata.
    self.content = json_get_array(args, NKeys.CONTENT, required=True)
def get_processor_cached(_processor_id: str) -> ServerProcessor: """ Cache lookups for speed. (All documents will share the same set of processors, so there'll be a fair bit of duplication.) """ nonlocal processor_cache try: return processor_cache[_processor_id] except KeyError: _processor = ServerProcessor.get_processor_from_id( _processor_id) # may raise # noqa processor_cache[_processor_id] = _processor return _processor
def get_remote_processors(self) -> List[ServerProcessor]:
    """
    Ask the remote server which processors it offers and return them.

    Builds an NLPRP request whose command is
    ``NlprpCommands.LIST_PROCESSORS``, posts it via
    ``self._post_get_json``, and turns each entry of the reply's
    processor list into a :class:`ServerProcessor`. A request is sent on
    every call.

    Returns:
        one :class:`ServerProcessor` per processor in the reply

    Raises:
        HTTPError: if the status in the reply is not a "good answer"
            (any errors in the reply are logged first)
    """
    # Build the request.
    request = make_nlprp_dict()
    request[NKeys.COMMAND] = NlprpCommands.LIST_PROCESSORS

    # Send it and check the status of the reply.
    reply = self._post_get_json(to_json_str(request), may_fail=False)
    status = json_get_int(reply, NKeys.STATUS)
    if not HttpStatus.is_good_answer(status):
        for err in json_get_array(reply, NKeys.ERRORS):
            log.error(f"Error received: {err!r}")
        raise HTTPError(f"Response status was: {status}")

    # Convert each processor description into a ServerProcessor.
    return [
        ServerProcessor(
            # Read with [] (KeyError if absent), except the default-version
            # flag, which falls back to True:
            name=info[NKeys.NAME],
            title=info[NKeys.TITLE],
            version=info[NKeys.VERSION],
            is_default_version=info.get(NKeys.IS_DEFAULT_VERSION, True),
            description=info[NKeys.DESCRIPTION],
            # Optional, with fallbacks:
            schema_type=info.get(NKeys.SCHEMA_TYPE, NlprpValues.UNKNOWN),
            sql_dialect=info.get(NKeys.SQL_DIALECT, ""),
            tabular_schema=info.get(NKeys.TABULAR_SCHEMA),
        )
        for info in reply[NKeys.PROCESSORS]
    ]
from crate_anon.nlp_webserver.constants import (
    KEY_PROCTYPE,
    NlpServerConfigKeys,
)
from crate_anon.nlp_webserver.server_processor import ServerProcessor
from crate_anon.nlp_webserver.settings import SETTINGS

log = logging.getLogger(__name__)

# Path of the Python file that defines the processors, read from settings.
proc_file = SETTINGS[NlpServerConfigKeys.PROCESSORS_PATH]

# A plain "from processor_file import PROCESSORS" doesn't work here: the
# file's location is configurable, so the module is loaded from its full
# path with importlib instead.
spec = importlib.util.spec_from_file_location("processors", proc_file)
processors = importlib.util.module_from_spec(spec)
spec.loader.exec_module(processors)

# Create a ServerProcessor for every entry in the loaded module's PROCESSORS
# list. This runs at import time.
# NOTE(review): "x" is not stored anywhere in this block; presumably
# ServerProcessor keeps track of its own instances -- confirm.
for proc in processors.PROCESSORS:
    x = ServerProcessor(
        name=proc[NlprpKeys.NAME],
        title=proc[NlprpKeys.TITLE],
        version=proc[NlprpKeys.VERSION],
        is_default_version=proc[NlprpKeys.IS_DEFAULT_VERSION],
        description=proc[NlprpKeys.DESCRIPTION],
        proctype=proc.get(KEY_PROCTYPE),  # may be None
        schema_type=proc[NlprpKeys.SCHEMA_TYPE],  # 'unknown' or 'tabular'
        sql_dialect=proc.get(NlprpKeys.SQL_DIALECT),
        tabular_schema=proc.get(NlprpKeys.TABULAR_SCHEMA))
    # Setting up the parser once here, rather than on demand, saves time
    # per request.
    x.set_parser()