예제 #1
0
def process_nlp_text_immediate(text: str,
                               processor: ServerProcessor,
                               username: str = "",
                               password: str = "") -> JsonObjectType:
    """
    Run NLP over a piece of text straight away, dispatching on the
    processor's type.

    Processors whose ``proctype`` equals ``PROCTYPE_GATE`` are handled by
    :func:`process_nlp_gate` (which also receives the credentials); all
    others are handled by :func:`process_nlp_internal`, after making sure
    the processor has a parser set.

    Args:
        text:
            text to run the NLP over
        processor:
            NLP processor; a
            :class:`crate_anon.nlp_webserver.procs.Processor`
        username:
            username in use; only passed on for GATE processors
        password:
            plaintext password; only passed on for GATE processors

    Returns:
        whatever the selected handler returns (annotated as
        ``JsonObjectType``)
    """
    uses_gate = processor.proctype == PROCTYPE_GATE
    if uses_gate:
        return process_nlp_gate(text, processor, username, password)

    # Non-GATE processor: set up its parser on first use.
    if not processor.parser:
        processor.set_parser()
    return process_nlp_internal(text=text, processor=processor)
예제 #2
0
    def __init__(self, nlprp_request: JsonObjectType) -> None:
        """
        Unpack the top-level arguments of an NLPRP request into attributes.

        Args:
            nlprp_request: dictionary from the (entire) JSON NLPRP request

        Raises:
            :exc:`NlprpError` for malformed requests
        """
        self.nlprp_request = nlprp_request
        toplevel_args = json_get_toplevel_args(nlprp_request)

        # Resolve every requested processor up front, so that per-document
        # work can simply iterate over ready-made objects.
        processor_specs = json_get_array(
            toplevel_args, NKeys.PROCESSORS, required=True)
        resolved = []
        for spec in processor_specs:
            resolved.append(ServerProcessor.get_processor_nlprp(spec))
        self.processors = resolved

        # Whether the client asked for queued processing (off if absent).
        self.queue = json_get_bool(
            toplevel_args, NKeys.QUEUE, default=False)

        # The client's own job identifier (empty string if absent).
        self.client_job_id = json_get_str(
            toplevel_args, NKeys.CLIENT_JOB_ID, default="")

        # Whether the reply should include the source text.
        self.include_text = json_get_bool(toplevel_args, NKeys.INCLUDE_TEXT)

        # The content array: a list of objects (each with text and metadata).
        self.content = json_get_array(
            toplevel_args, NKeys.CONTENT, required=True)
예제 #3
0
 def get_processor_cached(_processor_id: str) -> ServerProcessor:
     """
     Cache lookups for speed. (All documents will share the same set
     of processors, so there'll be a fair bit of duplication.)
     """
     nonlocal processor_cache
     try:
         return processor_cache[_processor_id]
     except KeyError:
         _processor = ServerProcessor.get_processor_from_id(
             _processor_id)  # may raise  # noqa
         processor_cache[_processor_id] = _processor
         return _processor
예제 #4
0
    def get_remote_processors(self) -> List[ServerProcessor]:
        """
        Ask the remote server which processors it offers.

        Builds a request whose command is ``NlprpCommands.LIST_PROCESSORS``,
        posts it via ``self._post_get_json`` (with ``may_fail=False``) and
        converts each entry of the reply's processor array into a
        :class:`ServerProcessor`. This method performs the request every
        time it is called.

        Returns:
            one :class:`ServerProcessor` per entry in the reply

        Raises:
            HTTPError: if the reply's status is not a good answer according
                to ``HttpStatus.is_good_answer``; each error entry in the
                reply is logged first
        """
        # Compose and serialise the request.
        request = make_nlprp_dict()
        request[NKeys.COMMAND] = NlprpCommands.LIST_PROCESSORS
        reply = self._post_get_json(to_json_str(request), may_fail=False)

        # Bail out (after logging the server's errors) on a bad status.
        status = json_get_int(reply, NKeys.STATUS)
        if not HttpStatus.is_good_answer(status):
            for err in json_get_array(reply, NKeys.ERRORS):
                log.error(f"Error received: {err!r}")
            raise HTTPError(f"Response status was: {status}")

        return [
            ServerProcessor(
                # Looked up directly; a missing key raises KeyError:
                name=info[NKeys.NAME],
                title=info[NKeys.TITLE],
                version=info[NKeys.VERSION],
                # Falls back to True when the key is absent:
                is_default_version=info.get(NKeys.IS_DEFAULT_VERSION, True),
                description=info[NKeys.DESCRIPTION],
                # Optional, with fallbacks:
                schema_type=info.get(NKeys.SCHEMA_TYPE, NlprpValues.UNKNOWN),
                sql_dialect=info.get(NKeys.SQL_DIALECT, ""),
                tabular_schema=info.get(NKeys.TABULAR_SCHEMA)
            )
            for info in reply[NKeys.PROCESSORS]
        ]
예제 #5
0
파일: procs.py 프로젝트: Phdmani/crate
from crate_anon.nlp_webserver.constants import (
    KEY_PROCTYPE,
    NlpServerConfigKeys,
)
from crate_anon.nlp_webserver.server_processor import ServerProcessor
from crate_anon.nlp_webserver.settings import SETTINGS

# Module-level initialisation: load the user-configured processor definitions
# file and build a ServerProcessor (with its parser set) for every entry in
# it. All of this runs once, when this module is imported.
log = logging.getLogger(__name__)

# Path to the Python file that defines PROCESSORS, taken from the settings.
proc_file = SETTINGS[NlpServerConfigKeys.PROCESSORS_PATH]
# from processor_file import PROCESSORS  # doesn't work, need importlib

# Import the processors module using the full path as it is configurable.
# The module is loaded under the name "processors" and executed immediately,
# so that its PROCESSORS attribute is available below.
spec = importlib.util.spec_from_file_location("processors", proc_file)
processors = importlib.util.module_from_spec(spec)
spec.loader.exec_module(processors)

# Each entry of PROCESSORS is read like a dictionary: keys accessed with
# proc[...] must be present (KeyError otherwise); keys accessed with
# proc.get(...) yield None when absent.
for proc in processors.PROCESSORS:
    # NOTE(review): "x" is not stored anywhere here; presumably
    # ServerProcessor registers each instance on construction -- confirm.
    x = ServerProcessor(
        name=proc[NlprpKeys.NAME],
        title=proc[NlprpKeys.TITLE],
        version=proc[NlprpKeys.VERSION],
        is_default_version=proc[NlprpKeys.IS_DEFAULT_VERSION],
        description=proc[NlprpKeys.DESCRIPTION],
        proctype=proc.get(KEY_PROCTYPE),  # may be None
        schema_type=proc[NlprpKeys.SCHEMA_TYPE],  # 'unknown' or 'tabular'
        sql_dialect=proc.get(NlprpKeys.SQL_DIALECT),
        tabular_schema=proc.get(NlprpKeys.TABULAR_SCHEMA))
    # Doing this here saves time per request
    x.set_parser()