def __init__(self, registry=None, nlp_engine=None, app_tracer=None,
             enable_trace_pii=False, default_score_threshold=None,
             use_recognizer_store=False, default_language="en"):
    """
    AnalyzerEngine class: Orchestrating the detection of PII entities
    and all related logic.

    Every collaborator that is not supplied (or is falsy) is replaced
    by a default instance created here.

    :param registry: instance of type RecognizerRegistry
    :param nlp_engine: instance of type NlpEngine
    (for example SpacyNlpEngine)
    :param app_tracer: instance of type AppTracer,
    used to trace the logic used during each request
    :param enable_trace_pii: bool,
    defines whether PII values should be traced or not.
    :param default_score_threshold: Minimum confidence value
    for detected entities to be returned. Falsy values (None, 0)
    are stored as 0.0.
    :param use_recognizer_store: Whether to call the
    Presidio Recognizer Store on every request to gather
    responses from custom recognizers as well
    (only applicable for the full Presidio service).
    Only consulted when no registry is passed in.
    :param default_language: stored as-is on the instance;
    presumably the language used when a request names none
    (NOTE(review): confirm against the callers).
    """
    if not nlp_engine:
        logger.info("nlp_engine not provided. Creating new "
                    "SpacyNlpEngine instance")
        nlp_engine = NLP_ENGINES["spacy"]()

    if not registry:
        logger.info("Recognizer registry not provided. "
                    "Creating default RecognizerRegistry instance")
        # The store API is only wired in when explicitly requested.
        store_api = RecognizerStoreApi() if use_recognizer_store else None
        registry = RecognizerRegistry(recognizer_store_api=store_api)

    app_tracer = app_tracer or AppTracer()

    # Engine and registry are stored before the registry is populated.
    self.nlp_engine = nlp_engine
    self.registry = registry

    # An empty registry gets the predefined recognizers loaded into it.
    if not registry.recognizers:
        registry.load_predefined_recognizers()

    self.app_tracer = app_tracer
    self.enable_trace_pii = enable_trace_pii
    self.default_score_threshold = default_score_threshold or 0.0
    self.default_language = default_language
def __init__(self, registry=None, nlp_engine=None, app_tracer=None,
             enable_trace_pii=False, default_score_threshold=None):
    """
    AnalyzerEngine class: Orchestrating the detection of PII entities
    and all related logic.

    Collaborators that are not supplied (or are falsy) are replaced by
    defaults; the classes needed for those defaults are imported only
    at the point where they are required.

    :param registry: instance of type RecognizerRegistry
    :param nlp_engine: instance of type NlpEngine
    (for example SpacyNlpEngine)
    :param app_tracer: instance of type AppTracer,
    used to trace the logic used during each request
    :param enable_trace_pii: bool,
    defines whether PII values should be traced or not.
    :param default_score_threshold: Minimum confidence value
    for detected entities to be returned. None is stored as 0;
    any other value is stored unchanged.
    """
    if not nlp_engine:
        logger.info("nlp_engine not provided. Creating new "
                    "SpacyNlpEngine instance")
        # Imported here rather than at module level, as in the original.
        from presidio_analyzer.nlp_engine import SpacyNlpEngine
        nlp_engine = SpacyNlpEngine()

    if not registry:
        logger.info("Recognizer registry not provided. "
                    "Creating default RecognizerRegistry instance")
        from presidio_analyzer import RecognizerRegistry
        registry = RecognizerRegistry()

    app_tracer = app_tracer or AppTracer()

    # Engine and registry are stored before the registry is populated.
    self.nlp_engine = nlp_engine
    self.registry = registry

    # An empty registry gets the predefined recognizers loaded into it.
    if not registry.recognizers:
        registry.load_predefined_recognizers()

    self.app_tracer = app_tracer
    self.enable_trace_pii = enable_trace_pii

    # Only None triggers the fallback; an explicit 0 is kept as given.
    self.default_score_threshold = (
        0 if default_score_threshold is None else default_score_threshold
    )
def test_when_add_recognizer_then_also_outputs_others(nlp_engine):
    """
    A registry holding one custom recognizer plus the predefined ones
    must yield exactly two results for the sample sentence.

    :param nlp_engine: NLP engine handed to the AnalyzerEngine
    (presumably a pytest fixture -- confirm in conftest).
    """
    rocket_recognizer = PatternRecognizer(
        "ROCKET",
        name="Rocket recognizer",
        patterns=[Pattern("rocket pattern", r"\W*(rocket)\W*", 0.8)],
        supported_language="en",
    )

    # Custom recognizer goes in first, then the predefined set is loaded.
    registry = RecognizerRegistry()
    registry.add_recognizer(rocket_recognizer)
    registry.load_predefined_recognizers()
    recognizer_count = len(registry.recognizers)
    assert recognizer_count > 1

    engine = AnalyzerEngine(registry=registry, nlp_engine=nlp_engine)
    findings = engine.analyze(text="Michael Jones has a rocket", language="en")

    assert len(findings) == 2
def __init__(
    self,
    registry: RecognizerRegistry = None,
    nlp_engine: NlpEngine = None,
    app_tracer: AppTracer = None,
    log_decision_process: bool = False,
    default_score_threshold: float = 0,
    supported_languages: List[str] = None,
):
    """
    Set up the analyzer, creating a default for each missing collaborator.

    :param registry: recognizer registry to use; a new RecognizerRegistry
    is created when none (or a falsy one) is given.
    :param nlp_engine: NLP engine to use; when missing, one is obtained
    from NlpEngineProvider().create_engine().
    :param app_tracer: tracer instance; a new AppTracer is created
    when missing.
    :param log_decision_process: stored as-is on the instance.
    :param default_score_threshold: stored as-is on the instance.
    :param supported_languages: list of language codes; an empty or
    missing list is replaced by ["en"].
    """
    supported_languages = supported_languages or ["en"]

    if not nlp_engine:
        logger.info("nlp_engine not provided, creating default.")
        nlp_engine = NlpEngineProvider().create_engine()

    if not registry:
        logger.info("registry not provided, creating default.")
        registry = RecognizerRegistry()

    self.app_tracer = app_tracer or AppTracer()
    self.supported_languages = supported_languages
    self.nlp_engine = nlp_engine
    self.registry = registry

    # An empty registry is filled with the predefined recognizers for
    # the configured engine and languages.
    if not registry.recognizers:
        registry.load_predefined_recognizers(
            nlp_engine=self.nlp_engine,
            languages=self.supported_languages,
        )

    self.log_decision_process = log_decision_process
    self.default_score_threshold = default_score_threshold