def load_predefined_recognizers(self):
    """Append the built-in recognizers to ``self.recognizers``.

    The non-ML recognizers listed below are always added. The
    ``SpacyRecognizer`` is added afterwards unless the environment
    variable ``PRESIDIO_DISABLE_ML`` is set to exactly ``'true'``.
    """
    # TODO: Change the code to dynamic loading -
    # Task #598: Support loading of the pre-defined recognizers
    # from the given path.
    # This is intentionally not part of the init method: some recognizers
    # (SpaCy, for example) are time consuming to load, so they are only
    # created when they are actually needed.
    add_all = self.recognizers.extend
    add_all([
        CreditCardRecognizer(),
        DomainRecognizer(),
        EmailRecognizer(),
        IbanRecognizer(),
        IpRecognizer(),
        NhsRecognizer(),
        UsBankRecognizer(),
        UsLicenseRecognizer(),
        UsItinRecognizer(),
        UsPassportRecognizer(),
        UsPhoneRecognizer(),
        UsSsnRecognizer(),
    ])

    # Okera addition: the ML recognizer can be switched off through the
    # environment. Only the literal value 'true' disables it.
    ml_disabled = os.environ.get('PRESIDIO_DISABLE_ML') == 'true'
    if ml_disabled:
        logging.info("Disabling ML recognizer.")
        return

    logging.info("Enabling ML recognizer.")
    self.recognizers.extend([SpacyRecognizer()])
def load_recognizers(self, path):
    """Append a fixed set of recognizers to ``self.recognizers``.

    :param path: currently unused; reserved for the dynamic loading
        described in the TODO below.
    """
    # TODO: Change the code to dynamic loading -
    # Task #598: Support loading of the pre-defined recognizers
    # from the given path.
    add_all = self.recognizers.extend
    add_all([
        CreditCardRecognizer(),
        UsPhoneRecognizer(),
        DomainRecognizer(),
    ])
def load_predefined_recognizers(self):
    """Append every built-in recognizer to ``self.recognizers``.

    The ``SpacyRecognizer`` is always included, as the last entry.
    """
    # TODO: Change the code to dynamic loading -
    # Task #598: Support loading of the pre-defined recognizers
    # from the given path.
    # This is intentionally not part of the init method: some recognizers
    # (SpaCy, for example) are time consuming to load, so they are only
    # created when they are actually needed.
    add_all = self.recognizers.extend
    add_all([
        CreditCardRecognizer(),
        CryptoRecognizer(),
        DomainRecognizer(),
        EmailRecognizer(),
        IbanRecognizer(),
        IpRecognizer(),
        NhsRecognizer(),
        UsBankRecognizer(),
        UsLicenseRecognizer(),
        UsItinRecognizer(),
        UsPassportRecognizer(),
        UsPhoneRecognizer(),
        UsSsnRecognizer(),
        SpacyRecognizer(),
    ])
from unittest import TestCase

from assertions import assert_result
from analyzer.predefined_recognizers import DomainRecognizer
from analyzer.entity_recognizer import EntityRecognizer

# One recognizer instance and one entity list are shared by every test
# in this module.
domain_recognizer = DomainRecognizer()
entities = ["DOMAIN_NAME"]


class TestDomainRecognizer(TestCase):
    """Checks for ``DomainRecognizer.analyze`` on valid and invalid input."""

    def test_invalid_domain(self):
        """A name that ends in a bare dot produces no results."""
        text = 'microsoft.'

        found = domain_recognizer.analyze(text, entities)

        assert len(found) == 0

    def test_invalid_domain_with_exact_context(self):
        """Prefixing context words does not make an invalid domain match."""
        prefix = 'my domain is '
        candidate = 'microsoft.'

        found = domain_recognizer.analyze(prefix + candidate, entities)

        assert len(found) == 0

    def test_valid_domain(self):
        """A well-formed domain produces exactly one max-score result."""
        text = 'microsoft.com'

        found = domain_recognizer.analyze(text, entities)

        assert len(found) == 1
        # 13 is the length of 'microsoft.com'; presumably 0 and 13 are the
        # expected start/end offsets -- confirm against assert_result.
        assert_result(found[0], entities[0], 0, 13,
                      EntityRecognizer.MAX_SCORE)