コード例 #1
0
    def load_predefined_recognizers(self):
        """Append the built-in recognizers to ``self.recognizers``.

        The recognizers listed below are always added. ``SpacyRecognizer``
        is added as well unless the ``PRESIDIO_DISABLE_ML`` environment
        variable is set to exactly ``'true'``.
        """
        # TODO: Change the code to dynamic loading -
        # Task #598: Support loading of the pre-defined recognizers
        # from the given path.
        # This is deliberately kept out of the init method: some recognizers
        # (SpaCy, for example) are time consuming to load, so they are only
        # created when they are actually needed.
        builtin = [
            CreditCardRecognizer(),
            DomainRecognizer(),
            EmailRecognizer(),
            IbanRecognizer(),
            IpRecognizer(),
            NhsRecognizer(),
            UsBankRecognizer(),
            UsLicenseRecognizer(),
            UsItinRecognizer(),
            UsPassportRecognizer(),
            UsPhoneRecognizer(),
            UsSsnRecognizer(),
        ]
        self.recognizers.extend(builtin)

        # Okera addition: the ML recognizer can be switched off through the
        # environment. A missing variable, or any value other than 'true',
        # leaves it enabled.
        ml_disabled = os.environ.get('PRESIDIO_DISABLE_ML') == 'true'
        if ml_disabled:
            logging.info("Disabling ML recognizer.")
            return

        logging.info("Enabling ML recognizer.")
        self.recognizers.extend([SpacyRecognizer()])
コード例 #2
0
 def load_recognizers(self, path):
     """Append a fixed set of recognizers to ``self.recognizers``.

     ``path`` is accepted but not used yet; see the TODO below.
     """
     # TODO: Change the code to dynamic loading -
     # Task #598: Support loading of the pre-defined recognizers
     # from the given path.
     recognizer_types = (
         CreditCardRecognizer,
         UsPhoneRecognizer,
         DomainRecognizer,
     )
     # Build the complete list first so that nothing is appended if one of
     # the constructors raises.
     self.recognizers.extend(
         [recognizer_type() for recognizer_type in recognizer_types])
コード例 #3
0
 def load_predefined_recognizers(self):
     """Append one instance of every built-in recognizer to ``self.recognizers``."""
     # TODO: Change the code to dynamic loading -
     # Task #598: Support loading of the pre-defined recognizers
     # from the given path.
     # This is deliberately kept out of the init method: some recognizers
     # (SpaCy, for example) are time consuming to load, so they are only
     # created when they are actually needed.
     recognizer_types = (
         CreditCardRecognizer,
         CryptoRecognizer,
         DomainRecognizer,
         EmailRecognizer,
         IbanRecognizer,
         IpRecognizer,
         NhsRecognizer,
         UsBankRecognizer,
         UsLicenseRecognizer,
         UsItinRecognizer,
         UsPassportRecognizer,
         UsPhoneRecognizer,
         UsSsnRecognizer,
         SpacyRecognizer,
     )
     # Instantiate in the listed order and build the complete list before
     # extending, so nothing is appended if one of the constructors raises.
     self.recognizers.extend(
         [recognizer_type() for recognizer_type in recognizer_types])
コード例 #4
0
from unittest import TestCase

from assertions import assert_result
from analyzer.predefined_recognizers import DomainRecognizer
from analyzer.entity_recognizer import EntityRecognizer

# Single recognizer instance shared by every test in this module.
domain_recognizer = DomainRecognizer()
# Entity types passed to each ``analyze`` call in the tests below.
entities = ["DOMAIN_NAME"]


class TestDomainRecognizer(TestCase):
    """Checks which inputs ``domain_recognizer.analyze`` reports as domains."""

    def test_invalid_domain(self):
        """A name that ends in a dot with nothing after it yields no results."""
        text = 'microsoft.'
        findings = domain_recognizer.analyze(text, entities)

        assert len(findings) == 0

    def test_invalid_domain_with_exact_context(self):
        """Context words in front of the invalid name still yield no results."""
        prefix = 'my domain is '
        candidate = 'microsoft.'
        findings = domain_recognizer.analyze(prefix + candidate, entities)

        assert len(findings) == 0

    def test_valid_domain(self):
        """A well-formed domain is reported once, spanning the whole input."""
        text = 'microsoft.com'
        findings = domain_recognizer.analyze(text, entities)

        assert len(findings) == 1
        first = findings[0]
        # The expected span is 0..13, i.e. the full length of the input text.
        assert_result(first, entities[0], 0, len(text),
                      EntityRecognizer.MAX_SCORE)