예제 #1
0
    def load_predefined_recognizers(self):
        """Add the built-in recognizers to ``self.recognizers``.

        The pattern-based recognizers are always added. The SpaCy-based
        recognizer is added afterwards unless the environment variable
        ``PRESIDIO_DISABLE_ML`` is set to exactly ``'true'``.
        """
        # TODO: replace this hard-coded list with dynamic loading -
        # Task #598: support loading the pre-defined recognizers from a
        # given path.
        # Deliberately kept out of __init__: loading these recognizers
        # (SpaCy in particular) is time consuming, so it only happens when
        # they are actually needed.
        general_recognizers = [
            CreditCardRecognizer(),
            DomainRecognizer(),
            EmailRecognizer(),
            IbanRecognizer(),
            IpRecognizer(),
            NhsRecognizer(),
        ]
        us_recognizers = [
            UsBankRecognizer(),
            UsLicenseRecognizer(),
            UsItinRecognizer(),
            UsPassportRecognizer(),
            UsPhoneRecognizer(),
            UsSsnRecognizer(),
        ]
        # Single extend so the registry is only touched once every
        # recognizer above has been constructed.
        self.recognizers.extend(general_recognizers + us_recognizers)

        # Okera addition: the ML recognizer can be switched off through the
        # environment.
        ml_disabled = os.environ.get('PRESIDIO_DISABLE_ML') == 'true'
        if ml_disabled:
            logging.info("Disabling ML recognizer.")
            return

        logging.info("Enabling ML recognizer.")
        self.recognizers.extend([SpacyRecognizer()])
예제 #2
0
 def load_predefined_recognizers(self):
     """Add every built-in recognizer to ``self.recognizers``.

     The recognizers are instantiated in a fixed order and appended in
     that same order, with the SpaCy-based recognizer last.
     """
     # TODO: replace this hard-coded list with dynamic loading -
     # Task #598: support loading the pre-defined recognizers from a
     # given path.
     # Deliberately kept out of __init__: loading these recognizers
     # (SpaCy in particular) is time consuming, so it only happens when
     # they are actually needed.
     general_recognizers = [
         CreditCardRecognizer(),
         CryptoRecognizer(),
         DomainRecognizer(),
         EmailRecognizer(),
         IbanRecognizer(),
         IpRecognizer(),
         NhsRecognizer(),
     ]
     us_recognizers = [
         UsBankRecognizer(),
         UsLicenseRecognizer(),
         UsItinRecognizer(),
         UsPassportRecognizer(),
         UsPhoneRecognizer(),
         UsSsnRecognizer(),
     ]
     nlp_recognizers = [SpacyRecognizer()]
     # Single extend so the registry is only touched once every
     # recognizer above has been constructed.
     self.recognizers.extend(
         general_recognizers + us_recognizers + nlp_recognizers
     )
예제 #3
0
from unittest import TestCase

import os
import pytest

from analyzer import PatternRecognizer, Pattern
from analyzer.predefined_recognizers import CreditCardRecognizer, \
    UsPhoneRecognizer, DomainRecognizer, UsItinRecognizer, \
    UsLicenseRecognizer, UsBankRecognizer, UsPassportRecognizer, \
    IpRecognizer, UsSsnRecognizer
from analyzer.nlp_engine import SpacyNlpEngine, NlpArtifacts

# Recognizer instances built once, at import time, and bound to module-level
# names. Presumably shared by the test cases further down in this file so
# each test does not have to construct its own -- confirm against the tests.
ip_recognizer = IpRecognizer()
us_ssn_recognizer = UsSsnRecognizer()
phone_recognizer = UsPhoneRecognizer()
us_itin_recognizer = UsItinRecognizer()
us_license_recognizer = UsLicenseRecognizer()
us_bank_recognizer = UsBankRecognizer()
us_passport_recognizer = UsPassportRecognizer()


@pytest.fixture(scope="class")
def sentences_with_context(request):
    """ Loads up a group of sentences with relevant context words
    """

    path = os.path.dirname(__file__) + '/data/context_sentences_tests.txt'
    f = open(path, "r")
    if not f.mode == 'r':
        return []
    content = f.read()