class SgFinRecognizer(PatternRecognizer):
    """Detect Singapore FIN/NRIC identifiers with regular expressions.

    :param patterns: Patterns to use instead of the class defaults
    :param context: Context words that raise detection confidence
    :param supported_language: Language handled by this recognizer
    :param supported_entity: Entity type produced by this recognizer
    """

    PATTERNS = [
        Pattern("Nric (weak)", r"(?i)(\b[A-Z][0-9]{7}[A-Z]\b)", 0.3),
        Pattern("Nric (medium)", r"(?i)(\b[STFG][0-9]{7}[A-Z]\b)", 0.5),
    ]

    CONTEXT = ["fin", "fin#", "nric", "nric#"]

    def __init__(
        self,
        patterns: Optional[List[Pattern]] = None,
        context: Optional[List[str]] = None,
        supported_language: str = "en",
        supported_entity: str = "SG_NRIC_FIN",
    ):
        # Fall back to the class-level defaults when no overrides are given.
        super().__init__(
            supported_entity=supported_entity,
            patterns=patterns or self.PATTERNS,
            context=context or self.CONTEXT,
            supported_language=supported_language,
        )
class SgFinRecognizer(PatternRecognizer):
    """
    Recognizes SG FIN/NRIC number using regex.

    :param patterns: List of patterns to be used by this recognizer
    :param context: List of context words to increase confidence in detection
    :param supported_language: Language this recognizer supports
    :param supported_entity: The entity this recognizer can detect
    """

    PATTERNS = [
        Pattern("Nric (weak)", r"(?i)(\b[A-Z][0-9]{7}[A-Z]\b)", 0.3),
        Pattern("Nric (medium)", r"(?i)(\b[STFG][0-9]{7}[A-Z]\b)", 0.5),
    ]

    CONTEXT = ["fin", "fin#", "nric", "nric#"]

    def __init__(
        self,
        # Type hints added for consistency with the other typed recognizers
        # in this file; defaults and behavior are unchanged.
        patterns: Optional[List[Pattern]] = None,
        context: Optional[List[str]] = None,
        supported_language: str = "en",
        supported_entity: str = "SG_NRIC_FIN",
    ):
        patterns = patterns if patterns else self.PATTERNS
        context = context if context else self.CONTEXT
        super().__init__(
            supported_entity=supported_entity,
            patterns=patterns,
            context=context,
            supported_language=supported_language,
        )
class IpRecognizer(PatternRecognizer):
    """Detect IPv4 and IPv6 addresses with regular expressions."""

    PATTERNS = [
        Pattern(
            "IPv4",
            r"\b(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b",  # noqa: E501
            0.6,
        ),
        Pattern(
            "IPv6",
            r"\s*(?!.*::.*::)(?:(?!:)|:(?=:))(?:[0-9a-f]{0,4}(?:(?<=::)|(?<!::):)){6}(?:[0-9a-f]{0,4}(?:(?<=::)|(?<!::):)[0-9a-f]{0,4}(?:(?<=::)|(?<!:)|(?<=:)(?<!::):)|(?:25[0-4]|2[0-4]\d|1\d\d|[1-9]?\d)(?:\.(?:25[0-4]|2[0-4]\d|1\d\d|[1-9]?\d)){3})\s*",  # noqa: E501
            0.6,
        ),
    ]

    CONTEXT = ["ip", "ipv4", "ipv6"]

    def __init__(
        self,
        patterns=None,
        context=None,
        supported_language="en",
        supported_entity="IP_ADDRESS",
    ):
        # Use caller overrides when provided, otherwise the class defaults.
        super().__init__(
            supported_entity=supported_entity,
            patterns=patterns or self.PATTERNS,
            context=context or self.CONTEXT,
            supported_language=supported_language,
        )
class UsPhoneRecognizer(PatternRecognizer):
    """
    Recognizes US Phone numbers using regex.

    :param patterns: List of patterns to be used by this recognizer
    :param context: List of context words to increase confidence in detection
    :param supported_language: Language this recognizer supports
    :param supported_entity: The entity this recognizer can detect
    """

    PATTERNS = [
        # BUGFIX: the second alternative previously read
        # "d{3}[-\.\s]\d{3}[-\.\s]\\d{4}" — a missing backslash before the
        # first "d" and a doubled backslash (literal "\" in a raw string)
        # before the last "d" made it unable to match any real number.
        Pattern(
            "Phone (strong)",
            r"(\(\d{3}\)\s*\d{3}[-\.\s]??\d{4}|\d{3}[-\.\s]\d{3}[-\.\s]\d{4})",
            0.7,
        ),
        Pattern("Phone (medium)", r"\b(\d{3}[-\.\s]\d{3}[-\.\s]??\d{4})\b", 0.5),
        Pattern("Phone (weak)", r"(\b\d{10}\b)", 0.05),
    ]  # pylint: disable=line-too-long,abstract-method

    CONTEXT = ["phone", "number", "telephone", "cell", "mobile", "call"]

    def __init__(
        self,
        patterns=None,
        context=None,
        supported_language="en",
        supported_entity="PHONE_NUMBER",
    ):
        patterns = patterns if patterns else self.PATTERNS
        context = context if context else self.CONTEXT
        super().__init__(
            supported_entity=supported_entity,
            patterns=patterns,
            context=context,
            supported_language=supported_language,
        )
def __init__(self):
    """Initialize with the default IPv4/IPv6 patterns and IP context words."""
    ip_patterns = [
        Pattern('IPv4', IP_V4_REGEX, 0.6),
        Pattern('IPv6', IP_V6_REGEX, 0.6),
    ]
    super().__init__(
        supported_entity="IP_ADDRESS",
        patterns=ip_patterns,
        context=IP_CONTEXT,
    )
def __init__(self):
    """Initialize with the default NRIC/FIN patterns and context words."""
    # NOTE: the trailing spaces in the pattern names are preserved as-is;
    # they are part of the stored pattern identifiers.
    nric_patterns = [
        Pattern('Nric (weak) ', WEAK_REGEX, 0.3),
        Pattern('Nric (medium) ', MEDIUM_REGEX, 0.5),
    ]
    super().__init__(
        supported_entity="SG_NRIC_FIN",
        patterns=nric_patterns,
        context=CONTEXT,
    )
def __init__(self):
    """Initialize with the default US driver-license patterns and context."""
    # NOTE: the trailing spaces in some pattern names are preserved as-is;
    # they are part of the stored pattern identifiers.
    license_patterns = [
        Pattern('Driver License - WA (weak) ', WA_WEAK_REGEX, 0.4),
        Pattern('Driver License - Alphanumeric (weak) ',
                ALPHANUMERIC_REGEX, 0.3),
        Pattern('Driver License - Digits (very weak)', DIGITS_REGEX, 0.01),
    ]
    super().__init__(
        supported_entity="US_DRIVER_LICENSE",
        patterns=license_patterns,
        context=LICENSE_CONTEXT,
    )
def __init__(self):
    """Initialize with the default US ITIN patterns and context words."""
    itin_patterns = [
        Pattern('Itin (very weak)', VERY_WEAK_REGEX, 0.05),
        Pattern('Itin (weak)', WEAK_REGEX, 0.3),
        Pattern('Itin (medium)', MEDIUM_REGEX, 0.5),
    ]
    super().__init__(
        supported_entity="US_ITIN",
        patterns=itin_patterns,
        context=CONTEXT,
    )
class UsLicenseRecognizer(PatternRecognizer):
    """
    Recognizes US driver license using regex.

    :param patterns: List of patterns to be used by this recognizer
    :param context: List of context words to increase confidence in detection
    :param supported_language: Language this recognizer supports
    :param supported_entity: The entity this recognizer can detect
    """

    PATTERNS = [
        Pattern(
            "Driver License - WA (weak)",
            r"\b((?=.*\d)([A-Z][A-Z0-9*]{11})|(?=.*\*)([A-Z][A-Z0-9*]{11}))\b",  # noqa: E501
            0.4,
        ),
        # BUGFIX: the alternative "|A-Z]{2}[0-9]{2,5}|" was missing its
        # opening "[", so that branch matched the literal text "A-Z]..."
        # instead of two uppercase letters followed by digits.
        Pattern(
            "Driver License - Alphanumeric (weak)",
            r"\b([A-Z][0-9]{3,6}|[A-Z][0-9]{5,9}|[A-Z][0-9]{6,8}|[A-Z][0-9]{4,8}|[A-Z][0-9]{9,11}|[A-Z]{1,2}[0-9]{5,6}|H[0-9]{8}|V[0-9]{6}|X[0-9]{8}|[A-Z]{2}[0-9]{2,5}|[A-Z]{2}[0-9]{3,7}|[0-9]{2}[A-Z]{3}[0-9]{5,6}|[A-Z][0-9]{13,14}|[A-Z][0-9]{18}|[A-Z][0-9]{6}R|[A-Z][0-9]{9}|[A-Z][0-9]{1,12}|[0-9]{9}[A-Z]|[A-Z]{2}[0-9]{6}[A-Z]|[0-9]{8}[A-Z]{2}|[0-9]{3}[A-Z]{2}[0-9]{4}|[A-Z][0-9][A-Z][0-9][A-Z]|[0-9]{7,8}[A-Z])\b",  # noqa: E501
            0.3,
        ),
        Pattern(
            "Driver License - Digits (very weak)",
            r"\b([0-9]{6,14}|[0-9]{16})\b",  # noqa: E501
            0.01,
        ),
    ]

    CONTEXT = [
        "driver",
        "license",
        "permit",
        "lic",
        "identification",
        "dl",
        "dls",
        "cdls",
        "id",
        "lic#",
        "driving",
    ]

    def __init__(
        self,
        patterns: Optional[List[Pattern]] = None,
        context: Optional[List[str]] = None,
        supported_language: str = "en",
        supported_entity: str = "US_DRIVER_LICENSE",
    ):
        patterns = patterns if patterns else self.PATTERNS
        context = context if context else self.CONTEXT
        super().__init__(
            supported_entity=supported_entity,
            supported_language=supported_language,
            patterns=patterns,
            context=context,
        )
def test_no_entity_for_pattern_recognizer():
    """Creating a recognizer with an empty entity list must raise."""
    with pytest.raises(ValueError):
        sample_patterns = [
            Pattern("p1", "someregex", 1.0),
            Pattern("p1", "someregex", 0.5),
        ]
        MockRecognizer(
            entity=[],
            patterns=sample_patterns,
            black_list=[],
            name=None,
            context=None,
        )
def __init__(self):
    """Initialize with the default US phone patterns and context words."""
    # Regexes and scores are sourced from class-level constants so they
    # stay in sync with UsPhoneRecognizer.
    phone_patterns = [
        Pattern('Phone (strong)',
                UsPhoneRecognizer.STRONG_REGEX,
                UsPhoneRecognizer.STRONG_REGEX_SCORE),
        Pattern('Phone (medium)',
                UsPhoneRecognizer.MEDIUM_REGEX,
                UsPhoneRecognizer.MEDIUM_REGEX_SCORE),
        Pattern('Phone (weak)',
                UsPhoneRecognizer.WEAK_REGEX,
                UsPhoneRecognizer.WEAK_REGEX_SCORE),
    ]
    super().__init__(
        supported_entity="PHONE_NUMBER",
        patterns=phone_patterns,
        context=CONTEXT,
    )
class UsLicenseRecognizer(PatternRecognizer):
    """
    Recognizes US driver license using regex.

    :param patterns: List of patterns to be used by this recognizer
    :param context: List of context words to increase confidence in detection
    :param supported_language: Language this recognizer supports
    :param supported_entity: The entity this recognizer can detect
    """

    PATTERNS = [
        Pattern(
            "Driver License - WA (weak)",
            r"\b((?=.*\d)([A-Z][A-Z0-9*]{11})|(?=.*\*)([A-Z][A-Z0-9*]{11}))\b",  # noqa: E501
            0.4,
        ),
        # BUGFIX: the alternative "|A-Z]{2}[0-9]{2,5}|" was missing its
        # opening "[", so that branch matched the literal text "A-Z]..."
        # instead of two uppercase letters followed by digits.
        Pattern(
            "Driver License - Alphanumeric (weak)",
            r"\b([A-Z][0-9]{3,6}|[A-Z][0-9]{5,9}|[A-Z][0-9]{6,8}|[A-Z][0-9]{4,8}|[A-Z][0-9]{9,11}|[A-Z]{1,2}[0-9]{5,6}|H[0-9]{8}|V[0-9]{6}|X[0-9]{8}|[A-Z]{2}[0-9]{2,5}|[A-Z]{2}[0-9]{3,7}|[0-9]{2}[A-Z]{3}[0-9]{5,6}|[A-Z][0-9]{13,14}|[A-Z][0-9]{18}|[A-Z][0-9]{6}R|[A-Z][0-9]{9}|[A-Z][0-9]{1,12}|[0-9]{9}[A-Z]|[A-Z]{2}[0-9]{6}[A-Z]|[0-9]{8}[A-Z]{2}|[0-9]{3}[A-Z]{2}[0-9]{4}|[A-Z][0-9][A-Z][0-9][A-Z]|[0-9]{7,8}[A-Z])\b",  # noqa: E501
            0.3,
        ),
        Pattern(
            "Driver License - Digits (very weak)",
            r"\b([0-9]{6,14}|[0-9]{16})\b",  # noqa: E501
            0.01,
        ),
    ]

    CONTEXT = [
        "driver",
        "license",
        "permit",
        "lic",
        "identification",
        "dl",
        "dls",
        "cdls",
        "id",
        "lic#",
        "driving",
    ]

    def __init__(
        self,
        patterns=None,
        context=None,
        supported_language="en",
        supported_entity="US_DRIVER_LICENSE",
    ):
        patterns = patterns if patterns else self.PATTERNS
        context = context if context else self.CONTEXT
        super().__init__(
            supported_entity=supported_entity,
            supported_language=supported_language,
            patterns=patterns,
            context=context,
        )
def test_remove_pattern_recognizer(self):
    """Adding then removing a custom recognizer leaves the store empty."""
    spaceship_pattern = Pattern("spaceship pattern", r'\W*(spaceship)\W*', 0.8)
    custom_recognizer = PatternRecognizer(
        "SPACESHIP",
        name="Spaceship recognizer",
        patterns=[spaceship_pattern])

    store_api_mock = RecognizerStoreApiMock()
    registry = RecognizerRegistry(store_api_mock)

    # The registry starts out with no custom recognizers.
    assert len(registry.get_custom_recognizers()) == 0

    # After adding one, it must be visible through the registry.
    store_api_mock.add_custom_pattern_recognizer(custom_recognizer)
    assert len(registry.get_custom_recognizers()) == 1

    # Removing it by name empties the registry again.
    store_api_mock.remove_recognizer("Spaceship recognizer")
    assert len(registry.get_custom_recognizers()) == 0
class DomainRecognizer(PatternRecognizer):
    """Detect domain names with a regex, validated via TLD extraction."""

    # pylint: disable=line-too-long
    PATTERNS = [
        Pattern(
            "Domain ()",
            r"\b(((([a-zA-Z0-9])|([a-zA-Z0-9][a-zA-Z0-9\-]{0,86}[a-zA-Z0-9]))\.(([a-zA-Z0-9])|([a-zA-Z0-9][a-zA-Z0-9\-]{0,73}[a-zA-Z0-9]))\.(([a-zA-Z0-9]{2,12}\.[a-zA-Z0-9]{2,12})|([a-zA-Z0-9]{2,25})))|((([a-zA-Z0-9])|([a-zA-Z0-9][a-zA-Z0-9\-]{0,162}[a-zA-Z0-9]))\.(([a-zA-Z0-9]{2,12}\.[a-zA-Z0-9]{2,12})|([a-zA-Z0-9]{2,25}))))\b",  # noqa: E501
            0.5,
        ),
    ]

    CONTEXT = ["domain", "ip"]

    def __init__(
        self,
        patterns=None,
        context=None,
        supported_language="en",
        supported_entity="DOMAIN_NAME",
    ):
        # Use caller overrides when provided, otherwise the class defaults.
        super().__init__(
            supported_entity=supported_entity,
            patterns=patterns or self.PATTERNS,
            context=context or self.CONTEXT,
            supported_language=supported_language,
        )

    def validate_result(self, pattern_text):
        """Accept a match only if it resolves to a fully-qualified domain."""
        return tldextract.extract(pattern_text).fqdn != ""
def __init__(self):
    """Initialize with the generic IBAN pattern and its context words."""
    iban_patterns = [
        Pattern('IBAN Generic', IBAN_GENERIC_REGEX, IBAN_GENERIC_SCORE),
    ]
    super().__init__(
        supported_entity="IBAN_CODE",
        patterns=iban_patterns,
        context=CONTEXT,
    )
def get_all_recognizers(self):
    """
    Returns a list of PatternRecognizer which were created from the
    recognizers stored in the underlying store.

    On an RPC failure the error is logged and an empty list is returned
    (best-effort behavior, preserved from the original implementation).
    """
    req = recognizers_store_pb2.RecognizersGetAllRequest()
    try:
        raw_recognizers = self.rs_stub.ApplyGetAll(req).recognizers
    except grpc.RpcError:
        logger.info("Failed getting recognizers from the remote store. \
Returning an empty list")
        # Same observable result as before: an empty list on failure.
        return []

    custom_recognizers = []
    for new_recognizer in raw_recognizers:
        # Was patterns.extend([...]) with a single-element list per item;
        # a comprehension expresses the one-to-one mapping directly.
        patterns = [
            Pattern(pat.name, pat.regex, pat.score)
            for pat in new_recognizer.patterns
        ]
        custom_recognizers.append(PatternRecognizer(
            name=new_recognizer.name,
            supported_entity=new_recognizer.entity,
            supported_language=new_recognizer.language,
            black_list=new_recognizer.blacklist,
            context=new_recognizer.contextPhrases,
            patterns=patterns))
    return custom_recognizers
class UsPassportRecognizer(PatternRecognizer):
    """Detect US passport numbers with a weak 9-digit regex."""

    # pylint: disable=line-too-long,abstract-method
    # Any 9-digit run can be a passport number, so the score is very low
    # and context words do most of the work (e.g., 14019033).
    PATTERNS = [
        Pattern("Passport (very weak)", r"(\b[0-9]{9}\b)", 0.05),
    ]
    CONTEXT = [
        "us", "united", "states", "passport", "passport#", "travel",
        "document",
    ]

    def __init__(
        self,
        patterns=None,
        context=None,
        supported_language="en",
        supported_entity="US_PASSPORT",
    ):
        # Use caller overrides when provided, otherwise the class defaults.
        super().__init__(
            supported_entity=supported_entity,
            patterns=patterns or self.PATTERNS,
            context=context or self.CONTEXT,
            supported_language=supported_language,
        )
def from_dict(cls, entity_recognizer_dict):
    """Build a recognizer instance from its serialized dict form."""
    raw_patterns = entity_recognizer_dict.get("patterns")
    if raw_patterns:
        # Deserialize each pattern dict before constructing the recognizer.
        entity_recognizer_dict['patterns'] = [
            Pattern.from_dict(pat) for pat in raw_patterns
        ]
    return cls(**entity_recognizer_dict)
def test_when_context_custom_recognizer_then_succeed(nlp_engine,
                                                     mock_nlp_artifacts):
    """Context enhancement must also apply to custom recognizers.

    The pattern deliberately includes a preceding space (' rocket'), so the
    regex match and the token ('rocket') are misaligned; the context-window
    logic must compensate for that offset.
    """
    rocket_recognizer = PatternRecognizer(
        supported_entity="ROCKET",
        name="rocketrecognizer",
        context=["cool"],
        patterns=[Pattern("rocketpattern", r"\\s+(rocket)", 0.3)],
    )
    text = "hi, this is a cool ROCKET"
    entities = ["ROCKET"]

    nlp_artifacts = nlp_engine.process_text(text, "en")
    no_context_results = rocket_recognizer.analyze(
        text, entities, mock_nlp_artifacts)
    context_results = rocket_recognizer.analyze(text, entities, nlp_artifacts)

    assert len(no_context_results) == len(context_results)
    # Every result must score strictly higher once context is available.
    for plain, enhanced in zip(no_context_results, context_results):
        assert plain.score < enhanced.score
class UsPassportRecognizer(PatternRecognizer):
    """Detect US passport numbers with a weak 9-digit regex.

    :param patterns: Patterns to use instead of the class defaults
    :param context: Context words that raise detection confidence
    :param supported_language: Language handled by this recognizer
    :param supported_entity: Entity type produced by this recognizer
    """

    # Any 9-digit run can be a passport number, so the score is very low
    # and context words do most of the work (e.g., 14019033).
    PATTERNS = [
        Pattern("Passport (very weak)", r"(\b[0-9]{9}\b)", 0.05),
    ]
    CONTEXT = [
        "us", "united", "states", "passport", "passport#", "travel",
        "document",
    ]

    def __init__(
        self,
        patterns: Optional[List[Pattern]] = None,
        context: Optional[List[str]] = None,
        supported_language: str = "en",
        supported_entity: str = "US_PASSPORT",
    ):
        super().__init__(
            supported_entity=supported_entity,
            patterns=patterns or self.PATTERNS,
            context=context or self.CONTEXT,
            supported_language=supported_language,
        )
class EmailRecognizer(PatternRecognizer):
    """Detect email addresses with a regex, validated via TLD extraction."""

    PATTERNS = [
        Pattern(
            "Email (Medium)",
            r"\b((([!#$%&'*+\-/=?^_`{|}~\w])|([!#$%&'*+\-/=?^_`{|}~\w][!#$%&'*+\-/=?^_`{|}~\.\w]{0,}[!#$%&'*+\-/=?^_`{|}~\w]))[@]\w+([-.]\w+)*\.\w+([-.]\w+)*)\b",  # noqa: E501
            0.5,
        ),
    ]

    CONTEXT = ["email"]

    def __init__(
        self,
        patterns=None,
        context=None,
        supported_language="en",
        supported_entity="EMAIL_ADDRESS",
    ):
        # Use caller overrides when provided, otherwise the class defaults.
        super().__init__(
            supported_entity=supported_entity,
            patterns=patterns or self.PATTERNS,
            context=context or self.CONTEXT,
            supported_language=supported_language,
        )

    def validate_result(self, pattern_text):
        """Accept a match only if its domain part yields a valid FQDN."""
        return tldextract.extract(pattern_text).fqdn != ""
def create_mock_pattern_recognizer(lang, entity, name):
    """Return a minimal PatternRecognizer stub for use in tests."""
    stub_pattern = Pattern("pat", regex="REGEX", score=1.0)
    return PatternRecognizer(
        supported_entity=entity,
        supported_language=lang,
        name=name,
        patterns=[stub_pattern],
    )
def test_from_dict(self):
    """Deserializing a pattern dict must reproduce all pattern fields."""
    deserialized = Pattern.from_dict(my_pattern_dict)
    assert my_pattern.name == deserialized.name
    assert my_pattern.score == deserialized.score
    assert my_pattern.regex == deserialized.regex
def test_added_pattern_recognizer_works(self):
    """A recognizer added to the store must start producing results."""
    rocket_pattern = Pattern("rocket pattern", r'\W*(rocket)\W*', 0.8)
    rocket_recognizer = PatternRecognizer(
        "ROCKET",
        name="Rocket recognizer",
        patterns=[rocket_pattern])

    store_api_mock = RecognizerStoreApiMock()
    analyze_engine = AnalyzerEngine(
        registry=MockRecognizerRegistry(store_api_mock),
        nlp_engine=MockNlpEngine())
    text = "rocket is my favorite transportation"
    entities = ["CREDIT_CARD", "ROCKET"]

    # Before the recognizer is registered, nothing should be detected.
    results = analyze_engine.analyze(self.unit_test_guid,
                                     text=text,
                                     entities=entities,
                                     language='en',
                                     all_fields=False)
    assert len(results) == 0

    # Register the "rocket" recognizer (case insensitive).
    store_api_mock.add_custom_pattern_recognizer(rocket_recognizer)

    # Now the same analysis must find exactly one ROCKET entity.
    results = analyze_engine.analyze(self.unit_test_guid,
                                     text=text,
                                     entities=entities,
                                     language='en',
                                     all_fields=False)
    assert len(results) == 1
    assert_result(results[0], "ROCKET", 0, 7, 0.8)
def zip_code_deny_list_recognizer():
    """Return a ZIP recognizer combining a weak regex with a deny list."""
    zip_regex = r"(\b\d{5}(?:\-\d{4})?\b)"
    weak_zip_pattern = Pattern(
        name="zip code (weak)", regex=zip_regex, score=0.01)
    return PatternRecognizer(
        supported_entity="ZIP",
        deny_list=["999"],
        patterns=[weak_zip_pattern])
class UsItinRecognizer(PatternRecognizer):
    """Detect US ITIN (Individual Taxpayer Identification Number) via regex.

    :param patterns: Patterns to use instead of the class defaults
    :param context: Context words that raise detection confidence
    :param supported_language: Language handled by this recognizer
    :param supported_entity: Entity type produced by this recognizer
    """

    PATTERNS = [
        Pattern(
            "Itin (very weak)",
            r"(\b(9\d{2})[- ]{1}((7[0-9]{1}|8[0-8]{1})|(9[0-2]{1})|(9[4-9]{1}))(\d{4})\b)|(\b(9\d{2})((7[0-9]{1}|8[0-8]{1})|(9[0-2]{1})|(9[4-9]{1}))[- ]{1}(\d{4})\b)",  # noqa: E501
            0.05,
        ),
        Pattern(
            "Itin (weak)",
            r"\b(9\d{2})((7[0-9]{1}|8[0-8]{1})|(9[0-2]{1})|(9[4-9]{1}))(\d{4})\b",  # noqa: E501
            0.3,
        ),
        Pattern(
            "Itin (medium)",
            r"\b(9\d{2})[- ]{1}((7[0-9]{1}|8[0-8]{1})|(9[0-2]{1})|(9[4-9]{1}))[- ]{1}(\d{4})\b",  # noqa: E501
            0.5,
        ),
    ]

    CONTEXT = [
        "individual", "taxpayer", "itin", "tax", "payer", "taxid", "tin",
    ]

    def __init__(
        self,
        patterns: Optional[List[Pattern]] = None,
        context: Optional[List[str]] = None,
        supported_language: str = "en",
        supported_entity: str = "US_ITIN",
    ):
        # Use caller overrides when provided, otherwise the class defaults.
        super().__init__(
            supported_entity=supported_entity,
            patterns=patterns or self.PATTERNS,
            context=context or self.CONTEXT,
            supported_language=supported_language,
        )
def from_dict(cls, entity_recognizer_dict: Dict) -> "PatternRecognizer":
    """Create instance from a serialized dict."""
    raw_patterns = entity_recognizer_dict.get("patterns")
    if raw_patterns:
        # Deserialize each pattern dict before constructing the recognizer.
        entity_recognizer_dict["patterns"] = [
            Pattern.from_dict(pat) for pat in raw_patterns
        ]
    return cls(**entity_recognizer_dict)
def __black_list_to_regex(black_list):
    """
    Converts a list of words to a single matching regex, to be analyzed by
    the regex engine as a part of the analyze logic.

    NOTE(review): the words are joined verbatim, so regex metacharacters in
    a word are interpreted by the engine — presumably intentional; confirm
    against callers before escaping.

    :param black_list: the list of words to detect
    :return: a Pattern wrapping the combined detection regex
    """
    joined_words = '|'.join(black_list)
    # Space-delimited (or line-edge) whole-word match around the group.
    regex = r"(?:^|(?<= ))(" + joined_words + r")(?:(?= )|$)"
    return Pattern(name="black_list", regex=regex, score=1.0)
class UsItinRecognizer(PatternRecognizer):
    """Detect US ITIN (Individual Taxpayer Identification Number) via regex."""

    # pylint: disable=line-too-long,abstract-method
    PATTERNS = [
        Pattern(
            "Itin (very weak)",
            r"(\b(9\d{2})[- ]{1}((7[0-9]{1}|8[0-8]{1})|(9[0-2]{1})|(9[4-9]{1}))(\d{4})\b)|(\b(9\d{2})((7[0-9]{1}|8[0-8]{1})|(9[0-2]{1})|(9[4-9]{1}))[- ]{1}(\d{4})\b)",  # noqa: E501
            0.05,
        ),
        Pattern(
            "Itin (weak)",
            r"\b(9\d{2})((7[0-9]{1}|8[0-8]{1})|(9[0-2]{1})|(9[4-9]{1}))(\d{4})\b",  # noqa: E501
            0.3,
        ),
        Pattern(
            "Itin (medium)",
            r"\b(9\d{2})[- ]{1}((7[0-9]{1}|8[0-8]{1})|(9[0-2]{1})|(9[4-9]{1}))[- ]{1}(\d{4})\b",  # noqa: E501
            0.5,
        ),
    ]

    CONTEXT = [
        "individual", "taxpayer", "itin", "tax", "payer", "taxid", "tin",
    ]

    def __init__(
        self,
        patterns=None,
        context=None,
        supported_language="en",
        supported_entity="US_ITIN",
    ):
        # Use caller overrides when provided, otherwise the class defaults.
        super().__init__(
            supported_entity=supported_entity,
            patterns=patterns or self.PATTERNS,
            context=context or self.CONTEXT,
            supported_language=supported_language,
        )
class EsNifRecognizer(PatternRecognizer):
    """
    Recognize NIF number using regex and checksum.

    :param patterns: List of patterns to be used by this recognizer
    :param context: List of context words to increase confidence in detection
    :param supported_language: Language this recognizer supports
    :param supported_entity: The entity this recognizer can detect
    :param replacement_pairs: List of tuples with potential replacement values
    for different strings to be used during pattern matching.
    This can allow a greater variety in input, for example by removing dashes
    or spaces.
    """

    PATTERNS = [
        Pattern(
            "NIF",
            r"\b[0-9]?[0-9]{7}[-]?[A-Z]\b",
            0.5,
        ),
    ]

    CONTEXT = [
        "documento nacional de identidad", "DNI", "NIF", "identificación"
    ]

    def __init__(
        self,
        patterns: Optional[List[Pattern]] = None,
        context: Optional[List[str]] = None,
        supported_language: str = "es",
        supported_entity: str = "ES_NIF",
        replacement_pairs: Optional[List[Tuple[str, str]]] = None,
    ):
        # Default sanitization strips dashes and spaces; callers may supply
        # their own replacement pairs.
        self.replacement_pairs = (replacement_pairs if replacement_pairs
                                  else [("-", ""), (" ", "")])
        context = context if context else self.CONTEXT
        patterns = patterns if patterns else self.PATTERNS
        super().__init__(
            supported_entity=supported_entity,
            patterns=patterns,
            context=context,
            supported_language=supported_language,
        )

    def validate_result(self, pattern_text: str) -> bool:  # noqa D102
        # BUGFIX: replacement_pairs was accepted and stored by __init__ but
        # never used — sanitization hardcoded dash/space removal. The stored
        # pairs are now applied, preserving the default behavior exactly.
        pattern_text = self.__sanitize_value(pattern_text,
                                             self.replacement_pairs)
        # NIF check letter: the trailing letter must equal the checksum
        # letter derived from the numeric part modulo 23.
        letter = pattern_text[-1]
        number = int("".join(filter(str.isdigit, pattern_text)))
        letters = "TRWAGMYFPDXBNJZSQVHLCKE"
        return letter == letters[number % 23]

    @staticmethod
    def __sanitize_value(text: str,
                         replacement_pairs: List[Tuple[str, str]]) -> str:
        """Apply each (search, replacement) pair to the matched text."""
        for search_string, replacement_string in replacement_pairs:
            text = text.replace(search_string, replacement_string)
        return text