def test_complete_mobile_v0(self):
    """Detection of phone number

    Runs ``regex_detection`` with a proximity configuration for the
    "MOBILE" label on a text whose keyword is accented ("móvil") while the
    configured word is not ("movil"). The test requires a "MOBILE" entry
    whose first match cleans up to "780000000".

    The summary line is reused in failure messages through
    ``self.shortDescription()``, so it is kept unchanged.
    """
    sample = "móvil: ESP 780000000"
    # NOTE(review): the span lengths presumably bound the context searched
    # around a candidate for the listed words -- confirm in RegexNer.
    mobile_config = {
        "MOBILE": {
            "left_span_len": 20,
            "right_span_len": 0,
            "word_list": ["movil"],
        },
    }
    detector = ner_regex.RegexNer(regexp_config_dict=mobile_config)
    detections = detector.regex_detection(sample, full_text=sample)

    # First requirement: the label must be reported at all.
    has_mobile = "MOBILE" in detections
    self.assertTrue(
        has_mobile,
        "{} {} {}".format(
            self.shortDescription(), MSG_MOBILE_NOT_DETECTED, detections),
    )

    # Second requirement: the first reported match is the bare number.
    first_hit = detections["MOBILE"][0]
    self.assertEqual(
        clean_text(first_hit[0]),
        "780000000",
        MSG_DETECTED.format(
            self.shortDescription(), MSG_MOBILE_WRONG_DETECTED, first_hit),
    )
def test_is_not_iban_V0(self):
    """Test IBAN is not detected

    NOTE(review): despite the summary line, the assertions below require a
    "FINANCIAL_DATA" entry to be present. They compare the match attributed
    to the rule "BROAD_REG_IBAN_APPROX_V1" with the account number minus
    its trailing "4576" group. Confirm whether the title or the assertions
    reflect the intent.

    The summary line is reused in failure messages through
    ``self.shortDescription()``, so it is kept unchanged.
    """
    sample = (
        "This is the IBAN of the account ES91 2100 0418 "
        "4502 0005 1332 4576 ."
    )
    detector = ner_regex.RegexNer()
    detections = detector._detect_regexp(sample, "strict")

    has_financial = "FINANCIAL_DATA" in detections
    self.assertTrue(
        has_financial,
        "{} {} {}".format(
            self.shortDescription(), MSG_IBAN_NOT_DETECTED, detections),
    )

    # Locate the first match produced by the rule under test. When no match
    # carries that rule name the position stays at -1, i.e. the last match
    # in the list is the one that gets checked.
    rule_name = "BROAD_REG_IBAN_APPROX_V1"
    position = next(
        (
            pos
            for pos, hit in enumerate(detections["FINANCIAL_DATA"])
            if hit[1] == rule_name
        ),
        -1,
    )

    chosen = detections["FINANCIAL_DATA"][position]
    self.assertEqual(
        clean_text(chosen[0]),
        "ES9121000418450200051332",
        MSG_DETECTED.format(
            self.shortDescription(), MSG_IBAN_WRONG_DETECTED, chosen),
    )
def test_complete_phone_number(self):
    """Detection of phone number

    Runs ``regex_detection`` with a proximity configuration for the
    "PHONE" label (keyword "tel.") on the text "tel.: ESP 980000007.".
    The test requires a "PHONE" entry whose first match cleans up to
    "980000007".

    The summary line is reused in failure messages through
    ``self.shortDescription()``, so it is kept unchanged.
    """
    sample = "tel.: ESP 980000007."
    # NOTE(review): the span lengths presumably bound the context searched
    # around a candidate for the listed words -- confirm in RegexNer.
    phone_config = {
        "PHONE": {
            "left_span_len": 20,
            "right_span_len": 0,
            "word_list": ["tel."],
        },
    }
    detector = ner_regex.RegexNer(regexp_config_dict=phone_config)
    detections = detector.regex_detection(sample, full_text=sample)

    # First requirement: the label must be reported at all.
    has_phone = "PHONE" in detections
    self.assertTrue(
        has_phone,
        "{} {} {}".format(
            self.shortDescription(), MSG_PHONE_NOT_DETECTED, detections),
    )

    # Second requirement: the first reported match is the bare number.
    first_hit = detections["PHONE"][0]
    self.assertEqual(
        clean_text(first_hit[0]),
        "980000007",
        MSG_DETECTED.format(
            self.shortDescription(), MSG_PHONE_WRONG_DETECTED, first_hit),
    )
def test_complete_iban_V1(self):
    """Test the detection of the IBAN account

    Runs ``regex_detection`` with a proximity configuration for the
    "FINANCIAL_DATA" label (keyword "iban"; the text spells it "IBAN").
    The match attributed to the rule "BROAD_REG_IBAN_APPROX_V1" must clean
    up to the account number without the trailing "P" present in the text.

    The summary line is reused in failure messages through
    ``self.shortDescription()``, so it is kept unchanged.
    """
    sample = "This is the IBAN of the account ES91 2100 4334471600021142P ."
    # NOTE(review): the span lengths presumably bound the context searched
    # around a candidate for the listed words -- confirm in RegexNer.
    iban_config = {
        "FINANCIAL_DATA": {
            "left_span_len": 20,
            "right_span_len": 0,
            "word_list": ["iban"],
        },
    }
    detector = ner_regex.RegexNer(regexp_config_dict=iban_config)
    detections = detector.regex_detection(sample, full_text=sample)

    has_financial = "FINANCIAL_DATA" in detections
    self.assertTrue(
        has_financial,
        "{} {} {}".format(
            self.shortDescription(), MSG_IBAN_NOT_DETECTED, detections),
    )

    # Locate the first match produced by the rule under test. When no match
    # carries that rule name the position stays at -1, i.e. the last match
    # in the list is the one that gets checked.
    rule_name = "BROAD_REG_IBAN_APPROX_V1"
    position = next(
        (
            pos
            for pos, hit in enumerate(detections["FINANCIAL_DATA"])
            if hit[1] == rule_name
        ),
        -1,
    )

    chosen = detections["FINANCIAL_DATA"][position]
    self.assertEqual(
        clean_text(chosen[0]),
        "ES9121004334471600021142",
        MSG_DETECTED.format(
            self.shortDescription(), MSG_IBAN_WRONG_DETECTED, chosen),
    )
def test_3_broad_phone_number_v0(self):
    """Test the detection of a wrong phone number

    Runs ``_detect_regexp`` in "broad" mode on a text where the nine-digit
    number is preceded by an extra "45" group. The match attributed to the
    rule "BROAD_REG_PHONE_NUMBER_GEN_V3" must clean up to "988888888".

    The summary line is reused in failure messages through
    ``self.shortDescription()``, so it is kept unchanged.
    """
    test = "Mi teléfono es 45 988 888 888"
    ner = ner_regex.RegexNer()
    result = ner._detect_regexp(test, "broad")

    # NOTE(review): the message constant is MSG_PHONE_DETECTED although this
    # assertion fails when "PHONE" is absent -- confirm the intended constant.
    self.assertTrue(
        "PHONE" in result,
        "{} {} {}".format(
            self.shortDescription(), MSG_PHONE_DETECTED, result))

    # Default to the last match when the rule is not found, as the sibling
    # tests do. Without this default a missing rule left ``idx`` unbound and
    # the test crashed with UnboundLocalError instead of failing cleanly.
    idx = -1
    for i, _regexp in enumerate(result["PHONE"]):
        if _regexp[1] == "BROAD_REG_PHONE_NUMBER_GEN_V3":
            idx = i
            break

    self.assertEqual(
        clean_text(result["PHONE"][idx][0].strip()),
        "988888888",
        MSG_EXTRACTED.format(
            self.shortDescription(),
            MSG_PHONE_NOT_DETECTED,
            result["PHONE"][idx]))
def test_0_broad_phone_number_v0(self):
    """Test the detection of a phone number

    Runs ``_detect_regexp`` in "broad" mode on a text containing the
    number "988 888 888". The match attributed to the rule
    "BROAD_REG_PHONE_NUMBER_APPROX_V3" must clean up to "988888888".

    The summary line is reused in failure messages through
    ``self.shortDescription()``, so it is kept unchanged.
    """
    test = "Mi teléfono es 988 888 888 "
    ner = ner_regex.RegexNer()
    result = ner._detect_regexp(test, "broad")

    self.assertTrue(
        "PHONE" in result,
        "{} {} {}".format(
            self.shortDescription(), MSG_PHONE_NOT_DETECTED, result))

    # Pick the first match produced by the rule under test; when none
    # carries that rule name, idx stays at -1 and the last match is checked.
    idx = -1
    for i, _regexp in enumerate(result["PHONE"]):
        if _regexp[1] == "BROAD_REG_PHONE_NUMBER_APPROX_V3":
            idx = i
            break

    # The message used to be a plain string literal with ``{...}``
    # placeholders, so they were printed verbatim. It is now built with
    # str.format, like the other messages in this file.
    self.assertEqual(
        clean_text(result["PHONE"][idx][0].strip()),
        "988888888",
        "{} {}. Extracted {}".format(
            self.shortDescription(),
            MSG_PHONE_NOT_DETECTED,
            result["PHONE"][idx]))
def test_0_CP_MOBILE_NUMBER_V0(self):
    """Test the detection of a phone number

    Runs ``_detect_regexp`` in "broad" mode on a text containing the
    number "688 888 888". The match attributed to the rule
    "BROAD_REG_MOBILE_NUMBER_GEN_V3" under the "MOBILE" label must clean
    up to "688888888".

    The summary line is reused in failure messages through
    ``self.shortDescription()``, so it is kept unchanged.
    """
    test = "Mi teléfono móvil es 688 888 888 "
    ner = ner_regex.RegexNer()
    result = ner._detect_regexp(test, "broad")

    self.assertTrue(
        "MOBILE" in result,
        "{} {} {}".format(
            self.shortDescription(), MSG_MOBILE_NOT_DETECTED, result))

    # Default to the last match when the rule is not found, as the sibling
    # tests do. Without this default a missing rule left ``idx`` unbound and
    # the test crashed with UnboundLocalError instead of failing cleanly.
    idx = -1
    for i, _regexp in enumerate(result["MOBILE"]):
        if _regexp[1] == "BROAD_REG_MOBILE_NUMBER_GEN_V3":
            idx = i
            break

    self.assertEqual(
        clean_text(result["MOBILE"][idx][0].strip()),
        "688888888",
        MSG_EXTRACTED.format(
            self.shortDescription(),
            MSG_MOBILE_NOT_DETECTED,
            result["MOBILE"][idx]))
def test_broad_phone_number_v6(self):
    """Detection of phone number

    Runs ``_detect_regexp`` in "broad" mode on "teléfono: ESP 980000001A".
    The match attributed to the rule "BROAD_REG_PHONE_NUMBER_GEN_V3" must
    clean up to the nine digits without the trailing "A".

    The summary line is reused in failure messages through
    ``self.shortDescription()``, so it is kept unchanged.
    """
    sample = "teléfono: ESP 980000001A"
    detector = ner_regex.RegexNer()
    detections = detector._detect_regexp(sample, "broad")

    has_phone = "PHONE" in detections
    self.assertTrue(
        has_phone,
        "{} {} {}".format(
            self.shortDescription(), MSG_PHONE_NOT_DETECTED, detections),
    )

    # Locate the first match produced by the rule under test. When no match
    # carries that rule name the position stays at -1, i.e. the last match
    # in the list is the one that gets checked.
    rule_name = "BROAD_REG_PHONE_NUMBER_GEN_V3"
    position = next(
        (
            pos
            for pos, hit in enumerate(detections["PHONE"])
            if hit[1] == rule_name
        ),
        -1,
    )

    chosen = detections["PHONE"][position]
    self.assertEqual(
        clean_text(chosen[0]),
        "980000001",
        MSG_DETECTED.format(
            self.shortDescription(), MSG_PHONE_WRONG_DETECTED, chosen),
    )
def test_strict_dni_v2(self):
    """Detection of DNI

    Runs ``_detect_regexp`` in "strict" mode on a text that consists only
    of "15373458B". The match attributed to the rule "STRICT_REG_DNI_V0"
    under the "ID_DOCUMENT" label must clean up to that same value.

    The summary line is reused in failure messages through
    ``self.shortDescription()``, so it is kept unchanged.
    """
    sample = "15373458B"
    detector = ner_regex.RegexNer()
    detections = detector._detect_regexp(sample, "strict")

    has_document = "ID_DOCUMENT" in detections
    self.assertTrue(
        has_document,
        "{} {} {}".format(
            self.shortDescription(),
            MSG_ID_DOCUMENT_NOT_DETECTED,
            detections),
    )

    # Locate the first match produced by the rule under test. When no match
    # carries that rule name the position stays at -1, i.e. the last match
    # in the list is the one that gets checked.
    rule_name = "STRICT_REG_DNI_V0"
    position = next(
        (
            pos
            for pos, hit in enumerate(detections["ID_DOCUMENT"])
            if hit[1] == rule_name
        ),
        -1,
    )

    chosen = detections["ID_DOCUMENT"][position]
    self.assertEqual(
        clean_text(chosen[0]),
        "15373458B",
        MSG_DETECTED.format(
            self.shortDescription(), MSG_DNI_WRONG_DETECTED, chosen),
    )
def test_dni_with_dash(self):
    """Detection of DNI v0 rule with letter separated by dash

    Runs ``_detect_regexp`` in "strict" mode on a sentence containing
    "66666666-Y". The match attributed to the rule "STRICT_REG_DNI_V0"
    under the "ID_DOCUMENT" label must clean up to "66666666Y", i.e.
    without the dash.

    The summary line is reused in failure messages through
    ``self.shortDescription()``, so it is kept unchanged.
    """
    sample = "el dni de Juan es 66666666-Y."
    detector = ner_regex.RegexNer()
    detections = detector._detect_regexp(sample, "strict")

    has_document = "ID_DOCUMENT" in detections
    self.assertTrue(
        has_document,
        "{} {} {}".format(
            self.shortDescription(),
            MSG_ID_DOCUMENT_NOT_DETECTED,
            detections),
    )

    # Locate the first match produced by the rule under test. When no match
    # carries that rule name the position stays at -1, i.e. the last match
    # in the list is the one that gets checked.
    rule_name = "STRICT_REG_DNI_V0"
    position = next(
        (
            pos
            for pos, hit in enumerate(detections["ID_DOCUMENT"])
            if hit[1] == rule_name
        ),
        -1,
    )

    chosen = detections["ID_DOCUMENT"][position]
    self.assertEqual(
        clean_text(chosen[0]),
        "66666666Y",
        MSG_DETECTED.format(
            self.shortDescription(), MSG_DNI_WRONG_DETECTED, chosen),
    )