def test_valid_us_itin_medium_match(self):
    """Analyze a dash-separated ITIN and expect exactly one result.

    The range helper is called with 0 and 11 (presumably the match
    offsets, i.e. the whole 11-character input) and with 0.5 and 0.6
    (presumably the score bounds).
    """
    itin = '911-70-1234'
    matches = us_itin_recognizer.analyze(itin, entities)

    assert len(matches) == 1
    only_match = matches[0]
    assert_result_within_score_range(
        only_match, entities[0], 0, 11, 0.5, 0.6)
def test_valid_us_passport_no_context(self):
    """Analyze a bare 9-digit passport number with no surrounding text.

    Exactly one result is expected; its score must be non-zero and the
    range helper is called with 0, 9 (presumably the match offsets) and
    0, 0.1 (presumably the score bounds).
    """
    passport_number = '912803456'
    matches = us_passport_recognizer.analyze(passport_number, entities)

    assert len(matches) == 1
    first = matches[0]
    # The lower bound handed to the helper is 0, so a zero score is
    # ruled out explicitly here.
    assert first.score != 0
    assert_result_within_score_range(first, entities[0], 0, 9, 0, 0.1)
def test_valid_us_itin_weak_match(self):
    """Analyze an ITIN written without separators and expect one result.

    The range helper is called with 0 and 9 (presumably the match
    offsets) and with 0.3 and 0.4 (presumably the score bounds).
    """
    undelimited_itin = '911701234'
    found = us_itin_recognizer.analyze(undelimited_itin, entities)

    assert len(found) == 1
    assert_result_within_score_range(
        found[0], entities[0], 0, 9, 0.3, 0.4)
def test_phone_number_strong_match_no_context(self):
    """Analyze a parenthesised phone number with no surrounding text.

    Exactly one result is expected. Its score must not equal 1, and the
    range helper is called with 0, 14 (presumably the match offsets) and
    0.7, EntityRecognizer.MAX_SCORE (presumably the score bounds).
    """
    phone = '(425) 882 9090'
    matches = phone_recognizer.analyze(phone, entities)

    assert len(matches) == 1
    hit = matches[0]
    assert hit.score != 1
    assert_result_within_score_range(
        hit, entities[0], 0, 14, 0.7, EntityRecognizer.MAX_SCORE)
def test_valid_us_ssn_weak_match(self):
    """Analyze an SSN written without separators and expect one result.

    The score must be non-zero, and the range helper is called with
    0, 9 (presumably the match offsets) and 0.3, 0.4 (presumably the
    score bounds).
    """
    undelimited_ssn = '078051120'
    found = us_ssn_recognizer.analyze(undelimited_ssn, entities)

    assert len(found) == 1
    match = found[0]
    assert match.score != 0
    assert_result_within_score_range(match, entities[0], 0, 9, 0.3, 0.4)
def test_date_time_full_date(self):
    """Run a standalone full date through prepare_and_analyze.

    Exactly one result is expected, checked against entities[1]. The
    range helper is called with 0, 13 (presumably the match offsets,
    i.e. the whole input) and NER_STRENGTH, EntityRecognizer.MAX_SCORE
    (presumably the score bounds).
    """
    full_date = 'May 1st, 1977'
    detected = self.prepare_and_analyze(nlp_engine, full_date)

    assert len(detected) == 1
    assert_result_within_score_range(
        detected[0],
        entities[1],
        0,
        13,
        NER_STRENGTH,
        EntityRecognizer.MAX_SCORE,
    )
def test_valid_us_ssn_medium_match(self):
    """Analyze a dash-separated SSN and expect exactly one result.

    The score must be non-zero. The range helper is called with 0, 11
    (presumably the match offsets) and 0.5, 0.6 (presumably the score
    bounds); the score is then also checked directly against the open
    interval (0.49, 0.6).
    """
    dashed_ssn = '078-05-1120'
    found = us_ssn_recognizer.analyze(dashed_ssn, entities)

    assert len(found) == 1
    match = found[0]
    assert match.score != 0
    assert_result_within_score_range(match, entities[0], 0, 11, 0.5, 0.6)
    # Explicit interval check kept alongside the helper call.
    assert 0.49 < found[0].score < 0.6
def test_person_full_name_with_context(self):
    """Run a full name followed by trailing context through the analyzer.

    Exactly one result is expected. The range helper is called with
    0, 11 (presumably the match offsets, which cover 'John Oliver') and
    NER_STRENGTH, EntityRecognizer.MAX_SCORE (presumably the score
    bounds).
    """
    # The trailing context already starts with a space, so the formatted
    # sentence has two spaces after the name.
    sentence = '{} {}'.format('John Oliver', ' is the funniest comedian')
    detected = self.prepare_and_analyze(nlp_engine, sentence)

    assert len(detected) == 1
    person = detected[0]
    assert_result_within_score_range(
        person, entities[0], 0, 11, NER_STRENGTH, EntityRecognizer.MAX_SCORE)
def test_person_first_name_with_context(self):
    """Run a first name preceded by leading context through the analyzer.

    Exactly one result is expected. The range helper is called with
    11, 14 (presumably the match offsets, which cover 'Dan' in the
    built sentence) and NER_STRENGTH, EntityRecognizer.MAX_SCORE
    (presumably the score bounds).
    """
    sentence = '{} {}'.format('my name is', 'Dan')
    detected = self.prepare_and_analyze(nlp_engine, sentence)

    assert len(detected) == 1
    assert_result_within_score_range(
        detected[0],
        entities[0],
        11,
        14,
        NER_STRENGTH,
        EntityRecognizer.MAX_SCORE,
    )
def test_date_time_day_in_month_with_year_with_context(self):
    """Run a full date preceded by leading context through the analyzer.

    Exactly one result is expected, checked against entities[1]. The
    range helper is called with 19, 32 (presumably the match offsets,
    which cover the date in the built sentence) and NER_STRENGTH,
    EntityRecognizer.MAX_SCORE (presumably the score bounds).
    """
    lead_in = 'I bought my car on'
    when = 'May 1st, 1977'
    sentence = '{} {}'.format(lead_in, when)

    detected = self.prepare_and_analyze(nlp_engine, sentence)

    assert len(detected) == 1
    date_hit = detected[0]
    assert_result_within_score_range(
        date_hit, entities[1], 19, 32,
        NER_STRENGTH, EntityRecognizer.MAX_SCORE)
def test_person_title_and_last_name_is_also_a_date_with_context_expected_person_only(
        self):
    """Run a title plus a surname that is also a month name through the analyzer.

    Exactly one result is expected, checked against entities[0]. The
    range helper is called with 17, 20 (presumably the match offsets,
    which cover 'May' in 'They call me Mr. May') and NER_STRENGTH,
    EntityRecognizer.MAX_SCORE (presumably the score bounds).
    """
    lead_in = "They call me"
    titled_name = 'Mr. May'
    sentence = '{} {}'.format(lead_in, titled_name)

    detected = self.prepare_and_analyze(nlp_engine, sentence)

    assert len(detected) == 1
    assert_result_within_score_range(
        detected[0], entities[0], 17, 20,
        NER_STRENGTH, EntityRecognizer.MAX_SCORE)
def test_valid_us_driver_license_weak_WA(self):
    """Analyze two space-separated 12-character license strings.

    Two results are expected. For each, the range helper is called with
    the span of the corresponding string (0-12 and 13-25, presumably
    match offsets) and with 0.3, 0.4 (presumably the score bounds).
    """
    first_license = 'AA1B2**9ABA7'
    second_license = 'A*1234AB*CD9'
    combined = '{} {}'.format(first_license, second_license)

    matches = us_license_recognizer.analyze(combined, entities)

    assert len(matches) == 2
    assert_result_within_score_range(
        matches[0], entities[0], 0, 12, 0.3, 0.4)
    assert_result_within_score_range(
        matches[1], entities[0], 13, 25, 0.3, 0.4)
def test_person_last_name_is_also_a_date_with_context_expected_person_only(
        self):
    """Run a name whose surname is also a month name, followed by context.

    Exactly one result is expected, checked against entities[0]. The
    range helper is called with 0, 7 (presumably the match offsets,
    which cover 'Dan May') and NER_STRENGTH, EntityRecognizer.MAX_SCORE
    (presumably the score bounds).
    """
    sentence = '{} {}'.format('Dan May', "has a bank account")
    detected = self.prepare_and_analyze(nlp_engine, sentence)

    assert len(detected) == 1

    # Diagnostic output: score, entity type and the matched substring.
    print(detected[0].score)
    print(detected[0].entity_type)
    print(sentence[detected[0].start:detected[0].end])

    assert_result_within_score_range(
        detected[0], entities[0], 0, 7,
        NER_STRENGTH, EntityRecognizer.MAX_SCORE)
def test_valid_us_ssn_very_weak_match(self):
    """Analyze two SSNs that each carry a single, misplaced dash.

    Both inputs are 10 characters long and are joined by one space, so
    two results are expected. For each result the score must be
    non-zero, and the range helper is called with the span of the
    corresponding input (0-10 and 11-21, presumably match offsets) and
    with 0, 0.3 (presumably the score bounds).
    """
    num1 = '078-051120'
    num2 = '07805-1120'
    results = us_ssn_recognizer.analyze(
        '{} {}'.format(num1, num2), entities)

    assert len(results) == 2

    # The lower bound passed to the helper is 0, so a zero score is
    # ruled out explicitly for each match.
    assert results[0].score != 0
    assert_result_within_score_range(
        results[0], entities[0], 0, 10, 0, 0.3)

    # Fixed copy-paste slip: this check previously repeated results[0],
    # leaving the second match's score unverified against zero.
    assert results[1].score != 0
    assert_result_within_score_range(
        results[1], entities[0], 11, 21, 0, 0.3)