def test_valid_jcb_credit_card(self):
    """A standalone JCB card number yields one max-score match over 0-16."""
    card_number = '3528000700000000'
    matches = credit_card_recognizer.analyze(card_number, entities)

    assert len(matches) == 1
    only_match = matches[0]
    assert_result(only_match, entities[0], 0, 16,
                  EntityRecognizer.MAX_SCORE)
def test_valid_email_no_context(self):
    """An email address analyzed on its own yields one max-score match."""
    # NOTE(review): the literal below is 14 characters long while the
    # asserted span is 0-18; the address looks masked -- confirm against
    # the real fixture.
    address = '*****@*****.**'
    matches = email_recognizer.analyze(address, entities)

    assert len(matches) == 1
    only_match = matches[0]
    assert_result(only_match, entities[0], 0, 18,
                  EntityRecognizer.MAX_SCORE)
def test_valid_cartebleue_credit_card(self):
    """A standalone Carte Bleue number yields one max-score match over 0-16."""
    card_number = '5555555555554444'
    matches = credit_card_recognizer.analyze(card_number, entities)

    assert len(matches) == 1
    only_match = matches[0]
    assert_result(only_match, entities[0], 0, 16,
                  EntityRecognizer.MAX_SCORE)
def test_valid_maestro_credit_card(self):
    """A standalone Maestro number yields one max-score match over 0-16."""
    card_number = '6759649826438453'
    matches = credit_card_recognizer.analyze(card_number, entities)

    assert len(matches) == 1
    only_match = matches[0]
    assert_result(only_match, entities[0], 0, 16,
                  EntityRecognizer.MAX_SCORE)
def test_valid_diners_credit_card(self):
    """A standalone 14-digit Diners number yields one max-score match."""
    card_number = '30569309025904'
    matches = credit_card_recognizer.analyze(card_number, entities)

    assert len(matches) == 1
    only_match = matches[0]
    assert_result(only_match, entities[0], 0, 14,
                  EntityRecognizer.MAX_SCORE)
def test_valid_btc(self):
    """A standalone 34-character wallet address yields one max-score match."""
    wallet_address = '16Yeky6GMjeNkAiNcBY7ZhrLoMSgg1BoyZ'
    matches = crypto_recognizer.analyze(wallet_address, entities)

    assert len(matches) == 1
    only_match = matches[0]
    assert_result(only_match, entities[0], 0, 34,
                  EntityRecognizer.MAX_SCORE)
def test_valid_discover_credit_card(self):
    """A standalone Discover number yields one max-score match over 0-16."""
    card_number = '6011000400000000'
    matches = credit_card_recognizer.analyze(card_number, entities)

    assert len(matches) == 1
    only_match = matches[0]
    assert_result(only_match, entities[0], 0, 16,
                  EntityRecognizer.MAX_SCORE)
def test_person_title_and_last_name_is_also_a_date_expected_person_only(
        self):
    """'Mr. May' must produce a single result covering only 'May' (4-7).

    The surname doubles as a month name; exactly one result with the
    expected entity type and NER_STRENGTH score is asserted.
    """
    sentence = 'Mr. May'
    found = self.prepare_and_analyze(nlp_engine, sentence)

    assert len(found) == 1
    surname_hit = found[0]
    assert_result(surname_hit, entities[0], 4, 7, NER_STRENGTH)
def test_valid_visa_electron_credit_card(self):
    """A standalone Visa Electron number yields one max-score match."""
    card_number = '4917300800000000'
    matches = credit_card_recognizer.analyze(card_number, entities)

    assert len(matches) == 1
    only_match = matches[0]
    assert_result(only_match, entities[0], 0, 16,
                  EntityRecognizer.MAX_SCORE)
def test_valid_airplus_credit_card(self):
    """A standalone 15-digit AirPlus number yields one max-score match."""
    card_number = '122000000000003'
    matches = credit_card_recognizer.analyze(card_number, entities)

    assert len(matches) == 1
    only_match = matches[0]
    assert_result(only_match, entities[0], 0, 15,
                  EntityRecognizer.MAX_SCORE)
def test_valid_amex_credit_card(self):
    """A standalone 15-digit Amex number yields one max-score match."""
    card_number = '371449635398431'
    matches = credit_card_recognizer.analyze(card_number, entities)

    assert len(matches) == 1
    only_match = matches[0]
    assert_result(only_match, entities[0], 0, 15,
                  EntityRecognizer.MAX_SCORE)
def test_added_pattern_recognizer_works(self):
    """A recognizer added to the store mock is used by the analyzer engine.

    The same text is analyzed twice: before the custom "ROCKET" recognizer
    is registered (no results expected) and after (one result expected,
    spanning 0-7 with score 0.8).
    """
    rocket_pattern = Pattern("rocket pattern", r'\W*(rocket)\W*', 0.8)
    rocket_recognizer = PatternRecognizer("ROCKET",
                                          name="Rocket recognizer",
                                          patterns=[rocket_pattern])

    store_mock = RecognizerStoreApiMock()
    engine = AnalyzerEngine(
        registry=MockRecognizerRegistry(store_mock),
        nlp_engine=MockNlpEngine())

    text = "rocket is my favorite transportation"
    requested_entities = ["CREDIT_CARD", "ROCKET"]

    def run_analysis():
        # The exact same request is issued before and after registration.
        return engine.analyze(self.unit_test_guid, text=text,
                              entities=requested_entities, language='en',
                              all_fields=False)

    # Nothing has been registered yet, so the engine must find nothing.
    assert len(run_analysis()) == 0

    # Register the custom recognizer for the word "rocket".
    store_mock.add_custom_pattern_recognizer(rocket_recognizer)

    # The entity must now be reported.
    found = run_analysis()
    assert len(found) == 1
    assert_result(found[0], "ROCKET", 0, 7, 0.8)
def test_valid_domain(self):
    """A standalone domain name yields one max-score match over 0-13."""
    domain_name = 'microsoft.com'
    matches = domain_recognizer.analyze(domain_name, entities)

    assert len(matches) == 1
    only_match = matches[0]
    assert_result(only_match, entities[0], 0, 13,
                  EntityRecognizer.MAX_SCORE)
def test_valid_dankort_credit_card(self):
    """A standalone Dankort number yields one max-score match over 0-16."""
    card_number = '5019717010103742'
    matches = credit_card_recognizer.analyze(card_number, entities)

    assert len(matches) == 1
    only_match = matches[0]
    assert_result(only_match, entities[0], 0, 16,
                  EntityRecognizer.MAX_SCORE)
def test_valid_email_with_context(self):
    """An email preceded by 'my email is ' yields one max-score match."""
    # NOTE(review): the literal below is 14 characters long while the
    # asserted span (12-30) is 18 characters wide; the address looks
    # masked -- confirm against the real fixture.
    address = '*****@*****.**'
    sentence = 'my email is {}'.format(address)
    matches = email_recognizer.analyze(sentence, entities)

    assert len(matches) == 1
    only_match = matches[0]
    assert_result(only_match, entities[0], 12, 30,
                  EntityRecognizer.MAX_SCORE)
def test_valid_airplus_credit_card_with_extact_context(self):
    """An AirPlus number after 'my credit card: ' is matched at 16-31."""
    card_number = '122000000000003'
    prefix = 'my credit card: '
    sentence = prefix + card_number
    matches = credit_card_recognizer.analyze(sentence, entities)

    assert len(matches) == 1
    only_match = matches[0]
    assert_result(only_match, entities[0], 16, 31,
                  EntityRecognizer.MAX_SCORE)
def test_valid_visa_purchasing_credit_card(self):
    """A standalone Visa Purchasing number yields one max-score match.

    The score is additionally checked against the literal 1.0 before the
    shared result assertion runs.
    """
    card_number = '4484070000000000'
    matches = credit_card_recognizer.analyze(card_number, entities)

    assert len(matches) == 1
    assert matches[0].score == 1.0
    only_match = matches[0]
    assert_result(only_match, entities[0], 0, 16,
                  EntityRecognizer.MAX_SCORE)
def test_valid_btc_with_exact_context(self):
    """A wallet address after 'my wallet address is: ' is matched at 22-56."""
    wallet_address = '16Yeky6GMjeNkAiNcBY7ZhrLoMSgg1BoyZ'
    prefix = 'my wallet address is: '
    sentence = prefix + wallet_address
    matches = crypto_recognizer.analyze(sentence, entities)

    assert len(matches) == 1
    only_match = matches[0]
    assert_result(only_match, entities[0], 22, 56,
                  EntityRecognizer.MAX_SCORE)
def _assertion_coach_mentions_glob(global_ID, agg_rost_ment_file,
                                   roster_file, mgmt_list, sent_dict,
                                   result, team):
    """
    Run the argument checks used by coach_mentions_glob().

    Each argument is handed to the matching helper in ``assertions``, in
    the order listed below. ``sent_dict`` is accepted but is not passed to
    any check here.
    """
    assertions.assert_global_ID(global_ID)

    # The aggregated roster-mentions file is checked together with the
    # roster file; the roster file is then checked on its own.
    assertions.assert_agg_roster_ment_file_format(agg_rost_ment_file,
                                                  roster_file)
    assertions.assert_roster_file_format(roster_file)

    assertions.assert_str_list(mgmt_list)
    assertions.assert_result(result)
    assertions.assert_team(team)
def test_valid_domains_lemma_text(self):
    """Two domains after 'my domains: ' are both matched at max score."""
    first_domain = 'microsoft.com'
    second_domain = 'google.co.il'
    sentence = 'my domains: {} {}'.format(first_domain, second_domain)
    matches = domain_recognizer.analyze(sentence, entities)

    assert len(matches) == 2
    # Expected (start, end) offsets, in result order.
    expected_spans = [(12, 25), (26, 38)]
    for position, (start, end) in enumerate(expected_spans):
        assert_result(matches[position], entities[0], start, end,
                      EntityRecognizer.MAX_SCORE)
def test_analyze_with_predefined_recognizers_return_results(self):
    """Only the credit card is reported when only CREDIT_CARD is requested.

    The text also contains a phone number, but a single CREDIT_CARD result
    spanning 14-33 with the maximum score is expected.
    """
    text = " Credit card: 4095-2609-9393-4932, my phone is 425 8829090"
    requested_entities = ["CREDIT_CARD"]
    language = "en"

    found = self.loaded_analyzer_engine.analyze(text, requested_entities,
                                                language, all_fields=False)

    assert len(found) == 1
    card_hit = found[0]
    assert_result(card_hit, "CREDIT_CARD", 14, 33,
                  EntityRecognizer.MAX_SCORE)
def test_multiple_emails_with_lemma_context(self):
    """Two emails embedded in one sentence are both matched at max score."""
    # NOTE(review): both literals below are 14 characters long, while the
    # asserted spans (24-42 and 46-71) imply 18- and 25-character
    # addresses; the values look masked -- confirm against the real
    # fixtures.
    first_address = '*****@*****.**'
    second_address = '*****@*****.**'
    sentence = 'try one of this emails: {} or {}'.format(first_address,
                                                         second_address)
    matches = email_recognizer.analyze(sentence, entities)

    assert len(matches) == 2
    # Expected (start, end) offsets, in result order.
    expected_spans = [(24, 42), (46, 71)]
    for position, (start, end) in enumerate(expected_spans):
        assert_result(matches[position], entities[0], start, end,
                      EntityRecognizer.MAX_SCORE)
def test_black_list_keywords_found(self):
    """Both black-listed words in the sentence are reported with score 1.0."""
    recognizer = MockRecognizer(patterns=[],
                                entity="ENTITY_1",
                                black_list=["phone", "name"],
                                context=None,
                                name=None)
    sentence = "my phone number is 555-1234, and my name is John"
    matches = recognizer.analyze(sentence, ["ENTITY_1"])

    assert len(matches) == 2
    # "phone" occupies 3-8 and "name" occupies 36-40 in the sentence.
    phone_hit = matches[0]
    assert_result(phone_hit, "ENTITY_1", 3, 8, 1.0)
    name_hit = matches[1]
    assert_result(name_hit, "ENTITY_1", 36, 40, 1.0)
def test_removed_pattern_recognizer_doesnt_work(self):
    """A recognizer removed from the store mock stops producing results.

    The same text is analyzed three times: before the "SPACESHIP"
    recognizer is registered (no results), after it is registered (one
    result, 0-10, score 0.8), and after it is removed again (no results).
    """
    spaceship_pattern = Pattern("spaceship pattern",
                                r'\W*(spaceship)\W*', 0.8)
    spaceship_recognizer = PatternRecognizer("SPACESHIP",
                                             name="Spaceship recognizer",
                                             patterns=[spaceship_pattern])

    store_mock = RecognizerStoreApiMock()
    engine = AnalyzerEngine(
        registry=MockRecognizerRegistry(store_mock),
        nlp_engine=MockNlpEngine())

    text = "spaceship is my favorite transportation"
    requested_entities = ["CREDIT_CARD", "SPACESHIP"]

    def run_analysis():
        # The exact same request is issued at every stage of the test.
        return engine.analyze(self.unit_test_guid, text=text,
                              entities=requested_entities, language='en',
                              all_fields=False)

    # Nothing has been registered yet, so the engine must find nothing.
    assert len(run_analysis()) == 0

    # Register the custom recognizer for the word "spaceship".
    store_mock.add_custom_pattern_recognizer(spaceship_recognizer)

    # The entity must now be reported.
    found = run_analysis()
    assert len(found) == 1
    assert_result(found[0], "SPACESHIP", 0, 10, 0.8)

    # Remove the recognizer by its name.
    store_mock.remove_recognizer("Spaceship recognizer")

    # With the recognizer gone, the engine must find nothing again.
    assert len(run_analysis()) == 0
def test_analyze_with_multiple_predefined_recognizers(self):
    """Both the credit card and the phone number are reported.

    A dedicated engine is built from ``self.loaded_registry`` and asked for
    CREDIT_CARD and PHONE_NUMBER; two results are expected, in that order.
    """
    # NOTE(review): in the literal as written, "425 8829090" sits at
    # offsets 47-58, yet the assertion below expects 48-59. The numbers
    # only line up if two spaces follow the comma -- confirm the literal
    # has not lost a space.
    text = " Credit card: 4095-2609-9393-4932, my phone is 425 8829090"
    requested_entities = ["CREDIT_CARD", "PHONE_NUMBER"]
    language = "en"

    # Per the original author's note, this engine differs from the shared
    # one in that it also loads SpaCy, which is what allows the phone
    # number entity to be detected.
    spacy_backed_engine = AnalyzerEngine(self.loaded_registry)
    found = spacy_backed_engine.analyze(text, requested_entities, language,
                                        all_fields=False)

    assert len(found) == 2

    card_hit = found[0]
    assert_result(card_hit, "CREDIT_CARD", 14, 33,
                  EntityRecognizer.MAX_SCORE)

    # Regex score plus the context boost (original note: 0.5 + 0.35 = 0.85).
    phone_score = (UsPhoneRecognizer.MEDIUM_REGEX_SCORE
                   + PatternRecognizer.CONTEXT_SIMILARITY_FACTOR)
    phone_hit = found[1]
    assert_result(phone_hit, "PHONE_NUMBER", 48, 59, phone_score)
def test_person_full_name_complex(self):
    """'Richard (Ric) C. Henderson' is split into three separate results.

    For every expected piece, both the text slice selected by the result's
    own start/end and the asserted offsets are checked.
    """
    text = 'Richard (Ric) C. Henderson'
    found = self.prepare_and_analyze(nlp_engine, text)

    assert len(found) == 3

    # (surface text, start, end) for each result, in order.
    expected_pieces = [
        ("Richard", 0, 7),
        ("Ric", 9, 12),
        ("C. Henderson", 14, 26),
    ]
    for position, (surface, start, end) in enumerate(expected_pieces):
        hit = found[position]
        assert text[hit.start:hit.end] == surface
        assert_result(hit, entities[0], start, end, NER_STRENGTH)
def test_valid_credit_cards(self):
    """The same card in plain, dashed and spaced form is matched three times."""
    plain = '4012888888881881'
    dashed = '4012-8888-8888-1881'
    spaced = '4012 8888 8888 1881'
    sentence = '{} {} {}'.format(plain, dashed, spaced)

    matches = credit_card_recognizer.analyze(sentence, entities)

    assert len(matches) == 3
    # Expected (start, end) offsets, in result order.
    expected_spans = [(0, 16), (17, 36), (37, 56)]
    for position, (start, end) in enumerate(expected_spans):
        assert_result(matches[position], entities[0], start, end,
                      EntityRecognizer.MAX_SCORE)
def test_us_bank_account_no_context(self):
    """A bare 12-digit number yields one match with a score of 0.05."""
    account_number = '945456787654'
    matches = us_bank_recognizer.analyze(account_number, entities)

    assert len(matches) == 1
    only_match = matches[0]
    assert_result(only_match, entities[0], 0, 12, 0.05)
def test_valid_uk_nhs_with_no_delimeters(self):
    """An undelimited 10-digit NHS number yields one match with score 1.0."""
    nhs_number = '0032698674'
    matches = nhs_recognizer.analyze(nhs_number, entities)

    assert len(matches) == 1
    only_match = matches[0]
    assert_result(only_match, entities[0], 0, 10, 1.0)
def test_valid_uk_nhs_with_spaces(self):
    """A space-separated NHS number yields one match with score 1.0."""
    nhs_number = '221 395 1837'
    matches = nhs_recognizer.analyze(nhs_number, entities)

    assert len(matches) == 1
    only_match = matches[0]
    assert_result(only_match, entities[0], 0, 12, 1.0)