Example #1
0
    def test_valid_us_itin_weak_match(self):
        """Check that an undelimited nine-digit ITIN gives one weak match.

        Expects exactly one result for ``entities[0]`` spanning characters
        0-9, with the score checked against the 0.3-0.4 range.
        """
        itin = '911701234'
        matches = us_itin_recognizer.analyze(itin, entities)
        assert len(matches) == 1

        only_match = matches[0]
        assert_result_within_score_range(
            only_match, entities[0], 0, 9, 0.3, 0.4)
Example #2
0
def test_valid_us_ssn_weak_match():
    """Check that an undelimited nine-digit SSN gives one weak match.

    Expects exactly one result for ``entities[0]`` spanning characters 0-9,
    with a non-zero score checked against the 0.3-0.4 range.
    """
    ssn = '078051120'
    matches = us_ssn_recognizer.analyze(ssn, entities)
    assert len(matches) == 1

    only_match = matches[0]
    assert only_match.score != 0
    assert_result_within_score_range(
        only_match, entities[0], 0, 9, 0.3, 0.4)
    def test_date_time_full_date(self):
        """Check that a bare full date is returned as one ``entities[1]`` result.

        The result must span the whole 13-character input and its score is
        checked against the NER_STRENGTH..MAX_SCORE range.
        """
        sentence = 'May 1st, 1977'
        matches = self.prepare_and_analyze(nlp_engine, sentence)
        assert len(matches) == 1

        assert_result_within_score_range(
            matches[0], entities[1], 0, 13,
            NER_STRENGTH, EntityRecognizer.MAX_SCORE)
Example #4
0
    def test_valid_us_passport_no_context(self):
        """Check that a nine-digit number without context gives one low match.

        Expects exactly one result for ``entities[0]`` spanning characters
        0-9, with a non-zero score checked against the 0-0.1 range.
        """
        passport_number = '912803456'
        matches = us_passport_recognizer.analyze(passport_number, entities)
        assert len(matches) == 1

        only_match = matches[0]
        assert only_match.score != 0
        assert_result_within_score_range(
            only_match, entities[0], 0, 9, 0, 0.1)
 def test_person_title_and_last_name_is_also_a_date_with_context_expected_person_only(self):
     """Check that 'May' after a title is reported once, as ``entities[0]``.

     The analyzed sentence is the context followed by the titled name; the
     single expected result covers characters 17-20 (the surname) and its
     score is checked against the NER_STRENGTH..MAX_SCORE range.
     """
     titled_name = 'Mr. May'
     lead_in = "They call me"
     sentence = '{} {}'.format(lead_in, titled_name)

     matches = self.prepare_and_analyze(nlp_engine, sentence)
     assert len(matches) == 1

     assert_result_within_score_range(
         matches[0], entities[0], 17, 20,
         NER_STRENGTH, EntityRecognizer.MAX_SCORE)
Example #6
0
    def test_valid_us_itin_medium_match(self):
        """Check that a dash-delimited ITIN gives one medium-score match.

        Expects exactly one result for ``entities[0]`` spanning characters
        0-11, with the score checked against the 0.5-0.6 range.
        """
        itin = '911-70-1234'
        matches = us_itin_recognizer.analyze(itin, entities)
        assert len(matches) == 1

        only_match = matches[0]
        assert_result_within_score_range(
            only_match, entities[0], 0, 11, 0.5, 0.6)
    def test_phone_number_strong_match_no_context(self):
        """Check that a formatted phone number alone gives one strong match.

        Expects exactly one result for ``entities[0]`` spanning characters
        0-14, whose score is not exactly 1 and is checked against the
        0.7..MAX_SCORE range.
        """
        phone = '(425) 882 9090'
        matches = phone_recognizer.analyze(phone, entities)
        assert len(matches) == 1

        only_match = matches[0]
        assert only_match.score != 1
        assert_result_within_score_range(
            only_match, entities[0], 0, 14,
            0.7, EntityRecognizer.MAX_SCORE)
Example #8
0
def test_valid_us_ssn_medium_match():
    """Check that a dash-delimited SSN gives one medium-score match.

    Expects exactly one result for ``entities[0]`` spanning characters 0-11,
    with a non-zero score checked against the 0.5-0.6 range and, directly,
    against the open interval (0.49, 0.6).
    """
    ssn = '078-05-1120'
    matches = us_ssn_recognizer.analyze(ssn, entities)
    assert len(matches) == 1

    only_match = matches[0]
    assert only_match.score != 0
    assert_result_within_score_range(
        only_match, entities[0], 0, 11, 0.5, 0.6)
    assert 0.49 < matches[0].score < 0.6
Example #9
0
    def test_valid_ipv4(self):
        """Check that an IPv4 address after a 14-character prefix is found once.

        Expects exactly one result for ``entities[0]`` spanning characters
        14-25, with the score checked against the 0.6-0.81 range.
        """
        address = '192.168.0.1'
        prefix = 'microsoft.com '
        sentence = prefix + address

        matches = ip_recognizer.analyze(sentence, entities)
        assert len(matches) == 1

        only_match = matches[0]
        assert_result_within_score_range(
            only_match, entities[0], 14, 25, 0.6, 0.81)
    def test_date_time_day_in_month_with_year_with_context(self):
        """Check that a full date after a context phrase is found once.

        Expects exactly one ``entities[1]`` result spanning characters 19-32
        (the date portion), with the score checked against the
        NER_STRENGTH..MAX_SCORE range.
        """
        full_date = 'May 1st, 1977'
        lead_in = 'I bought my car on'
        sentence = '{} {}'.format(lead_in, full_date)

        matches = self.prepare_and_analyze(nlp_engine, sentence)
        assert len(matches) == 1

        assert_result_within_score_range(
            matches[0], entities[1], 19, 32,
            NER_STRENGTH, EntityRecognizer.MAX_SCORE)
    def test_person_first_name_with_context(self):
        """Check that a first name after a context phrase is found once.

        Expects exactly one ``entities[0]`` result spanning characters 11-14
        (the name), with the score checked against the
        NER_STRENGTH..MAX_SCORE range.
        """
        first_name = 'Dan'
        lead_in = 'my name is'
        sentence = '{} {}'.format(lead_in, first_name)

        matches = self.prepare_and_analyze(nlp_engine, sentence)
        assert len(matches) == 1

        assert_result_within_score_range(
            matches[0], entities[0], 11, 14,
            NER_STRENGTH, EntityRecognizer.MAX_SCORE)
    def test_person_full_name_with_context(self):
        """Check that a full name followed by a context phrase is found once.

        Expects exactly one ``entities[0]`` result spanning characters 0-11
        (the name), with the score checked against the
        NER_STRENGTH..MAX_SCORE range.
        """
        full_name = 'John Oliver'
        trailing = ' is the funniest comedian'
        sentence = '{}{}'.format(full_name, trailing)

        matches = self.prepare_and_analyze(nlp_engine, sentence)
        assert len(matches) == 1

        assert_result_within_score_range(
            matches[0], entities[0], 0, 11,
            NER_STRENGTH, EntityRecognizer.MAX_SCORE)
    def test_valid_us_driver_license_weak_WA(self):
        """Check that two space-separated license strings give two weak matches.

        Expects two ``entities[0]`` results, spanning characters 0-12 and
        13-25, each with a score checked against the 0.3-0.4 range.
        """
        first_license = 'AA1B2**9ABA7'
        second_license = 'A*1234AB*CD9'
        sentence = '{} {}'.format(first_license, second_license)

        matches = us_license_recognizer.analyze(sentence, entities)
        assert len(matches) == 2

        first_match, second_match = matches[0], matches[1]
        assert_result_within_score_range(
            first_match, entities[0], 0, 12, 0.3, 0.4)
        assert_result_within_score_range(
            second_match, entities[0], 13, 25, 0.3, 0.4)
Example #14
0
def test_valid_us_ssn_very_weak_match():
    """Check that two oddly-delimited SSNs each give a very weak match.

    The input is two ten-character strings joined by a space. Expects two
    ``entities[0]`` results, spanning characters 0-10 and 11-21, each with a
    non-zero score checked against the 0-0.3 range.
    """
    num1 = '078-051120'
    num2 = '07805-1120'
    results = us_ssn_recognizer.analyze('{} {}'.format(num1, num2), entities)

    assert len(results) == 2

    assert results[0].score != 0
    assert_result_within_score_range(results[0], entities[0], 0, 10, 0, 0.3)

    # Each result must carry its own non-zero score, so check the second
    # result here rather than the first one again.
    assert results[1].score != 0
    assert_result_within_score_range(results[1], entities[0], 11, 21, 0, 0.3)
    def test_person_last_name_is_also_a_date_with_context_expected_person_only(self):
        """Check that 'Dan May' before a context phrase is reported once.

        Expects exactly one ``entities[0]`` result spanning characters 0-7
        (the full name), with the score checked against the
        NER_STRENGTH..MAX_SCORE range. No separate result for 'May' is
        expected, since the result count must be exactly one.
        """
        name = 'Dan May'
        context = "has a bank account"
        text = '{} {}'.format(name, context)
        results = self.prepare_and_analyze(nlp_engine, text)

        assert len(results) == 1
        assert_result_within_score_range(
            results[0], entities[0], 0, 7, NER_STRENGTH, EntityRecognizer.MAX_SCORE)
Example #16
0
    def test_valid_us_itin_very_weak_match(self):
        """Check that two oddly-delimited ITINs each give a very weak match.

        The input is two ten-character strings joined by a space. Expects
        two ``entities[0]`` results, spanning characters 0-10 and 11-21,
        each with a non-zero score checked against the 0-0.3 range.
        """
        first_itin = '911-701234'
        second_itin = '91170-1234'
        sentence = '{} {}'.format(first_itin, second_itin)

        matches = us_itin_recognizer.analyze(sentence, entities)
        assert len(matches) == 2

        first_match = matches[0]
        assert first_match.score != 0
        assert_result_within_score_range(
            first_match, entities[0], 0, 10, 0, 0.3)

        second_match = matches[1]
        assert second_match.score != 0
        assert_result_within_score_range(
            second_match, entities[0], 11, 21, 0, 0.3)
Example #17
0
def test_when_using_spacy_then_all_spacy_result_found(
    text,
    expected_len,
    expected_positions,
    entity_num,
    nlp_engine,
    nlp_recognizer,
    entities,
    ner_strength,
    max_score,
):
    """Check the number, position and score range of results for ``text``.

    The arguments are presumably supplied by pytest parametrization and
    fixtures declared outside this function -- verify against the decorators.

    ``expected_positions`` is iterated as (start, end) pairs, paired
    positionally with the analysis results. ``entity_num`` selects which
    element of ``entities`` every result is checked against, and each score
    is checked against the ``ner_strength``..``max_score`` range.
    """
    found = prepare_and_analyze(nlp_engine, nlp_recognizer, text, entities)
    assert len(found) == expected_len

    expected_entity = entities[entity_num]
    for match, span in zip(found, expected_positions):
        begin, finish = span
        assert_result_within_score_range(
            match, expected_entity, begin, finish, ner_strength, max_score)
Example #18
0
def test_when_driver_licenes_in_text_then_all_us_driver_licenses_found(
    text,
    expected_len,
    expected_positions,
    expected_score_ranges,
    recognizer,
    entities,
    max_score,
):
    """Check the number, position and score range of results for ``text``.

    The arguments are presumably supplied by pytest parametrization and
    fixtures declared outside this function -- verify against the decorators.

    Results are paired positionally with ``expected_positions`` (start, end)
    and ``expected_score_ranges`` (low, high). A high bound given as the
    string "max" is replaced by ``max_score``. Every result is checked
    against ``entities[0]``.
    """
    found = recognizer.analyze(text, entities)
    assert len(found) == expected_len

    expectations = zip(found, expected_positions, expected_score_ranges)
    for match, span, score_range in expectations:
        begin, finish = span
        low, high = score_range
        upper = max_score if high == "max" else high
        assert_result_within_score_range(
            match, entities[0], begin, finish, low, upper)
Example #19
0
def test_all_us_passports(
    text,
    expected_len,
    expected_positions,
    expected_score_ranges,
    recognizer,
    entities,
    max_score,
):
    """Check the number, position and score range of results for ``text``.

    The arguments are presumably supplied by pytest parametrization and
    fixtures declared outside this function -- verify against the decorators.

    Results are paired positionally with ``expected_positions`` (start, end)
    and ``expected_score_ranges`` (low, high). A high bound given as the
    string "max" is replaced by ``max_score``. Every result is checked
    against ``entities[0]``.
    """
    found = recognizer.analyze(text, entities)
    assert len(found) == expected_len

    expectations = zip(found, expected_positions, expected_score_ranges)
    for match, span, score_range in expectations:
        begin, finish = span
        low, high = score_range
        upper = max_score if high == "max" else high
        assert_result_within_score_range(
            match, entities[0], begin, finish, low, upper)
def test_when_snn_in_text_than_all_us_ssns_are_found(
    text,
    expected_len,
    expected_positions,
    expected_score_ranges,
    recognizer,
    entities,
    max_score,
):
    """Check the number, position and score range of results for ``text``.

    The arguments are presumably supplied by pytest parametrization and
    fixtures declared outside this function -- verify against the decorators.

    Results are first ordered by their ``start`` attribute, then paired
    positionally with ``expected_positions`` (start, end) and
    ``expected_score_ranges`` (low, high). A high bound given as the string
    "max" is replaced by ``max_score``. Every result is checked against
    ``entities[0]``.
    """
    # Sort by start offset so the pairing below does not depend on the
    # order in which the recognizer returns its results.
    ordered = sorted(
        recognizer.analyze(text, entities), key=lambda hit: hit.start)
    assert len(ordered) == expected_len

    expectations = zip(ordered, expected_positions, expected_score_ranges)
    for match, span, score_range in expectations:
        begin, finish = span
        low, high = score_range
        upper = max_score if high == "max" else high
        assert_result_within_score_range(
            match, entities[0], begin, finish, low, upper)
Example #21
0
def test_assert_result_within_score_range_uses_given_range():
    """Check that the helper accepts scores lying inside the given range.

    Each case builds a result spanning 0-10 with a known score and calls the
    helper with a range that contains that score; no assertion error is
    expected.
    """
    # (result score, range lower bound, range upper bound)
    in_range_cases = (
        (0.3, 0.2, 0.4),
        (0.1, 0.05, 0.15),
        (0.9, 0.89, 0.91),
    )
    for score, low, high in in_range_cases:
        result = RecognizerResult(ENTITY_TYPE, 0, 10, score)
        assert_result_within_score_range(
            result, ENTITY_TYPE, 0, 10, low, high)
Example #22
0
def test_assert_result_within_score_range_uses_given_range_fails():
    """Check that the helper rejects scores lying outside the given range.

    Each case builds a result spanning 0-10 with a known score and calls the
    helper with a range that excludes that score; an ``AssertionError`` is
    expected every time.
    """
    # (result score, range lower bound, range upper bound)
    out_of_range_cases = (
        (0.3, 0.4, 0.6),
        (0, 0.4, 0.6),
        (1, 0, 0.5),
    )
    for score, low, high in out_of_range_cases:
        with pytest.raises(AssertionError):
            result = RecognizerResult(ENTITY_TYPE, 0, 10, score)
            assert_result_within_score_range(
                result, ENTITY_TYPE, 0, 10, low, high)