Ejemplo n.º 1
0
    def test_valid_us_itin_medium_match(self):
        """Run us_itin_recognizer on the dash-separated '911-70-1234'.

        Expects exactly one result, checked against entities[0], the
        span 0-11 and the score bounds 0.5 and 0.6.
        """
        itin_text = '911-70-1234'
        matches = us_itin_recognizer.analyze(itin_text, entities)

        assert len(matches) == 1
        only_match = matches[0]
        assert_result_within_score_range(
            only_match, entities[0], 0, 11, 0.5, 0.6)
    def test_valid_us_passport_no_context(self):
        """Run us_passport_recognizer on the bare number '912803456'.

        Expects exactly one result with a non-zero score, checked against
        entities[0], the span 0-9 and the score bounds 0 and 0.1.
        """
        passport_text = '912803456'
        matches = us_passport_recognizer.analyze(passport_text, entities)

        assert len(matches) == 1
        only_match = matches[0]
        # The lower bound passed below is 0, so a zero score is ruled out
        # separately first.
        assert only_match.score != 0
        assert_result_within_score_range(
            only_match, entities[0], 0, 9, 0, 0.1)
Ejemplo n.º 3
0
    def test_valid_us_itin_weak_match(self):
        """Run us_itin_recognizer on the undelimited '911701234'.

        Expects exactly one result, checked against entities[0], the
        span 0-9 and the score bounds 0.3 and 0.4.
        """
        itin_text = '911701234'
        matches = us_itin_recognizer.analyze(itin_text, entities)

        assert len(matches) == 1
        only_match = matches[0]
        assert_result_within_score_range(
            only_match, entities[0], 0, 9, 0.3, 0.4)
Ejemplo n.º 4
0
    def test_phone_number_strong_match_no_context(self):
        """Run phone_recognizer on '(425) 882 9090' with no surrounding text.

        Expects exactly one result whose score is not 1, checked against
        entities[0], the span 0-14 and the score bounds 0.7 and
        EntityRecognizer.MAX_SCORE.
        """
        phone_text = '(425) 882 9090'
        matches = phone_recognizer.analyze(phone_text, entities)

        assert len(matches) == 1
        only_match = matches[0]
        assert only_match.score != 1
        assert_result_within_score_range(
            only_match, entities[0], 0, 14, 0.7, EntityRecognizer.MAX_SCORE)
Ejemplo n.º 5
0
    def test_valid_us_ssn_weak_match(self):
        """Run us_ssn_recognizer on the undelimited '078051120'.

        Expects exactly one result with a non-zero score, checked against
        entities[0], the span 0-9 and the score bounds 0.3 and 0.4.
        """
        ssn_text = '078051120'
        matches = us_ssn_recognizer.analyze(ssn_text, entities)

        assert len(matches) == 1
        only_match = matches[0]
        assert only_match.score != 0
        assert_result_within_score_range(
            only_match, entities[0], 0, 9, 0.3, 0.4)
Ejemplo n.º 6
0
    def test_date_time_full_date(self):
        """Analyze the standalone date 'May 1st, 1977' via prepare_and_analyze.

        Expects exactly one result, checked against entities[1], the
        span 0-13 and the score bounds NER_STRENGTH and
        EntityRecognizer.MAX_SCORE.
        """
        date_text = 'May 1st, 1977'
        matches = self.prepare_and_analyze(nlp_engine, date_text)

        assert len(matches) == 1
        only_match = matches[0]
        assert_result_within_score_range(
            only_match, entities[1], 0, 13,
            NER_STRENGTH, EntityRecognizer.MAX_SCORE)
Ejemplo n.º 7
0
    def test_valid_us_ssn_medium_match(self):
        """Run us_ssn_recognizer on the dash-separated '078-05-1120'.

        Expects exactly one result with a non-zero score, checked against
        entities[0], the span 0-11 and the score bounds 0.5 and 0.6, and
        additionally requires 0.49 < score < 0.6.
        """
        ssn_text = '078-05-1120'
        matches = us_ssn_recognizer.analyze(ssn_text, entities)

        assert len(matches) == 1
        only_match = matches[0]
        assert only_match.score != 0
        assert_result_within_score_range(
            only_match, entities[0], 0, 11, 0.5, 0.6)
        # Explicit strict-inequality check on top of the helper's range test.
        assert 0.49 < only_match.score < 0.6
Ejemplo n.º 8
0
    def test_person_full_name_with_context(self):
        """Analyze 'John Oliver' followed by a descriptive phrase.

        Expects exactly one result, checked against entities[0], the
        span 0-11 (the length of the name) and the score bounds
        NER_STRENGTH and EntityRecognizer.MAX_SCORE.
        """
        full_name = 'John Oliver'
        trailing_words = ' is the funniest comedian'
        # trailing_words already starts with a space, so the sentence holds
        # two spaces after the name; the expected span is unaffected.
        sentence = '{} {}'.format(full_name, trailing_words)
        matches = self.prepare_and_analyze(nlp_engine, sentence)

        assert len(matches) == 1
        only_match = matches[0]
        assert_result_within_score_range(
            only_match, entities[0], 0, 11,
            NER_STRENGTH, EntityRecognizer.MAX_SCORE)
Ejemplo n.º 9
0
    def test_person_first_name_with_context(self):
        """Analyze the sentence 'my name is Dan'.

        Expects exactly one result, checked against entities[0], the
        span 11-14 (where 'Dan' sits in the sentence) and the score bounds
        NER_STRENGTH and EntityRecognizer.MAX_SCORE.
        """
        first_name = 'Dan'
        leading_words = 'my name is'
        sentence = '{} {}'.format(leading_words, first_name)

        matches = self.prepare_and_analyze(nlp_engine, sentence)
        assert len(matches) == 1
        only_match = matches[0]
        assert_result_within_score_range(
            only_match, entities[0], 11, 14,
            NER_STRENGTH, EntityRecognizer.MAX_SCORE)
Ejemplo n.º 10
0
    def test_date_time_day_in_month_with_year_with_context(self):
        """Analyze 'I bought my car on May 1st, 1977'.

        Expects exactly one result, checked against entities[1], the
        span 19-32 (where the date sits in the sentence) and the score
        bounds NER_STRENGTH and EntityRecognizer.MAX_SCORE.
        """
        date_part = 'May 1st, 1977'
        leading_words = 'I bought my car on'
        sentence = '{} {}'.format(leading_words, date_part)
        matches = self.prepare_and_analyze(nlp_engine, sentence)

        assert len(matches) == 1
        only_match = matches[0]
        assert_result_within_score_range(
            only_match, entities[1], 19, 32,
            NER_STRENGTH, EntityRecognizer.MAX_SCORE)
Ejemplo n.º 11
0
 def test_person_title_and_last_name_is_also_a_date_with_context_expected_person_only(
         self):
     """Analyze 'They call me Mr. May'.

     Expects exactly one result, checked against entities[0], the
     span 17-20 (the 'May' part of the sentence) and the score bounds
     NER_STRENGTH and EntityRecognizer.MAX_SCORE.
     """
     titled_name = 'Mr. May'
     leading_words = "They call me"
     sentence = '{} {}'.format(leading_words, titled_name)
     matches = self.prepare_and_analyze(nlp_engine, sentence)
     assert len(matches) == 1
     only_match = matches[0]
     assert_result_within_score_range(
         only_match, entities[0], 17, 20,
         NER_STRENGTH, EntityRecognizer.MAX_SCORE)
    def test_valid_us_driver_license_weak_WA(self):
        """Run us_license_recognizer on two space-separated 12-char strings.

        Expects exactly two results, each checked against entities[0] and
        the score bounds 0.3 and 0.4; the first must span 0-12 and the
        second 13-25.
        """
        first_license = 'AA1B2**9ABA7'
        second_license = 'A*1234AB*CD9'
        combined = '{} {}'.format(first_license, second_license)
        matches = us_license_recognizer.analyze(combined, entities)

        assert len(matches) == 2
        # One (start, end) pair per expected match, in order of appearance.
        expected_spans = ((0, 12), (13, 25))
        for match, (start, end) in zip(matches, expected_spans):
            assert_result_within_score_range(
                match, entities[0], start, end, 0.3, 0.4)
Ejemplo n.º 13
0
    def test_person_last_name_is_also_a_date_with_context_expected_person_only(
            self):
        """Analyze 'Dan May has a bank account'.

        Expects exactly one result, checked against entities[0], the
        span 0-7 (the length of 'Dan May') and the score bounds
        NER_STRENGTH and EntityRecognizer.MAX_SCORE.
        """
        full_name = 'Dan May'
        trailing_words = "has a bank account"
        sentence = '{} {}'.format(full_name, trailing_words)
        matches = self.prepare_and_analyze(nlp_engine, sentence)

        assert len(matches) == 1
        only_match = matches[0]
        # Diagnostic output: score, entity type and the matched substring.
        print(only_match.score)
        print(only_match.entity_type)
        print(sentence[only_match.start:only_match.end])
        assert_result_within_score_range(
            only_match, entities[0], 0, 7,
            NER_STRENGTH, EntityRecognizer.MAX_SCORE)
Ejemplo n.º 14
0
    def test_valid_us_ssn_very_weak_match(self):
        """Run us_ssn_recognizer on two partially delimited numbers.

        The input is '078-051120 07805-1120'. Expects exactly two results,
        each with a non-zero score and each checked against entities[0]
        and the score bounds 0 and 0.3; the first must span 0-10 and the
        second 11-21.
        """
        num1 = '078-051120'
        num2 = '07805-1120'
        results = us_ssn_recognizer.analyze('{} {}'.format(num1, num2),
                                            entities)

        assert len(results) == 2

        assert results[0].score != 0
        assert_result_within_score_range(results[0], entities[0], 0, 10, 0,
                                         0.3)

        # Check the second result's own score; the earlier version re-tested
        # results[0] here, leaving results[1] unchecked for a zero score.
        assert results[1].score != 0
        assert_result_within_score_range(results[1], entities[0], 11, 21, 0,
                                         0.3)