def analyze_perf(field_name, entity, runtime_threshold_micros):
    """Time one ``match.analyze_text`` call and assert it is fast enough.

    The analyzed text is ``context + entity``; ``context`` and ``match`` are
    not defined in this block and must be provided by the enclosing module.

    Args:
        field_name: Value assigned to ``FieldTypes.name`` for the single
            field type passed to the analyzer.
        entity: Text appended to ``context`` to form the analyzed input.
        runtime_threshold_micros: Exclusive upper bound on the microsecond
            component of the elapsed time.

    Raises:
        AssertionError: If the call takes one second or longer, or if the
            sub-second part reaches ``runtime_threshold_micros``.
    """
    field_type = common_pb2.FieldTypes()
    field_type.name = field_name
    types = [field_type]

    start_time = datetime.datetime.now()
    # Only the duration matters here; the analysis result is discarded.
    match.analyze_text(context + entity, types)
    analyze_time = datetime.datetime.now() - start_time

    # Zero-pad the microseconds so that e.g. 5000us prints as "0.005000"
    # rather than the misleading "0.5000".
    print('--- analyze_time[{}]: {}.{:06d} seconds'.format(
        types[0].name, analyze_time.seconds, analyze_time.microseconds))

    # timedelta normalizes into days/seconds/microseconds, so `seconds`
    # alone would ignore whole days (and wraps on a negative delta).
    assert analyze_time.days == 0 and analyze_time.seconds < 1
    assert analyze_time.microseconds < runtime_threshold_micros
from analyzer import matcher, common_pb2
from tests import *

# Field-type filter shared by the tests below: only US_SSN is requested.
# NOTE(review): `match` is not defined in this module; presumably it is
# supplied by `from tests import *` -- confirm.
fieldType = common_pb2.FieldTypes()
fieldType.name = common_pb2.FieldTypesEnum.Name(common_pb2.US_SSN)
types = [fieldType]


def test_valid_us_ssn_very_weak_match():
    """Two oddly hyphenated 9-digit numbers each match with a score in (0.01, 0.31)."""
    num1 = '078-051120'
    num2 = '07805-1120'
    results = match.analyze_text('{} {}'.format(num1, num2), types)

    assert len(results) == 2
    assert results[0].text == num1
    assert 0.01 < results[0].score < 0.31
    assert results[1].text == num2
    # Both bounds must be checked on the second result itself; the upper
    # bound previously re-checked results[0] by mistake.
    assert 0.01 < results[1].score < 0.31


def test_valid_us_ssn_weak_match():
    """An undelimited 9-digit number matches once with a score in (0.29, 0.41)."""
    num = '078051120'
    results = match.analyze_text(num, types)

    assert len(results) == 1
    assert results[0].text == num
    assert 0.29 < results[0].score < 0.41


def test_valid_us_ssn_medium_match():
    num = '078-05-1120'