def analyze_perf(field_name, entity, runtime_threshold_micros):
    """Time one ``match.analyze_text`` call and assert it was fast enough.

    The analyzed input is the externally defined ``context`` followed by
    ``entity``; a single field type named ``field_name`` is requested.

    Args:
        field_name: Value assigned to the ``name`` of the requested field type.
        entity: Value concatenated after ``context`` to form the input.
        runtime_threshold_micros: Exclusive upper bound for the microsecond
            component of the elapsed time.

    Raises:
        AssertionError: If the elapsed time has a seconds component of one or
            more, or a microsecond component that reaches
            ``runtime_threshold_micros``.
    """
    field_type = common_pb2.FieldTypes()
    field_type.name = field_name
    types = [field_type]

    start_time = datetime.datetime.now()
    # Only the elapsed time matters here; the analysis results are discarded.
    match.analyze_text(context + entity, types)
    analyze_time = datetime.datetime.now() - start_time

    # Zero-pad the microseconds so 5000 us prints as "0.005000" rather than
    # the misleading "0.5000".
    print('--- analyze_time[{}]: {}.{:06d} seconds'.format(
        field_type.name, analyze_time.seconds, analyze_time.microseconds))

    assert analyze_time.seconds < 1
    assert analyze_time.microseconds < runtime_threshold_micros
Beispiel #2
0
from analyzer import matcher, common_pb2
from tests import *

# Field-type list shared by the tests below: a single FieldTypes entry whose
# name is the enum name looked up for common_pb2.US_SSN.
fieldType = common_pb2.FieldTypes()
fieldType.name = common_pb2.FieldTypesEnum.Name(common_pb2.US_SSN)
types = [fieldType]


def test_valid_us_ssn_very_weak_match():
    """Check that two oddly dashed numbers are both found with a very weak score.

    Both candidates are analyzed in one space-separated text, and the results
    are expected in the same order as the inputs.
    """
    num1 = '078-051120'
    num2 = '07805-1120'
    results = match.analyze_text('{} {}'.format(num1, num2), types)

    assert len(results) == 2
    # Validate every result against its own text and score; checking each
    # result in a loop ensures the second score's upper bound is verified too.
    for result, expected_text in zip(results, (num1, num2)):
        assert result.text == expected_text
        assert 0.01 < result.score < 0.31


def test_valid_us_ssn_weak_match():
    """Check that an undashed nine-digit number is found once with a weak score."""
    ssn_digits = '078051120'
    found = match.analyze_text(ssn_digits, types)

    assert len(found) == 1
    hit = found[0]
    assert hit.text == ssn_digits
    # The score has to fall strictly between 0.29 and 0.41.
    assert 0.29 < hit.score < 0.41


def test_valid_us_ssn_medium_match():
    num = '078-05-1120'