Ejemplo n.º 1
0
def test_numeric_properties_with_integers(thresh_dist, resolution, candidate):
    """Check basic token properties when no fractional precision is given.

    For a comparator built from ``thresh_dist`` and ``resolution`` the test
    asserts that tokenizing ``str(candidate)``:

    * yields exactly ``2 * resolution + 1`` tokens,
    * yields the same tokens from a second, freshly built comparator,
    * yields tokens that are pairwise distinct.

    NOTE(review): the arguments are presumably generated by a property-based
    testing decorator that is not visible in this excerpt -- confirm.
    """
    expected_count = 2 * resolution + 1
    text = str(candidate)

    first_run = NumericComparison(thresh_dist, resolution).tokenize(text)
    assert len(first_run) == expected_count, "unexpected number of tokens"

    # A brand-new comparator must reproduce the exact same tokens.
    second_run = NumericComparison(thresh_dist, resolution).tokenize(text)
    assert first_run == second_run, "NumericComparison should be deterministic"

    assert len(set(first_run)) == expected_count, "tokens should be unique"
Ejemplo n.º 2
0
def test_numeric_properties(thresh_dist, resolution, precision, candidate):
    """Check basic token properties with an explicit fractional precision.

    When ``thresh_dist`` scaled by ``10 ** precision`` rounds to a value that
    is not positive, constructing the comparator is expected to raise
    ``ValueError``. Otherwise tokenizing ``str(candidate)`` must yield
    ``2 * resolution + 1`` distinct tokens, identically on every run.

    NOTE(review): the arguments are presumably generated by a property-based
    testing decorator that is not visible in this excerpt -- confirm.
    """
    scaled_threshold = int(round(thresh_dist * pow(10, precision)))

    # Guard clause: this combination is expected to be rejected outright.
    if scaled_threshold <= 0:
        with pytest.raises(ValueError):
            NumericComparison(thresh_dist, resolution, precision)
        return

    expected_count = 2 * resolution + 1
    text = str(candidate)

    first_run = NumericComparison(thresh_dist, resolution,
                                  precision).tokenize(text)
    assert len(first_run) == expected_count, "unexpected number of tokens"

    # A brand-new comparator must reproduce the exact same tokens.
    second_run = NumericComparison(thresh_dist, resolution,
                                   precision).tokenize(text)
    assert first_run == second_run, "NumericComparison should be deterministic"

    assert len(set(first_run)) == expected_count, "tokens should be unique"
Ejemplo n.º 3
0
def test_numeric_exceptions():
    """Check that invalid constructor arguments raise ``ValueError``.

    Each case pairs positional constructor arguments with the fragment(s)
    that must all appear in the resulting error message.
    """
    # NOTE: "threhold_distance" is intentionally spelled this way; it is the
    # exact text the assertion matches against the raised message.
    invalid_cases = (
        ((0, 1, 1), ("threhold_distance has to be positive",)),
        ((1, 0, 1), ("resolution has to be greater than zero",)),
        ((1, 1, -1), ("fractional_precision", "less than zero")),
        ((0.001, 1, 2), ("not enough fractional precision",)),
    )

    for ctor_args, fragments in invalid_cases:
        with pytest.raises(ValueError) as ex_info:
            NumericComparison(*ctor_args)
        message = str(ex_info.value)
        assert all(fragment in message for fragment in fragments)
Ejemplo n.º 4
0
def test_numeric_overlaps(thresh_dist, resolution, precision, candidate):
    """Check that token overlap shrinks as values move away from ``candidate``.

    Twenty values are probed, starting at ``candidate`` and advancing in
    steps of a tenth of ``thresh_dist``. The number of tokens shared with
    ``candidate`` must be complete for the first value, zero for the last,
    and never increase in between.

    Inputs whose scaled threshold does not round to a positive integer are
    discarded via ``assume``.
    """
    assume(int(round(thresh_dist * pow(10, precision))) > 0)
    comparator = NumericComparison(threshold_distance=thresh_dist,
                                   resolution=resolution,
                                   fractional_precision=precision)
    cand_tokens = comparator.tokenize(str(candidate))
    reference = set(cand_tokens)

    shared_counts = []
    for step in range(20):
        shifted = candidate + thresh_dist * (step * 0.1)
        shifted_tokens = set(comparator.tokenize(str(shifted)))
        shared_counts.append(len(reference.intersection(shifted_tokens)))

    assert shared_counts[0] == len(cand_tokens)
    assert shared_counts[-1] == 0
    # A sequence is non-increasing exactly when it equals its descending sort.
    assert shared_counts == sorted(
        shared_counts,
        reverse=True), 'with increasing distance, the overlap reduces'
Ejemplo n.º 5
0
def test_numeric_overlaps_around_threshdistance(thresh_dist, resolution,
                                                precision, candidate):
    """Check token overlap at, beyond, and well within the threshold distance.

    Three probes are compared against the tokens of ``candidate``:

    * exactly ``thresh_dist`` away: exactly one shared token (only checked
      when the addition actually changed the value),
    * ``1.51 * thresh_dist`` away: no shared tokens,
    * a random offset below ``thresh_dist / (2 * resolution)``: at most two
      tokens missing from the overlap.

    Inputs whose scaled threshold does not round to a positive integer are
    discarded via ``assume``.
    """
    assume(int(round(thresh_dist * pow(10, precision))) > 0)
    comp = NumericComparison(threshold_distance=thresh_dist,
                             resolution=resolution,
                             fractional_precision=precision)
    cand_tokens = comp.tokenize(str(candidate))
    cand_token_set = set(cand_tokens)

    def shared_with(value):
        """Return how many tokens ``value`` has in common with ``candidate``."""
        return len(cand_token_set.intersection(set(comp.tokenize(str(value)))))

    # Probe 1: exactly the threshold distance away.
    at_threshold = candidate + thresh_dist
    shared_at_threshold = shared_with(at_threshold)
    if at_threshold != candidate:
        assert shared_at_threshold == 1, "numbers exactly thresh_dist apart have 1 token in common"

    # Probe 2: clearly beyond the threshold. The original author attributes
    # the extra 0.5 to the modulo operation used during tokenization.
    beyond = candidate + thresh_dist * 1.51
    # Floating-point rounding may leave 'beyond' closer than intended (or
    # unchanged); such inputs are discarded rather than failed.
    assume((beyond - candidate) > (1.5 * thresh_dist))
    assert shared_with(
        beyond
    ) == 0, "numbers more than thresh_dist apart have no tokens in common"

    # Probe 3: a random offset smaller than thresh_dist / (2 * resolution).
    nearby = candidate + (thresh_dist / (2 * resolution) * random.random())
    assert shared_with(nearby) >= len(
        cand_tokens
    ) - 2, "numbers that are not more than the modulus apart have all or all - 2 tokens in common"
Ejemplo n.º 6
0
def test_numeric_overlaps_with_integers(thresh_dist, resolution, candidate):
    """Check token overlap behaviour with ``fractional_precision=0``.

    The test first probes three individual values relative to ``candidate``
    (exactly ``thresh_dist`` away, more than ``thresh_dist`` away, and a
    random offset below ``int(thresh_dist / (2 * resolution))`` when that is
    positive). It then walks a series of increasing offsets and asserts that
    the overlap starts complete, ends at zero and never increases.

    NOTE(review): the arguments are presumably integers generated by a
    property-based testing decorator not visible in this excerpt -- confirm.
    """
    comp = NumericComparison(threshold_distance=thresh_dist,
                             resolution=resolution,
                             fractional_precision=0)
    cand_tokens = comp.tokenize(str(candidate))
    cand_token_set = set(cand_tokens)

    def shared_with(value):
        """Return how many tokens ``value`` has in common with ``candidate``."""
        return len(cand_token_set.intersection(set(comp.tokenize(str(value)))))

    # Probe 1: exactly the threshold distance away.
    at_threshold = candidate + thresh_dist
    shared_at_threshold = shared_with(at_threshold)
    if at_threshold != candidate:
        assert shared_at_threshold == 1, "numbers exactly thresh_dist apart have 1 token in common"

    # Probe 2: the threshold plus half of it again, rounded up.
    beyond = candidate + thresh_dist + int(math.ceil(thresh_dist / 2))
    assert shared_with(
        beyond
    ) == 0, "numbers more than thresh_dist apart have no tokens in common"

    # Probe 3: a random offset below the modulus; skipped when the modulus
    # truncates to zero because randrange needs a positive bound.
    modulus = int(thresh_dist / (2 * resolution))
    if modulus > 0:
        nearby = candidate + random.randrange(modulus)
        assert shared_with(nearby) >= len(
            cand_tokens
        ) - 2, "numbers that are not more than the modulus apart have all or all - 2 tokens in common"

    # Small thresholds are walked one unit at a time; larger ones in twenty
    # steps of a tenth of the threshold, truncated to integers.
    offsets = (range(thresh_dist + 10) if thresh_dist < 20 else
               [int(thresh_dist * (step * 0.1)) for step in range(20)])
    shared_counts = [shared_with(candidate + offset) for offset in offsets]

    assert shared_counts[0] == len(cand_tokens)
    assert shared_counts[-1] == 0
    # A sequence is non-increasing exactly when it equals its descending sort.
    assert shared_counts == sorted(
        shared_counts,
        reverse=True), 'with increasing distance, the overlap reduces'
Ejemplo n.º 7
0
def test_numeric_empty_input():
    """Check that tokenizing the empty string produces no tokens."""
    tokens = NumericComparison(1, 20, 3).tokenize('')
    assert list(tokens) == []
Ejemplo n.º 8
0
def test_numeric_mix_int_and_floats():
    """Check that '42' and '42.000' tokenize to the same result."""
    comparator = NumericComparison(1, 20, 3)
    integer_form_tokens = comparator.tokenize('42')
    float_form_tokens = comparator.tokenize('42.000')
    assert integer_form_tokens == float_form_tokens