Esempio n. 1
0
def test_init():
    """Check the byte size of a precision-10 counter and that precision 2 is rejected."""
    counter = HyperLogLog(10)
    assert counter.sizeof() == 4096, "Unexpected size in bytes"

    expected_message = "Precision has to be in range 4...16"
    with pytest.raises(ValueError) as raised:
        # Only the raised exception matters, so the result is not bound.
        HyperLogLog(2)
    assert str(raised.value) == expected_message
Esempio n. 2
0
def test_count():
    """Check the mean relative error of ``count()`` over 100000 distinct inserts.

    Errors are only sampled once the true cardinality exceeds
    ``2.5 * 2**precision``; smaller cardinalities are covered by a
    separate test. The absolute value of the mean signed error must not
    exceed ``1.04 / sqrt(2**precision)``.
    """
    precision = 10
    scale = 1 << precision
    hll = HyperLogLog(precision)
    std = 1.04 / sqrt(scale)
    boundary = 2.5 * scale

    relative_errors = []
    for true_count in range(1, 100001):
        # Element labels stay zero-based: element_0 ... element_99999.
        hll.add("element_{}".format(true_count - 1))

        if true_count > boundary:
            estimate = hll.count()
            relative_errors.append((true_count - estimate) / float(true_count))

    avg_error = abs(sum(relative_errors)) / float(len(relative_errors))

    assert 0 <= avg_error <= std
Esempio n. 3
0
def test_count_small():
    """Check ``count()`` on the first 100 words of the lorem text.

    The estimate must lie within ``2 * std`` (relative) of the true
    number of distinct words, where ``std = 1.04 / sqrt(2**precision)``.
    """
    precision = 6
    hll = HyperLogLog(precision)
    std = 1.04 / sqrt(1 << precision)

    words = LOREM_TEXT["text"].split()[:100]
    for token in words:
        hll.add(token)

    expected = len(set(words))
    estimate = hll.count()

    lower = (1 - 2 * std) * expected
    upper = (1 + 2 * std) * expected
    assert lower <= estimate <= upper
Esempio n. 4
0
def test_count_large():
    """Check ``count()`` for a cardinality above the upper correction threshold.

    The lorem text is inserted ``boost`` times, each pass suffixing every
    word with the pass index so that passes never collide. The estimate
    must be within 30% of the number of distinct elements inserted.
    """
    precision = 6
    hll = HyperLogLog(precision)

    unique_per_pass = LOREM_TEXT["num_of_unique_words"]

    # NOTE: make n larger than the HLL upper correction threshold
    # (143165576 == 2**32 // 30 -- presumably the large-range threshold
    # for 32-bit hashes; confirm against the implementation).
    boost = 143165576 // unique_per_pass + 1
    num_of_unique_words = boost * unique_per_pass

    # The word list never changes between passes, so tokenise it once
    # rather than re-splitting the text on every outer iteration.
    words = LOREM_TEXT["text"].split()
    for i in range(boost):
        for word in words:
            hll.add("{}_{}".format(word, i))

    cardinality = hll.count()
    assert cardinality >= 0.7 * num_of_unique_words
    assert cardinality <= 1.3 * num_of_unique_words
Esempio n. 5
0
def test_count_small():
    """Check the mean relative error of ``count()`` over the first 100 inserts.

    After every insert the signed relative error is recorded; the
    absolute value of its mean must not exceed
    ``1.04 / sqrt(2**precision)``.
    """
    precision = 6
    hll = HyperLogLog(precision)
    std = 1.04 / sqrt(1 << precision)

    relative_errors = []
    for true_count in range(1, 101):
        # Element labels stay zero-based: element_0 ... element_99.
        hll.add("element_{}".format(true_count - 1))
        estimate = hll.count()
        relative_errors.append((true_count - estimate) / float(true_count))

    avg_error = abs(sum(relative_errors)) / float(len(relative_errors))

    assert 0 <= avg_error <= std
Esempio n. 6
0
def test_count():
    """Check ``count()`` on an empty counter and after many distinct inserts.

    An untouched counter must report 0. The lorem text is then inserted
    ``boost`` times, each pass suffixing every word with the pass index,
    and the estimate must be within ``2 * std`` (relative) of
    ``boost * LOREM_TEXT["num_of_unique_words"]``.
    """
    precision = 6
    hll = HyperLogLog(precision)
    std = 1.04 / sqrt(1 << precision)

    assert hll.count() == 0

    unique_per_pass = LOREM_TEXT["num_of_unique_words"]
    boost = 50 * unique_per_pass
    # Each of the `boost` passes contributes `unique_per_pass` new elements.
    num_of_unique_words = boost * unique_per_pass

    # The word list never changes between passes, so tokenise it once
    # rather than re-splitting the text on every outer iteration.
    words = LOREM_TEXT["text"].split()
    for i in range(boost):
        for word in words:
            hll.add("{}_{}".format(word, i))

    cardinality = hll.count()
    assert cardinality >= (1 - 2 * std) * num_of_unique_words
    assert cardinality <= (1 + 2 * std) * num_of_unique_words
Esempio n. 7
0
def test_len():
    """A precision-4 counter must report a length of 16."""
    counter = HyperLogLog(4)
    reported_length = len(counter)
    assert reported_length == 16
Esempio n. 8
0
def test_init():
    """Precision 10 must construct cleanly; precision 2 must raise ValueError."""
    # Construction alone is the check here: it must not raise.
    HyperLogLog(10)

    expected_message = "Precision has to be in range 4...16"
    with pytest.raises(ValueError) as raised:
        HyperLogLog(2)
    assert str(raised.value) == expected_message
Esempio n. 9
0
def test_add():
    """Smoke test: ``add`` takes a string, an int and a dict without raising."""
    counter = HyperLogLog(10)

    samples = ["test", 1, {"hello": "world"}]
    for sample in samples:
        counter.add(sample)
Esempio n. 10
0
def test_repr():
    """``repr`` of a precision-6 counter must show its length and precision."""
    expected = "<HyperLogLog (length: 64, precision: 6)>"
    counter = HyperLogLog(6)

    assert repr(counter) == expected
Esempio n. 11
0
def test_size():
    """``sizeof()`` must equal the counter length times one 'L' array item."""
    counter = HyperLogLog(10)

    # Item size of typecode 'L' is platform dependent, so measure it.
    bytes_per_item = array.array('L', [1]).itemsize
    assert counter.sizeof() == bytes_per_item * len(counter), (
        "Unexpected size in bytes"
    )
Esempio n. 12
0
def test_add():
    """Smoke test: adding a string, an int and a dict must not raise."""
    sketch = HyperLogLog(10)

    inputs = ["test", 1, {"hello": "world"}]
    for item in inputs:
        sketch.add(item)