Beispiel #1
0
def test_init():
    """Check that the constructor validates both sketch dimensions."""
    # Valid dimensions must be accepted without raising.
    CountMinSketch(2, 4)

    # A first argument of 0 is rejected.
    with pytest.raises(ValueError) as no_arrays:
        CountMinSketch(0, 5)
    expected = 'At least one counter array is required'
    assert str(no_arrays.value) == expected

    # A second argument of 0 is rejected. The message text (including its
    # spelling) is compared verbatim, so it must not be "corrected" here.
    with pytest.raises(ValueError) as no_length:
        CountMinSketch(5, 0)
    expected = 'The length of the counter array cannot be less then 1'
    assert str(no_length.value) == expected
Beispiel #2
0
def test_create_from_expected_error():
    """Check sizing from an expected error and the argument validation."""
    sketch = CountMinSketch.create_from_expected_error(0.000001, 0.01)
    assert repr(sketch) == "<CountMinSketch (5 x 2718282)>"
    # 5 * 2718282 == 13591410 counters in total.
    assert len(sketch) == 13591410, 'Unexpected length'
    # 13591410 * 4 == 54365640, i.e. the expectation is 4 bytes per counter.
    assert sketch.sizeof() == 54365640, 'Unexpected size in bytes'

    # A second argument outside (0, 1) is rejected.
    with pytest.raises(ValueError) as bad_rate:
        CountMinSketch.create_from_expected_error(0.001, 2)
    assert str(bad_rate.value) == 'Error rate shell be in (0, 1)'

    # An extremely small first argument is rejected as well.
    with pytest.raises(ValueError) as bad_deviation:
        CountMinSketch.create_from_expected_error(0.0000000000001, 0.02)
    expected = 'Deviation is too small. Not enough counters'
    assert str(bad_deviation.value) == expected
    21,
    9,
    29,
    6,
    5,
    2,
    3,
    1,
    16,
    17,
    15,
    5,
    3,
    6,
    9,
    12,
]

if __name__ == '__main__':
    # Demo: feed every DATASET element into a CountMinSketch(4, 100) and
    # print the exact count next to the sketch's estimate for each value.
    cms = CountMinSketch(4, 100)

    print(cms)
    size_report = "CMS uses {} bytes in the memory".format(cms.sizeof())
    print(size_report)

    for digit in DATASET:
        cms.add(digit)

    row_template = "Element: {}. Freq.: {}, Est. Freq.: {}"
    # Report each distinct value once, in ascending order.
    for digit in sorted(set(DATASET)):
        exact = DATASET.count(digit)
        estimated = cms.frequency(digit)
        print(row_template.format(digit, exact, estimated))
Beispiel #4
0
def test_len():
    """A sketch built with dimensions (2, 4) must report a length of 8."""
    sketch = CountMinSketch(2, 4)
    expected_length = 8
    assert len(sketch) == expected_length
Beispiel #5
0
def test_frequency():
    """An added element is counted once; one never added is reported as 0."""
    sketch = CountMinSketch(4, 100)
    sketch.add("test")

    seen = sketch.frequency("test")
    assert seen == 1, "Can't find recently added element"

    unseen = sketch.frequency("test_test")
    assert unseen == 0, "False positive detected"
Beispiel #6
0
def test_add():
    """Elements of several types can be added and are then counted once."""
    sketch = CountMinSketch(4, 100)

    # A string, an integer and a dict are each added exactly once.
    samples = ("test", 1, {"hello": "world"})
    for sample in samples:
        sketch.add(sample)
        estimate = sketch.frequency(sample)
        assert estimate == 1, "Can't find frequency for element"
Beispiel #7
0
def test_repr():
    """Check the repr text for both ways of constructing a sketch."""
    # Built directly from explicit dimensions.
    explicit = CountMinSketch(2, 4)
    assert repr(explicit) == "<CountMinSketch (2 x 4)>"

    # Built through the expected-error factory.
    derived = CountMinSketch.create_from_expected_error(0.1, 0.01)
    assert repr(derived) == "<CountMinSketch (5 x 28)>"
Beispiel #8
0
def test_size():
    """sizeof() must equal len(sketch) times the size of one 'I' array item."""
    sketch = CountMinSketch(2, 4)

    # Size in bytes of a single unsigned-int ('I') array element on this
    # platform; queried at runtime rather than hard-coded.
    counter_bytes = array.array('I', [1]).itemsize
    assert sketch.sizeof() == counter_bytes * len(sketch), (
        "Unexpected size in bytes")