def test_create_from_expected_error():
    """Check sizing and argument validation of the alternate constructor.

    ``create_from_expected_error(0.0001, 0.01)`` must produce a sketch whose
    repr, length and byte size match the pinned values, and the two invalid
    argument pairs listed below must each raise ``ValueError`` with the exact
    message shown.
    """
    # NOTE(review): the pinned sizeof() is about 5.4 GB; presumably the
    # counters are really allocated -- confirm before running on small hosts.
    sketch = CountSketch.create_from_expected_error(0.0001, 0.01)
    assert repr(sketch) == "<CountSketch (5 x 271828209)>"
    assert len(sketch) == 1359141045, 'Unexpected length'
    assert sketch.sizeof() == 5436564180, 'Unexpected size in bytes'

    # (arguments, exact error message) for every rejected combination.
    rejected = (
        ((0.001, 2), 'Error rate shell be in (0, 1)'),
        ((0.0000000001, 0.02), 'Deviation is too small. Not enough counters'),
    )
    for arguments, expected_message in rejected:
        with pytest.raises(ValueError) as excinfo:
            CountSketch.create_from_expected_error(*arguments)
        assert str(excinfo.value) == expected_message
def test_init():
    """Check direct construction and rejection of non-positive dimensions.

    ``CountSketch(2, 4)`` must report 32 bytes via ``sizeof()``; a zero in
    either constructor argument must raise ``ValueError`` with the exact
    message listed below.
    """
    sketch = CountSketch(2, 4)
    assert sketch.sizeof() == 32, 'Unexpected size in bytes'

    # (constructor arguments, exact error message) for each invalid shape.
    invalid_shapes = (
        ((0, 5), 'At least one counter array is required'),
        ((5, 0), 'The length of the counter array cannot be less then 1'),
    )
    for shape, expected_message in invalid_shapes:
        with pytest.raises(ValueError) as excinfo:
            CountSketch(*shape)
        assert str(excinfo.value) == expected_message
def test_len():
    """``len()`` of a ``CountSketch(2, 4)`` must be 8."""
    sketch = CountSketch(2, 4)
    total_counters = len(sketch)
    assert total_counters == 8
def test_frequency():
    """After one ``add``, the added element reads 1 and another reads 0."""
    sketch = CountSketch(4, 100)
    sketch.add("test")

    added_estimate = sketch.frequency("test")
    assert added_estimate == 1, "Can't find recently added element"

    absent_estimate = sketch.frequency("test_test")
    assert absent_estimate == 0, "False positive detected"
def test_add():
    """Elements of different types can be added and then read back as 1."""
    sketch = CountSketch(4, 100)

    # A string, an integer and a dict are each added once to the same sketch.
    elements = ("test", 1, {"hello": "world"})
    for element in elements:
        sketch.add(element)
        estimate = sketch.frequency(element)
        assert estimate == 1, "Can't find frequency for element"
def test_repr():
    """``repr()`` shows the sketch dimensions for both construction paths."""
    sketch = CountSketch(2, 4)
    shown = repr(sketch)
    assert shown == "<CountSketch (2 x 4)>"

    sketch = CountSketch.create_from_expected_error(0.1, 0.01)
    shown = repr(sketch)
    assert shown == "<CountSketch (5 x 272)>"
# Example #7
0
    21,
    9,
    29,
    6,
    5,
    2,
    3,
    1,
    16,
    17,
    15,
    5,
    3,
    6,
    9,
    12,
]

if __name__ == '__main__':
    # Demo: build CountSketch(5, 2000), feed it every item of DATASET, then
    # print the exact count next to the sketch's estimate for each distinct
    # item in ascending order.
    cs = CountSketch(5, 2000)

    print(cs)
    size_template = "CS uses {} bytes in the memory"
    print(size_template.format(cs.sizeof()))

    for digit in DATASET:
        cs.add(digit)

    report_template = "Element: {}. Freq.: {}, Est. Freq.: {}"
    for digit in sorted(set(DATASET)):
        exact_count = DATASET.count(digit)
        estimated_count = cs.frequency(digit)
        print(report_template.format(digit, exact_count, estimated_count))
def test_size():
    """``sizeof()`` must equal one ``array('i')`` item size times ``len()``."""
    sketch = CountSketch(2, 4)

    # Platform-dependent byte width of a single 'i' (signed int) array item.
    counter_bytes = array.array('i', [1]).itemsize
    reported_bytes = sketch.sizeof()
    expected_bytes = counter_bytes * len(sketch)
    assert reported_bytes == expected_bytes, "Unexpected size in bytes"