def test_create_from_expected_error():
    """Check ``CountSketch.create_from_expected_error`` on good and bad input.

    A valid pair of arguments must yield a sketch with the expected repr,
    length and byte size. Two invalid pairs must raise ``ValueError`` with
    the exact messages asserted below.
    """
    sketch = CountSketch.create_from_expected_error(0.0001, 0.01)

    # 5 x 271828209 == 1359141045 counters; 5436564180 bytes is 4 per counter.
    assert repr(sketch) == "<CountSketch (5 x 271828209)>"
    assert len(sketch) == 1359141045, 'Unexpected length'
    assert sketch.sizeof() == 5436564180, 'Unexpected size in bytes'

    # A second argument of 2 lies outside (0, 1) and must be rejected.
    with pytest.raises(ValueError) as rate_error:
        CountSketch.create_from_expected_error(0.001, 2)
    assert str(rate_error.value) == 'Error rate shell be in (0, 1)'

    # An extremely small first argument must be rejected as well.
    with pytest.raises(ValueError) as deviation_error:
        CountSketch.create_from_expected_error(0.0000000001, 0.02)
    assert str(deviation_error.value) == (
        'Deviation is too small. Not enough counters')
def test_init():
    """Check the ``CountSketch`` constructor on valid and invalid dimensions.

    A 2 x 4 sketch must report 32 bytes. A zero in either constructor
    argument must raise ``ValueError`` with the exact message asserted below.
    """
    sketch = CountSketch(2, 4)
    assert sketch.sizeof() == 32, 'Unexpected size in bytes'

    # Zero as the first argument: no counter arrays at all.
    with pytest.raises(ValueError) as no_arrays:
        CountSketch(0, 5)
    assert str(no_arrays.value) == 'At least one counter array is required'

    # Zero as the second argument: counter arrays of length zero.
    with pytest.raises(ValueError) as empty_array:
        CountSketch(5, 0)
    expected_message = 'The length of the counter array cannot be less then 1'
    assert str(empty_array.value) == expected_message
def test_len():
    """Check that ``len()`` of a 2 x 4 sketch is 8."""
    sketch = CountSketch(2, 4)
    total_counters = len(sketch)
    assert total_counters == 8
def test_frequency():
    """Check ``frequency`` for an added element and for one never added."""
    sketch = CountSketch(4, 100)
    sketch.add("test")

    seen = sketch.frequency("test")
    assert seen == 1, "Can't find recently added element"

    # "test_test" was never added, so its estimate is expected to be zero.
    unseen = sketch.frequency("test_test")
    assert unseen == 0, "False positive detected"
def test_add():
    """Check that elements of different types can be added and then found.

    A string, an integer and a dict are each added once; right after each
    insertion its reported frequency must be 1.
    """
    sketch = CountSketch(4, 100)
    samples = ["test", 1, {"hello": "world"}]

    for element in samples:
        sketch.add(element)
        estimate = sketch.frequency(element)
        assert estimate == 1, "Can't find frequency for element"
def test_repr():
    """Check ``repr`` for both ways of building a sketch."""
    # Built directly from explicit dimensions.
    explicit = CountSketch(2, 4)
    assert repr(explicit) == "<CountSketch (2 x 4)>"

    # Built from an expected error; the dimensions are chosen by the library.
    derived = CountSketch.create_from_expected_error(0.1, 0.01)
    assert repr(derived) == "<CountSketch (5 x 272)>"
21, 9, 29, 6, 5, 2, 3, 1, 16, 17, 15, 5, 3, 6, 9, 12, ] if __name__ == '__main__': cs = CountSketch(5, 2000) print(cs) print("CS uses {} bytes in the memory".format(cs.sizeof())) for digit in DATASET: cs.add(digit) for digit in sorted(set(DATASET)): print("Element: {}. Freq.: {}, Est. Freq.: {}".format( digit, DATASET.count(digit), cs.frequency(digit)))
def test_size():
    """Check that ``sizeof`` equals one C ``int`` per counter."""
    sketch = CountSketch(2, 4)

    # itemsize depends only on the typecode, so an empty array is enough.
    bytes_per_counter = array.array('i').itemsize
    expected_bytes = bytes_per_counter * len(sketch)

    assert sketch.sizeof() == expected_bytes, "Unexpected size in bytes"