Ejemplo n.º 1
0
def test_correct_num_active_items():
    """Check get_num_active_items() on a populated, a fresh and a one-item sketch.

    Expected counts: 13 for a sketch built from ALL_VALS, 0 for a newly
    constructed sketch, and 1 after a single update on that new sketch.
    """
    populated = make_sketch_and_track(ALL_VALS)
    assert populated.get_num_active_items() == 13

    fresh = dsketch.FrequentItemsSketch()
    assert fresh.get_num_active_items() == 0

    fresh.update(1)
    assert fresh.get_num_active_items() == 1
Ejemplo n.º 2
0
def test_null_lg_max_k_protobuf_value():
    """Round-trip a sketch whose lg_max_k is None through protobuf.

    The serialized message is expected to carry -1 for lg_max_k, and the
    sketch rebuilt with from_protobuf is expected to have lg_max_k of None.
    """
    original = dsketch.FrequentItemsSketch()
    original.lg_max_k = None

    proto = original.to_protobuf()
    assert proto.lg_max_k == -1

    restored = dsketch.FrequentItemsSketch.from_protobuf(proto)
    assert restored.lg_max_k is None
Ejemplo n.º 3
0
def test_apriori_error_equal_datasketches_error():
    """Check get_apriori_error against datasketches' frequent_strings_sketch.

    Both sketches are built with LG_K and must return equal values for each
    (map size, weight) pair below.
    """
    ours = dsketch.FrequentItemsSketch(LG_K)
    reference = datasketches.frequent_strings_sketch(LG_K)
    # (map size, weight) pairs; each pair is checked exactly once.
    cases = ((1, 0), (2, 10), (32, 50), (120, 100))
    for map_size, weight in cases:
        actual = ours.get_apriori_error(map_size, weight)
        expected = reference.get_apriori_error(map_size, weight)
        assert actual == expected
Ejemplo n.º 4
0
def test_copy_returns_correct_type():
    """Check that copy() returns an object of the same type as the sketch.

    Covers both a newly constructed sketch and the same sketch after one
    update.
    """
    # Empty sketch
    sketch = dsketch.FrequentItemsSketch()
    duplicate = sketch.copy()
    # Compare the type objects by identity: this keeps the exact-type check
    # while avoiding `==` on types (pycodestyle E721).
    assert type(duplicate) is type(sketch)

    # Sketch with an item
    sketch.update(1)
    duplicate = sketch.copy()
    assert type(duplicate) is type(sketch)
Ejemplo n.º 5
0
def test_merge_empty_gives_same_result():
    """Merge a populated sketch with an empty one in both directions.

    The frequent items of each merge result are checked, via
    compare_frequent_items, against those read from the populated sketch
    before any merge took place.
    """
    full = make_sketch_and_track(ALL_VALS)
    blank = dsketch.FrequentItemsSketch()
    expected = full.get_frequent_items()

    # Direction 1: the empty sketch merged into the populated one.
    compare_frequent_items(expected, full.merge(blank).get_frequent_items())

    # Direction 2: the populated sketch merged into the empty one.
    compare_frequent_items(expected, blank.merge(full).get_frequent_items())
Ejemplo n.º 6
0
def test_total_weight_correct():
    """Check get_total_weight() for default and explicit integer weights."""
    # No explicit weights: the total is expected to equal the item count.
    unweighted = make_sketch_and_track(STRINGS)
    assert unweighted.get_total_weight() == len(STRINGS)

    # Explicit integer weights: the i-th value of ALL_VALS gets weight i.
    weighted = dsketch.FrequentItemsSketch()
    expected_total = 0
    for weight, value in enumerate(ALL_VALS):
        weighted.update(value, weight)
        expected_total += weight
    assert weighted.get_total_weight() == expected_total
Ejemplo n.º 7
0
def test_empty_sketch_summary_returns_none():
    """to_summary() on a sketch with no updates is expected to return None."""
    assert dsketch.FrequentItemsSketch().to_summary() is None
Ejemplo n.º 8
0
def test_copy_empty_returns_empty():
    """A copy of a newly constructed sketch is expected to report is_empty()."""
    blank = dsketch.FrequentItemsSketch()
    clone = blank.copy()
    assert clone.is_empty()
Ejemplo n.º 9
0
def test_empty_sketch_returns_empty():
    """A sketch constructed with argument 32 and never updated should be empty."""
    fresh = dsketch.FrequentItemsSketch(32)
    assert fresh.is_empty()
Ejemplo n.º 10
0
def make_sketch_and_track(vals=(), lg_k=LG_K):
    """Build a FrequentItemsSketch and feed it every value in *vals*.

    Args:
        vals: Iterable of items; each one is passed to ``sketch.update`` once,
            in iteration order. Defaults to an empty tuple rather than a list
            so the default object is immutable and cannot leak state between
            calls.
        lg_k: Forwarded as the only argument to ``dsketch.FrequentItemsSketch``.
            Defaults to the module-level ``LG_K``.

    Returns:
        The sketch after all updates have been applied.
    """
    sketch = dsketch.FrequentItemsSketch(lg_k)
    for v in vals:
        sketch.update(v)
    return sketch
Ejemplo n.º 11
0
def test_string_epsilon_equals_datasketches():
    """Check get_epsilon_for_lg_size against datasketches' frequent_strings_sketch.

    Both sketches are built with LG_K and must return equal values for each
    lg size tried below.
    """
    ours = dsketch.FrequentItemsSketch(LG_K)
    reference = datasketches.frequent_strings_sketch(LG_K)
    for lg_size in (1, 3, 32, 120):
        actual = ours.get_epsilon_for_lg_size(lg_size)
        expected = reference.get_epsilon_for_lg_size(lg_size)
        assert actual == expected