def test_correct_num_active_items():
    """Check the active-item count for populated, empty, and one-item sketches."""
    populated = make_sketch_and_track(ALL_VALS)
    assert populated.get_num_active_items() == 13

    # A freshly constructed sketch should report zero until its first update.
    fresh = dsketch.FrequentItemsSketch()
    assert fresh.get_num_active_items() == 0
    fresh.update(1)
    assert fresh.get_num_active_items() == 1
def test_null_lg_max_k_protobuf_value():
    """A ``None`` lg_max_k is written to protobuf as -1 and read back as ``None``."""
    original = dsketch.FrequentItemsSketch()
    original.lg_max_k = None

    message = original.to_protobuf()
    assert message.lg_max_k == -1

    restored = dsketch.FrequentItemsSketch.from_protobuf(message)
    assert restored.lg_max_k is None
def test_apriori_error_equal_datasketches_error():
    """Compare ``get_apriori_error`` with the datasketches strings sketch."""
    ours = dsketch.FrequentItemsSketch(LG_K)
    reference = datasketches.frequent_strings_sketch(LG_K)

    # Try it out for a few (map size, weight) pairings.
    pairings = [(1, 0), (2, 10), (32, 50), (120, 100)]
    for map_size, weight in pairings:
        actual = ours.get_apriori_error(map_size, weight)
        expected = reference.get_apriori_error(map_size, weight)
        assert actual == expected
def test_copy_returns_correct_type():
    """``copy()`` yields an object of exactly the same type as its source.

    Checked for an empty sketch and again after one item has been added.
    """
    sketch = dsketch.FrequentItemsSketch()

    # Empty sketch
    duplicate = sketch.copy()
    # Identity is the idiomatic exact-type comparison (pycodestyle E721).
    assert type(duplicate) is type(sketch)

    # Sketch with an item
    sketch.update(1)
    duplicate = sketch.copy()
    assert type(duplicate) is type(sketch)
def test_merge_empty_gives_same_result():
    """Merging with an empty sketch, in either direction, keeps the frequent items."""
    populated = make_sketch_and_track(ALL_VALS)
    blank = dsketch.FrequentItemsSketch()
    # Capture the baseline before any merge is performed.
    expected = populated.get_frequent_items()

    # Empty merged into full
    result = populated.merge(blank)
    compare_frequent_items(expected, result.get_frequent_items())

    # Full merged into empty
    result = blank.merge(populated)
    compare_frequent_items(expected, result.get_frequent_items())
def test_total_weight_correct():
    """Total weight matches the update count, or the sum of explicit weights."""
    # Default weights: expect the total to equal the number of updates.
    unweighted = make_sketch_and_track(STRINGS)
    assert unweighted.get_total_weight() == len(STRINGS)

    # Integer weights: the value at position i is updated with weight i.
    weighted = dsketch.FrequentItemsSketch()
    expected_total = 0
    for weight, value in enumerate(ALL_VALS):
        weighted.update(value, weight)
        expected_total += weight
    assert weighted.get_total_weight() == expected_total
def test_empty_sketch_summary_returns_none():
    """``to_summary()`` on a sketch with no updates yields ``None``."""
    assert dsketch.FrequentItemsSketch().to_summary() is None
def test_copy_empty_returns_empty():
    """Copying a sketch that has received no updates gives an empty sketch."""
    blank = dsketch.FrequentItemsSketch()
    duplicate = blank.copy()
    assert duplicate.is_empty()
def test_empty_sketch_returns_empty():
    """A sketch constructed with argument 32 and never updated is empty."""
    sketch = dsketch.FrequentItemsSketch(32)
    assert sketch.is_empty()
def make_sketch_and_track(vals=(), lg_k=LG_K):
    """Build a ``FrequentItemsSketch`` and feed it each value once.

    Args:
        vals: Iterable of items; each is passed to ``update`` without an
            explicit weight. Defaults to no items.
        lg_k: Passed as the sole constructor argument. Defaults to ``LG_K``.

    Returns:
        The sketch after all updates have been applied.
    """
    # The default for ``vals`` is an immutable tuple rather than a list, so
    # no mutable object is shared between calls.
    sketch = dsketch.FrequentItemsSketch(lg_k)
    for v in vals:
        sketch.update(v)
    return sketch
def test_string_epsilon_equals_datasketches():
    """``get_epsilon_for_lg_size`` agrees with the datasketches strings sketch."""
    ours = dsketch.FrequentItemsSketch(LG_K)
    reference = datasketches.frequent_strings_sketch(LG_K)
    for lg_size in (1, 3, 32, 120):
        actual = ours.get_epsilon_for_lg_size(lg_size)
        expected = reference.get_epsilon_for_lg_size(lg_size)
        assert actual == expected