def test_sketch_roundtrip_serialize():
    """Serialize a populated sketch and verify that deserializing restores it.

    Also checks that the length of the serialized payload equals the size
    reported by ``get_serialized_size_bytes``.
    """
    original = make_sketch_and_track(ALL_VALS)
    payload = original.serialize()
    payload_size = len(payload)
    assert payload_size == original.get_serialized_size_bytes()

    restored = dsketch.FrequentItemsSketch.deserialize(payload)
    restored_items = restored.get_frequent_items()
    original_items = original.get_frequent_items()
    compare_frequent_items(restored_items, original_items)
def test_frequent_items_correct(number_sketch):
    """Compare the frequent items of ``number_sketch`` with hard-coded rows.

    ``number_sketch`` is supplied by the caller (presumably a pytest
    fixture -- confirm where it is defined). Each expected row is a 4-tuple.
    """
    expected_rows = [
        (1, 3, 3, 3),
        (4, 2, 2, 2),
        (2, 1, 1, 1),
        (3, 1, 1, 1),
        (5.0, 1, 1, 1),
    ]
    reported = number_sketch.get_frequent_items()
    compare_frequent_items(expected_rows, reported)
Exemple #3
0
def test_merge_gives_correct_values():
    """Merging a sketch with a copy of itself should double every count."""
    base = make_sketch_and_track(ALL_VALS)
    combined = base.merge(base.copy())
    # The base items are read only after the merge call, as in the
    # original ordering of this test.
    base_items = base.get_frequent_items()
    combined_items = combined.get_frequent_items()
    # Keep the first field of each row and double the three numeric fields.
    doubled = [
        (row[0], 2 * row[1], 2 * row[2], 2 * row[3])
        for row in base_items
    ]
    compare_frequent_items(doubled, combined_items)
Exemple #4
0
def test_merge_empty_gives_same_result():
    """Merging with an empty sketch must leave the frequent items unchanged.

    Both directions are exercised: empty merged into the populated sketch,
    then the populated sketch merged into the empty one.
    """
    populated = make_sketch_and_track(ALL_VALS)
    blank = dsketch.FrequentItemsSketch()
    baseline = populated.get_frequent_items()

    # (receiver, argument) pairs, in the order the merges are performed.
    directions = (
        (populated, blank),
        (blank, populated),
    )
    for receiver, argument in directions:
        result = receiver.merge(argument)
        compare_frequent_items(baseline, result.get_frequent_items())
Exemple #5
0
def test_protobuf():
    """Round-trip a ``ColumnProfile`` through its protobuf message.

    Checks that the column name and the ``number_tracker`` attribute are
    present after the round trip, and that the frequent numbers of the
    restored profile match those of the original.
    """
    profile = ColumnProfile('col')
    for value in (1, 2, 3):
        profile.track(value)

    message = profile.to_protobuf()
    restored = ColumnProfile.from_protobuf(message)
    assert restored.column_name == profile.column_name == 'col'
    assert hasattr(restored, 'number_tracker')

    restored_message = restored.to_protobuf()
    # The serialized frequent-items sketches cannot be compared with a
    # straight equality check, so compare the reported items instead.
    restored_items = restored.number_tracker.frequent_numbers.get_frequent_items()
    original_items = profile.number_tracker.frequent_numbers.get_frequent_items()
    compare_frequent_items(restored_items, original_items)

    # Blank the serialized sketch bytes on both messages.
    # NOTE(review): nothing compares the two messages afterwards -- confirm
    # whether an equality assertion was intended to follow.
    message.numbers.frequent_numbers.sketch = b""
    restored_message.numbers.frequent_numbers.sketch = b""
Exemple #6
0
def test_frequent_items_correct():
    """Track ``ALL_VALS`` in a sketch and compare against hard-coded rows.

    The expected rows cover integer, float, boolean and string values and
    are passed to ``compare_frequent_items`` in the order listed here.
    """
    sketch = make_sketch_and_track(ALL_VALS)
    reported = sketch.get_frequent_items()

    int_rows = [
        (1, 3, 3, 3),
        (2, 1, 1, 1),
        (3, 1, 1, 1),
        (4, 2, 2, 2),
    ]
    float_rows = [
        (5.0, 2, 2, 2),
        (4.0, 1, 1, 1),
        (1.0e90, 1, 1, 1),
    ]
    bool_rows = [
        (True, 2, 2, 2),
        (False, 1, 1, 1),
    ]
    str_rows = [
        ("a", 3, 3, 3),
        ("b", 2, 2, 2),
        ("hello world", 1, 1, 1),
        ("hello World", 1, 1, 1),
    ]
    expected_rows = int_rows + float_rows + bool_rows + str_rows
    compare_frequent_items(expected_rows, reported)
def test_merge():
    """Merge a ``NumberTracker`` with itself and check the combined state.

    Three integers are tracked, so the merged tracker is expected to report
    six integers, no floats, and each value with a frequency of two. The
    merged tracker is finally converted to protobuf and back; the result of
    that conversion is not inspected.
    """
    values = (10, 11, 13)
    tracker = NumberTracker()
    for value in values:
        tracker.track(value)

    combined = tracker.merge(tracker)

    # Counters
    assert combined.ints.count == 6
    assert combined.floats.count == 0

    # Histogram summary
    assert combined.histogram.get_n() == 6
    assert combined.histogram.get_max_value() == 13.0
    assert combined.histogram.get_min_value() == 10.0

    # Every tracked value was seen once per side of the merge.
    expected_rows = [(value, 2, 2, 2) for value in values]
    reported = combined.frequent_numbers.get_frequent_items()
    compare_frequent_items(expected_rows, reported)

    serialized = combined.to_protobuf()
    NumberTracker.from_protobuf(serialized)
Exemple #8
0
def test_copy_gives_same_results():
    """A copied sketch should report the same frequent items as its source."""
    source = make_sketch_and_track(ALL_VALS)
    duplicate = source.copy()
    source_items = source.get_frequent_items()
    duplicate_items = duplicate.get_frequent_items()
    compare_frequent_items(source_items, duplicate_items)
Exemple #9
0
def test_protobuf_roundtrip():
    """Convert a sketch to protobuf and back, then compare frequent items."""
    original = make_sketch_and_track(ALL_VALS)
    message = original.to_protobuf()
    restored = dsketch.FrequentItemsSketch.from_protobuf(message)
    original_items = original.get_frequent_items()
    restored_items = restored.get_frequent_items()
    compare_frequent_items(original_items, restored_items)