Example #1
0
def test_protobuf():
    """Round-trip a ColumnProfile through protobuf and inspect its trackers.

    Three integers are tracked, the profile is serialized and rebuilt, and
    the rebuilt profile is checked for its column name, the presence of the
    number and string trackers, and the state of the string tracker.
    """
    original = ColumnProfile("col")
    for number in (1, 2, 3):
        original.track(number)

    serialized = original.to_protobuf()
    restored = ColumnProfile.from_protobuf(serialized)

    assert restored.column_name == original.column_name == "col"
    for tracker_attr in ("number_tracker", "string_tracker"):
        assert hasattr(restored, tracker_attr)

    string_tracker = restored.string_tracker
    assert string_tracker.length is not None

    # Only integers were tracked above; the test expects a zero length count.
    assert string_tracker.length.count == 0
    # NOTE(review): 56 is presumably the size of the default character
    # list -- confirm against the string tracker implementation.
    assert len(string_tracker.char_pos_tracker.character_list) == 56

    # The rebuilt profile must itself be serializable again.
    restored.to_protobuf()
Example #2
0
def test_copy_counters_null_count_in_schema_tracker():
    """Check how a counters-level null_count shows up after deserialization.

    Tracks a mix of strings, floats and two null-like values (``None`` and
    ``pd.NA``), verifies the NULL count in the schema tracker and in the
    summary, then sets ``counters.null_count`` on the protobuf message and
    checks the NULL count reported by the profile rebuilt from it.
    """
    profile = ColumnProfile("test")
    for item in ("a", "b", None, "d", pd.NA, "f", 1.0, 2.0):
        profile.track(item)

    null_type = InferredType.Type.NULL
    assert profile.schema_tracker.get_count(null_type) == 2

    # The null count must also remain reachable through the summary view.
    summary = profile.to_summary()
    assert summary.counters.null_count.value == 2

    # Build a legal protobuf message that carries an explicit null_count.
    message: ColumnMessage = profile.to_protobuf()
    message.counters.null_count.value = 2

    # NOTE(review): 4 is presumably the schema tracker's own 2 NULLs plus the
    # 2 copied over from counters.null_count -- confirm in from_protobuf.
    restored = ColumnProfile.from_protobuf(message)
    assert restored.schema_tracker.get_count(null_type) == 4
def test_protobuf():
    """Round-trip a ColumnProfile through protobuf and compare frequent numbers.

    Tracks the integers 1, 2 and 3, serializes the profile, rebuilds it with
    ``ColumnProfile.from_protobuf`` and checks the column name, the presence
    of the number tracker, and the frequent-number items of both profiles.

    NOTE(review): this excerpt ends right after the sketches are blanked;
    any final message comparison is not visible here.
    """
    c = ColumnProfile("col")
    for val in [1, 2, 3]:
        c.track(val)
    msg = c.to_protobuf()
    c1 = ColumnProfile.from_protobuf(msg)
    assert c1.column_name == c.column_name == "col"
    assert hasattr(c1, "number_tracker")
    # Serialize the rebuilt profile again to obtain a second message.
    msg2 = c1.to_protobuf()
    # We cannot do a straight equality comparison for serialized frequent
    # strings objects, so the frequent items are compared via the helper.
    compare_frequent_items(
        c1.number_tracker.frequent_numbers.get_frequent_items(),
        c.number_tracker.frequent_numbers.get_frequent_items(),
    )
    # Blank the serialized sketch bytes in both messages -- presumably so a
    # later whole-message comparison ignores them (TODO confirm).
    msg.numbers.frequent_numbers.sketch = bytes()
    msg2.numbers.frequent_numbers.sketch = bytes()