def __init__(
    self,
    name: str,
    number_tracker: NumberTracker = None,
    string_tracker: StringTracker = None,
    schema_tracker: SchemaTracker = None,
    counters: CountersTracker = None,
    frequent_items: FrequentItemsSketch = None,
    cardinality_tracker: HllSketch = None,
):
    """
    Initialize the profile for a single column.

    Parameters
    ----------
    name : str
        Stored as ``self.column_name``.
    number_tracker : NumberTracker, optional
        Defaults to a new ``NumberTracker()`` when ``None``.
    string_tracker : StringTracker, optional
        Defaults to a new ``StringTracker()`` when ``None``.
    schema_tracker : SchemaTracker, optional
        Defaults to a new ``SchemaTracker()`` when ``None``.
    counters : CountersTracker, optional
        Defaults to a new ``CountersTracker()`` when ``None``.
    frequent_items : FrequentItemsSketch, optional
        Defaults to a new ``FrequentItemsSketch()`` when ``None``.
    cardinality_tracker : HllSketch, optional
        Defaults to a new ``HllSketch()`` when ``None``.
    """
    # Resolve defaults first. Fresh instances are created per call (rather
    # than in the signature) so that profiles never share tracker state.
    resolved_counters = CountersTracker() if counters is None else counters
    resolved_numbers = (
        NumberTracker() if number_tracker is None else number_tracker
    )
    resolved_strings = (
        StringTracker() if string_tracker is None else string_tracker
    )
    resolved_schema = (
        SchemaTracker() if schema_tracker is None else schema_tracker
    )
    resolved_frequent = (
        FrequentItemsSketch() if frequent_items is None else frequent_items
    )
    resolved_cardinality = (
        HllSketch() if cardinality_tracker is None else cardinality_tracker
    )

    # Publish everything on the instance only after all defaults resolved.
    self.column_name = name
    self.number_tracker = resolved_numbers
    self.string_tracker = resolved_strings
    self.schema_tracker = resolved_schema
    self.counters = resolved_counters
    self.frequent_items = resolved_frequent
    self.cardinality_tracker = resolved_cardinality
def test_fallback_fallbacks_to_number_counter():
    """
    Track a mix of strings and floats, replace the column's cardinality
    tracker with a fresh ``HllSketch``, and check that the summary's
    unique-count estimate equals the number summary's unique-count estimate.
    """
    profile = ColumnProfile("test")
    for value in ["a", "b", 1.0, 2.0]:
        profile.track(value)

    # Discard whatever the cardinality tracker accumulated while tracking.
    profile.cardinality_tracker = HllSketch()

    result = profile.to_summary()
    overall_estimate = result.unique_count.estimate
    numeric_estimate = result.number_summary.unique_count.estimate
    assert overall_estimate == numeric_estimate
def test_fallback_number_counter():
    """
    Track mostly numeric values (ints and floats, some numerically equal)
    plus one string, replace the column's cardinality tracker with a fresh
    ``HllSketch``, and check that the summary's unique-count estimate equals
    the number summary's unique-count estimate.
    """
    profile = ColumnProfile("test")
    for value in [1, 1.0, 2, 3, 4, 5, 6, 6.0, "text"]:
        profile.track(value)

    # Discard whatever the cardinality tracker accumulated while tracking.
    profile.cardinality_tracker = HllSketch()

    result = profile.to_summary()
    overall_estimate = result.unique_count.estimate
    numeric_estimate = result.number_summary.unique_count.estimate
    assert overall_estimate == numeric_estimate
def from_protobuf(message):
    """
    Build a ColumnProfile from a protobuf message.

    Parameters
    ----------
    message
        Message exposing ``name``, ``counters``, ``schema``, ``numbers``,
        ``strings``, ``frequent_items`` and ``cardinality_tracker`` fields.

    Returns
    -------
    column_profile : ColumnProfile
    """
    # Deserialize each component with its own ``from_protobuf`` loader,
    # then hand the pieces to the constructor by keyword.
    column_name = message.name
    counters = CountersTracker.from_protobuf(message.counters)
    schema = SchemaTracker.from_protobuf(message.schema)
    numbers = NumberTracker.from_protobuf(message.numbers)
    strings = StringTracker.from_protobuf(message.strings)
    frequent = FrequentItemsSketch.from_protobuf(message.frequent_items)
    cardinality = HllSketch.from_protobuf(message.cardinality_tracker)

    return ColumnProfile(
        column_name,
        counters=counters,
        schema_tracker=schema,
        number_tracker=numbers,
        string_tracker=strings,
        frequent_items=frequent,
        cardinality_tracker=cardinality,
    )