def from_protobuf(message: NumbersMessage):
    """
    Build a NumberTracker from a protobuf message.

    The theta sketch is deserialized from ``message.theta`` when that field
    is non-empty, otherwise from ``message.compact_theta``.  If both are
    empty, ``None`` is handed to ``NumberTracker`` as the theta sketch.

    Parameters
    ----------
    message : NumbersMessage
        Message to load the tracker from.

    Returns
    -------
    number_tracker : NumberTracker
    """
    theta = None
    # Probe the candidate fields lazily, in priority order; first hit wins.
    for field_name in ("theta", "compact_theta"):
        payload = getattr(message, field_name)
        if payload is not None and len(payload) > 0:
            theta = ThetaSketch.deserialize(payload)
            break

    kwargs = {
        "theta_sketch": theta,
        "variance": VarianceTracker.from_protobuf(message.variance),
        "histogram": dsketch.deserialize_kll_floats_sketch(message.histogram),
        "frequent_numbers": dsketch.FrequentNumbersSketch.from_protobuf(
            message.frequent_numbers
        ),
    }

    # The float/int sub-trackers are only restored when the message has them.
    if message.HasField("doubles"):
        kwargs["floats"] = FloatTracker.from_protobuf(message.doubles)
    if message.HasField("longs"):
        kwargs["ints"] = IntTracker.from_protobuf(message.longs)

    return NumberTracker(**kwargs)
def __init__(
    self,
    variance: VarianceTracker = None,
    floats: FloatTracker = None,
    ints: IntTracker = None,
    theta_sketch: ThetaSketch = None,
    histogram: datasketches.kll_floats_sketch = None,
    frequent_numbers: dsketch.FrequentNumbersSketch = None,
):
    """
    Store the supplied components, creating a default for each one omitted.

    Parameters
    ----------
    variance : VarianceTracker, optional
        Defaults to ``VarianceTracker()``.
    floats : FloatTracker, optional
        Defaults to ``FloatTracker()``.
    ints : IntTracker, optional
        Defaults to ``IntTracker()``.
    theta_sketch : ThetaSketch, optional
        Defaults to ``ThetaSketch()``.
    histogram : datasketches.kll_floats_sketch, optional
        Defaults to ``datasketches.kll_floats_sketch(DEFAULT_HIST_K)``.
    frequent_numbers : dsketch.FrequentNumbersSketch, optional
        Defaults to ``dsketch.FrequentNumbersSketch()``.
    """
    # Our own trackers
    self.variance = VarianceTracker() if variance is None else variance
    self.floats = FloatTracker() if floats is None else floats
    self.ints = IntTracker() if ints is None else ints
    self.theta_sketch = ThetaSketch() if theta_sketch is None else theta_sketch
    self.histogram = (
        datasketches.kll_floats_sketch(DEFAULT_HIST_K)
        if histogram is None
        else histogram
    )
    self.frequent_numbers = (
        dsketch.FrequentNumbersSketch()
        if frequent_numbers is None
        else frequent_numbers
    )
def from_protobuf(message: NumbersMessage):
    """
    Build a NumberTracker from a protobuf message.

    Only the compact theta sketch field is deserialized.  A message that
    carries a non-empty ``theta`` field but no ``compact_theta`` triggers a
    warning, and ``None`` is handed to ``NumberTracker`` as the theta sketch.

    Parameters
    ----------
    message : NumbersMessage
        Message to load the tracker from.

    Returns
    -------
    number_tracker : NumberTracker
    """
    theta = None
    compact = message.compact_theta
    if compact is not None and len(compact) > 0:
        theta = ThetaSketch.deserialize(compact)
    else:
        # The legacy field is only inspected when no compact sketch exists.
        legacy = message.theta
        if legacy is not None and len(legacy) > 0:
            logger.warning(
                "Possible missing data. Non-compact theta sketches are no longer supported"
            )

    kwargs = {
        "theta_sketch": theta,
        "variance": VarianceTracker.from_protobuf(message.variance),
        "histogram": dsketch.deserialize_kll_floats_sketch(message.histogram),
    }

    # The float/int sub-trackers are only restored when the message has them.
    if message.HasField("doubles"):
        kwargs["floats"] = FloatTracker.from_protobuf(message.doubles)
    if message.HasField("longs"):
        kwargs["ints"] = IntTracker.from_protobuf(message.longs)

    return NumberTracker(**kwargs)
def from_protobuf(message: StringsMessage):
    """
    Build a StringTracker from a protobuf message.

    Only the compact theta sketch field is deserialized.  A message that
    carries a non-empty ``theta`` field but no ``compact_theta`` triggers a
    warning, and ``None`` is handed to ``StringTracker`` as the theta sketch.

    Parameters
    ----------
    message : StringsMessage
        Message to load the tracker from.

    Returns
    -------
    string_tracker : StringTracker
    """
    theta = None
    compact = message.compact_theta
    if compact is not None and len(compact) > 0:
        theta = ThetaSketch.deserialize(compact)
    else:
        # The legacy field is only inspected when no compact sketch exists.
        legacy = message.theta
        if legacy is not None and len(legacy) > 0:
            logger.warning(
                "Possible missing data. Non-compact theta sketches are no longer supported"
            )

    # Restore each component in turn, then assemble the tracker.
    count = message.count
    items = dsketch.deserialize_frequent_strings_sketch(message.items)
    length = NumberTracker.from_protobuf(message.length)
    token_length = NumberTracker.from_protobuf(message.token_length)
    char_pos_tracker = CharPosTracker.from_protobuf(message.char_pos_tracker)

    return StringTracker(
        count=count,
        items=items,
        theta_sketch=theta,
        length=length,
        token_length=token_length,
        char_pos_tracker=char_pos_tracker,
    )
def __init__(
    self,
    count: int = None,
    items: frequent_strings_sketch = None,
    theta_sketch: ThetaSketch = None,
    length: NumberTracker = None,
    token_length: NumberTracker = None,
    char_pos_tracker: CharPosTracker = None,
    token_method: Callable[[str], List[str]] = None,
):
    """
    Store the supplied components, creating a default for each one omitted.

    Every optional argument is defaulted only when it is ``None``, so a
    caller-supplied object is always kept, even if it evaluates as falsy.

    Parameters
    ----------
    count : int, optional
        Defaults to 0.
    items : frequent_strings_sketch, optional
        Defaults to a new sketch sized ``round(math.log(MAX_ITEMS_SIZE))``.
    theta_sketch : ThetaSketch, optional
        Defaults to ``ThetaSketch()``.
    length : NumberTracker, optional
        Defaults to ``NumberTracker()``.
    token_length : NumberTracker, optional
        Defaults to ``NumberTracker()``.
    char_pos_tracker : CharPosTracker, optional
        Defaults to ``CharPosTracker()``.
    token_method : Callable[[str], List[str]], optional
        Callable taking a string and returning a list of strings.
        Defaults to splitting on a single space.
    """
    if count is None:
        count = 0
    if items is None:
        items = frequent_strings_sketch(round(math.log(MAX_ITEMS_SIZE)))
    if theta_sketch is None:
        theta_sketch = ThetaSketch()
    # Use identity checks, not truthiness: a supplied tracker that happens
    # to be falsy must not be silently swapped for a fresh default.
    if char_pos_tracker is None:
        char_pos_tracker = CharPosTracker()
    if length is None:
        length = NumberTracker()
    if token_length is None:
        token_length = NumberTracker()
    if token_method is None:
        token_method = lambda x: x.split(" ")

    self.count = count
    self.items = items
    self.theta_sketch = theta_sketch
    self.char_pos_tracker = char_pos_tracker
    self.length = length
    self.token_length = token_length
    self.token_method = token_method
def from_protobuf(message: StringsMessage):
    """
    Build a StringTracker from a protobuf message.

    The theta sketch is deserialized from ``message.theta`` when that field
    is non-empty, otherwise from ``message.compact_theta``.  If both are
    empty, ``None`` is handed to ``StringTracker`` as the theta sketch.

    Parameters
    ----------
    message : StringsMessage
        Message to load the tracker from.

    Returns
    -------
    string_tracker : StringTracker
    """
    theta = None
    # Probe the candidate fields lazily, in priority order; first hit wins.
    for field_name in ("theta", "compact_theta"):
        payload = getattr(message, field_name)
        if payload is not None and len(payload) > 0:
            theta = ThetaSketch.deserialize(payload)
            break

    count = message.count
    items = dsketch.deserialize_frequent_strings_sketch(message.items)
    return StringTracker(count=count, items=items, theta_sketch=theta)
def __init__(
    self,
    count: int = None,
    items: frequent_strings_sketch = None,
    theta_sketch: ThetaSketch = None,
):
    """
    Store the supplied components, creating a default for each one omitted.

    Parameters
    ----------
    count : int, optional
        Defaults to 0.
    items : frequent_strings_sketch, optional
        Defaults to a new sketch sized ``round(math.log(MAX_ITEMS_SIZE))``.
    theta_sketch : ThetaSketch, optional
        Defaults to ``ThetaSketch()``.
    """
    self.count = 0 if count is None else count
    self.items = (
        frequent_strings_sketch(round(math.log(MAX_ITEMS_SIZE)))
        if items is None
        else items
    )
    self.theta_sketch = ThetaSketch() if theta_sketch is None else theta_sketch