Example #1
0
    def from_protobuf(message: StringsMessage):
        """
        Build a StringTracker from its protobuf representation.

        The theta sketch is restored only from ``message.compact_theta``.
        When that field is empty but ``message.theta`` holds data, a warning
        is logged and the tracker is created without a theta sketch.

        Parameters
        ----------
        message : StringsMessage
            Protobuf message to load from.

        Returns
        -------
        string_tracker : StringTracker
        """
        compact_bytes = message.compact_theta
        has_compact = compact_bytes is not None and len(compact_bytes) > 0

        if has_compact:
            restored_theta = ThetaSketch.deserialize(compact_bytes)
        else:
            restored_theta = None
            legacy_bytes = message.theta
            if legacy_bytes is not None and len(legacy_bytes) > 0:
                # Legacy payload present but unreadable here: flag it instead
                # of failing, and continue without a theta sketch.
                logger.warning(
                    "Possible missing data. Non-compact theta sketches are no longer supported"
                )

        # Sub-components are rebuilt in the same order as the keyword
        # arguments they feed.
        total = message.count
        frequent_items = dsketch.deserialize_frequent_strings_sketch(
            message.items)
        length_tracker = NumberTracker.from_protobuf(message.length)
        token_length_tracker = NumberTracker.from_protobuf(
            message.token_length)
        position_tracker = CharPosTracker.from_protobuf(
            message.char_pos_tracker)

        return StringTracker(
            count=total,
            items=frequent_items,
            theta_sketch=restored_theta,
            length=length_tracker,
            token_length=token_length_tracker,
            char_pos_tracker=position_tracker,
        )
Example #2
0
def test_deserialize_full_strings_returns_sketch():
    """Deserializing FULL_STRING_SKETCH yields a populated frequent-strings sketch."""
    expected_items = [
        ("C", 36, 36, 36),
        ("B", 22, 22, 22),
        ("A", 19, 19, 19),
        ("D", 14, 14, 14),
    ]

    sketch = dsketch.deserialize_frequent_strings_sketch(FULL_STRING_SKETCH)

    assert isinstance(sketch, datasketches.frequent_strings_sketch)
    assert sketch.get_num_active_items() == 6

    error_type = datasketches.frequent_items_error_type.NO_FALSE_NEGATIVES
    reported_items = sketch.get_frequent_items(error_type, 7)
    assert reported_items == expected_items
Example #3
0
    def from_protobuf(message: StringsMessage):
        """
        Build a StringTracker from its protobuf representation.

        The theta sketch is deserialized from ``message.theta`` when that
        field holds data, otherwise from ``message.compact_theta``. If both
        are empty the tracker is created without a theta sketch.

        Parameters
        ----------
        message : StringsMessage
            Protobuf message to load from.

        Returns
        -------
        string_tracker : StringTracker
        """
        # Pick the serialized sketch to use: ``theta`` wins over
        # ``compact_theta`` when both are populated.
        serialized = message.theta
        if serialized is None or len(serialized) == 0:
            serialized = message.compact_theta

        if serialized is not None and len(serialized) > 0:
            restored_theta = ThetaSketch.deserialize(serialized)
        else:
            restored_theta = None

        total = message.count
        frequent_items = dsketch.deserialize_frequent_strings_sketch(
            message.items)
        return StringTracker(
            count=total,
            items=frequent_items,
            theta_sketch=restored_theta,
        )
Example #4
0
def test_deserialize_empty_strings_returns_none():
    """Deserializing EMPTY_STRING_SKETCH yields None rather than a sketch."""
    assert dsketch.deserialize_frequent_strings_sketch(
        EMPTY_STRING_SKETCH) is None