def get_register_value(raw_value, log2m):
    """
    Compute the HLL register value encoded in ``raw_value``.

    ``raw_value`` is shifted right (unsigned) by ``log2m`` bits to obtain the
    substream value. The result is ``0`` when that substream value is zero;
    otherwise it is one plus ``BitUtil.least_significant_bit`` of the
    substream value, capped at 31 and passed through
    ``BitUtil.to_signed_byte``.

    :param raw_value: the raw value to extract the register value from.
    :param log2m: number of bits to shift off ``raw_value`` before looking
        at the remaining bits (presumably the register-index width --
        confirm against callers).
    :returns: the register value, ``0`` if the substream value is zero.
    """
    shifted = BitUtil.unsigned_right_shift_long(raw_value, log2m)

    # The paper does not define p(0x0), so the special value 0 is returned.
    # Registers start out at 0, which means the HLL effectively ignores this
    # input. That is acceptable because the probability of hitting it is
    # 1/(2^(2^register_size_in_bits)).
    if shifted == 0:
        return 0

    # One-indexed position of the lowest set bit, clamped to at most 31.
    one_based_position = 1 + BitUtil.least_significant_bit(shifted)
    return BitUtil.to_signed_byte(min(one_based_position, 31))
Exemple #2
0
    def _add_raw_sparse_probabilistic(self, raw_value):
        """
        Record ``raw_value`` in ``_sparse_probabilistic_storage``.

        The bits of ``raw_value`` selected by ``_m_bits_mask`` give the
        register index. The bits left after an unsigned right shift by
        ``_log2m`` give the candidate register value. The stored value is
        replaced only when the candidate is strictly greater than what is
        already stored (a missing register counts as 0).

        Presumably only meant to be called while ``type`` is
        ``HLLType.SPARSE`` -- confirm against callers.

        :param long raw_value: the raw value to add to the sparse storage.
        :rtype: void
        """
        remaining_bits = BitUtil.unsigned_right_shift_long(
            raw_value, self._log2m)

        # The paper does not define p(0x0), so such a value is treated as
        # register value 0. A zero register is algorithmically "unset", and
        # unset registers are never stored in the sparse representation (that
        # is the whole point of it), so there is nothing to do. Skipping it is
        # acceptable because the probability of this case is
        # 1/(2^(2^register_size_in_bits)).
        if remaining_bits == 0:
            return

        # p(w): one-indexed position of the least significant set bit.
        # Contract: p(w) <= 2^(register_value_in_bits) - 1, the largest value
        # a register can hold.
        #
        # OR-ing in pw_max_mask enforces that bound. Per the constructor
        # (not visible here),
        #      lsb(pw_max_mask) = 2^(register_value_in_bits) - 2,
        # so lsb(any_long | pw_max_mask) <= 2^(register_value_in_bits) - 2,
        # and 1 + lsb(any_long | pw_max_mask) <= 2^(register_value_in_bits) - 1.
        candidate = BitUtil.to_signed_byte(
            1 + BitUtil.least_significant_bit(
                remaining_bits | self._pw_max_mask))

        # Never store a zero register: it would break the
        # algorithm_cardinality code, which expects unset registers to be
        # absent from the storage.
        if candidate == 0:
            return

        # NOTE:  no +1 as in paper since 0-based indexing
        register_index = int(raw_value & self._m_bits_mask)

        storage = self._sparse_probabilistic_storage
        if candidate > storage.get(register_index, 0):
            storage[register_index] = candidate