def get_register_value(raw_value, log2m):
    """
    Derives the HLL register value carried by a raw value.

    The raw value is unsigned-right-shifted by ``log2m`` bits; the register
    value is then one plus the least-significant-bit index of the result,
    capped at 31 and converted to a signed byte.

    :param raw_value: the raw value to extract the register value from.
    :param log2m: the number of bits to shift out of ``raw_value``.
    :returns: 0 if the shifted value is zero, otherwise the capped,
              one-indexed position of its least significant set bit.
    """
    remaining_bits = BitUtil.unsigned_right_shift_long(raw_value, log2m)

    if remaining_bits == 0:
        # p(0x0) is not defined by the paper, so the special value 0 stands
        # in for it. Registers start out at 0, which means the HLL simply
        # ignores such a value. That is acceptable since the probability of
        # it occurring is 1/(2^(2^register_size_in_bits)).
        return 0

    # One-indexed position of the least significant set bit, capped at 31.
    lsb_position = 1 + BitUtil.least_significant_bit(remaining_bits)
    return BitUtil.to_signed_byte(min(lsb_position, 31))
def _add_raw_sparse_probabilistic(self, raw_value):
    """
    Folds a raw value into ``sparseProbabilisticStorage``
    (``type`` ``HLLType.SPARSE``).

    The register index is taken from ``raw_value`` masked with
    ``_m_bits_mask``; the register is only overwritten when the newly
    computed value is strictly greater than the one already stored (an
    absent register counts as 0).

    :param long raw_value: the raw value to add to the sparse storage.
    :returns: None
    """
    shifted = BitUtil.unsigned_right_shift_long(raw_value, self._log2m)

    if shifted == 0:
        # p(0x0) is not defined by the paper, so it is treated as the
        # special value 0. Registers are initialized to 0, so the multiset
        # effectively ignores such a value; this is acceptable because the
        # probability is 1/(2^(2^register_size_in_bits)). A zero register is
        # an "unset" register and is never stored (see below), so there is
        # nothing more to do.
        return

    # p(w) is the one-indexed position of the least significant set bit and
    # must not exceed the max register value,
    # 2^(register_value_in_bits) - 1.
    #
    # pw_max_mask is constructed (see constructor) so that
    #   lsb(pw_max_mask) = 2^(register_value_in_bits) - 2,
    # hence
    #   lsb(any_long | pw_max_mask) <= 2^(register_value_in_bits) - 2,
    # and therefore
    #   1 + lsb(any_long | pw_max_mask) <= 2^(register_value_in_bits) - 1.
    masked = shifted | self._pw_max_mask
    register_value = BitUtil.to_signed_byte(
        1 + BitUtil.least_significant_bit(masked))

    # A register being set to zero is algorithmically an "unset" register,
    # and unset registers are not stored in order to save memory (the very
    # reason the sparse implementation exists). Storing a zero would break
    # the algorithm_cardinality code, so bail out instead.
    if register_value == 0:
        return

    # NOTE: unlike the paper there is no +1 here, since indexing is 0-based.
    register_index = int(raw_value & self._m_bits_mask)

    storage = self._sparse_probabilistic_storage
    if register_value > storage.get(register_index, 0):
        storage[register_index] = register_value