def _full_probabilistic_algorithm_cardinality(self):
    """
    Computes the exact cardinality value returned by the HLL algorithm when
    represented as a ``HLLType.FULL`` HLL. Kept separate from
    ``cardinality()`` for testing purposes. ``type`` must be
    ``HLLType.FULL``.

    :returns: the exact, unrounded cardinality given by the HLL algorithm
    :rtype: float
    """
    from python_hll.hllutil import HLLUtil

    # for performance
    m = self._m

    # compute the "indicator function" -- sum(2^(-M[j])) where M[j] is the
    # 'j'th register value
    # NOTE: deliberately not named ``sum`` so the builtin is not shadowed;
    # the name and float start value match the SPARSE counterpart.
    indicator_function = 0.0
    number_of_zeroes = 0  # "V" in the paper
    for register in self._probabilistic_storage.register_iterator():
        indicator_function += 1.0 / BitUtil.left_shift_long(1, register)
        if register == 0:
            number_of_zeroes += 1

    # apply the estimate and correction to the indicator function
    estimator = self._alpha_m_squared / indicator_function
    if number_of_zeroes != 0 and estimator < self._small_estimator_cutoff:
        # small-range correction only applies while empty registers remain
        return HLLUtil.small_estimator(m, number_of_zeroes)
    elif estimator <= self._large_estimator_cutoff:
        # mid range: the raw estimate is returned uncorrected
        return estimator
    else:
        return HLLUtil.large_estimator(self._log2m, self._regwidth, estimator)
def construct_hll_value(log2m, register_index, register_value):
    """
    Builds a raw value that, when added raw to a HLL, will set the register
    at ``register_index`` to ``register_value``.

    The low ``log2m`` bits of the result carry the register index; the bits
    above them carry a single set bit at position ``register_value - 1``.

    :param log2m: The log-base-2 of the number of registers in the HLL
    :type log2m: int
    :param register_index: The index of the register to set
    :type register_index: int
    :param register_value: the value to set the register to
    :type register_value: int
    :returns: the combined substream/index value
    :rtype: int
    """
    # one set bit, placed so that it encodes ``register_value``
    single_bit = BitUtil.left_shift_long(1, register_value - 1)
    # make room below it for the register index
    upper_bits = BitUtil.left_shift_long(single_bit, log2m)
    return upper_bits | register_index
def _sparse_probabilistic_algorithm_cardinality(self):
    """
    Evaluates the HLL estimator for an HLL stored as ``HLLType.SPARSE`` and
    returns the result without rounding. It lives apart from
    ``cardinality()`` so tests can call it directly. ``type`` must be
    ``HLLType.SPARSE``.

    :returns: the exact, unrounded cardinality given by the HLL algorithm
    :rtype: float
    """
    from python_hll.hllutil import HLLUtil

    register_count = self._m
    storage = self._sparse_probabilistic_storage

    # Accumulate sum(2^(-M[j])) over every register index; registers absent
    # from the sparse storage are read as 0.
    inverse_power_sum = 0.0
    empty_registers = 0  # "V" in the paper
    for index in range(register_count):
        value = storage.get(index, 0)
        if value == 0:
            empty_registers += 1
        inverse_power_sum += 1.0 / BitUtil.left_shift_long(1, value)

    raw_estimate = self._alpha_m_squared / inverse_power_sum

    # Small-range correction: only when at least one register is still zero.
    if empty_registers != 0 and raw_estimate < self._small_estimator_cutoff:
        return HLLUtil.small_estimator(register_count, empty_registers)

    # Mid range: the raw estimate is returned as-is.
    if raw_estimate <= self._large_estimator_cutoff:
        return raw_estimate

    # Everything else gets the large-range correction.
    return HLLUtil.large_estimator(self._log2m, self._regwidth, raw_estimate)
def run_ascending_test(word_length, byte_padding, word_count):
    """
    Round-trips the ascending values ``0 .. word_count - 1`` (each masked to
    ``word_length`` bits) through the big-endian word serializer and
    deserializer, asserting that the word count and every word survive.

    :param word_length: width of each word in bits; 64 selects an all-ones mask
    :param byte_padding: padding argument forwarded to both the serializer
        and the deserializer
    :param word_count: number of words to write and read back
    """
    if word_length == 64:
        word_mask = ~0
    else:
        word_mask = BitUtil.left_shift_long(1, word_length) - 1

    # The same masked sequence is both written and later expected.
    expected_words = [index & word_mask for index in range(word_count)]

    writer = BigEndianAscendingWordSerializer(word_length, word_count, byte_padding)
    for word in expected_words:
        writer.write_word(word)
    encoded = writer.get_bytes()

    reader = BigEndianAscendingWordDeserializer(word_length, byte_padding, encoded)
    assert reader.total_word_count() == word_count
    for word in expected_words:
        assert reader.read_word() == word
def run_random_test(word_length, byte_padding, word_count, seed):
    """
    Round-trips ``word_count`` pseudo-random words (each masked to
    ``word_length`` bits) through the big-endian word serializer and
    deserializer, asserting that the word count and every word survive.

    The module-level ``random`` generator is seeded with ``seed`` before
    writing and re-seeded with the same value before verifying, so the
    expected sequence is regenerated rather than stored.

    :param word_length: width of each word in bits; 64 selects an all-ones mask
    :param byte_padding: padding argument forwarded to both the serializer
        and the deserializer
    :param word_count: number of words to write and read back
    :param seed: seed for the module-level ``random`` generator
    """
    random.seed(seed)

    if word_length == 64:
        word_mask = ~0
    else:
        word_mask = BitUtil.left_shift_long(1, word_length) - 1

    def next_word():
        # Draw the next value from the global generator, cut to word size.
        return random.randint(0, maxsize) & word_mask

    writer = BigEndianAscendingWordSerializer(word_length, word_count, byte_padding)
    for _ in range(word_count):
        writer.write_word(next_word())
    encoded = writer.get_bytes()

    reader = BigEndianAscendingWordDeserializer(word_length, byte_padding, encoded)
    assert reader.total_word_count() == word_count

    # Replay the identical random sequence to verify what was read back.
    random.seed(seed)
    for _ in range(word_count):
        assert reader.read_word() == next_word()
def test_left_shift_long_3():
    """``left_shift_long(128, 3)`` must equal 1024 (128 * 2**3)."""
    shifted = BitUtil.left_shift_long(128, 3)
    assert shifted == 1024
def test_left_shift_long_2():
    """``left_shift_long(214748364, 8)`` must equal 54975581184 (214748364 * 2**8)."""
    shifted = BitUtil.left_shift_long(214748364, 8)
    assert shifted == 54975581184
def test_left_shift_long_1():
    """
    ``left_shift_long(2**56 - 1, 8)`` must equal -256.

    The unbounded product is 2**64 - 256; -256 is that bit pattern read as a
    64-bit two's-complement value, which is what this test expects back.
    """
    shifted = BitUtil.left_shift_long(72057594037927935, 8)
    assert shifted == -256