Exemple #1
0
    def _full_probabilistic_algorithm_cardinality(self):
        """
        Computes the exact cardinality value returned by the HLL algorithm when
        represented as a ``HLLType.FULL`` HLL. Kept separate from
        ``cardinality()`` for testing purposes. ``type`` must be
        ``HLLType.FULL``.

        :returns: the exact, unrounded cardinality given by the HLL algorithm
        :rtype: float
        """
        from python_hll.hllutil import HLLUtil
        # read the register count once; it is reused by the small-range
        # correction below
        m = self._m

        # compute the "indicator function" -- sum(2^(-M[j])) where M[j] is the
        # 'j'th register value
        indicator_function = 0.0
        number_of_zeroes = 0  # "V" in the paper
        for register in self._probabilistic_storage.register_iterator():
            indicator_function += 1.0 / BitUtil.left_shift_long(1, register)
            if register == 0:
                number_of_zeroes += 1

        # apply the estimate and correction to the indicator function
        estimator = self._alpha_m_squared / indicator_function
        if number_of_zeroes != 0 and estimator < self._small_estimator_cutoff:
            # small-range correction: only applies while empty registers remain
            return HLLUtil.small_estimator(m, number_of_zeroes)
        elif estimator <= self._large_estimator_cutoff:
            # mid range: the raw estimate is returned uncorrected
            return estimator
        else:
            # large-range correction
            return HLLUtil.large_estimator(self._log2m, self._regwidth,
                                           estimator)
def construct_hll_value(log2m, register_index, register_value):
    """
    Builds a raw value which, when added to a HLL, sets the register at
    ``register_index`` to ``register_value``.

    :param log2m: The log-base-2 of the number of registers in the HLL
    :type log2m: int
    :param register_index: The index of the register to set
    :type register_index: int
    :param register_value: the value to set the register to
    :type register_value: int
    :returns: the shifted substream value OR-ed with ``register_index``
    :rtype: int
    """
    # a 1 shifted left by (register_value - 1) positions
    single_bit = BitUtil.left_shift_long(1, register_value - 1)
    # shift by log2m, then OR the register index into the result
    shifted = BitUtil.left_shift_long(single_bit, log2m)
    return shifted | register_index
Exemple #3
0
    def _sparse_probabilistic_algorithm_cardinality(self):
        """
        Evaluates the HLL cardinality estimate for a HLL held in its
        ``HLLType.SPARSE`` representation. This lives apart from
        ``cardinality()`` so it can be tested directly. ``type`` must be
        ``HLLType.SPARSE``.

        :returns: the exact, unrounded cardinality given by the HLL algorithm
        :rtype: float
        """
        from python_hll.hllutil import HLLUtil
        register_count = self._m

        # Accumulate sum(2^(-M[j])) over every register index j; indices that
        # are absent from the sparse storage are read as 0.
        inverse_power_total = 0.0
        empty_registers = 0  # "V" in the paper
        for index in range(register_count):
            value = self._sparse_probabilistic_storage.get(index, 0)
            if value == 0:
                empty_registers += 1
            inverse_power_total += 1.0 / BitUtil.left_shift_long(1, value)

        raw_estimate = self._alpha_m_squared / inverse_power_total

        # small-range correction: only applies while empty registers remain
        if empty_registers != 0 and raw_estimate < self._small_estimator_cutoff:
            return HLLUtil.small_estimator(register_count, empty_registers)

        # mid range: the raw estimate is returned uncorrected
        if raw_estimate <= self._large_estimator_cutoff:
            return raw_estimate

        # large-range correction
        return HLLUtil.large_estimator(self._log2m, self._regwidth,
                                       raw_estimate)
Exemple #4
0
def run_ascending_test(word_length, byte_padding, word_count):
    """
    Round-trips the ascending values ``0 .. word_count - 1`` (each masked to
    the word width) through the serializer and deserializer, asserting that
    the same words come back in the same order.

    :param word_length: the word width in bits passed to both the serializer
           and the deserializer
    :param byte_padding: the byte padding passed to both the serializer and
           the deserializer
    :param word_count: the number of words to write and read back
    """
    if word_length == 64:
        # all bits set: the mask leaves the value unchanged
        word_mask = ~0
    else:
        word_mask = BitUtil.left_shift_long(1, word_length) - 1

    writer = BigEndianAscendingWordSerializer(word_length, word_count, byte_padding)

    expected_words = [index & word_mask for index in range(word_count)]
    for word in expected_words:
        writer.write_word(word)

    serialized = writer.get_bytes()

    reader = BigEndianAscendingWordDeserializer(word_length, byte_padding, serialized)

    assert reader.total_word_count() == word_count

    for word in expected_words:
        assert reader.read_word() == word
Exemple #5
0
def run_random_test(word_length, byte_padding, word_count, seed):
    """
    Round-trips ``word_count`` pseudo-random words through the serializer and
    deserializer, asserting that the same words come back in the same order.

    The module-level ``random`` generator is seeded with ``seed`` before
    writing and again before verifying, so the verification pass replays the
    values that were written.

    :param word_length: the word width in bits passed to both the serializer
           and the deserializer
    :param byte_padding: the byte padding passed to both the serializer and
           the deserializer
    :param word_count: the number of words to write and read back
    :param seed: the seed for the ``random`` module
    """
    random.seed(seed)

    if word_length == 64:
        # all bits set: the mask leaves the value unchanged
        word_mask = ~0
    else:
        word_mask = BitUtil.left_shift_long(1, word_length) - 1

    def next_word():
        # next draw from the shared generator, masked with word_mask
        return random.randint(0, maxsize) & word_mask

    writer = BigEndianAscendingWordSerializer(word_length, word_count, byte_padding)

    for _ in range(word_count):
        writer.write_word(next_word())

    serialized = writer.get_bytes()

    reader = BigEndianAscendingWordDeserializer(word_length, byte_padding, serialized)

    assert reader.total_word_count() == word_count

    # re-seed so the generator replays the sequence drawn while writing
    random.seed(seed)
    for _ in range(word_count):
        assert reader.read_word() == next_word()
def test_left_shift_long_3():
    """Shifting 128 left by 3 bits yields 1024 (128 * 2**3)."""
    shifted = BitUtil.left_shift_long(128, 3)
    assert shifted == 1024
def test_left_shift_long_2():
    """
    Shifting 214748364 left by 8 bits yields 54975581184 (214748364 * 2**8),
    a result larger than 2**32.
    """
    shifted = BitUtil.left_shift_long(214748364, 8)
    assert shifted == 54975581184
def test_left_shift_long_1():
    """
    Shifting 2**56 - 1 left by 8 bits is expected to yield -256, which is
    2**64 - 256 read as a signed 64-bit two's-complement value.
    """
    operand = 72057594037927935  # 2**56 - 1
    shifted = BitUtil.left_shift_long(operand, 8)
    assert shifted == -256