def __init__(self, maxCardinality, error_rate):
    """Set up an empty LogLog sketch.

    maxCardinality -- the sketch is sized to count cardinalities up to
        this value; must be > 0.
    error_rate -- desired error of the cardinality estimate; must lie
        strictly between 0 and 1.

    Raises ValueError if either argument is outside its allowed range.
    """
    if not (0 < error_rate < 1):
        raise ValueError("Error_Rate must be between 0 and 1.")
    if not maxCardinality > 0:
        raise ValueError("maxCardinality must be > 0")
    self._maxCardinality = maxCardinality

    # k = round(log2((1.30 / error_rate)^2))
    # NOTE(review): 1.30 presumably comes from the LogLog standard-error
    # estimate 1.30/sqrt(m) -- confirm against the reference paper.
    squared_ratio = pow(1.30 / error_rate, 2)
    k = int(round(log(squared_ratio, 2)))
    self._k = k

    # m = 2**k buckets
    bucket_count = 1 << k
    self._bucketNumber = bucket_count

    # Width of each bucket is derived from maxCardinality by the helper.
    width = compute_wordsize(maxCardinality)
    self._bucketSize = width

    # M(1) ... M(m): one bit array per bucket, every bit cleared.
    self._bucketList = [bitarray(width) for _ in xrange(bucket_count)]
    for bucket in self._bucketList:
        bucket.setall(False)

    self.__name = "LogLog"
def __init__(self, maxCardinality, error_rate):
    """Set up an empty HyperLogLog sketch.

    maxCardinality -- the sketch is sized to count cardinalities up to
        this value; must be > 0.
    error_rate -- desired error of the cardinality estimate; must lie
        strictly between 0 and 1.

    Raises ValueError if either argument is outside its allowed range.
    """
    # Alpha constants are stored before validation and before the
    # __getALPHA call at the end of this method.
    # NOTE(review): presumably the bias-correction constants for
    # m = 16, 32 and 64 registers -- confirm against __getALPHA.
    self.__ALPHA16 = 0.673
    self.__ALPHA32 = 0.697
    self.__ALPHA64 = 0.709

    if not (0 < error_rate < 1):
        raise ValueError("Error_Rate must be between 0 and 1.")
    if not maxCardinality > 0:
        raise ValueError("maxCardinality must be > 0")
    self._maxCardinality = maxCardinality

    # k = round(log2((1.04 / error_rate)^2))
    # NOTE(review): 1.04 presumably comes from the HyperLogLog
    # standard-error estimate 1.04/sqrt(m) -- confirm against the paper.
    squared_ratio = pow(1.04 / error_rate, 2)
    k = int(round(log(squared_ratio, 2)))
    self._k = k

    # m = 2**k buckets
    bucket_count = 1 << k
    self._bucketNumber = bucket_count
    self._bucketSize = compute_wordsize(maxCardinality)

    # M(1) ... M(m) = 0: one integer register per bucket.
    self._bucketList = [0] * bucket_count

    self.__name = "HyperLogLog"
    self._alpha = self.__getALPHA(bucket_count)