def eg1():
    """Print the estimated and the exact distinct count of ``data1``.

    Every item of ``data1`` is UTF-8 encoded and fed into a default
    ``HyperLogLog``; the sketch's count is printed first, followed by the
    size of a plain ``set`` built from the same items.
    """
    sketch = HyperLogLog()
    for item in data1:
        encoded = item.encode('utf8')
        sketch.update(encoded)
    estimate = sketch.count()
    print("Estimated cardinality is", estimate)

    # Exact answer for comparison.
    distinct_items = set(data1)
    print("Actual cardinality is", len(distinct_items))
def run_perf(card, p):
    """Time how long a ``HyperLogLog`` takes to digest ``card`` values.

    Args:
        card: Number of values to feed; the values are ``int_bytes(i)`` for
            each ``i`` in ``range(card)``.
        p: Passed to the ``HyperLogLog`` constructor as ``p``.

    Returns:
        The elapsed time of the update loop, in seconds, as reported by the
        selected timer.
    """
    # time.clock() was removed in Python 3.8. Prefer perf_counter() and only
    # fall back to clock() on interpreters old enough not to provide it.
    timer = getattr(time, "perf_counter", None) or time.clock
    h = HyperLogLog(p=p)
    logging.info("HyperLogLog using p = %d ", p)
    start = timer()
    for i in range(card):
        h.update(int_bytes(i))
    duration = timer() - start
    logging.info("Digested %d hashes in %.4f sec", card, duration)
    return duration
def _run_hyperloglog(A, B, data, seed, p):
    """Sketch two index ranges of ``data`` and compare the two sketches.

    Args:
        A: ``(start, end)`` pair; ``data[start]`` through ``data[end - 1]``
            are fed to the first sketch.
        B: ``(start, end)`` pair selecting the items for the second sketch.
        data: Collection indexable by integer position.
        seed: Seed handed to the murmur3 hasher on every call.
        p: Passed to both ``HyperLogLog`` constructors as ``p``.

    Returns:
        Whatever ``_hyperloglog_jaccard`` returns for the two sketches
        (presumably a Jaccard similarity estimate -- confirm against that
        helper).
    """
    (a_start, a_end), (b_start, b_end) = A, B
    hasher = pyhash.murmur3_32()
    h1 = HyperLogLog(p=p, hashobj=Hash)
    h2 = HyperLogLog(p=p, hashobj=Hash)
    # range() instead of xrange(): xrange does not exist on Python 3, while
    # range is available on both major versions.
    for i in range(a_start, a_end):
        h1.update(hasher(data[i], seed=seed))
    for i in range(b_start, b_end):
        h2.update(hasher(data[i], seed=seed))
    return _hyperloglog_jaccard(h1, h2)
def run_acc(size, seed, p):
    """Measure the relative counting error of a ``HyperLogLog``.

    Seeds the ``random`` module with ``seed``, draws ``size`` integers from
    ``[1, size]``, converts each with ``int_bytes`` and feeds it to both a
    sketch and an exact ``set``.

    Args:
        size: Number of draws, and also the upper bound of each draw.
        seed: Seed for the module-level ``random`` generator.
        p: Passed to the ``HyperLogLog`` constructor as ``p``.

    Returns:
        ``|exact - estimate| / exact`` as a float, where ``exact`` is the
        number of distinct values drawn.

    Raises:
        ZeroDivisionError: If no values were drawn (``size`` is 0).
    """
    logging.info("HyperLogLog using p = %d " % p)
    sketch = HyperLogLog(p=p)
    seen = set()
    random.seed(seed)
    for _ in range(size):
        value = int_bytes(random.randint(1, size))
        sketch.update(value)
        seen.add(value)
    exact = float(len(seen))
    deviation = abs(exact - sketch.count())
    return deviation / exact
def eg2():
    """Print the estimated and the exact size of ``data1`` union ``data2``.

    Each dataset is UTF-8 encoded item by item into its own default
    ``HyperLogLog``; the two sketches are merged with ``HyperLogLog.union``
    and the merged count is printed, followed by the size of the exact set
    union of the two datasets.
    """
    first = HyperLogLog()
    second = HyperLogLog()
    # Fill the sketches one after the other: data1 first, then data2.
    for sketch, items in ((first, data1), (second, data2)):
        for item in items:
            sketch.update(item.encode('utf8'))
    merged = HyperLogLog.union(first, second)
    print("Estimated union cardinality is", merged.count())

    # Exact answer for comparison.
    set_one = set(data1)
    set_two = set(data2)
    exact_union = set_one.union(set_two)
    print("Actual union cardinality is", len(exact_union))
def _run_hyperloglog(data, seed, p):
    """Return the ``HyperLogLog`` count for the items of ``data``.

    Args:
        data: Iterable of items accepted by the murmur3 hasher.
        seed: Seed handed to the hasher on every call.
        p: Passed to the ``HyperLogLog`` constructor as ``p``.

    Returns:
        The value of ``count()`` on the sketch after every item has been
        hashed and added.
    """
    murmur = pyhash.murmur3_32()
    sketch = HyperLogLog(p=p, hashobj=Hash)
    for item in data:
        digest = murmur(item, seed=seed)
        sketch.update(digest)
    return sketch.count()