def _hyperloglog_inclusion(h1, h2):
    """Estimate the fraction of the items counted by ``h1`` that ``h2`` also holds.

    The intersection size is obtained by inclusion-exclusion over three
    estimated cardinalities: count(h1) + count(h2) - count(union(h1, h2)).

    Args:
        h1: Sketch exposing ``count()`` and accepted by ``HyperLogLog.union``;
            its count is the denominator of the ratio.
        h2: Second sketch, with the same requirements as ``h1``.

    Returns:
        The estimated inclusion ratio, clamped to the range [0.0, 1.0].
        When ``h1.count()`` is zero the result is 1.0 and ``h2`` is not
        consulted at all.
    """
    c1 = h1.count()
    if c1 == 0.0:
        # Nothing counted in h1: treat it as fully included and avoid
        # dividing by zero below.
        return 1.0
    c2 = h2.count()
    uc = HyperLogLog.union(h1, h2).count()
    # All three counts are estimates, so the inclusion-exclusion result can
    # come out slightly negative or slightly larger than c1.
    ic = c1 + c2 - uc
    # Keep the returned ratio inside the range a proportion can take.
    return min(1.0, max(0.0, ic / c1))
def _hyperloglog_jaccard(h1, h2):
    """Estimate the Jaccard similarity of the two sets behind ``h1`` and ``h2``.

    The intersection size is obtained by inclusion-exclusion over three
    estimated cardinalities, count(h1) + count(h2) - count(union(h1, h2)),
    and is then divided by the estimated union size.

    Args:
        h1: Sketch exposing ``count()`` and accepted by ``HyperLogLog.union``.
        h2: Second sketch, with the same requirements as ``h1``.

    Returns:
        The estimated similarity, clamped to the range [0.0, 1.0].
        When the union count is zero the result is 1.0.
    """
    c1 = h1.count()
    c2 = h2.count()
    uc = HyperLogLog.union(h1, h2).count()
    if uc == 0.0:
        # Empty union: report full similarity and avoid dividing by zero.
        return 1.0
    # All three counts are estimates, so the inclusion-exclusion result can
    # come out slightly negative (or, in principle, above the union count).
    ic = c1 + c2 - uc
    # Keep the returned ratio inside the range a similarity can take.
    return min(1.0, max(0.0, ic / uc))
def eg2():
    """Print the estimated and the exact cardinality of ``data1`` united with ``data2``.

    ``data1`` and ``data2`` are names defined outside this function. Each of
    their items is encoded as UTF-8 and fed to its own ``HyperLogLog`` via
    ``update()``; the two sketches are then combined with
    ``HyperLogLog.union`` and the combined ``count()`` is printed. The exact
    figure printed afterwards is the size of the union of the two
    collections converted to Python sets.
    """
    h1 = HyperLogLog()
    h2 = HyperLogLog()
    for d in data1:
        # Items must provide .encode() (presumably str -- confirm at the
        # place where data1/data2 are defined).
        h1.update(d.encode('utf8'))
    for d in data2:
        h2.update(d.encode('utf8'))
    u = HyperLogLog.union(h1, h2)
    print("Estimated union cardinality is", u.count())

    # Exact reference value for comparison with the estimate above.
    s1 = set(data1)
    s2 = set(data2)
    su = s1.union(s2)
    print("Actual union cardinality is", len(su))