Ejemplo n.º 1
0
def mainScalable(capacity=100000, request_error_rate=0.001):
    """Benchmark a Redis-backed scalable Bloom filter and print the results.

    Inserts the integers ``0 .. capacity - 1``, prints the insertion
    throughput and bit-occupancy statistics read back from Redis, then
    probes integers that were never inserted to measure the observed
    false-positive rate.

    Args:
        capacity: Number of distinct integers to insert.
        request_error_rate: Target false-positive rate handed to the filter.

    Returns:
        None; every result is written to stdout.
    """
    server = StrictRedis.from_url("redis://192.168.254.106:6379")
    bfkeypreffix = "test-sbf"
    f = ScalableRedisBloomFilter(initial_capacity=10000,
                                 server=server,
                                 bfkeypreffix=bfkeypreffix,
                                 error_rate=request_error_rate,
                                 max_filters=5)
    start = time.time()
    for i in range_fn(0, capacity):
        f.add(i)
    end = time.time()
    # Throughput is based on the number of items actually inserted
    # (``capacity``); the filter's own ``f.capacity`` need not match it.
    print("{:5.3f} seconds to add to capacity, {:10.2f} entries/second".format(
        end - start, capacity / (end - start)))

    # Count the set bits of every slice key of every registered sub-filter.
    oneBits = 0
    for filterKey in server.lrange('%s:fs' % bfkeypreffix, 0, -1):
        # redis-py hands back bytes unless the client decodes responses;
        # normalise to text so the str separator and key pattern below work.
        if isinstance(filterKey, bytes):
            filterKey = filterKey.decode('utf-8')
        # Each list entry has the form "<filter key>||<suffix>"; only the
        # key part is needed to locate the slices.
        filterKey, _ = filterKey.rsplit("||", 1)
        for key in server.keys("%s:s:%s" % (filterKey, "*")):
            oneBits += server.bitcount(key)

    # Snapshot the sub-filters once instead of re-reading the map per total.
    filters = list(f.filtersMap.values())
    total_bits = sum(ff.num_bits for ff in filters)
    print("Number of Filter Bits:", total_bits)
    print("Number of slices:", sum(ff.num_slices for ff in filters))
    print("Bits per slice:", sum(ff.bits_per_slice for ff in filters))
    print("------")
    print("Fraction of 1 bits at capacity: {:5.3f}".format(
        oneBits / float(total_bits)))

    # Look for false positives and measure the actual fp rate.
    trials = f.capacity
    fp = 0
    start = time.time()
    # Probe exactly ``trials`` values, starting at the first integer that was
    # never inserted, so every hit is a genuine false positive and the
    # divisor below matches the number of probes.
    for i in range_fn(capacity, capacity + trials):
        if i in f:
            fp += 1
    end = time.time()
    print(("{:5.3f} seconds to check false positives, "
           "{:10.2f} checks/second".format(end - start,
                                           trials / (end - start))))
    print("Requested FP rate: {:2.4f}".format(request_error_rate))
    print("Experimental false positive rate: {:2.4f}".format(fp /
                                                             float(trials)))
Ejemplo n.º 2
0
def main(capacity=100000, request_error_rate=0.001):
    """Benchmark a Redis-backed Bloom filter and print the results.

    Fills the filter with the integers ``0 .. capacity - 1``, prints the
    insertion throughput and bit-occupancy statistics read back from Redis,
    then probes integers that were never inserted to measure the observed
    false-positive rate and compares it with the Goel/Gupta bound.

    Args:
        capacity: Capacity requested from the filter; also the number of
            integers inserted.
        request_error_rate: Target false-positive rate handed to the filter.

    Returns:
        None; every result is written to stdout.
    """
    server = StrictRedis.from_url("redis://127.0.0.1:6379")
    bfkeypreffix = "test-bf"
    f = RedisBloomFilter(capacity=capacity,
                         server=server,
                         bfkeypreffix=bfkeypreffix,
                         error_rate=request_error_rate)
    assert (capacity == f.capacity)
    start = time.time()
    for i in range_fn(0, f.capacity):
        f.add(i, skip_check=True)
    end = time.time()
    print("{:5.3f} seconds to add to capacity, {:10.2f} entries/second".format(
        end - start, f.capacity / (end - start)))

    # Total number of set bits across all slice keys of this filter.
    keys = server.keys("%s:s:%s" % (bfkeypreffix, "*"))
    oneBits = sum(server.bitcount(key) for key in keys)
    print("Number of Filter Bits:", f.num_bits)
    print("Number of slices:", f.num_slices)
    print("Bits per slice:", f.bits_per_slice)
    print("------")
    print("Fraction of 1 bits at capacity: {:5.3f}".format(oneBits /
                                                           float(f.num_bits)))

    # Look for false positives and measure the actual fp rate.
    trials = f.capacity
    fp = 0
    start = time.time()
    # Probe exactly ``trials`` never-inserted values so the divisor used for
    # the rates below matches the number of probes performed.
    for i in range_fn(f.capacity, f.capacity + trials):
        if i in f:
            fp += 1
    end = time.time()
    print(("{:5.3f} seconds to check false positives, "
           "{:10.2f} checks/second".format(end - start,
                                           trials / (end - start))))
    print("Requested FP rate: {:2.4f}".format(request_error_rate))
    print("Experimental false positive rate: {:2.4f}".format(fp /
                                                             float(trials)))

    # Compute theoretical fp max (Goel/Gupta).
    k = f.num_slices
    m = f.num_bits
    n = f.capacity
    fp_theory = math.pow((1 - math.exp(-k * (n + 0.5) / (m - 1))), k)
    print("Projected FP rate (Goel/Gupta): {:2.6f}".format(fp_theory))
Ejemplo n.º 3
0
def make_hashfuncs(num_slices, num_bits):
    """Return a generator function that maps a key to bit positions.

    The returned callable takes a key and yields ``num_slices`` integers,
    each reduced modulo ``num_bits``. Text keys are UTF-8 encoded before
    hashing; any other key is hashed through its ``str()`` form.
    """
    # Struct code and byte width of the unsigned integers unpacked from each
    # digest; larger ``num_bits`` selects a wider integer.
    fmt_code, chunk_size = 'H', 2
    for floor, code, width in ((1 << 31, 'Q', 8), (1 << 15, 'I', 4)):
        if num_bits >= floor:
            fmt_code, chunk_size = code, width
            break

    # Choose the hash by how many digest bits all slices need together.
    total_hash_bits = 8 * num_slices * chunk_size
    hashfn = hashlib.md5
    for limit, candidate in ((384, hashlib.sha512),
                             (256, hashlib.sha384),
                             (160, hashlib.sha256),
                             (128, hashlib.sha1)):
        if total_hash_bits > limit:
            hashfn = candidate
            break

    values_per_digest = hashfn().digest_size // chunk_size
    fmt = fmt_code * values_per_digest
    # Ceiling division: one salted hash per ``values_per_digest`` slices.
    num_salts = -(-num_slices // values_per_digest)
    salts = tuple(
        hashfn(hashfn(pack('I', n)).digest()) for n in range_fn(num_salts))

    def _make_hashfuncs(key):
        if running_python_3:
            text = key if isinstance(key, str) else str(key)
            data = text.encode('utf-8')
        elif isinstance(key, unicode):
            data = key.encode('utf-8')
        else:
            data = str(key)
        remaining = num_slices
        for salt in salts:
            # Copy so the pre-seeded salt state is reused for every key.
            hasher = salt.copy()
            hasher.update(data)
            for value in unpack(fmt, hasher.digest()):
                yield value % num_bits
                remaining -= 1
                if remaining <= 0:
                    return

    return _make_hashfuncs
Ejemplo n.º 4
0
def main(capacity=100000, request_error_rate=0.1):
    """Benchmark an in-memory Bloom filter and print the results.

    Fills the filter with the integers ``0 .. capacity - 1``, prints the
    insertion throughput and bit-occupancy statistics, then probes integers
    that were never inserted to measure the observed false-positive rate and
    compares it with the Goel/Gupta bound.

    Args:
        capacity: Capacity requested from the filter; also the number of
            integers inserted.
        request_error_rate: Target false-positive rate handed to the filter.

    Returns:
        None; every result is written to stdout.
    """
    f = BloomFilter(capacity=capacity, error_rate=request_error_rate)
    assert (capacity == f.capacity)
    start = time.time()
    for i in range_fn(0, f.capacity):
        f.add(i, skip_check=True)
    end = time.time()
    print("{:5.3f} seconds to add to capacity, {:10.2f} entries/second".format(
        end - start, f.capacity / (end - start)))
    oneBits = f.bitarray.count(True)
    print("Number of Filter Bits:", f.num_bits)
    print("Number of slices:", f.num_slices)
    print("Bits per slice:", f.bits_per_slice)
    print("------")
    print("Fraction of 1 bits at capacity: {:5.3f}".format(oneBits /
                                                           float(f.num_bits)))

    # Look for false positives and measure the actual fp rate.
    trials = f.capacity
    fp = 0
    start = time.time()
    # Probe exactly ``trials`` never-inserted values so the divisor used for
    # the rates below matches the number of probes performed.
    for i in range_fn(f.capacity, f.capacity + trials):
        if i in f:
            fp += 1
    end = time.time()
    print(("{:5.3f} seconds to check false positives, "
           "{:10.2f} checks/second".format(end - start,
                                           trials / (end - start))))
    print("Requested FP rate: {:2.4f}".format(request_error_rate))
    print("Experimental false positive rate: {:2.4f}".format(fp /
                                                             float(trials)))

    # Compute theoretical fp max (Goel/Gupta).
    k = f.num_slices
    m = f.num_bits
    n = f.capacity
    fp_theory = math.pow((1 - math.exp(-k * (n + 0.5) / (m - 1))), k)
    print("Projected FP rate (Goel/Gupta): {:2.6f}".format(fp_theory))
Ejemplo n.º 5
0
def main(capacity=100000, request_error_rate=0.1):
    """Benchmark an in-memory Bloom filter and print the results.

    Fills the filter with the integers ``0 .. capacity - 1``, prints the
    insertion throughput and bit-occupancy statistics, then probes integers
    that were never inserted to measure the observed false-positive rate and
    compares it with the Goel/Gupta bound.

    Args:
        capacity: Capacity requested from the filter; also the number of
            integers inserted.
        request_error_rate: Target false-positive rate handed to the filter.

    Returns:
        None; every result is written to stdout.
    """
    f = BloomFilter(capacity=capacity, error_rate=request_error_rate)
    assert (capacity == f.capacity)
    start = time.time()
    for i in range_fn(0, f.capacity):
        f.add(i, skip_check=True)
    end = time.time()
    print("{:5.3f} seconds to add to capacity, {:10.2f} entries/second".format(
            end - start, f.capacity / (end - start)))
    oneBits = f.bitarray.count(True)
    print("Number of Filter Bits:", f.num_bits)
    print("Number of slices:", f.num_slices)
    print("Bits per slice:", f.bits_per_slice)
    print("------")
    print("Fraction of 1 bits at capacity: {:5.3f}".format(
            oneBits / float(f.num_bits)))

    # Look for false positives and measure the actual fp rate.
    trials = f.capacity
    fp = 0
    start = time.time()
    # Probe exactly ``trials`` never-inserted values so the divisor used for
    # the rates below matches the number of probes performed.
    for i in range_fn(f.capacity, f.capacity + trials):
        if i in f:
            fp += 1
    end = time.time()
    print(("{:5.3f} seconds to check false positives, "
           "{:10.2f} checks/second".format(end - start, trials / (end - start))))
    print("Requested FP rate: {:2.4f}".format(request_error_rate))
    print("Experimental false positive rate: {:2.4f}".format(fp / float(trials)))

    # Compute theoretical fp max (Goel/Gupta).
    k = f.num_slices
    m = f.num_bits
    n = f.capacity
    fp_theory = math.pow((1 - math.exp(-k * (n + 0.5) / (m - 1))), k)
    print("Projected FP rate (Goel/Gupta): {:2.6f}".format(fp_theory))
Ejemplo n.º 6
0
def make_hashfuncs(num_slices, num_bits):
    """Return a generator function that maps a key to bit positions.

    The returned callable takes a key and yields ``num_slices`` integers,
    each reduced modulo ``num_bits``. Text keys are UTF-8 encoded before
    hashing; any other key is hashed through its ``str()`` form.
    """
    # Struct code and byte width of the unsigned integers unpacked from each
    # digest; larger ``num_bits`` selects a wider integer.
    fmt_code, chunk_size = 'H', 2
    for floor, code, width in ((1 << 31, 'Q', 8), (1 << 15, 'I', 4)):
        if num_bits >= floor:
            fmt_code, chunk_size = code, width
            break

    # Choose the hash by how many digest bits all slices need together.
    total_hash_bits = 8 * num_slices * chunk_size
    hashfn = hashlib.md5
    for limit, candidate in ((384, hashlib.sha512),
                             (256, hashlib.sha384),
                             (160, hashlib.sha256),
                             (128, hashlib.sha1)):
        if total_hash_bits > limit:
            hashfn = candidate
            break

    values_per_digest = hashfn().digest_size // chunk_size
    fmt = fmt_code * values_per_digest
    # Ceiling division: one salted hash per ``values_per_digest`` slices.
    num_salts = -(-num_slices // values_per_digest)
    salts = tuple(
        hashfn(hashfn(pack('I', n)).digest()) for n in range_fn(num_salts))

    def _make_hashfuncs(key):
        if running_python_3:
            text = key if isinstance(key, str) else str(key)
            data = text.encode('utf-8')
        elif isinstance(key, unicode):
            data = key.encode('utf-8')
        else:
            data = str(key)
        remaining = num_slices
        for salt in salts:
            # Copy so the pre-seeded salt state is reused for every key.
            hasher = salt.copy()
            hasher.update(data)
            for value in unpack(fmt, hasher.digest()):
                yield value % num_bits
                remaining -= 1
                if remaining <= 0:
                    return

    return _make_hashfuncs
Ejemplo n.º 7
0
def make_hashfuncs(num_slices, num_bits, hashmac):
    """Build a function that maps a key to ``num_slices`` bit positions.

    Args:
        num_slices: Number of positions to produce per key.
        num_bits: Every yielded position is reduced modulo this value.
        hashmac: ``0`` selects plain, unkeyed hashing. Any other value
            selects HMAC keyed with a fresh random secret, so the positions
            produced differ between calls to ``make_hashfuncs``.

    Returns:
        A generator function ``f(key)`` yielding the positions. Text keys
        are UTF-8 encoded before hashing; any other key is hashed through
        its ``str()`` form.
    """
    # Struct code and byte width of the unsigned integers unpacked from each
    # digest; larger ``num_bits`` selects a wider integer.
    if num_bits >= (1 << 31):
        fmt_code, chunk_size = 'Q', 8
    elif num_bits >= (1 << 15):
        fmt_code, chunk_size = 'I', 4
    else:
        fmt_code, chunk_size = 'H', 2
    total_hash_bits = 8 * num_slices * chunk_size

    # The digest algorithm is the same in keyed and unkeyed mode, so choose
    # it once instead of repeating the choice per mode.
    if total_hash_bits > 384:
        hashfn = hashlib.sha512
    elif total_hash_bits > 256:
        hashfn = hashlib.sha384
    elif total_hash_bits > 160:
        hashfn = hashlib.sha256
    elif total_hash_bits > 128:
        hashfn = hashlib.sha1
    else:
        hashfn = hashlib.md5
    digest_size = hashfn().digest_size

    fmt = fmt_code * (digest_size // chunk_size)
    num_salts, extra = divmod(num_slices, len(fmt))
    if extra:
        num_salts += 1

    if hashmac == 0:
        salts = tuple(
            hashfn(hashfn(pack('I', i)).digest()) for i in range_fn(num_salts))
    else:
        import os  # only the keyed branch needs an OS entropy source

        # hmac requires a bytes key on Python 3. Draw ``digest_size`` bytes
        # from the OS CSPRNG; the previous key held only ``digest_size``
        # *bits* from the non-cryptographic ``random`` generator.
        hmackey = os.urandom(digest_size)
        salts = tuple(
            hmac.new(hmackey,
                     hmac.new(hmackey, pack('I', i), hashfn).digest(),
                     hashfn)
            for i in range_fn(num_salts))

    def _make_hashfuncs(key):
        if running_python_3:
            if isinstance(key, str):
                key = key.encode('utf-8')
            else:
                key = str(key).encode('utf-8')
        else:
            if isinstance(key, unicode):
                key = key.encode('utf-8')
            else:
                key = str(key)
        i = 0
        for salt in salts:
            # Copy so the pre-seeded salt state is reused for every key.
            h = salt.copy()
            h.update(key)
            for uint in unpack(fmt, h.digest()):
                yield uint % num_bits
                i += 1
                if i >= num_slices:
                    return

    return _make_hashfuncs
Ejemplo n.º 8
0
def make_hashfuncs(num_slices, num_bits, hashmac):
    """Build a function that maps a key to ``num_slices`` bit positions.

    Args:
        num_slices: Number of positions to produce per key.
        num_bits: Every yielded position is reduced modulo this value.
        hashmac: ``0`` selects plain, unkeyed hashing. Any other value
            selects HMAC keyed with a fresh random secret, so the positions
            produced differ between calls to ``make_hashfuncs``.

    Returns:
        A generator function ``f(key)`` yielding the positions. Text keys
        are UTF-8 encoded before hashing; any other key is hashed through
        its ``str()`` form.
    """
    # Struct code and byte width of the unsigned integers unpacked from each
    # digest; larger ``num_bits`` selects a wider integer.
    if num_bits >= (1 << 31):
        fmt_code, chunk_size = 'Q', 8
    elif num_bits >= (1 << 15):
        fmt_code, chunk_size = 'I', 4
    else:
        fmt_code, chunk_size = 'H', 2
    total_hash_bits = 8 * num_slices * chunk_size

    # The digest algorithm is the same in keyed and unkeyed mode, so choose
    # it once instead of repeating the choice per mode.
    if total_hash_bits > 384:
        hashfn = hashlib.sha512
    elif total_hash_bits > 256:
        hashfn = hashlib.sha384
    elif total_hash_bits > 160:
        hashfn = hashlib.sha256
    elif total_hash_bits > 128:
        hashfn = hashlib.sha1
    else:
        hashfn = hashlib.md5
    digest_size = hashfn().digest_size

    fmt = fmt_code * (digest_size // chunk_size)
    num_salts, extra = divmod(num_slices, len(fmt))
    if extra:
        num_salts += 1

    if hashmac == 0:
        salts = tuple(
            hashfn(hashfn(pack('I', i)).digest()) for i in range_fn(num_salts))
    else:
        import os  # only the keyed branch needs an OS entropy source

        # hmac requires a bytes key on Python 3. Draw ``digest_size`` bytes
        # from the OS CSPRNG; the previous key held only ``digest_size``
        # *bits* from the non-cryptographic ``random`` generator.
        hmackey = os.urandom(digest_size)
        salts = tuple(
            hmac.new(hmackey,
                     hmac.new(hmackey, pack('I', i), hashfn).digest(),
                     hashfn)
            for i in range_fn(num_salts))

    def _make_hashfuncs(key):
        if running_python_3:
            if isinstance(key, str):
                key = key.encode('utf-8')
            else:
                key = str(key).encode('utf-8')
        else:
            if isinstance(key, unicode):
                key = key.encode('utf-8')
            else:
                key = str(key)
        i = 0
        for salt in salts:
            # Copy so the pre-seeded salt state is reused for every key.
            h = salt.copy()
            h.update(key)
            for uint in unpack(fmt, h.digest()):
                yield uint % num_bits
                i += 1
                if i >= num_slices:
                    return

    return _make_hashfuncs