def _setup():
    """Configure root logging and the rabin chunking parameters.

    Two handlers are attached to the root logger (itself set to DEBUG):
    an INFO-level handler printing bare messages to stdout, and a
    DEBUG-level handler writing timestamped records to
    ``/home/alan/.rab/log``. A three-line start-up banner is then logged
    at DEBUG level, and the rabin window size and min/average/max block
    sizes are set.
    """
    import logging
    import sys

    import rabin

    # The file handler is created first, so a missing or unwritable log
    # path fails before anything is attached to the root logger.
    log_handler = logging.FileHandler("/home/alan/.rab/log")
    log_handler.setFormatter(
        logging.Formatter(
            "%(levelname)s: %(asctime)s - %(name)s -- %(message)s"
        )
    )
    log_handler.setLevel(logging.DEBUG)

    stdout_handler = logging.StreamHandler(sys.stdout)
    stdout_handler.setFormatter(logging.Formatter("%(message)s"))
    stdout_handler.setLevel(logging.INFO)

    root = logging.getLogger()
    root.setLevel(logging.DEBUG)
    for handler in (stdout_handler, log_handler):
        root.addHandler(handler)

    # The banner is emitted at DEBUG, so only the file handler records it.
    module_log = logging.getLogger(__name__)
    for banner_line in ("===========", "Staring up.", "==========="):
        module_log.debug(banner_line)

    kib = 1024
    rabin.set_window_size(48)
    rabin.set_min_block_size(8 * kib)
    rabin.set_average_block_size(64 * kib - 1)
    rabin.set_max_block_size(256 * kib)
def __init__(self, ds, chunk_min_size, chunk_ave_size, chunk_max_size,
             compressor, hasher):
    """Store the collaborators and apply the chunk-size limits.

    Args:
        ds: Stored unchanged as ``self.ds``.
        chunk_min_size: Passed to ``set_min_block_size``.
        chunk_ave_size: Passed to ``set_average_block_size``.
        chunk_max_size: Passed to ``set_max_block_size``.
        compressor: Stored unchanged as ``self.compressor``.
        hasher: Stored unchanged as ``self.hasher``.
    """
    self.size = 0
    self.hasher = hasher
    self.compressor = compressor
    self.ds = ds

    # NOTE(review): these setters are free functions, not methods, so they
    # presumably configure state shared by every instance -- confirm.
    set_min_block_size(chunk_min_size)
    set_average_block_size(chunk_ave_size)
    set_max_block_size(chunk_max_size)
def rabinFinger(emailBody):
    """Feed element 0 of every entry in *emailBody* to the chunker ``r``.

    Before each entry is passed to ``r.update``, the min, max and average
    block sizes are all set to the length of ``str(entry[0])``.

    Args:
        emailBody: Sized, integer-indexable container whose items are
            themselves indexable; only element 0 of each item is used.

    Returns:
        The ``reached`` object from the enclosing scope. It is not defined
        in this block; presumably a callback registered on ``r`` fills
        it -- verify against the caller.
    """
    # Index-based access is kept because only len() and integer indexing
    # are known to be supported by emailBody.
    for index in range(len(emailBody)):
        payload = emailBody[index][0]
        # NOTE(review): if payload is bytes under Python 3, str() yields
        # the "b'...'" repr, which is longer than the data -- confirm the
        # intended length.
        block_size = len(str(payload))
        set_min_block_size(block_size)
        set_max_block_size(block_size)
        set_average_block_size(block_size)
        r.update(payload)
    return reached
import os
import random

from rabin import (
    Rabin,
    get_file_fingerprints,
    set_average_block_size,
    set_max_block_size,
    set_min_block_size,
)

# Test script: fingerprint a random file directly, then fingerprint the
# same bytes streamed through a Rabin object with one extra byte inserted
# at the midpoint.

TARGET = 'test.bin'
filesizeM = 10
Mb = 1024 * 1024

# Create filesizeM blocks of Mb random bytes each.
os.system("dd if=/dev/urandom of=%s bs=%d count=%d" % (TARGET, Mb, filesizeM))

# Seed from the file's first 1024 bytes; the handle is closed right away
# instead of being left to the garbage collector.
with open(TARGET, 'rb') as seed_source:
    random.seed(seed_source.read(1024))

set_min_block_size(1024)
set_max_block_size(2048)
set_average_block_size(1024)

r = Rabin()
before = get_file_fingerprints(TARGET)

# Stream the first half, a single inserted byte, then the remainder.
with open(TARGET, 'rb+') as f:
    data = f.read(int(filesizeM / 2 * Mb))
    r.update(data)
    r.update(b'x')
    data = f.read()
    r.update(data)

after = r.fingerprints()
assert len(before) <= len(after)
diffcount = 0
#!/usr/bin/env python from __future__ import print_function import os import random from rabin import Rabin, get_file_fingerprints, set_min_block_size, set_max_block_size, set_average_block_size TARGET = 'test.bin' os.system("dd if=/dev/urandom of=%s bs=1024 count=100" % TARGET) random.seed(open(TARGET, 'rb').read(1024)) set_min_block_size(1024) set_max_block_size(2048) set_average_block_size(1024) reached = [] def block_reached(start, length, fingerprint): # print('(%s, %s, %s)' % (start, length, fingerprint)) reached.append((start, length, fingerprint)) r = Rabin() r.register(block_reached) with open(TARGET, 'rb') as f: while True: size = random.randint(1,os.path.getsize(TARGET)) data = f.read(size) if len(data) == 0: break
# Script fragment: `seed`, `stream_count`, `resource`, `random`, `Rabin`
# and the set_*_block_size helpers are defined outside this excerpt.
print('seed', seed)

stream_bs = 1024 * 1024
stream_len = stream_bs * stream_count
max_mem = 1024 * 1024 * 20
# Soft RLIMIT_AS limit is max_mem; the hard limit is passed as -1.
resource.setrlimit(resource.RLIMIT_AS, (max_mem, -1))
# print(resource.getrlimit(resource.RLIMIT_AS))

random.seed(seed)
# Floor division keeps the bound an int: random.randint() rejects float
# arguments with TypeError on Python 3.12+. The values are unchanged,
# since int(x / k) == x // k for the positive ints used here.
max_blocksize = random.randint(512, max_mem // 10)
set_min_block_size(max_blocksize // 10)
set_max_block_size(max_blocksize)
set_average_block_size(max_blocksize // 5)

reached = []


def block_reached(start, length, fingerprint):
    """Append the reported block to ``reached`` as a 3-tuple."""
    # print('(%s, %s, %s)' % (start, length, fingerprint))
    reached.append((start, length, fingerprint))


r = Rabin()
r.register(block_reached)

# guppy is optional. Any ordinary failure to import or initialise it is
# ignored, but KeyboardInterrupt and SystemExit are no longer swallowed.
try:
    from guppy import hpy
    hp = hpy()
except Exception:
    pass
# Python 2 script fragment (print statements; `/` on ints is integer
# division here). It sets the soft RLIMIT_AS limit to 20 * 1024 * 1024,
# seeds `random` with `seed`, picks one random max block size and derives
# the rabin min (1/10) and average (1/5) block sizes from it, registers
# `block_reached` to collect (start, length, fingerprint) tuples in
# `reached`, creates a guppy `hpy()` object, and begins a loop that draws
# random sizes up to twice the max block size.
# `seed`, `stream_count`, `resource`, `random`, `Rabin` and the
# set_*_block_size helpers are not defined in this excerpt.
# NOTE(review): the excerpt ends inside the `while` loop; nothing shown
# here advances `total`.
print 'seed', seed stream_bs = 1024 * 1024 stream_len = stream_bs * stream_count max_mem = 1024 * 1024 * 20 resource.setrlimit(resource.RLIMIT_AS, (max_mem,-1)) # print resource.getrlimit(resource.RLIMIT_AS) random.seed(seed) max_blocksize = random.randint(512, max_mem/10) set_min_block_size(max_blocksize/10) set_max_block_size(max_blocksize) set_average_block_size(max_blocksize/5) reached = [] def block_reached(start, length, fingerprint): # print '(%s, %s, %s)' % (start, length, fingerprint) reached.append((start, length, fingerprint)) r = Rabin() r.register(block_reached) from guppy import hpy; hp=hpy() total = 0 while total < stream_len: size = random.randint(1,max_blocksize*2)