import os
import random

from rabin import Rabin, get_file_fingerprints, set_min_block_size, set_max_block_size, set_average_block_size

# Scratch file that gets chunked twice: once incrementally through a Rabin
# instance and once in a single pass through get_file_fingerprints().
TARGET = 'test.bin'

# Fill the scratch file with 100 KiB of random data (relies on `dd`).
os.system("dd if=/dev/urandom of=%s bs=1024 count=100" % TARGET)

# Seed the PRNG from the first KiB of the file so the read sizes picked
# below are reproducible for a given file. The handle is closed explicitly
# instead of being left for the garbage collector.
with open(TARGET, 'rb') as seed_file:
    random.seed(seed_file.read(1024))

set_min_block_size(1024)
set_max_block_size(2048)
set_average_block_size(1024)

# Every (start, length, fingerprint) triple delivered to the callback.
reached = []


def block_reached(start, length, fingerprint):
    """Record one block reported through the registered callback."""
    # print('(%s, %s, %s)' % (start, length, fingerprint))
    reached.append((start, length, fingerprint))


r = Rabin()
r.register(block_reached)

# The file is not modified while it is read, so its size is looked up once
# rather than on every loop iteration.
target_size = os.path.getsize(TARGET)

with open(TARGET, 'rb') as f:
    while True:
        # Feed the data in randomly sized pieces; an empty read means EOF.
        size = random.randint(1, target_size)
        data = f.read(size)
        if not data:
            break
        r.update(data)

partial = r.fingerprints()
gold = get_file_fingerprints(TARGET)

# Incremental feeding, the one-shot file helper and the callback must all
# agree on the number of blocks.
assert len(gold) == len(partial) == len(reached)
from rabin import Rabin, get_file_fingerprints, set_min_block_size, set_max_block_size, set_average_block_size TARGET = 'test.bin' filesizeM = 10 Mb = 1024 * 1024 os.system("dd if=/dev/urandom of=%s bs=%d count=%d" % (TARGET, Mb, filesizeM)) random.seed(open(TARGET, 'rb').read(1024)) set_min_block_size(1024) set_max_block_size(2048) set_average_block_size(1024) r = Rabin() before = get_file_fingerprints(TARGET) f = open(TARGET, 'rb+') data = f.read(int(filesizeM / 2 * Mb)) r.update(data) r.update(b'x') data = f.read() r.update(data) after = r.fingerprints() assert len(before) <= len(after) diffcount = 0 for i in range(len(before)): try:
#!/usr/bin/env python import os import random from rabin import Rabin, get_file_fingerprints, set_min_block_size, set_max_block_size, set_average_block_size TARGET = 'test.bin' os.system("dd if=/dev/urandom of=%s bs=1024k count=10" % TARGET) random.seed(open(TARGET, 'r').read(1024)) set_min_block_size(1024) set_max_block_size(2048) set_average_block_size(1024) r = Rabin() before = get_file_fingerprints(TARGET) fh = open(TARGET, 'r+') fh.seek(1024*1024*5) fh.write('x') fh.close() after = get_file_fingerprints(TARGET) assert len(before) == len(after) diffcount = 0 for i in range(len(before)): try: bs,bl,bp = before[i] As,al,ap = after[i]
from rabin import Rabin, set_min_block_size, set_max_block_size, set_average_block_size, set_prime #This method generate the fingerprint of document reached = [] def block_reached(start, length, fingerprint): # print '(%s, %s, %s)' % (start, length, fingerprint) reached.append(( fingerprint)) r = Rabin() r.register(block_reached) def rabinFinger(emailBody): for i in range(0,emailBody.__len__()): chunk = str((emailBody[i])[0]) set_min_block_size(chunk.__len__()) set_max_block_size(chunk.__len__()) set_average_block_size(chunk.__len__()) r.update(emailBody[i][0]) return reached # input is the list of email body, and the output is the list of fingerprint def rabinFingerprint(emailBody): fingerPrint = [] for i in range(0, len(emailBody)): # print rabinFingerprint(test[i])
from rabin import Rabin, get_file_fingerprints, set_min_block_size, set_max_block_size, set_average_block_size TARGET = 'test.bin' filesizeM = 10 Mb = 1024*1024 os.system("dd if=/dev/urandom of=%s bs=%d count=%d" % ( TARGET, Mb, filesizeM)) random.seed(open(TARGET, 'r').read(1024)) set_min_block_size(1024) set_max_block_size(2048) set_average_block_size(1024) r = Rabin() before = get_file_fingerprints(TARGET) f = open(TARGET, 'r+') data = f.read(filesizeM/2 * Mb) r.update(data) r.update('x') data = f.read() r.update(data) after = r.fingerprints() assert len(before) <= len(after) diffcount = 0 for i in range(len(before)): try:
# Fill the scratch file with 100 KiB of random data (relies on `dd`).
os.system("dd if=/dev/urandom of=%s bs=1024 count=100" % TARGET)

# Seed the PRNG from the first KiB of the file so the read sizes picked
# below are reproducible for a given file. The handle is closed explicitly
# instead of being left for the garbage collector.
with open(TARGET, 'rb') as seed_file:
    random.seed(seed_file.read(1024))

set_min_block_size(1024)
set_max_block_size(2048)
set_average_block_size(1024)

# Every (start, length, fingerprint) triple delivered to the callback.
reached = []


def block_reached(start, length, fingerprint):
    """Record one block reported through the registered callback."""
    # print('(%s, %s, %s)' % (start, length, fingerprint))
    reached.append((start, length, fingerprint))


r = Rabin()
r.register(block_reached)

# The file is not modified while it is read, so its size is looked up once
# rather than on every loop iteration.
target_size = os.path.getsize(TARGET)

with open(TARGET, 'rb') as f:
    while True:
        # Feed the data in randomly sized pieces; an empty read means EOF.
        size = random.randint(1, target_size)
        data = f.read(size)
        if not data:
            break
        r.update(data)

partial = r.fingerprints()
gold = get_file_fingerprints(TARGET)

# Incremental feeding, the one-shot file helper and the callback must all
# agree on the number of blocks.
assert len(gold) == len(partial) == len(reached)
from rabin import Rabin
from common import get_args, sys

## sample file
import os

# Directory containing this script; the sample text lives one level above.
_SCRIPT_DIR = os.path.abspath(os.path.dirname(__file__))
SAMPLE_FILE = os.path.join(_SCRIPT_DIR, '..', 'sample.txt')

if __name__ == '__main__':
    # get_args turns the command-line arguments into the text to scan and
    # the pattern to look for.
    text, to_find = get_args(sys.argv[1:])

    matcher = Rabin(to_find, text)
    matcher.search(use_rabin_fingerprint=True)

    # Report the outcome of the search stored on the matcher.
    if not matcher.result:
        print("No match found for pattern '{}'".format(to_find))
    else:
        print("Pattern '{}' found at positions {}".format(to_find, matcher.result))