def test_rsyncdelta_with_changes(self):
    changes_in_blocks = [
        (0, 0),
        (3, 2),
        (4, 0),
        (5, self.TEST_BLOCK_SIZE - 1),
        (math.ceil(len(self.TEST_FILE) / self.TEST_BLOCK_SIZE) - 1, 0)
    ]
    changed_blocks = [block for block, position in changes_in_blocks]

    with BytesIO(self.TEST_FILE) as changed_file:
        file_buffer = changed_file.getbuffer()
        for block, position in changes_in_blocks:
            file_buffer[block * self.TEST_BLOCK_SIZE + position] += 1
        # Release the exported memoryview so the BytesIO can be closed
        # later without raising a BufferError.
        file_buffer.release()
        changed_file_data = changed_file.getvalue()

        with BytesIO(self.TEST_FILE) as file_to:
            hashes = pyrsync2.blockchecksums(
                file_to, blocksize=self.TEST_BLOCK_SIZE
            )

            delta = pyrsync2.rsyncdelta(
                changed_file, hashes,
                blocksize=self.TEST_BLOCK_SIZE,
                max_buffer=self.TEST_BLOCK_SIZE
            )

            for block, data in enumerate(delta):
                if block in changed_blocks:
                    self.assertEqual(
                        self.get_block(changed_file_data, block), data
                    )
                else:
                    self.assertEqual(block, data)
def testDeltaSameFile(self):
    with open("file1", "r+b") as file1:
        hashes1 = rsync.blockchecksums(file1)
        # Rewind: blockchecksums consumed the stream, and rsyncdelta
        # reads from the current position.
        file1.seek(0)
        deltas = rsync.rsyncdelta(file1, hashes1)
        print("Printing deltas for same file:")
        print(deltas)
        for delta in deltas:
            print("d: " + str(delta))
def common_rsync(patched_file, unpatched_file, resulting_file, blocksize):
    with open(unpatched_file, "rb") as unpatched, \
            open(patched_file, "rb") as patched, \
            open(resulting_file, "wb") as result:
        start = datetime.now()

        hashes = pyrsync2.blockchecksums(unpatched, blocksize)
        delta = pyrsync2.rsyncdelta(patched, hashes, blocksize)
        pyrsync2.patchstream(unpatched, result, delta, blocksize)

        duration = datetime.now() - start
        return duration
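A hypothetical invocation, for illustration only (the file names and the 4096-byte block size are made up): common_rsync times the full checksum/delta/patch round trip and returns a datetime.timedelta.

elapsed = common_rsync("new-backup.tar", "old-backup.tar",
                       "reconstructed.tar", 4096)
print("Round trip took {:.3f} seconds".format(elapsed.total_seconds()))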
def get_delta(self, file):
    file_to = BytesIO(self.TEST_FILE)
    file_from = BytesIO(file)

    hashes = pyrsync2.blockchecksums(
        file_to, blocksize=self.TEST_BLOCK_SIZE
    )
    delta = pyrsync2.rsyncdelta(
        file_from, hashes, blocksize=self.TEST_BLOCK_SIZE
    )

    return list(delta)
def syncFile(srcFile, dstFile):
    with open(dstFile, 'rb') as unpatched, open(srcFile, 'rb') as patchedFile:
        hashes = pyrsync2.blockchecksums(unpatched)
        delta = pyrsync2.rsyncdelta(patchedFile, hashes)
        unpatched.seek(0)
        # Buffer the patched result in memory first: opening dstFile for
        # writing at this point would truncate it while patchstream is
        # still reading blocks from it.
        result = io.BytesIO()
        pyrsync2.patchstream(unpatched, result, delta)
    with open(dstFile, 'wb') as save_to:
        save_to.write(result.getvalue())

# linkData('test', "D:/Clement Research/Test", "")
def testCopying(self):
    with open("file1", "r+b") as file1, open("file2", "r+b") as file2:
        hashes2 = rsync.blockchecksums(file2)
        delta = rsync.rsyncdelta(file1, hashes2)
        print(hashes2)
        print(dir(hashes2))
        print(dir(delta))
        rsync.patchstream(file2, file2, delta)
    with open("file2", "r") as file2:
        line = file2.readline()
        self.assertEqual(TEXT, line)
        print(line)
def test_blockchecksums(self):
    with BytesIO(self.TEST_FILE) as file1:
        hashes = pyrsync2.blockchecksums(
            file1, blocksize=self.TEST_BLOCK_SIZE
        )

        for block, block_hash in enumerate(hashes):
            block_data = self.get_block(self.TEST_FILE, block)
            weaksum = pyrsync2.weakchecksum(block_data)[0]
            strongsum = hashlib.md5(block_data).digest()

            self.assertEqual(block_hash, (weaksum, strongsum))
def test_rsyncdelta_same_file(self):
    with BytesIO(self.TEST_FILE) as file_to:
        hashes = pyrsync2.blockchecksums(
            file_to, blocksize=self.TEST_BLOCK_SIZE
        )

        with BytesIO(self.TEST_FILE) as file_from:
            delta = pyrsync2.rsyncdelta(
                file_from, hashes, blocksize=self.TEST_BLOCK_SIZE
            )

            for index, block in enumerate(delta):
                self.assertEqual(index, block)
def testSingleDeltaForBigFile(self):
    # Create output file
    with open(BIG_OUT, "wb"):
        pass

    start = time.time()
    with open(BIG_OUT, "r+b") as outstream, open(BIG_IN, "rb") as instream:
        hashes = rsync.blockchecksums(outstream)
        deltas = rsync.rsyncdelta(instream, hashes)
        rsync.patchstream(outstream, outstream, deltas)
    finish = time.time()

    elapsed = finish - start
    print("Took " + str(elapsed) + " seconds")
    self.assertTrue(filecmp.cmp(BIG_IN, BIG_OUT, shallow=False))
def get_block_checksums(self, file_name, block_size):
    with self.fs_access_lock:
        if file_name not in self._index:
            return []

        file_data = self._get_index_unsafe(file_name)
        if 'deleted' in file_data and file_data['deleted']:
            return []

        with open(self.get_file_path(file_name), 'rb') as file:
            block_checksums = list(
                pyrsync2.blockchecksums(file, blocksize=block_size)
            )

    return block_checksums
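For context, the peer holding the newer copy of the file would feed the returned checksums into pyrsync2.rsyncdelta to work out what to send back. A minimal sketch under that assumption; compute_delta_for_peer and local_path are illustrative names, not part of the original class:

def compute_delta_for_peer(local_path, block_checksums, block_size):
    # The remote side's checksums tell rsyncdelta which blocks the peer
    # already has; everything else is emitted as literal bytes.
    with open(local_path, 'rb') as local_file:
        return list(pyrsync2.rsyncdelta(local_file, block_checksums,
                                        blocksize=block_size))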
def testMultipleDeltasForBigFile(self):
    # Create output file
    with open(BIG_OUT, "wb"):
        pass

    num_deltas = 0
    start = time.time()
    with open(BIG_OUT, "r+b") as outstream, open(BIG_IN, "rb") as instream:
        hashes = rsync.blockchecksums(outstream)
        deltas = rsync.rsyncdelta(instream, hashes)
        for delta in deltas:
            num_deltas += 1
            #print("delta: " + str(delta))
            rsync.patchstream(outstream, outstream, [delta])
    finish = time.time()

    elapsed = finish - start
    print("Took " + str(elapsed) + " seconds and "
          + str(num_deltas) + " individual deltas")
    self.assertTrue(filecmp.cmp(BIG_IN, BIG_OUT, shallow=False))
def testDeltaDifferentFiles(self):
    with open("file1", "r+b") as file1, open("file2", "r+b") as file2:
        hashes2 = rsync.blockchecksums(file2)
        delta = rsync.rsyncdelta(file1, hashes2)
        for i in delta:
            print("i: " + str(i))
def archive_files(source, target, compress=False, rsync=False,
                  filetype=None, limit=0, verbose=False):
    verboseprint = print if verbose else lambda *a, **k: None
    target_filter = lambda fileinfo: filter_files(fileinfo, filetype,
                                                  limit, verbose)
    tardir, tarfn = os.path.split(target)
    target_iteration = list_archives(
        tardir,
        # Each month is an iteration
        re.compile(
            re.match(r'{0}-\d{{4}}-\d{{2}}'.format(socket.gethostname()),
                     tarfn).group(0)))
    try:
        if rsync and target_iteration:
            with io.BytesIO() as target_buffer, \
                    open(target_iteration[0], 'rb') as target_father_fid, \
                    open(target + '.diff', 'wb') as diff_fid:
                # Open TAR buffer for writing
                with tarfile.open(
                        fileobj=target_buffer,
                        mode='w|xz' if compress else 'w|') as target_fid:
                    for root in source:
                        target_fid.add(root, filter=target_filter)
                target_buffer.seek(0)
                # Write diff file based on rsync algorithm
                verboseprint('Making the diff file')
                hashes = pyrsync2.blockchecksums(target_father_fid)
                delta = pyrsync2.rsyncdelta(target_buffer, hashes,
                                            max_buffer=65535)
                for element in delta:
                    if isinstance(element, int):
                        diff_fid.write(b'\x00\x00')
                        diff_fid.write(element.to_bytes(8, byteorder='big'))
                    else:
                        verboseprint('Saving {} bytes'.format(len(element)))
                        diff_fid.write(
                            len(element).to_bytes(2, byteorder='big'))
                        diff_fid.write(element)
        else:
            # Open TAR file for writing
            with tarfile.open(target,
                              'w:xz' if compress else 'w') as target_fid:
                for root in source:
                    target_fid.add(root, filter=target_filter)
    except FileNotFoundError as not_found:
        if os.path.dirname(not_found.filename) == tardir:
            verboseprint('Making directory {}'.format(tardir))
            os.makedirs(tardir)
            return archive_files(source, target, compress, rsync,
                                 filetype, limit, verbose)
        else:
            print(not_found.filename + ' not found')
            return False
    else:
        return True
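The .diff stream written above is self-describing: a two-byte big-endian length prefix precedes each literal chunk, and the 0x0000 marker precedes an eight-byte big-endian index of an unchanged block. A minimal decoder sketch under those assumptions (read_diff_elements is a hypothetical helper, not part of the original module); its output is a delta iterable that pyrsync2.patchstream can apply against the father archive, assuming the same default block size that blockchecksums used above:

def read_diff_elements(diff_fid):
    # Inverse of the encoding in archive_files: yields block indexes
    # (int) for unchanged blocks and raw bytes for changed data.
    while True:
        prefix = diff_fid.read(2)
        if not prefix:
            break  # end of diff stream
        length = int.from_bytes(prefix, byteorder='big')
        if length == 0:
            # 0x0000 marker: the next 8 bytes are the block index
            yield int.from_bytes(diff_fid.read(8), byteorder='big')
        else:
            yield diff_fid.read(length)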