def run(self):
    """Send the delta data to the remote side.

    Computes an rsync delta between the local file and the remote
    side's block checksums and streams every delta block to the peer.
    On success a MSG_DONE message is sent; on any failure the error is
    logged and the transfer is shut down.
    """
    try:
        with open(self.directory.get_file_path(self.file_name), 'rb') \
                as file:
            delta_generator = pyrsync2.rsyncdelta(
                file,
                self.remote_checksums,
                blocksize=self.block_size,
                max_buffer=self.block_size
            )
            # Actual transfer of data
            for block in delta_generator:
                self.messanger.send({
                    "type": self.MSG_BLOCK_DATA,
                    "binary_data": block
                })
    except Exception as ex:
        self.logger.exception(ex)
        # Likely cause: the file changed or vanished mid-transfer.
        # (Message fixed: was "couldn't be read transferred to".)
        self.logger.error(
            "File {} couldn't be read and transferred to {}. "
            "Maybe it changed."
            .format(self.file_name, self.messanger.address[0])
        )
        self.shutdown()
    else:
        self.messanger.send({
            "type": self.MSG_DONE
        })
def run(self):
    """Send the delta data to the remote side.

    Streams rsync delta blocks for ``self.file_name`` to the peer and
    finishes with a MSG_DONE message; on failure the error is logged
    and the connection is shut down.
    """
    try:
        with open(self.directory.get_file_path(self.file_name), 'rb') \
                as file:
            delta_generator = pyrsync2.rsyncdelta(
                file,
                self.remote_checksums,
                blocksize=self.block_size,
                max_buffer=self.block_size)
            # Actual transfer of data
            for block in delta_generator:
                self.messanger.send({
                    "type": self.MSG_BLOCK_DATA,
                    "binary_data": block
                })
    except Exception as ex:
        self.logger.exception(ex)
        # Likely cause: the file changed or vanished mid-transfer.
        # (Message fixed: was "couldn't be read transferred to".)
        self.logger.error(
            "File {} couldn't be read and transferred to {}. "
            "Maybe it changed."
            .format(self.file_name, self.messanger.address[0]))
        self.shutdown()
    else:
        self.messanger.send({"type": self.MSG_DONE})
def test_rsyncdelta_with_changes(self):
    """Delta must carry literal data exactly for the blocks we mutate."""
    last = math.ceil(len(self.TEST_FILE) / self.TEST_BLOCK_SIZE) - 1
    mutations = [
        (0, 0),
        (3, 2),
        (4, 0),
        (5, self.TEST_BLOCK_SIZE - 1),
        (last, 0)
    ]
    dirty = {blk for blk, _ in mutations}
    with BytesIO(self.TEST_FILE) as changed_file:
        # Flip one byte at each chosen (block, offset) position.
        view = changed_file.getbuffer()
        for blk, offset in mutations:
            view[blk * self.TEST_BLOCK_SIZE + offset] += 1
        snapshot = changed_file.getvalue()
        with BytesIO(self.TEST_FILE) as pristine:
            signatures = pyrsync2.blockchecksums(
                pristine, blocksize=self.TEST_BLOCK_SIZE)
            stream = pyrsync2.rsyncdelta(
                changed_file, signatures,
                blocksize=self.TEST_BLOCK_SIZE,
                max_buffer=self.TEST_BLOCK_SIZE)
            # Mutated blocks come back as raw bytes, the rest as indexes.
            for blk, payload in enumerate(stream):
                if blk in dirty:
                    self.assertEqual(
                        self.get_block(snapshot, blk), payload)
                else:
                    self.assertEqual(blk, payload)
def testDeltaSameFile(self):
    """Smoke test: print the delta of file1 against its own checksums."""
    with open("file1", "r+b") as handle:
        checksums = rsync.blockchecksums(handle)
        delta_stream = rsync.rsyncdelta(handle, checksums)
        print("Printing deltas for same file:")
        print(delta_stream)
        for entry in delta_stream:
            print("d: " + str(entry))
def common_rsync(patched_file, unpatched_file, resulting_file, blocksize):
    """Run the full checksum/delta/patch cycle and return its wall-clock duration.

    Reads the old file, computes checksums and a delta from the new file,
    patches into resulting_file, and times the three pyrsync2 calls.
    """
    with open(unpatched_file, "rb") as unpatched, \
            open(patched_file, "rb") as patched, \
            open(resulting_file, "wb") as result:
        begin = datetime.now()
        checksums = pyrsync2.blockchecksums(unpatched, blocksize)
        delta_stream = pyrsync2.rsyncdelta(patched, checksums, blocksize)
        pyrsync2.patchstream(unpatched, result, delta_stream, blocksize)
        return datetime.now() - begin
def get_delta(self, file):
    """Return the rsync delta of *file* against TEST_FILE as a list."""
    reference = BytesIO(self.TEST_FILE)
    candidate = BytesIO(file)
    checksums = pyrsync2.blockchecksums(reference,
                                        blocksize=self.TEST_BLOCK_SIZE)
    return list(pyrsync2.rsyncdelta(candidate, checksums,
                                    blocksize=self.TEST_BLOCK_SIZE))
def syncFile(srcFile, dstFile):
    """Make dstFile's contents identical to srcFile's via the rsync algorithm.

    Bug fix: the original opened dstFile for writing ('wb', which
    truncates it) while the very same file was still the "unpatched"
    stream that patchstream reads unchanged blocks from, so those reads
    hit an already-truncated file.  The destination's original content
    is now buffered in memory before the truncating open.  All three
    previously-leaked file handles are now managed with context managers.
    """
    from io import BytesIO

    # Snapshot the destination before it gets truncated for writing.
    with open(dstFile, 'rb') as dst:
        unpatched = BytesIO(dst.read())

    hashes = pyrsync2.blockchecksums(unpatched)
    unpatched.seek(0)

    with open(srcFile, 'rb') as patchedFile, open(dstFile, 'wb') as save_to:
        # rsyncdelta is consumed lazily by patchstream, so the source
        # file must stay open for the duration of the patch.
        delta = pyrsync2.rsyncdelta(patchedFile, hashes)
        pyrsync2.patchstream(unpatched, save_to, delta)
def test_rsyncdelta_same_file(self):
    """An unchanged file's delta must reference every block by index."""
    with BytesIO(self.TEST_FILE) as file_to, \
            BytesIO(self.TEST_FILE) as file_from:
        hashes = pyrsync2.blockchecksums(file_to,
                                         blocksize=self.TEST_BLOCK_SIZE)
        delta = pyrsync2.rsyncdelta(file_from, hashes,
                                    blocksize=self.TEST_BLOCK_SIZE)
        expected = 0
        for block in delta:
            self.assertEqual(expected, block)
            expected += 1
def testCopying(self):
    """Patch file2 with the delta from file1, then check file2's first line.

    NOTE(review): patchstream is called with file2 as BOTH input and
    output stream, so reads and writes share one file position.  This
    relies on library-specific in-place behavior — confirm it is
    intentional rather than a copy/paste of the two-stream idiom.
    """
    with open("file1", "r+b") as file1, open("file2", "r+b") as file2:
        hashes2 = rsync.blockchecksums(file2)
        delta = rsync.rsyncdelta(file1, hashes2)
        # Debug output only; does not affect the patch below.
        print(hashes2)
        print(dir(hashes2))
        print(dir(delta))
        # In-place patch: source and destination are the same handle.
        rsync.patchstream(file2, file2, delta)
    with open("file2", "r") as file2:
        line = file2.readline()
        # TEXT is a module-level fixture defined elsewhere in this file.
        self.assertEqual(TEXT, line)
        print(line)
def testSingleDeltaForBigFile(self):
    """Patch an initially-empty output file in one delta pass and verify it."""
    # Create (truncate) the output file first.
    with open(BIG_OUT, "wb"):
        pass
    started = time.time()
    with open(BIG_OUT, "r+b") as outstream, open(BIG_IN, "rb") as instream:
        checksums = rsync.blockchecksums(outstream)
        # Out file starts empty, so the whole delta is literal data and
        # patching in place simply appends it sequentially.
        rsync.patchstream(outstream, outstream,
                          rsync.rsyncdelta(instream, checksums))
    elapsed = time.time() - started
    print("Took " + str(elapsed) + " seconds")
    self.assertTrue(filecmp.cmp(BIG_IN, BIG_OUT, shallow=False))
def test_rsyncdelta_same_file(self):
    """Delta of an identical file is just the block sequence 0, 1, 2, ..."""
    source = BytesIO(self.TEST_FILE)
    try:
        signatures = pyrsync2.blockchecksums(
            source, blocksize=self.TEST_BLOCK_SIZE)
        copy = BytesIO(self.TEST_FILE)
        try:
            stream = pyrsync2.rsyncdelta(
                copy, signatures, blocksize=self.TEST_BLOCK_SIZE)
            for expected, received in enumerate(stream):
                self.assertEqual(expected, received)
        finally:
            copy.close()
    finally:
        source.close()
def get_delta(self, file):
    """Compute the delta turning TEST_FILE into *file*; return it as a list."""
    checksums = pyrsync2.blockchecksums(
        BytesIO(self.TEST_FILE),
        blocksize=self.TEST_BLOCK_SIZE,
    )
    stream = pyrsync2.rsyncdelta(
        BytesIO(file),
        checksums,
        blocksize=self.TEST_BLOCK_SIZE,
    )
    return list(stream)
def testMultipleDeltasForBigFile(self):
    """Apply the big-file delta one element at a time and verify the result."""
    # Create (truncate) the output file first.
    with open(BIG_OUT, "wb"):
        pass
    delta_count = 0
    begin = time.time()
    with open(BIG_OUT, "r+b") as outstream, open(BIG_IN, "rb") as instream:
        checksums = rsync.blockchecksums(outstream)
        for element in rsync.rsyncdelta(instream, checksums):
            delta_count += 1
            # Each element is patched individually as a one-item delta.
            rsync.patchstream(outstream, outstream, [element])
    elapsed = time.time() - begin
    print("Took " + str(elapsed) + " seconds and "
          + str(delta_count) + " individual deltas")
    self.assertTrue(filecmp.cmp(BIG_IN, BIG_OUT, shallow=False))
def test_rsyncdelta_with_changes(self):
    """Changed blocks come back as raw data, untouched blocks as indexes."""
    total_blocks = math.ceil(len(self.TEST_FILE) / self.TEST_BLOCK_SIZE)
    changes_in_blocks = [
        (0, 0),
        (3, 2),
        (4, 0),
        (5, self.TEST_BLOCK_SIZE - 1),
        (total_blocks - 1, 0)
    ]
    changed_blocks = [pair[0] for pair in changes_in_blocks]
    with BytesIO(self.TEST_FILE) as changed_file:
        # Mutate one byte per chosen (block, position) pair.
        file_buffer = changed_file.getbuffer()
        for block, position in changes_in_blocks:
            file_buffer[block * self.TEST_BLOCK_SIZE + position] += 1
        changed_file_data = changed_file.getvalue()
        with BytesIO(self.TEST_FILE) as file_to:
            hashes = pyrsync2.blockchecksums(
                file_to,
                blocksize=self.TEST_BLOCK_SIZE
            )
            delta = pyrsync2.rsyncdelta(
                changed_file,
                hashes,
                blocksize=self.TEST_BLOCK_SIZE,
                max_buffer=self.TEST_BLOCK_SIZE
            )
            for block, data in enumerate(delta):
                expected = (
                    self.get_block(changed_file_data, block)
                    if block in changed_blocks
                    else block
                )
                self.assertEqual(expected, data)
def archive_files(source, target, compress=False, rsync=False, filetype=None, limit=0, verbose=False):
    """Archive *source* directories into *target* as a tar file, or — when
    rsync mode is on and a previous month's archive exists — write only a
    binary diff (``target + '.diff'``) against that earlier archive.

    Returns True on success, False when a referenced input file is missing.
    Creates the target directory (and retries once, via recursion) when the
    failure was the missing archive directory itself.

    NOTE(review): ``re.match(...).group(0)`` raises AttributeError when the
    archive filename does not start with '<hostname>-YYYY-MM' — confirm the
    caller guarantees that naming scheme.
    """
    # No-op printer when not verbose; keeps call sites unconditional.
    verboseprint = print if verbose else lambda *a, **k: None
    target_filter = lambda fileinfo: filter_files(fileinfo, filetype, limit, verbose)
    tardir, tarfn = os.path.split(target)
    target_iteration = list_archives(
        tardir,
        # Each month is an iteration
        re.compile(
            re.match('{0}-\d{{4}}-\d{{2}}'.format(socket.gethostname()),
                     tarfn).group(0)))
    try:
        if rsync and target_iteration:
            with io.BytesIO() as target_buffer, \
                    open(target_iteration[0], 'rb') as target_father_fid, \
                    open(target + '.diff', 'wb') as diff_fid:
                # Open TAR buffer for writing — the new archive is built
                # entirely in memory so it can be diffed, never written out.
                with tarfile.open(
                        fileobj=target_buffer,
                        mode='w|xz' if compress else 'w|') as target_fid:
                    for root in source:
                        target_fid.add(root, filter=target_filter)
                target_buffer.seek(0)
                # Write diff file based on rsync algorithm
                verboseprint('Making the diff file')
                hashes = pyrsync2.blockchecksums(target_father_fid)
                # max_buffer=65535 caps literal chunks so their length
                # always fits the 2-byte length prefix written below.
                delta = pyrsync2.rsyncdelta(target_buffer, hashes, max_buffer=65535)
                for element in delta:
                    if isinstance(element, int):
                        # Block reference record: 2 zero bytes, then the
                        # block index as an 8-byte big-endian integer.
                        diff_fid.write(b'\x00\x00')
                        diff_fid.write(element.to_bytes(8, byteorder='big'))
                    else:
                        # Literal data record: 2-byte big-endian length,
                        # then the raw bytes.
                        verboseprint('Saving {} bytes'.format(len(element)))
                        diff_fid.write(
                            len(element).to_bytes(2, byteorder='big'))
                        diff_fid.write(element)
        else:
            # Open TAR file for writing — full (non-diff) archive path.
            with tarfile.open(target, 'w:xz' if compress else 'w') as target_fid:
                for root in source:
                    target_fid.add(root, filter=target_filter)
    except FileNotFoundError as not_found:
        if os.path.dirname(not_found.filename) == tardir:
            # The archive directory itself was missing: create it and
            # retry the whole operation once via recursion.
            verboseprint('Making directory {}'.format(tardir))
            os.makedirs(tardir)
            return archive_files(source, target, compress, rsync, filetype, limit, verbose)
        else:
            print(not_found.filename + ' not found')
            return False
    else:
        return True
def testDeltaDifferentFiles(self):
    """Print every delta element between file1 and file2's checksums."""
    with open("file1", "r+b") as first, open("file2", "r+b") as second:
        checksums = rsync.blockchecksums(second)
        for element in rsync.rsyncdelta(first, checksums):
            print("i: " + str(element))