def try_patch(relative_path, data_path, save_path):
    """Bring the stored patch chain for *relative_path* up to date.

    Compares the live file under *data_path* with the file reconstructed from
    the saved source plus its patch chain, then either appends a new patch,
    rebases the chain onto a single patch from the source, or replaces the
    source entirely — whichever keeps storage smallest.
    """
    current = get_binary_file_content(pj(data_path, relative_path))
    src = get_binary_file_content(pj(save_path, 'src', relative_path))
    li_patch = get_li_patch(relative_path, save_path)
    composed = compose_patch(src, *li_patch)
    print('have all content')
    # Known problem files: bsdiff4.diff never terminates on these, skip them.
    if "data/Default 1/GPUCache" in relative_path:
        return
    # Nothing to do when the reconstructed file already matches the live one.
    if current == composed:
        return
    print('create patch')
    patch = bsdiff4.diff(composed, current)
    print('diff from composed patchs')
    patch_from_src = bsdiff4.diff(src, current)
    print('diff from src')
    patchs_size = sum(len(p) for p in li_patch)
    len_ram_file = len(current)
    len_patch = len(patch)
    if len(patch_from_src) < len_patch * (1 + 0.05 * len(li_patch)):
        # A single patch straight from the source is (nearly) as small as the
        # incremental one — collapse the whole chain into it.
        logi('delete olds patchs an add a new one (from src file) {}'.
             format(relative_path))
        delete_patchs(relative_path, save_path)
        add_new_patch(relative_path, patch_from_src, 1, save_path)
    elif len_patch < len_ram_file and patchs_size < len_ram_file * 0.5:
        # Chain still cheap enough: append one more incremental patch.
        logi('add new patch for: {}'.format(relative_path))
        add_new_patch(relative_path, patch, len(li_patch) + 1, save_path)
    else:
        # Patching no longer pays off: stage a fresh source copy.
        logi('del olds patch and file -> create src_new for {}'.format(
            relative_path))
        create_binary_file(
            os.path.join(save_path, 'src_new', relative_path), current)
        delete_old_and_mv_new_to_src(save_path)
def test_branch_conflict(self):
    """Exercise conflict resolution between a user branch and the admin branch."""
    view = View(self.log, self.root_key)
    view.build()
    home_path = os.path.join(os.sep, 'home-' + utils.random_ascii())
    view.mkdir(home_path)
    key = Key.generate()
    view.grant(home_path, 'user', key)
    # Rebuild the view as the plain user.
    view = View(self.log, key)
    view.build()
    parent_node = view.get(home_path)
    user_path = os.path.join(home_path, 'user-' + utils.random_ascii())
    max_hash = None
    enc_content = ''
    # Write a chain of delta-encoded revisions, remembering the largest hash.
    for _ in range(12):
        content = 'content-' + utils.random_ascii(32)
        prev = enc_content
        enc_content = bsdiff4.diff(enc_content, content)
        entry = self.log.write(parent_node.entry, user_path, key,
                               attachment=enc_content)
        max_hash = max(max_hash, entry.hash) if max_hash else entry.hash
    view = View(self.log, self.root_key)
    view.build()
    self.assertEqual(
        bsdiff4.patch(prev, self.log.entries[max_hash].get_content()),
        view.get(user_path).content)
    # Admin branch more power
    admin_content = 'content-' + utils.random_ascii(32)
    content = bsdiff4.diff(enc_content, admin_content)
    self.log.write(parent_node.entry, user_path, self.root_key,
                   attachment=content)
    view.build()
    self.assertEqual(admin_content.encode(), view.get(user_path).content)
    # A later user write must not override the admin branch.
    alt_content = bsdiff4.diff(content,
                               ('content-' + utils.random_ascii(32)).encode())
    self.log.write(parent_node.entry, user_path, key, attachment=alt_content)
    self.assertEqual(admin_content.encode(), view.get(user_path).content)
    # Grant consistency with prev state
    view.grant(os.sep, 'user', key)
    self.assertEqual(admin_content.encode(), view.get(user_path).content)
    view.build()
    self.assertEqual(admin_content.encode(), view.get(user_path).content)
    # Test prints
    self.log.print_tree(view=view, color=True)
    self.log.print_tree(view=view, ascii=True)
def generate_patch(rom: bytes, metadata: Optional[dict] = None, game: str = GAME_ALTTP) -> bytes:
    """Return yaml-wrapped bsdiff4 patch data for *rom* against the base ROM of *game*.

    :param rom: the randomized/modified ROM bytes
    :param metadata: optional metadata dict embedded in the yaml (defaults to empty)
    :param game: which base ROM to diff against
    """
    meta = {} if metadata is None else metadata
    delta = bsdiff4.diff(get_base_rom_data(game), rom)
    return generate_yaml(delta, meta, game)
def try_compress(self):
    """Shrink this update's stored HTML in place, when possible.

    Either drops the payload entirely (identical to the previous update) or
    replaces the raw HTML with a bsdiff4 delta against the previous update.
    """
    # Already no data, same HTML as previous update: nothing to compress.
    if self.html_raw is None and self.html_delta is None:
        return
    if self.prev_update:
        prev_html, prev_level = self.prev_update.html_and_level
        # Delta chains get expensive to resolve; stop compressing past level 64.
        if prev_level >= 64:
            return
        # Identical HTML: drop all payload, the previous update suffices.
        if self.html == self.prev_update.html:
            self.html_raw = None
            self.html_delta = None
            return
    # Already delta-compressed, nothing further to do.
    if self.html_delta:
        return
    assert self.html_raw
    # With a previous update available we can store a delta instead of raw HTML.
    if self.prev_update:
        self.html_delta = bsdiff4.diff(
            self.prev_update.html.encode(),
            self.html_raw.encode(),
        )
        self.html_raw = None
def diff_data_bsdiff(source_data, modi_data):
    """Return a bsdiff4 patch transforming *source_data* into *modi_data*.

    :raises IOError: when either buffer is empty.
    """
    if not source_data or not modi_data:
        raise IOError("[Error] Not valid data length: %d, %d"
                      % (len(source_data), len(modi_data)))
    return bsdiff4.diff(source_data, modi_data)
def compute_diff(
    orig_file: IOIter,
    new_file: IOIter,
    diff_file: IOIter,
    discard_diff_percentage: Optional[float] = None,
) -> IOIter:
    """ Given an open original file and a new file, compute the diff between the two

    :param orig_file: an IOIter object whose contents are the "original" data
    :param new_file: an IOIter object whose contents are the "new" data
    :param diff_file: an IOIter object where the diff data will be written
    :param discard_diff_percentage: when set, abort with DiffTooLargeException once
        the accumulated diff exceeds this fraction of the original file's size
    :returns: the diff_file argument, for chaining
    """
    total_written = 0
    writer = diff_file.writer()
    next(writer)  # prime the writer coroutine so it can accept send()
    logger.debug2('beginning diff computation')
    # Walk both files chunk-by-chunk; the shorter one is padded with b''.
    for orig_chunk, new_chunk in zip_longest(
            orig_file.reader(), new_file.reader(), fillvalue=b''):
        delta = bsdiff4.diff(orig_chunk, new_chunk)
        record = str(len(delta)).encode() + SEPARATOR + delta
        total_written += len(record)
        if discard_diff_percentage and total_written > orig_file.size * discard_diff_percentage:
            raise DiffTooLargeException
        writer.send(record)
    return diff_file
def diff_data_bsdiff(source_data, modi_data):
    """Return a bsdiff4 patch transforming *source_data* into *modi_data*.

    :raises IOError: when either buffer is empty.
    """
    if not source_data or not modi_data:
        raise IOError(
            "[Error] Not valid data length: %d, %d" % (len(source_data),
                                                       len(modi_data)))
    return bsdiff4.diff(source_data, modi_data)
def write_contents(self, opened_zipfile: zipfile.ZipFile):
    """Write this patch's contents, including the bsdiff4 delta, into the zip.

    Fix: the patched file is now read inside a context manager — the original
    used an anonymous ``open(...).read()`` and leaked the file handle.
    """
    super(APDeltaPatch, self).write_contents(opened_zipfile)
    with open(self.patched_path, "rb") as patched_file:
        patched_data = patched_file.read()
    # write Delta; bsdiff4 is a format with integrated compression, so the
    # member is stored uncompressed.
    opened_zipfile.writestr(
        "delta.bsdiff4",
        bsdiff4.diff(self.get_source_data_with_cache(), patched_data),
        compress_type=zipfile.ZIP_STORED
    )
def write(self, path, content, commit=True):
    """Record *content* at *path* as a bsdiff4 delta against its parent node."""
    path = os.path.normpath(path)
    try:
        # Existing node at this exact path, if any.
        parent = self.get(path)
    except exceptions.DoesNotExist:
        # Otherwise anchor the write to the containing directory.
        parent = self.get(os.path.dirname(path))
    content = bsdiff4.diff(parent.content, content)
    name = path.split(os.sep)[-1]
    return self.do_action(parent, self.log.write, path, name,
                          attachment=content, commit=commit)
def create_doc_rev(self, title, content):
    """Create and persist a new revision of document *title*.

    The revision is stored as a bsdiff4 delta against the latest existing
    revision, or as the full content for the first revision.

    Fix: removed the dead conditional ``latest_rev if latest_rev else b""`` —
    it sat inside a branch already guarded by ``if latest_rev``, so the
    fallback could never be taken.

    :returns: DocumentRevisionInfo for the new revision.
    """
    doc_path = self.path / title
    with _lock_file(doc_path, 'x'):
        __class__._check_doc_exists(doc_path)
        revision = str(uuid.uuid4())
        _, latest_rev = __class__._get_doc_rev(doc_path, revision="latest")
        with (doc_path / revision).open('xb') as file:
            content = content.encode()
            # Delta against the latest revision when one exists, otherwise
            # store the full content verbatim.
            file.write(bsdiff4.diff(latest_rev, content)
                       if latest_rev else content)
            return DocumentRevisionInfo(revision, _get_mtime(file))
def diff(self, oldFiles): # Create a patch in the target directory, if need be, and then return our entry in the manifest # First of all, is there a current version of this file? if not os.path.exists(self.loc): print "Current version of file %s does not exist, aborting! You should've told me this file isn't managed any more :(" % self.name exit(1) currentHash = self.currentHash() # bz2 myself and toss it on disk me = self.getContents() me = bz2.compress(me) compHash = self.__hash(me) compressedSelf = open(self.loc + '.bz2', 'wb') compressedSelf.write(me) compressedSelf.close() # if this is a first-time manifest if not oldFiles: # New file, don't have patches or anything return {'hash': currentHash, 'dl': self.name + '.bz2', 'compHash': compHash, 'patches': {}} fileEntry = {'hash': currentHash, 'dl': self.name + '.bz2', 'compHash': compHash, 'patches': {}} # iterate through all the old versions we'll be diffing against for oldFile in oldFiles: oldFileHandle = oldFile._getFile('rb') if oldFileHandle is None: # Old file doesn't exist, w/e continue oldFileHandle.close() oldHash = oldFile.currentHash() if oldHash == currentHash: # easy continue # Does a patch already exist? if oldHash in fileEntry['patches']: # Yep, it does continue # ooooooh, we have to make a patch. start by setting up where the patch will go patchName = '%s_%s_to_%s.patch.bin' % (os.path.basename(self.name), oldHash[:5], currentHash[:5]) print 'Diffing file %s: %s/%s -> %s' % (self.name, oldFile.installBase, oldHash[:5], currentHash[:5]) patchPath = os.path.join(os.path.join(self.installBase, os.path.split(self.name)[0]), patchName) # Then, do the diff in-memory patchContents = bsdiff4.diff(oldFile.getContents(), self.getContents()) # Figure out the hash of the patch patchHash = self.__hash(patchContents) # Then compress it! patchContents = bz2.compress(patchContents) # Then hash it again! Isn't this fun? 
compPatchHash = self.__hash(patchContents) # Then finally write it to disk patchHandle = open(patchPath, 'wb') patchHandle.write(patchContents) patchHandle.close() fileEntry['patches'][oldHash] = {'filename': os.path.join(os.path.dirname(self.name), patchName), 'patchHash': patchHash, 'compPatchHash': compPatchHash} return fileEntry
def diff(self, oldFiles): # Create a patch in the target directory, if need be, and then return our entry in the manifest # First of all, is there a current version of this file? if not os.path.exists(self.loc): print "Current version of file %s does not exist, aborting! You should've told me this file isn't managed any more :(" % self.name exit(1) currentHash = self.currentHash() # bz2 myself and toss it on disk me = self.getContents() me = bz2.compress(me) compHash = self.__hash(me) compressedSelf = open(self.loc + '.bz2', 'wb') compressedSelf.write(me) compressedSelf.close() # if this is a first-time manifest if not oldFiles: # New file, don't have patches or anything return {'hash': currentHash, 'dl': self.name + '.bz2', 'compHash': compHash, 'patches': {}} fileEntry = {'hash': currentHash, 'dl': self.name + '.bz2', 'compHash': compHash, 'patches': {}} # iterate through all the old versions we'll be diffing against for oldFile in oldFiles: oldFileHandle = oldFile._getFile('rb') if oldFileHandle is None: # Old file doesn't exist, w/e continue oldFileHandle.close() oldHash = oldFile.currentHash() if oldHash == currentHash: # easy continue # Does a patch already exist? if oldHash in fileEntry['patches']: # Yep, it does continue # ooooooh, we have to make a patch. start by setting up where the patch will go patchName = '%s_%s_to_%s.patch.bin' % (os.path.basename(self.name), oldHash[:5], currentHash[:5]) print 'Diffing file %s: %s/%s -> %s' % (self.name, oldFile.installBase, oldHash[:5], currentHash[:5]) patchPath = os.path.join(os.path.join(self.installBase, os.path.split(self.name)[0]), patchName) # Then, do the diff in-memory patchContents = bsdiff4.diff(oldFile.getContents(), self.getContents()) # Figure out the hash of the patch patchHash = self.__hash(patchContents) # Then compress it! patchContents = bz2.compress(patchContents) # Then hash it again! Isn't this fun? 
compPatchHash = self.__hash(patchContents) # Then finally write it to disk patchHandle = open(patchPath, 'wb') patchHandle.write(patchContents) patchHandle.close() fileEntry['patches'][oldHash] = {'filename': os.path.join(os.path.dirname(self.name), patchName), 'patchHash': patchHash, 'compPatchHash': compPatchHash} return fileEntry
def get_delta(self, src, dst):
    """
    Calculate strings delta
    :param src: Source string
    :param dst: Destination string
    :return: (<type>, delta)
    """
    delta = bsdiff4.diff(src, dst)
    # Only keep the delta when it is actually smaller than shipping the file.
    if len(delta) < len(dst):
        return self.T_BSDIFF4, delta
    return self.T_FILE, dst
def get_delta(self, src, dst):
    # type: (six.text_type, six.text_type) -> Tuple[six.text_type, six.binary_type]
    """
    Calculate strings delta
    :param src: Source string
    :param dst: Destination string
    :return: (<type>, delta)
    """
    delta = bsdiff4.diff(src, dst)
    # Only keep the delta when it is actually smaller than shipping the file.
    if len(delta) < len(dst):
        return self.T_BSDIFF4, delta
    return self.T_FILE, smart_bytes(dst)
def generate_patch(vanilla_file, randomized_file, metadata: Optional[dict] = None) -> bytes:
    """Generate old (<4) apbp format patch data. Run through lzma to get a complete apbp file."""
    with open(vanilla_file, "rb") as f:
        vanilla = read_rom(f)
    with open(randomized_file, "rb") as f:
        randomized = read_rom(f)
    meta = {} if metadata is None else metadata
    return generate_yaml(bsdiff4.diff(vanilla, randomized), meta)
def test_branch_conflict(self):
    """Exercise conflict resolution between a user branch and the admin branch."""
    view = View(self.log, self.root_key)
    view.build()
    home_path = os.path.join(os.sep, 'home-' + utils.random_ascii())
    view.mkdir(home_path)
    key = Key.generate()
    view.grant(home_path, 'user', key)
    # Rebuild the view as the plain user.
    view = View(self.log, key)
    view.build()
    parent_node = view.get(home_path)
    user_path = os.path.join(home_path, 'user-' + utils.random_ascii())
    max_hash = None
    enc_content = ''
    # Write a chain of delta-encoded revisions, remembering the largest hash.
    for _ in range(12):
        content = 'content-' + utils.random_ascii(32)
        prev = enc_content
        enc_content = bsdiff4.diff(enc_content, content)
        entry = self.log.write(parent_node.entry, user_path, key,
                               attachment=enc_content)
        max_hash = max(max_hash, entry.hash) if max_hash else entry.hash
    view = View(self.log, self.root_key)
    view.build()
    self.assertEqual(
        bsdiff4.patch(prev, self.log.entries[max_hash].get_content()),
        view.get(user_path).content)
    # Admin branch more power
    admin_content = 'content-' + utils.random_ascii(32)
    content = bsdiff4.diff(enc_content, admin_content)
    self.log.write(parent_node.entry, user_path, self.root_key,
                   attachment=content)
    view.build()
    self.assertEqual(admin_content.encode(), view.get(user_path).content)
    # A later user write must not override the admin branch.
    alt_content = bsdiff4.diff(content,
                               ('content-' + utils.random_ascii(32)).encode())
    self.log.write(parent_node.entry, user_path, key, attachment=alt_content)
    self.assertEqual(admin_content.encode(), view.get(user_path).content)
    # Grant consistency with prev state
    view.grant(os.sep, 'user', key)
    self.assertEqual(admin_content.encode(), view.get(user_path).content)
    view.build()
    self.assertEqual(admin_content.encode(), view.get(user_path).content)
    # Test prints
    self.log.print_tree(view=view, color=True)
    self.log.print_tree(view=view, ascii=True)
def update(self, data):
    """Roll the base file forward to the contents of *data*.

    Stores a reverse bsdiff4 patch (new -> old) so older versions remain
    reconstructable, then overwrites the base with the packed new data.

    Fix: all four file handles were previously opened without ever being
    closed; they now use context managers. The patch filename is still
    computed BEFORE ``self.scan()`` runs, matching the original ordering in
    case scan() mutates ``self.patches``.
    """
    print('--start--')
    with open(self.base, 'rb') as fh:
        old_data = self.unpack(fh.read())
    with open(data, 'rb') as fh:
        new_data = fh.read()
    # Next patch index — computed before scan(), as in the original code.
    patch_name = '%s.%d' % (self.base, len(self.patches) + 1)
    self.scan()
    # Reverse delta: applying it to the NEW data reproduces the OLD data.
    reverse_patch = bsdiff4.diff(new_data, old_data)
    with open(patch_name, 'wb') as fh:
        fh.write(reverse_patch)
    with open(self.base, 'wb') as fh:
        fh.write(self.pack(new_data))
    print('--done--')
def write(self, path, content, commit=True):
    """Record *content* at *path* as a bsdiff4 delta against its parent node."""
    path = os.path.normpath(path)
    try:
        # Existing node at this exact path, if any.
        parent = self.get(path)
    except exceptions.DoesNotExist:
        # Otherwise anchor the write to the containing directory.
        parent = self.get(os.path.dirname(path))
    content = bsdiff4.diff(parent.content, content)
    name = path.split(os.sep)[-1]
    return self.do_action(parent, self.log.write, path, name,
                          attachment=content, commit=commit)
def __init__(self, target, payload='', mode=DEFAULT_MODE, orig_digest='',
             dest_digest='', orig_data=None, dest_data=None,
             hash_type=DEFAULT_HASH_TYPE, **kwargs):
    """Build a diff entry for *target*.

    When raw buffers are supplied via ``orig_data``/``dest_data``, the
    digests and the bsdiff4 payload are derived from them, overriding any
    explicitly passed ``payload``/``orig_digest``/``dest_digest`` values.
    """
    if orig_data is not None or dest_data is not None:
        orig_digest = self._generate_digest(orig_data, hash_type)
        dest_digest = self._generate_digest(dest_data, hash_type)
        # Binary delta transforming orig_data into dest_data.
        payload = bsdiff4.diff(orig_data, dest_data)
    super(PDARDiffEntry, self).__init__(
        target=target, payload=payload, mode=mode, orig_digest=orig_digest,
        dest_digest=dest_digest, hash_type=hash_type, **kwargs)
def diff(src_path, dst_path, patch_path):
    """Write a patch zip describing how to turn src_path's zip into dst_path's.

    Each changed member is stored as either a bsdiff4 delta (starts with
    "BSDIFF4"), a bz2-compressed full copy (starts with "BZ"), or the marker
    'RM' for removed members. Returns the number of changed members.
    """
    src_zip = zipfile.ZipFile(src_path)
    dst_zip = zipfile.ZipFile(dst_path)
    out_zip = zipfile.ZipFile(patch_path, 'w', zipfile.ZIP_STORED)
    src_names = set(src_zip.namelist())
    dst_names = set(dst_zip.namelist())
    count = 0
    for name in src_names | dst_names:
        xdata = src_zip.read(name) if name in src_names else None
        ydata = dst_zip.read(name) if name in dst_names else None
        if xdata == ydata:
            continue  # member unchanged
        if ydata is not None:
            bz2_data = bz2.compress(ydata)  # startswith BZ
        if xdata is not None and ydata is not None:
            diff_data = bsdiff4.diff(xdata, ydata)  # startswith BSDIFF4
            # Keep whichever encoding is smaller.
            zdata = diff_data if len(diff_data) < len(bz2_data) else bz2_data
        elif xdata is not None:
            zdata = 'RM'  # member removed in dst
        elif ydata is not None:
            zdata = bz2_data  # new member, ship the compressed copy
        else:
            raise Exception("Hmm, didn't expect to get here.")
        out_zip.writestr(name, zdata)
        count += 1
    # Record provenance metadata about both archives.
    info = {}
    for path, pre in (src_path, 'src'), (dst_path, 'dst'):
        info.update({pre: basename(path),
                     pre + '_size': getsize(path),
                     pre + '_mtime': getmtime(path),
                     pre + '_md5': md5_file(path)})
    out_zip.writestr('__zdiff_info__.json',
                     json.dumps(info, indent=2, sort_keys=True))
    out_zip.close()
    dst_zip.close()
    src_zip.close()
    return count
def diff(src_path, dst_path, patch_path):
    """Write a patch zip describing how to turn src_path's zip into dst_path's.

    Each changed member is stored as either a bsdiff4 delta (starts with
    "BSDIFF4"), a bz2-compressed full copy (starts with "BZ"), or the marker
    'RM' for removed members. Returns the number of changed members.
    """
    src_zip = zipfile.ZipFile(src_path)
    dst_zip = zipfile.ZipFile(dst_path)
    out_zip = zipfile.ZipFile(patch_path, 'w', zipfile.ZIP_STORED)
    src_names = set(src_zip.namelist())
    dst_names = set(dst_zip.namelist())
    count = 0
    for name in src_names | dst_names:
        xdata = src_zip.read(name) if name in src_names else None
        ydata = dst_zip.read(name) if name in dst_names else None
        if xdata == ydata:
            continue  # member unchanged
        if ydata is not None:
            bz2_data = bz2.compress(ydata)  # startswith BZ
        if xdata is not None and ydata is not None:
            diff_data = bsdiff4.diff(xdata, ydata)  # startswith BSDIFF4
            # Keep whichever encoding is smaller.
            zdata = diff_data if len(diff_data) < len(bz2_data) else bz2_data
        elif xdata is not None:
            zdata = 'RM'  # member removed in dst
        elif ydata is not None:
            zdata = bz2_data  # new member, ship the compressed copy
        else:
            raise Exception("Hmm, didn't expect to get here.")
        out_zip.writestr(name, zdata)
        count += 1
    # Record provenance metadata about both archives.
    info = {}
    for path, pre in (src_path, 'src'), (dst_path, 'dst'):
        info.update({pre: basename(path),
                     pre + '_size': getsize(path),
                     pre + '_mtime': getmtime(path),
                     pre + '_md5': md5_file(path)})
    out_zip.writestr('__zdiff_info__.json',
                     json.dumps(info, indent=2, sort_keys=True))
    out_zip.close()
    dst_zip.close()
    src_zip.close()
    return count
def diff_from_file(self, mfile, mm_use):
    """ calc diff: using bsdiff to generate patch data automatic divide data,
    memory use will not exceed MAX_MEMORY_USE(not guarantee.)
    @param mfile  the other file object to diff against
    @param mm_use MAX_MEMORY_USE.
    @return an iterative object saving diff data for each of chunksize
    """
    total_size = self.size() + mfile.size()
    # NOTE(review): keeps the full mm_use when it exceeds total_size and a
    # third of it otherwise — confirm this split is intended (it looks
    # inverted at first glance).
    chunksize = mm_use if mm_use > total_size else mm_use / 3
    for chunk, pos in self.read_in_chunks(chunksize):
        min_patch = None
        start = 0
        end = 0
        # For each of our chunks, find the chunk of mfile that yields the
        # smallest bsdiff4 patch.
        for schunk, spos in mfile.read_in_chunks(chunksize):
            patch = bsdiff4.diff(schunk, chunk)
            if not min_patch or len(patch) < len(min_patch):
                min_patch = patch
                # NOTE(review): start is set to the PREVIOUS end here, which
                # tracks the byte range of the best-matching source chunk —
                # verify this against read_in_chunks' position semantics.
                start = end
                end = spos
        yield min_patch, start, end, pos
def put(klass, value, parent_key=None):
    """Store *value* keyed by its SHA1, optionally delta-encoded against a parent.

    Returns the SHA1 hex digest used as the storage key.
    """
    if not isinstance(value, six.binary_type):
        raise TypeError("data type must be binary_type")
    # Hard cap at 100 MiB.
    if len(value) > 100 * 1024 * 1024:
        raise Exception("file size exceed")
    key = hashlib.sha1(value).hexdigest()
    cache.set(key, force_bytes(value), None)
    size = len(value)
    if not klass.objects.filter(key=key).exists():
        if parent_key:
            parent_data = klass.get(key=parent_key)
            delta = bsdiff4.diff(parent_data, value)
            # Only store the delta when it at least halves a non-trivial payload.
            if len(value) > 512 and 2 * len(delta) < len(value):
                value = delta
            else:
                parent_key = None
        klass.objects.create(key=key, parent_key=parent_key, data=value,
                             size=size)
    return key
def OnButton3Button(self, event):
    """Handle the save-patch button: diff the two loaded byte buffers and save.

    Fixes: the output file is now closed via a context manager even if
    diffing fails, and the 'Completed' dialog only appears after a
    successful save (it previously popped up even when the user cancelled
    the file dialog).
    """
    event.Skip()
    dlg = wx.FileDialog(self, 'Save a patch', '.', '', '*.patch', wx.SAVE)
    saved = False
    try:
        if dlg.ShowModal() == wx.ID_OK:
            filename = dlg.GetPath()
            with open(filename, 'wb') as destination:
                destination.write(bsdiff4.diff(bytes(genOriginalBytes),
                                               bytes(genModifiedBytes)))
            saved = True
    finally:
        dlg.Destroy()
    if saved:
        ok_dlg = wx.MessageDialog(self, u'Completed!!', u'Completed',
                                  wx.OK | wx.ICON_INFORMATION)
        ok_dlg.ShowModal()
        ok_dlg.Destroy()
def diff(self, oldFiles):
    """Create patches against each old version; return our manifest entry."""
    # The current version of this file has to exist on disk.
    if not os.path.exists(self.loc):
        print("Current version of file %s does not exist, aborting! You should've told me this file isn't managed any more :(" % self.name)
        exit(1)
    currentHash = self.currentHash()
    # bz2-compress ourselves and drop the result on disk.
    contents = self.getContents()
    contents = bz2.compress(contents)
    compHash = self.__hash(contents)
    compressedSelf = open(self.loc + '.bz2', 'wb')
    compressedSelf.write(contents)
    compressedSelf.close()
    entry = {'hash': currentHash, 'dl': self.name + '.bz2',
             'compHash': compHash, 'patches': {}}
    # First-time manifest: no old versions means no patches.
    if not oldFiles:
        return entry
    for oldFile in oldFiles:
        handle = oldFile._getFile('rb')
        if handle is None:
            continue  # old file doesn't exist, skip it
        handle.close()
        oldHash = oldFile.currentHash()
        if oldHash == currentHash:
            continue  # identical content, nothing to patch
        if oldHash in entry['patches']:
            continue  # patch already generated for this hash
        patchName = '%s_%s_to_%s.patch.bin' % (os.path.basename(self.name), oldHash[:5], currentHash[:5])
        print('Diffing file %s: %s/%s -> %s' % (self.name, oldFile.installBase, oldHash[:5], currentHash[:5]))
        patchPath = os.path.join(os.path.join(self.installBase, os.path.split(self.name)[0]), patchName)
        # Diff in memory, hash the raw patch, compress, then hash again.
        patchContents = bsdiff4.diff(oldFile.getContents(), self.getContents())
        patchHash = self.__hash(patchContents)
        patchContents = bz2.compress(patchContents)
        compPatchHash = self.__hash(patchContents)
        patchHandle = open(patchPath, 'wb')
        patchHandle.write(patchContents)
        patchHandle.close()
        entry['patches'][oldHash] = {'filename': os.path.join(os.path.dirname(self.name), patchName),
                                     'patchHash': patchHash,
                                     'compPatchHash': compPatchHash}
    return entry
def generate_patch(baserombytes: bytes, rom: bytes) -> bytes:
    """Return yaml-wrapped bsdiff4 patch data for *rom* against the base ROM."""
    delta = bsdiff4.diff(bytes(baserombytes), rom)
    return generate_yaml(delta, {})
def diff(source,target):
    # Thin wrapper delegating to the native bsdiff4 implementation.
    return bsdiff4_native.diff(source,target)
if size > first_packet: packets += math.ceil((size-first_packet)/(512-1-28)) # if packets > 60: # packets = 60 except (FileNotFoundError, IsADirectoryError): continue times.append(time.time()-ini) messages.append(packets) return messages, time.time()-start, times # warm-up compress(lambda n: '') results = { 'raw': compress(lambda n: n), 'bsdiff4': compress(lambda n: bsdiff4.diff(b'', n)), 'lzma': compress(lzma.compress), 'zlib': compress(zlib.compress), } sys.stdout.write('method,type,value\n') for method, v in results.items(): messages, __, times = v for value in messages: sys.stdout.write('%s,%s,%s\n' % (method, 'messages', value)) for value in times: sys.stdout.write('%s,%s,%s\n' % (method, 'times', value)) plt.hist(results['raw'][0], bins=1000, histtype='step', normed=True, color='y', label='raw', cumulative=True) plt.hist(results['zlib'][0], bins=1000, histtype='step', normed=True, color='r', label='zlib', cumulative=True)
# Fetch the existing attachment item from the Zotero server.
# NOTE(review): `zlib` here is a Zotero client object from earlier in the
# script, NOT the stdlib zlib module — confirm and consider renaming.
existingAttachmentItem = zlib.fetchItem(args.attachmentItemKey)
print(existingAttachmentItem)
#compare md5 of our original file with the md5 of the uploaded file
apiMD5 = existingAttachmentItem.get('md5')
print('md5sum of the file existing on the Zotero server: ' + apiMD5)
if apiMD5 != finfo['md5']:
    raise Exception("MD5 mismatch : " + apiMD5 + " : " + finfo['md5'])
#get information for new version of file and diff with old version
nfilestat = os.stat(newfilepath)
nf = open(newfilepath, 'rb')
nfdata = nf.read()
newmd5 = hashlib.md5()
newmd5.update(nfdata)
newDigest = newmd5.hexdigest()
# NOTE(review): wrapping the bytes patch in str() yields the "b'...'" repr
# on Python 3 — confirm whether diffData should stay raw bytes for upload.
diffData = str(bsdiff4.diff(efdata, nfdata))
nfinfo = {
    'md5': newDigest,
    # NOTE(review): filename comes from oldfilepath, presumably to keep the
    # server-side name stable — verify against the upload call.
    'filename': os.path.basename(oldfilepath),
    'filesize': nfilestat.st_size,
    'mtime': int(
        nfilestat.st_mtime * 1000
    )  # the zotero api accepts mtime in ms, os.stat may return seconds depending on operating system
}
#guess mimetype info
nfinfo['contentType'], nfinfo['charset'] = mimetypes.guess_type(newfilepath)
print("new file info:")
print(nfinfo)
#xddiffData = open('samplefile.xd3.diff', 'rb').read()
def diff(compress, reference):
    """Return a bsdiff4 patch that transforms *reference* into *compress*."""
    return bsdiff4.diff(reference, compress)
def create(self):
    """Write a bsdiff4 patch (second_file -> first_file) next to the target file."""
    delta = bsdiff4.diff(self.second_file, self.first_file)
    patch_path = Path(str(self.file_path) + ".patch")
    patch_path.write_bytes(delta)
target = sys.argv[2]
# Directory where per-file patches are written, keyed by source md5.
patch = "bsdiff4"
if not os.path.exists(patch):
    os.makedirs(patch)
for filename in os.listdir(target):
    source_file = os.path.join(source, filename)
    target_file = os.path.join(target, filename)
    # Fix: read both versions via context managers — the original leaked the
    # file handles from anonymous open(...).read() calls.
    with open(source_file, "rb") as f:
        source_data = f.read()
    with open(target_file, "rb") as f:
        target_data = f.read()
    # Hoisted: the source md5 was previously computed twice.
    source_md5 = hashlib.md5(source_data).hexdigest()
    if source_md5 != hashlib.md5(target_data).hexdigest():
        print("diffing " + filename)
        patch_data = bsdiff4.diff(source_data, target_data)
        patch_file = os.path.join(patch, source_md5)
        print("creating patch for " + filename + " with " + patch_file)
        with open(patch_file, "wb+") as f:
            f.write(patch_data)
    # Release the (possibly large) buffers before the next iteration.
    source_data = None
    target_data = None
    patch_data = None
verify = {}
# Record the md5 of every target file for later verification.
for filename in os.listdir(target):
    filepath = os.path.join(target, filename)
    with open(filepath, "rb") as f:
        target_data = f.read()
    verify[filename] = hashlib.md5(target_data).hexdigest()
(size - first_packet) / (512 - 1 - 28)) # if packets > 60: # packets = 60 except (FileNotFoundError, IsADirectoryError): continue times.append(time.time() - ini) messages.append(packets) return messages, time.time() - start, times # warm-up compress(lambda n: '') results = { 'raw': compress(lambda n: n), 'bsdiff4': compress(lambda n: bsdiff4.diff(b'', n)), 'lzma': compress(lzma.compress), 'zlib': compress(zlib.compress), } sys.stdout.write('method,type,value\n') for method, v in results.items(): messages, __, times = v for value in messages: sys.stdout.write('%s,%s,%s\n' % (method, 'messages', value)) for value in times: sys.stdout.write('%s,%s,%s\n' % (method, 'times', value)) plt.hist(results['raw'][0], bins=1000, histtype='step',
def generate_patch(rom: bytes, metadata: Optional[dict] = None) -> bytes:
    """Return yaml-wrapped bsdiff4 patch data for *rom* against the base ROM.

    :param rom: the randomized/modified ROM bytes
    :param metadata: optional metadata dict embedded in the yaml (defaults to empty)
    """
    meta = {} if metadata is None else metadata
    delta = bsdiff4.diff(get_base_rom_bytes(), rom)
    return generate_yaml(delta, meta)
def round_trip(self, src, dst):
    """Assert that applying diff(src, dst) to src reproduces dst exactly."""
    delta = diff(src, dst)
    restored = patch(src, delta)
    self.assertEqual(dst, restored)
def make_patch(self):
    """Create patch files.

    For every member of the change package, fetches the previous and current
    revisions via `si viewrevision` and writes a bsdiff4 patch between them.

    Fixes: the output file is now written inside a context manager (the
    original leaked the handle if diffing raised), and the duplicated
    `si_args` construction is factored into a local helper.
    """
    # First get change package information
    cpinfo = self.get_cpinfo()
    # Iterate through each member of change package
    for member, project, rev in cpinfo:
        # Get previous member revision
        prev_rev = self.get_prev_rev(rev)

        def view_revision(revision):
            # Fetch one revision of the member via `si viewrevision`.
            args = ["si", "viewrevision", "-r", revision,
                    "--project=" + project]
            args += self.std_args
            args.append(member)
            return subprocess.check_output(args)

        # Bytes of the old and new revisions of the member.
        old_file_st = view_revision(prev_rev)
        new_file_st = view_revision(rev)
        # Binary patch file naming convention:
        # "foobar.out" becomes "foobar_out.bsdiff"
        root, ext = os.path.splitext(member)
        filename = root + "_" + ext.lstrip(".") + ".bsdiff"
        # Prepend destination path if specified
        if self.destination:
            # Normalized path Python can understand.
            dir_normalized = os.path.normpath(self.destination)
            # If invalid path specified, patch files go in current directory
            if os.path.isdir(dir_normalized):
                filename = os.path.join(dir_normalized, filename)
        # Create the patch and write it; the with-block guarantees the file
        # is closed even if bsdiff4.diff raises.
        with open(filename, 'wb') as outfile:
            outfile.write(bsdiff4.diff(old_file_st, new_file_st))
# Fetch the existing attachment item from the Zotero server.
# NOTE(review): `zlib` here is a Zotero client object from earlier in the
# script, NOT the stdlib zlib module — confirm and consider renaming.
existingAttachmentItem = zlib.fetchItem(args.attachmentItemKey)
print(existingAttachmentItem)
# compare md5 of our original file with the md5 of the uploaded file
apiMD5 = existingAttachmentItem.get("md5")
print("md5sum of the file existing on the Zotero server: " + apiMD5)
if apiMD5 != finfo["md5"]:
    raise Exception("MD5 mismatch : " + apiMD5 + " : " + finfo["md5"])
# get information for new version of file and diff with old version
nfilestat = os.stat(newfilepath)
nf = open(newfilepath, "rb")
nfdata = nf.read()
newmd5 = hashlib.md5()
newmd5.update(nfdata)
newDigest = newmd5.hexdigest()
# NOTE(review): wrapping the bytes patch in str() yields the "b'...'" repr
# on Python 3 — confirm whether diffData should stay raw bytes for upload.
diffData = str(bsdiff4.diff(efdata, nfdata))
nfinfo = {
    "md5": newDigest,
    # NOTE(review): filename comes from oldfilepath, presumably to keep the
    # server-side name stable — verify against the upload call.
    "filename": os.path.basename(oldfilepath),
    "filesize": nfilestat.st_size,
    "mtime": int(
        nfilestat.st_mtime * 1000
    ),  # the zotero api accepts mtime in ms, os.stat may return seconds depending on operating system
}
# guess mimetype info
nfinfo["contentType"], nfinfo["charset"] = mimetypes.guess_type(newfilepath)
print("new file info:")
print(nfinfo)
# xddiffData = open('samplefile.xd3.diff', 'rb').read()
# Directory where per-file patches are written, keyed by source md5.
patch = "bsdiff4"
if not os.path.exists(patch):
    os.makedirs(patch)
for filename in os.listdir(target):
    source_file = os.path.join(source, filename)
    target_file = os.path.join(target, filename)
    # NOTE(review): these open(...).read() calls never close their handles;
    # consider context managers.
    source_data = open(source_file, "rb").read()
    target_data = open(target_file, "rb").read()
    # Only diff files whose content actually changed.
    if hashlib.md5(source_data).hexdigest() != hashlib.md5(
            target_data).hexdigest():
        print("diffing " + filename)
        patch_data = bsdiff4.diff(source_data, target_data)
        # Patch file is named after the md5 of the SOURCE version.
        patch_file = os.path.join(patch, hashlib.md5(source_data).hexdigest())
        print("creating patch for " + filename + " with " + patch_file)
        open(patch_file, "wb+").write(patch_data)
    # Release the (possibly large) buffers before the next iteration.
    source_data = None
    target_data = None
    patch_data = None
verify = {}
# Hash every target file (continues past the visible end of this chunk).
for filename in os.listdir(target):
    filepath = os.path.join(target, filename)
    target_data = open(filepath, "rb").read()
# Validate both input paths before opening anything (Python 2 script).
if not os.path.isfile(cmd_settings.get_first()):
    print "first file not exists"
    sys.exit()
if not os.path.isfile(cmd_settings.get_second()):
    print "second file not exists"
    sys.exit()
f_f = open(cmd_settings.get_first(), "rb")
binary_f = f_f.read()
f_s = open(cmd_settings.get_second(), "rb")
binary_s = f_s.read()
# Binary delta transforming the first file into the second.
binary_delta = bsdiff4.diff(binary_f, binary_s)
f_o = open(cmd_settings.get_out(), "wb")
f_o.write(binary_delta)
f_o.close()
f_f.close()
f_s.close()
def round_trip(self, src, dst):
    """Assert that applying diff(src, dst) to src reproduces dst exactly."""
    delta = diff(src, dst)
    restored = patch(src, delta)
    self.assertEqual(dst, restored)