def test_different_compression():
    """Both compression levels must round-trip; level 9 must give the smaller delta."""
    alphabet = (string.ascii_letters + string.digits).encode()
    source = alphabet * 1000
    # The target is the source with 100 letters-only repeats inserted after
    # the 900th repeat of the alphabet.
    target = alphabet * 900 + string.ascii_letters.encode() * 100 + alphabet * 100

    def roundtrip(level):
        # Encode at the given level, verify decoding restores the target,
        # and hand the delta back for the size comparison.
        delta = xdelta3.encode(source, target, level)
        rebuilt = xdelta3.decode(source, delta)
        assert target == rebuilt
        return delta

    strong_delta = roundtrip(xdelta3.Flags.COMPLEVEL_9)
    weak_delta = roundtrip(xdelta3.Flags.COMPLEVEL_1)
    assert len(strong_delta) < len(weak_delta)
def write_content_diff(out, in1, in2):
    """Write a gzip-compressed content diff of ``in1`` and ``in2`` to ``out``.

    For every ``(key, v1, v2)`` triple yielded by
    ``iterdiff.iter_content_diff(in1, in2)`` one record is written: a
    ``CDIFF_HEAD`` header packed from ``(magic, key, len(v1), len(payload))``,
    then ``v1``, then the payload.  The payload is the xdelta3 delta from
    ``v1`` to ``v2`` when ``magic`` is ``MAGIC_XDELTA3``; otherwise it is
    ``v2`` itself.
    """
    id_remap = {
        iterdiff.PROLOGUE: CONTENT_PROLOGUE,
        iterdiff.EPILOGUE: CONTENT_EPILOGUE,
        CONTENT_PROLOGUE: None,  # fail-fast on struct.pack
        CONTENT_EPILOGUE: None,  # fail-fast on struct.pack
    }
    # Default compresslevel of 9 is okay as gzip costs nothing compared to
    # XML parsing of source.
    # Note, xdelta from b'' may be still smaller than the original blob.
    with gzip.GzipFile(fileobj=out, mode='wb') as stream:
        for key, old, new in iterdiff.iter_content_diff(in1, in2):
            key = id_remap.get(key, key)
            try:
                delta = xdelta3.encode(old, new)
                # FIXME: workaround for bug
                # https://github.com/samuelcolvin/xdelta3-python/issues/2
                # Only trust the delta if it decodes back to the new value.
                try:
                    roundtrip_ok = xdelta3.decode(old, delta) == new  # Epic Fail if happens.
                except xdelta3.XDeltaError:
                    roundtrip_ok = False
            except xdelta3.NoDeltaFound:
                magic, delta = MAGIC_RAW, None
            else:
                magic = MAGIC_XDELTA3 if roundtrip_ok else MAGIC_UGLY

            # Anything other than a verified delta is stored verbatim.
            payload = delta if magic == MAGIC_XDELTA3 else new
            header = CDIFF_HEAD.pack(magic, key, len(old), len(payload))
            for chunk in (header, old, payload):
                stream.write(chunk)
def test_readme():
    """Round-trip two mostly-identical strings through encode and decode."""
    original = b'wonderful string to demonstrate xdelta3, much of these two strings is the same.'
    modified = b'different string to demonstrate xdelta3, much of these two strings is the same.'

    patch = xdelta3.encode(original, modified)
    rebuilt = xdelta3.decode(original, patch)

    assert rebuilt == modified
def xdelta3gen(source_bytes, target_bytes):
    """Return the xdelta3 delta from ``source_bytes`` to ``target_bytes``.

    Args:
        source_bytes: Bytes-like source chunk (callers pass memoryviews).
            A falsy value (``None`` or empty) means the source is exhausted
            and is treated as an empty source.
        target_bytes: Bytes-like target chunk. A falsy value means the
            target is exhausted.

    Returns:
        The encoded delta, or ``None`` when there is no target data left.

    Any exception raised by ``xdelta3.encode`` propagates to the caller.
    """
    # If the source is finished, everything remaining is target bytes, so
    # diff against an empty source.  (The original assigned to a misspelled
    # name here, so a ``None`` source crashed in ``bytes()`` below.)
    if not source_bytes:
        source_bytes = b''

    # If the target is finished there is nothing left to encode.
    if not target_bytes:
        return None

    # These are memoryviews and the python xdelta3 lib doesn't accept them,
    # so copy into real bytes objects first.
    return xdelta3.encode(bytes(source_bytes), bytes(target_bytes))
def test_large_decode():
    """Round-trip the two binary fixture files that sit next to this test module."""
    fixture_dir = Path(__file__).parent
    try:
        # Read b1.bin first, then b2.bin; either one missing aborts the test
        # with a hint on how to fetch the fixtures.
        b1, b2 = [(fixture_dir / name).read_bytes() for name in ('b1.bin', 'b2.bin')]
    except FileNotFoundError as e:
        raise RuntimeError(
            'file required for test not found, run `make download-test-files`'
        ) from e

    delta = xdelta3.encode(b1, b2)
    rebuilt = xdelta3.decode(b1, delta)
    assert b2 == rebuilt
curl https://ocw.mit.edu/ans7870/6/6.006/s08/lecturenotes/files/t8.shakespeare.txt > shakespeare.txt cp shakespeare.txt shakespeare_changed.txt vim shakespeare_changed.txt (and make some changes to shakespeare_changed.txt) python performance.py """ from pathlib import Path from statistics import mean, stdev from time import time import xdelta3 v1 = Path('shakespeare.txt').read_bytes() v2 = Path('shakespeare_changed.txt').read_bytes() times = [] for i in range(50): start = time() delta = xdelta3.encode(v1, v2, xdelta3.Flags.COMPLEVEL_1) v22 = xdelta3.decode(v1, delta) time_taken = (time() - start) * 1000 times.append(time_taken) print(f'{i + 1:3} result_match={v2 == v22} time={time_taken:0.1f}ms') print(f'\noriginal length: {len(v1)}') print(f'changed length: {len(v2)}') print(f'delta length: {len(delta)}') print( f'mean time taken to encode and decode: {mean(times):0.3f}ms, stdev {stdev(times):0.3f}ms' )
def test_no_delta():
    """Encoding raises NoDeltaFound with the expected message for these inputs."""
    with pytest.raises(xdelta3.NoDeltaFound) as exc_info:
        xdelta3.encode(b'hello', b'goodbye')

    # Checked outside the ``with`` block so it runs after the exception
    # has been captured.
    message = exc_info.value.args[0]
    assert message == 'No delta found shorter than the input value'
def test_long_random():
    """Round-trip a random base32 payload wrapped in one extra byte on each side."""
    source = base64.b32encode(os.urandom(1000))
    target = b'x' + source + b'x'

    patch = xdelta3.encode(source, target)
    rebuilt = xdelta3.decode(source, patch)

    assert target == rebuilt
def test_encode_decode():
    """Encoding the module fixtures gives the expected delta, and decoding restores value_two."""
    encoded = xdelta3.encode(value_one, value_two)
    assert encoded == expected_delta

    decoded = xdelta3.decode(value_one, encoded)
    assert value_two == decoded