Example #1
import string

import xdelta3


def test_different_compression():
    all_ascii = (string.ascii_letters + string.digits).encode()
    v1 = all_ascii * 1000
    v2 = all_ascii * 900 + string.ascii_letters.encode() * 100 + all_ascii * 100
    delta_a = xdelta3.encode(v1, v2, xdelta3.Flags.COMPLEVEL_9)
    v2_a = xdelta3.decode(v1, delta_a)
    assert v2 == v2_a

    delta_b = xdelta3.encode(v1, v2, xdelta3.Flags.COMPLEVEL_1)
    v2_b = xdelta3.decode(v1, delta_b)
    assert v2 == v2_b
    assert len(delta_a) < len(delta_b)  # level 9 should produce a smaller delta than level 1
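A minimal sketch building on the same data, showing the size trade-off the assertion above captures (only COMPLEVEL_1 and COMPLEVEL_9 are taken from these examples; any other level names would be assumptions):

import string

import xdelta3

v1 = (string.ascii_letters + string.digits).encode() * 1000
v2 = v1[:500] + b'some replacement text' + v1[520:]
for flag in (xdelta3.Flags.COMPLEVEL_1, xdelta3.Flags.COMPLEVEL_9):
    delta = xdelta3.encode(v1, v2, flag)
    assert xdelta3.decode(v1, delta) == v2  # the round trip must be lossless
    print(f'{flag!r}: delta is {len(delta)} bytes')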
Example #2
import gzip

import xdelta3

import iterdiff

# CDIFF_HEAD and the MAGIC_*/CONTENT_* constants are module-level; see the sketch below


def write_content_diff(out, in1, in2):
    content_id_map = {
        iterdiff.PROLOGUE: CONTENT_PROLOGUE,
        iterdiff.EPILOGUE: CONTENT_EPILOGUE,
        CONTENT_PROLOGUE: None,  # fail-fast on struct.pack
        CONTENT_EPILOGUE: None,  # fail-fast on struct.pack
    }
    # The default compresslevel of 9 is fine, as gzip costs nothing compared to XML parsing of the source.
    # Note: an xdelta computed from b'' may still be smaller than the original blob.
    with gzip.GzipFile(fileobj=out, mode='wb') as gz:
        for k, v1, v2 in iterdiff.iter_content_diff(in1, in2):
            k = content_id_map.get(k, k)
            try:
                delta = xdelta3.encode(v1, v2)
                # FIXME: workaround for bug https://github.com/samuelcolvin/xdelta3-python/issues/2
                try:
                    good = xdelta3.decode(v1, delta) == v2  # an epic fail if this ever happens
                except xdelta3.XDeltaError:
                    good = False
                magic = MAGIC_XDELTA3 if good else MAGIC_UGLY
            except xdelta3.NoDeltaFound:
                magic, delta = MAGIC_RAW, None
            blob = delta if magic == MAGIC_XDELTA3 else v2
            gz.write(CDIFF_HEAD.pack(magic, k, len(v1), len(blob)))
            gz.write(v1)
            gz.write(blob)
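write_content_diff depends on module-level names (iterdiff, CDIFF_HEAD, the MAGIC_* constants, CONTENT_PROLOGUE/CONTENT_EPILOGUE) that are not shown here. A hypothetical reconstruction of those constants, only to make the record format readable; the real layout and values may differ:

import struct

# hypothetical definitions; the real module declares these elsewhere
CDIFF_HEAD = struct.Struct('<BbII')  # magic byte, signed content id, len(v1), len(blob)
MAGIC_XDELTA3, MAGIC_UGLY, MAGIC_RAW = 1, 2, 3
MAGIC_LIST = (MAGIC_XDELTA3, MAGIC_UGLY, MAGIC_RAW)
CONTENT_PROLOGUE, CONTENT_EPILOGUE = -1, -2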
Example #3
import xdelta3


def test_readme():
    value_one = b'wonderful string to demonstrate xdelta3, much of these two strings is the same.'
    value_two = b'different string to demonstrate xdelta3, much of these two strings is the same.'
    delta = xdelta3.encode(value_one, value_two)

    value_two_rebuilt = xdelta3.decode(value_one, delta)
    assert value_two_rebuilt == value_two
Example #4
import xdelta3


def xdelta3dec(source_bytes, patch_bytes):
    # if the source bytes are exhausted, everything remaining comes from the patch
    if not source_bytes:
        source_bytes = b''
    # If this fires, either the patch file is corrupt or there is a bug:
    # every write and read must have a patch file written and read.
    assert patch_bytes
    # these may be memoryviews, and the Python xdelta3 lib doesn't accept those...
    return xdelta3.decode(bytes(source_bytes), patch_bytes)
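A usage sketch for xdelta3dec covering the memoryview case the comment warns about (that the binding rejects memoryviews is taken from the snippet's own comment, not verified here):

import xdelta3

src = b'abcdefgh' * 64
dst = src + b' with a new tail'
patch = xdelta3.encode(src, dst)

view = memoryview(src)  # e.g. a slice of a larger read buffer
assert xdelta3dec(view, patch) == dst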
Example #5
from pathlib import Path

import xdelta3


def test_large_decode():
    this_dir = Path(__file__).parent
    try:
        b1 = (this_dir / 'b1.bin').read_bytes()
        b2 = (this_dir / 'b2.bin').read_bytes()
    except FileNotFoundError as e:
        raise RuntimeError(
            'file required for test not found, run `make download-test-files`'
        ) from e

    d = xdelta3.encode(b1, b2)
    b3 = xdelta3.decode(b1, d)
    assert b2 == b3
Example #6
import gzip

import xdelta3

# CDIFF_HEAD and the MAGIC_* constants are the module-level definitions used by write_content_diff above


def read_content_diff(fd):
    with gzip.GzipFile(fileobj=fd, mode='rb') as gz:
        while True:
            head = gz.read(CDIFF_HEAD.size)
            if not head:
                break
            magic, content_id, l1, l2 = CDIFF_HEAD.unpack(head)
            v1 = gz.read(l1)
            v2 = gz.read(l2)
            if magic not in MAGIC_LIST or len(v1) != l1 or len(v2) != l2:
                raise RuntimeError('Bad format', magic, content_id, l1,
                                   len(v1), l2, len(v2))
            if magic == MAGIC_XDELTA3:
                v2 = xdelta3.decode(v1, v2)
            yield content_id, v1, v2
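Putting the two halves together: reusing the hypothetical constants sketched after Example #2, a single record can be written by hand in the same format and recovered with read_content_diff:

import gzip
import io

import xdelta3

v1 = b'hello world ' * 50
v2 = b'hello brave new world ' * 50
delta = xdelta3.encode(v1, v2)

buf = io.BytesIO()
with gzip.GzipFile(fileobj=buf, mode='wb') as gz:
    gz.write(CDIFF_HEAD.pack(MAGIC_XDELTA3, 0, len(v1), len(delta)))
    gz.write(v1)
    gz.write(delta)
buf.seek(0)

for content_id, r1, r2 in read_content_diff(buf):
    assert (content_id, r1, r2) == (0, v1, v2)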
Example #7
"""
curl https://ocw.mit.edu/ans7870/6/6.006/s08/lecturenotes/files/t8.shakespeare.txt > shakespeare.txt
cp shakespeare.txt shakespeare_changed.txt
vim shakespeare_changed.txt  (and make some changes to it)
python performance.py
"""

from pathlib import Path
from statistics import mean, stdev
from time import time
import xdelta3

v1 = Path('shakespeare.txt').read_bytes()
v2 = Path('shakespeare_changed.txt').read_bytes()

times = []
for i in range(50):
    start = time()
    delta = xdelta3.encode(v1, v2, xdelta3.Flags.COMPLEVEL_1)
    v22 = xdelta3.decode(v1, delta)
    time_taken = (time() - start) * 1000
    times.append(time_taken)
    print(f'{i + 1:3} result_match={v2 == v22} time={time_taken:0.1f}ms')

print(f'\noriginal length: {len(v1)}')
print(f'changed length:  {len(v2)}')
print(f'delta length:    {len(delta)}')
print(
    f'mean time taken to encode and decode: {mean(times):0.3f}ms, stdev {stdev(times):0.3f}ms'
)
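One caveat on the timing loop above: time.time() can have coarse resolution on some platforms, while time.perf_counter() is the clock intended for measuring intervals. A drop-in variant of the timed section:

from time import perf_counter

start = perf_counter()
delta = xdelta3.encode(v1, v2, xdelta3.Flags.COMPLEVEL_1)
v22 = xdelta3.decode(v1, delta)
time_taken = (perf_counter() - start) * 1000  # milliseconds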
Example #8
import pytest

import xdelta3

# value_one and expected_delta are module-level fixtures defined elsewhere in the test file


def test_decode_error():
    with pytest.raises(xdelta3.XDeltaError) as exc_info:
        xdelta3.decode(expected_delta, value_one)
    assert exc_info.value.args[0] == 'Error occur executing xdelta3: XD3_INVALID_INPUT'
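value_one and expected_delta above are fixtures from the test module and are not shown here. A self-contained way to provoke the same error, assuming (as the test implies) that any malformed patch raises XDeltaError:

import pytest
import xdelta3

with pytest.raises(xdelta3.XDeltaError):
    # the second argument must be a valid xdelta3 patch, which this is not
    xdelta3.decode(b'some source bytes', b'definitely not a valid delta')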
Example #9
import base64
import os

import xdelta3


def test_long_random():
    v1 = base64.b32encode(os.urandom(1000))
    v2 = b'x' + v1 + b'x'
    delta = xdelta3.encode(v1, v2)
    v22 = xdelta3.decode(v1, delta)
    assert v2 == v22
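The opposite case is worth keeping in mind: when two inputs share too little, xdelta3.encode may raise NoDeltaFound instead of returning a delta, which is exactly what the handler in Example #2 guards against. A defensive sketch:

import os

import xdelta3

v1 = os.urandom(1000)
v2 = os.urandom(1000)
try:
    delta = xdelta3.encode(v1, v2)
except xdelta3.NoDeltaFound:
    delta = None  # fall back to storing v2 verbatim, as write_content_diff does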
Example #10
import xdelta3

# value_one, value_two and expected_delta are module-level fixtures defined elsewhere in the test file


def test_encode_decode():
    delta = xdelta3.encode(value_one, value_two)
    assert delta == expected_delta
    value_two2 = xdelta3.decode(value_one, delta)
    assert value_two == value_two2
Example #11
    def load_human_data(self, start_idx=None, end_idx=None, ids=None):
        """
        Load human backups and event data for the specified humans.

        Ex : Calling with start_idx=1 and end_idx=4 will load data for
        the second, third and fourth humans.

        Args:
            start_idx (int, optional): Index (starting at 0) of the first human to load.
                If unspecified, loading will start at first human.
            end_idx (int, optional): Index (starting at 0) of the last human to load plus one.
                If unspecified, humans up until the last one will be loaded.

        Returns:
            [type]: [description]
        """
        if start_idx is None:
            start_idx = 0
        if end_idx is None:
            end_idx = self.get_nb_humans()
        assert start_idx < end_idx

        # If we were given a specific set of human ids, load just those; otherwise batch over the index range
        if ids:
            idxs = [int(i.split(":")[-1])-1 for i in ids]
        else:
            idxs = range(start_idx, end_idx)

        human_backups = {}
        humans_events = {}
        latest_human_buffers = [None] * self.get_nb_humans()
        print("loading humans from delta buffer...")
        # first, quickly load all the raw data, we'll rebuild the full objects afterwards
        for day_idx in tqdm.tqdm(range(self.get_nb_days())):
            for hour_idx in range(24):
                for idx in idxs:
                    # here, we assume the data is always encoded in a delta-since-last format
                    if latest_human_buffers[idx] is None or not self.is_delta[day_idx, hour_idx, idx]:
                        assert not self.is_delta[day_idx, hour_idx, idx]
                        latest_human_buffers[idx] = self.dataset[day_idx, hour_idx, idx]
                        human_buffer = latest_human_buffers[idx]
                    else:
                        human_delta = self.dataset[day_idx, hour_idx, idx]
                        human_buffer = xdelta3.decode(latest_human_buffers[idx], human_delta)
                        latest_human_buffers[idx] = human_buffer
                    human_data = pickle.loads(human_buffer)
                    timestamp = human_data.env.timestamp
                    human_data.conf = self.conf
                    if timestamp not in human_backups:
                        human_backups[timestamp] = {}
                    human_backups[timestamp][human_data.name] = human_data
        human_constr_args = [k for k in inspect.getfullargspec(Human.__init__)[0] if k != "self"]
        for timestamp, humans in human_backups.items():
            # time to recreate the (approx) full object w/ its member functions
            for human_name, human_dump in humans.items():
                new_env = Env(human_dump.env.initial_timestamp)
                new_env._now = human_dump.env.now
                human_dump.env = new_env
                human_dump.rng = human_dump.init_seed  # to keep same init construction state
                human_obj = Human(*[getattr(human_dump, k) for k in human_constr_args])
                # override all attributes except the blacklisted/dummy ones
                skipped_attribs = {"env", "known_connections", "intervened_behavior"}
                for attr_name in human_obj.__dict__.keys():
                    if attr_name not in skipped_attribs and attr_name not in human_dump.blacklisted_attribs:
                        setattr(human_obj, attr_name, getattr(human_dump, attr_name))
                human_obj.name = human_dump.name
                humans[human_name] = human_obj
            # now, extract human event data
            for human_name, human in humans.items():
                humans_events[human.name] = {}
                for event in human._events:
                    humans_events[human.name][(event["time"], event["event_type"])] = event
                human._events = []
        # finally, ensure events are sorted by timestamp for each human
        for human_id, human_events in humans_events.items():
            events = list(human_events.values())
            events.sort(key=lambda e: e["time"])
            humans_events[human_id] = events
        return human_backups, humans_events
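The loader above assumes the buffers were stored delta-since-last: a full keyframe wherever is_delta is false, otherwise an xdelta3 patch against the previous buffer. A minimal sketch of that writer-side convention (encode_human_buffers, snapshots and keyframe_every are illustrative names, not the project's actual storage code):

import pickle

import xdelta3

def encode_human_buffers(snapshots, keyframe_every=24):
    """Turn picklable snapshots into (is_delta, blob) pairs, one per step."""
    out, prev = [], None
    for i, snap in enumerate(snapshots):
        buf = pickle.dumps(snap)
        if prev is None or i % keyframe_every == 0:
            out.append((False, buf))  # full keyframe; the decoder resyncs here
        else:
            try:
                out.append((True, xdelta3.encode(prev, buf)))
            except xdelta3.NoDeltaFound:
                out.append((False, buf))  # too dissimilar, store a keyframe
        prev = buf
    return out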