def test(self): d = StableDict(foo=1, bar=2, boo=3, baz=4) self.assert_equal(list(d.items()), [('bar', 2), ('baz', 4), ('boo', 3), ('foo', 1)]) self.assert_equal( hashlib.md5(msgpack.packb(d)).hexdigest(), 'fc78df42cd60691b3ac3dd2a2b39903f')
def stat_attrs(self, st, path): item = { b'mode': st.st_mode, b'uid': st.st_uid, b'user': uid2user(st.st_uid), b'gid': st.st_gid, b'group': gid2group(st.st_gid), b'mtime': st_mtime_ns(st), } if self.numeric_owner: item[b'user'] = item[b'group'] = None xattrs = xattr.get_all(path, follow_symlinks=False) if xattrs: item[b'xattrs'] = StableDict(xattrs) if has_lchflags and st.st_flags: item[b'bsdflags'] = st.st_flags item[b'acl'] = acl_get(path, item, self.numeric_owner) return item
def save(self, name=None): name = name or self.name if name in self.manifest.archives: raise self.AlreadyExists(name) self.items_buffer.flush(flush=True) metadata = StableDict({ 'version': 1, 'name': name, 'items': self.items_buffer.chunks, 'cmdline': sys.argv, 'hostname': socket.gethostname(), 'username': getuser(), 'time': datetime.utcnow().isoformat(), }) data = msgpack.packb(metadata, unicode_errors='surrogateescape') self.id = self.key.id_hash(data) self.cache.add_chunk(self.id, data, self.stats) self.manifest.archives[name] = {'id': self.id, 'time': metadata['time']} self.manifest.write() self.repository.commit() self.cache.commit()
def test(self): d = StableDict(foo=1, bar=2, boo=3, baz=4) self.assert_equal(list(d.items()), [('bar', 2), ('baz', 4), ('boo', 3), ('foo', 1)]) self.assert_equal(hashlib.md5(msgpack.packb(d)).hexdigest(), 'fc78df42cd60691b3ac3dd2a2b39903f')
def add(self, item): self.buffer.write(self.packer.pack(StableDict(item))) if self.is_full(): self.flush()
def rebuild_refcounts(self): """Rebuild object reference counts by walking the metadata Missing and/or incorrect data is repaired when detected """ # Exclude the manifest from chunks del self.chunks[Manifest.MANIFEST_ID] def mark_as_possibly_superseded(id_): if self.chunks.get(id_, (0,))[0] == 0: self.possibly_superseded.add(id_) def add_callback(chunk): id_ = self.key.id_hash(chunk) cdata = self.key.encrypt(chunk) add_reference(id_, len(chunk), len(cdata), cdata) return id_ def add_reference(id_, size, csize, cdata=None): try: count, _, _ = self.chunks[id_] self.chunks[id_] = count + 1, size, csize except KeyError: assert cdata is not None self.chunks[id_] = 1, size, csize if self.repair: self.repository.put(id_, cdata) def verify_file_chunks(item): """Verifies that all file chunks are present Missing file chunks will be replaced with new chunks of the same length containing all zeros. """ offset = 0 chunk_list = [] for chunk_id, size, csize in item[b'chunks']: if not chunk_id in self.chunks: # If a file chunk is missing, create an all empty replacement chunk self.report_progress('{}: Missing file chunk detected (Byte {}-{})'.format(item[b'path'].decode('utf-8', 'surrogateescape'), offset, offset + size), error=True) data = bytes(size) chunk_id = self.key.id_hash(data) cdata = self.key.encrypt(data) csize = len(cdata) add_reference(chunk_id, size, csize, cdata) else: add_reference(chunk_id, size, csize) chunk_list.append((chunk_id, size, csize)) offset += size item[b'chunks'] = chunk_list def robust_iterator(archive): """Iterates through all archive items Missing item chunks will be skipped and the msgpack stream will be restarted """ unpacker = RobustUnpacker(lambda item: isinstance(item, dict) and b'path' in item) _state = 0 def missing_chunk_detector(chunk_id): nonlocal _state if _state % 2 != int(not chunk_id in self.chunks): _state += 1 return _state for state, items in groupby(archive[b'items'], missing_chunk_detector): items = list(items) if state % 2: self.report_progress('Archive metadata damage detected', error=True) continue if state > 0: unpacker.resync() for chunk_id, cdata in zip(items, repository.get_many(items)): unpacker.feed(self.key.decrypt(chunk_id, cdata)) for item in unpacker: yield item repository = cache_if_remote(self.repository) num_archives = len(self.manifest.archives) for i, (name, info) in enumerate(list(self.manifest.archives.items()), 1): self.report_progress('Analyzing archive {} ({}/{})'.format(name, i, num_archives)) archive_id = info[b'id'] if not archive_id in self.chunks: self.report_progress('Archive metadata block is missing', error=True) del self.manifest.archives[name] continue mark_as_possibly_superseded(archive_id) cdata = self.repository.get(archive_id) data = self.key.decrypt(archive_id, cdata) archive = StableDict(msgpack.unpackb(data)) if archive[b'version'] != 1: raise Exception('Unknown archive metadata version') decode_dict(archive, (b'name', b'hostname', b'username', b'time')) # fixme: argv items_buffer = ChunkBuffer(self.key) items_buffer.write_chunk = add_callback for item in robust_iterator(archive): if b'chunks' in item: verify_file_chunks(item) items_buffer.add(item) items_buffer.flush(flush=True) for previous_item_id in archive[b'items']: mark_as_possibly_superseded(previous_item_id) archive[b'items'] = items_buffer.chunks data = msgpack.packb(archive, unicode_errors='surrogateescape') new_archive_id = self.key.id_hash(data) cdata = self.key.encrypt(data) add_reference(new_archive_id, len(data), len(cdata), cdata) info[b'id'] = new_archive_id