Esempio n. 1
0
 def test(self):
     """StableDict must iterate keys in sorted order and therefore
     serialize to a stable, reproducible msgpack byte stream."""
     stable = StableDict(foo=1, bar=2, boo=3, baz=4)
     expected_order = [('bar', 2), ('baz', 4), ('boo', 3), ('foo', 1)]
     self.assert_equal(list(stable.items()), expected_order)
     digest = hashlib.md5(msgpack.packb(stable)).hexdigest()
     self.assert_equal(digest, 'fc78df42cd60691b3ac3dd2a2b39903f')
Esempio n. 2
0
 def stat_attrs(self, st, path):
     """Collect item metadata (ownership, mode, mtime, xattrs, BSD
     flags, ACLs) from a stat result *st* and the file's *path*."""
     attrs = {
         b'mode': st.st_mode,
         b'uid': st.st_uid, b'user': uid2user(st.st_uid),
         b'gid': st.st_gid, b'group': gid2group(st.st_gid),
         b'mtime': st_mtime_ns(st),
     }
     if self.numeric_owner:
         # Numeric-owner mode: keep only uid/gid, drop resolved names.
         attrs[b'user'] = None
         attrs[b'group'] = None
     extended = xattr.get_all(path, follow_symlinks=False)
     if extended:
         attrs[b'xattrs'] = StableDict(extended)
     if has_lchflags and st.st_flags:
         attrs[b'bsdflags'] = st.st_flags
     attrs[b'acl'] = acl_get(path, attrs, self.numeric_owner)
     return attrs
Esempio n. 3
0
 def save(self, name=None):
     """Finalize this archive and store it under *name* (defaults to
     ``self.name``).

     Raises ``self.AlreadyExists`` when an archive with that name is
     already registered in the manifest.
     """
     name = name or self.name
     if name in self.manifest.archives:
         raise self.AlreadyExists(name)
     # Ensure every buffered item chunk has been written out first,
     # so items_buffer.chunks is complete below.
     self.items_buffer.flush(flush=True)
     metadata = StableDict(
         version=1,
         name=name,
         items=self.items_buffer.chunks,
         cmdline=sys.argv,
         hostname=socket.gethostname(),
         username=getuser(),
         time=datetime.utcnow().isoformat(),
     )
     packed = msgpack.packb(metadata, unicode_errors='surrogateescape')
     self.id = self.key.id_hash(packed)
     self.cache.add_chunk(self.id, packed, self.stats)
     self.manifest.archives[name] = {'id': self.id, 'time': metadata['time']}
     # Persist manifest, then commit repository and cache in that order.
     self.manifest.write()
     self.repository.commit()
     self.cache.commit()
Esempio n. 4
0
 def test(self):
     """Verify StableDict's sorted iteration order and the resulting
     deterministic msgpack digest."""
     mapping = StableDict(foo=1, bar=2, boo=3, baz=4)
     self.assert_equal(
         list(mapping.items()),
         [('bar', 2), ('baz', 4), ('boo', 3), ('foo', 1)],
     )
     self.assert_equal(
         hashlib.md5(msgpack.packb(mapping)).hexdigest(),
         'fc78df42cd60691b3ac3dd2a2b39903f',
     )
Esempio n. 5
0
 def add(self, item):
     """Serialize *item* (normalized to a StableDict) into the buffer,
     flushing once the buffer is full."""
     packed = self.packer.pack(StableDict(item))
     self.buffer.write(packed)
     if self.is_full():
         self.flush()
Esempio n. 6
0
    def rebuild_refcounts(self):
        """Rebuild object reference counts by walking the metadata.

        Missing and/or incorrect data is repaired when detected
        (replacement chunks are written only when ``self.repair`` is set).
        """
        # Exclude the manifest from chunks
        del self.chunks[Manifest.MANIFEST_ID]

        def mark_as_possibly_superseded(id_):
            # A refcount of 0 (or a missing entry) means the chunk may be
            # garbage once the rebuild finishes.
            if self.chunks.get(id_, (0,))[0] == 0:
                self.possibly_superseded.add(id_)

        def add_callback(chunk):
            # Installed as ChunkBuffer.write_chunk: register the chunk's
            # reference and return its id to the buffer.
            id_ = self.key.id_hash(chunk)
            cdata = self.key.encrypt(chunk)
            add_reference(id_, len(chunk), len(cdata), cdata)
            return id_

        def add_reference(id_, size, csize, cdata=None):
            # Bump the refcount of a known chunk; for an unknown chunk the
            # caller must supply cdata so it can be (re-)stored on repair.
            try:
                count, _, _ = self.chunks[id_]
                self.chunks[id_] = count + 1, size, csize
            except KeyError:
                assert cdata is not None
                self.chunks[id_] = 1, size, csize
                if self.repair:
                    self.repository.put(id_, cdata)

        def verify_file_chunks(item):
            """Verifies that all file chunks are present

            Missing file chunks will be replaced with new chunks of the same
            length containing all zeros.
            """
            offset = 0
            chunk_list = []
            for chunk_id, size, csize in item[b'chunks']:
                if chunk_id not in self.chunks:
                    # If a file chunk is missing, create an all empty replacement chunk
                    self.report_progress('{}: Missing file chunk detected (Byte {}-{})'.format(item[b'path'].decode('utf-8', 'surrogateescape'), offset, offset + size), error=True)
                    data = bytes(size)
                    chunk_id = self.key.id_hash(data)
                    cdata = self.key.encrypt(data)
                    csize = len(cdata)
                    add_reference(chunk_id, size, csize, cdata)
                else:
                    add_reference(chunk_id, size, csize)
                chunk_list.append((chunk_id, size, csize))
                offset += size
            item[b'chunks'] = chunk_list

        def robust_iterator(archive):
            """Iterates through all archive items

            Missing item chunks will be skipped and the msgpack stream will be restarted
            """
            unpacker = RobustUnpacker(lambda item: isinstance(item, dict) and b'path' in item)
            _state = 0

            def missing_chunk_detector(chunk_id):
                # groupby key function: _state's parity tracks whether the
                # current run of chunk ids is missing; it only ever grows,
                # so each present/missing run becomes its own group.
                nonlocal _state
                if _state % 2 != int(chunk_id not in self.chunks):
                    _state += 1
                return _state

            for state, items in groupby(archive[b'items'], missing_chunk_detector):
                items = list(items)
                if state % 2:
                    # Odd state == a run of missing item-metadata chunks.
                    self.report_progress('Archive metadata damage detected', error=True)
                    continue
                if state > 0:
                    # Something was skipped before this run: the msgpack
                    # stream is mid-item, so resync to the next item start.
                    unpacker.resync()
                for chunk_id, cdata in zip(items, repository.get_many(items)):
                    unpacker.feed(self.key.decrypt(chunk_id, cdata))
                    for item in unpacker:
                        yield item

        repository = cache_if_remote(self.repository)
        num_archives = len(self.manifest.archives)
        for i, (name, info) in enumerate(list(self.manifest.archives.items()), 1):
            self.report_progress('Analyzing archive {} ({}/{})'.format(name, i, num_archives))
            archive_id = info[b'id']
            if archive_id not in self.chunks:
                # Whole archive metadata block gone: drop the archive entry.
                self.report_progress('Archive metadata block is missing', error=True)
                del self.manifest.archives[name]
                continue
            mark_as_possibly_superseded(archive_id)
            cdata = self.repository.get(archive_id)
            data = self.key.decrypt(archive_id, cdata)
            archive = StableDict(msgpack.unpackb(data))
            if archive[b'version'] != 1:
                raise Exception('Unknown archive metadata version')
            decode_dict(archive, (b'name', b'hostname', b'username', b'time'))  # fixme: argv
            # Re-write all item metadata through a fresh buffer so repaired
            # items replace the originals.
            items_buffer = ChunkBuffer(self.key)
            items_buffer.write_chunk = add_callback
            for item in robust_iterator(archive):
                if b'chunks' in item:
                    verify_file_chunks(item)
                items_buffer.add(item)
            items_buffer.flush(flush=True)
            # Old item chunks are superseded by the rewritten ones.
            for previous_item_id in archive[b'items']:
                mark_as_possibly_superseded(previous_item_id)
            archive[b'items'] = items_buffer.chunks
            data = msgpack.packb(archive, unicode_errors='surrogateescape')
            new_archive_id = self.key.id_hash(data)
            cdata = self.key.encrypt(data)
            add_reference(new_archive_id, len(data), len(cdata), cdata)
            info[b'id'] = new_archive_id