예제 #1
0
    def _extract_b2_metadata(self, response, obj_key):
        '''Rebuild and verify the metadata mapping stored in *response* headers

        Only headers whose name starts (case-insensitively) with
        ``info_header_prefix + 'meta-'`` are considered; their values are
        passed through `_b2_url_decode`. The serialized metadata is spread
        over consecutively numbered ``meta-000``, ``meta-001``, ... headers,
        which are joined, unquoted and evaluated as the body of a dict
        literal. Values of type `bytes` are then base64-decoded.

        Raises `CorruptedObjectError` if the ``meta-format`` header is not
        ``raw2`` (a missing header counts as ``raw``). If the checksum of
        the resulting mapping differs from the ``meta-md5`` header, a
        warning is logged and `CorruptedObjectError` is raised for the
        ``s3ql_passphrase`` and ``s3ql_metadata`` objects, `BadDigestError`
        for every other *obj_key*.

        Returns the decoded metadata mapping.
        '''

        # Lower-case once so that header names can be matched regardless of
        # the capitalization used by the server.
        wanted_prefix = info_header_prefix.lower() + 'meta-'

        meta_headers = CaseInsensitiveDict()
        for name, raw_value in response.headers.items():
            if name.lower().startswith(wanted_prefix):
                meta_headers[name] = self._b2_url_decode(raw_value)

        # 'raw2' is the current metadata format, nothing else is accepted
        meta_format = meta_headers.get('%smeta-format' % info_header_prefix, 'raw')
        if meta_format != 'raw2':
            raise CorruptedObjectError('invalid metadata format: %s' % meta_format)

        # Gather the numbered chunks, stopping at the first missing index
        chunks = []
        index = 0
        while True:
            chunk = meta_headers.get('%smeta-%03d' % (info_header_prefix, index), None)
            if chunk is None:
                break
            chunks.append(chunk)
            index += 1

        serialized = urllib.parse.unquote(''.join(chunks))
        meta = literal_eval('{ %s }' % serialized)

        # Binary values are stored base64 encoded. Only existing keys are
        # reassigned, so the mapping keeps its size during iteration.
        for key, value in meta.items():
            if isinstance(value, bytes):
                try:
                    decoded = base64.b64decode(value)
                except binascii.Error:
                    # Left as None so that the MD5 comparison below is
                    # expected to fail
                    decoded = None
                meta[key] = decoded

        # Verify the MD5. One could argue that a mismatch should always be a
        # `CorruptedObjectError` instead of a `BadDigestError`: the checksum
        # is stored together with the object rather than computed on-the-fly
        # by the server, so it does not really validate what was sent over
        # the wire. Still, accidental corruption in transit seems more
        # likely than accidental corruption on the server (which hopefully
        # checksums its storage devices).
        computed_md5 = base64.b64encode(checksum_basic_mapping(meta)).decode('ascii')
        stored_md5 = meta_headers.get('%smeta-md5' % info_header_prefix, None)
        if computed_md5 == stored_md5:
            return meta

        log.warning('MD5 mismatch in metadata for %s', obj_key)

        # File systems of revision 23 or earlier calculated the checksum
        # differently and therefore always end up here. The s3ql_passphrase
        # and s3ql_metadata objects (only retrieved once at program start)
        # are special-cased to produce a better error message in that
        # situation.
        if obj_key in ('s3ql_passphrase', 's3ql_metadata'):
            raise CorruptedObjectError('Meta MD5 for %s does not match' % obj_key)
        raise BadDigestError(400, 'bad_digest',
                             'Meta MD5 for %s does not match' % obj_key)