Ejemplo n.º 1
0
    def multipart_response_iter(self, resp, boundary, body_key, crypto_meta):
        """
        Re-emit a multipart MIME response body with every part decrypted.

        The delimiter lines and the headers of each part are yielded as they
        are; only the part bodies go through a decryption context.

        :param resp: application response
        :param boundary: multipart boundary (bytes: it is concatenated with
                         bytes literals)
        :param body_key: decryption key for the response body
        :param crypto_meta: crypto_meta for the response body; its 'iv' item
                            is used to create each decryption context
        :return: generator for decrypted response body
        """
        with closing_if_possible(resp):
            documents = multipart_byteranges_to_document_iters(
                FileLikeIter(resp), boundary)
            for start, _end, _size, part_headers, part_body in documents:
                # opening delimiter of this part
                yield b"--" + boundary + b"\r\n"

                for name, val in part_headers:
                    yield b"%s: %s\r\n" % (wsgi_to_bytes(name),
                                           wsgi_to_bytes(val))

                # blank line between the part headers and the part body
                yield b"\r\n"

                # one context per part, created with the part's first byte
                # position
                ctxt = self.crypto.create_decryption_ctxt(
                    body_key, crypto_meta['iv'], start)
                while True:
                    chunk = part_body.read(DECRYPT_CHUNK_SIZE)
                    if chunk == b'':
                        break
                    yield ctxt.update(chunk)

                yield b"\r\n"

            # closing delimiter of the whole document
            yield b"--" + boundary + b"--"
Ejemplo n.º 2
0
    def multipart_response_iter(self, resp, boundary, body_key, crypto_meta):
        """
        Generate a decrypted copy of a multipart MIME response body.

        For each part the delimiter line and headers are yielded unchanged,
        then the part body is read in DECRYPT_CHUNK_SIZE pieces and each
        piece is yielded after passing through a decryption context.

        :param resp: application response
        :param boundary: multipart boundary (bytes: it is concatenated with
                         bytes literals)
        :param body_key: decryption key for the response body
        :param crypto_meta: crypto_meta for the response body; its 'iv' item
                            is used to create each decryption context
        :return: generator for decrypted response body
        """
        with closing_if_possible(resp):
            part_iter = multipart_byteranges_to_document_iters(
                FileLikeIter(resp), boundary)
            for offset, _last, _length, hdrs, part in part_iter:
                yield b"--" + boundary + b"\r\n"

                for hdr_name, hdr_value in hdrs:
                    yield b"%s: %s\r\n" % (wsgi_to_bytes(hdr_name),
                                           wsgi_to_bytes(hdr_value))

                # end of the part headers
                yield b"\r\n"

                # the context is created with the first byte position of
                # this part
                decrypter = self.crypto.create_decryption_ctxt(
                    body_key, crypto_meta['iv'], offset)
                while True:
                    encrypted = part.read(DECRYPT_CHUNK_SIZE)
                    if encrypted == b'':
                        break
                    yield decrypter.update(encrypted)

                # end of the part body
                yield b"\r\n"

            # final delimiter closing the document
            yield b"--" + boundary + b"--"
Ejemplo n.º 3
0
 def _verify_user_metadata(self, req_hdrs, name, value, key):
     """
     Assert that user metadata ``name`` appears in ``req_hdrs`` only in its
     encrypted form.

     :param req_hdrs: request headers to inspect
     :param name: metadata name, i.e. the part after ``X-Object-Meta-``
     :param value: expected plaintext value of the metadata item
     :param key: key the value is expected to have been encrypted with
     """
     # the plaintext header must be absent, the encrypted one present
     self.assertNotIn('X-Object-Meta-' + name, req_hdrs)
     crypto_hdr = 'X-Object-Transient-Sysmeta-Crypto-Meta-' + name
     self.assertIn(crypto_hdr, req_hdrs)

     # header value has the form "<encrypted value>; swift_meta=<json>"
     enc_val, meta_param = req_hdrs[crypto_hdr].split(';')
     meta_param = meta_param.strip()
     marker = 'swift_meta='
     self.assertTrue(meta_param.startswith(marker))
     item_meta = json.loads(
         urlparse.unquote_plus(meta_param[len(marker):]))
     self.assertEqual(Crypto.cipher, item_meta['cipher'])
     iv = base64.b64decode(item_meta['iv'])
     self.assertEqual(FAKE_IV, iv)
     expected_enc_val = base64.b64encode(
         encrypt(wsgi_to_bytes(value), key, iv))
     self.assertEqual(expected_enc_val, wsgi_to_bytes(enc_val))

     # if there is any encrypted user metadata then this header should exist
     common_hdr = 'X-Object-Transient-Sysmeta-Crypto-Meta'
     self.assertIn(common_hdr, req_hdrs)
     common_meta = json.loads(urlparse.unquote_plus(req_hdrs[common_hdr]))
     expected_common_meta = {
         'cipher': Crypto.cipher,
         'key_id': {'v': 'fake', 'path': '/a/c/fake'},
     }
     self.assertDictEqual(expected_common_meta, common_meta)
Ejemplo n.º 4
0
Archivo: wsgi.py Proyecto: mahak/swift
 def parse_request(self):
     """
     Re-quote the raw request line on py3, then defer to the base class.

     :returns: whatever ``wsgi.HttpProtocol.parse_request`` returns
     """
     # request lines *should* be ascii per the RFC, but historically
     # we've allowed (and even have func tests that use) arbitrary
     # bytes. This breaks on py3 (see https://bugs.python.org/issue33973
     # ) but the work-around is simple: munge the request line to be
     # properly quoted. py2 will do the right thing without this, but it
     # doesn't hurt to re-write the request line like this and it
     # simplifies testing.
     #
     # A request line with fewer than two spaces most likely has a mangled
     # protocol; it is left alone for the base class to deal with.
     if not six.PY2 and self.raw_requestline.count(b' ') >= 2:

         def requote_plus(raw):
             # unquote first, so we don't over-quote something
             # that was *correctly* quoted
             return wsgi_to_bytes(wsgi_quote_plus(wsgi_unquote_plus(
                 bytes_to_wsgi(raw))))

         verb, target, remainder = self.raw_requestline.split(b' ', 2)
         path, q, query = target.partition(b'?')
         # same unquote-then-quote treatment for the path
         path = wsgi_to_bytes(wsgi_quote(wsgi_unquote(
             bytes_to_wsgi(path))))

         requoted_params = []
         for param in query.split(b'&'):
             key, sep, val = param.partition(b'=')
             # sep is b'' when the param has no '=', so no '=' is added
             requoted_params.append(
                 sep.join([requote_plus(key), requote_plus(val)]))
         query = b'&'.join(requoted_params)

         self.raw_requestline = b' '.join(
             [verb, path + q + query, remainder])
     return wsgi.HttpProtocol.parse_request(self)
Ejemplo n.º 5
0
def check_metadata(req, target_type):
    """
    Validate the metadata headers carried by a request.

    Only the metadata in the given request is examined.  Checks against
    account/container overall metadata should be forwarded on to the
    respective server to be checked.

    :param req: request object
    :param target_type: str: one of: object, container, or account: indicates
                        which type the target storage for the metadata is
    :returns: HTTPBadRequest describing the first problem found, otherwise
              None
    """
    def bad_request(body):
        # every failure is reported as a plain-text 400 for this request
        return HTTPBadRequest(body=body, request=req,
                              content_type='text/plain')

    target_type = target_type.lower()
    prefix = 'x-%s-meta-' % target_type
    utf8_required = target_type in ('account', 'container')
    item_count = 0
    total_size = 0
    for header, value in req.headers.items():
        # this limit is applied to every string header value, before the
        # metadata prefix is even looked at
        if (isinstance(value, six.string_types)
                and len(value) > MAX_HEADER_SIZE):
            return bad_request(
                b'Header value too long: %s' %
                wsgi_to_bytes(header[:MAX_META_NAME_LENGTH]))
        if not header.lower().startswith(prefix):
            continue

        name = header[len(prefix):]
        if not name:
            return bad_request('Metadata name cannot be empty')

        bad_name = not check_utf8(wsgi_to_str(name))
        bad_value = value and not check_utf8(wsgi_to_str(value))
        if utf8_required and (bad_name or bad_value):
            return bad_request('Metadata must be valid UTF-8')

        item_count += 1
        total_size += len(name) + len(value)
        if len(name) > MAX_META_NAME_LENGTH:
            return bad_request(wsgi_to_bytes(
                'Metadata name too long: %s%s' % (prefix, name)))
        if len(value) > MAX_META_VALUE_LENGTH:
            return bad_request(wsgi_to_bytes(
                'Metadata value longer than %d: %s%s' % (
                    MAX_META_VALUE_LENGTH, prefix, name)))
        if item_count > MAX_META_COUNT:
            return bad_request(
                'Too many metadata items; max %d' % MAX_META_COUNT)
        if total_size > MAX_META_OVERALL_SIZE:
            return bad_request(
                'Total metadata too large; max %d' % MAX_META_OVERALL_SIZE)
    return None
Ejemplo n.º 6
0
def check_metadata(req, target_type):
    """
    Check the metadata headers of a request for validity.

    This only looks at the metadata sent in the given request.  Checks
    against account/container overall metadata should be forwarded on to the
    respective server to be checked.

    :param req: request object
    :param target_type: str: one of: object, container, or account: indicates
                        which type the target storage for the metadata is
    :returns: HTTPBadRequest with bad metadata otherwise None
    """
    def reject(body):
        # all rejections share the request and the plain-text content type
        return HTTPBadRequest(body=body, request=req,
                              content_type='text/plain')

    target_type = target_type.lower()
    prefix = 'x-%s-meta-' % target_type
    needs_utf8 = target_type in ('account', 'container')
    meta_count = meta_size = 0
    for hdr, value in req.headers.items():
        too_long = (isinstance(value, six.string_types)
                    and len(value) > MAX_HEADER_SIZE)
        if too_long:
            # checked for all headers, not only the metadata ones
            return reject(b'Header value too long: %s' %
                          wsgi_to_bytes(hdr[:MAX_META_NAME_LENGTH]))
        if not hdr.lower().startswith(prefix):
            continue

        meta_name = hdr[len(prefix):]
        if not meta_name:
            return reject('Metadata name cannot be empty')

        bad_key = not check_utf8(wsgi_to_str(meta_name))
        bad_value = value and not check_utf8(wsgi_to_str(value))
        if needs_utf8 and (bad_key or bad_value):
            return reject('Metadata must be valid UTF-8')

        meta_count += 1
        meta_size += len(meta_name) + len(value)
        if len(meta_name) > MAX_META_NAME_LENGTH:
            message = 'Metadata name too long: %s%s' % (prefix, meta_name)
            return reject(wsgi_to_bytes(message))
        if len(value) > MAX_META_VALUE_LENGTH:
            message = 'Metadata value longer than %d: %s%s' % (
                MAX_META_VALUE_LENGTH, prefix, meta_name)
            return reject(wsgi_to_bytes(message))
        if meta_count > MAX_META_COUNT:
            return reject('Too many metadata items; max %d' % MAX_META_COUNT)
        if meta_size > MAX_META_OVERALL_SIZE:
            return reject('Total metadata too large; max %d'
                          % MAX_META_OVERALL_SIZE)
    return None
Ejemplo n.º 7
0
        def do_test(method, plain_etags, expected_plain_etags=None):
            """
            Send a conditional request through the encrypter and verify that
            the etags in the match header were supplemented with masked
            values.

            :param method: request method to use
            :param plain_etags: list of etag values joined into the match
                header of the request
            :param expected_plain_etags: list of plain etags expected in the
                header seen by the app; ``plain_etags`` is used when this is
                not given
            """
            def mask_etags(key):
                # HMAC-SHA256 of every concrete etag (surrounding quotes
                # stripped), base64 encoded and re-quoted
                return [
                    '"%s"' % bytes_to_wsgi(
                        base64.b64encode(
                            hmac.new(key, wsgi_to_bytes(etag.strip('"')),
                                     hashlib.sha256).digest()))
                    for etag in plain_etags if etag not in ('*', '')
                ]

            path = '/v1/a/c/o'
            match_header_value = ', '.join(plain_etags)
            req = Request.blank(
                path,
                environ={CRYPTO_KEY_CALLBACK: fetch_crypto_keys},
                method=method,
                headers={match_header_name: match_header_value})
            app = FakeSwift()
            app.register(method, path, HTTPOk, {})
            resp = req.get_response(encrypter.Encrypter(app, {}))
            self.assertEqual('200 OK', resp.status)

            # exactly one backend call, made with the same method
            self.assertEqual(1, len(app.calls), app.calls)
            self.assertEqual(method, app.calls[0][0])
            actual_headers = app.headers[0]

            # verify the alternate etag location has been specified
            if match_header_value and match_header_value != '*':
                self.assertIn('X-Backend-Etag-Is-At', actual_headers)
                self.assertEqual('X-Object-Sysmeta-Crypto-Etag-Mac',
                                 actual_headers['X-Backend-Etag-Is-At'])

            # verify etags have been supplemented with masked values
            self.assertIn(match_header_name, actual_headers)
            actual_etags = set(actual_headers[match_header_name].split(', '))
            # masked values for secret_id None
            masked_etags = mask_etags(fetch_crypto_keys()['object'])
            # masked values for secret_id myid
            masked_etags_myid = mask_etags(
                fetch_crypto_keys(key_id={'secret_id': 'myid'})['object'])
            expected_etags = set((expected_plain_etags or plain_etags) +
                                 masked_etags + masked_etags_myid)
            self.assertEqual(expected_etags, actual_etags)
            # check that the request environ was returned to original state
            restored_etags = set(req.headers[match_header_name].split(', '))
            self.assertEqual(set(plain_etags), restored_etags)
Ejemplo n.º 8
0
    def test_encrypt_header_val(self):
        """
        encrypt_header_val encrypts a non-empty value and rejects empty
        values with a ValueError.
        """
        object_key = fetch_crypto_keys()['object']

        # - Normal string can be crypted
        result = encrypter.encrypt_header_val(Crypto(), 'aaa', object_key)
        # sanity: return value is 2 item tuple
        self.assertEqual(2, len(result))
        crypted_val, crypt_info = result
        self.assertEqual(
            base64.b64encode(encrypt(b'aaa', object_key, FAKE_IV)),
            wsgi_to_bytes(crypted_val))
        self.assertEqual(
            {'cipher': 'AES_CTR_256', 'iv': b'This is an IV123'},
            crypt_info)

        # - Empty string, and then None, raise a ValueError for safety
        for empty_value in ('', None):
            with self.assertRaises(ValueError) as cm:
                encrypter.encrypt_header_val(
                    Crypto(), empty_value, object_key)

            self.assertEqual('empty value is not acceptable',
                             cm.exception.args[0])
Ejemplo n.º 9
0
    def test_encrypt_header_val(self):
        """
        Check encrypt_header_val for a normal value and for the two empty
        values ('' and None) that must be refused.
        """
        def assert_rejected(bad_value):
            # the call must raise ValueError carrying the expected message
            with self.assertRaises(ValueError) as cm:
                encrypter.encrypt_header_val(Crypto(), bad_value, object_key)

            self.assertEqual('empty value is not acceptable',
                             cm.exception.args[0])

        # Prepare key
        object_key = fetch_crypto_keys()['object']

        # - Normal string can be crypted
        encrypted = encrypter.encrypt_header_val(Crypto(), 'aaa', object_key)
        # sanity: return value is 2 item tuple
        self.assertEqual(2, len(encrypted))
        actual_val, actual_info = encrypted
        wanted_val = base64.b64encode(encrypt(b'aaa', object_key, FAKE_IV))
        wanted_info = {'cipher': 'AES_CTR_256', 'iv': b'This is an IV123'}
        self.assertEqual(wanted_val, wsgi_to_bytes(actual_val))
        self.assertEqual(wanted_info, actual_info)

        # - Empty string raises a ValueError for safety
        assert_rejected('')

        # - None also raises a ValueError for safety
        assert_rejected(None)
Ejemplo n.º 10
0
 def _title(s):
     """
     Title-case a header name via ``HeaderKeyDict._title``, with two
     exceptions: 'etag' is returned as 'ETag', and names starting with
     'x-amz-' are returned lowercased.

     :param s: header name
     :returns: the adjusted header name
     """
     titled = header_key_dict.HeaderKeyDict._title(s)
     lowered = titled.lower()
     if lowered == 'etag':
         # AWS Java SDK expects only 'ETag'.
         return 'ETag'
     if not lowered.startswith('x-amz-'):
         return titled
     # AWS headers returned by S3 are lowercase.
     return swob.bytes_to_wsgi(swob.wsgi_to_bytes(titled).lower())
Ejemplo n.º 11
0
        def do_test(method, plain_etags, expected_plain_etags=None):
            """
            Run one conditional request through the encrypter and check the
            match header that reaches the app.

            :param method: request method to use
            :param plain_etags: list of etag values joined into the match
                header of the request
            :param expected_plain_etags: list of plain etags expected in the
                header seen by the app; ``plain_etags`` is used when this is
                not given
            """
            def masked_values(key):
                # one quoted, base64 encoded HMAC-SHA256 per concrete etag;
                # '*' and '' are skipped
                values = []
                for etag in plain_etags:
                    if etag in ('*', ''):
                        continue
                    mac = hmac.new(key, wsgi_to_bytes(etag.strip('"')),
                                   hashlib.sha256).digest()
                    values.append(
                        '"%s"' % bytes_to_wsgi(base64.b64encode(mac)))
                return values

            env = {CRYPTO_KEY_CALLBACK: fetch_crypto_keys}
            match_header_value = ', '.join(plain_etags)
            req = Request.blank(
                '/v1/a/c/o', environ=env, method=method,
                headers={match_header_name: match_header_value})
            app = FakeSwift()
            app.register(method, '/v1/a/c/o', HTTPOk, {})
            resp = req.get_response(encrypter.Encrypter(app, {}))
            self.assertEqual('200 OK', resp.status)

            self.assertEqual(1, len(app.calls), app.calls)
            self.assertEqual(method, app.calls[0][0])
            backend_headers = app.headers[0]

            # verify the alternate etag location has been specified
            if match_header_value and match_header_value != '*':
                self.assertIn('X-Backend-Etag-Is-At', backend_headers)
                self.assertEqual('X-Object-Sysmeta-Crypto-Etag-Mac',
                                 backend_headers['X-Backend-Etag-Is-At'])

            # verify etags have been supplemented with masked values
            self.assertIn(match_header_name, backend_headers)
            actual_etags = set(
                backend_headers[match_header_name].split(', '))
            # masked values for secret_id None
            key = fetch_crypto_keys()['object']
            masked_etags = masked_values(key)
            # masked values for secret_id myid
            key = fetch_crypto_keys(key_id={'secret_id': 'myid'})['object']
            masked_etags_myid = masked_values(key)
            expected_etags = set((expected_plain_etags or plain_etags) +
                                 masked_etags + masked_etags_myid)
            self.assertEqual(expected_etags, actual_etags)
            # check that the request environ was returned to original state
            self.assertEqual(set(plain_etags),
                             set(req.headers[match_header_name].split(', ')))
Ejemplo n.º 12
0
    def handle(self, req, start_response):
        """
        Call the wrapped app and decrypt its response where needed.

        The response is passed through untouched when no decryption keys are
        found.  Otherwise the response headers are decrypted and, for a
        successful GET of an object with body crypto meta, the body is
        wrapped in a decrypting iterator.

        :param req: the request; its environ is handed to the app
        :param start_response: callable invoked with the response status,
            headers and exc_info
        :returns: the response body iterator
        :raises HTTPInternalServerError: if an EncryptionException occurs
            while reading the crypto meta or getting the decryption keys
        """
        app_resp = self._app_call(req.environ)

        try:
            put_crypto_meta = self._read_crypto_meta(
                'X-Object-Sysmeta-Crypto-Body-Meta', True)
            put_keys = self.get_decryption_keys(req, put_crypto_meta)
            post_crypto_meta = self._read_crypto_meta(
                'X-Object-Transient-Sysmeta-Crypto-Meta', False)
            post_keys = self.get_decryption_keys(req, post_crypto_meta)
        except EncryptionException as err:
            self.logger.error("Error decrypting object: %s", err)
            raise HTTPInternalServerError(
                body='Error decrypting object', content_type='text/plain')

        if put_keys is None and post_keys is None:
            # skip decryption
            start_response(self._response_status, self._response_headers,
                           self._response_exc_info)
            return app_resp

        mod_resp_headers = self.decrypt_resp_headers(put_keys, post_keys)

        # 2xx response to a GET with an encrypted body?
        decrypt_body = (put_crypto_meta and req.method == 'GET' and
                        is_success(self._get_status_int()))
        if not decrypt_body:
            # don't decrypt body of unencrypted or non-2xx responses
            resp_iter = app_resp
        else:
            body_key = self.get_unwrapped_key(
                put_crypto_meta, put_keys['object'])
            content_type, content_type_attrs = parse_content_type(
                self._response_header_value('Content-Type'))
            multipart = (self._get_status_int() == 206 and
                         content_type == 'multipart/byteranges')
            if multipart:
                boundary = wsgi_to_bytes(dict(content_type_attrs)["boundary"])
                resp_iter = self.multipart_response_iter(
                    app_resp, boundary, body_key, put_crypto_meta)
            else:
                content_range = self._response_header_value('Content-Range')
                if content_range:
                    # Determine offset within the whole object if ranged GET
                    offset, _end, _total = parse_content_range(content_range)
                else:
                    offset = 0
                resp_iter = self.response_iter(
                    app_resp, body_key, put_crypto_meta, offset)

        mod_resp_headers = purge_crypto_sysmeta_headers(mod_resp_headers)
        start_response(self._response_status, mod_resp_headers,
                       self._response_exc_info)

        return resp_iter
Ejemplo n.º 13
0
    def handle(self, req, start_response):
        """
        Run the request through the app and return a (possibly decrypting)
        response body iterator.

        With no decryption keys at all the app response is returned as is.
        Otherwise headers are decrypted, crypto sysmeta headers are purged,
        and the body of a successful GET with body crypto meta is decrypted
        on the fly.

        :param req: the request; its environ is handed to the app
        :param start_response: callable invoked with the response status,
            headers and exc_info
        :returns: the response body iterator
        :raises HTTPInternalServerError: if an EncryptionException occurs
            while reading the crypto meta or getting the decryption keys
        """
        app_resp = self._app_call(req.environ)

        try:
            body_meta = self._read_crypto_meta(
                'X-Object-Sysmeta-Crypto-Body-Meta', True)
            body_keys = self.get_decryption_keys(req, body_meta)
            user_meta = self._read_crypto_meta(
                'X-Object-Transient-Sysmeta-Crypto-Meta', False)
            user_keys = self.get_decryption_keys(req, user_meta)
        except EncryptionException as err:
            self.logger.error("Error decrypting object: %s", err)
            raise HTTPInternalServerError(
                body='Error decrypting object',
                content_type='text/plain')

        if body_keys is None and user_keys is None:
            # skip decryption
            start_response(self._response_status, self._response_headers,
                           self._response_exc_info)
            return app_resp

        headers = self.decrypt_resp_headers(body_keys, user_keys)

        # by default the body is passed through: unencrypted or non-2xx
        # responses are not decrypted
        resp_iter = app_resp
        if body_meta and req.method == 'GET' and \
                is_success(self._get_status_int()):
            # 2xx response and encrypted body
            body_key = self.get_unwrapped_key(body_meta, body_keys['object'])
            content_type, content_type_attrs = parse_content_type(
                self._response_header_value('Content-Type'))

            if (self._get_status_int() == 206 and
                    content_type == 'multipart/byteranges'):
                attrs = dict(content_type_attrs)
                resp_iter = self.multipart_response_iter(
                    app_resp, wsgi_to_bytes(attrs["boundary"]), body_key,
                    body_meta)
            else:
                offset = 0
                content_range = self._response_header_value('Content-Range')
                if content_range:
                    # Determine offset within the whole object if ranged GET
                    offset, _last, _size = parse_content_range(content_range)
                resp_iter = self.response_iter(
                    app_resp, body_key, body_meta, offset)

        headers = purge_crypto_sysmeta_headers(headers)
        start_response(self._response_status, headers,
                       self._response_exc_info)

        return resp_iter
Ejemplo n.º 14
0
def _hmac_etag(key, etag):
    """
    Compute an HMAC-SHA256 using given key and etag.

    :param key: The starting key for the hash.
    :param etag: The etag to hash.
    :returns: a Base64-encoded representation of the HMAC
    """
    if not isinstance(etag, bytes):
        etag = wsgi_to_bytes(etag)
    result = hmac.new(key, etag, digestmod=hashlib.sha256).digest()
    return base64.b64encode(result).decode()
Ejemplo n.º 15
0
 def _verify_user_metadata(self, req_hdrs, name, value, key):
     """
     Check that ``req_hdrs`` carries the encrypted version of user metadata
     ``name`` and not the plaintext one.

     :param req_hdrs: request headers to inspect
     :param name: metadata name, i.e. the part after ``X-Object-Meta-``
     :param value: expected plaintext value of the metadata item
     :param key: key the value is expected to have been encrypted with
     """
     meta_prefix = 'swift_meta='

     # verify encrypted version of user metadata
     self.assertNotIn('X-Object-Meta-' + name, req_hdrs)
     encrypted_hdr = 'X-Object-Transient-Sysmeta-Crypto-Meta-' + name
     self.assertIn(encrypted_hdr, req_hdrs)
     enc_val, param = req_hdrs[encrypted_hdr].split(';')
     param = param.strip()
     self.assertTrue(param.startswith(meta_prefix))
     quoted_meta = param[len(meta_prefix):]
     actual_meta = json.loads(urlparse.unquote_plus(quoted_meta))
     self.assertEqual(Crypto.cipher, actual_meta['cipher'])
     meta_iv = base64.b64decode(actual_meta['iv'])
     self.assertEqual(FAKE_IV, meta_iv)
     # the header value must be the base64 of the value encrypted with the
     # given key and the IV found in the header's own crypto meta
     self.assertEqual(
         base64.b64encode(encrypt(wsgi_to_bytes(value), key, meta_iv)),
         wsgi_to_bytes(enc_val))

     # if there is any encrypted user metadata then this header should exist
     self.assertIn('X-Object-Transient-Sysmeta-Crypto-Meta', req_hdrs)
     quoted_common = req_hdrs['X-Object-Transient-Sysmeta-Crypto-Meta']
     common_meta = json.loads(urlparse.unquote_plus(quoted_common))
     key_id = {'v': 'fake', 'path': '/a/c/fake'}
     self.assertDictEqual({'cipher': Crypto.cipher, 'key_id': key_id},
                          common_meta)
Ejemplo n.º 16
0
def translate_swift_to_s3(key, val):
    """
    Translate one Swift response header into its S3 counterpart.

    :param key: header name; it is compared in lowercased form
    :param val: header value
    :returns: a (name, value) tuple for headers that are kept, or None when
              the header is to be dropped
    """
    lowered = swob.bytes_to_wsgi(swob.wsgi_to_bytes(key).lower())

    def translate_meta_key(name):
        if name.startswith('x-object-meta-'):
            # Note that AWS allows user-defined metadata with underscores in
            # the header, while WSGI (and other protocols derived from CGI)
            # does not differentiate between an underscore and a dash.
            # Fortunately, eventlet exposes the raw headers from the client,
            # so we could translate '_' to '=5F' on the way in. Now, we
            # translate back.
            return 'x-amz-meta-' + name[14:].replace('=5f', '_')
        return name

    # headers returned with their original name and value
    passthrough = ('content-length', 'content-type', 'content-range',
                   'content-encoding', 'content-disposition',
                   'content-language', 'etag', 'last-modified',
                   'x-robots-tag', 'cache-control', 'expires')
    # headers that only change name
    renamed = {
        'x-object-version-id': 'x-amz-version-id',
        'x-copied-from-version-id': 'x-amz-copy-source-version-id',
    }

    if lowered.startswith('x-object-meta-'):
        return translate_meta_key(lowered), val
    if lowered in passthrough:
        return key, val
    if lowered in renamed:
        return renamed[lowered], val
    if lowered == 'x-backend-content-type' and \
            val == DELETE_MARKER_CONTENT_TYPE:
        return 'x-amz-delete-marker', 'true'
    if lowered == 'access-control-expose-headers':
        exposed_headers = val.split(', ') + [
            'x-amz-request-id',
            'x-amz-id-2',
        ]
        return 'access-control-expose-headers', ', '.join(
            translate_meta_key(h) for h in exposed_headers)
    if lowered == 'access-control-allow-methods':
        methods = val.split(', ')
        if 'COPY' in methods:
            methods.remove('COPY')  # that's not a thing in S3
        return key, ', '.join(methods)
    if lowered.startswith('access-control-'):
        return key, val
    # anything else is dropped
    return None
Ejemplo n.º 17
0
    def create_key(self, path, secret_id=None):
        """
        Derive the encryption key for a path from one of the root secrets.

        The key is the HMAC-SHA256 digest of the path, keyed with the root
        secret selected by ``secret_id``.

        :param path: the (WSGI string) path of the resource being encrypted.
        :param secret_id: the id of the root secret from which the key should
            be derived.
        :return: an encryption key.
        :raises UnknownSecretIdError: if the secret_id is not recognised.
        """
        try:
            root_secret = self._root_secrets[secret_id]
        except KeyError:
            self.logger.warning('Unrecognised secret id: %s' % secret_id)
            raise UnknownSecretIdError(secret_id)

        mac = hmac.new(root_secret, wsgi_to_bytes(path),
                       digestmod=hashlib.sha256)
        return mac.digest()
Ejemplo n.º 18
0
    def create_key(self, path, secret_id=None):
        """
        Create an encryption key that is unique for the given path.

        The root secret registered under ``secret_id`` keys an HMAC-SHA256
        over the path; the digest is the returned key.

        :param path: the (WSGI string) path of the resource being encrypted.
        :param secret_id: the id of the root secret from which the key should
            be derived.
        :return: an encryption key.
        :raises UnknownSecretIdError: if the secret_id is not recognised.
        """
        try:
            secret = self._root_secrets[secret_id]
        except KeyError:
            # log the unknown id, then report it to the caller
            self.logger.warning('Unrecognised secret id: %s' % secret_id)
            raise UnknownSecretIdError(secret_id)

        path_bytes = wsgi_to_bytes(path)
        return hmac.new(secret, path_bytes,
                        digestmod=hashlib.sha256).digest()
Ejemplo n.º 19
0
def encrypt_header_val(crypto, value, key):
    """
    Encrypt a header value with the supplied key and Base64 encode it.

    :param crypto: a Crypto instance; used to create the crypto meta and the
        encryption context
    :param value: value to encrypt
    :param key: crypto key to use
    :returns: a tuple of (encrypted value, crypto_meta) where crypto_meta is
        the dict returned by ``crypto.create_crypto_meta()``, whose 'iv' item
        is used for the encryption
    :raises ValueError: if value is empty
    """
    if not value:
        raise ValueError('empty value is not acceptable')

    crypto_meta = crypto.create_crypto_meta()
    ctxt = crypto.create_encryption_ctxt(key, crypto_meta['iv'])
    ciphertext = ctxt.update(wsgi_to_bytes(value))
    return bytes_to_wsgi(base64.b64encode(ciphertext)), crypto_meta
Ejemplo n.º 20
0
def http_response_to_document_iters(response, read_chunk_size=4096):
    """
    Turn a successful object-GET HTTP response into an iterator of
    (first-byte, last-byte, length, headers, body-file) 5-tuples.

    The response must either be a 200 or a 206; if you feed in a 204 or
    something similar, this probably won't work.

    :param response: HTTP response, like from bufferedhttp.http_connect(),
        not a swob.Response.
    :param read_chunk_size: handed to
        multipart_byteranges_to_document_iters when the body is a
        multipart/byteranges document; not used for other responses.
    :returns: an iterator of 5-tuples as described above
    """
    chunked = is_chunked(dict(response.getheaders()))

    if response.status == 200:
        if chunked:
            # Single "range" that's the whole object with an unknown length
            whole_object = (0, None, None, response.getheaders(), response)
        else:
            # Single "range" that's the whole object
            content_length = int(response.getheader('Content-Length'))
            whole_object = (0, content_length - 1, content_length,
                            response.getheaders(), response)
        return iter([whole_object])

    content_type, params_list = parse_content_type(
        response.getheader('Content-Type'))
    if content_type == 'multipart/byteranges':
        # Multiple ranges; the response body is a multipart/byteranges MIME
        # document, and we have to parse it using the MIME boundary
        # extracted from the Content-Type header.
        boundary = wsgi_to_bytes(dict(params_list)['boundary'])
        return multipart_byteranges_to_document_iters(
            response, boundary, read_chunk_size)

    # Single range; no MIME framing, just the bytes. The start and end
    # byte indices are in the Content-Range header.
    start, end, length = parse_content_range(
        response.getheader('Content-Range'))
    return iter([(start, end, length, response.getheaders(), response)])
Ejemplo n.º 21
0
def http_response_to_document_iters(response, read_chunk_size=4096):
    """
    Convert a successful object-GET HTTP response into an iterator of
    (first-byte, last-byte, length, headers, body-file) 5-tuples.

    Only 200 and 206 responses are meaningful here; feeding in a 204 or
    something similar probably won't work.

    :param response: HTTP response, like from bufferedhttp.http_connect(),
        not a swob.Response.
    :param read_chunk_size: handed on to
        multipart_byteranges_to_document_iters() when the body is a
        multipart/byteranges document; unused otherwise.
    :returns: an iterator yielding one 5-tuple for a whole-object or
        single-range response, or the result of
        multipart_byteranges_to_document_iters() for a multi-range one.
    """
    body_is_chunked = is_chunked(dict(response.getheaders()))

    if response.status == 200:
        if body_is_chunked:
            # Single "range" that's the whole object with an unknown length
            first_byte, last_byte, total = 0, None, None
        else:
            # Single "range" that's the whole object
            total = int(response.getheader('Content-Length'))
            first_byte, last_byte = 0, total - 1
        return iter([(first_byte, last_byte, total,
                      response.getheaders(), response)])

    content_type, params_list = parse_content_type(
        response.getheader('Content-Type'))

    if content_type == 'multipart/byteranges':
        # Multiple ranges; the response body is a multipart/byteranges MIME
        # document, and we have to parse it using the MIME boundary
        # extracted from the Content-Type header.
        boundary = wsgi_to_bytes(dict(params_list)['boundary'])
        return multipart_byteranges_to_document_iters(
            response, boundary, read_chunk_size)

    # Single range; no MIME framing, just the bytes. The start and end
    # byte indices are in the Content-Range header.
    start, end, length = parse_content_range(
        response.getheader('Content-Range'))
    return iter([(start, end, length, response.getheaders(), response)])
Ejemplo n.º 22
0
    def __call__(self, env, start_response):
        """
        Re-serialize account and container listings in the requested format.

        GET and HEAD requests whose path names an account or container are
        sent to the wrapped app with ``format=json`` forced. When the app
        answers 200/204 with an ``application/json`` body of acceptable
        length, that body is rewritten as XML, plain text or JSON according
        to get_listing_content_type(). Every other request or response is
        handed back as received, apart from a possible ``Vary`` update.

        :param env: WSGI environment
        :param start_response: WSGI start_response callable
        :returns: the response body iterable
        """
        req = Request(env)
        # Record the outcome in a flag so that the wrapped app is called
        # after the try statement, not from inside the ``except`` clause.
        handles_listing = True
        try:
            # account and container only
            version, acct, cont = req.split_path(2, 3)
        except ValueError:
            handles_listing = False
        if not handles_listing:
            return self.app(env, start_response)

        if not valid_api_version(version) or req.method not in ('GET', 'HEAD'):
            return self.app(env, start_response)

        # OK, definitely have an account/container request.
        # Get the desired content-type, then force it to a JSON request.
        try:
            out_content_type = get_listing_content_type(req)
        except HTTPException as err:
            return err(env, start_response)

        query = req.params
        # Only advertise ``Vary: Accept`` when the client did not pin the
        # format explicitly in the query string.
        can_vary = 'format' not in query
        query['format'] = 'json'
        req.params = query

        # Give other middlewares a chance to be in charge
        env.setdefault('swift.format_listing', True)
        status, headers, resp_iter = req.call_application(self.app)

        def pass_through(body_iter):
            # Hand the backend response on with its current status/headers.
            start_response(status, headers)
            return body_iter

        if not env.get('swift.format_listing'):
            return pass_through(resp_iter)

        # Remember where the headers we may need to rewrite live.
        positions = {}
        resp_content_type = resp_length = None
        for index, (name, value) in enumerate(headers):
            name = name.lower()
            if name not in ('content-type', 'content-length', 'vary'):
                continue
            positions[name] = index
            if name == 'content-type':
                resp_content_type = value.partition(';')[0]
            elif name == 'content-length':
                resp_length = int(value)

        if not status.startswith(('200 ', '204 ')):
            return pass_through(resp_iter)

        if can_vary:
            if 'vary' in positions:
                vary_at = positions['vary']
                vary_value = headers[vary_at][1]
                if 'accept' not in list_from_csv(vary_value.lower()):
                    headers[vary_at] = ('Vary', vary_value + ', Accept')
            else:
                headers.append(('Vary', 'Accept'))

        if (resp_content_type != 'application/json'
                or resp_length is None
                or resp_length > MAX_CONTAINER_LISTING_CONTENT_LENGTH):
            return pass_through(resp_iter)

        def set_header(header, value):
            # Replace the recorded header's value, or drop the header
            # entirely when value is None.
            index = positions[header]
            if value is None:
                del headers[index]
            else:
                headers[index] = (headers[index][0], str(value))

        final_content_type = out_content_type + '; charset=utf-8'

        if req.method == 'HEAD':
            set_header('content-type', final_content_type)
            set_header('content-length', None)  # don't know, can't determine
            return pass_through(resp_iter)

        body = b''.join(resp_iter)
        try:
            listing = json.loads(body)
            # Do a couple sanity checks
            looks_like_listing = isinstance(listing, list) and all(
                isinstance(entry, dict) for entry in listing)
            if not looks_like_listing:
                raise ValueError
        except ValueError:
            # Static web listing that's returning invalid JSON?
            # Just pass it straight through; that's about all we *can* do.
            start_response(status, headers)
            return [body]

        if not req.allow_reserved_names:
            listing = self.filter_reserved(listing, acct, cont)

        try:
            if out_content_type.endswith('/xml'):
                to_xml, wsgi_name = ((container_to_xml, cont) if cont
                                     else (account_to_xml, acct))
                body = to_xml(listing,
                              wsgi_to_bytes(wsgi_name).decode('utf-8'))
            elif out_content_type == 'text/plain':
                body = listing_to_text(listing)
            else:
                body = json.dumps(listing).encode('ascii')
        except KeyError:
            # listing was in a bad format -- funky static web listing??
            # ``body`` still holds the original bytes at this point.
            start_response(status, headers)
            return [body]

        if not body:
            # An empty rendering is reported as 204 No Content.
            status = '%s %s' % (HTTP_NO_CONTENT,
                                RESPONSE_REASONS[HTTP_NO_CONTENT][0])

        set_header('content-type', final_content_type)
        set_header('content-length', len(body))
        start_response(status, headers)
        return [body]
Ejemplo n.º 23
0
    def _get_from_shards(self, req, resp):
        """
        Assemble an object listing from the shards described by ``resp``.

        ``resp.body`` is parsed as a JSON list of shard range dicts. Each
        shard container is then queried in turn via
        ``_get_container_listing`` until the requested limit is used up, the
        request's end_marker has been reached, or the shard ranges run out.

        :param req: the original listing request; its
            ``X-Backend-Record-Type`` header is removed as a side effect
        :param resp: the response whose body lists the shard ranges; its
            body (and, for short unconstrained listings, its object-count
            and bytes-used headers) is overwritten with the merged result
        :returns: ``resp``; returned untouched if it listed no shard ranges
        """
        # construct listing using shards described by the response body
        shard_ranges = [
            ShardRange.from_dict(data) for data in json.loads(resp.body)
        ]
        self.app.logger.debug('GET listing from %s shards for: %s',
                              len(shard_ranges), req.path_qs)
        if not shard_ranges:
            # can't find ranges or there was a problem getting the ranges. So
            # return what we have.
            return resp

        objects = []

        def last_listed_name():
            # The final entry may be a subdir record, which carries a
            # 'subdir' key instead of 'name'; fall back accordingly so that
            # neither the marker nor the end_marker logic raises KeyError.
            last = objects[-1]
            return last.get('name', last.get('subdir', u''))

        req_limit = int(req.params.get('limit', CONTAINER_LISTING_LIMIT))
        params = req.params.copy()
        params.pop('states', None)
        req.headers.pop('X-Backend-Record-Type', None)
        reverse = config_true_value(params.get('reverse'))
        marker = params.get('marker')
        end_marker = params.get('end_marker')

        limit = req_limit
        for shard_range in shard_ranges:
            params['limit'] = limit
            # Always set marker to ensure that object names less than or equal
            # to those already in the listing are not fetched; if the listing
            # is empty then the original request marker, if any, is used. This
            # allows misplaced objects below the expected shard range to be
            # included in the listing.
            if objects:
                params['marker'] = last_listed_name().encode('utf-8')
            elif marker:
                params['marker'] = marker
            else:
                params['marker'] = ''
            # Always set end_marker to ensure that misplaced objects beyond the
            # expected shard range are not fetched. This prevents a misplaced
            # object obscuring correctly placed objects in the next shard
            # range.
            if end_marker and end_marker in shard_range:
                params['end_marker'] = end_marker
            elif reverse:
                params['end_marker'] = str_to_wsgi(shard_range.lower_str)
            else:
                params['end_marker'] = str_to_wsgi(shard_range.end_marker)

            if (shard_range.account == self.account_name
                    and shard_range.container == self.container_name):
                # directed back to same container - force GET of objects
                headers = {'X-Backend-Record-Type': 'object'}
            else:
                headers = None
            self.app.logger.debug('Getting from %s %s with %s', shard_range,
                                  shard_range.name, headers)
            objs, _shard_resp = self._get_container_listing(
                req,
                shard_range.account,
                shard_range.container,
                headers=headers,
                params=params)

            if not objs:
                # tolerate errors or empty shard containers
                continue

            objects.extend(objs)
            limit -= len(objs)

            if limit <= 0:
                break
            if end_marker:
                # Stop once the listing has reached the requested end_marker;
                # the comparison is done on bytes.
                last_name = last_listed_name().encode('utf-8')
                end_marker_bytes = wsgi_to_bytes(end_marker)
                if reverse and end_marker_bytes >= last_name:
                    break
                if not reverse and end_marker_bytes <= last_name:
                    break

        resp.body = json.dumps(objects).encode('ascii')
        constrained = any(
            req.params.get(constraint)
            for constraint in ('marker', 'end_marker', 'path', 'prefix',
                               'delimiter'))
        if not constrained and len(objects) < req_limit:
            self.app.logger.debug('Setting object count to %s', len(objects))
            # prefer the actual listing stats over the potentially outdated
            # root stats. This condition is only likely when a sharded
            # container is shrinking or in tests; typically a sharded container
            # will have more than CONTAINER_LISTING_LIMIT objects so any
            # unconstrained listing will be capped by the limit and total
            # object stats cannot therefore be inferred from the listing.
            resp.headers['X-Container-Object-Count'] = len(objects)
            resp.headers['X-Container-Bytes-Used'] = sum(
                o['bytes'] for o in objects)
        return resp
Ejemplo n.º 24
0
    def __init__(self, *args, **kwargs):
        """
        Build the response, then translate its Swift headers for S3 clients.

        After ``swob.Response.__init__`` runs, the headers are split into
        s3api/swift3 sysmeta and everything else. The "everything else"
        group is mapped onto ``x-amz-*`` / standard HTTP headers and becomes
        ``self.headers``; the untranslated groups are kept on
        ``self.sw_headers`` and ``self.sysmeta_headers``.

        :param args: positional arguments passed to swob.Response
        :param kwargs: keyword arguments passed to swob.Response
        """
        swob.Response.__init__(self, *args, **kwargs)

        s3_sysmeta_headers = swob.HeaderKeyDict()
        sw_headers = swob.HeaderKeyDict()
        # NOTE(review): unqualified HeaderKeyDict, unlike the two above --
        # presumably a module-local variant; confirm against the file header.
        headers = HeaderKeyDict()
        self.is_slo = False

        def is_swift3_sysmeta(sysmeta_key, server_type):
            # True if the key carries the legacy swift3 sysmeta prefix.
            swift3_sysmeta_prefix = (
                'x-%s-sysmeta-swift3' % server_type).lower()
            return sysmeta_key.lower().startswith(swift3_sysmeta_prefix)

        def is_s3api_sysmeta(sysmeta_key, server_type):
            # True if the key carries the s3api sysmeta prefix. Uses the
            # server_type argument rather than reaching into the enclosing
            # loop's variable.
            s3api_sysmeta_prefix = sysmeta_prefix(server_type).lower()
            return sysmeta_key.lower().startswith(s3api_sysmeta_prefix)

        for key, val in self.headers.items():
            if is_sys_meta('object', key) or is_sys_meta('container', key):
                # second dash-separated token of the header name
                _server_type = key.split('-')[1]
                if is_swift3_sysmeta(key, _server_type):
                    # To be compatible with older swift3, translate swift3
                    # sysmeta to s3api sysmeta here
                    key = sysmeta_prefix(_server_type) + \
                        key[len('x-%s-sysmeta-swift3-' % _server_type):]

                    if key not in s3_sysmeta_headers:
                        # To avoid overwrite s3api sysmeta by older swift3
                        # sysmeta set the key only when the key does not exist
                        s3_sysmeta_headers[key] = val
                elif is_s3api_sysmeta(key, _server_type):
                    s3_sysmeta_headers[key] = val
                else:
                    sw_headers[key] = val
            else:
                sw_headers[key] = val

        # Handle swift headers
        for key, val in sw_headers.items():
            _key = swob.bytes_to_wsgi(swob.wsgi_to_bytes(key).lower())

            if _key.startswith('x-object-meta-'):
                # Note that AWS ignores user-defined headers with '=' in the
                # header name. We translated underscores to '=5F' on the way
                # in, though.
                headers['x-amz-meta-' + _key[14:].replace('=5f', '_')] = val
            elif _key in ('content-length', 'content-type',
                          'content-range', 'content-encoding',
                          'content-disposition', 'content-language',
                          'etag', 'last-modified', 'x-robots-tag',
                          'cache-control', 'expires'):
                headers[key] = val
            elif _key == 'x-object-version-id':
                headers['x-amz-version-id'] = val
            elif _key == 'x-copied-from-version-id':
                headers['x-amz-copy-source-version-id'] = val
            elif _key == 'x-static-large-object':
                # for delete slo
                self.is_slo = config_true_value(val)
            elif _key == 'x-backend-content-type' and \
                    val == DELETE_MARKER_CONTENT_TYPE:
                headers['x-amz-delete-marker'] = 'true'

        # Check whether we stored the AWS-style etag on upload
        override_etag = s3_sysmeta_headers.get(
            sysmeta_header('object', 'etag'))
        if override_etag not in (None, ''):
            # Multipart uploads in AWS have ETags like
            #   <MD5(part_etag1 || ... || part_etagN)>-<number of parts>
            headers['etag'] = override_etag
        elif self.is_slo and 'etag' in headers:
            # Many AWS clients use the presence of a '-' to decide whether
            # to attempt client-side download validation, so even if we
            # didn't store the AWS-style header, tack on a '-N'. (Use 'N'
            # because we don't actually know how many parts there are.)
            headers['etag'] += '-N'

        self.headers = headers

        if self.etag:
            # add double quotes to the etag header
            # NOTE(review): the self-assignment is intentional -- presumably
            # the etag property setter does the quoting; confirm in swob.
            self.etag = self.etag

        # Used for pure swift header handling at the request layer
        self.sw_headers = sw_headers
        self.sysmeta_headers = s3_sysmeta_headers
Ejemplo n.º 25
0
    def _get_from_shards(self, req, resp):
        """
        Assemble an object listing from the shards described by ``resp``.

        ``resp.body`` is parsed as a JSON list of shard range dicts. Each
        shard container is then queried in turn via
        ``_get_container_listing`` until the requested limit is used up, the
        request's end_marker has been reached, or the shard ranges run out.

        :param req: the original listing request; its
            ``X-Backend-Record-Type`` header is removed as a side effect
        :param resp: the response whose body lists the shard ranges; its
            body (and, for short unconstrained listings, its object-count
            and bytes-used headers) is overwritten with the merged result
        :returns: ``resp``; returned untouched if it listed no shard ranges
        """
        # construct listing using shards described by the response body
        shard_ranges = [ShardRange.from_dict(data)
                        for data in json.loads(resp.body)]
        self.app.logger.debug('GET listing from %s shards for: %s',
                              len(shard_ranges), req.path_qs)
        if not shard_ranges:
            # can't find ranges or there was a problem getting the ranges. So
            # return what we have.
            return resp

        objects = []

        def last_listed_name():
            # The final entry may be a subdir record, which carries a
            # 'subdir' key instead of 'name'; fall back accordingly so that
            # neither the marker nor the end_marker logic raises KeyError.
            last = objects[-1]
            return last.get('name', last.get('subdir', u''))

        req_limit = int(req.params.get('limit', CONTAINER_LISTING_LIMIT))
        params = req.params.copy()
        params.pop('states', None)
        req.headers.pop('X-Backend-Record-Type', None)
        reverse = config_true_value(params.get('reverse'))
        marker = params.get('marker')
        end_marker = params.get('end_marker')

        limit = req_limit
        for shard_range in shard_ranges:
            params['limit'] = limit
            # Always set marker to ensure that object names less than or equal
            # to those already in the listing are not fetched; if the listing
            # is empty then the original request marker, if any, is used. This
            # allows misplaced objects below the expected shard range to be
            # included in the listing.
            if objects:
                params['marker'] = last_listed_name().encode('utf-8')
            elif marker:
                params['marker'] = marker
            else:
                params['marker'] = ''
            # Always set end_marker to ensure that misplaced objects beyond the
            # expected shard range are not fetched. This prevents a misplaced
            # object obscuring correctly placed objects in the next shard
            # range.
            if end_marker and end_marker in shard_range:
                params['end_marker'] = end_marker
            elif reverse:
                params['end_marker'] = str_to_wsgi(shard_range.lower_str)
            else:
                params['end_marker'] = str_to_wsgi(shard_range.end_marker)

            if (shard_range.account == self.account_name and
                    shard_range.container == self.container_name):
                # directed back to same container - force GET of objects
                headers = {'X-Backend-Record-Type': 'object'}
            else:
                headers = None
            self.app.logger.debug('Getting from %s %s with %s',
                                  shard_range, shard_range.name, headers)
            objs, _shard_resp = self._get_container_listing(
                req, shard_range.account, shard_range.container,
                headers=headers, params=params)

            if not objs:
                # tolerate errors or empty shard containers
                continue

            objects.extend(objs)
            limit -= len(objs)

            if limit <= 0:
                break
            if end_marker:
                # Stop once the listing has reached the requested end_marker;
                # the comparison is done on bytes.
                last_name = last_listed_name().encode('utf-8')
                end_marker_bytes = wsgi_to_bytes(end_marker)
                if reverse and end_marker_bytes >= last_name:
                    break
                if not reverse and end_marker_bytes <= last_name:
                    break

        resp.body = json.dumps(objects).encode('ascii')
        constrained = any(req.params.get(constraint) for constraint in (
            'marker', 'end_marker', 'path', 'prefix', 'delimiter'))
        if not constrained and len(objects) < req_limit:
            self.app.logger.debug('Setting object count to %s', len(objects))
            # prefer the actual listing stats over the potentially outdated
            # root stats. This condition is only likely when a sharded
            # container is shrinking or in tests; typically a sharded container
            # will have more than CONTAINER_LISTING_LIMIT objects so any
            # unconstrained listing will be capped by the limit and total
            # object stats cannot therefore be inferred from the listing.
            resp.headers['X-Container-Object-Count'] = len(objects)
            resp.headers['X-Container-Bytes-Used'] = sum(
                o['bytes'] for o in objects)
        return resp