def multipart_response_iter(self, resp, boundary, body_key, crypto_meta):
    """
    Decrypts a multipart mime doc response body.

    The part delimiters and part headers are re-emitted as they were
    parsed; only each part's body is passed through a decryption context,
    which is created at that part's first byte offset.

    :param resp: application response
    :param boundary: multipart boundary string
    :param body_key: decryption key for the response body
    :param crypto_meta: crypto_meta for the response body
    :return: generator for decrypted response body
    """
    with closing_if_possible(resp):
        doc_parts = multipart_byteranges_to_document_iters(
            FileLikeIter(resp), boundary)
        for start, _end, _length, part_headers, part_body in doc_parts:
            # opening delimiter for this part
            yield b"--" + boundary + b"\r\n"

            for name, val in part_headers:
                yield b"%s: %s\r\n" % (wsgi_to_bytes(name),
                                       wsgi_to_bytes(val))

            # blank line separates the part headers from the part body
            yield b"\r\n"

            ctxt = self.crypto.create_decryption_ctxt(
                body_key, crypto_meta['iv'], start)
            while True:
                chunk = part_body.read(DECRYPT_CHUNK_SIZE)
                if chunk == b'':
                    break
                yield ctxt.update(chunk)

            yield b"\r\n"

        # closing delimiter for the whole document
        yield b"--" + boundary + b"--"
def _verify_user_metadata(self, req_hdrs, name, value, key):
    """
    Assert that user metadata item ``name`` appears only in encrypted form.

    :param req_hdrs: headers of the request to inspect
    :param name: user metadata name (the part after ``X-Object-Meta-``)
    :param value: the plaintext value expected to have been encrypted
    :param key: the key expected to have been used for encryption
    """
    # verify encrypted version of user metadata
    plain_hdr = 'X-Object-Meta-' + name
    crypto_hdr = 'X-Object-Transient-Sysmeta-Crypto-Meta-' + name
    self.assertNotIn(plain_hdr, req_hdrs)
    self.assertIn(crypto_hdr, req_hdrs)

    # header value has the form "<encrypted value>; swift_meta=<quoted json>"
    enc_val, param = req_hdrs[crypto_hdr].split(';')
    param = param.strip()
    meta_prefix = 'swift_meta='
    self.assertTrue(param.startswith(meta_prefix))
    quoted_meta = param[len(meta_prefix):]
    actual_meta = json.loads(urlparse.unquote_plus(quoted_meta))
    self.assertEqual(Crypto.cipher, actual_meta['cipher'])
    meta_iv = base64.b64decode(actual_meta['iv'])
    self.assertEqual(FAKE_IV, meta_iv)
    expected_enc_val = base64.b64encode(
        encrypt(wsgi_to_bytes(value), key, meta_iv))
    self.assertEqual(expected_enc_val, wsgi_to_bytes(enc_val))

    # if there is any encrypted user metadata then this header should exist
    common_hdr = 'X-Object-Transient-Sysmeta-Crypto-Meta'
    self.assertIn(common_hdr, req_hdrs)
    common_meta = json.loads(urlparse.unquote_plus(req_hdrs[common_hdr]))
    expected_common_meta = {
        'cipher': Crypto.cipher,
        'key_id': {'v': 'fake', 'path': '/a/c/fake'},
    }
    self.assertDictEqual(expected_common_meta, common_meta)
def parse_request(self):
    """
    Normalise the quoting of the raw request line, then parse it.

    On py3 the path and query of ``self.raw_requestline`` are unquoted and
    re-quoted before the base class parser runs.

    :returns: whatever ``wsgi.HttpProtocol.parse_request`` returns
    """
    if not six.PY2:
        # request lines *should* be ascii per the RFC, but historically
        # we've allowed (and even have func tests that use) arbitrary
        # bytes. This breaks on py3 (see https://bugs.python.org/issue33973
        # ) but the work-around is simple: munge the request line to be
        # properly quoted. py2 will do the right thing without this, but it
        # doesn't hurt to re-write the request line like this and it
        # simplifies testing.
        if self.raw_requestline.count(b' ') >= 2:
            def requote(raw, quoter, unquoter):
                # unquote first, so we don't over-quote something
                # that was *correctly* quoted
                return wsgi_to_bytes(quoter(unquoter(bytes_to_wsgi(raw))))

            parts = self.raw_requestline.split(b' ', 2)
            path, q, query = parts[1].partition(b'?')
            path = requote(path, wsgi_quote, wsgi_unquote)

            requoted_params = []
            for part in query.split(b'&'):
                key, sep, val = part.partition(b'=')
                requoted_params.append(sep.join([
                    requote(key, wsgi_quote_plus, wsgi_unquote_plus),
                    requote(val, wsgi_quote_plus, wsgi_unquote_plus),
                ]))
            query = b'&'.join(requoted_params)

            parts[1] = path + q + query
            self.raw_requestline = b' '.join(parts)
        # else, mangled protocol, most likely; let base class deal with it
    return wsgi.HttpProtocol.parse_request(self)
def check_metadata(req, target_type):
    """
    Check metadata sent in the request headers.  This should only check
    that the metadata in the request given is valid.  Checks against
    account/container overall metadata should be forwarded on to its
    respective server to be checked.

    :param req: request object
    :param target_type: str: one of: object, container, or account: indicates
                        which type the target storage for the metadata is
    :returns: HTTPBadRequest with bad metadata otherwise None
    """
    def bad_request(body):
        # every rejection is a plain-text HTTPBadRequest for this request
        return HTTPBadRequest(body=body, request=req,
                              content_type='text/plain')

    target_type = target_type.lower()
    prefix = 'x-%s-meta-' % target_type
    meta_count = 0
    meta_size = 0
    for header, value in req.headers.items():
        # the size cap applies to every header, not only metadata headers
        if (isinstance(value, six.string_types)
                and len(value) > MAX_HEADER_SIZE):
            return bad_request(
                b'Header value too long: %s' %
                wsgi_to_bytes(header[:MAX_META_NAME_LENGTH]))
        if not header.lower().startswith(prefix):
            continue

        name = header[len(prefix):]
        if not name:
            return bad_request('Metadata name cannot be empty')

        bad_key = not check_utf8(wsgi_to_str(name))
        bad_value = value and not check_utf8(wsgi_to_str(value))
        if target_type in ('account', 'container') and (
                bad_key or bad_value):
            return bad_request('Metadata must be valid UTF-8')

        meta_count += 1
        meta_size += len(name) + len(value)
        if len(name) > MAX_META_NAME_LENGTH:
            return bad_request(wsgi_to_bytes(
                'Metadata name too long: %s%s' % (prefix, name)))
        if len(value) > MAX_META_VALUE_LENGTH:
            return bad_request(wsgi_to_bytes(
                'Metadata value longer than %d: %s%s' % (
                    MAX_META_VALUE_LENGTH, prefix, name)))
        if meta_count > MAX_META_COUNT:
            return bad_request(
                'Too many metadata items; max %d' % MAX_META_COUNT)
        if meta_size > MAX_META_OVERALL_SIZE:
            return bad_request(
                'Total metadata too large; max %d' % MAX_META_OVERALL_SIZE)
    return None
def do_test(method, plain_etags, expected_plain_etags=None):
    """
    Send a conditional request through the encrypter and check the etags
    that reach the backend.

    :param method: request method to use
    :param plain_etags: list of etag values to send in the match header
    :param expected_plain_etags: plain etags expected at the backend;
        defaults to ``plain_etags``
    """
    def masked(key):
        # base64 HMAC-SHA256 of each real etag ('*' and '' are skipped)
        return [
            '"%s"' % bytes_to_wsgi(base64.b64encode(hmac.new(
                key, wsgi_to_bytes(etag.strip('"')),
                hashlib.sha256).digest()))
            for etag in plain_etags if etag not in ('*', '')]

    path = '/v1/a/c/o'
    match_header_value = ', '.join(plain_etags)
    req = Request.blank(
        path,
        environ={CRYPTO_KEY_CALLBACK: fetch_crypto_keys},
        method=method,
        headers={match_header_name: match_header_value})
    app = FakeSwift()
    app.register(method, path, HTTPOk, {})
    resp = req.get_response(encrypter.Encrypter(app, {}))
    self.assertEqual('200 OK', resp.status)

    self.assertEqual(1, len(app.calls), app.calls)
    self.assertEqual(method, app.calls[0][0])
    actual_headers = app.headers[0]

    # verify the alternate etag location has been specified
    if match_header_value and match_header_value != '*':
        self.assertIn('X-Backend-Etag-Is-At', actual_headers)
        self.assertEqual('X-Object-Sysmeta-Crypto-Etag-Mac',
                         actual_headers['X-Backend-Etag-Is-At'])

    # verify etags have been supplemented with masked values
    self.assertIn(match_header_name, actual_headers)
    actual_etags = set(actual_headers[match_header_name].split(', '))
    # masked values for secret_id None
    masked_etags = masked(fetch_crypto_keys()['object'])
    # masked values for secret_id myid
    masked_etags_myid = masked(
        fetch_crypto_keys(key_id={'secret_id': 'myid'})['object'])
    expected_etags = set((expected_plain_etags or plain_etags) +
                         masked_etags + masked_etags_myid)
    self.assertEqual(expected_etags, actual_etags)

    # check that the request environ was returned to original state
    restored_etags = set(req.headers[match_header_name].split(', '))
    self.assertEqual(set(plain_etags), restored_etags)
def test_encrypt_header_val(self):
    """
    encrypt_header_val returns (encrypted value, crypto meta) for a normal
    string and raises ValueError for empty values.
    """
    # Prepare key and Crypto instance
    object_key = fetch_crypto_keys()['object']

    # - Normal string can be crypted
    result = encrypter.encrypt_header_val(Crypto(), 'aaa', object_key)
    # sanity: return value is 2 item tuple
    self.assertEqual(2, len(result))
    crypted_val, crypt_info = result
    self.assertEqual(
        base64.b64encode(encrypt(b'aaa', object_key, FAKE_IV)),
        wsgi_to_bytes(crypted_val))
    self.assertEqual(
        {'cipher': 'AES_CTR_256', 'iv': b'This is an IV123'},
        crypt_info)

    # - Empty string and None both raise a ValueError for safety
    for empty_value in ('', None):
        with self.assertRaises(ValueError) as cm:
            encrypter.encrypt_header_val(Crypto(), empty_value, object_key)
        self.assertEqual('empty value is not acceptable',
                         cm.exception.args[0])
def test_encrypt_header_val(self):
    """
    Exercise encrypt_header_val with one normal value and with the empty
    values ('' and None) that it is expected to reject.
    """
    # Prepare key and Crypto instance
    object_key = fetch_crypto_keys()['object']

    # - Normal string can be crypted
    outcome = encrypter.encrypt_header_val(Crypto(), 'aaa', object_key)
    # sanity: return value is 2 item tuple
    self.assertEqual(2, len(outcome))
    crypted_val, crypt_info = outcome

    want_val = base64.b64encode(encrypt(b'aaa', object_key, FAKE_IV))
    want_info = {'cipher': 'AES_CTR_256', 'iv': b'This is an IV123'}
    self.assertEqual(want_val, wsgi_to_bytes(crypted_val))
    self.assertEqual(want_info, crypt_info)

    # - Empty string, then None: each raises a ValueError for safety
    rejected_message = 'empty value is not acceptable'
    for rejected in ('', None):
        with self.assertRaises(ValueError) as cm:
            encrypter.encrypt_header_val(Crypto(), rejected, object_key)
        self.assertEqual(rejected_message, cm.exception.args[0])
def _title(s):
    """
    Title-case a header name, with special cases for S3 clients.

    :param s: header name
    :returns: 'ETag' for the etag header, an all-lowercase name for
              ``x-amz-*`` headers, otherwise the result of
              ``header_key_dict.HeaderKeyDict._title``
    """
    s = header_key_dict.HeaderKeyDict._title(s)
    lowered = s.lower()
    if lowered == 'etag':
        # AWS Java SDK expects only 'ETag'.
        return 'ETag'
    if lowered.startswith('x-amz-'):
        # AWS headers returned by S3 are lowercase.
        return swob.bytes_to_wsgi(swob.wsgi_to_bytes(s).lower())
    return s
def do_test(method, plain_etags, expected_plain_etags=None):
    """
    Check how the encrypter rewrites a conditional request's etags.

    :param method: request method to use
    :param plain_etags: list of etag values to send in the match header
    :param expected_plain_etags: plain etags expected at the backend;
        defaults to ``plain_etags``
    """
    def mask_all(object_key):
        # quoted base64 HMAC-SHA256 of every etag other than '*' and ''
        result = []
        for etag in plain_etags:
            if etag in ('*', ''):
                continue
            digest = hmac.new(object_key, wsgi_to_bytes(etag.strip('"')),
                              hashlib.sha256).digest()
            result.append('"%s"' % bytes_to_wsgi(base64.b64encode(digest)))
        return result

    env = {CRYPTO_KEY_CALLBACK: fetch_crypto_keys}
    match_header_value = ', '.join(plain_etags)
    req = Request.blank(
        '/v1/a/c/o', environ=env, method=method,
        headers={match_header_name: match_header_value})
    app = FakeSwift()
    app.register(method, '/v1/a/c/o', HTTPOk, {})
    resp = req.get_response(encrypter.Encrypter(app, {}))
    self.assertEqual('200 OK', resp.status)

    self.assertEqual(1, len(app.calls), app.calls)
    self.assertEqual(method, app.calls[0][0])
    backend_headers = app.headers[0]

    # verify the alternate etag location has been specified
    if match_header_value and match_header_value != '*':
        self.assertIn('X-Backend-Etag-Is-At', backend_headers)
        self.assertEqual('X-Object-Sysmeta-Crypto-Etag-Mac',
                         backend_headers['X-Backend-Etag-Is-At'])

    # verify etags have been supplemented with masked values
    self.assertIn(match_header_name, backend_headers)
    actual_etags = set(backend_headers[match_header_name].split(', '))
    # masked values for secret_id None
    masked_etags = mask_all(fetch_crypto_keys()['object'])
    # masked values for secret_id myid
    masked_etags_myid = mask_all(
        fetch_crypto_keys(key_id={'secret_id': 'myid'})['object'])
    expected_etags = set((expected_plain_etags or plain_etags) +
                         masked_etags + masked_etags_myid)
    self.assertEqual(expected_etags, actual_etags)

    # check that the request environ was returned to original state
    self.assertEqual(set(plain_etags),
                     set(req.headers[match_header_name].split(', ')))
def handle(self, req, start_response):
    """
    Call the wrapped app and decrypt the response where applicable.

    Response headers are decrypted whenever decryption keys were found; the
    body is decrypted only for a successful GET whose response carries body
    crypto meta.

    :param req: the request being handled
    :param start_response: WSGI start_response callable
    :returns: an iterable for the (possibly decrypted) response body
    :raises HTTPInternalServerError: if reading crypto meta or fetching
        keys raises EncryptionException
    """
    app_resp = self._app_call(req.environ)

    try:
        put_crypto_meta = self._read_crypto_meta(
            'X-Object-Sysmeta-Crypto-Body-Meta', True)
        put_keys = self.get_decryption_keys(req, put_crypto_meta)
        post_crypto_meta = self._read_crypto_meta(
            'X-Object-Transient-Sysmeta-Crypto-Meta', False)
        post_keys = self.get_decryption_keys(req, post_crypto_meta)
    except EncryptionException as err:
        self.logger.error(
            "Error decrypting object: %s",
            err)
        raise HTTPInternalServerError(
            body='Error decrypting object', content_type='text/plain')

    if put_keys is None and post_keys is None:
        # skip decryption
        start_response(self._response_status, self._response_headers,
                       self._response_exc_info)
        return app_resp

    mod_resp_headers = self.decrypt_resp_headers(put_keys, post_keys)

    body_is_encrypted = (put_crypto_meta and req.method == 'GET'
                         and is_success(self._get_status_int()))
    if not body_is_encrypted:
        # don't decrypt body of unencrypted or non-2xx responses
        resp_iter = app_resp
    else:
        # 2xx response and encrypted body
        body_key = self.get_unwrapped_key(
            put_crypto_meta, put_keys['object'])
        content_type, content_type_attrs = parse_content_type(
            self._response_header_value('Content-Type'))

        is_multipart = (self._get_status_int() == 206
                        and content_type == 'multipart/byteranges')
        if is_multipart:
            boundary = wsgi_to_bytes(dict(content_type_attrs)["boundary"])
            resp_iter = self.multipart_response_iter(
                app_resp, boundary, body_key, put_crypto_meta)
        else:
            content_range = self._response_header_value('Content-Range')
            if content_range:
                # Determine offset within the whole object if ranged GET
                offset, _end, _total = parse_content_range(content_range)
            else:
                offset = 0
            resp_iter = self.response_iter(
                app_resp, body_key, put_crypto_meta, offset)

    mod_resp_headers = purge_crypto_sysmeta_headers(mod_resp_headers)
    start_response(self._response_status, mod_resp_headers,
                   self._response_exc_info)
    return resp_iter
def _hmac_etag(key, etag):
    """
    Compute an HMAC-SHA256 using given key and etag.

    :param key: The starting key for the hash.
    :param etag: The etag to hash; anything that is not already bytes is
                 converted with ``wsgi_to_bytes`` first.
    :returns: a Base64-encoded representation of the HMAC
    """
    etag_bytes = etag if isinstance(etag, bytes) else wsgi_to_bytes(etag)
    mac = hmac.new(key, etag_bytes, digestmod=hashlib.sha256)
    return base64.b64encode(mac.digest()).decode()
def _verify_user_metadata(self, req_hdrs, name, value, key):
    """
    Check that ``req_hdrs`` carries user metadata ``name`` encrypted, with
    its per-item crypto meta, plus the common crypto-meta header.

    :param req_hdrs: headers of the request to inspect
    :param name: user metadata name (the part after ``X-Object-Meta-``)
    :param value: the plaintext value expected to have been encrypted
    :param key: the key expected to have been used for encryption
    """
    common_hdr = 'X-Object-Transient-Sysmeta-Crypto-Meta'
    item_hdr = common_hdr + '-' + name

    # verify encrypted version of user metadata
    self.assertNotIn('X-Object-Meta-' + name, req_hdrs)
    self.assertIn(item_hdr, req_hdrs)
    enc_val, param = req_hdrs[item_hdr].split(';')
    param = param.strip()
    self.assertTrue(param.startswith('swift_meta='))
    meta_json = urlparse.unquote_plus(param[len('swift_meta='):])
    actual_meta = json.loads(meta_json)
    self.assertEqual(Crypto.cipher, actual_meta['cipher'])
    meta_iv = base64.b64decode(actual_meta['iv'])
    self.assertEqual(FAKE_IV, meta_iv)
    plaintext = wsgi_to_bytes(value)
    self.assertEqual(
        base64.b64encode(encrypt(plaintext, key, meta_iv)),
        wsgi_to_bytes(enc_val))

    # if there is any encrypted user metadata then this header should exist
    self.assertIn(common_hdr, req_hdrs)
    common_json = urlparse.unquote_plus(req_hdrs[common_hdr])
    common_meta = json.loads(common_json)
    self.assertDictEqual(
        {'cipher': Crypto.cipher,
         'key_id': {'v': 'fake', 'path': '/a/c/fake'}},
        common_meta)
def translate_swift_to_s3(key, val):
    """
    Map one Swift response header onto the header S3 clients should see.

    :param key: header name (WSGI string)
    :param val: header value
    :returns: a ``(name, value)`` tuple for the header to emit, or None if
              the header should be dropped
    """
    _key = swob.bytes_to_wsgi(swob.wsgi_to_bytes(key).lower())

    def translate_meta_key(name):
        if name.startswith('x-object-meta-'):
            # Note that AWS allows user-defined metadata with underscores in
            # the header, while WSGI (and other protocols derived from CGI)
            # does not differentiate between an underscore and a dash.
            # Fortunately, eventlet exposes the raw headers from the client,
            # so we could translate '_' to '=5F' on the way in. Now, we
            # translate back.
            return 'x-amz-meta-' + name[14:].replace('=5f', '_')
        return name

    # headers passed through with their name unchanged
    passthrough = ('content-length', 'content-type', 'content-range',
                   'content-encoding', 'content-disposition',
                   'content-language', 'etag', 'last-modified',
                   'x-robots-tag', 'cache-control', 'expires')
    # headers whose value is kept but whose name changes
    renamed = {
        'x-object-version-id': 'x-amz-version-id',
        'x-copied-from-version-id': 'x-amz-copy-source-version-id',
    }

    if _key.startswith('x-object-meta-'):
        return translate_meta_key(_key), val
    if _key in passthrough:
        return key, val
    if _key in renamed:
        return renamed[_key], val
    if (_key == 'x-backend-content-type'
            and val == DELETE_MARKER_CONTENT_TYPE):
        return 'x-amz-delete-marker', 'true'
    if _key == 'access-control-expose-headers':
        exposed_headers = val.split(', ') + [
            'x-amz-request-id',
            'x-amz-id-2',
        ]
        return 'access-control-expose-headers', ', '.join(
            translate_meta_key(h) for h in exposed_headers)
    if _key == 'access-control-allow-methods':
        methods = val.split(', ')
        if 'COPY' in methods:
            methods.remove('COPY')  # that's not a thing in S3
        return key, ', '.join(methods)
    if _key.startswith('access-control-'):
        return key, val
    # else, drop the header
    return None
def create_key(self, path, secret_id=None):
    """
    Creates an encryption key that is unique for the given path.

    The key is the HMAC-SHA256 digest of the path, keyed with the root
    secret selected by ``secret_id``.

    :param path: the (WSGI string) path of the resource being encrypted.
    :param secret_id: the id of the root secret from which the key should be
        derived.
    :return: an encryption key.
    :raises UnknownSecretIdError: if the secret_id is not recognised.
    """
    try:
        root_secret = self._root_secrets[secret_id]
    except KeyError:
        self.logger.warning('Unrecognised secret id: %s' % secret_id)
        raise UnknownSecretIdError(secret_id)

    path_bytes = wsgi_to_bytes(path)
    mac = hmac.new(root_secret, path_bytes, digestmod=hashlib.sha256)
    return mac.digest()
def encrypt_header_val(crypto, value, key):
    """
    Encrypt a header value using the supplied key.

    :param crypto: a Crypto instance
    :param value: value to encrypt
    :param key: crypto key to use
    :returns: a tuple of (encrypted value, crypto_meta) where crypto_meta is a
        dict of form returned by
        :py:func:`~swift.common.middleware.crypto.Crypto.get_crypto_meta`
    :raises ValueError: if value is empty
    """
    if not value:
        raise ValueError('empty value is not acceptable')

    # fresh crypto meta per value; its IV seeds the encryption context
    crypto_meta = crypto.create_crypto_meta()
    ctxt = crypto.create_encryption_ctxt(key, crypto_meta['iv'])
    ciphertext = ctxt.update(wsgi_to_bytes(value))
    return bytes_to_wsgi(base64.b64encode(ciphertext)), crypto_meta
def http_response_to_document_iters(response, read_chunk_size=4096):
    """
    Takes a successful object-GET HTTP response and turns it into an
    iterator of (first-byte, last-byte, length, headers, body-file)
    5-tuples.

    The response must either be a 200 or a 206; if you feed in a 204 or
    something similar, this probably won't work.

    :param response: HTTP response, like from bufferedhttp.http_connect(),
        not a swob.Response.
    :param read_chunk_size: passed through to
        multipart_byteranges_to_document_iters for multipart responses
    """
    def single_range(first, last, length):
        # one-element iterator whose body-file is the response itself
        return iter([(first, last, length, response.getheaders(), response)])

    chunked = is_chunked(dict(response.getheaders()))

    if response.status == 200:
        if chunked:
            # Single "range" that's the whole object with an unknown length
            return single_range(0, None, None)

        # Single "range" that's the whole object
        content_length = int(response.getheader('Content-Length'))
        return single_range(0, content_length - 1, content_length)

    content_type, params_list = parse_content_type(
        response.getheader('Content-Type'))
    if content_type == 'multipart/byteranges':
        # Multiple ranges; the response body is a multipart/byteranges MIME
        # document, and we have to parse it using the MIME boundary
        # extracted from the Content-Type header.
        params = dict(params_list)
        return multipart_byteranges_to_document_iters(
            response, wsgi_to_bytes(params['boundary']), read_chunk_size)

    # Single range; no MIME framing, just the bytes. The start and end
    # byte indices are in the Content-Range header.
    start, end, length = parse_content_range(
        response.getheader('Content-Range'))
    return single_range(start, end, length)
def __call__(self, env, start_response):
    """
    WSGI entry point: reformat account/container listings.

    GET and HEAD requests for an account or container are forced to
    ``format=json`` before being passed to the wrapped app; a JSON listing
    in the response is then re-serialised as the content type the client
    asked for (XML, plain text or JSON). Anything else is passed through
    to, or returned from, the wrapped app unchanged.

    :param env: WSGI environment
    :param start_response: WSGI start_response callable
    :returns: an iterable of response body chunks
    """
    req = Request(env)
    try:
        # account and container only
        version, acct, cont = req.split_path(2, 3)
    except ValueError:
        is_account_or_container_req = False
    else:
        is_account_or_container_req = True
    if not is_account_or_container_req:
        return self.app(env, start_response)

    if not valid_api_version(version) or req.method not in ('GET', 'HEAD'):
        return self.app(env, start_response)

    # OK, definitely have an account/container request.
    # Get the desired content-type, then force it to a JSON request.
    try:
        out_content_type = get_listing_content_type(req)
    except HTTPException as err:
        return err(env, start_response)

    params = req.params
    # only add "Vary: Accept" when the client did not pin the format
    # with an explicit query parameter
    can_vary = 'format' not in params
    params['format'] = 'json'
    req.params = params

    # Give other middlewares a chance to be in charge
    env.setdefault('swift.format_listing', True)
    status, headers, resp_iter = req.call_application(self.app)
    if not env.get('swift.format_listing'):
        start_response(status, headers)
        return resp_iter

    # remember where the headers we may rewrite live in the headers list
    header_to_index = {}
    resp_content_type = resp_length = None
    for i, (header, value) in enumerate(headers):
        header = header.lower()
        if header == 'content-type':
            header_to_index[header] = i
            resp_content_type = value.partition(';')[0]
        elif header == 'content-length':
            header_to_index[header] = i
            resp_length = int(value)
        elif header == 'vary':
            header_to_index[header] = i

    if not status.startswith(('200 ', '204 ')):
        start_response(status, headers)
        return resp_iter

    if can_vary:
        if 'vary' in header_to_index:
            value = headers[header_to_index['vary']][1]
            if 'accept' not in list_from_csv(value.lower()):
                headers[header_to_index['vary']] = (
                    'Vary', value + ', Accept')
        else:
            headers.append(('Vary', 'Accept'))

    if resp_content_type != 'application/json':
        start_response(status, headers)
        return resp_iter

    if resp_length is None or \
            resp_length > MAX_CONTAINER_LISTING_CONTENT_LENGTH:
        start_response(status, headers)
        return resp_iter

    def set_header(header, value):
        # Replace the value of a header recorded in header_to_index, or
        # delete the header when value is None. Deleting shifts the list
        # positions of any later headers, so the recorded indices are
        # only reliable until the first deletion.
        if value is None:
            del headers[header_to_index[header]]
        else:
            headers[header_to_index[header]] = (
                headers[header_to_index[header]][0], str(value))

    if req.method == 'HEAD':
        set_header('content-type', out_content_type + '; charset=utf-8')
        set_header('content-length', None)  # don't know, can't determine
        start_response(status, headers)
        return resp_iter

    # the length check above bounds how much is buffered here
    body = b''.join(resp_iter)
    try:
        listing = json.loads(body)
        # Do a couple sanity checks
        if not isinstance(listing, list):
            raise ValueError
        if not all(isinstance(item, dict) for item in listing):
            raise ValueError
    except ValueError:
        # Static web listing that's returning invalid JSON?
        # Just pass it straight through; that's about all we *can* do.
        start_response(status, headers)
        return [body]

    if not req.allow_reserved_names:
        listing = self.filter_reserved(listing, acct, cont)

    try:
        if out_content_type.endswith('/xml'):
            if cont:
                body = container_to_xml(
                    listing, wsgi_to_bytes(cont).decode('utf-8'))
            else:
                body = account_to_xml(
                    listing, wsgi_to_bytes(acct).decode('utf-8'))
        elif out_content_type == 'text/plain':
            body = listing_to_text(listing)
        else:
            body = json.dumps(listing).encode('ascii')
    except KeyError:
        # listing was in a bad format -- funky static web listing??
        # (body still holds the bytes read from the app at this point)
        start_response(status, headers)
        return [body]

    if not body:
        status = '%s %s' % (HTTP_NO_CONTENT,
                            RESPONSE_REASONS[HTTP_NO_CONTENT][0])

    set_header('content-type', out_content_type + '; charset=utf-8')
    set_header('content-length', len(body))
    start_response(status, headers)
    return [body]
def _get_from_shards(self, req, resp):
    """
    Construct an object listing using shards described by the response body.

    ``resp.body`` is parsed as a JSON list of shard range dicts. Each shard
    range's container is listed in turn, with marker/end_marker/limit
    adjusted per shard, until the request limit or end_marker is reached.

    :param req: the original listing request
    :param resp: the response whose body describes the shard ranges; its
        body (and possibly its object count / bytes used headers) is
        replaced with the aggregated listing
    :returns: ``resp``
    """
    shard_ranges = [ShardRange.from_dict(data)
                    for data in json.loads(resp.body)]
    self.app.logger.debug('GET listing from %s shards for: %s',
                          len(shard_ranges), req.path_qs)
    if not shard_ranges:
        # can't find ranges or there was a problem getting the ranges. So
        # return what we have.
        return resp

    objects = []
    req_limit = int(req.params.get('limit', CONTAINER_LISTING_LIMIT))
    params = req.params.copy()
    params.pop('states', None)
    req.headers.pop('X-Backend-Record-Type', None)
    reverse = config_true_value(params.get('reverse'))
    marker = params.get('marker')
    end_marker = params.get('end_marker')

    limit = req_limit
    for shard_range in shard_ranges:
        params['limit'] = limit
        # Always set marker to ensure that object names less than or equal
        # to those already in the listing are not fetched; if the listing
        # is empty then the original request marker, if any, is used. This
        # allows misplaced objects below the expected shard range to be
        # included in the listing.
        if objects:
            last_name = objects[-1].get('name',
                                        objects[-1].get('subdir', u''))
            params['marker'] = last_name.encode('utf-8')
        elif marker:
            params['marker'] = marker
        else:
            params['marker'] = ''
        # Always set end_marker to ensure that misplaced objects beyond the
        # expected shard range are not fetched. This prevents a misplaced
        # object obscuring correctly placed objects in the next shard
        # range.
        if end_marker and end_marker in shard_range:
            params['end_marker'] = end_marker
        elif reverse:
            params['end_marker'] = str_to_wsgi(shard_range.lower_str)
        else:
            params['end_marker'] = str_to_wsgi(shard_range.end_marker)

        if (shard_range.account == self.account_name and
                shard_range.container == self.container_name):
            # directed back to same container - force GET of objects
            headers = {'X-Backend-Record-Type': 'object'}
        else:
            headers = None
        self.app.logger.debug('Getting from %s %s with %s',
                              shard_range, shard_range.name, headers)
        objs, shard_resp = self._get_container_listing(
            req, shard_range.account, shard_range.container,
            headers=headers, params=params)

        if not objs:
            # tolerate errors or empty shard containers
            continue

        objects.extend(objs)
        limit -= len(objs)

        if limit <= 0:
            break
        if end_marker:
            # The last entry may be a 'subdir' entry with no 'name' key, so
            # use the same fallback as the marker logic above rather than
            # indexing ['name'] directly (which raised KeyError).
            last_name = objects[-1].get(
                'name', objects[-1].get('subdir', u'')).encode('utf-8')
            end_marker_bytes = wsgi_to_bytes(end_marker)
            if reverse and end_marker_bytes >= last_name:
                break
            if not reverse and end_marker_bytes <= last_name:
                break

    resp.body = json.dumps(objects).encode('ascii')
    constrained = any(req.params.get(constraint) for constraint in (
        'marker', 'end_marker', 'path', 'prefix', 'delimiter'))
    if not constrained and len(objects) < req_limit:
        self.app.logger.debug('Setting object count to %s' % len(objects))
        # prefer the actual listing stats over the potentially outdated
        # root stats. This condition is only likely when a sharded
        # container is shrinking or in tests; typically a sharded container
        # will have more than CONTAINER_LISTING_LIMIT objects so any
        # unconstrained listing will be capped by the limit and total
        # object stats cannot therefore be inferred from the listing.
        resp.headers['X-Container-Object-Count'] = len(objects)
        resp.headers['X-Container-Bytes-Used'] = sum(
            o['bytes'] for o in objects)
    return resp
def __init__(self, *args, **kwargs):
    """
    Build the response via ``swob.Response`` and translate its headers.

    The headers set by ``swob.Response.__init__`` are split into s3api
    sysmeta (older swift3 sysmeta is renamed to its s3api equivalent) and
    everything else. Selected headers from the latter group are then
    mapped to their S3 names and become ``self.headers``.

    Sets ``self.is_slo``, ``self.headers``, ``self.sw_headers`` and
    ``self.sysmeta_headers``.

    :param args: positional arguments passed to ``swob.Response.__init__``
    :param kwargs: keyword arguments passed to ``swob.Response.__init__``
    """
    swob.Response.__init__(self, *args, **kwargs)

    s3_sysmeta_headers = swob.HeaderKeyDict()
    sw_headers = swob.HeaderKeyDict()
    headers = HeaderKeyDict()
    self.is_slo = False

    def is_swift3_sysmeta(sysmeta_key, server_type):
        swift3_sysmeta_prefix = (
            'x-%s-sysmeta-swift3' % server_type).lower()
        return sysmeta_key.lower().startswith(swift3_sysmeta_prefix)

    def is_s3api_sysmeta(sysmeta_key, server_type):
        # use the argument, not the enclosing loop's _server_type, so the
        # helper does not depend on when it is called
        s3api_sysmeta_prefix = sysmeta_prefix(server_type).lower()
        return sysmeta_key.lower().startswith(s3api_sysmeta_prefix)

    for key, val in self.headers.items():
        if is_sys_meta('object', key) or is_sys_meta('container', key):
            # second dash-separated field of the header name,
            # e.g. 'object' in 'x-object-sysmeta-...'
            _server_type = key.split('-')[1]
            if is_swift3_sysmeta(key, _server_type):
                # To be compatible with older swift3, translate swift3
                # sysmeta to s3api sysmeta here
                key = sysmeta_prefix(_server_type) + \
                    key[len('x-%s-sysmeta-swift3-' % _server_type):]

                if key not in s3_sysmeta_headers:
                    # To avoid overwrite s3api sysmeta by older swift3
                    # sysmeta set the key only when the key does not exist
                    s3_sysmeta_headers[key] = val
            elif is_s3api_sysmeta(key, _server_type):
                s3_sysmeta_headers[key] = val
            else:
                sw_headers[key] = val
        else:
            sw_headers[key] = val

    # Handle swift headers
    for key, val in sw_headers.items():
        _key = swob.bytes_to_wsgi(swob.wsgi_to_bytes(key).lower())

        if _key.startswith('x-object-meta-'):
            # Note that AWS ignores user-defined headers with '=' in the
            # header name. We translated underscores to '=5F' on the way
            # in, though.
            headers['x-amz-meta-' + _key[14:].replace('=5f', '_')] = val
        elif _key in ('content-length', 'content-type',
                      'content-range', 'content-encoding',
                      'content-disposition', 'content-language',
                      'etag', 'last-modified', 'x-robots-tag',
                      'cache-control', 'expires'):
            headers[key] = val
        elif _key == 'x-object-version-id':
            headers['x-amz-version-id'] = val
        elif _key == 'x-copied-from-version-id':
            headers['x-amz-copy-source-version-id'] = val
        elif _key == 'x-static-large-object':
            # for delete slo
            self.is_slo = config_true_value(val)
        elif _key == 'x-backend-content-type' and \
                val == DELETE_MARKER_CONTENT_TYPE:
            headers['x-amz-delete-marker'] = 'true'

    # Check whether we stored the AWS-style etag on upload
    override_etag = s3_sysmeta_headers.get(
        sysmeta_header('object', 'etag'))
    if override_etag not in (None, ''):
        # Multipart uploads in AWS have ETags like
        #   <MD5(part_etag1 || ... || part_etagN)>-<number of parts>
        headers['etag'] = override_etag
    elif self.is_slo and 'etag' in headers:
        # Many AWS clients use the presence of a '-' to decide whether
        # to attempt client-side download validation, so even if we
        # didn't store the AWS-style header, tack on a '-N'. (Use 'N'
        # because we don't actually know how many parts there are.)
        headers['etag'] += '-N'

    self.headers = headers

    if self.etag:
        # add double quotes to the etag header
        # NOTE(review): presumably the etag property setter does the
        # quoting, which is why this self-assignment is not a no-op;
        # confirm against swob.Response.
        self.etag = self.etag

    # Used for pure swift header handling at the request layer
    self.sw_headers = sw_headers
    self.sysmeta_headers = s3_sysmeta_headers
def _get_from_shards(self, req, resp):
    """
    Construct an object listing using shards described by the response body.

    ``resp.body`` is parsed as a JSON list of shard range dicts. Each shard
    range's container is listed in turn, with marker/end_marker/limit
    adjusted per shard, until the request limit or end_marker is reached.

    :param req: the original listing request
    :param resp: the response whose body describes the shard ranges; its
        body (and possibly its object count / bytes used headers) is
        replaced with the aggregated listing
    :returns: ``resp``
    """
    def last_listed_name():
        # Name of the last listed item as utf-8 bytes; entries without a
        # 'name' key fall back to their 'subdir' key.
        last = objects[-1]
        return last.get('name', last.get('subdir', u'')).encode('utf-8')

    shard_ranges = [ShardRange.from_dict(data)
                    for data in json.loads(resp.body)]
    self.app.logger.debug('GET listing from %s shards for: %s',
                          len(shard_ranges), req.path_qs)
    if not shard_ranges:
        # can't find ranges or there was a problem getting the ranges. So
        # return what we have.
        return resp

    objects = []
    req_limit = int(req.params.get('limit', CONTAINER_LISTING_LIMIT))
    params = req.params.copy()
    params.pop('states', None)
    req.headers.pop('X-Backend-Record-Type', None)
    reverse = config_true_value(params.get('reverse'))
    marker = params.get('marker')
    end_marker = params.get('end_marker')

    limit = req_limit
    for shard_range in shard_ranges:
        params['limit'] = limit
        # Always set marker to ensure that object names less than or equal
        # to those already in the listing are not fetched; if the listing
        # is empty then the original request marker, if any, is used. This
        # allows misplaced objects below the expected shard range to be
        # included in the listing.
        if objects:
            params['marker'] = last_listed_name()
        elif marker:
            params['marker'] = marker
        else:
            params['marker'] = ''
        # Always set end_marker to ensure that misplaced objects beyond the
        # expected shard range are not fetched. This prevents a misplaced
        # object obscuring correctly placed objects in the next shard
        # range.
        if end_marker and end_marker in shard_range:
            params['end_marker'] = end_marker
        elif reverse:
            params['end_marker'] = str_to_wsgi(shard_range.lower_str)
        else:
            params['end_marker'] = str_to_wsgi(shard_range.end_marker)

        if (shard_range.account == self.account_name and
                shard_range.container == self.container_name):
            # directed back to same container - force GET of objects
            headers = {'X-Backend-Record-Type': 'object'}
        else:
            headers = None
        self.app.logger.debug('Getting from %s %s with %s',
                              shard_range, shard_range.name, headers)
        objs, shard_resp = self._get_container_listing(
            req, shard_range.account, shard_range.container,
            headers=headers, params=params)

        if not objs:
            # tolerate errors or empty shard containers
            continue

        objects.extend(objs)
        limit -= len(objs)

        if limit <= 0:
            break
        # Stop once the listing has reached the request's end_marker. The
        # comparison uses last_listed_name() rather than
        # objects[-1]['name'], which raised KeyError when the last entry
        # was a 'subdir' entry.
        if (end_marker and reverse and
                wsgi_to_bytes(end_marker) >= last_listed_name()):
            break
        if (end_marker and not reverse and
                wsgi_to_bytes(end_marker) <= last_listed_name()):
            break

    resp.body = json.dumps(objects).encode('ascii')
    constrained = any(req.params.get(constraint) for constraint in (
        'marker', 'end_marker', 'path', 'prefix', 'delimiter'))
    if not constrained and len(objects) < req_limit:
        self.app.logger.debug('Setting object count to %s' % len(objects))
        # prefer the actual listing stats over the potentially outdated
        # root stats. This condition is only likely when a sharded
        # container is shrinking or in tests; typically a sharded container
        # will have more than CONTAINER_LISTING_LIMIT objects so any
        # unconstrained listing will be capped by the limit and total
        # object stats cannot therefore be inferred from the listing.
        resp.headers['X-Container-Object-Count'] = len(objects)
        resp.headers['X-Container-Bytes-Used'] = sum(
            o['bytes'] for o in objects)
    return resp