def _test_etag_is_at_not_duplicated(self, method):
    """
    Exercise an encrypter request with conditional etag headers.

    Checks that X-Backend-Etag-Is-At contains exactly the crypto etag
    sysmeta name and that If-Match / If-None-Match values gain an
    HMAC-masked companion value.

    :param method: HTTP method to send through the encrypter
    """
    # verify only one occurrence of X-Object-Sysmeta-Crypto-Etag-Mac in
    # X-Backend-Etag-Is-At
    key = fetch_crypto_keys()['object']
    env = {CRYPTO_KEY_CALLBACK: fetch_crypto_keys}
    req = Request.blank('/v1/a/c/o', environ=env, method=method,
                        headers={'If-Match': '"an etag"',
                                 'If-None-Match': '"another etag"'})
    self.app.register(method, '/v1/a/c/o', HTTPOk, {})
    resp = req.get_response(self.encrypter)
    self.assertEqual('200 OK', resp.status)
    # exactly one backend request should have been made
    self.assertEqual(1, len(self.app.calls), self.app.calls)
    self.assertEqual(method, self.app.calls[0][0])
    actual_headers = self.app.headers[0]
    self.assertIn('X-Backend-Etag-Is-At', actual_headers)
    self.assertEqual('X-Object-Sysmeta-Crypto-Etag-Mac',
                     actual_headers['X-Backend-Etag-Is-At'])
    # If-Match should have been supplemented with the HMAC of the etag
    self.assertIn(
        '"%s"' % bytes_to_wsgi(
            base64.b64encode(
                hmac.new(key, b'an etag', hashlib.sha256).digest())),
        actual_headers['If-Match'])
    # the plaintext If-None-Match etag is retained alongside its mask
    self.assertIn('"another etag"', actual_headers['If-None-Match'])
    self.assertIn(
        '"%s"' % bytes_to_wsgi(
            base64.b64encode(
                hmac.new(key, b'another etag', hashlib.sha256).digest())),
        actual_headers['If-None-Match'])
def _test_etag_is_at_not_duplicated(self, method):
    """
    Exercise an encrypter request with conditional etag headers.

    Checks that X-Backend-Etag-Is-At contains exactly the crypto etag
    sysmeta name and that If-Match / If-None-Match values gain an
    HMAC-masked companion value.

    :param method: HTTP method to send through the encrypter
    """
    # verify only one occurrence of X-Object-Sysmeta-Crypto-Etag-Mac in
    # X-Backend-Etag-Is-At
    key = fetch_crypto_keys()['object']
    env = {CRYPTO_KEY_CALLBACK: fetch_crypto_keys}
    req = Request.blank(
        '/v1/a/c/o', environ=env, method=method,
        headers={'If-Match': '"an etag"',
                 'If-None-Match': '"another etag"'})
    self.app.register(method, '/v1/a/c/o', HTTPOk, {})
    resp = req.get_response(self.encrypter)
    self.assertEqual('200 OK', resp.status)
    # exactly one backend request should have been made
    self.assertEqual(1, len(self.app.calls), self.app.calls)
    self.assertEqual(method, self.app.calls[0][0])
    actual_headers = self.app.headers[0]
    self.assertIn('X-Backend-Etag-Is-At', actual_headers)
    self.assertEqual('X-Object-Sysmeta-Crypto-Etag-Mac',
                     actual_headers['X-Backend-Etag-Is-At'])
    # If-Match should have been supplemented with the HMAC of the etag
    self.assertIn('"%s"' % bytes_to_wsgi(base64.b64encode(
        hmac.new(key, b'an etag', hashlib.sha256).digest())),
        actual_headers['If-Match'])
    # the plaintext If-None-Match etag is retained alongside its mask
    self.assertIn('"another etag"', actual_headers['If-None-Match'])
    self.assertIn('"%s"' % bytes_to_wsgi(base64.b64encode(
        hmac.new(key, b'another etag', hashlib.sha256).digest())),
        actual_headers['If-None-Match'])
def parse_request(self):
    """
    Parse the raw HTTP request line, re-quoting any stray bytes in the
    path and query on py3 before delegating to the base class parser.
    """
    if not six.PY2:
        # request lines *should* be ascii per the RFC, but historically
        # we've allowed (and even have func tests that use) arbitrary
        # bytes. This breaks on py3 (see https://bugs.python.org/issue33973
        # ) but the work-around is simple: munge the request line to be
        # properly quoted. py2 will do the right thing without this, but it
        # doesn't hurt to re-write the request line like this and it
        # simplifies testing.
        if self.raw_requestline.count(b' ') >= 2:
            parts = self.raw_requestline.split(b' ', 2)
            path, q, query = parts[1].partition(b'?')
            # unquote first, so we don't over-quote something
            # that was *correctly* quoted
            path = wsgi_to_bytes(wsgi_quote(wsgi_unquote(
                bytes_to_wsgi(path))))
            # re-quote every key and value of every query parameter,
            # preserving the presence/absence of '=' via ``sep``
            query = b'&'.join(
                sep.join([
                    wsgi_to_bytes(wsgi_quote_plus(wsgi_unquote_plus(
                        bytes_to_wsgi(key)))),
                    wsgi_to_bytes(wsgi_quote_plus(wsgi_unquote_plus(
                        bytes_to_wsgi(val))))
                ])
                for part in query.split(b'&')
                for key, sep, val in (part.partition(b'='), ))
            parts[1] = path + q + query
            self.raw_requestline = b' '.join(parts)
        # else, mangled protocol, most likely; let base class deal with it
    return wsgi.HttpProtocol.parse_request(self)
def test_check_symlink_header(self):
    """Verify _check_symlink_header accepts well-formed target headers."""
    def do_test(headers):
        # should not raise for a valid symlink target
        req = Request.blank('/v1/a/c/o', method='PUT', headers=headers)
        symlink._check_symlink_header(req)

    # normal cases
    do_test({'X-Symlink-Target': 'c1/o1'})
    do_test({'X-Symlink-Target': 'c1/sub/o1'})
    do_test({'X-Symlink-Target': 'c1%2Fo1'})
    # specify account
    do_test({'X-Symlink-Target': 'c1/o1',
             'X-Symlink-Target-Account': 'another'})
    # URL encoded is safe
    do_test({'X-Symlink-Target': 'c1%2Fo1'})
    # URL encoded + multibytes is also safe
    target = u'\u30b0\u30e9\u30d6\u30eb/\u30a2\u30ba\u30ec\u30f3'
    target = swob.bytes_to_wsgi(target.encode('utf8'))
    do_test({'X-Symlink-Target': target})
    do_test({'X-Symlink-Target': swob.wsgi_quote(target)})
    target = swob.bytes_to_wsgi(u'\u30b0\u30e9\u30d6\u30eb'.encode('utf8'))
    do_test({'X-Symlink-Target': 'cont/obj',
             'X-Symlink-Target-Account': target})
def _listing_pages_iter(self, account_name, lcontainer, lprefix, req,
                        marker='', end_marker='', reverse=True):
    '''Get "pages" worth of objects that start with a prefix.

    The optional keyword arguments ``marker``, ``end_marker``, and
    ``reverse`` are used similar to how they are for containers. We're
    either coming:

       - directly from ``_listing_iter``, in which case none of the
         optional args are specified, or

       - from ``_in_proxy_reverse_listing``, in which case ``reverse``
         is ``False`` and both ``marker`` and ``end_marker`` are
         specified (although they may still be blank).
    '''
    while True:
        # pre-authed GET for the next page of the versions listing
        lreq = make_pre_authed_request(
            req.environ, method='GET', swift_source='VW',
            path=wsgi_quote('/v1/%s/%s' % (account_name, lcontainer)))
        lreq.environ['QUERY_STRING'] = \
            'prefix=%s&marker=%s' % (wsgi_quote(lprefix),
                                     wsgi_quote(marker))
        if end_marker:
            lreq.environ['QUERY_STRING'] += '&end_marker=%s' % (
                wsgi_quote(end_marker))
        if reverse:
            lreq.environ['QUERY_STRING'] += '&reverse=on'
        lresp = lreq.get_response(self.app)
        if not is_success(lresp.status_int):
            # ensure the backend response is drained before raising
            close_if_possible(lresp.app_iter)
            if lresp.status_int == HTTP_NOT_FOUND:
                raise ListingIterNotFound()
            elif is_client_error(lresp.status_int):
                raise HTTPPreconditionFailed(request=req)
            else:
                raise ListingIterError()

        if not lresp.body:
            break

        sublisting = json.loads(lresp.body)
        if not sublisting:
            break

        # When using the ``reverse`` param, check that the listing is
        # actually reversed
        first_item = bytes_to_wsgi(sublisting[0]['name'].encode('utf-8'))
        last_item = bytes_to_wsgi(sublisting[-1]['name'].encode('utf-8'))
        page_is_after_marker = marker and first_item > marker
        if reverse and (first_item < last_item or page_is_after_marker):
            # Apparently there's at least one pre-2.6.0 container server
            yield self._in_proxy_reverse_listing(
                account_name, lcontainer, lprefix, req,
                marker, sublisting)
            return

        marker = last_item
        yield sublisting
def _listing_pages_iter(self, account_name, lcontainer, lprefix, req,
                        marker='', end_marker='', reverse=True):
    '''Get "pages" worth of objects that start with a prefix.

    The optional keyword arguments ``marker``, ``end_marker``, and
    ``reverse`` are used similar to how they are for containers. We're
    either coming:

       - directly from ``_listing_iter``, in which case none of the
         optional args are specified, or

       - from ``_in_proxy_reverse_listing``, in which case ``reverse``
         is ``False`` and both ``marker`` and ``end_marker`` are
         specified (although they may still be blank).
    '''
    while True:
        # pre-authed GET for the next page of the versions listing
        lreq = make_pre_authed_request(
            req.environ, method='GET', swift_source='VW',
            path=wsgi_quote('/v1/%s/%s' % (account_name, lcontainer)))
        lreq.environ['QUERY_STRING'] = \
            'prefix=%s&marker=%s' % (wsgi_quote(lprefix),
                                     wsgi_quote(marker))
        if end_marker:
            lreq.environ['QUERY_STRING'] += '&end_marker=%s' % (
                wsgi_quote(end_marker))
        if reverse:
            lreq.environ['QUERY_STRING'] += '&reverse=on'
        lresp = lreq.get_response(self.app)
        if not is_success(lresp.status_int):
            # ensure the backend response is drained before raising
            close_if_possible(lresp.app_iter)
            if lresp.status_int == HTTP_NOT_FOUND:
                raise ListingIterNotFound()
            elif is_client_error(lresp.status_int):
                raise HTTPPreconditionFailed(request=req)
            else:
                raise ListingIterError()

        if not lresp.body:
            break

        sublisting = json.loads(lresp.body)
        if not sublisting:
            break

        # When using the ``reverse`` param, check that the listing is
        # actually reversed
        first_item = bytes_to_wsgi(sublisting[0]['name'].encode('utf-8'))
        last_item = bytes_to_wsgi(sublisting[-1]['name'].encode('utf-8'))
        page_is_after_marker = marker and first_item > marker
        if reverse and (first_item < last_item or page_is_after_marker):
            # Apparently there's at least one pre-2.6.0 container server
            yield self._in_proxy_reverse_listing(
                account_name, lcontainer, lprefix, req,
                marker, sublisting)
            return

        marker = last_item
        yield sublisting
def do_test(method, plain_etags, expected_plain_etags=None):
    # Send a request carrying the given plain etags in the conditional
    # header and verify the encrypter supplements them with HMAC-masked
    # values for each known key secret_id, then restores the environ.
    env = {CRYPTO_KEY_CALLBACK: fetch_crypto_keys}
    match_header_value = ', '.join(plain_etags)
    req = Request.blank(
        '/v1/a/c/o', environ=env, method=method,
        headers={match_header_name: match_header_value})
    app = FakeSwift()
    app.register(method, '/v1/a/c/o', HTTPOk, {})
    resp = req.get_response(encrypter.Encrypter(app, {}))
    self.assertEqual('200 OK', resp.status)
    self.assertEqual(1, len(app.calls), app.calls)
    self.assertEqual(method, app.calls[0][0])
    actual_headers = app.headers[0]

    # verify the alternate etag location has been specified
    if match_header_value and match_header_value != '*':
        self.assertIn('X-Backend-Etag-Is-At', actual_headers)
        self.assertEqual('X-Object-Sysmeta-Crypto-Etag-Mac',
                         actual_headers['X-Backend-Etag-Is-At'])

    # verify etags have been supplemented with masked values
    self.assertIn(match_header_name, actual_headers)
    actual_etags = set(actual_headers[match_header_name].split(', '))
    # masked values for secret_id None
    key = fetch_crypto_keys()['object']
    masked_etags = [
        '"%s"' % bytes_to_wsgi(
            base64.b64encode(
                hmac.new(key, wsgi_to_bytes(etag.strip('"')),
                         hashlib.sha256).digest()))
        for etag in plain_etags if etag not in ('*', '')
    ]
    # masked values for secret_id myid
    key = fetch_crypto_keys(key_id={'secret_id': 'myid'})['object']
    masked_etags_myid = [
        '"%s"' % bytes_to_wsgi(
            base64.b64encode(
                hmac.new(key, wsgi_to_bytes(etag.strip('"')),
                         hashlib.sha256).digest()))
        for etag in plain_etags if etag not in ('*', '')
    ]
    expected_etags = set((expected_plain_etags or plain_etags) +
                         masked_etags + masked_etags_myid)
    self.assertEqual(expected_etags, actual_etags)
    # check that the request environ was returned to original state
    self.assertEqual(set(plain_etags),
                     set(req.headers[match_header_name].split(', ')))
def DELETE(self, req):
    """
    Handles Abort Multipart Upload.

    Deletes all uploaded segments for the given uploadId and then the
    in-progress upload marker object.

    :param req: an S3 request object
    :returns: HTTPNoContent on success
    """
    upload_id = req.params['uploadId']
    _check_upload_info(req, self.app, upload_id)

    # First check to see if this multi-part upload was already
    # completed.  Look in the primary container, if the object exists,
    # then it was completed and we return an error here.
    container = req.container_name + MULTIUPLOAD_SUFFIX
    obj = '%s/%s' % (req.object_name, upload_id)
    req.get_response(self.app, container=container, obj=obj)

    # The completed object was not found so this
    # must be a multipart upload abort.
    # We must delete any uploaded segments for this UploadID and then
    # delete the object in the main container as well
    query = {
        'format': 'json',
        'prefix': '%s/%s/' % (req.object_name, upload_id),
        'delimiter': '/',
    }

    resp = req.get_response(self.app, 'GET', container, '', query=query)

    # Iterate over the segment objects and delete them individually
    objects = json.loads(resp.body)
    for o in objects:
        container = req.container_name + MULTIUPLOAD_SUFFIX
        obj = bytes_to_wsgi(o['name'].encode('utf-8'))
        req.get_response(self.app, container=container, obj=obj)

    return HTTPNoContent()
def DELETE(self, req):
    """
    Handles Abort Multipart Upload.

    Deletes all uploaded segments for the given uploadId and then the
    in-progress upload marker object.

    :param req: an S3 request object
    :returns: HTTPNoContent on success
    """
    upload_id = req.params['uploadId']
    _check_upload_info(req, self.app, upload_id)

    # First check to see if this multi-part upload was already
    # completed.  Look in the primary container, if the object exists,
    # then it was completed and we return an error here.
    container = req.container_name + MULTIUPLOAD_SUFFIX
    obj = '%s/%s' % (req.object_name, upload_id)
    req.get_response(self.app, container=container, obj=obj)

    # The completed object was not found so this
    # must be a multipart upload abort.
    # We must delete any uploaded segments for this UploadID and then
    # delete the object in the main container as well
    query = {
        'format': 'json',
        'prefix': '%s/%s/' % (req.object_name, upload_id),
        'delimiter': '/',
    }

    resp = req.get_response(self.app, 'GET', container, '', query=query)

    # Iterate over the segment objects and delete them individually
    objects = json.loads(resp.body)
    for o in objects:
        container = req.container_name + MULTIUPLOAD_SUFFIX
        obj = bytes_to_wsgi(o['name'].encode('utf-8'))
        req.get_response(self.app, container=container, obj=obj)

    return HTTPNoContent()
def get_objs_to_delete(self, req):
    """
    Will populate objs_to_delete with data from request input.

    :params req: a Swob request
    :returns: a list of the contents of req.body when separated by newline.
    :raises HTTPException: on failures
    """
    line = b''
    data_remaining = True
    objs_to_delete = []
    # a body (or chunked transfer) is required for a bulk delete
    if req.content_length is None and \
            req.headers.get('transfer-encoding', '').lower() != 'chunked':
        raise HTTPLengthRequired(request=req)

    while data_remaining:
        if b'\n' in line:
            # consume one complete, newline-terminated name from buffer
            obj_to_delete, line = line.split(b'\n', 1)
            if six.PY2:
                obj_to_delete = wsgi_unquote(obj_to_delete.strip())
            else:
                # yeah, all this chaining is pretty terrible...
                # but it gets even worse trying to use UTF-8 and
                # errors='surrogateescape' when dealing with terrible
                # input like b'\xe2%98\x83'
                obj_to_delete = wsgi_to_str(wsgi_unquote(
                    bytes_to_wsgi(obj_to_delete.strip())))
            objs_to_delete.append({'name': obj_to_delete})
        else:
            data = req.body_file.read(self.max_path_length)
            if data:
                line += data
            else:
                # EOF: whatever remains in the buffer is the final name
                data_remaining = False
                if six.PY2:
                    obj_to_delete = wsgi_unquote(line.strip())
                else:
                    obj_to_delete = wsgi_to_str(wsgi_unquote(
                        bytes_to_wsgi(line.strip())))
                if obj_to_delete:
                    objs_to_delete.append({'name': obj_to_delete})
        if len(objs_to_delete) > self.max_deletes_per_request:
            raise HTTPRequestEntityTooLarge(
                'Maximum Bulk Deletes: %d per request' %
                self.max_deletes_per_request)
        if len(line) > self.max_path_length * 2:
            raise HTTPBadRequest('Invalid File Name')
    return objs_to_delete
def get_objs_to_delete(self, req):
    """
    Will populate objs_to_delete with data from request input.

    :params req: a Swob request
    :returns: a list of the contents of req.body when separated by newline.
    :raises HTTPException: on failures
    """
    line = b''
    data_remaining = True
    objs_to_delete = []
    # a body (or chunked transfer) is required for a bulk delete
    if req.content_length is None and \
            req.headers.get('transfer-encoding', '').lower() != 'chunked':
        raise HTTPLengthRequired(request=req)

    while data_remaining:
        if b'\n' in line:
            # consume one complete, newline-terminated name from buffer
            obj_to_delete, line = line.split(b'\n', 1)
            if six.PY2:
                obj_to_delete = wsgi_unquote(obj_to_delete.strip())
            else:
                # yeah, all this chaining is pretty terrible...
                # but it gets even worse trying to use UTF-8 and
                # errors='surrogateescape' when dealing with terrible
                # input like b'\xe2%98\x83'
                obj_to_delete = wsgi_to_str(
                    wsgi_unquote(bytes_to_wsgi(obj_to_delete.strip())))
            objs_to_delete.append({'name': obj_to_delete})
        else:
            data = req.body_file.read(self.max_path_length)
            if data:
                line += data
            else:
                # EOF: whatever remains in the buffer is the final name
                data_remaining = False
                if six.PY2:
                    obj_to_delete = wsgi_unquote(line.strip())
                else:
                    obj_to_delete = wsgi_to_str(
                        wsgi_unquote(bytes_to_wsgi(line.strip())))
                if obj_to_delete:
                    objs_to_delete.append({'name': obj_to_delete})
        if len(objs_to_delete) > self.max_deletes_per_request:
            raise HTTPRequestEntityTooLarge(
                'Maximum Bulk Deletes: %d per request' %
                self.max_deletes_per_request)
        if len(line) > self.max_path_length * 2:
            raise HTTPBadRequest('Invalid File Name')
    return objs_to_delete
def _iter_items(self, path, marker='', end_marker='', prefix='',
                acceptable_statuses=(2, HTTP_NOT_FOUND)):
    """
    Returns an iterator of items from a json listing.  Assumes listing
    has 'name' key defined and uses markers.

    :param path: Path to do GET on.
    :param marker: Prefix of first desired item, defaults to ''.
    :param end_marker: Last item returned will be 'less' than this,
                       defaults to ''.
    :param prefix: Prefix of items
    :param acceptable_statuses: List of status for valid responses,
                                defaults to (2, HTTP_NOT_FOUND).
    :raises UnexpectedResponse: Exception raised when requests fail
                                to get a response with an acceptable status
    :raises Exception: Exception is raised when code fails in an
                       unexpected way.
    """
    # normalize all listing params to bytes before quoting
    if not isinstance(marker, bytes):
        marker = marker.encode('utf8')
    if not isinstance(end_marker, bytes):
        end_marker = end_marker.encode('utf8')
    if not isinstance(prefix, bytes):
        prefix = prefix.encode('utf8')

    while True:
        resp = self.make_request(
            'GET', '%s?format=json&marker=%s&end_marker=%s&prefix=%s' %
            (path, bytes_to_wsgi(quote(marker)),
             bytes_to_wsgi(quote(end_marker)), bytes_to_wsgi(
                quote(prefix))), {}, acceptable_statuses)
        if not resp.status_int == 200:
            if resp.status_int >= HTTP_MULTIPLE_CHOICES:
                # drain the response body before giving up
                b''.join(resp.app_iter)
            break
        data = json.loads(resp.body)
        if not data:
            break
        for item in data:
            yield item
        # advance the marker to the last item of this page
        marker = data[-1]['name'].encode('utf8')
def _title(s):
    """Title-case a header name, honoring S3's special cases."""
    titled = header_key_dict.HeaderKeyDict._title(s)
    lowered = titled.lower()
    if lowered == 'etag':
        # AWS Java SDK expects only 'ETag'.
        return 'ETag'
    if lowered.startswith('x-amz-'):
        # AWS headers returned by S3 are lowercase.
        return swob.bytes_to_wsgi(swob.wsgi_to_bytes(titled).lower())
    return titled
def _iter_items(
        self, path, marker='', end_marker='', prefix='',
        acceptable_statuses=(2, HTTP_NOT_FOUND)):
    """
    Returns an iterator of items from a json listing.  Assumes listing
    has 'name' key defined and uses markers.

    :param path: Path to do GET on.
    :param marker: Prefix of first desired item, defaults to ''.
    :param end_marker: Last item returned will be 'less' than this,
                       defaults to ''.
    :param prefix: Prefix of items
    :param acceptable_statuses: List of status for valid responses,
                                defaults to (2, HTTP_NOT_FOUND).
    :raises UnexpectedResponse: Exception raised when requests fail
                                to get a response with an acceptable status
    :raises Exception: Exception is raised when code fails in an
                       unexpected way.
    """
    # normalize all listing params to bytes before quoting
    if not isinstance(marker, bytes):
        marker = marker.encode('utf8')
    if not isinstance(end_marker, bytes):
        end_marker = end_marker.encode('utf8')
    if not isinstance(prefix, bytes):
        prefix = prefix.encode('utf8')

    while True:
        resp = self.make_request(
            'GET', '%s?format=json&marker=%s&end_marker=%s&prefix=%s' %
            (path, bytes_to_wsgi(quote(marker)),
             bytes_to_wsgi(quote(end_marker)),
             bytes_to_wsgi(quote(prefix))), {}, acceptable_statuses)
        if not resp.status_int == 200:
            if resp.status_int >= HTTP_MULTIPLE_CHOICES:
                # drain the response body before giving up
                b''.join(resp.app_iter)
            break
        data = json.loads(resp.body)
        if not data:
            break
        for item in data:
            yield item
        # advance the marker to the last item of this page
        marker = data[-1]['name'].encode('utf8')
def do_test(method, plain_etags, expected_plain_etags=None):
    # Send a request carrying the given plain etags in the conditional
    # header and verify the encrypter supplements them with HMAC-masked
    # values for each known key secret_id, then restores the environ.
    env = {CRYPTO_KEY_CALLBACK: fetch_crypto_keys}
    match_header_value = ', '.join(plain_etags)
    req = Request.blank(
        '/v1/a/c/o', environ=env, method=method,
        headers={match_header_name: match_header_value})
    app = FakeSwift()
    app.register(method, '/v1/a/c/o', HTTPOk, {})
    resp = req.get_response(encrypter.Encrypter(app, {}))
    self.assertEqual('200 OK', resp.status)
    self.assertEqual(1, len(app.calls), app.calls)
    self.assertEqual(method, app.calls[0][0])
    actual_headers = app.headers[0]

    # verify the alternate etag location has been specified
    if match_header_value and match_header_value != '*':
        self.assertIn('X-Backend-Etag-Is-At', actual_headers)
        self.assertEqual('X-Object-Sysmeta-Crypto-Etag-Mac',
                         actual_headers['X-Backend-Etag-Is-At'])

    # verify etags have been supplemented with masked values
    self.assertIn(match_header_name, actual_headers)
    actual_etags = set(actual_headers[match_header_name].split(', '))
    # masked values for secret_id None
    key = fetch_crypto_keys()['object']
    masked_etags = [
        '"%s"' % bytes_to_wsgi(base64.b64encode(hmac.new(
            key, wsgi_to_bytes(etag.strip('"')),
            hashlib.sha256).digest()))
        for etag in plain_etags if etag not in ('*', '')]
    # masked values for secret_id myid
    key = fetch_crypto_keys(key_id={'secret_id': 'myid'})['object']
    masked_etags_myid = [
        '"%s"' % bytes_to_wsgi(base64.b64encode(hmac.new(
            key, wsgi_to_bytes(etag.strip('"')),
            hashlib.sha256).digest()))
        for etag in plain_etags if etag not in ('*', '')]
    expected_etags = set((expected_plain_etags or plain_etags) +
                         masked_etags + masked_etags_myid)
    self.assertEqual(expected_etags, actual_etags)
    # check that the request environ was returned to original state
    self.assertEqual(set(plain_etags),
                     set(req.headers[match_header_name].split(', ')))
def accepter(sock, return_code):
    """
    Minimal one-shot HTTP server used by the test: reply with a canned
    status line, then read and validate the incoming PUT request.

    :param sock: accepted socket to serve
    :param return_code: HTTP status code to send back
    :returns: None on success, or the exception raised while serving
              (returned rather than raised so the spawning test can
              collect and assert on it)
    """
    try:
        with Timeout(3):
            inc = sock.makefile('rb')
            out = sock.makefile('wb')
            out.write(b'HTTP/1.1 %d OK\r\nContent-Length: 0\r\n\r\n' %
                      return_code)
            out.flush()
            self.assertEqual(inc.readline(),
                             b'PUT /sda1/0/a/c/o HTTP/1.1\r\n')
            headers = HeaderKeyDict()
            line = bytes_to_wsgi(inc.readline())
            while line and line != '\r\n':
                # split on the first ':' only, so header values that
                # themselves contain colons are not truncated
                name, value = line.split(':', 1)
                headers[name] = value.strip()
                line = bytes_to_wsgi(inc.readline())
            self.assertIn('x-container-timestamp', headers)
            self.assertIn('X-Backend-Storage-Policy-Index', headers)
    except BaseException as err:
        return err
    return None
def _delete_segments_bucket(self, req):
    """
    Before delete bucket, delete segments bucket if existing.

    :param req: an S3 request for the bucket being deleted
    :raises BucketNotEmpty: if the primary bucket still has objects
    :raises ServiceUnavailable: if segment cleanup fails part-way
    """
    container = req.container_name + MULTIUPLOAD_SUFFIX
    marker = ''
    seg = ''

    try:
        resp = req.get_response(self.app, 'HEAD')
        if int(resp.sw_headers['X-Container-Object-Count']) > 0:
            raise BucketNotEmpty()
        # FIXME: This extra HEAD saves unexpected segment deletion
        # but if a complete multipart upload happen while cleanup
        # segment container below, completed object may be missing its
        # segments unfortunately. To be safer, it might be good
        # to handle if the segments can be deleted for each object.
    except NoSuchBucket:
        pass

    try:
        while True:
            # delete all segments
            resp = req.get_response(self.app, 'GET', container,
                                    query={'format': 'json',
                                           'marker': marker})
            segments = json.loads(resp.body)
            for seg in segments:
                try:
                    req.get_response(
                        self.app, 'DELETE', container,
                        swob.bytes_to_wsgi(seg['name'].encode('utf8')))
                except NoSuchKey:
                    # already gone; nothing to do
                    pass
                except InternalError:
                    raise ServiceUnavailable()
            if segments:
                # page through the listing using the last name seen
                marker = seg['name']
            else:
                break
        req.get_response(self.app, 'DELETE', container)
    except NoSuchBucket:
        # no segments bucket to clean up
        return
    except (BucketNotEmpty, InternalError):
        raise ServiceUnavailable()
def translate_swift_to_s3(key, val):
    """
    Translate a single Swift response header into its S3 equivalent.

    :param key: WSGI-string header name from the Swift response
    :param val: the header's value
    :returns: a (name, value) tuple for the S3 response, or None if the
              header should be dropped
    """
    _key = swob.bytes_to_wsgi(swob.wsgi_to_bytes(key).lower())

    def translate_meta_key(_key):
        # map x-object-meta-* names to x-amz-meta-*
        if not _key.startswith('x-object-meta-'):
            return _key
        # Note that AWS allows user-defined metadata with underscores in the
        # header, while WSGI (and other protocols derived from CGI) does not
        # differentiate between an underscore and a dash. Fortunately,
        # eventlet exposes the raw headers from the client, so we could
        # translate '_' to '=5F' on the way in. Now, we translate back.
        return 'x-amz-meta-' + _key[14:].replace('=5f', '_')

    if _key.startswith('x-object-meta-'):
        return translate_meta_key(_key), val
    elif _key in ('content-length', 'content-type',
                  'content-range', 'content-encoding',
                  'content-disposition', 'content-language',
                  'etag', 'last-modified', 'x-robots-tag',
                  'cache-control', 'expires'):
        # standard entity headers pass through unchanged
        return key, val
    elif _key == 'x-object-version-id':
        return 'x-amz-version-id', val
    elif _key == 'x-copied-from-version-id':
        return 'x-amz-copy-source-version-id', val
    elif _key == 'x-backend-content-type' and \
            val == DELETE_MARKER_CONTENT_TYPE:
        return 'x-amz-delete-marker', 'true'
    elif _key == 'access-control-expose-headers':
        exposed_headers = val.split(', ')
        exposed_headers.extend([
            'x-amz-request-id',
            'x-amz-id-2',
        ])
        return 'access-control-expose-headers', ', '.join(
            translate_meta_key(h) for h in exposed_headers)
    elif _key == 'access-control-allow-methods':
        methods = val.split(', ')
        try:
            methods.remove('COPY')  # that's not a thing in S3
        except ValueError:
            pass  # not there? don't worry about it
        return key, ', '.join(methods)
    elif _key.startswith('access-control-'):
        return key, val
    # else, drop the header
    return None
def _delete_segments_bucket(self, req):
    """
    Before delete bucket, delete segments bucket if existing.

    :param req: an S3 request for the bucket being deleted
    :raises BucketNotEmpty: if the primary bucket still has objects
    :raises ServiceUnavailable: if segment cleanup fails part-way
    """
    container = req.container_name + MULTIUPLOAD_SUFFIX
    marker = ''
    seg = ''

    try:
        resp = req.get_response(self.app, 'HEAD')
        if int(resp.sw_headers['X-Container-Object-Count']) > 0:
            raise BucketNotEmpty()
        # FIXME: This extra HEAD saves unexpected segment deletion
        # but if a complete multipart upload happen while cleanup
        # segment container below, completed object may be missing its
        # segments unfortunately. To be safer, it might be good
        # to handle if the segments can be deleted for each object.
    except NoSuchBucket:
        pass

    try:
        while True:
            # delete all segments
            resp = req.get_response(self.app, 'GET', container,
                                    query={'format': 'json',
                                           'marker': marker})
            segments = json.loads(resp.body)
            for seg in segments:
                try:
                    req.get_response(
                        self.app, 'DELETE', container,
                        swob.bytes_to_wsgi(seg['name'].encode('utf8')))
                except NoSuchKey:
                    # already gone; nothing to do
                    pass
                except InternalError:
                    raise ServiceUnavailable()
            if segments:
                # page through the listing using the last name seen
                marker = seg['name']
            else:
                break
        req.get_response(self.app, 'DELETE', container)
    except NoSuchBucket:
        # no segments bucket to clean up
        return
    except (BucketNotEmpty, InternalError):
        raise ServiceUnavailable()
def _in_proxy_reverse_listing(self, account_name, lcontainer, lprefix,
                              req, failed_marker, failed_listing):
    '''Get the complete prefix listing and reverse it on the proxy.

    This is only necessary if we encounter a response from a
    container-server that does not respect the ``reverse`` param
    included by default in ``_listing_pages_iter``. This may happen
    during rolling upgrades from pre-2.6.0 swift.

    :param failed_marker: the marker that was used when we encountered
                          the non-reversed listing
    :param failed_listing: the non-reversed listing that was encountered.
                           If ``failed_marker`` is blank, we can use this
                           to save ourselves a request
    :returns: an iterator over all objects starting with ``lprefix``
              (up to but not including the failed marker) in reverse
              order
    '''
    complete_listing = []
    if not failed_marker:
        # We've never gotten a reversed listing. So save a request and
        # use the failed listing.
        complete_listing.extend(failed_listing)
        marker = bytes_to_wsgi(complete_listing[-1]['name'].encode('utf8'))
    else:
        # We've gotten at least one reversed listing. Have to start at
        # the beginning.
        marker = ''

    # First, take the *entire* prefix listing into memory
    try:
        for page in self._listing_pages_iter(account_name, lcontainer,
                                             lprefix, req, marker,
                                             end_marker=failed_marker,
                                             reverse=False):
            complete_listing.extend(page)
    except ListingIterNotFound:
        pass

    # Now that we've got everything, return the whole listing as one giant
    # reversed page
    return reversed(complete_listing)
def GET(self, req):
    """
    Handle GET Service request

    Lists the requester's buckets as a ListAllMyBucketsResult XML
    document, optionally filtering out buckets the requester does not
    own when s3_acl and check_bucket_owner are enabled.
    """
    self.set_s3api_command(req, 'list-buckets')

    resp = req.get_response(self.app, query={'format': 'json'})

    containers = json.loads(resp.body)

    # hide containers whose names are not valid S3 bucket names
    containers = filter(
        lambda item: validate_bucket_name(
            item['name'], self.conf.dns_compliant_bucket_names),
        containers)

    # we don't keep the creation time of a bucket (s3cmd doesn't
    # work without that) so we use something bogus.
    elem = Element('ListAllMyBucketsResult')

    owner = SubElement(elem, 'Owner')
    SubElement(owner, 'ID').text = req.user_id
    SubElement(owner, 'DisplayName').text = req.user_id

    buckets = SubElement(elem, 'Buckets')
    for c in containers:
        if self.conf.s3_acl and self.conf.check_bucket_owner:
            # verify ownership with a HEAD; skip buckets we can't access
            container = bytes_to_wsgi(c['name'].encode('utf8'))
            try:
                req.get_response(self.app, 'HEAD', container)
            except AccessDenied:
                continue
            except NoSuchBucket:
                continue

        bucket = SubElement(buckets, 'Bucket')
        SubElement(bucket, 'Name').text = c['name']
        SubElement(bucket, 'CreationDate').text = \
            '2009-02-03T16:45:09.000Z'

    body = tostring(elem)

    return HTTPOk(content_type='application/xml', body=body)
def encrypt_header_val(crypto, value, key):
    """
    Encrypt a header value using the supplied key.

    :param crypto: a Crypto instance
    :param value: value to encrypt
    :param key: crypto key to use
    :returns: a tuple of (encrypted value, crypto_meta) where crypto_meta
        is a dict of form returned by
        :py:func:`~swift.common.middleware.crypto.Crypto.get_crypto_meta`
    :raises ValueError: if value is empty
    """
    if not value:
        raise ValueError('empty value is not acceptable')

    meta = crypto.create_crypto_meta()
    ctxt = crypto.create_encryption_ctxt(key, meta['iv'])
    ciphertext = ctxt.update(wsgi_to_bytes(value))
    return bytes_to_wsgi(base64.b64encode(ciphertext)), meta
def GET(self, req):
    """
    Handle GET Service request

    Lists the requester's buckets as a ListAllMyBucketsResult XML
    document, optionally filtering out buckets the requester does not
    own when s3_acl and check_bucket_owner are enabled.
    """
    resp = req.get_response(self.app, query={'format': 'json'})

    containers = json.loads(resp.body)

    # hide containers whose names are not valid S3 bucket names
    containers = filter(
        lambda item: validate_bucket_name(
            item['name'], self.conf.dns_compliant_bucket_names),
        containers)

    # we don't keep the creation time of a bucket (s3cmd doesn't
    # work without that) so we use something bogus.
    elem = Element('ListAllMyBucketsResult')

    owner = SubElement(elem, 'Owner')
    SubElement(owner, 'ID').text = req.user_id
    SubElement(owner, 'DisplayName').text = req.user_id

    buckets = SubElement(elem, 'Buckets')
    for c in containers:
        if self.conf.s3_acl and self.conf.check_bucket_owner:
            # verify ownership with a HEAD; skip buckets we can't access
            container = bytes_to_wsgi(c['name'].encode('utf8'))
            try:
                req.get_response(self.app, 'HEAD', container)
            except AccessDenied:
                continue
            except NoSuchBucket:
                continue

        bucket = SubElement(buckets, 'Bucket')
        SubElement(bucket, 'Name').text = c['name']
        SubElement(bucket, 'CreationDate').text = \
            '2009-02-03T16:45:09.000Z'

    body = tostring(elem)

    return HTTPOk(content_type='application/xml', body=body)
def _in_proxy_reverse_listing(self, account_name, lcontainer, lprefix,
                              req, failed_marker, failed_listing):
    '''Get the complete prefix listing and reverse it on the proxy.

    This is only necessary if we encounter a response from a
    container-server that does not respect the ``reverse`` param
    included by default in ``_listing_pages_iter``. This may happen
    during rolling upgrades from pre-2.6.0 swift.

    :param failed_marker: the marker that was used when we encountered
                          the non-reversed listing
    :param failed_listing: the non-reversed listing that was encountered.
                           If ``failed_marker`` is blank, we can use this
                           to save ourselves a request
    :returns: an iterator over all objects starting with ``lprefix``
              (up to but not including the failed marker) in reverse
              order
    '''
    complete_listing = []
    if not failed_marker:
        # We've never gotten a reversed listing. So save a request and
        # use the failed listing.
        complete_listing.extend(failed_listing)
        marker = bytes_to_wsgi(complete_listing[-1]['name'].encode('utf8'))
    else:
        # We've gotten at least one reversed listing. Have to start at
        # the beginning.
        marker = ''

    # First, take the *entire* prefix listing into memory
    try:
        for page in self._listing_pages_iter(
                account_name, lcontainer, lprefix, req,
                marker, end_marker=failed_marker, reverse=False):
            complete_listing.extend(page)
    except ListingIterNotFound:
        pass

    # Now that we've got everything, return the whole listing as one giant
    # reversed page
    return reversed(complete_listing)
def handle_obj_versions_delete_pop(self, req, versions_cont, api_version,
                                   account_name, container_name,
                                   object_name):
    """
    Handle DELETE requests when in stack mode.

    Delete current version of object and pop previous version in its
    place: the most recent entry in the versions container is either a
    delete marker (restore the version before it, then delete the
    marker) or real data (restore it, then delete the restored source).

    :param req: original request.
    :param versions_cont: container where previous versions of the object
                          are stored.
    :param api_version: api version.
    :param account_name: account name.
    :param container_name: container name.
    :param object_name: object name.
    :returns: the response to the (possibly redirected) DELETE, or an
              authorization failure response.
    """
    listing_prefix = self._build_versions_object_prefix(object_name)
    # newest-first iterator over stored versions of this object
    item_iter = self._listing_iter(account_name, versions_cont,
                                   listing_prefix, req)

    auth_token_header = {'X-Auth-Token': req.headers.get('X-Auth-Token')}
    authed = False
    for previous_version in item_iter:
        if not authed:
            # validate the write access to the versioned container before
            # making any backend requests
            if 'swift.authorize' in req.environ:
                container_info = get_container_info(req.environ, self.app,
                                                    swift_source='VW')
                req.acl = container_info.get('write_acl')
                aresp = req.environ['swift.authorize'](req)
                if aresp:
                    return aresp
            authed = True

        if previous_version['content_type'] == DELETE_MARKER_CONTENT_TYPE:
            # check whether we have data in the versioned container
            obj_head_headers = {'X-Newest': 'True'}
            obj_head_headers.update(auth_token_header)
            head_req = make_pre_authed_request(
                req.environ, path=wsgi_quote(req.path_info),
                method='HEAD', headers=obj_head_headers,
                swift_source='VW')
            hresp = head_req.get_response(self.app)
            drain_and_close(hresp)

            if hresp.status_int != HTTP_NOT_FOUND:
                self._check_response_error(req, hresp)
                # if there's an existing object, then just let the delete
                # through (i.e., restore to the delete-marker state):
                break

            # no data currently in the container (delete marker is
            # current): walk further back for a real version to restore
            for version_to_restore in item_iter:
                if version_to_restore['content_type'] == \
                        DELETE_MARKER_CONTENT_TYPE:
                    # Nothing to restore
                    break
                obj_to_restore = bytes_to_wsgi(
                    version_to_restore['name'].encode('utf-8'))
                # clear any query string so it isn't replayed on the
                # restore subrequest
                req.environ['QUERY_STRING'] = ''
                restored_path = self._restore_data(
                    req, versions_cont, api_version, account_name,
                    container_name, object_name, obj_to_restore)
                if not restored_path:
                    continue

                old_del_req = make_pre_authed_request(
                    req.environ, path=wsgi_quote(restored_path),
                    method='DELETE', headers=auth_token_header,
                    swift_source='VW')
                del_resp = old_del_req.get_response(self.app)
                drain_and_close(del_resp)
                if del_resp.status_int != HTTP_NOT_FOUND:
                    self._check_response_error(req, del_resp)
                    # else, well, it existed long enough to do the
                    # copy; we won't worry too much
                break
            prev_obj_name = bytes_to_wsgi(
                previous_version['name'].encode('utf-8'))
            marker_path = "/%s/%s/%s/%s" % (api_version, account_name,
                                            versions_cont, prev_obj_name)
            # done restoring, redirect the delete to the marker
            req = make_pre_authed_request(
                req.environ, path=wsgi_quote(marker_path),
                method='DELETE', headers=auth_token_header,
                swift_source='VW')
        else:
            # there are older versions so copy the previous version to the
            # current object and delete the previous version
            prev_obj_name = bytes_to_wsgi(
                previous_version['name'].encode('utf-8'))
            # clear any query string so it isn't replayed on the restore
            # subrequest
            req.environ['QUERY_STRING'] = ''
            restored_path = self._restore_data(req, versions_cont,
                                               api_version, account_name,
                                               container_name,
                                               object_name, prev_obj_name)
            if not restored_path:
                continue

            # redirect the original DELETE to the source of the reinstated
            # version object - we already auth'd original req so make a
            # pre-authed request
            req = make_pre_authed_request(
                req.environ, path=wsgi_quote(restored_path),
                method='DELETE', headers=auth_token_header,
                swift_source='VW')

            # remove 'X-If-Delete-At', since it is not for the older copy
            if 'X-If-Delete-At' in req.headers:
                del req.headers['X-If-Delete-At']
        # only the newest listing entry matters; stop after one pass
        break

    # handle DELETE request here in case it was modified
    return req.get_response(self.app)
def updates(self):
    """
    Handles the UPDATES step of an SSYNC request.

    Receives a set of PUT and DELETE subrequests that will be
    routed to the object server itself for processing. These
    contain the information requested by the MISSING_CHECK step.

    The PUT and DELETE subrequests are formatted pretty much
    exactly like regular HTTP requests, excepting the HTTP
    version on the first request line.

    The process is generally:

        1. Sender sends `:UPDATES: START` and begins sending the
           PUT and DELETE subrequests.

        2. Receiver gets `:UPDATES: START` and begins routing the
           subrequests to the object server.

        3. Sender sends `:UPDATES: END`.

        4. Receiver gets `:UPDATES: END` and sends `:UPDATES:
           START` and `:UPDATES: END` (assuming no errors).

        5. Sender gets `:UPDATES: START` and `:UPDATES: END`.

    If too many subrequests fail, as configured by
    replication_failure_threshold and replication_failure_ratio,
    the receiver will hang up the request early so as to not
    waste any more time.

    At step 4, the receiver will send back an error if there were
    any failures (that didn't cause a hangup due to the above
    thresholds) so the sender knows the whole was not entirely a
    success. This is so the sender knows if it can remove an out
    of place partition, for example.
    """
    with exceptions.MessageTimeout(
            self.app.client_timeout, 'updates start'):
        line = self.fp.readline(self.app.network_chunk_size)
    if line.strip() != b':UPDATES: START':
        raise Exception('Looking for :UPDATES: START got %r'
                        % line[:1024])
    successes = 0
    failures = 0
    while True:
        with exceptions.MessageTimeout(
                self.app.client_timeout, 'updates line'):
            line = self.fp.readline(self.app.network_chunk_size)
        if not line or line.strip() == b':UPDATES: END':
            break
        # Read first line METHOD PATH of subrequest.
        method, path = swob.bytes_to_wsgi(line.strip()).split(' ', 1)
        subreq = swob.Request.blank(
            '/%s/%s%s' % (self.device, self.partition, path),
            environ={'REQUEST_METHOD': method})
        # Read header lines.
        content_length = None
        replication_headers = []
        while True:
            with exceptions.MessageTimeout(self.app.client_timeout):
                line = self.fp.readline(self.app.network_chunk_size)
            if not line:
                raise Exception(
                    'Got no headers for %s %s' % (method, path))
            line = line.strip()
            if not line:
                # blank line terminates the header section
                break
            header, value = swob.bytes_to_wsgi(line).split(':', 1)
            header = header.strip().lower()
            value = value.strip()
            subreq.headers[header] = value
            if header != 'etag':
                # make sure ssync doesn't cause 'Etag' to be added to
                # obj metadata in addition to 'ETag' which object server
                # sets (note capitalization)
                replication_headers.append(header)
            if header == 'content-length':
                content_length = int(value)
        # Establish subrequest body, if needed.
        if method in ('DELETE', 'POST'):
            # bodiless methods must not carry a payload
            if content_length not in (None, 0):
                raise Exception(
                    '%s subrequest with content-length %s'
                    % (method, path))
        elif method == 'PUT':
            if content_length is None:
                raise Exception(
                    'No content-length sent for %s %s' % (method, path))

            def subreq_iter():
                # stream exactly content_length bytes off the SSYNC
                # connection into the subrequest body
                left = content_length
                while left > 0:
                    with exceptions.MessageTimeout(
                            self.app.client_timeout,
                            'updates content'):
                        chunk = self.fp.read(
                            min(left, self.app.network_chunk_size))
                    if not chunk:
                        raise exceptions.ChunkReadError(
                            'Early termination for %s %s'
                            % (method, path))
                    left -= len(chunk)
                    yield chunk
            subreq.environ['wsgi.input'] = utils.FileLikeIter(
                subreq_iter())
        else:
            raise Exception('Invalid subrequest method %s' % method)
        subreq.headers['X-Backend-Storage-Policy-Index'] = int(self.policy)
        subreq.headers['X-Backend-Replication'] = 'True'
        if self.frag_index is not None:
            # primary node should not 409 if it has a non-primary fragment
            subreq.headers['X-Backend-Ssync-Frag-Index'] = self.frag_index
        if replication_headers:
            subreq.headers['X-Backend-Replication-Headers'] = \
                ' '.join(replication_headers)
        # Route subrequest and translate response.
        resp = subreq.get_response(self.app)
        if http.is_success(resp.status_int) or \
                resp.status_int == http.HTTP_NOT_FOUND:
            successes += 1
        else:
            self.app.logger.warning(
                'ssync subrequest failed with %s: %s %s' %
                (resp.status_int, method, subreq.path))
            failures += 1
        if failures >= self.app.replication_failure_threshold and (
                not successes or
                float(failures) / successes >
                self.app.replication_failure_ratio):
            # too lossy a session; hang up rather than waste more time
            raise Exception(
                'Too many %d failures to %d successes'
                % (failures, successes))
        # The subreq may have failed, but we want to read the rest of the
        # body from the remote side so we can continue on with the next
        # subreq.
        for junk in subreq.environ['wsgi.input']:
            pass
    if failures:
        raise swob.HTTPInternalServerError(
            'ERROR: With :UPDATES: %d failures to %d successes'
            % (failures, successes))
    yield b':UPDATES: START\r\n'
    yield b':UPDATES: END\r\n'
def _get_from_shards(self, req, resp):
    """
    Build an object listing by issuing GETs to the shard containers
    described in ``resp.body``, merging pages until the request limit
    is reached.

    :param req: the original listing request; its params are copied
        and the marker/end_marker adjusted per shard.
    :param resp: backend response whose JSON body describes shard
        ranges; returned unchanged when no shard ranges are found.
    :returns: ``resp`` with its body replaced by the merged listing.
    """
    # construct listing using shards described by the response body
    shard_ranges = [ShardRange.from_dict(data)
                    for data in json.loads(resp.body)]
    self.app.logger.debug('GET listing from %s shards for: %s',
                          len(shard_ranges), req.path_qs)
    if not shard_ranges:
        # can't find ranges or there was a problem getting the ranges. So
        # return what we have.
        return resp

    objects = []
    req_limit = int(req.params.get('limit') or CONTAINER_LISTING_LIMIT)
    params = req.params.copy()
    params.pop('states', None)
    req.headers.pop('X-Backend-Record-Type', None)
    reverse = config_true_value(params.get('reverse'))
    marker = wsgi_to_str(params.get('marker'))
    end_marker = wsgi_to_str(params.get('end_marker'))
    prefix = wsgi_to_str(params.get('prefix'))

    limit = req_limit
    for shard_range in shard_ranges:
        params['limit'] = limit
        # Always set marker to ensure that object names less than or equal
        # to those already in the listing are not fetched; if the listing
        # is empty then the original request marker, if any, is used. This
        # allows misplaced objects below the expected shard range to be
        # included in the listing.
        if objects:
            last_name = objects[-1].get('name',
                                        objects[-1].get('subdir', u''))
            params['marker'] = bytes_to_wsgi(last_name.encode('utf-8'))
        elif marker:
            params['marker'] = str_to_wsgi(marker)
        else:
            params['marker'] = ''
        # Always set end_marker to ensure that misplaced objects beyond
        # the expected shard range are not fetched. This prevents a
        # misplaced object obscuring correctly placed objects in the next
        # shard range.
        if end_marker and end_marker in shard_range:
            params['end_marker'] = str_to_wsgi(end_marker)
        elif reverse:
            params['end_marker'] = str_to_wsgi(shard_range.lower_str)
        else:
            params['end_marker'] = str_to_wsgi(shard_range.end_marker)

        if (shard_range.account == self.account_name and
                shard_range.container == self.container_name):
            # directed back to same container - force GET of objects
            headers = {'X-Backend-Record-Type': 'object'}
        else:
            headers = None

        if prefix:
            # skip shards that cannot contain any name with this prefix
            if prefix > shard_range:
                continue
            try:
                just_past = prefix[:-1] + chr(ord(prefix[-1]) + 1)
            except ValueError:
                # prefix ends at the top of the codepoint range; no
                # upper bound can be formed, so don't skip
                pass
            else:
                if just_past < shard_range:
                    continue

        self.app.logger.debug('Getting from %s %s with %s',
                              shard_range, shard_range.name, headers)
        objs, shard_resp = self._get_container_listing(
            req, shard_range.account, shard_range.container,
            headers=headers, params=params)

        if not objs:
            # tolerate errors or empty shard containers
            continue

        objects.extend(objs)
        limit -= len(objs)

        if limit <= 0:
            break
        last_name = objects[-1].get('name',
                                    objects[-1].get('subdir', u''))
        if six.PY2:
            last_name = last_name.encode('utf8')
        if end_marker and reverse and end_marker >= last_name:
            break
        if end_marker and not reverse and end_marker <= last_name:
            break

    resp.body = json.dumps(objects).encode('ascii')
    constrained = any(req.params.get(constraint) for constraint in (
        'marker', 'end_marker', 'path', 'prefix', 'delimiter'))
    if not constrained and len(objects) < req_limit:
        self.app.logger.debug('Setting object count to %s' % len(objects))
        # prefer the actual listing stats over the potentially outdated
        # root stats. This condition is only likely when a sharded
        # container is shrinking or in tests; typically a sharded
        # container will have more than CONTAINER_LISTING_LIMIT objects
        # so any unconstrained listing will be capped by the limit and
        # total object stats cannot therefore be inferred from the
        # listing.
        resp.headers['X-Container-Object-Count'] = len(objects)
        resp.headers['X-Container-Bytes-Used'] = sum(
            [o['bytes'] for o in objects])
    return resp
def handle_obj_versions_delete_pop(self, req, versions_cont, api_version,
                                   account_name, container_name,
                                   object_name):
    """
    Handle DELETE requests when in stack mode.

    Delete current version of object and pop previous version in its
    place: the most recent entry in the versions container is either a
    delete marker (restore the version before it, then delete the
    marker) or real data (restore it, then delete the restored source).

    :param req: original request.
    :param versions_cont: container where previous versions of the object
                          are stored.
    :param api_version: api version.
    :param account_name: account name.
    :param container_name: container name.
    :param object_name: object name.
    :returns: the response to the (possibly redirected) DELETE, or an
              authorization failure response.
    """
    listing_prefix = self._build_versions_object_prefix(object_name)
    # newest-first iterator over stored versions of this object
    item_iter = self._listing_iter(account_name, versions_cont,
                                   listing_prefix, req)

    auth_token_header = {'X-Auth-Token': req.headers.get('X-Auth-Token')}
    authed = False
    for previous_version in item_iter:
        if not authed:
            # validate the write access to the versioned container before
            # making any backend requests
            if 'swift.authorize' in req.environ:
                container_info = get_container_info(
                    req.environ, self.app)
                req.acl = container_info.get('write_acl')
                aresp = req.environ['swift.authorize'](req)
                if aresp:
                    return aresp
            authed = True

        if previous_version['content_type'] == DELETE_MARKER_CONTENT_TYPE:
            # check whether we have data in the versioned container
            obj_head_headers = {'X-Newest': 'True'}
            obj_head_headers.update(auth_token_header)
            head_req = make_pre_authed_request(
                req.environ, path=wsgi_quote(req.path_info),
                method='HEAD', headers=obj_head_headers,
                swift_source='VW')
            hresp = head_req.get_response(self.app)
            close_if_possible(hresp.app_iter)

            if hresp.status_int != HTTP_NOT_FOUND:
                self._check_response_error(req, hresp)
                # if there's an existing object, then just let the delete
                # through (i.e., restore to the delete-marker state):
                break

            # no data currently in the container (delete marker is
            # current): walk further back for a real version to restore
            for version_to_restore in item_iter:
                if version_to_restore['content_type'] == \
                        DELETE_MARKER_CONTENT_TYPE:
                    # Nothing to restore
                    break
                obj_to_restore = bytes_to_wsgi(
                    version_to_restore['name'].encode('utf-8'))
                restored_path = self._restore_data(
                    req, versions_cont, api_version, account_name,
                    container_name, object_name, obj_to_restore)
                if not restored_path:
                    continue

                old_del_req = make_pre_authed_request(
                    req.environ, path=wsgi_quote(restored_path),
                    method='DELETE', headers=auth_token_header,
                    swift_source='VW')
                del_resp = old_del_req.get_response(self.app)
                close_if_possible(del_resp.app_iter)
                if del_resp.status_int != HTTP_NOT_FOUND:
                    self._check_response_error(req, del_resp)
                    # else, well, it existed long enough to do the
                    # copy; we won't worry too much
                break
            prev_obj_name = bytes_to_wsgi(
                previous_version['name'].encode('utf-8'))
            marker_path = "/%s/%s/%s/%s" % (
                api_version, account_name, versions_cont,
                prev_obj_name)
            # done restoring, redirect the delete to the marker
            req = make_pre_authed_request(
                req.environ, path=wsgi_quote(marker_path),
                method='DELETE', headers=auth_token_header,
                swift_source='VW')
        else:
            # there are older versions so copy the previous version to the
            # current object and delete the previous version
            prev_obj_name = bytes_to_wsgi(
                previous_version['name'].encode('utf-8'))
            restored_path = self._restore_data(
                req, versions_cont, api_version, account_name,
                container_name, object_name, prev_obj_name)
            if not restored_path:
                continue

            # redirect the original DELETE to the source of the reinstated
            # version object - we already auth'd original req so make a
            # pre-authed request
            req = make_pre_authed_request(
                req.environ, path=wsgi_quote(restored_path),
                method='DELETE', headers=auth_token_header,
                swift_source='VW')

            # remove 'X-If-Delete-At', since it is not for the older copy
            if 'X-If-Delete-At' in req.headers:
                del req.headers['X-If-Delete-At']
        # only the newest listing entry matters; stop after one pass
        break

    # handle DELETE request here in case it was modified
    return req.get_response(self.app)
def test_check_symlink_header_invalid_format(self):
    """
    Malformed X-Symlink-Target / X-Symlink-Target-Account values must
    raise 412 Precondition Failed with a descriptive body, including
    url-encoded and multi-byte cases.
    """
    def do_test(headers, status, err_msg):
        # helper: attempt a PUT with the given headers and assert the
        # raised HTTPException's status and body
        req = Request.blank('/v1/a/c/o', method='PUT',
                            headers=headers)
        with self.assertRaises(swob.HTTPException) as cm:
            symlink._check_symlink_header(req)
        self.assertEqual(cm.exception.status, status)
        self.assertEqual(cm.exception.body, err_msg)

    # leading slash / missing slash in the target
    do_test({'X-Symlink-Target': '/c1/o1'},
            '412 Precondition Failed',
            b'X-Symlink-Target header must be of the '
            b'form <container name>/<object name>')
    do_test({'X-Symlink-Target': 'c1o1'},
            '412 Precondition Failed',
            b'X-Symlink-Target header must be of the '
            b'form <container name>/<object name>')
    # slashes anywhere in the account name are rejected
    do_test({'X-Symlink-Target': 'c1/o1',
             'X-Symlink-Target-Account': '/another'},
            '412 Precondition Failed',
            b'Account name cannot contain slashes')
    do_test({'X-Symlink-Target': 'c1/o1',
             'X-Symlink-Target-Account': 'an/other'},
            '412 Precondition Failed',
            b'Account name cannot contain slashes')
    # url encoded case
    do_test({'X-Symlink-Target': '%2Fc1%2Fo1'},
            '412 Precondition Failed',
            b'X-Symlink-Target header must be of the '
            b'form <container name>/<object name>')
    do_test({'X-Symlink-Target': 'c1/o1',
             'X-Symlink-Target-Account': '%2Fanother'},
            '412 Precondition Failed',
            b'Account name cannot contain slashes')
    do_test({'X-Symlink-Target': 'c1/o1',
             'X-Symlink-Target-Account': 'an%2Fother'},
            '412 Precondition Failed',
            b'Account name cannot contain slashes')
    # with multi-bytes
    do_test(
        {'X-Symlink-Target':
         u'/\u30b0\u30e9\u30d6\u30eb/\u30a2\u30ba\u30ec\u30f3'},
        '412 Precondition Failed',
        b'X-Symlink-Target header must be of the '
        b'form <container name>/<object name>')
    target = u'/\u30b0\u30e9\u30d6\u30eb/\u30a2\u30ba\u30ec\u30f3'
    target = swob.bytes_to_wsgi(target.encode('utf8'))
    do_test(
        {'X-Symlink-Target': swob.wsgi_quote(target)},
        '412 Precondition Failed',
        b'X-Symlink-Target header must be of the '
        b'form <container name>/<object name>')
    account = u'\u30b0\u30e9\u30d6\u30eb/\u30a2\u30ba\u30ec\u30f3'
    do_test(
        {'X-Symlink-Target': 'c/o',
         'X-Symlink-Target-Account': account},
        '412 Precondition Failed',
        b'Account name cannot contain slashes')
    account = swob.bytes_to_wsgi(account.encode('utf8'))
    do_test(
        {'X-Symlink-Target': 'c/o',
         'X-Symlink-Target-Account': swob.wsgi_quote(account)},
        '412 Precondition Failed',
        b'Account name cannot contain slashes')
def __init__(self, *args, **kwargs):
    """
    Wrap a swift (swob) response, splitting its headers into plain
    swift headers, s3api sysmeta, and the S3-style headers exposed to
    clients (``x-amz-*`` translations, etag override, delete marker).
    """
    swob.Response.__init__(self, *args, **kwargs)

    s3_sysmeta_headers = swob.HeaderKeyDict()
    sw_headers = swob.HeaderKeyDict()
    headers = HeaderKeyDict()
    # True when the underlying object is a static large object
    self.is_slo = False

    def is_swift3_sysmeta(sysmeta_key, server_type):
        # sysmeta written by the legacy "swift3" middleware
        swift3_sysmeta_prefix = (
            'x-%s-sysmeta-swift3' % server_type).lower()
        return sysmeta_key.lower().startswith(swift3_sysmeta_prefix)

    def is_s3api_sysmeta(sysmeta_key, server_type):
        s3api_sysmeta_prefix = sysmeta_prefix(_server_type).lower()
        return sysmeta_key.lower().startswith(s3api_sysmeta_prefix)

    # First pass: split sysmeta from plain swift headers
    for key, val in self.headers.items():
        if is_sys_meta('object', key) or is_sys_meta('container', key):
            _server_type = key.split('-')[1]
            if is_swift3_sysmeta(key, _server_type):
                # To be compatible with older swift3, translate swift3
                # sysmeta to s3api sysmeta here
                key = sysmeta_prefix(_server_type) + \
                    key[len('x-%s-sysmeta-swift3-' % _server_type):]
                if key not in s3_sysmeta_headers:
                    # To avoid overwrite s3api sysmeta by older swift3
                    # sysmeta set the key only when the key does not exist
                    s3_sysmeta_headers[key] = val
            elif is_s3api_sysmeta(key, _server_type):
                s3_sysmeta_headers[key] = val
            else:
                sw_headers[key] = val
        else:
            sw_headers[key] = val

    # Handle swift headers
    for key, val in sw_headers.items():
        _key = swob.bytes_to_wsgi(swob.wsgi_to_bytes(key).lower())

        if _key.startswith('x-object-meta-'):
            # Note that AWS ignores user-defined headers with '=' in the
            # header name. We translated underscores to '=5F' on the way
            # in, though.
            headers['x-amz-meta-' + _key[14:].replace('=5f', '_')] = val
        elif _key in ('content-length', 'content-type',
                      'content-range', 'content-encoding',
                      'content-disposition', 'content-language',
                      'etag', 'last-modified', 'x-robots-tag',
                      'cache-control', 'expires'):
            # standard HTTP headers pass straight through
            headers[key] = val
        elif _key == 'x-object-version-id':
            headers['x-amz-version-id'] = val
        elif _key == 'x-copied-from-version-id':
            headers['x-amz-copy-source-version-id'] = val
        elif _key == 'x-static-large-object':
            # for delete slo
            self.is_slo = config_true_value(val)
        elif _key == 'x-backend-content-type' and \
                val == DELETE_MARKER_CONTENT_TYPE:
            headers['x-amz-delete-marker'] = 'true'

    # Check whether we stored the AWS-style etag on upload
    override_etag = s3_sysmeta_headers.get(
        sysmeta_header('object', 'etag'))
    if override_etag not in (None, ''):
        # Multipart uploads in AWS have ETags like
        #   <MD5(part_etag1 || ... || part_etagN)>-<number of parts>
        headers['etag'] = override_etag
    elif self.is_slo and 'etag' in headers:
        # Many AWS clients use the presence of a '-' to decide whether
        # to attempt client-side download validation, so even if we
        # didn't store the AWS-style header, tack on a '-N'. (Use 'N'
        # because we don't actually know how many parts there are.)
        headers['etag'] += '-N'

    self.headers = headers

    if self.etag:
        # add double quotes to the etag header
        # NOTE(review): looks like a no-op, but the assignment goes
        # through swob's ``etag`` property setter, which presumably
        # re-quotes the value -- confirm against swob.Response.etag
        self.etag = self.etag
    # Used for pure swift header handling at the request layer
    self.sw_headers = sw_headers
    self.sysmeta_headers = s3_sysmeta_headers
def _get_from_shards(self, req, resp):
    """
    Build an object listing by issuing GETs to the shard containers
    described in ``resp.body``, merging pages until the request limit
    is reached, while guarding against shard-range loops.

    :param req: the original listing request; its params are copied
        and the marker/end_marker adjusted per shard.
    :param resp: backend response whose JSON body describes shard
        ranges; returned unchanged when no shard ranges are found.
    :returns: ``resp`` with its body replaced by the merged listing.
    """
    # Construct listing using shards described by the response body.
    # The history of containers that have returned shard ranges is
    # maintained in the request environ so that loops can be avoided by
    # forcing an object listing if the same container is visited again.
    # This can happen in at least two scenarios:
    #   1. a container has filled a gap in its shard ranges with a
    #      shard range pointing to itself
    #   2. a root container returns a (stale) shard range pointing to a
    #      shard that has shrunk into the root, in which case the
    #      shrunken shard may return the root's shard range.
    shard_listing_history = req.environ.setdefault(
        'swift.shard_listing_history', [])
    shard_listing_history.append((self.account_name, self.container_name))
    shard_ranges = [ShardRange.from_dict(data)
                    for data in json.loads(resp.body)]
    self.app.logger.debug('GET listing from %s shards for: %s',
                          len(shard_ranges), req.path_qs)
    if not shard_ranges:
        # can't find ranges or there was a problem getting the ranges. So
        # return what we have.
        return resp

    objects = []
    req_limit = constrain_req_limit(req, CONTAINER_LISTING_LIMIT)
    params = req.params.copy()
    params.pop('states', None)
    req.headers.pop('X-Backend-Record-Type', None)
    reverse = config_true_value(params.get('reverse'))
    marker = wsgi_to_str(params.get('marker'))
    end_marker = wsgi_to_str(params.get('end_marker'))
    prefix = wsgi_to_str(params.get('prefix'))

    limit = req_limit
    for i, shard_range in enumerate(shard_ranges):
        params['limit'] = limit
        # Always set marker to ensure that object names less than or equal
        # to those already in the listing are not fetched; if the listing
        # is empty then the original request marker, if any, is used. This
        # allows misplaced objects below the expected shard range to be
        # included in the listing.
        if objects:
            last_name = objects[-1].get('name',
                                        objects[-1].get('subdir', u''))
            params['marker'] = bytes_to_wsgi(last_name.encode('utf-8'))
        elif marker:
            params['marker'] = str_to_wsgi(marker)
        else:
            params['marker'] = ''
        # Always set end_marker to ensure that misplaced objects beyond
        # the expected shard range are not fetched. This prevents a
        # misplaced object obscuring correctly placed objects in the next
        # shard range.
        if end_marker and end_marker in shard_range:
            params['end_marker'] = str_to_wsgi(end_marker)
        elif reverse:
            params['end_marker'] = str_to_wsgi(shard_range.lower_str)
        else:
            params['end_marker'] = str_to_wsgi(shard_range.end_marker)

        headers = {}
        if ((shard_range.account, shard_range.container) in
                shard_listing_history):
            # directed back to same container - force GET of objects
            headers['X-Backend-Record-Type'] = 'object'
        if config_true_value(req.headers.get('x-newest', False)):
            headers['X-Newest'] = 'true'

        if prefix:
            # skip shards that cannot contain any name with this prefix
            if prefix > shard_range:
                continue
            try:
                just_past = prefix[:-1] + chr(ord(prefix[-1]) + 1)
            except ValueError:
                # prefix ends at the top of the codepoint range; no
                # upper bound can be formed, so don't skip
                pass
            else:
                if just_past < shard_range:
                    continue

        self.app.logger.debug(
            'Getting listing part %d from shard %s %s with %s',
            i, shard_range, shard_range.name, headers)
        objs, shard_resp = self._get_container_listing(
            req, shard_range.account, shard_range.container,
            headers=headers, params=params)
        sharding_state = shard_resp.headers.get('x-backend-sharding-state',
                                                'unknown')

        if objs is None:
            # tolerate errors
            self.app.logger.debug(
                'Failed to get objects from shard (state=%s), total = %d',
                sharding_state, len(objects))
            continue

        self.app.logger.debug(
            'Found %d objects in shard (state=%s), total = %d',
            len(objs), sharding_state, len(objs) + len(objects))

        if not objs:
            # tolerate empty shard containers
            continue

        objects.extend(objs)
        limit -= len(objs)

        if limit <= 0:
            break
        last_name = objects[-1].get('name',
                                    objects[-1].get('subdir', u''))
        if six.PY2:
            last_name = last_name.encode('utf8')
        if end_marker and reverse and end_marker >= last_name:
            break
        if end_marker and not reverse and end_marker <= last_name:
            break

    resp.body = json.dumps(objects).encode('ascii')
    constrained = any(req.params.get(constraint) for constraint in (
        'marker', 'end_marker', 'path', 'prefix', 'delimiter'))
    if not constrained and len(objects) < req_limit:
        self.app.logger.debug('Setting object count to %s' % len(objects))
        # prefer the actual listing stats over the potentially outdated
        # root stats. This condition is only likely when a sharded
        # container is shrinking or in tests; typically a sharded
        # container will have more than CONTAINER_LISTING_LIMIT objects
        # so any unconstrained listing will be capped by the limit and
        # total object stats cannot therefore be inferred from the
        # listing.
        resp.headers['X-Container-Object-Count'] = len(objects)
        resp.headers['X-Container-Bytes-Used'] = sum(
            [o['bytes'] for o in objects])
    return resp
def setup_objects(self):
    """
    Register canned container listings and per-object responses on the
    fake swift backend used by these tests.
    """
    # (name, last_modified, hash, bytes) tuples; the mix of str/int
    # values and the non-ASCII name exercise serialization paths
    self.objects = (('lily', '2011-01-05T02:19:14.275290', '0', '3909'),
                    ('rose', '2011-01-05T02:19:14.275290', 0, 303),
                    ('viola', '2011-01-05T02:19:14.275290', '0', 3909),
                    (u'lily-\u062a', '2011-01-05T02:19:14.275290',
                     0, 390),
                    ('mu', '2011-01-05T02:19:14.275290',
                     'md5-of-the-manifest; s3_etag=0', '3909'),
                    ('slo', '2011-01-05T02:19:14.275290',
                     'md5-of-the-manifest', '3909'),
                    ('with space', '2011-01-05T02:19:14.275290', 0, 390),
                    ('with%20space', '2011-01-05T02:19:14.275290',
                     0, 390))

    objects = [{'name': item[0], 'last_modified': str(item[1]),
                'hash': str(item[2]), 'bytes': str(item[3])}
               for item in self.objects]
    # mark the 'slo' entry as a static large object
    objects[5]['slo_etag'] = '"0"'
    object_list = json.dumps(objects)

    self.prefixes = ['rose', 'viola', 'lily']
    object_list_subdir = [{"subdir": p} for p in self.prefixes]

    # segment-container DELETEs (bucket and each object)
    self.swift.register('DELETE', '/v1/AUTH_test/bucket+segments',
                        swob.HTTPNoContent, {}, json.dumps([]))
    for name, _, _, _ in self.objects:
        self.swift.register(
            'DELETE',
            '/v1/AUTH_test/bucket+segments/' +
            swob.bytes_to_wsgi(name.encode('utf-8')),
            swob.HTTPNoContent, {}, json.dumps([]))
    # segment-container listings: one paged-out (empty) and one full
    self.swift.register(
        'GET',
        '/v1/AUTH_test/bucket+segments?format=json'
        '&marker=with%2520space',
        swob.HTTPOk,
        {'Content-Type': 'application/json; charset=utf-8'},
        json.dumps([]))
    self.swift.register(
        'GET', '/v1/AUTH_test/bucket+segments?format=json&marker=',
        swob.HTTPOk, {'Content-Type': 'application/json'}, object_list)
    # bucket existence checks and listings
    self.swift.register(
        'HEAD', '/v1/AUTH_test/junk', swob.HTTPNoContent, {}, None)
    self.swift.register(
        'HEAD', '/v1/AUTH_test/nojunk', swob.HTTPNotFound, {}, None)
    self.swift.register(
        'GET', '/v1/AUTH_test/junk', swob.HTTPOk,
        {'Content-Type': 'application/json'}, object_list)
    self.swift.register(
        'GET',
        '/v1/AUTH_test/junk?delimiter=a&format=json&limit=3'
        '&marker=viola',
        swob.HTTPOk,
        {'Content-Type': 'application/json; charset=utf-8'},
        json.dumps(objects[2:]))
    self.swift.register(
        'GET', '/v1/AUTH_test/junk-subdir', swob.HTTPOk,
        {'Content-Type': 'application/json; charset=utf-8'},
        json.dumps(object_list_subdir))
    self.swift.register(
        'GET',
        '/v1/AUTH_test/subdirs?delimiter=/&format=json&limit=3',
        swob.HTTPOk, {}, json.dumps([
            {'subdir': 'nothing/'},
            {'subdir': u'but-\u062a/'},
            {'subdir': 'subdirs/'},
        ]))
def handle_extract_iter(self, req, compress_type,
                        out_content_type='text/plain'):
    """
    A generator that can be assigned to a swob Response's app_iter which,
    when iterated over, will extract and PUT the objects pulled from the
    request body. Will occasionally yield whitespace while request is
    being processed. When the request is completed will yield a response
    body that can be parsed to determine success. See above documentation
    for details.

    :params req: a swob Request
    :params compress_type: specifying the compression type of the tar.
                           Accepts '', 'gz', or 'bz2'
    """
    resp_dict = {'Response Status': HTTPCreated().status,
                 'Response Body': '', 'Number Files Created': 0}
    failed_files = []
    last_yield = time()
    if out_content_type and out_content_type.endswith('/xml'):
        # lead with the XML declaration; later keep-alive bytes are spaces
        to_yield = b'<?xml version="1.0" encoding="UTF-8"?>\n'
    else:
        to_yield = b' '
    separator = b''
    containers_accessed = set()
    # make eventlet flush each keep-alive byte immediately
    req.environ['eventlet.minimum_write_chunk_size'] = 0
    try:
        if not out_content_type:
            raise HTTPNotAcceptable(request=req)

        if req.content_length is None and \
                req.headers.get('transfer-encoding',
                                '').lower() != 'chunked':
            raise HTTPLengthRequired(request=req)
        try:
            vrs, account, extract_base = req.split_path(2, 3, True)
        except ValueError:
            raise HTTPNotFound(request=req)
        extract_base = extract_base or ''
        extract_base = extract_base.rstrip('/')
        # streaming mode: read the tar directly off the request body
        tar = tarfile.open(mode='r|' + compress_type,
                           fileobj=req.body_file)
        failed_response_type = HTTPBadRequest
        containers_created = 0
        while True:
            if last_yield + self.yield_frequency < time():
                # periodic whitespace keeps the client connection alive
                last_yield = time()
                yield to_yield
                to_yield, separator = b' ', b'\r\n\r\n'
            tar_info = tar.next()
            if tar_info is None or \
                    len(failed_files) >= self.max_failed_extractions:
                break
            if tar_info.isfile():
                obj_path = tar_info.name
                if not six.PY2:
                    obj_path = obj_path.encode('utf-8',
                                               'surrogateescape')
                obj_path = bytes_to_wsgi(obj_path)
                if obj_path.startswith('./'):
                    obj_path = obj_path[2:]
                obj_path = obj_path.lstrip('/')
                if extract_base:
                    obj_path = extract_base + '/' + obj_path
                if '/' not in obj_path:
                    continue  # ignore base level file

                destination = '/'.join(
                    ['', vrs, account, obj_path])
                container = obj_path.split('/', 1)[0]
                if not constraints.check_utf8(wsgi_to_str(destination)):
                    failed_files.append(
                        [wsgi_quote(obj_path[:self.max_path_length]),
                         HTTPPreconditionFailed().status])
                    continue
                if tar_info.size > constraints.MAX_FILE_SIZE:
                    failed_files.append([
                        wsgi_quote(obj_path[:self.max_path_length]),
                        HTTPRequestEntityTooLarge().status])
                    continue
                container_failure = None
                if container not in containers_accessed:
                    cont_path = '/'.join(['', vrs, account, container])
                    try:
                        if self.create_container(req, cont_path):
                            containers_created += 1
                            if containers_created > self.max_containers:
                                raise HTTPBadRequest(
                                    'More than %d containers to create '
                                    'from tar.' % self.max_containers)
                    except CreateContainerError as err:
                        # the object PUT to this container still may
                        # succeed if acls are set
                        container_failure = [
                            wsgi_quote(cont_path[:self.max_path_length]),
                            err.status]
                        if err.status_int == HTTP_UNAUTHORIZED:
                            raise HTTPUnauthorized(request=req)
                    except ValueError:
                        failed_files.append([
                            wsgi_quote(obj_path[:self.max_path_length]),
                            HTTPBadRequest().status])
                        continue

                tar_file = tar.extractfile(tar_info)
                create_headers = {
                    'Content-Length': tar_info.size,
                    'X-Auth-Token': req.headers.get('X-Auth-Token'),
                }

                create_obj_req = make_subrequest(
                    req.environ, method='PUT',
                    path=wsgi_quote(destination),
                    headers=create_headers,
                    agent='%(orig)s BulkExpand', swift_source='EA')
                create_obj_req.environ['wsgi.input'] = tar_file

                for pax_key, pax_value in tar_info.pax_headers.items():
                    header_name = pax_key_to_swift_header(pax_key)
                    if header_name:
                        # Both pax_key and pax_value are unicode
                        # strings; the key is already UTF-8 encoded, but
                        # we still have to encode the value.
                        create_obj_req.headers[header_name] = \
                            pax_value.encode("utf-8")

                resp = create_obj_req.get_response(self.app)
                containers_accessed.add(container)
                if resp.is_success:
                    resp_dict['Number Files Created'] += 1
                else:
                    if container_failure:
                        # container creation failed earlier; report it
                        # now that the object PUT also failed
                        failed_files.append(container_failure)
                    if resp.status_int == HTTP_UNAUTHORIZED:
                        failed_files.append([
                            wsgi_quote(obj_path[:self.max_path_length]),
                            HTTPUnauthorized().status])
                        raise HTTPUnauthorized(request=req)
                    if resp.status_int // 100 == 5:
                        # any server error downgrades the overall status
                        failed_response_type = HTTPBadGateway
                    failed_files.append([
                        wsgi_quote(obj_path[:self.max_path_length]),
                        resp.status])

        if failed_files:
            resp_dict['Response Status'] = failed_response_type().status
        elif not resp_dict['Number Files Created']:
            resp_dict['Response Status'] = HTTPBadRequest().status
            resp_dict['Response Body'] = \
                'Invalid Tar File: No Valid Files'

    except HTTPException as err:
        resp_dict['Response Status'] = err.status
        resp_dict['Response Body'] = err.body.decode('utf-8')
    except (tarfile.TarError, zlib.error) as tar_error:
        resp_dict['Response Status'] = HTTPBadRequest().status
        resp_dict['Response Body'] = 'Invalid Tar File: %s' % tar_error
    except Exception:
        self.logger.exception('Error in extract archive.')
        resp_dict['Response Status'] = HTTPServerError().status

    yield separator + get_response_body(
        out_content_type, resp_dict, failed_files, 'extract')
def handle_extract_iter(self, req, compress_type,
                        out_content_type='text/plain'):
    """
    A generator that can be assigned to a swob Response's app_iter which,
    when iterated over, will extract and PUT the objects pulled from the
    request body. Will occasionally yield whitespace while request is being
    processed. When the request is completed will yield a response body that
    can be parsed to determine success. See above documentation for details.

    :params req: a swob Request
    :params compress_type: specifying the compression type of the tar.
                           Accepts '', 'gz', or 'bz2'
    :params out_content_type: content type of the trailing response body;
                              an '/xml' type gets an XML declaration as the
                              first keepalive yield, anything else a space
    """
    # Running tally reported in the final response body.
    resp_dict = {'Response Status': HTTPCreated().status,
                 'Response Body': '', 'Number Files Created': 0}
    # Each entry is [quoted truncated path, status string].
    failed_files = []
    last_yield = time()
    if out_content_type and out_content_type.endswith('/xml'):
        # First keepalive byte sequence doubles as the XML prolog so the
        # eventual XML body parses cleanly.
        to_yield = b'<?xml version="1.0" encoding="UTF-8"?>\n'
    else:
        to_yield = b' '
    # No separator before the body until at least one keepalive was sent.
    separator = b''
    # Containers we have already attempted, so each is only created once.
    containers_accessed = set()
    # NOTE(review): eventlet-specific knob — presumably forces the
    # single-byte keepalive yields to be flushed immediately; confirm
    # against the WSGI server in use.
    req.environ['eventlet.minimum_write_chunk_size'] = 0
    try:
        if not out_content_type:
            raise HTTPNotAcceptable(request=req)

        # A body is required: either an explicit length or chunked TE.
        if req.content_length is None and \
                req.headers.get('transfer-encoding',
                                '').lower() != 'chunked':
            raise HTTPLengthRequired(request=req)
        try:
            vrs, account, extract_base = req.split_path(2, 3, True)
        except ValueError:
            raise HTTPNotFound(request=req)
        extract_base = extract_base or ''
        extract_base = extract_base.rstrip('/')
        # 'r|' (pipe) mode streams the archive without seeking, so the
        # request body never needs to be spooled in full.
        tar = tarfile.open(mode='r|' + compress_type,
                           fileobj=req.body_file)
        failed_response_type = HTTPBadRequest
        containers_created = 0
        while True:
            # Emit a keepalive byte if we have been quiet too long; after
            # the first one, subsequent keepalives are single spaces and
            # the final body gets separated by a blank line.
            if last_yield + self.yield_frequency < time():
                last_yield = time()
                yield to_yield
                to_yield, separator = b' ', b'\r\n\r\n'
            tar_info = tar.next()
            # Stop at end-of-archive or once too many extractions failed.
            if tar_info is None or \
                    len(failed_files) >= self.max_failed_extractions:
                break
            if tar_info.isfile():
                obj_path = tar_info.name
                if not six.PY2:
                    # Tar member names are str; round-trip through bytes
                    # (surrogateescape preserves undecodable bytes) into
                    # a WSGI-native string.
                    obj_path = obj_path.encode('utf-8', 'surrogateescape')
                    obj_path = bytes_to_wsgi(obj_path)
                if obj_path.startswith('./'):
                    obj_path = obj_path[2:]
                obj_path = obj_path.lstrip('/')
                if extract_base:
                    obj_path = extract_base + '/' + obj_path
                if '/' not in obj_path:
                    continue  # ignore base level file

                destination = '/'.join(['', vrs, account, obj_path])
                container = obj_path.split('/', 1)[0]
                if not constraints.check_utf8(wsgi_to_str(destination)):
                    failed_files.append([
                        wsgi_quote(obj_path[:self.max_path_length]),
                        HTTPPreconditionFailed().status
                    ])
                    continue
                if tar_info.size > constraints.MAX_FILE_SIZE:
                    failed_files.append([
                        wsgi_quote(obj_path[:self.max_path_length]),
                        HTTPRequestEntityTooLarge().status
                    ])
                    continue
                container_failure = None
                if container not in containers_accessed:
                    cont_path = '/'.join(['', vrs, account, container])
                    try:
                        if self.create_container(req, cont_path):
                            containers_created += 1
                            if containers_created > self.max_containers:
                                raise HTTPBadRequest(
                                    'More than %d containers to create '
                                    'from tar.' % self.max_containers)
                    except CreateContainerError as err:
                        # the object PUT to this container still may
                        # succeed if acls are set
                        container_failure = [
                            wsgi_quote(cont_path[:self.max_path_length]),
                            err.status
                        ]
                        if err.status_int == HTTP_UNAUTHORIZED:
                            raise HTTPUnauthorized(request=req)
                    except ValueError:
                        # create_container choked on the path itself.
                        failed_files.append([
                            wsgi_quote(obj_path[:self.max_path_length]),
                            HTTPBadRequest().status
                        ])
                        continue

                # File-like object over the member's data within the
                # stream; fed directly to the subrequest as its body.
                tar_file = tar.extractfile(tar_info)
                create_headers = {
                    'Content-Length': tar_info.size,
                    'X-Auth-Token': req.headers.get('X-Auth-Token'),
                }

                # Copy some whitelisted headers to the subrequest
                for k, v in req.headers.items():
                    if ((k.lower() in ('x-delete-at', 'x-delete-after'))
                            or is_user_meta('object', k)):
                        create_headers[k] = v

                create_obj_req = make_subrequest(
                    req.environ, method='PUT',
                    path=wsgi_quote(destination),
                    headers=create_headers,
                    agent='%(orig)s BulkExpand', swift_source='EA')
                create_obj_req.environ['wsgi.input'] = tar_file

                # Map any pax extended headers onto Swift object headers.
                for pax_key, pax_value in tar_info.pax_headers.items():
                    header_name = pax_key_to_swift_header(pax_key)
                    if header_name:
                        # Both pax_key and pax_value are unicode
                        # strings; the key is already UTF-8 encoded, but
                        # we still have to encode the value.
                        create_obj_req.headers[header_name] = \
                            pax_value.encode("utf-8")

                resp = create_obj_req.get_response(self.app)
                # Mark the container as tried regardless of outcome so we
                # do not retry its creation for later members.
                containers_accessed.add(container)
                if resp.is_success:
                    resp_dict['Number Files Created'] += 1
                else:
                    # Surface a deferred container-creation failure only
                    # when the object PUT itself also failed.
                    if container_failure:
                        failed_files.append(container_failure)
                    if resp.status_int == HTTP_UNAUTHORIZED:
                        failed_files.append([
                            wsgi_quote(obj_path[:self.max_path_length]),
                            HTTPUnauthorized().status
                        ])
                        # 401 aborts the whole extraction immediately.
                        raise HTTPUnauthorized(request=req)
                    if resp.status_int // 100 == 5:
                        # Any backend 5xx escalates the overall status
                        # reported for the batch to 502.
                        failed_response_type = HTTPBadGateway
                    failed_files.append([
                        wsgi_quote(obj_path[:self.max_path_length]),
                        resp.status
                    ])

        if failed_files:
            resp_dict['Response Status'] = failed_response_type().status
        elif not resp_dict['Number Files Created']:
            resp_dict['Response Status'] = HTTPBadRequest().status
            resp_dict['Response Body'] = 'Invalid Tar File: No Valid Files'

    except HTTPException as err:
        # Deliberate aborts raised above (401, 400, 411, 404, 406...)
        # become the reported batch status rather than propagating.
        resp_dict['Response Status'] = err.status
        resp_dict['Response Body'] = err.body.decode('utf-8')
    except (tarfile.TarError, zlib.error) as tar_error:
        resp_dict['Response Status'] = HTTPBadRequest().status
        resp_dict['Response Body'] = 'Invalid Tar File: %s' % tar_error
    except Exception:
        # Last-resort guard: log and report a 500 in the body, since the
        # HTTP status line has already been sent to the client.
        self.logger.exception('Error in extract archive.')
        resp_dict['Response Status'] = HTTPServerError().status

    yield separator + get_response_body(out_content_type, resp_dict,
                                        failed_files, 'extract')
def mkstr(prefix):
    """Append a thumbs-up emoji to *prefix* and return it as a WSGI string."""
    decorated = prefix + u'\U0001f44d'
    as_bytes = decorated.encode('utf8')
    return bytes_to_wsgi(as_bytes)