def PUT(self, app):
    """
    Check the bucket ACL once and stash the request ACL for later use.

    On the first call (``self.acl_checked`` is false) this runs
    ``self._handle_acl`` with a HEAD and ``obj=''``, builds an ACL from the
    request headers and stores its encoded form in the request's object
    'tmpacl' sysmeta header. Once ``self.acl_checked`` is set, further
    calls do nothing.

    :param app: application handed through to ``self._handle_acl``
    """
    if self.acl_checked:
        return

    bucket_resp = self._handle_acl(app, 'HEAD', obj='')
    request_acl = ACL.from_headers(
        self.req.headers,
        bucket_resp.bucket_acl.owner,
        Owner(self.user_id, self.user_id))
    encoded = encode_acl('object', request_acl)

    # The encoded ACL comes back under the 'acl' key; it is parked under
    # 'tmpacl' on the request instead.
    encoded_acl = encoded[sysmeta_header('object', 'acl')]
    self.req.headers[sysmeta_header('object', 'tmpacl')] = encoded_acl
    self.acl_checked = True
def POST(self, req):
    """
    Handles Initiate Multipart Upload.

    Records the client's Content-Type in object sysmeta, makes sure the
    segments container exists (creating it with the storage policy and
    ACLs of the primary bucket when it is missing), writes an empty upload
    marker named ``<object>/<upload id>`` into it and returns the
    InitiateMultipartUploadResult document.
    """
    # Create a unique S3 upload id from UUID to avoid duplicates.
    upload_id = unique_id()
    seg_container = req.container_name + MULTIUPLOAD_SUFFIX

    # Remember what the client sent: the marker itself is always stored as
    # 'application/directory'.
    client_content_type = req.headers.get('Content-Type')
    hct_key = sysmeta_header('object', 'has-content-type')
    if client_content_type:
        req.headers[hct_key] = 'yes'
        req.headers[sysmeta_header('object', 'content-type')] = \
            client_content_type
    else:
        req.headers[hct_key] = 'no'
    req.headers['Content-Type'] = 'application/directory'

    try:
        # Probe the segments container through a shallow copy so the
        # original request keeps pointing at the primary bucket.
        seg_req = copy.copy(req)
        seg_req.environ = copy.copy(req.environ)
        seg_req.container_name = seg_container
        seg_req.get_container_info(self.app)
    except NoSuchBucket:
        try:
            # multi-upload bucket doesn't exist, create one with
            # same storage policy and acls as the primary bucket
            primary_info = req.get_container_info(self.app)
            create_headers = {
                'X-Storage-Policy':
                    POLICIES[primary_info['storage_policy']].name,
            }
            for info_key, header_name in (
                    ('read_acl', 'X-Container-Read'),
                    ('write_acl', 'X-Container-Write')):
                if primary_info.get(info_key):
                    create_headers[header_name] = primary_info[info_key]
            seg_req.get_response(self.app, 'PUT', seg_container, '',
                                 headers=create_headers)
        except (BucketAlreadyExists, BucketAlreadyOwnedByYou):
            pass

    marker_name = '%s/%s' % (req.object_name, upload_id)

    for digest_header in ('Etag', 'Content-Md5'):
        req.headers.pop(digest_header, None)

    req.get_response(self.app, 'PUT', seg_container, marker_name, body='')

    result_elem = Element('InitiateMultipartUploadResult')
    for tag, text in (('Bucket', req.container_name),
                      ('Key', req.object_name),
                      ('UploadId', upload_id)):
        SubElement(result_elem, tag).text = text

    return HTTPOk(body=tostring(result_elem),
                  content_type='application/xml')
def PUT(self, app):
    """
    Check the bucket ACL once and stash the request ACL for later use.

    On the first call (``self.acl_checked`` is false) this runs
    ``self._handle_acl`` with a HEAD and ``obj=''``, builds an ACL from the
    request headers and stores its encoded form in the request's object
    'tmpacl' sysmeta header. Once ``self.acl_checked`` is set, further
    calls do nothing.

    :param app: application handed through to ``self._handle_acl``
    """
    if self.acl_checked:
        return

    bucket_resp = self._handle_acl(app, 'HEAD', obj='')
    request_acl = ACL.from_headers(
        self.req.headers,
        bucket_resp.bucket_acl.owner,
        Owner(self.user_id, self.user_id))
    encoded = encode_acl('object', request_acl)

    # The encoded ACL comes back under the 'acl' key; it is parked under
    # 'tmpacl' on the request instead.
    encoded_acl = encoded[sysmeta_header('object', 'acl')]
    self.req.headers[sysmeta_header('object', 'tmpacl')] = encoded_acl
    self.acl_checked = True
def PUT(self, app):
    """
    Copy the ACL stashed on the upload marker onto the request.

    HEADs the marker object ``<object>/<uploadId>`` in the segments
    container, with ``oio.ephemeral_object`` set to True in the request
    environ for the duration of that call, then stores the marker's
    'tmpacl' sysmeta value in the request's object 'acl' sysmeta header.

    :param app: application the HEAD sub-request is sent to
    """
    marker_container = self.req.container_name + MULTIUPLOAD_SUFFIX
    marker_name = '%s/%s' % (self.obj, self.req.params['uploadId'])

    self.req.environ['oio.ephemeral_object'] = True
    try:
        marker_resp = self.req._get_response(
            app, 'HEAD', marker_container, marker_name)
    finally:
        # Reset the flag whether or not the HEAD succeeded.
        self.req.environ['oio.ephemeral_object'] = False

    stashed_acl = marker_resp.sysmeta_headers.get(
        sysmeta_header('object', 'tmpacl'))
    self.req.headers[sysmeta_header('object', 'acl')] = stashed_acl
def test_fake_swift_sysmeta(self):
    """
    Register a response with a container ACL sysmeta header, re-register
    it without that header, and check the headers against the original
    set both times.
    """
    acl_key = sysmeta_header('container', 'acl')
    fake_swift = FakeSwift()

    orig_headers = HeaderKeyDict()
    orig_headers.update({acl_key: 'test',
                         'x-container-meta-foo': 'bar'})
    fake_swift.register(self.method, self.path, MagicMock(),
                        orig_headers, None)
    self._check_headers(fake_swift, self.method, self.path, orig_headers)

    # Second registration drops the sysmeta header; the expectation passed
    # to _check_headers is still the original header set.
    stripped_headers = orig_headers.copy()
    del stripped_headers[acl_key.title()]
    fake_swift.register(self.method, self.path, MagicMock(),
                        stripped_headers, None)
    self._check_headers(fake_swift, self.method, self.path, orig_headers)
def _get_upload_info(req, app, upload_id):
    """
    Return the HEAD response describing a multipart upload.

    The upload marker ``<object>/<upload_id>`` in the segments container is
    tried first. If it is gone, the target object itself is accepted when
    its 'upload-id' sysmeta equals ``upload_id``.

    :param req: the S3 request; its X-Amz-Copy-Source header is removed
                while the HEADs run and restored afterwards
    :param app: application the HEAD sub-requests are sent to
    :param upload_id: upload id to look up
    :returns: the HEAD response of the marker or of the matching object
    :raises NoSuchUpload: if neither lookup identifies the upload
    """
    marker_container = req.container_name + MULTIUPLOAD_SUFFIX
    marker_name = '%s/%s' % (req.object_name, upload_id)

    # XXX: if we leave the copy-source header, somewhere later we might
    # drop in a ?version-id=... query string that's utterly inappropriate
    # for the upload marker. Until we get around to fixing that, just pop
    # it off for now...
    saved_copy_source = req.headers.pop('X-Amz-Copy-Source', None)
    try:
        return req.get_response(app, 'HEAD',
                                container=marker_container,
                                obj=marker_name)
    except NoSuchKey:
        # No marker: fall back to the object the request itself names.
        try:
            target_resp = req.get_response(app, 'HEAD')
        except NoSuchKey:
            target_resp = None
        if target_resp is not None and target_resp.sysmeta_headers.get(
                sysmeta_header('object', 'upload-id')) == upload_id:
            return target_resp
        raise NoSuchUpload(upload_id=upload_id)
    finally:
        # ...making sure to restore any copy-source before returning
        if saved_copy_source is not None:
            req.headers['X-Amz-Copy-Source'] = saved_copy_source
def GETorHEAD(self, req):
    """
    Serve a GET or HEAD request for an object.

    Conditional headers (If-Match / If-None-Match) are extended with the
    plain form of any '-N' suffixed etag, and the backend is pointed at the
    object 'etag' sysmeta for comparison. ``response-*`` query parameters
    override the matching response headers. HEAD responses have their
    body iterator cleared.
    """
    conditional_headers = [name for name in ('if-match', 'if-none-match')
                           if name in req.headers]
    for name in conditional_headers:
        # list_from_csv() is evaluated before the header is appended to.
        for raw_value in list_from_csv(req.headers[name]):
            etag = normalize_etag(raw_value)
            if etag.endswith('-N'):
                # Deal with fake S3-like etags for SLOs uploaded via Swift
                req.headers[name] += ', ' + etag[:-2]

    if conditional_headers:
        # Update where to look
        update_etag_is_at_header(req, sysmeta_header('object', 'etag'))

    resp = req.get_response(self.app)

    if req.method == 'HEAD':
        resp.app_iter = None

    for header in ('content-type', 'content-language', 'expires',
                   'cache-control', 'content-disposition',
                   'content-encoding'):
        param = 'response-' + header
        if param in req.params:
            resp.headers[header] = req.params[param]
    return resp
def test_fake_swift_sysmeta(self):
    """
    Register a response with a container ACL sysmeta header, re-register
    it without that header, and check the headers against the original
    set both times.
    """
    acl_key = sysmeta_header('container', 'acl')
    fake_swift = FakeSwift()

    orig_headers = HeaderKeyDict()
    orig_headers.update({acl_key: 'test',
                         'x-container-meta-foo': 'bar'})
    fake_swift.register(self.method, self.path, MagicMock(),
                        orig_headers, None)
    self._check_headers(fake_swift, self.method, self.path, orig_headers)

    # Second registration drops the sysmeta header; the expectation passed
    # to _check_headers is still the original header set.
    stripped_headers = orig_headers.copy()
    del stripped_headers[acl_key.title()]
    fake_swift.register(self.method, self.path, MagicMock(),
                        stripped_headers, None)
    self._check_headers(fake_swift, self.method, self.path, orig_headers)
def test_encode_acl_object(self):
    """
    Encoding a private object ACL yields a JSON sysmeta value holding the
    owner id and exactly one grant.
    """
    acl = ACLPrivate(Owner(id='test:tester', name='test:tester'))
    acp = encode_acl('object', acl)
    header_value = json.loads(acp[sysmeta_header('object', 'acl')])

    # assertIn reports the container contents on failure, unlike
    # assertTrue('x' in y) which only says "False is not true".
    self.assertIn('Owner', header_value)
    self.assertIn('Grant', header_value)
    self.assertEqual('test:tester', header_value['Owner'])
    self.assertEqual(len(header_value['Grant']), 1)
def POST(self, req):
    """
    Handles Initiate Multipart Upload.

    Records the client's Content-Type in object sysmeta, creates the
    segments container if it does not exist yet, writes an empty upload
    marker named ``<object>/<upload id>`` into it and returns the
    InitiateMultipartUploadResult document.
    """
    # Create a unique S3 upload id from UUID to avoid duplicates.
    upload_id = unique_id()

    orig_container = req.container_name
    seg_container = orig_container + MULTIUPLOAD_SUFFIX

    # Remember what the client sent: the marker itself is always stored as
    # 'application/directory'.
    client_content_type = req.headers.get('Content-Type')
    hct_key = sysmeta_header('object', 'has-content-type')
    if client_content_type:
        req.headers[hct_key] = 'yes'
        req.headers[sysmeta_header('object', 'content-type')] = \
            client_content_type
    else:
        req.headers[hct_key] = 'no'
    req.headers['Content-Type'] = 'application/directory'

    try:
        # Temporarily retarget the request at the segments container to
        # look it up.
        req.container_name = seg_container
        req.get_container_info(self.app)
    except NoSuchBucket:
        try:
            req.get_response(self.app, 'PUT', seg_container, '')
        except (BucketAlreadyExists, BucketAlreadyOwnedByYou):
            pass
    finally:
        # Point the request back at the primary bucket in every case.
        req.container_name = orig_container

    marker_name = '%s/%s' % (req.object_name, upload_id)

    for digest_header in ('Etag', 'Content-Md5'):
        req.headers.pop(digest_header, None)

    req.get_response(self.app, 'PUT', seg_container, marker_name, body='')

    result_elem = Element('InitiateMultipartUploadResult')
    for tag, text in (('Bucket', req.container_name),
                      ('Key', req.object_name),
                      ('UploadId', upload_id)):
        SubElement(result_elem, tag).text = text

    return HTTPOk(body=tostring(result_elem),
                  content_type='application/xml')
def test_encode_acl_object(self):
    """
    Encoding a private object ACL yields a JSON sysmeta value holding the
    owner id and exactly one grant.
    """
    acl = ACLPrivate(Owner(id='test:tester', name='test:tester'))
    acp = encode_acl('object', acl)
    header_value = json.loads(acp[sysmeta_header('object', 'acl')])

    # assertIn reports the container contents on failure, unlike
    # assertTrue('x' in y) which only says "False is not true".
    self.assertIn('Owner', header_value)
    self.assertIn('Grant', header_value)
    self.assertEqual('test:tester', header_value['Owner'])
    self.assertEqual(len(header_value['Grant']), 1)
def POST(self, req):
    """
    Handles Initiate Multipart Upload.

    Records the client's Content-Type in object sysmeta, issues a PUT for
    the segments container (tolerating "already exists" errors), writes an
    empty upload marker named ``<object>/<upload id>`` into it and returns
    the InitiateMultipartUploadResult document.
    """
    # Create a unique S3 upload id from UUID to avoid duplicates.
    upload_id = unique_id()
    seg_container = req.container_name + MULTIUPLOAD_SUFFIX

    # Remember what the client sent: the marker itself is always stored as
    # 'application/directory'.
    client_content_type = req.headers.get('Content-Type')
    hct_key = sysmeta_header('object', 'has-content-type')
    if client_content_type:
        req.headers[hct_key] = 'yes'
        req.headers[sysmeta_header('object', 'content-type')] = \
            client_content_type
    else:
        req.headers[hct_key] = 'no'
    req.headers['Content-Type'] = 'application/directory'

    try:
        req.get_response(self.app, 'PUT', seg_container, '')
    except (BucketAlreadyExists, BucketAlreadyOwnedByYou):
        pass

    marker_name = '%s/%s' % (req.object_name, upload_id)

    for digest_header in ('Etag', 'Content-Md5'):
        req.headers.pop(digest_header, None)

    req.get_response(self.app, 'PUT', seg_container, marker_name, body='')

    result_elem = Element('InitiateMultipartUploadResult')
    for tag, text in (('Bucket', req.container_name),
                      ('Key', req.object_name),
                      ('UploadId', upload_id)):
        SubElement(result_elem, tag).text = text

    return HTTPOk(body=tostring(result_elem),
                  content_type='application/xml')
def test_decode_acl_object(self):
    """
    A JSON object ACL with one FULL_CONTROL grant decodes into an ACL with
    the matching owner and a single grant.
    """
    policy = {
        'Owner': 'test:tester',
        'Grant': [{'Permission': 'FULL_CONTROL',
                   'Grantee': 'test:tester'}],
    }
    acl_key = sysmeta_header('object', 'acl')
    headers = {acl_key: json.dumps(policy)}

    acl = decode_acl('object', headers, self.allow_no_owner)

    self.assertEqual(type(acl), ACL)
    self.assertEqual(acl.owner.id, 'test:tester')
    self.assertEqual(len(acl.grants), 1)
    first_grant = acl.grants[0]
    self.assertEqual(str(first_grant.grantee), 'test:tester')
    self.assertEqual(acl.grants[0].permission, 'FULL_CONTROL')
def test_decode_acl_object(self):
    """
    A JSON object ACL with one FULL_CONTROL grant decodes into an ACL with
    the matching owner and a single grant.
    """
    policy = {
        'Owner': 'test:tester',
        'Grant': [{'Permission': 'FULL_CONTROL',
                   'Grantee': 'test:tester'}],
    }
    acl_key = sysmeta_header('object', 'acl')
    headers = {acl_key: json.dumps(policy)}

    acl = decode_acl('object', headers, self.allow_no_owner)

    self.assertEqual(type(acl), ACL)
    self.assertEqual(acl.owner.id, 'test:tester')
    self.assertEqual(len(acl.grants), 1)
    first_grant = acl.grants[0]
    self.assertEqual(str(first_grant.grantee), 'test:tester')
    self.assertEqual(acl.grants[0].permission, 'FULL_CONTROL')
def GETorHEAD(self, req):
    """
    Serve a GET or HEAD request for an object, honouring version ids and
    CORS.

    Conditional headers (If-Match / If-None-Match) are extended with the
    plain form of any '-N' suffixed etag, and the backend is pointed at the
    object 'etag' sysmeta for comparison. ``response-*`` query parameters
    override the matching response headers. HEAD responses have their
    body iterator cleared.

    :raises NoSuchVersion: a concrete version id was requested but the
                           container info has no 'versions-container'
                           sysmeta
    :raises NoSuchKey: the response carries an 'x-amz-meta-deleted' header
    """
    conditional_headers = [name for name in ('if-match', 'if-none-match')
                           if name in req.headers]
    for name in conditional_headers:
        # list_from_csv() is evaluated before the header is appended to.
        for raw_value in list_from_csv(req.headers[name]):
            etag = normalize_etag(raw_value)
            if etag.endswith('-N'):
                # Deal with fake S3-like etags for SLOs uploaded via Swift
                req.headers[name] += ', ' + etag[:-2]

    if conditional_headers:
        # Update where to look
        update_etag_is_at_header(req, sysmeta_header('object', 'etag'))

    object_name = req.object_name
    version_id = version_id_param(req)
    if version_id is None:
        query = {}
    else:
        query = {'version-id': version_id}

    if version_id not in ('null', None):
        container_sysmeta = req.get_container_info(self.app).get(
            'sysmeta', {})
        if not container_sysmeta.get('versions-container', ''):
            # Versioning has never been enabled
            raise NoSuchVersion(object_name, version_id)

    # The CORS rule is resolved before the backend request is sent.
    cors_rule = None
    if req.headers.get('Origin'):
        cors_rule = get_cors(self.app, req, req.method,
                             req.headers.get('Origin'))

    resp = req.get_response(self.app, query=query)

    if req.method == 'HEAD':
        resp.app_iter = None

    if 'x-amz-meta-deleted' in resp.headers:
        raise NoSuchKey(object_name)

    for header in ('content-type', 'content-language', 'expires',
                   'cache-control', 'content-disposition',
                   'content-encoding'):
        param = 'response-' + header
        if param in req.params:
            resp.headers[header] = req.params[param]

    if cors_rule is not None:
        cors_fill_headers(req, resp, cors_rule)
    return resp
def test_encode_acl_many_grant(self):
    """
    An ACL built from 99 read grantees encodes into a JSON sysmeta value
    that carries the owner and all 99 grants.
    """
    # Build the comma-separated grantee list in one expression instead of
    # appending to a list and then rebinding the same name to a string.
    grantees = ','.join('id=test:tester%d' % i for i in range(99))
    headers = {'x-amz-grant-read': grantees}

    acl = ACL.from_headers(headers, Owner('test:tester', 'test:tester'))
    acp = encode_acl('container', acl)
    header_value = json.loads(acp[sysmeta_header('container', 'acl')])

    # assertIn reports the container contents on failure, unlike
    # assertTrue('x' in y) which only says "False is not true".
    self.assertIn('Owner', header_value)
    self.assertIn('Grant', header_value)
    self.assertEqual('test:tester', header_value['Owner'])
    self.assertEqual(len(header_value['Grant']), 99)
def test_encode_acl_many_grant(self):
    """
    An ACL built from 99 read grantees encodes into a JSON sysmeta value
    that carries the owner and all 99 grants.
    """
    # Build the comma-separated grantee list in one expression instead of
    # appending to a list and then rebinding the same name to a string.
    grantees = ','.join('id=test:tester%d' % i for i in range(99))
    headers = {'x-amz-grant-read': grantees}

    acl = ACL.from_headers(headers, Owner('test:tester', 'test:tester'))
    acp = encode_acl('container', acl)
    header_value = json.loads(acp[sysmeta_header('container', 'acl')])

    # assertIn reports the container contents on failure, unlike
    # assertTrue('x' in y) which only says "False is not true".
    self.assertIn('Owner', header_value)
    self.assertIn('Grant', header_value)
    self.assertEqual('test:tester', header_value['Owner'])
    self.assertEqual(len(header_value['Grant']), 99)
def decode_acl(resource, headers, allow_no_owner):
    """
    Decode Swift metadata to an ACL instance.

    Given a resource type and HTTP headers, this method returns an ACL
    instance.

    :param resource: resource type handed to ``sysmeta_header``
    :param headers: mapping that may hold the ACL sysmeta header
    :param allow_no_owner: passed through to the ``ACL`` constructor
    :returns: the decoded ACL; an ACL with ``Owner(None, None)`` and no
              grants when the header is missing, empty, or does not hold a
              JSON object
    :raises InvalidSubresource: on any error while decoding the value
    """
    acl_key = sysmeta_header(resource, 'acl')
    value = headers[acl_key] if acl_key in headers else ''

    if value == '':
        # Fix me: In the case of value is empty or not dict instance,
        # I want an instance of Owner as None.
        # However, in the above process would occur error in reference
        # to an instance variable of Owner.
        return ACL(Owner(None, None), [], True, allow_no_owner)

    try:
        decoded = json.loads(value)
        if not isinstance(decoded, dict):
            return ACL(Owner(None, None), [], True, allow_no_owner)

        # The stored owner string is used for both the id and the name.
        owner_id = decoded.get('Owner')

        grants = []
        for entry in decoded.get('Grant', ()):
            grantee = None
            # A grantee naming a Group subclass becomes that group;
            # anything else is treated as a user.
            # pylint: disable-msg=E1101
            for group_cls in Group.__subclasses__():
                if group_cls.__name__ == entry['Grantee']:
                    grantee = group_cls()
            if not grantee:
                grantee = User(entry['Grantee'])
            grants.append(Grant(grantee, entry['Permission']))

        return ACL(Owner(owner_id, owner_id), grants, True, allow_no_owner)
    except Exception as e:
        raise InvalidSubresource((resource, 'acl', value), e)
def decode_acl(resource, headers, allow_no_owner):
    """
    Decode Swift metadata to an ACL instance.

    Given a resource type and HTTP headers, this method returns an ACL
    instance.

    :param resource: resource type handed to ``sysmeta_header``
    :param headers: mapping that may hold the ACL sysmeta header
    :param allow_no_owner: passed through to the ``ACL`` constructor
    :returns: the decoded ACL; an ACL with ``Owner(None, None)`` and no
              grants when the header is missing, empty, or does not hold a
              JSON object
    :raises InvalidSubresource: on any error while decoding the value
    """
    acl_key = sysmeta_header(resource, 'acl')
    value = headers[acl_key] if acl_key in headers else ''

    if value == '':
        # Fix me: In the case of value is empty or not dict instance,
        # I want an instance of Owner as None.
        # However, in the above process would occur error in reference
        # to an instance variable of Owner.
        return ACL(Owner(None, None), [], True, allow_no_owner)

    try:
        decoded = json.loads(value)
        if not isinstance(decoded, dict):
            return ACL(Owner(None, None), [], True, allow_no_owner)

        # The stored owner string is used for both the id and the name.
        owner_id = decoded.get('Owner')

        grants = []
        for entry in decoded.get('Grant', ()):
            grantee = None
            # A grantee naming a Group subclass becomes that group;
            # anything else is treated as a user.
            # pylint: disable-msg=E1101
            for group_cls in Group.__subclasses__():
                if group_cls.__name__ == entry['Grantee']:
                    grantee = group_cls()
            if not grantee:
                grantee = User(entry['Grantee'])
            grants.append(Grant(grantee, entry['Permission']))

        return ACL(Owner(owner_id, owner_id), grants, True, allow_no_owner)
    except Exception as e:
        raise InvalidSubresource((resource, 'acl', value), e)
def GETorHEAD(self, req):
    """
    Serve a GET or HEAD request for an object.

    When the request carries If-Match or If-None-Match, the backend is
    pointed at the object 'etag' sysmeta for comparison. ``response-*``
    query parameters override the matching response headers. HEAD
    responses have their body iterator cleared.
    """
    is_conditional = ('if-match' in req.headers or
                      'if-none-match' in req.headers)
    if is_conditional:
        # Update where to look
        update_etag_is_at_header(req, sysmeta_header('object', 'etag'))

    resp = req.get_response(self.app)

    if req.method == 'HEAD':
        resp.app_iter = None

    for header in ('content-type', 'content-language', 'expires',
                   'cache-control', 'content-disposition',
                   'content-encoding'):
        param = 'response-' + header
        if param in req.params:
            resp.headers[header] = req.params[param]
    return resp
def encode_acl(resource, acl):
    """
    Encode an ACL instance to Swift metadata.

    Given a resource type and an ACL instance, this method returns HTTP
    headers, which can be used for Swift metadata.

    :param resource: resource type handed to ``sysmeta_header``
    :param acl: object exposing ``owner.id`` and an iterable ``grants``
                whose items have ``permission`` and ``grantee``
    :returns: a one-entry dict mapping the ACL sysmeta header name to the
              compact JSON encoding of the ACL
    """
    payload = {"Owner": acl.owner.id}
    payload["Grant"] = [
        {"Permission": entry.permission, "Grantee": str(entry.grantee)}
        for entry in acl.grants
    ]

    acl_key = sysmeta_header(resource, 'acl')
    # Compact separators keep the header value as short as possible.
    encoded = json.dumps(payload, separators=(',', ':'))
    return {acl_key: encoded}
def encode_acl(resource, acl):
    """
    Encode an ACL instance to Swift metadata.

    Given a resource type and an ACL instance, this method returns HTTP
    headers, which can be used for Swift metadata.

    :param resource: resource type handed to ``sysmeta_header``
    :param acl: object exposing ``owner.id`` and an iterable ``grants``
                whose items have ``permission`` and ``grantee``
    :returns: a one-entry dict mapping the ACL sysmeta header name to the
              compact JSON encoding of the ACL
    """
    payload = {"Owner": acl.owner.id}
    payload["Grant"] = [
        {"Permission": entry.permission, "Grantee": str(entry.grantee)}
        for entry in acl.grants
    ]

    acl_key = sysmeta_header(resource, 'acl')
    # Compact separators keep the header value as short as possible.
    encoded = json.dumps(payload, separators=(',', ':'))
    return {acl_key: encoded}
def GETorHEAD(self, req):
    """
    Serve a GET or HEAD request for an object, honouring ``versionId``.

    Conditional headers (If-Match / If-None-Match) are extended with the
    plain form of any '-N' suffixed etag, and the backend is pointed at the
    object 'etag' sysmeta for comparison. ``response-*`` query parameters
    override the matching response headers. HEAD responses have their
    body iterator cleared.

    :raises S3NotImplemented: a concrete version id was requested but
                              'object_versioning' is not in the swift info
    :raises NoSuchKey: the response carries an 'x-amz-meta-deleted' header
    """
    conditional_headers = [name for name in ('if-match', 'if-none-match')
                           if name in req.headers]
    for name in conditional_headers:
        # list_from_csv() is evaluated before the header is appended to.
        for raw_value in list_from_csv(req.headers[name]):
            etag = normalize_etag(raw_value)
            if etag.endswith('-N'):
                # Deal with fake S3-like etags for SLOs uploaded via Swift
                req.headers[name] += ', ' + etag[:-2]

    if conditional_headers:
        # Update where to look
        update_etag_is_at_header(req, sysmeta_header('object', 'etag'))

    object_name = req.object_name
    version_id = req.params.get('versionId')
    wants_specific_version = version_id not in ('null', None)
    if wants_specific_version and \
            'object_versioning' not in get_swift_info():
        raise S3NotImplemented()

    if version_id is None:
        query = {}
    else:
        query = {'version-id': version_id}
    resp = req.get_response(self.app, query=query)

    if req.method == 'HEAD':
        resp.app_iter = None

    if 'x-amz-meta-deleted' in resp.headers:
        raise NoSuchKey(object_name)

    for header in ('content-type', 'content-language', 'expires',
                   'cache-control', 'content-disposition',
                   'content-encoding'):
        param = 'response-' + header
        if param in req.params:
            resp.headers[header] = req.params[param]
    return resp
def POST(self, req):
    """
    Handles Complete Multipart Upload.

    Looks up the upload, lists the uploaded segments, validates the part
    list from the request body against that listing, writes the manifest
    (or an empty object when there are no parts) at the target path,
    deletes the upload marker and returns a CompleteMultipartUploadResult
    document.

    :raises InvalidRequest: the request body is empty
    :raises MalformedXML: the request body fails XML parsing/validation
    :raises InvalidPartOrder: part numbers are not strictly increasing
    :raises InvalidPart: a part is missing from the listing or its etag
                         differs
    :raises EntityTooSmall: a part other than the last is smaller than
                            ``self.conf.min_segment_size``, or the backend
                            rejects a segment as too small
    """
    upload_id = req.params['uploadId']
    resp = _get_upload_info(req, self.app, upload_id)
    headers = {}
    # Carry user metadata from the upload-info response over to the final
    # object, translating the x-amz-meta- prefix (11 chars) back to
    # x-object-meta-.
    for key, val in resp.headers.iteritems():
        _key = key.lower()
        if _key.startswith('x-amz-meta-'):
            headers['x-object-meta-' + _key[11:]] = val

    hct_header = sysmeta_header('object', 'has-content-type')
    if resp.sysmeta_headers.get(hct_header) == 'yes':
        content_type = resp.sysmeta_headers.get(
            sysmeta_header('object', 'content-type'))
    elif hct_header in resp.sysmeta_headers:
        # has-content-type is present but false, so no content type was
        # set on initial upload. In that case, we won't set one on our
        # PUT request. Swift will end up guessing one based on the
        # object name.
        content_type = None
    else:
        content_type = resp.headers.get('Content-Type')

    if content_type:
        headers['Content-Type'] = content_type

    # Query for the objects in the segments area to make sure it completed
    query = {
        'format': 'json',
        'prefix': '%s/%s/' % (req.object_name, upload_id),
        'delimiter': '/'
    }

    container = req.container_name + MULTIUPLOAD_SUFFIX
    resp = req.get_response(self.app, 'GET', container, '', query=query)
    objinfo = json.loads(resp.body)
    # Index the listing by object name; each value is already shaped like
    # a manifest entry (path / etag / size_bytes).
    objtable = dict((o['name'],
                     {'path': '/'.join(['', container, o['name']]),
                      'etag': o['hash'],
                      'size_bytes': o['bytes']}) for o in objinfo)

    # Fed with the raw bytes of every part etag, in order.
    s3_etag_hasher = md5()
    manifest = []
    previous_number = 0
    try:
        xml = req.xml(MAX_COMPLETE_UPLOAD_BODY_SIZE)
        if not xml:
            raise InvalidRequest(msg='You must specify at least one part')

        complete_elem = fromstring(
            xml, 'CompleteMultipartUpload', self.logger)
        for part_elem in complete_elem.iterchildren('Part'):
            part_number = int(part_elem.find('./PartNumber').text)

            # Part numbers must be strictly increasing.
            if part_number <= previous_number:
                raise InvalidPartOrder(upload_id=upload_id)
            previous_number = part_number

            etag = part_elem.find('./ETag').text
            if len(etag) >= 2 and etag[0] == '"' and etag[-1] == '"':
                # strip double quotes
                etag = etag[1:-1]

            info = objtable.get("%s/%s/%s" % (req.object_name, upload_id,
                                              part_number))
            if info is None or info['etag'] != etag:
                raise InvalidPart(upload_id=upload_id,
                                  part_number=part_number)

            # NOTE(review): str.decode('hex') only exists on Python 2.
            s3_etag_hasher.update(etag.decode('hex'))
            info['size_bytes'] = int(info['size_bytes'])
            manifest.append(info)
    except (XMLSyntaxError, DocumentInvalid):
        raise MalformedXML()
    except ErrorResponse:
        raise
    except Exception as e:
        # Log anything unexpected, then let it propagate unchanged.
        self.logger.error(e)
        raise

    # Etag of the form <hex digest>-<number of parts>.
    s3_etag = '%s-%d' % (s3_etag_hasher.hexdigest(), len(manifest))
    headers[sysmeta_header('object', 'etag')] = s3_etag
    # Leave base header value blank; SLO will populate
    c_etag = '; s3_etag=%s' % s3_etag
    headers['X-Object-Sysmeta-Container-Update-Override-Etag'] = c_etag

    # Check the size of each segment except the last and make sure they are
    # all more than the minimum upload chunk size
    for info in manifest[:-1]:
        if info['size_bytes'] < self.conf.min_segment_size:
            raise EntityTooSmall()

    try:
        # TODO: add support for versioning
        if manifest:
            resp = req.get_response(self.app, 'PUT',
                                    body=json.dumps(manifest),
                                    query={'multipart-manifest': 'put'},
                                    headers=headers)
        else:
            # the upload must have consisted of a single zero-length part
            # just write it directly
            resp = req.get_response(self.app, 'PUT', body='',
                                    headers=headers)
    except BadSwiftRequest as e:
        msg = str(e)
        expected_msg = 'too small; each segment must be at least 1 byte'
        if expected_msg in msg:
            # FIXME: AWS S3 allows a smaller object than 5 MB if there is
            # only one part.  Use a COPY request to copy the part object
            # from the segments container instead.
            raise EntityTooSmall(msg)
        else:
            raise

    # clean up the multipart-upload record
    obj = '%s/%s' % (req.object_name, upload_id)
    try:
        req.get_response(self.app, 'DELETE', container, obj)
    except NoSuchKey:
        pass  # We know that this existed long enough for us to HEAD

    result_elem = Element('CompleteMultipartUploadResult')

    # NOTE: boto with sig v4 appends port to HTTP_HOST value at the
    # request header when the port is non default value and it makes
    # req.host_url like as http://localhost:8080:8080/path
    # that obviously invalid. Probably it should be resolved at
    # swift.common.swob though, tentatively we are parsing and
    # reconstructing the correct host_url info here.
    # in detail, https://github.com/boto/boto/pull/3513
    parsed_url = urlparse(req.host_url)
    host_url = '%s://%s' % (parsed_url.scheme, parsed_url.hostname)
    if parsed_url.port:
        host_url += ':%s' % parsed_url.port

    SubElement(result_elem, 'Location').text = host_url + req.path
    SubElement(result_elem, 'Bucket').text = req.container_name
    SubElement(result_elem, 'Key').text = req.object_name
    SubElement(result_elem, 'ETag').text = '"%s"' % s3_etag
    # The PUT response is reused as the S3 response; its ETag header is
    # removed because the etag is reported in the XML body instead.
    del resp.headers['ETag']

    resp.body = tostring(result_elem)
    resp.status = 200
    resp.content_type = "application/xml"

    return resp
def test_decode_acl_with_invalid_json(self):
    """decode_acl raises InvalidSubresource for an unparsable ACL value."""
    acl_key = sysmeta_header('container', 'acl')
    headers = {acl_key: '['}
    with self.assertRaises(InvalidSubresource):
        decode_acl('container', headers, self.allow_no_owner)
def test_decode_acl_empty_list(self):
    """
    A stored ACL value that is valid JSON but not an object ('[]')
    decodes into an ACL with no owner id and no grants.
    """
    acl_key = sysmeta_header('container', 'acl')
    decoded = decode_acl('container', {acl_key: '[]'},
                         self.allow_no_owner)

    self.assertEqual(type(decoded), ACL)
    self.assertIsNone(decoded.owner.id)
    self.assertEqual(len(decoded.grants), 0)
from six.moves.urllib.parse import parse_qs

from swift.common.utils import close_if_possible, public

from swift.common.middleware.s3api.controllers.base import Controller, \
    check_container_existence
from swift.common.middleware.s3api.etree import fromstring, tostring, \
    DocumentInvalid, Element, SubElement, XMLSyntaxError
from swift.common.middleware.s3api.s3response import HTTPNoContent, HTTPOk, \
    MalformedXML, NoSuchTagSet, InvalidArgument
from swift.common.middleware.s3api.utils import sysmeta_header

# Name of the S3 request header carrying tags.
HTTP_HEADER_TAGGING_KEY = "x-amz-tagging"
SYSMETA_TAGGING_KEY = 'swift3-tagging'
# Sysmeta header names for tagging, one per resource type.
BUCKET_TAGGING_HEADER = sysmeta_header('bucket', 'tagging')
OBJECT_TAGGING_HEADER = sysmeta_header('object', 'tagging')
# Not a swift3 header, cannot use sysmeta_header()
VERSION_ID_HEADER = 'X-Object-Sysmeta-Version-Id'

# FIXME(FVE): compute better size estimation according to key/value limits
# 10 tags with 128b key and 256b value should be 3840 + envelope
MAX_TAGGING_BODY_SIZE = 8 * 1024

# Error text for an 'x-amz-tagging' header that is not valid.
INVALID_TAGGING = 'An error occurred (InvalidArgument) when calling ' \
    'the PutObject operation: The header \'x-amz-tagging\' ' \
    'shall be encoded as UTF-8 then URLEncoded URL query ' \
    'parameters without tag name duplicates.'
def PUT(self, app):
    """
    Copy the ACL stashed on the upload marker onto the request.

    HEADs the marker object ``<object>/<uploadId>`` in the segments
    container and stores its 'tmpacl' sysmeta value in the request's
    object 'acl' sysmeta header.

    :param app: application the HEAD sub-request is sent to
    """
    marker_container = self.req.container_name + MULTIUPLOAD_SUFFIX
    marker_name = '%s/%s' % (self.obj, self.req.params['uploadId'])
    marker_resp = self.req._get_response(
        app, 'HEAD', marker_container, marker_name)

    stashed_acl = marker_resp.sysmeta_headers.get(
        sysmeta_header('object', 'tmpacl'))
    self.req.headers[sysmeta_header('object', 'acl')] = stashed_acl
def POST(self, req):
    """
    Handles Complete Multipart Upload.

    Validates the part list from the request body, then returns a 200
    response whose body is produced lazily by ``response_iter``: the
    manifest PUT, the marker cleanup and any resulting error document all
    happen while the body is being iterated.

    :raises InvalidRequest: the request body is empty
    :raises BadDigest: a Content-MD5 was supplied and does not match the
                       body
    :raises MalformedXML: the request body fails XML parsing/validation
    :raises InvalidPartOrder: part numbers are not strictly increasing
    :raises InvalidPart: a part etag is not 32 lowercase hex characters
    """
    upload_id = req.params['uploadId']
    resp = _get_upload_info(req, self.app, upload_id)
    headers = {
        'Accept': 'application/json',
        sysmeta_header('object', 'upload-id'): upload_id
    }
    # Carry user metadata from the upload-info response over to the final
    # object, translating the x-amz-meta- prefix (11 chars) back to
    # x-object-meta-.
    for key, val in resp.headers.items():
        _key = key.lower()
        if _key.startswith('x-amz-meta-'):
            headers['x-object-meta-' + _key[11:]] = val

    hct_header = sysmeta_header('object', 'has-content-type')
    if resp.sysmeta_headers.get(hct_header) == 'yes':
        content_type = resp.sysmeta_headers.get(
            sysmeta_header('object', 'content-type'))
    elif hct_header in resp.sysmeta_headers:
        # has-content-type is present but false, so no content type was
        # set on initial upload. In that case, we won't set one on our
        # PUT request. Swift will end up guessing one based on the
        # object name.
        content_type = None
    else:
        content_type = resp.headers.get('Content-Type')

    if content_type:
        headers['Content-Type'] = content_type

    container = req.container_name + MULTIUPLOAD_SUFFIX
    # Fed with the raw bytes of every part etag, in order.
    s3_etag_hasher = md5(usedforsecurity=False)
    manifest = []
    previous_number = 0
    try:
        xml = req.xml(MAX_COMPLETE_UPLOAD_BODY_SIZE)
        if not xml:
            raise InvalidRequest(msg='You must specify at least one part')
        if 'content-md5' in req.headers:
            # If an MD5 was provided, we need to verify it.
            # Note that S3Request already took care of translating to ETag
            if req.headers['etag'] != md5(
                    xml, usedforsecurity=False).hexdigest():
                raise BadDigest(content_md5=req.headers['content-md5'])
            # We're only interested in the body here, in the
            # multipart-upload controller -- *don't* let it get
            # plumbed down to the object-server
            del req.headers['etag']

        complete_elem = fromstring(xml, 'CompleteMultipartUpload',
                                   self.logger)
        for part_elem in complete_elem.iterchildren('Part'):
            part_number = int(part_elem.find('./PartNumber').text)

            # Part numbers must be strictly increasing.
            if part_number <= previous_number:
                raise InvalidPartOrder(upload_id=upload_id)
            previous_number = part_number

            etag = normalize_etag(part_elem.find('./ETag').text)
            # Only 32 lowercase hex characters are accepted as a part etag.
            if len(etag) != 32 or any(c not in '0123456789abcdef'
                                      for c in etag):
                raise InvalidPart(upload_id=upload_id,
                                  part_number=part_number)

            manifest.append({
                'path': '/%s/%s/%s/%d' % (wsgi_to_str(container),
                                          wsgi_to_str(
                                              req.object_name),
                                          upload_id, part_number),
                'etag': etag
            })
            s3_etag_hasher.update(binascii.a2b_hex(etag))
    except (XMLSyntaxError, DocumentInvalid):
        # NB: our schema definitions catch uploads with no parts here
        raise MalformedXML()
    except ErrorResponse:
        raise
    except Exception as e:
        # Log anything unexpected, then let it propagate unchanged.
        self.logger.error(e)
        raise

    # Etag of the form <hex digest>-<number of parts>.
    s3_etag = '%s-%d' % (s3_etag_hasher.hexdigest(), len(manifest))
    s3_etag_header = sysmeta_header('object', 'etag')
    if resp.sysmeta_headers.get(s3_etag_header) == s3_etag:
        # This header should only already be present if the upload marker
        # has been cleaned up and the current target uses the same
        # upload-id; assuming the segments to use haven't changed, the work
        # is already done
        return HTTPOk(body=_make_complete_body(req, s3_etag, False),
                      content_type='application/xml')
    headers[s3_etag_header] = s3_etag
    # Leave base header value blank; SLO will populate
    c_etag = '; s3_etag=%s' % s3_etag
    headers[get_container_update_override_key('etag')] = c_etag

    too_small_message = ('s3api requires that each segment be at least '
                         '%d bytes' % self.conf.min_segment_size)

    def size_checker(manifest):
        # Check the size of each segment except the last and make sure
        # they are all more than the minimum upload chunk size.
        # Note that we need to use the *internal* keys, since we're
        # looking at the manifest that's about to be written.
        return [(item['name'], too_small_message)
                for item in manifest[:-1]
                if item and item['bytes'] < self.conf.min_segment_size]

    # Registered in the environ under the SLO manifest hook key.
    req.environ['swift.callback.slo_manifest_hook'] = size_checker
    start_time = time.time()

    def response_iter():
        # NB: XML requires that the XML declaration, if present, be at the
        # very start of the document. Clients *will* call us out on not
        # being valid XML if we pass through whitespace before it.
        # Track whether we've sent anything yet so we can yield out that
        # declaration *first*
        yielded_anything = False
        try:
            try:
                # TODO: add support for versioning
                put_resp = req.get_response(self.app,
                                            'PUT',
                                            body=json.dumps(manifest),
                                            query={
                                                'multipart-manifest': 'put',
                                                'heartbeat': 'on'
                                            },
                                            headers=headers)
                if put_resp.status_int == 202:
                    body = []
                    put_resp.fix_conditional_response()
                    for chunk in put_resp.response_iter:
                        # Whitespace-only chunks are passed through to the
                        # client; everything else is collected as the JSON
                        # result.
                        if not chunk.strip():
                            if time.time() - start_time < 10:
                                # Include some grace period to keep
                                # ceph-s3tests happy
                                continue
                            if not yielded_anything:
                                yield (b'<?xml version="1.0" '
                                       b'encoding="UTF-8"?>\n')
                            yielded_anything = True
                            yield chunk
                            continue
                        body.append(chunk)
                    body = json.loads(b''.join(body))
                    if body['Response Status'] != '201 Created':
                        # Map known per-segment errors onto S3 errors; fall
                        # back to a generic InvalidRequest otherwise.
                        for seg, err in body['Errors']:
                            if err == too_small_message:
                                raise EntityTooSmall()
                            elif err in ('Etag Mismatch', '404 Not Found'):
                                raise InvalidPart(upload_id=upload_id)
                        raise InvalidRequest(
                            status=body['Response Status'],
                            msg='\n'.join(': '.join(err)
                                          for err in body['Errors']))
            except BadSwiftRequest as e:
                msg = str(e)
                if too_small_message in msg:
                    raise EntityTooSmall(msg)
                elif ', Etag Mismatch' in msg:
                    raise InvalidPart(upload_id=upload_id)
                elif ', 404 Not Found' in msg:
                    raise InvalidPart(upload_id=upload_id)
                else:
                    raise

            # clean up the multipart-upload record
            obj = '%s/%s' % (req.object_name, upload_id)
            try:
                req.get_response(self.app, 'DELETE', container, obj)
            except NoSuchKey:
                # The important thing is that we wrote out a tombstone to
                # make sure the marker got cleaned up. If it's already
                # gone (e.g., because of concurrent completes or a retried
                # complete), so much the better.
                pass

            yield _make_complete_body(req, s3_etag, yielded_anything)
        except ErrorResponse as err_resp:
            if yielded_anything:
                err_resp.xml_declaration = False
                yield b'\n'
            else:
                # Oh good, we can still change HTTP status code, too!
                # (``resp`` here is the HTTPOk bound below: the name is
                # rebound before this generator is created and iterated.)
                resp.status = err_resp.status
            for chunk in err_resp({}, lambda *a: None):
                yield chunk

    resp = HTTPOk()  # assume we're good for now... but see above!
    resp.app_iter = reiterate(response_iter())
    resp.content_type = "application/xml"

    return resp
from swift.common.utils import public

from swift.common.middleware.s3api.controllers.base import Controller, \
    bucket_operation, check_container_existence, log_s3api_command
from swift.common.middleware.s3api.etree import fromstring, \
    DocumentInvalid, XMLSyntaxError
from swift.common.middleware.s3api.s3response import HTTPOk, HTTPNoContent, \
    MalformedXML, NoSuchCORSConfiguration, CORSInvalidRequest
from swift.common.middleware.s3api.utils import sysmeta_header

VERSION_ID_HEADER = 'X-Object-Sysmeta-Version-Id'
MAX_CORS_BODY_SIZE = 10240
BUCKET_CORS_HEADER = sysmeta_header('bucket', 'cors')
CORS_ALLOWED_HTTP_METHOD = ('GET', 'POST', 'PUT', 'HEAD', 'DELETE')


def match_cors(pattern, value):
    """
    Match the value of a CORS header against the specified pattern.

    :param pattern: literal text in which each ``*`` matches any run of
                    characters (including none); every other character is
                    matched as is
    :param value: header value to test
    :returns: True if ``value`` matches ``pattern``, False otherwise
    """
    if '*' not in pattern:
        # No wildcard: plain string equality.
        return pattern == value

    # protect all non-alphanumerics (except wildcards) as we keep them as is
    escaped_chunks = [re.escape(chunk) for chunk in pattern.split('*')]
    anchored = '^' + '.*'.join(escaped_chunks) + '$'
    return re.match(anchored, value) is not None
def PUT(self, app):
    """
    Copy the ACL stashed on the upload marker onto the request.

    HEADs the marker object ``<object>/<uploadId>`` in the segments
    container and stores its 'tmpacl' sysmeta value in the request's
    object 'acl' sysmeta header.

    :param app: application the HEAD sub-request is sent to
    """
    marker_container = self.req.container_name + MULTIUPLOAD_SUFFIX
    marker_name = '%s/%s' % (self.obj, self.req.params['uploadId'])
    marker_resp = self.req._get_response(
        app, 'HEAD', marker_container, marker_name)

    stashed_acl = marker_resp.sysmeta_headers.get(
        sysmeta_header('object', 'tmpacl'))
    self.req.headers[sysmeta_header('object', 'acl')] = stashed_acl
def test_decode_acl_with_invalid_json(self):
    """decode_acl raises InvalidSubresource for an unparsable ACL value."""
    acl_key = sysmeta_header('container', 'acl')
    headers = {acl_key: '['}
    with self.assertRaises(InvalidSubresource):
        decode_acl('container', headers, self.allow_no_owner)
def __init__(self, *args, **kwargs):
    """
    Initialise the response and translate its Swift headers for S3 clients.

    After the base ``swob.Response`` initialisation, the headers are sorted
    into three groups:

    * ``self.sysmeta_headers``: s3api sysmeta (legacy swift3 sysmeta is
      renamed to its s3api equivalent, without overriding an s3api value
      that was already collected);
    * ``self.sw_headers``: every other header, kept untouched;
    * ``self.headers``: the translated set (``x-object-meta-*`` becomes
      ``x-amz-meta-*`` and a fixed list of entity headers is kept).

    ``self.is_slo`` is set from ``X-Static-Large-Object``.

    :param args: positional arguments forwarded to ``swob.Response``
    :param kwargs: keyword arguments forwarded to ``swob.Response``
    """
    swob.Response.__init__(self, *args, **kwargs)

    s3_sysmeta_headers = swob.HeaderKeyDict()
    sw_headers = swob.HeaderKeyDict()
    headers = HeaderKeyDict()
    self.is_slo = False

    def is_swift3_sysmeta(sysmeta_key, server_type):
        swift3_sysmeta_prefix = (
            'x-%s-sysmeta-swift3' % server_type).lower()
        return sysmeta_key.lower().startswith(swift3_sysmeta_prefix)

    def is_s3api_sysmeta(sysmeta_key, server_type):
        # Use the argument rather than the enclosing loop variable, so the
        # helper does not depend on when it is called.
        s3api_sysmeta_prefix = sysmeta_prefix(server_type).lower()
        return sysmeta_key.lower().startswith(s3api_sysmeta_prefix)

    for key, val in self.headers.items():
        if is_sys_meta('object', key) or is_sys_meta('container', key):
            # 'X-<server type>-Sysmeta-...': the second dash-separated
            # token is the server type.
            _server_type = key.split('-')[1]
            if is_swift3_sysmeta(key, _server_type):
                # To be compatible with older swift3, translate swift3
                # sysmeta to s3api sysmeta here
                key = sysmeta_prefix(_server_type) + \
                    key[len('x-%s-sysmeta-swift3-' % _server_type):]

                if key not in s3_sysmeta_headers:
                    # To avoid overwrite s3api sysmeta by older swift3
                    # sysmeta set the key only when the key does not exist
                    s3_sysmeta_headers[key] = val
            elif is_s3api_sysmeta(key, _server_type):
                s3_sysmeta_headers[key] = val
            else:
                sw_headers[key] = val
        else:
            sw_headers[key] = val

    # Handle swift headers
    for key, val in sw_headers.items():
        _key = key.lower()

        if _key.startswith('x-object-meta-'):
            # Note that AWS ignores user-defined headers with '=' in the
            # header name. We translated underscores to '=5F' on the way
            # in, though.
            headers['x-amz-meta-' + _key[14:].replace('=5f', '_')] = val
        elif _key in ('content-length', 'content-type',
                      'content-range', 'content-encoding',
                      'content-disposition', 'content-language',
                      'etag', 'last-modified', 'x-robots-tag',
                      'cache-control', 'expires'):
            headers[key] = val
        elif _key == 'x-static-large-object':
            # for delete slo
            self.is_slo = config_true_value(val)

    # Check whether we stored the AWS-style etag on upload
    override_etag = s3_sysmeta_headers.get(
        sysmeta_header('object', 'etag'))
    if override_etag is not None:
        # Multipart uploads in AWS have ETags like
        #   <MD5(part_etag1 || ... || part_etagN)>-<number of parts>
        headers['etag'] = override_etag
    elif self.is_slo and 'etag' in headers:
        # Many AWS clients use the presence of a '-' to decide whether
        # to attempt client-side download validation, so even if we
        # didn't store the AWS-style header, tack on a '-N'. (Use 'N'
        # because we don't actually know how many parts there are.)
        headers['etag'] += '-N'

    self.headers = headers

    if self.etag:
        # add double quotes to the etag header
        # NOTE(review): presumably the base class' etag setter does the
        # quoting, which is why this is not a no-op -- confirm.
        self.etag = self.etag

    # Used for pure swift header handling at the request layer
    self.sw_headers = sw_headers
    self.sysmeta_headers = s3_sysmeta_headers
def __init__(self, *args, **kwargs):
    """
    Initialise the response and translate its Swift headers for S3 clients.

    After the base ``swob.Response`` initialisation, s3api sysmeta headers
    (including legacy swift3 sysmeta, renamed to the s3api prefix) are
    collected into ``self.sysmeta_headers``, non-sysmeta headers into
    ``self.sw_headers``, and the S3-facing translation of the latter
    becomes ``self.headers``. ``self.is_slo`` is set from
    ``X-Static-Large-Object``.

    :param args: positional arguments forwarded to ``swob.Response``
    :param kwargs: keyword arguments forwarded to ``swob.Response``
    """
    swob.Response.__init__(self, *args, **kwargs)

    sw_sysmeta_headers = swob.HeaderKeyDict()
    sw_headers = swob.HeaderKeyDict()
    headers = HeaderKeyDict()
    self.is_slo = False

    def is_swift3_sysmeta(sysmeta_key, server_type):
        swift3_sysmeta_prefix = (
            'x-%s-sysmeta-swift3' % server_type).lower()
        return sysmeta_key.lower().startswith(swift3_sysmeta_prefix)

    def is_s3api_sysmeta(sysmeta_key, server_type):
        # Use the argument rather than the enclosing loop variable, so the
        # helper does not depend on when it is called.
        s3api_sysmeta_prefix = sysmeta_prefix(server_type).lower()
        return sysmeta_key.lower().startswith(s3api_sysmeta_prefix)

    for key, val in self.headers.items():
        if is_sys_meta('object', key) or is_sys_meta('container', key):
            # 'X-<server type>-Sysmeta-...': the second dash-separated
            # token is the server type.
            _server_type = key.split('-')[1]
            if is_swift3_sysmeta(key, _server_type):
                # To be compatible with older swift3, translate swift3
                # sysmeta to s3api sysmeta here
                key = sysmeta_prefix(_server_type) + \
                    key[len('x-%s-sysmeta-swift3-' % _server_type):]

                if key not in sw_sysmeta_headers:
                    # To avoid overwrite s3api sysmeta by older swift3
                    # sysmeta set the key only when the key does not exist
                    sw_sysmeta_headers[key] = val
            elif is_s3api_sysmeta(key, _server_type):
                sw_sysmeta_headers[key] = val
            # Sysmeta that is neither swift3 nor s3api is dropped here.
        else:
            sw_headers[key] = val

    # Handle swift headers
    for key, val in sw_headers.items():
        _key = key.lower()

        if _key.startswith('x-object-meta-'):
            # Note that AWS ignores user-defined headers with '=' in the
            # header name. We translated underscores to '=5F' on the way
            # in, though.
            headers['x-amz-meta-' + _key[14:].replace('=5f', '_')] = val
        elif _key in ('content-length', 'content-type',
                      'content-range', 'content-encoding',
                      'content-disposition', 'content-language',
                      'etag', 'last-modified', 'x-robots-tag',
                      'cache-control', 'expires'):
            headers[key] = val
        elif _key == 'x-static-large-object':
            # for delete slo
            self.is_slo = config_true_value(val)

    # Check whether we stored the AWS-style etag on upload
    override_etag = sw_sysmeta_headers.get(sysmeta_header(
        'object', 'etag'))
    if override_etag is not None:
        # Multipart uploads in AWS have ETags like
        #   <MD5(part_etag1 || ... || part_etagN)>-<number of parts>
        headers['etag'] = override_etag
    elif self.is_slo and 'etag' in headers:
        # Many AWS clients use the presence of a '-' to decide whether
        # to attempt client-side download validation, so even if we
        # didn't store the AWS-style header, tack on a '-N'. (Use 'N'
        # because we don't actually know how many parts there are.)
        headers['etag'] += '-N'

    self.headers = headers

    if self.etag:
        # add double quotes to the etag header
        # NOTE(review): presumably the base class' etag setter does the
        # quoting, which is why this is not a no-op -- confirm.
        self.etag = self.etag

    # Used for pure swift header handling at the request layer
    self.sw_headers = sw_headers
    self.sysmeta_headers = sw_sysmeta_headers
def __init__(self, *args, **kwargs):
    """
    Initialise the response and translate its Swift headers for S3 clients.

    After the base ``swob.Response`` initialisation, the headers are sorted
    into three groups:

    * ``self.sysmeta_headers``: s3api sysmeta (legacy swift3 sysmeta is
      renamed to its s3api equivalent, without overriding an s3api value
      that was already collected);
    * ``self.sw_headers``: every other header, kept untouched;
    * ``self.headers``: whatever ``translate_swift_to_s3`` returns for
      each entry of ``sw_headers`` (entries mapped to None are skipped).

    ``self.is_slo`` is set from ``X-Static-Large-Object``.

    :param args: positional arguments forwarded to ``swob.Response``
    :param kwargs: keyword arguments forwarded to ``swob.Response``
    """
    swob.Response.__init__(self, *args, **kwargs)

    s3_sysmeta_headers = swob.HeaderKeyDict()
    sw_headers = swob.HeaderKeyDict()
    headers = HeaderKeyDict()
    self.is_slo = False

    def is_swift3_sysmeta(sysmeta_key, server_type):
        swift3_sysmeta_prefix = (
            'x-%s-sysmeta-swift3' % server_type).lower()
        return sysmeta_key.lower().startswith(swift3_sysmeta_prefix)

    def is_s3api_sysmeta(sysmeta_key, server_type):
        # Use the argument rather than the enclosing loop variable, so the
        # helper does not depend on when it is called.
        s3api_sysmeta_prefix = sysmeta_prefix(server_type).lower()
        return sysmeta_key.lower().startswith(s3api_sysmeta_prefix)

    for key, val in self.headers.items():
        if is_sys_meta('object', key) or is_sys_meta('container', key):
            # 'X-<server type>-Sysmeta-...': the second dash-separated
            # token is the server type.
            _server_type = key.split('-')[1]
            if is_swift3_sysmeta(key, _server_type):
                # To be compatible with older swift3, translate swift3
                # sysmeta to s3api sysmeta here
                key = sysmeta_prefix(_server_type) + \
                    key[len('x-%s-sysmeta-swift3-' % _server_type):]

                if key not in s3_sysmeta_headers:
                    # To avoid overwrite s3api sysmeta by older swift3
                    # sysmeta set the key only when the key does not exist
                    s3_sysmeta_headers[key] = val
            elif is_s3api_sysmeta(key, _server_type):
                s3_sysmeta_headers[key] = val
            else:
                sw_headers[key] = val
        else:
            sw_headers[key] = val

    # Handle swift headers
    for key, val in sw_headers.items():
        s3_pair = translate_swift_to_s3(key, val)
        if s3_pair is None:
            continue
        headers[s3_pair[0]] = s3_pair[1]

    self.is_slo = config_true_value(
        sw_headers.get('x-static-large-object'))

    # Check whether we stored the AWS-style etag on upload
    override_etag = s3_sysmeta_headers.get(sysmeta_header(
        'object', 'etag'))
    if override_etag not in (None, ''):
        # Multipart uploads in AWS have ETags like
        #   <MD5(part_etag1 || ... || part_etagN)>-<number of parts>
        headers['etag'] = override_etag
    elif self.is_slo and 'etag' in headers:
        # Many AWS clients use the presence of a '-' to decide whether
        # to attempt client-side download validation, so even if we
        # didn't store the AWS-style header, tack on a '-N'. (Use 'N'
        # because we don't actually know how many parts there are.)
        headers['etag'] += '-N'

    self.headers = headers

    if self.etag:
        # add double quotes to the etag header
        # NOTE(review): presumably the base class' etag setter does the
        # quoting, which is why this is not a no-op -- confirm.
        self.etag = self.etag

    # Used for pure swift header handling at the request layer
    self.sw_headers = sw_headers
    self.sysmeta_headers = s3_sysmeta_headers
def POST(self, req):
    """
    Handles Complete Multipart Upload.

    Validates the part list sent by the client against a listing of the
    segments container, PUTs an SLO manifest (with heartbeating) for the
    final object, deletes the upload marker and streams back a
    ``CompleteMultipartUploadResult`` document.

    :param req: the S3 request; ``req.params['uploadId']`` must be present
    :returns: an ``HTTPOk`` response whose body is produced lazily; its
              status may still be replaced by an error status as long as
              nothing has been sent to the client
    :raises InvalidRequest: if the request body is empty
    :raises InvalidPartOrder: if part numbers are not strictly increasing
    :raises InvalidPart: if a part is unknown or its ETag does not match
    :raises MalformedXML: if the body does not parse or validate
    :raises EntityTooSmall: if a part other than the last one is smaller
                            than ``self.conf.min_segment_size``
    """
    upload_id = req.params['uploadId']
    resp = _get_upload_info(req, self.app, upload_id)
    # Ask for a JSON body from the manifest PUT; it is parsed below.
    headers = {'Accept': 'application/json'}
    # Carry the user metadata recorded on the upload marker over to the
    # final object.
    for key, val in resp.headers.items():
        _key = key.lower()
        if _key.startswith('x-amz-meta-'):
            headers['x-object-meta-' + _key[11:]] = val

    hct_header = sysmeta_header('object', 'has-content-type')
    if resp.sysmeta_headers.get(hct_header) == 'yes':
        content_type = resp.sysmeta_headers.get(
            sysmeta_header('object', 'content-type'))
    elif hct_header in resp.sysmeta_headers:
        # has-content-type is present but false, so no content type was
        # set on initial upload. In that case, we won't set one on our
        # PUT request. Swift will end up guessing one based on the
        # object name.
        content_type = None
    else:
        content_type = resp.headers.get('Content-Type')

    if content_type:
        headers['Content-Type'] = content_type

    # Query for the objects in the segments area to make sure it completed
    query = {
        'format': 'json',
        'prefix': '%s/%s/' % (req.object_name, upload_id),
        'delimiter': '/'
    }

    container = req.container_name + MULTIUPLOAD_SUFFIX
    resp = req.get_response(self.app, 'GET', container, '', query=query)
    objinfo = json.loads(resp.body)
    # Index the listing by object name so each requested part can be
    # looked up directly.
    objtable = dict((o['name'], {
        'path': '/'.join(['', container, o['name']]),
        'etag': o['hash'],
        'size_bytes': o['bytes']
    }) for o in objinfo)

    s3_etag_hasher = md5()
    manifest = []
    previous_number = 0
    try:
        xml = req.xml(MAX_COMPLETE_UPLOAD_BODY_SIZE)
        if not xml:
            raise InvalidRequest(msg='You must specify at least one part')

        complete_elem = fromstring(xml, 'CompleteMultipartUpload',
                                   self.logger)
        for part_elem in complete_elem.iterchildren('Part'):
            part_number = int(part_elem.find('./PartNumber').text)

            if part_number <= previous_number:
                raise InvalidPartOrder(upload_id=upload_id)
            previous_number = part_number

            etag = part_elem.find('./ETag').text
            if len(etag) >= 2 and etag[0] == '"' and etag[-1] == '"':
                # strip double quotes
                etag = etag[1:-1]

            info = objtable.get("%s/%s/%s" % (req.object_name, upload_id,
                                              part_number))
            if info is None or info['etag'] != etag:
                raise InvalidPart(upload_id=upload_id,
                                  part_number=part_number)

            # The S3-style ETag is the MD5 of the concatenated binary part
            # digests.
            # NOTE(review): str.decode('hex') only exists on Python 2.
            s3_etag_hasher.update(etag.decode('hex'))
            info['size_bytes'] = int(info['size_bytes'])
            manifest.append(info)
    except (XMLSyntaxError, DocumentInvalid):
        # NB: our schema definitions catch uploads with no parts here
        raise MalformedXML()
    except ErrorResponse:
        raise
    except Exception as e:
        self.logger.error(e)
        raise

    s3_etag = '%s-%d' % (s3_etag_hasher.hexdigest(), len(manifest))
    headers[sysmeta_header('object', 'etag')] = s3_etag
    # Leave base header value blank; SLO will populate
    c_etag = '; s3_etag=%s' % s3_etag
    headers['X-Object-Sysmeta-Container-Update-Override-Etag'] = c_etag

    # Check the size of each segment except the last and make sure they are
    # all more than the minimum upload chunk size
    for info in manifest[:-1]:
        if info['size_bytes'] < self.conf.min_segment_size:
            raise EntityTooSmall()

    def response_iter():
        # NB: XML requires that the XML declaration, if present, be at the
        # very start of the document. Clients *will* call us out on not
        # being valid XML if we pass through whitespace before it.
        # Track whether we've sent anything yet so we can yield out that
        # declaration *first*
        yielded_anything = False

        # ``resp`` below is the HTTPOk assigned after this function is
        # defined; this generator body only runs after that assignment.
        try:
            try:
                # TODO: add support for versioning
                put_resp = req.get_response(self.app, 'PUT',
                                            body=json.dumps(manifest),
                                            query={
                                                'multipart-manifest': 'put',
                                                'heartbeat': 'on'
                                            },
                                            headers=headers)
                if put_resp.status_int == 202:
                    body = []
                    put_resp.fix_conditional_response()
                    for chunk in put_resp.response_iter:
                        if not chunk.strip():
                            # Whitespace-only chunk: forward it to the
                            # client, after the XML declaration.
                            if not yielded_anything:
                                yield ('<?xml version="1.0" '
                                       'encoding="UTF-8"?>\n')
                            yielded_anything = True
                            yield chunk
                        body.append(chunk)
                    body = json.loads(''.join(body))
                    if body['Response Status'] != '201 Created':
                        raise InvalidRequest(
                            status=body['Response Status'],
                            msg='\n'.join(': '.join(err)
                                          for err in body['Errors']))
            except BadSwiftRequest as e:
                msg = str(e)
                expected_msg = ('too small; each segment must be '
                                'at least 1 byte')
                if expected_msg in msg:
                    # FIXME: AWS S3 allows a smaller object than 5 MB if
                    # there is only one part.  Use a COPY request to copy
                    # the part object from the segments container instead.
                    raise EntityTooSmall(msg)
                else:
                    raise

            # clean up the multipart-upload record
            obj = '%s/%s' % (req.object_name, upload_id)
            try:
                req.get_response(self.app, 'DELETE', container, obj)
            except NoSuchKey:
                # We know that this existed long enough for us to HEAD
                pass

            result_elem = Element('CompleteMultipartUploadResult')

            # NOTE: boto with sig v4 appends port to HTTP_HOST value at
            # the request header when the port is non default value and it
            # makes req.host_url like as http://localhost:8080:8080/path
            # that obviously invalid. Probably it should be resolved at
            # swift.common.swob though, tentatively we are parsing and
            # reconstructing the correct host_url info here.
            # in detail, https://github.com/boto/boto/pull/3513
            parsed_url = urlparse(req.host_url)
            host_url = '%s://%s' % (parsed_url.scheme, parsed_url.hostname)
            if parsed_url.port:
                host_url += ':%s' % parsed_url.port

            SubElement(result_elem, 'Location').text = host_url + req.path
            SubElement(result_elem, 'Bucket').text = req.container_name
            SubElement(result_elem, 'Key').text = req.object_name
            SubElement(result_elem, 'ETag').text = '"%s"' % s3_etag
            resp.headers.pop('ETag', None)
            if yielded_anything:
                yield '\n'
            # Only emit the XML declaration if it has not been sent yet.
            yield tostring(result_elem,
                           xml_declaration=not yielded_anything)
        except ErrorResponse as err_resp:
            if yielded_anything:
                # Bytes were already sent: the error can only be reported
                # in the body.
                err_resp.xml_declaration = False
                yield '\n'
            else:
                # Oh good, we can still change HTTP status code, too!
                resp.status = err_resp.status
            for chunk in err_resp({}, lambda *a: None):
                yield chunk

    resp = HTTPOk()  # assume we're good for now... but see above!
    resp.app_iter = reiterate(response_iter())
    resp.content_type = "application/xml"

    return resp
def test_decode_acl_empty_list(self):
    """An empty JSON list decodes to an ACL with no owner id or grants."""
    acl_key = sysmeta_header('container', 'acl')
    decoded = decode_acl('container', {acl_key: '[]'},
                         self.allow_no_owner)

    self.assertEqual(type(decoded), ACL)
    self.assertIsNone(decoded.owner.id)
    self.assertEqual(len(decoded.grants), 0)
def PUT(self, req):
    """
    Handles Upload Part and Upload Part Copy.

    The part is stored as ``<object>/<uploadId>/<partNumber>`` in the
    segments container. For a copy, an optional
    ``X-Amz-Copy-Source-Range`` is validated and turned into a ``Range``
    header on the request.

    :param req: the S3 request, carrying ``uploadId`` and ``partNumber``
                query parameters
    :returns: the response to the part PUT, with its status set to 200
    :raises InvalidArgument: if ``uploadId`` is missing, the part number is
                             out of range or not an integer, or the copy
                             source range is malformed or unsatisfiable
    """
    if 'uploadId' not in req.params:
        raise InvalidArgument('ResourceType', 'partNumber',
                              'Unexpected query string parameter')

    try:
        part_number = int(req.params['partNumber'])
        if not 1 <= part_number <= self.conf.max_upload_part_num:
            raise Exception()
    except Exception:
        # Any failure above (missing, non-numeric, out of range) is
        # reported to the client the same way.
        bad_part_msg = ('Part number must be an integer between 1 and %d,'
                        ' inclusive' % self.conf.max_upload_part_num)
        raise InvalidArgument('partNumber', req.params['partNumber'],
                              bad_part_msg)

    upload_id = req.params['uploadId']
    # Called for its checks only; the returned info is not needed here.
    _get_upload_info(req, self.app, upload_id)

    # Redirect the request to the part's location in the segments
    # container.
    req.container_name += MULTIUPLOAD_SUFFIX
    req.object_name = '%s/%s/%d' % (req.object_name, upload_id,
                                    part_number)

    req_timestamp = S3Timestamp.now()
    req.headers['X-Timestamp'] = req_timestamp.internal
    source_resp = req.check_copy_source(self.app)

    if 'X-Amz-Copy-Source' in req.headers and \
            'X-Amz-Copy-Source-Range' in req.headers:
        rng = req.headers['X-Amz-Copy-Source-Range']

        # Exactly one parseable byte range is accepted.
        try:
            rng_obj = Range(rng)
            is_single_range = len(rng_obj.ranges) == 1
        except ValueError:
            is_single_range = False

        if not is_single_range:
            bad_range_msg = ('The x-amz-copy-source-range value must be '
                             'of the form bytes=first-last where first '
                             'and last are the zero-based offsets of the '
                             'first and last bytes to copy')
            raise InvalidArgument('x-amz-source-range', rng, bad_range_msg)

        source_size = int(source_resp.headers['Content-Length'])
        if not rng_obj.ranges_for_length(source_size):
            unsatisfiable_msg = ('Range specified is not valid for source '
                                 'object of size: %s' % source_size)
            raise InvalidArgument('x-amz-source-range', rng,
                                  unsatisfiable_msg)

        req.headers['Range'] = rng
        del req.headers['X-Amz-Copy-Source-Range']

    if 'X-Amz-Copy-Source' in req.headers:
        # Clear some problematic headers that might be on the source
        blanked_headers = dict.fromkeys([
            sysmeta_header('object', 'etag'),
            'X-Object-Sysmeta-Swift3-Etag',  # for legacy data
            'X-Object-Sysmeta-Slo-Etag',
            'X-Object-Sysmeta-Slo-Size',
            get_container_update_override_key('etag'),
        ], '')
        req.headers.update(blanked_headers)

    resp = req.get_response(self.app)

    if 'X-Amz-Copy-Source' in req.headers:
        resp.append_copy_resp_body(req.controller_name,
                                   req_timestamp.s3xmlformat)

    resp.status = 200
    return resp
def POST(self, req):
    """
    Handles Complete Multipart Upload.

    Validates the part list sent by the client against a listing of the
    segments container, PUTs an SLO manifest for the final object (or an
    empty object when the manifest is empty), deletes the upload marker
    and returns a ``CompleteMultipartUploadResult`` document.

    :param req: the S3 request; ``req.params['uploadId']`` must be present
    :returns: the response of the final PUT, with its body replaced by the
              result XML and its status set to 200
    :raises InvalidRequest: if the request body is empty
    :raises InvalidPartOrder: if part numbers are not strictly increasing
    :raises InvalidPart: if a part is unknown or its ETag does not match
    :raises MalformedXML: if the body does not parse or validate
    :raises EntityTooSmall: if a part other than the last one is smaller
                            than ``self.conf.min_segment_size``, or Swift
                            rejects a segment as too small
    """
    upload_id = req.params['uploadId']
    resp = _get_upload_info(req, self.app, upload_id)
    headers = {}
    # Carry the user metadata recorded on the upload marker over to the
    # final object.
    # NOTE(review): iteritems() only exists on Python 2 mappings.
    for key, val in resp.headers.iteritems():
        _key = key.lower()
        if _key.startswith('x-amz-meta-'):
            headers['x-object-meta-' + _key[11:]] = val

    hct_header = sysmeta_header('object', 'has-content-type')
    if resp.sysmeta_headers.get(hct_header) == 'yes':
        content_type = resp.sysmeta_headers.get(
            sysmeta_header('object', 'content-type'))
    elif hct_header in resp.sysmeta_headers:
        # has-content-type is present but false, so no content type was
        # set on initial upload. In that case, we won't set one on our
        # PUT request. Swift will end up guessing one based on the
        # object name.
        content_type = None
    else:
        content_type = resp.headers.get('Content-Type')

    if content_type:
        headers['Content-Type'] = content_type

    # Query for the objects in the segments area to make sure it completed
    query = {
        'format': 'json',
        'prefix': '%s/%s/' % (req.object_name, upload_id),
        'delimiter': '/'
    }

    container = req.container_name + MULTIUPLOAD_SUFFIX
    resp = req.get_response(self.app, 'GET', container, '', query=query)
    objinfo = json.loads(resp.body)
    # Index the listing by object name so each requested part can be
    # looked up directly.
    objtable = dict((o['name'], {
        'path': '/'.join(['', container, o['name']]),
        'etag': o['hash'],
        'size_bytes': o['bytes']
    }) for o in objinfo)

    manifest = []
    previous_number = 0
    try:
        xml = req.xml(MAX_COMPLETE_UPLOAD_BODY_SIZE)
        if not xml:
            raise InvalidRequest(msg='You must specify at least one part')

        complete_elem = fromstring(xml, 'CompleteMultipartUpload',
                                   self.logger)
        for part_elem in complete_elem.iterchildren('Part'):
            part_number = int(part_elem.find('./PartNumber').text)

            if part_number <= previous_number:
                raise InvalidPartOrder(upload_id=upload_id)
            previous_number = part_number

            etag = part_elem.find('./ETag').text
            if len(etag) >= 2 and etag[0] == '"' and etag[-1] == '"':
                # strip double quotes
                etag = etag[1:-1]

            info = objtable.get("%s/%s/%s" % (req.object_name, upload_id,
                                              part_number))
            if info is None or info['etag'] != etag:
                raise InvalidPart(upload_id=upload_id,
                                  part_number=part_number)

            info['size_bytes'] = int(info['size_bytes'])
            manifest.append(info)
    except (XMLSyntaxError, DocumentInvalid):
        raise MalformedXML()
    except ErrorResponse:
        raise
    except Exception as e:
        self.logger.error(e)
        raise

    # Check the size of each segment except the last and make sure they are
    # all more than the minimum upload chunk size
    for info in manifest[:-1]:
        if info['size_bytes'] < self.conf.min_segment_size:
            raise EntityTooSmall()

    try:
        # TODO: add support for versioning
        if manifest:
            resp = req.get_response(self.app, 'PUT',
                                    body=json.dumps(manifest),
                                    query={'multipart-manifest': 'put'},
                                    headers=headers)
        else:
            # the upload must have consisted of a single zero-length part
            # just write it directly
            resp = req.get_response(self.app, 'PUT', body='',
                                    headers=headers)
    except BadSwiftRequest as e:
        msg = str(e)
        expected_msg = 'too small; each segment must be at least 1 byte'
        if expected_msg in msg:
            # FIXME: AWS S3 allows a smaller object than 5 MB if there is
            # only one part.  Use a COPY request to copy the part object
            # from the segments container instead.
            raise EntityTooSmall(msg)
        else:
            raise

    # clean up the multipart-upload record
    obj = '%s/%s' % (req.object_name, upload_id)
    try:
        req.get_response(self.app, 'DELETE', container, obj)
    except NoSuchKey:
        pass  # We know that this existed long enough for us to HEAD

    result_elem = Element('CompleteMultipartUploadResult')

    # NOTE: boto with sig v4 appends port to HTTP_HOST value at the
    # request header when the port is non default value and it makes
    # req.host_url like as http://localhost:8080:8080/path
    # that obviously invalid. Probably it should be resolved at
    # swift.common.swob though, tentatively we are parsing and
    # reconstructing the correct host_url info here.
    # in detail, https://github.com/boto/boto/pull/3513
    parsed_url = urlparse(req.host_url)
    host_url = '%s://%s' % (parsed_url.scheme, parsed_url.hostname)
    if parsed_url.port:
        host_url += ':%s' % parsed_url.port

    SubElement(result_elem, 'Location').text = host_url + req.path
    SubElement(result_elem, 'Bucket').text = req.container_name
    SubElement(result_elem, 'Key').text = req.object_name
    # The ETag reported to the client is the one from the final PUT
    # response.
    SubElement(result_elem, 'ETag').text = resp.etag

    resp.body = tostring(result_elem)
    resp.status = 200
    resp.content_type = "application/xml"

    return resp
def POST(self, req):
    """
    Handles Complete Multipart Upload.

    Builds an SLO manifest from the part list sent by the client, PUTs it
    (with heartbeating) for the final object, deletes the upload marker
    and streams back a ``CompleteMultipartUploadResult`` document. Part
    existence and ETags are not checked here; the errors reported by the
    manifest PUT are mapped to S3 errors instead.

    :param req: the S3 request; ``req.params['uploadId']`` must be present
    :returns: an ``HTTPOk`` response whose body is produced lazily; its
              status may still be replaced by an error status as long as
              nothing has been sent to the client
    :raises InvalidRequest: if the request body is empty
    :raises BadDigest: if a Content-MD5 was supplied and does not match
    :raises InvalidPartOrder: if part numbers are not strictly increasing
    :raises InvalidPart: if a part ETag is not 32 lowercase hex digits
    :raises MalformedXML: if the body does not parse or validate
    """
    upload_id = req.params['uploadId']
    resp = _get_upload_info(req, self.app, upload_id)
    # Ask for a JSON body from the manifest PUT; it is parsed below.
    headers = {'Accept': 'application/json'}
    # Carry the user metadata recorded on the upload marker over to the
    # final object.
    for key, val in resp.headers.items():
        _key = key.lower()
        if _key.startswith('x-amz-meta-'):
            headers['x-object-meta-' + _key[11:]] = val

    hct_header = sysmeta_header('object', 'has-content-type')
    if resp.sysmeta_headers.get(hct_header) == 'yes':
        content_type = resp.sysmeta_headers.get(
            sysmeta_header('object', 'content-type'))
    elif hct_header in resp.sysmeta_headers:
        # has-content-type is present but false, so no content type was
        # set on initial upload. In that case, we won't set one on our
        # PUT request. Swift will end up guessing one based on the
        # object name.
        content_type = None
    else:
        content_type = resp.headers.get('Content-Type')

    if content_type:
        headers['Content-Type'] = content_type

    container = req.container_name + MULTIUPLOAD_SUFFIX
    s3_etag_hasher = md5()
    manifest = []
    previous_number = 0
    try:
        xml = req.xml(MAX_COMPLETE_UPLOAD_BODY_SIZE)
        if not xml:
            raise InvalidRequest(msg='You must specify at least one part')
        if 'content-md5' in req.headers:
            # If an MD5 was provided, we need to verify it.
            # Note that S3Request already took care of translating to ETag
            if req.headers['etag'] != md5(xml).hexdigest():
                raise BadDigest(content_md5=req.headers['content-md5'])
            # We're only interested in the body here, in the
            # multipart-upload controller -- *don't* let it get
            # plumbed down to the object-server
            del req.headers['etag']

        complete_elem = fromstring(
            xml, 'CompleteMultipartUpload', self.logger)
        for part_elem in complete_elem.iterchildren('Part'):
            part_number = int(part_elem.find('./PartNumber').text)

            if part_number <= previous_number:
                raise InvalidPartOrder(upload_id=upload_id)
            previous_number = part_number

            etag = part_elem.find('./ETag').text
            if len(etag) >= 2 and etag[0] == '"' and etag[-1] == '"':
                # strip double quotes
                etag = etag[1:-1]
            # Reject anything that is not a 32-character lowercase hex
            # digest before it reaches a2b_hex below.
            if len(etag) != 32 or any(c not in '0123456789abcdef'
                                      for c in etag):
                raise InvalidPart(upload_id=upload_id,
                                  part_number=part_number)

            manifest.append({
                'path': '/%s/%s/%s/%d' % (
                    container, req.object_name, upload_id, part_number),
                'etag': etag})
            # The S3-style ETag is the MD5 of the concatenated binary part
            # digests.
            s3_etag_hasher.update(binascii.a2b_hex(etag))
    except (XMLSyntaxError, DocumentInvalid):
        # NB: our schema definitions catch uploads with no parts here
        raise MalformedXML()
    except ErrorResponse:
        raise
    except Exception as e:
        self.logger.error(e)
        raise

    s3_etag = '%s-%d' % (s3_etag_hasher.hexdigest(), len(manifest))
    headers[sysmeta_header('object', 'etag')] = s3_etag
    # Leave base header value blank; SLO will populate
    c_etag = '; s3_etag=%s' % s3_etag
    headers['X-Object-Sysmeta-Container-Update-Override-Etag'] = c_etag

    too_small_message = ('s3api requires that each segment be at least '
                         '%d bytes' % self.conf.min_segment_size)

    def size_checker(manifest):
        # Check the size of each segment except the last and make sure
        # they are all more than the minimum upload chunk size.
        # Note that we need to use the *internal* keys, since we're
        # looking at the manifest that's about to be written.
        return [
            (item['name'], too_small_message)
            for item in manifest[:-1]
            if item and item['bytes'] < self.conf.min_segment_size]

    # NOTE(review): presumably the SLO middleware calls this hook while
    # validating the manifest -- confirm against the SLO middleware.
    req.environ['swift.callback.slo_manifest_hook'] = size_checker
    start_time = time.time()

    def response_iter():
        # NB: XML requires that the XML declaration, if present, be at the
        # very start of the document. Clients *will* call us out on not
        # being valid XML if we pass through whitespace before it.
        # Track whether we've sent anything yet so we can yield out that
        # declaration *first*
        yielded_anything = False

        # ``resp`` below is the HTTPOk assigned after this function is
        # defined; this generator body only runs after that assignment.
        try:
            try:
                # TODO: add support for versioning
                put_resp = req.get_response(
                    self.app, 'PUT', body=json.dumps(manifest),
                    query={'multipart-manifest': 'put',
                           'heartbeat': 'on'},
                    headers=headers)
                if put_resp.status_int == 202:
                    body = []
                    put_resp.fix_conditional_response()
                    for chunk in put_resp.response_iter:
                        if not chunk.strip():
                            if time.time() - start_time < 10:
                                # Include some grace period to keep
                                # ceph-s3tests happy
                                continue
                            if not yielded_anything:
                                yield (b'<?xml version="1.0" '
                                       b'encoding="UTF-8"?>\n')
                            yielded_anything = True
                            yield chunk
                            continue
                        body.append(chunk)
                    body = json.loads(b''.join(body))
                    if body['Response Status'] != '201 Created':
                        # Map the per-segment errors onto S3 errors; fall
                        # back to InvalidRequest if none is recognised.
                        for seg, err in body['Errors']:
                            if err == too_small_message:
                                raise EntityTooSmall()
                            elif err in ('Etag Mismatch', '404 Not Found'):
                                raise InvalidPart(upload_id=upload_id)
                        raise InvalidRequest(
                            status=body['Response Status'],
                            msg='\n'.join(': '.join(err)
                                          for err in body['Errors']))
            except BadSwiftRequest as e:
                msg = str(e)
                if too_small_message in msg:
                    raise EntityTooSmall(msg)
                elif ', Etag Mismatch' in msg:
                    raise InvalidPart(upload_id=upload_id)
                elif ', 404 Not Found' in msg:
                    raise InvalidPart(upload_id=upload_id)
                else:
                    raise

            # clean up the multipart-upload record
            obj = '%s/%s' % (req.object_name, upload_id)
            try:
                req.get_response(self.app, 'DELETE', container, obj)
            except NoSuchKey:
                # We know that this existed long enough for us to HEAD
                pass

            result_elem = Element('CompleteMultipartUploadResult')

            # NOTE: boto with sig v4 appends port to HTTP_HOST value at
            # the request header when the port is non default value and it
            # makes req.host_url like as http://localhost:8080:8080/path
            # that obviously invalid. Probably it should be resolved at
            # swift.common.swob though, tentatively we are parsing and
            # reconstructing the correct host_url info here.
            # in detail, https://github.com/boto/boto/pull/3513
            parsed_url = urlparse(req.host_url)
            host_url = '%s://%s' % (parsed_url.scheme, parsed_url.hostname)
            # Why are we doing our own port parsing? Because py3 decided
            # to start raising ValueErrors on access after parsing such
            # an invalid port
            netloc = parsed_url.netloc.split('@')[-1].split(']')[-1]
            if ':' in netloc:
                port = netloc.split(':', 2)[1]
                host_url += ':%s' % port

            SubElement(result_elem, 'Location').text = host_url + req.path
            SubElement(result_elem, 'Bucket').text = req.container_name
            SubElement(result_elem, 'Key').text = req.object_name
            SubElement(result_elem, 'ETag').text = '"%s"' % s3_etag
            resp.headers.pop('ETag', None)
            if yielded_anything:
                yield b'\n'
            # Only emit the XML declaration if it has not been sent yet.
            yield tostring(result_elem,
                           xml_declaration=not yielded_anything)
        except ErrorResponse as err_resp:
            if yielded_anything:
                # Bytes were already sent: the error can only be reported
                # in the body.
                err_resp.xml_declaration = False
                yield b'\n'
            else:
                # Oh good, we can still change HTTP status code, too!
                resp.status = err_resp.status
            for chunk in err_resp({}, lambda *a: None):
                yield chunk

    resp = HTTPOk()  # assume we're good for now... but see above!
    resp.app_iter = reiterate(response_iter())
    resp.content_type = "application/xml"

    return resp
def POST(self, req):
    """
    Handles Complete Multipart Upload.

    Builds an SLO manifest from the part list sent by the client, PUTs it
    (with heartbeating) for the final object, deletes the upload marker
    and streams back a ``CompleteMultipartUploadResult`` document. Part
    existence and ETags are not checked here; the errors reported by the
    manifest PUT are mapped to S3 errors instead.

    :param req: the S3 request; ``req.params['uploadId']`` must be present
    :returns: an ``HTTPOk`` response whose body is produced lazily; its
              status may still be replaced by an error status as long as
              nothing has been sent to the client
    :raises InvalidRequest: if the request body is empty
    :raises BadDigest: if a Content-MD5 was supplied and does not match
    :raises InvalidPartOrder: if part numbers are not strictly increasing
    :raises InvalidPart: if a normalized part ETag is not 32 lowercase hex
                         digits
    :raises MalformedXML: if the body does not parse or validate
    """
    upload_id = req.params['uploadId']
    resp = _get_upload_info(req, self.app, upload_id)
    # Ask for a JSON body from the manifest PUT; it is parsed below.
    headers = {'Accept': 'application/json'}
    # Carry the user metadata recorded on the upload marker over to the
    # final object.
    for key, val in resp.headers.items():
        _key = key.lower()
        if _key.startswith('x-amz-meta-'):
            headers['x-object-meta-' + _key[11:]] = val

    hct_header = sysmeta_header('object', 'has-content-type')
    if resp.sysmeta_headers.get(hct_header) == 'yes':
        content_type = resp.sysmeta_headers.get(
            sysmeta_header('object', 'content-type'))
    elif hct_header in resp.sysmeta_headers:
        # has-content-type is present but false, so no content type was
        # set on initial upload. In that case, we won't set one on our
        # PUT request. Swift will end up guessing one based on the
        # object name.
        content_type = None
    else:
        content_type = resp.headers.get('Content-Type')

    if content_type:
        headers['Content-Type'] = content_type

    container = req.container_name + MULTIUPLOAD_SUFFIX
    s3_etag_hasher = md5()
    manifest = []
    previous_number = 0
    try:
        xml = req.xml(MAX_COMPLETE_UPLOAD_BODY_SIZE)
        if not xml:
            raise InvalidRequest(msg='You must specify at least one part')
        if 'content-md5' in req.headers:
            # If an MD5 was provided, we need to verify it.
            # Note that S3Request already took care of translating to ETag
            if req.headers['etag'] != md5(xml).hexdigest():
                raise BadDigest(content_md5=req.headers['content-md5'])
            # We're only interested in the body here, in the
            # multipart-upload controller -- *don't* let it get
            # plumbed down to the object-server
            del req.headers['etag']

        complete_elem = fromstring(xml, 'CompleteMultipartUpload',
                                   self.logger)
        for part_elem in complete_elem.iterchildren('Part'):
            part_number = int(part_elem.find('./PartNumber').text)

            if part_number <= previous_number:
                raise InvalidPartOrder(upload_id=upload_id)
            previous_number = part_number

            etag = normalize_etag(part_elem.find('./ETag').text)
            # Reject anything that is not a 32-character lowercase hex
            # digest before it reaches a2b_hex below.
            if len(etag) != 32 or any(c not in '0123456789abcdef'
                                      for c in etag):
                raise InvalidPart(upload_id=upload_id,
                                  part_number=part_number)

            manifest.append({
                'path': '/%s/%s/%s/%d' % (container, req.object_name,
                                          upload_id, part_number),
                'etag': etag
            })
            # The S3-style ETag is the MD5 of the concatenated binary part
            # digests.
            s3_etag_hasher.update(binascii.a2b_hex(etag))
    except (XMLSyntaxError, DocumentInvalid):
        # NB: our schema definitions catch uploads with no parts here
        raise MalformedXML()
    except ErrorResponse:
        raise
    except Exception as e:
        self.logger.error(e)
        raise

    s3_etag = '%s-%d' % (s3_etag_hasher.hexdigest(), len(manifest))
    headers[sysmeta_header('object', 'etag')] = s3_etag
    # Leave base header value blank; SLO will populate
    c_etag = '; s3_etag=%s' % s3_etag
    headers[get_container_update_override_key('etag')] = c_etag

    too_small_message = ('s3api requires that each segment be at least '
                         '%d bytes' % self.conf.min_segment_size)

    def size_checker(manifest):
        # Check the size of each segment except the last and make sure
        # they are all more than the minimum upload chunk size.
        # Note that we need to use the *internal* keys, since we're
        # looking at the manifest that's about to be written.
        return [(item['name'], too_small_message)
                for item in manifest[:-1]
                if item and item['bytes'] < self.conf.min_segment_size]

    # NOTE(review): presumably the SLO middleware calls this hook while
    # validating the manifest -- confirm against the SLO middleware.
    req.environ['swift.callback.slo_manifest_hook'] = size_checker
    start_time = time.time()

    def response_iter():
        # NB: XML requires that the XML declaration, if present, be at the
        # very start of the document. Clients *will* call us out on not
        # being valid XML if we pass through whitespace before it.
        # Track whether we've sent anything yet so we can yield out that
        # declaration *first*
        yielded_anything = False

        # ``resp`` below is the HTTPOk assigned after this function is
        # defined; this generator body only runs after that assignment.
        try:
            try:
                # TODO: add support for versioning
                put_resp = req.get_response(self.app, 'PUT',
                                            body=json.dumps(manifest),
                                            query={
                                                'multipart-manifest': 'put',
                                                'heartbeat': 'on'
                                            },
                                            headers=headers)
                if put_resp.status_int == 202:
                    body = []
                    put_resp.fix_conditional_response()
                    for chunk in put_resp.response_iter:
                        if not chunk.strip():
                            if time.time() - start_time < 10:
                                # Include some grace period to keep
                                # ceph-s3tests happy
                                continue
                            if not yielded_anything:
                                yield (b'<?xml version="1.0" '
                                       b'encoding="UTF-8"?>\n')
                            yielded_anything = True
                            yield chunk
                            continue
                        body.append(chunk)
                    body = json.loads(b''.join(body))
                    if body['Response Status'] != '201 Created':
                        # Map the per-segment errors onto S3 errors; fall
                        # back to InvalidRequest if none is recognised.
                        for seg, err in body['Errors']:
                            if err == too_small_message:
                                raise EntityTooSmall()
                            elif err in ('Etag Mismatch', '404 Not Found'):
                                raise InvalidPart(upload_id=upload_id)
                        raise InvalidRequest(
                            status=body['Response Status'],
                            msg='\n'.join(': '.join(err)
                                          for err in body['Errors']))
            except BadSwiftRequest as e:
                msg = str(e)
                if too_small_message in msg:
                    raise EntityTooSmall(msg)
                elif ', Etag Mismatch' in msg:
                    raise InvalidPart(upload_id=upload_id)
                elif ', 404 Not Found' in msg:
                    raise InvalidPart(upload_id=upload_id)
                else:
                    raise

            # clean up the multipart-upload record
            obj = '%s/%s' % (req.object_name, upload_id)
            try:
                req.get_response(self.app, 'DELETE', container, obj)
            except NoSuchKey:
                # We know that this existed long enough for us to HEAD
                pass

            result_elem = Element('CompleteMultipartUploadResult')

            # NOTE: boto with sig v4 appends port to HTTP_HOST value at
            # the request header when the port is non default value and it
            # makes req.host_url like as http://localhost:8080:8080/path
            # that obviously invalid. Probably it should be resolved at
            # swift.common.swob though, tentatively we are parsing and
            # reconstructing the correct host_url info here.
            # in detail, https://github.com/boto/boto/pull/3513
            parsed_url = urlparse(req.host_url)
            host_url = '%s://%s' % (parsed_url.scheme, parsed_url.hostname)
            # Why are we doing our own port parsing? Because py3 decided
            # to start raising ValueErrors on access after parsing such
            # an invalid port
            netloc = parsed_url.netloc.split('@')[-1].split(']')[-1]
            if ':' in netloc:
                port = netloc.split(':', 2)[1]
                host_url += ':%s' % port

            SubElement(result_elem, 'Location').text = host_url + req.path
            SubElement(result_elem, 'Bucket').text = req.container_name
            SubElement(result_elem, 'Key').text = req.object_name
            SubElement(result_elem, 'ETag').text = '"%s"' % s3_etag
            resp.headers.pop('ETag', None)
            if yielded_anything:
                yield b'\n'
            # Only emit the XML declaration if it has not been sent yet.
            yield tostring(result_elem,
                           xml_declaration=not yielded_anything)
        except ErrorResponse as err_resp:
            if yielded_anything:
                # Bytes were already sent: the error can only be reported
                # in the body.
                err_resp.xml_declaration = False
                yield b'\n'
            else:
                # Oh good, we can still change HTTP status code, too!
                resp.status = err_resp.status
            for chunk in err_resp({}, lambda *a: None):
                yield chunk

    resp = HTTPOk()  # assume we're good for now... but see above!
    resp.app_iter = reiterate(response_iter())
    resp.content_type = "application/xml"

    return resp