def GET(self, req):
    """
    Handles GET Bucket acl and GET Object acl.

    The request is forwarded to the backend; the ACL attached to the
    backend response (object ACL for object requests, bucket ACL
    otherwise) becomes the body of a fresh 200 response.
    """
    backend_resp = req.get_response(self.app)

    if req.is_object_request:
        acl = backend_resp.object_acl
    else:
        acl = backend_resp.bucket_acl

    ok_resp = HTTPOk()
    ok_resp.body = tostring(acl.elem())
    return ok_resp
def GET(self, req):
    """
    Handle GET Bucket (List Objects) request

    The backend listing is decoded from JSON, cut down to ``max-keys``
    entries and handed to the result builder matching the listing type.
    """
    requested_max_keys = req.get_validated_param(
        'max-keys', self.conf.max_bucket_listing)
    # TODO: Separate max_bucket_listing and default_bucket_listing
    max_keys = min(requested_max_keys, self.conf.max_bucket_listing)

    encoding_type, query, listing_type, fetch_owner = \
        self._parse_request_options(req, max_keys)

    listing_resp = req.get_response(self.app, query=query)
    listed = json.loads(listing_resp.body)

    # Truncation is judged on the full backend listing, before trimming.
    is_truncated = max_keys > 0 and len(listed) > max_keys
    objects = listed[:max_keys]

    # Anything that is not a versions or v2 listing uses the v1 builder.
    builder_names = {
        'object-versions': '_build_versions_result',
        'version-2': '_build_list_bucket_result_type_two',
    }
    build_result = getattr(self, builder_names.get(
        listing_type, '_build_list_bucket_result_type_one'))

    elem = build_result(
        req, objects, encoding_type, requested_max_keys, is_truncated)
    self._add_objects_to_result(
        req, elem, objects, encoding_type, listing_type, fetch_owner)

    return HTTPOk(body=tostring(elem), content_type='application/xml')
def GET(self, req):  # pylint: disable=invalid-name
    """
    Handles GET Bucket and Object tagging.

    The tagging document is read from the sysmeta of a backend HEAD.
    A bucket without a stored document raises NoSuchTagSet; an object
    without one gets an empty ``Tagging``/``TagSet`` document.
    """
    command = ('get-object-tagging' if req.is_object_request
               else 'get-bucket-tagging')
    self.set_s3api_command(req, command)

    head_resp = req._get_response(
        self.app, 'HEAD', req.container_name, req.object_name)

    extra_headers = {}
    if req.is_object_request:
        tagging = head_resp.sysmeta_headers.get(OBJECT_TAGGING_HEADER)
        # It seems that S3 returns x-amz-version-id,
        # even if it is not documented.
        extra_headers['x-amz-version-id'] = \
            head_resp.sw_headers[VERSION_ID_HEADER]
    else:
        tagging = head_resp.sysmeta_headers.get(BUCKET_TAGGING_HEADER)
    close_if_possible(head_resp.app_iter)

    if tagging:
        return HTTPOk(body=tagging, content_type='application/xml',
                      headers=extra_headers)

    if not req.is_object_request:
        raise NoSuchTagSet(headers=extra_headers)

    # Object without stored tags: answer with an empty tag set.
    empty_tagging = Element('Tagging')
    SubElement(empty_tagging, 'TagSet')
    return HTTPOk(body=tostring(empty_tagging),
                  content_type='application/xml',
                  headers=extra_headers)
def PUT(self, req):  # pylint: disable=invalid-name
    """
    Handles PUT Bucket and Object tagging.

    The XML body is validated against the ``Tagging`` schema, stored in
    the object or bucket tagging header of the request and sent to the
    backend as a POST. A 202 from the backend is turned into a 200.
    """
    command = ('put-object-tagging' if req.is_object_request
               else 'put-bucket-tagging')
    self.set_s3api_command(req, command)

    tagging_xml = req.xml(MAX_TAGGING_BODY_SIZE)
    try:
        # Just validate the body
        fromstring(tagging_xml, 'Tagging')
    except (DocumentInvalid, XMLSyntaxError) as exc:
        raise MalformedXML(str(exc))

    tagging_header = (OBJECT_TAGGING_HEADER if req.object_name
                      else BUCKET_TAGGING_HEADER)
    req.headers[tagging_header] = tagging_xml

    backend_resp = req._get_response(
        self.app, 'POST', req.container_name, req.object_name)
    if backend_resp.status_int != 202:
        return backend_resp

    ok_headers = {}
    if req.object_name:
        ok_headers['x-amz-version-id'] = \
            backend_resp.sw_headers[VERSION_ID_HEADER]
    return HTTPOk(headers=ok_headers)
def HEAD(self, req):
    """
    Handle HEAD Bucket (Get Metadata) request

    Returns a 200 carrying the headers of the backend response.
    """
    return HTTPOk(headers=req.get_response(self.app).headers)
def PUT(self, req):
    """
    Handles PUT Bucket versioning.

    :param req: the S3 request; its body is read with ``req.xml`` and
                parsed as a ``VersioningConfiguration`` document
    :returns: an empty HTTPOk once the POST has been sent to the backend
    :raises S3NotImplemented: if ``object_versioning`` is not listed in
                              the swift info
    :raises MalformedXML: if the body cannot be parsed or validated, or
                          if ``Status`` is missing or is neither
                          ``Enabled`` nor ``Suspended``
    """
    self.set_s3api_command(req, 'put-bucket-versioning')

    if 'object_versioning' not in get_swift_info():
        raise S3NotImplemented()

    xml = req.xml(MAX_PUT_VERSIONING_BODY_SIZE)
    try:
        elem = fromstring(xml, 'VersioningConfiguration')
        status_elem = elem.find('./Status')
    except (XMLSyntaxError, DocumentInvalid):
        raise MalformedXML()
    except Exception as e:
        self.logger.error(e)
        raise

    # find() returns None when there is no <Status> child. Treat that like
    # any other unusable status instead of failing with AttributeError.
    status = status_elem.text if status_elem is not None else None
    if status not in ('Enabled', 'Suspended'):
        raise MalformedXML()

    # Set up versioning
    # NB: object_versioning responsible for ensuring its container exists
    req.headers['X-Versions-Enabled'] = str(status == 'Enabled').lower()
    req.get_response(self.app, 'POST')

    return HTTPOk()
def GET(self, req):
    """
    Handles GET Bucket acl and GET Object acl.

    Tags the request with the matching s3api command, forwards it to the
    backend and returns the ACL found on the backend response as the
    body of a fresh 200.
    """
    command = 'get-object-acl' if req.is_object_request else 'get-bucket-acl'
    self.set_s3api_command(req, command)

    backend_resp = req.get_response(self.app)

    if req.is_object_request:
        acl = backend_resp.object_acl
    else:
        acl = backend_resp.bucket_acl

    ok_resp = HTTPOk()
    ok_resp.body = tostring(acl.elem())
    return ok_resp
def GET(self, req):  # pylint: disable=invalid-name
    """
    Handles GET Bucket CORS.

    Returns the ``s3api-cors`` value from the container sysmeta as the
    response body, or raises NoSuchCORSConfiguration when it is empty or
    absent.
    """
    container_info = req.get_container_info(self.app)
    cors_body = container_info.get('sysmeta', {}).get('s3api-cors')
    if cors_body:
        return HTTPOk(body=cors_body, content_type='application/xml')
    raise NoSuchCORSConfiguration
def HEAD(self, req):
    """
    Handle HEAD Bucket (Get Metadata) request

    Tags the request as ``head-bucket`` and returns a 200 carrying the
    headers of the backend response.
    """
    self.set_s3api_command(req, 'head-bucket')
    return HTTPOk(headers=req.get_response(self.app).headers)
def GET(self, req):
    """
    Handles GET Bucket and Object tagging.

    Always answers with an empty ``Tagging`` document holding a single
    empty ``TagSet``; the request itself is not inspected.
    """
    tagging = Element('Tagging')
    SubElement(tagging, 'TagSet')
    return HTTPOk(body=tostring(tagging), content_type=None)
def POST(self, req):
    """
    Handles Initiate Multipart Upload.

    Records the client's content type in object sysmeta, makes sure the
    segments container exists (creating it with the primary bucket's
    storage policy and ACLs when it does not), writes an empty marker
    object named ``<key>/<upload id>`` there and returns the upload id.
    """
    # Create a unique S3 upload id from UUID to avoid duplicates.
    upload_id = unique_id()

    seg_container = req.container_name + MULTIUPLOAD_SUFFIX

    client_content_type = req.headers.get('Content-Type')
    has_ct_key = sysmeta_header('object', 'has-content-type')
    if client_content_type:
        req.headers[has_ct_key] = 'yes'
        req.headers[sysmeta_header('object', 'content-type')] = \
            client_content_type
    else:
        req.headers[has_ct_key] = 'no'
    req.headers['Content-Type'] = 'application/directory'

    try:
        # Probe the segments container through a copy of the request so
        # the original request keeps pointing at the primary bucket.
        seg_req = copy.copy(req)
        seg_req.environ = copy.copy(req.environ)
        seg_req.container_name = seg_container
        seg_req.get_container_info(self.app)
    except NoSuchBucket:
        try:
            # multi-upload bucket doesn't exist, create one with
            # same storage policy and acls as the primary bucket
            info = req.get_container_info(self.app)
            create_headers = {
                'X-Storage-Policy': POLICIES[info['storage_policy']].name,
            }
            for info_key, header_name in (
                    ('read_acl', 'X-Container-Read'),
                    ('write_acl', 'X-Container-Write')):
                acl_value = info.get(info_key)
                if acl_value:
                    create_headers[header_name] = acl_value
            seg_req.get_response(self.app, 'PUT', seg_container, '',
                                 headers=create_headers)
        except (BucketAlreadyExists, BucketAlreadyOwnedByYou):
            pass

    marker_obj = '%s/%s' % (req.object_name, upload_id)

    req.headers.pop('Etag', None)
    req.headers.pop('Content-Md5', None)

    req.get_response(self.app, 'PUT', seg_container, marker_obj, body='')

    result_elem = Element('InitiateMultipartUploadResult')
    for tag, text in (('Bucket', req.container_name),
                      ('Key', req.object_name),
                      ('UploadId', upload_id)):
        SubElement(result_elem, tag).text = text

    return HTTPOk(body=tostring(result_elem),
                  content_type='application/xml')
def GET(self, req):
    """
    Handles GET Bucket logging.

    Issues a HEAD to the backend and then reports an empty
    ``BucketLoggingStatus`` document.
    """
    req.get_response(self.app, method='HEAD')

    # logging disabled
    logging_status = Element('BucketLoggingStatus')
    return HTTPOk(body=tostring(logging_status),
                  content_type='application/xml')
def GET(self, req):
    """
    Handles GET Bucket versioning.

    Issues a HEAD to the backend and then returns an empty
    ``VersioningConfiguration`` document.
    """
    req.get_response(self.app, method='HEAD')

    # Just report there is no versioning configured here.
    versioning = Element('VersioningConfiguration')
    return HTTPOk(body=tostring(versioning), content_type="text/plain")
def GET(self, req):
    """
    Handles GET Bucket location.

    Issues a HEAD to the backend, then returns a ``LocationConstraint``
    element whose text is the configured location, left empty when that
    location is ``US``.
    """
    req.get_response(self.app, method='HEAD')

    constraint = Element('LocationConstraint')
    if self.conf.location != 'US':
        constraint.text = self.conf.location

    return HTTPOk(body=tostring(constraint),
                  content_type='application/xml')
def GET(self, req):
    """
    Handles GET Bucket versioning.

    Reads ``versions-enabled`` from the container sysmeta. When it is
    set, the document gets a ``Status`` of ``Enabled`` or ``Suspended``;
    otherwise the ``VersioningConfiguration`` element is left empty.
    """
    sysmeta = req.get_container_info(self.app).get('sysmeta', {})

    versioning = Element('VersioningConfiguration')
    versions_enabled = sysmeta.get('versions-enabled')
    if versions_enabled:
        if config_true_value(versions_enabled):
            status = 'Enabled'
        else:
            status = 'Suspended'
        SubElement(versioning, 'Status').text = status

    return HTTPOk(body=tostring(versioning), content_type=None)
def PUT(self, req):
    """
    Handles PUT Bucket acl and PUT Object acl.

    Tags the request with the matching s3api command and forwards it to
    the backend as a POST.
    """
    command = 'put-object-acl' if req.is_object_request else 'put-bucket-acl'
    self.set_s3api_command(req, command)

    # ACLs will be set as sysmeta
    req.get_response(self.app, 'POST')

    return HTTPOk()
def GET(self, req):
    """
    Handle GET Bucket (List Objects) request

    Fetches a JSON listing from the backend, trims it to ``max-keys``
    entries, builds the result document for the listing type and, when
    the request has an ``Origin`` header with a matching CORS rule, adds
    the CORS headers to the response.
    """
    tag_max_keys = req.get_validated_param(
        'max-keys', self.conf.max_bucket_listing)
    # TODO: Separate max_bucket_listing and default_bucket_listing
    max_keys = min(tag_max_keys, self.conf.max_bucket_listing)

    encoding_type, query, listing_type, fetch_owner = \
        self._parse_request_options(req, max_keys)

    # Any listing type other than these two is a plain v1 listing.
    commands = {
        'object-versions': 'list-object-versions',
        'version-2': 'list-objects-v2',
    }
    self.set_s3api_command(req, commands.get(listing_type, 'list-objects'))

    query['format'] = 'json'
    listing_resp = req.get_response(self.app, query=query)
    listed = json.loads(listing_resp.body)

    # Truncation is judged on the full backend listing, before trimming.
    is_truncated = max_keys > 0 and len(listed) > max_keys
    objects = listed[:max_keys]

    if listing_type not in ('object-versions', 'version-2'):
        elem = self._build_list_bucket_result_type_one(
            req, objects, encoding_type, is_truncated)
    elif listing_type == 'version-2':
        elem = self._build_list_bucket_result_type_two(
            req, objects, is_truncated)
    else:
        elem = self._build_versions_result(req, objects, is_truncated)

    self._finish_result(
        req, elem, tag_max_keys, encoding_type, is_truncated)
    self._add_objects_to_result(
        req, elem, objects, encoding_type, listing_type, fetch_owner)

    ok_resp = HTTPOk(body=tostring(elem), content_type='application/xml')

    origin = req.headers.get('Origin')
    rule = get_cors(self.app, req, "GET", origin) if origin else None
    if rule:
        cors_fill_headers(req, ok_resp, rule)
    return ok_resp
def GET(self, req):
    """
    Handles GET Bucket location.

    Tags the request as ``get-bucket-location``, issues a HEAD to the
    backend, then returns a ``LocationConstraint`` element whose text is
    the configured location, left empty when that is ``us-east-1``.
    """
    self.set_s3api_command(req, 'get-bucket-location')

    req.get_response(self.app, method='HEAD')

    constraint = Element('LocationConstraint')
    if self.conf.location != 'us-east-1':
        constraint.text = self.conf.location

    return HTTPOk(body=tostring(constraint),
                  content_type='application/xml')
def PUT(self, req):
    """
    Handles PUT Bucket acl and PUT Object acl.

    Bucket requests are forwarded as a POST. Object requests are sent as
    a zero-length PUT that copies the object onto itself.
    """
    if not req.is_object_request:
        req.get_response(self.app, 'POST')
        return HTTPOk()

    src_path = '/%s/%s' % (req.container_name, req.object_name)
    # object-sysmeta' can be updated by 'Copy' method,
    # but can not be by 'POST' method.
    # So headers['X-Copy-From'] for copy request is added here.
    copy_headers = {
        'X-Copy-From': quote(src_path),
        'Content-Length': 0,
    }
    req.get_response(self.app, 'PUT', headers=copy_headers)
    return HTTPOk()
def OPTIONS(self, req):
    """
    Handles OPTIONS requests carrying CORS request headers.

    Requires an ``Origin`` header and an allowed
    ``Access-Control-Request-Method``, looks up the matching CORS rule
    and returns a body-less 200 filled in by ``cors_fill_headers``.
    """
    request_origin = req.headers.get('Origin')
    if not request_origin:
        raise CORSOriginMissing()

    requested_method = req.headers.get('Access-Control-Request-Method')
    if method_not_allowed := (
            requested_method not in CORS_ALLOWED_HTTP_METHOD):
        raise CORSInvalidAccessControlRequest(method=requested_method)

    matching_rule = get_cors(
        self.app, req, requested_method, request_origin)
    # FIXME(mbo): we should raise also NoSuchCORSConfiguration
    if matching_rule is None:
        raise CORSForbidden(requested_method)

    ok_resp = HTTPOk(body=None)
    del ok_resp.headers['Content-Type']

    return cors_fill_headers(req, ok_resp, matching_rule)
def POST(self, req):
    """
    Handles Initiate Multipart Upload.

    Records the client's content type in object sysmeta, makes sure the
    segments container exists, writes an empty marker object named
    ``<key>/<upload id>`` there and returns the upload id.
    """
    # Create a unique S3 upload id from UUID to avoid duplicates.
    upload_id = unique_id()

    primary_container = req.container_name
    seg_container = primary_container + MULTIUPLOAD_SUFFIX

    client_content_type = req.headers.get('Content-Type')
    has_ct_key = sysmeta_header('object', 'has-content-type')
    if not client_content_type:
        req.headers[has_ct_key] = 'no'
    else:
        req.headers[has_ct_key] = 'yes'
        req.headers[sysmeta_header('object', 'content-type')] = \
            client_content_type
    req.headers['Content-Type'] = 'application/directory'

    try:
        # Point the request at the segments container just long enough
        # to probe it; the finally clause restores the primary bucket.
        req.container_name = seg_container
        req.get_container_info(self.app)
    except NoSuchBucket:
        try:
            req.get_response(self.app, 'PUT', seg_container, '')
        except (BucketAlreadyExists, BucketAlreadyOwnedByYou):
            pass
    finally:
        req.container_name = primary_container

    marker_obj = '%s/%s' % (req.object_name, upload_id)

    for stale_header in ('Etag', 'Content-Md5'):
        req.headers.pop(stale_header, None)

    req.get_response(self.app, 'PUT', seg_container, marker_obj, body='')

    result_elem = Element('InitiateMultipartUploadResult')
    bucket_elem = SubElement(result_elem, 'Bucket')
    bucket_elem.text = req.container_name
    key_elem = SubElement(result_elem, 'Key')
    key_elem.text = req.object_name
    upload_id_elem = SubElement(result_elem, 'UploadId')
    upload_id_elem.text = upload_id

    return HTTPOk(body=tostring(result_elem),
                  content_type='application/xml')
def GET(self, req):
    """
    Handle GET Service request

    Lists the account's containers and reports every one whose name
    passes ``validate_bucket_name`` as a bucket. When both ``s3_acl``
    and ``check_bucket_owner`` are set, each container is HEADed first
    and skipped on AccessDenied or NoSuchBucket.
    """
    self.set_s3api_command(req, 'list-buckets')

    account_resp = req.get_response(self.app, query={'format': 'json'})
    containers = json.loads(account_resp.body)

    result = Element('ListAllMyBucketsResult')

    owner = SubElement(result, 'Owner')
    for tag in ('ID', 'DisplayName'):
        SubElement(owner, tag).text = req.user_id

    buckets = SubElement(result, 'Buckets')

    for entry in containers:
        if not validate_bucket_name(
                entry['name'], self.conf.dns_compliant_bucket_names):
            continue

        if self.conf.s3_acl and self.conf.check_bucket_owner:
            wsgi_name = bytes_to_wsgi(entry['name'].encode('utf8'))
            try:
                req.get_response(self.app, 'HEAD', wsgi_name)
            except (AccessDenied, NoSuchBucket):
                continue

        bucket = SubElement(buckets, 'Bucket')
        SubElement(bucket, 'Name').text = entry['name']
        # we don't keep the creation time of a bucket (s3cmd doesn't
        # work without that) so we use something bogus.
        SubElement(bucket, 'CreationDate').text = \
            '2009-02-03T16:45:09.000Z'

    return HTTPOk(content_type='application/xml', body=tostring(result))
def get_acl(account_name, headers):
    """
    Attempts to construct an S3 ACL based on what is found in the swift
    headers

    The account always receives FULL_CONTROL. A READ and/or WRITE grant
    for the AllUsers group is added when the ``x-container-read`` /
    ``x-container-write`` header allows the referrer ``unknown``.
    """
    all_users_uri = 'http://acs.amazonaws.com/groups/global/AllUsers'

    def new_grant(parent, grantee_type):
        # Append a Grant holding a typed Grantee; the caller fills in the
        # grantee details and then appends the Permission.
        grant = SubElement(parent, 'Grant')
        grantee = SubElement(grant, 'Grantee', nsmap={'xsi': XMLNS_XSI})
        grantee.set('{%s}type' % XMLNS_XSI, grantee_type)
        return grant, grantee

    policy = Element('AccessControlPolicy')
    owner = SubElement(policy, 'Owner')
    for tag in ('ID', 'DisplayName'):
        SubElement(owner, tag).text = account_name
    grant_list = SubElement(policy, 'AccessControlList')

    # grant FULL_CONTROL to myself by default
    grant, grantee = new_grant(grant_list, 'CanonicalUser')
    for tag in ('ID', 'DisplayName'):
        SubElement(grantee, tag).text = account_name
    SubElement(grant, 'Permission').text = 'FULL_CONTROL'

    # grant public-read and/or public-write access
    for acl_header, permission in (('x-container-read', 'READ'),
                                   ('x-container-write', 'WRITE')):
        referrers, _ = parse_acl(headers.get(acl_header))
        if not referrer_allowed('unknown', referrers):
            continue
        grant, grantee = new_grant(grant_list, 'Group')
        SubElement(grantee, 'URI').text = all_users_uri
        SubElement(grant, 'Permission').text = permission

    return HTTPOk(body=tostring(policy), content_type="text/plain")
def POST(self, req):
    """
    Handles Delete Multiple Objects.

    The ``Delete`` XML body is parsed into a list of (key, version)
    pairs, the bucket is HEADed, and then one DELETE per key is issued
    through a ``StreamingPile``. Each outcome is reported in a
    ``DeleteResult`` document.

    :param req: the S3 request
    :returns: HTTPOk whose body is the serialized ``DeleteResult``, or
              the output of ``_gen_error_body`` when the bucket HEAD
              raises AccessDenied
    :raises MissingRequestBodyError: if the request body is empty
    :raises UserKeyMustBeSpecified: if an ``Object`` has an empty ``Key``
    :raises MalformedXML: if the body fails to parse or validate, or
                          lists more than ``max_multi_delete_objects``
                          keys
    :raises S3NotImplemented: if any ``Object`` carries a ``VersionId``
    """
    def object_key_iter(elem):
        # Yield a (key, version) pair for every <Object> child; version is
        # None when the object has no <VersionId> element.
        for obj in elem.iterchildren('Object'):
            key = obj.find('./Key').text
            if not key:
                raise UserKeyMustBeSpecified()
            version = obj.find('./VersionId')
            if version is not None:
                version = version.text

            yield key, version

    max_body_size = min(
        # FWIW, AWS limits multideletes to 1000 keys, and swift limits
        # object names to 1024 bytes (by default). Add a factor of two to
        # allow some slop.
        2 * self.conf.max_multi_delete_objects * MAX_OBJECT_NAME_LENGTH,
        # But, don't let operators shoot themselves in the foot
        10 * 1024 * 1024)

    try:
        xml = req.xml(max_body_size)
        if not xml:
            raise MissingRequestBodyError()

        req.check_md5(xml)
        elem = fromstring(xml, 'Delete', self.logger)

        # NOTE(review): the quiet flag is stored on self rather than in a
        # local; confirm the controller instance is not shared between
        # requests.
        quiet = elem.find('./Quiet')
        if quiet is not None and quiet.text.lower() == 'true':
            self.quiet = True
        else:
            self.quiet = False

        # Materialize the generator here so that errors it raises are
        # handled by the except clauses below.
        delete_list = list(object_key_iter(elem))
        if len(delete_list) > self.conf.max_multi_delete_objects:
            raise MalformedXML()
    except (XMLSyntaxError, DocumentInvalid):
        raise MalformedXML()
    except ErrorResponse:
        # S3 errors raised above propagate unchanged (and unlogged).
        raise
    except Exception as e:
        self.logger.error(e)
        raise

    elem = Element('DeleteResult')

    # check bucket existence
    try:
        req.get_response(self.app, 'HEAD')
    except AccessDenied as error:
        body = self._gen_error_body(error, elem, delete_list)
        return HTTPOk(body=body)

    if any(version is not None for _key, version in delete_list):
        # TODO: support deleting specific versions of objects
        raise S3NotImplemented()

    def do_delete(base_req, key, version):
        # Delete a single key and return (key, error), where error is None
        # on success (or when the key does not exist) and a dict with
        # 'code' and 'message' otherwise. ``version`` is accepted but not
        # used here.
        # Work on a copy so each delete has its own request and environ.
        req = copy.copy(base_req)
        req.environ = copy.copy(base_req.environ)
        req.object_name = key

        try:
            query = req.gen_multipart_manifest_delete_query(self.app)
            resp = req.get_response(self.app, method='DELETE', query=query,
                                    headers={'Accept': 'application/json'})
            # Have to read the response to actually do the SLO delete
            if query:
                try:
                    delete_result = json.loads(resp.body)
                    if delete_result['Errors']:
                        # NB: bulk includes 404s in "Number Not Found",
                        # not "Errors"
                        msg_parts = [delete_result['Response Status']]
                        msg_parts.extend(
                            '%s: %s' % (obj, status)
                            for obj, status in delete_result['Errors'])
                        return key, {
                            'code': 'SLODeleteError',
                            'message': '\n'.join(msg_parts)
                        }
                    # else, all good
                except (ValueError, TypeError, KeyError):
                    # Logs get all the gory details
                    self.logger.exception(
                        'Could not parse SLO delete response: %r',
                        resp.body)
                    # Client gets something more generic
                    return key, {
                        'code': 'SLODeleteError',
                        'message': 'Unexpected swift response'
                    }
        except NoSuchKey:
            # A missing key is reported as a successful delete.
            pass
        except ErrorResponse as e:
            return key, {'code': e.__class__.__name__, 'message': e._msg}
        return key, None

    with StreamingPile(self.conf.multi_delete_concurrency) as pile:
        for key, err in pile.asyncstarmap(
                do_delete,
                ((req, key, version) for key, version in delete_list)):
            if err:
                error = SubElement(elem, 'Error')
                SubElement(error, 'Key').text = key
                SubElement(error, 'Code').text = err['code']
                SubElement(error, 'Message').text = err['message']
            elif not self.quiet:
                # In quiet mode only the failures are listed.
                deleted = SubElement(elem, 'Deleted')
                SubElement(deleted, 'Key').text = key

    body = tostring(elem)

    return HTTPOk(body=body)
def POST(self, req):
    """
    Handles Complete Multipart Upload.

    Builds a manifest from the ``Part`` entries of the request body and
    PUTs it with ``multipart-manifest=put`` and ``heartbeat=on``. The
    response body is produced lazily by ``response_iter``, so the PUT
    and the cleanup of the upload marker run while the response is being
    iterated.

    :param req: the S3 request; ``uploadId`` is read from its params
    :returns: an HTTPOk response whose ``app_iter`` yields the
              ``CompleteMultipartUploadResult`` document, or the
              rendered error if an ErrorResponse is raised while
              iterating
    :raises InvalidRequest: if the request body is empty
    :raises BadDigest: if a ``content-md5`` header was sent and the
                       ``etag`` header does not match the body's MD5
    :raises InvalidPartOrder: if part numbers are not strictly increasing
    :raises InvalidPart: if a part's ETag is not 32 lowercase hex digits
    :raises MalformedXML: if the body fails to parse or validate
    """
    upload_id = req.params['uploadId']
    resp = _get_upload_info(req, self.app, upload_id)
    headers = {'Accept': 'application/json'}
    # Carry the x-amz-meta-* headers returned with the upload info over to
    # the manifest PUT as x-object-meta-* (11 == len('x-amz-meta-')).
    for key, val in resp.headers.items():
        _key = key.lower()
        if _key.startswith('x-amz-meta-'):
            headers['x-object-meta-' + _key[11:]] = val

    hct_header = sysmeta_header('object', 'has-content-type')
    if resp.sysmeta_headers.get(hct_header) == 'yes':
        content_type = resp.sysmeta_headers.get(
            sysmeta_header('object', 'content-type'))
    elif hct_header in resp.sysmeta_headers:
        # has-content-type is present but false, so no content type was
        # set on initial upload. In that case, we won't set one on our
        # PUT request. Swift will end up guessing one based on the
        # object name.
        content_type = None
    else:
        content_type = resp.headers.get('Content-Type')

    if content_type:
        headers['Content-Type'] = content_type

    container = req.container_name + MULTIUPLOAD_SUFFIX
    s3_etag_hasher = md5()
    manifest = []
    previous_number = 0
    try:
        xml = req.xml(MAX_COMPLETE_UPLOAD_BODY_SIZE)
        if not xml:
            raise InvalidRequest(msg='You must specify at least one part')
        if 'content-md5' in req.headers:
            # If an MD5 was provided, we need to verify it.
            # Note that S3Request already took care of translating to ETag
            if req.headers['etag'] != md5(xml).hexdigest():
                raise BadDigest(content_md5=req.headers['content-md5'])
            # We're only interested in the body here, in the
            # multipart-upload controller -- *don't* let it get
            # plumbed down to the object-server
            del req.headers['etag']

        complete_elem = fromstring(
            xml, 'CompleteMultipartUpload', self.logger)
        for part_elem in complete_elem.iterchildren('Part'):
            part_number = int(part_elem.find('./PartNumber').text)

            if part_number <= previous_number:
                raise InvalidPartOrder(upload_id=upload_id)
            previous_number = part_number

            etag = normalize_etag(part_elem.find('./ETag').text)
            # Only accept a 32-character lowercase hex string, so that
            # a2b_hex() below cannot fail.
            if len(etag) != 32 or any(c not in '0123456789abcdef'
                                      for c in etag):
                raise InvalidPart(upload_id=upload_id,
                                  part_number=part_number)

            manifest.append({
                'path': '/%s/%s/%s/%d' % (
                    container, req.object_name, upload_id, part_number),
                'etag': etag
            })
            # The S3 ETag is an MD5 over the parts' binary MD5 digests.
            s3_etag_hasher.update(binascii.a2b_hex(etag))
    except (XMLSyntaxError, DocumentInvalid):
        # NB: our schema definitions catch uploads with no parts here
        raise MalformedXML()
    except ErrorResponse:
        # S3 errors raised above propagate unchanged (and unlogged).
        raise
    except Exception as e:
        self.logger.error(e)
        raise

    # "<md5 of part digests>-<number of parts>"
    s3_etag = '%s-%d' % (s3_etag_hasher.hexdigest(), len(manifest))
    headers[sysmeta_header('object', 'etag')] = s3_etag
    # Leave base header value blank; SLO will populate
    c_etag = '; s3_etag=%s' % s3_etag
    headers[get_container_update_override_key('etag')] = c_etag

    too_small_message = ('s3api requires that each segment be at least '
                         '%d bytes' % self.conf.min_segment_size)

    def size_checker(manifest):
        # Check the size of each segment except the last and make sure
        # they are all more than the minimum upload chunk size.
        # Note that we need to use the *internal* keys, since we're
        # looking at the manifest that's about to be written.
        return [
            (item['name'], too_small_message)
            for item in manifest[:-1]
            if item and item['bytes'] < self.conf.min_segment_size]

    # Registered in the environ under the SLO manifest hook key.
    req.environ['swift.callback.slo_manifest_hook'] = size_checker
    start_time = time.time()

    def response_iter():
        # NB: XML requires that the XML declaration, if present, be at the
        # very start of the document. Clients *will* call us out on not
        # being valid XML if we pass through whitespace before it.
        # Track whether we've sent anything yet so we can yield out that
        # declaration *first*
        yielded_anything = False
        try:
            try:
                # TODO: add support for versioning
                put_resp = req.get_response(
                    self.app, 'PUT', body=json.dumps(manifest),
                    query={
                        'multipart-manifest': 'put',
                        'heartbeat': 'on'
                    },
                    headers=headers)
                if put_resp.status_int == 202:
                    body = []
                    put_resp.fix_conditional_response()
                    for chunk in put_resp.response_iter:
                        # Whitespace-only chunks are passed through to
                        # the client (after the grace period); all other
                        # chunks are collected as the JSON result.
                        if not chunk.strip():
                            if time.time() - start_time < 10:
                                # Include some grace period to keep
                                # ceph-s3tests happy
                                continue
                            if not yielded_anything:
                                yield (b'<?xml version="1.0" '
                                       b'encoding="UTF-8"?>\n')
                            yielded_anything = True
                            yield chunk
                            continue
                        body.append(chunk)
                    body = json.loads(b''.join(body))
                    if body['Response Status'] != '201 Created':
                        # Map the known per-segment errors to S3 errors;
                        # anything else becomes a generic InvalidRequest.
                        for seg, err in body['Errors']:
                            if err == too_small_message:
                                raise EntityTooSmall()
                            elif err in ('Etag Mismatch', '404 Not Found'):
                                raise InvalidPart(upload_id=upload_id)
                        raise InvalidRequest(
                            status=body['Response Status'],
                            msg='\n'.join(': '.join(err)
                                          for err in body['Errors']))
            except BadSwiftRequest as e:
                msg = str(e)
                if too_small_message in msg:
                    raise EntityTooSmall(msg)
                elif ', Etag Mismatch' in msg:
                    raise InvalidPart(upload_id=upload_id)
                elif ', 404 Not Found' in msg:
                    raise InvalidPart(upload_id=upload_id)
                else:
                    raise

            # clean up the multipart-upload record
            obj = '%s/%s' % (req.object_name, upload_id)
            try:
                req.get_response(self.app, 'DELETE', container, obj)
            except NoSuchKey:
                # We know that this existed long enough for us to HEAD
                pass

            result_elem = Element('CompleteMultipartUploadResult')

            # NOTE: boto with sig v4 appends port to HTTP_HOST value at
            # the request header when the port is non default value and it
            # makes req.host_url like as http://localhost:8080:8080/path
            # that obviously invalid. Probably it should be resolved at
            # swift.common.swob though, tentatively we are parsing and
            # reconstructing the correct host_url info here.
            # in detail, https://github.com/boto/boto/pull/3513
            parsed_url = urlparse(req.host_url)
            host_url = '%s://%s' % (parsed_url.scheme, parsed_url.hostname)
            # Why are we doing our own port parsing? Because py3 decided
            # to start raising ValueErrors on access after parsing such
            # an invalid port
            netloc = parsed_url.netloc.split('@')[-1].split(']')[-1]
            if ':' in netloc:
                port = netloc.split(':', 2)[1]
                host_url += ':%s' % port

            SubElement(result_elem, 'Location').text = host_url + req.path
            SubElement(result_elem, 'Bucket').text = req.container_name
            SubElement(result_elem, 'Key').text = req.object_name
            SubElement(result_elem, 'ETag').text = '"%s"' % s3_etag
            # ``resp`` here is the HTTPOk assigned below: it is rebound
            # before this generator first runs.
            resp.headers.pop('ETag', None)
            if yielded_anything:
                yield b'\n'
            yield tostring(result_elem,
                           xml_declaration=not yielded_anything)
        except ErrorResponse as err_resp:
            if yielded_anything:
                # The XML declaration has already been sent, so the error
                # document must not repeat it.
                err_resp.xml_declaration = False
                yield b'\n'
            else:
                # Oh good, we can still change HTTP status code, too!
                resp.status = err_resp.status
            for chunk in err_resp({}, lambda *a: None):
                yield chunk

    resp = HTTPOk()  # assume we're good for now... but see above!
    resp.app_iter = reiterate(response_iter())
    resp.content_type = "application/xml"

    return resp
def POST(self, req):
    """
    Handles Complete Multipart Upload.

    Lists the uploaded parts in the segments container, checks every
    ``Part`` of the request body against that listing, and PUTs the
    resulting manifest with ``multipart-manifest=put`` and
    ``heartbeat=on``. The response body is produced lazily by
    ``response_iter``, so the PUT and the cleanup of the upload marker
    run while the response is being iterated.

    :param req: the S3 request; ``uploadId`` is read from its params
    :returns: an HTTPOk response whose ``app_iter`` yields the
              ``CompleteMultipartUploadResult`` document, or the
              rendered error if an ErrorResponse is raised while
              iterating
    :raises InvalidRequest: if the request body is empty
    :raises InvalidPartOrder: if part numbers are not strictly increasing
    :raises InvalidPart: if a part is not in the listing or its ETag
                         differs from the listed hash
    :raises MalformedXML: if the body fails to parse or validate
    :raises EntityTooSmall: if any part but the last is smaller than
                            ``min_segment_size``
    """
    upload_id = req.params['uploadId']
    resp = _get_upload_info(req, self.app, upload_id)
    headers = {'Accept': 'application/json'}
    # Carry the x-amz-meta-* headers returned with the upload info over to
    # the manifest PUT as x-object-meta-* (11 == len('x-amz-meta-')).
    for key, val in resp.headers.items():
        _key = key.lower()
        if _key.startswith('x-amz-meta-'):
            headers['x-object-meta-' + _key[11:]] = val

    hct_header = sysmeta_header('object', 'has-content-type')
    if resp.sysmeta_headers.get(hct_header) == 'yes':
        content_type = resp.sysmeta_headers.get(
            sysmeta_header('object', 'content-type'))
    elif hct_header in resp.sysmeta_headers:
        # has-content-type is present but false, so no content type was
        # set on initial upload. In that case, we won't set one on our
        # PUT request. Swift will end up guessing one based on the
        # object name.
        content_type = None
    else:
        content_type = resp.headers.get('Content-Type')

    if content_type:
        headers['Content-Type'] = content_type

    # Query for the objects in the segments area to make sure it completed
    query = {
        'format': 'json',
        'prefix': '%s/%s/' % (req.object_name, upload_id),
        'delimiter': '/'
    }

    container = req.container_name + MULTIUPLOAD_SUFFIX
    resp = req.get_response(self.app, 'GET', container, '', query=query)
    objinfo = json.loads(resp.body)
    # Index the listing by object name; each value is the manifest entry
    # for that part.
    objtable = dict((o['name'], {
        'path': '/'.join(['', container, o['name']]),
        'etag': o['hash'],
        'size_bytes': o['bytes']
    }) for o in objinfo)

    s3_etag_hasher = md5()
    manifest = []
    previous_number = 0
    try:
        xml = req.xml(MAX_COMPLETE_UPLOAD_BODY_SIZE)
        if not xml:
            raise InvalidRequest(msg='You must specify at least one part')

        complete_elem = fromstring(
            xml, 'CompleteMultipartUpload', self.logger)
        for part_elem in complete_elem.iterchildren('Part'):
            part_number = int(part_elem.find('./PartNumber').text)

            if part_number <= previous_number:
                raise InvalidPartOrder(upload_id=upload_id)
            previous_number = part_number

            etag = part_elem.find('./ETag').text
            if len(etag) >= 2 and etag[0] == '"' and etag[-1] == '"':
                # strip double quotes
                etag = etag[1:-1]

            info = objtable.get("%s/%s/%s" % (
                req.object_name, upload_id, part_number))
            if info is None or info['etag'] != etag:
                raise InvalidPart(upload_id=upload_id,
                                  part_number=part_number)

            # NOTE(review): str.decode('hex') exists only on Python 2;
            # confirm this module is not expected to run on Python 3.
            s3_etag_hasher.update(etag.decode('hex'))
            info['size_bytes'] = int(info['size_bytes'])
            manifest.append(info)
    except (XMLSyntaxError, DocumentInvalid):
        # NB: our schema definitions catch uploads with no parts here
        raise MalformedXML()
    except ErrorResponse:
        # S3 errors raised above propagate unchanged (and unlogged).
        raise
    except Exception as e:
        self.logger.error(e)
        raise

    # "<md5 of part digests>-<number of parts>"
    s3_etag = '%s-%d' % (s3_etag_hasher.hexdigest(), len(manifest))
    headers[sysmeta_header('object', 'etag')] = s3_etag
    # Leave base header value blank; SLO will populate
    c_etag = '; s3_etag=%s' % s3_etag
    headers['X-Object-Sysmeta-Container-Update-Override-Etag'] = c_etag

    # Check the size of each segment except the last and make sure they are
    # all more than the minimum upload chunk size
    for info in manifest[:-1]:
        if info['size_bytes'] < self.conf.min_segment_size:
            raise EntityTooSmall()

    def response_iter():
        # NB: XML requires that the XML declaration, if present, be at the
        # very start of the document. Clients *will* call us out on not
        # being valid XML if we pass through whitespace before it.
        # Track whether we've sent anything yet so we can yield out that
        # declaration *first*
        yielded_anything = False
        try:
            try:
                # TODO: add support for versioning
                put_resp = req.get_response(
                    self.app, 'PUT', body=json.dumps(manifest),
                    query={
                        'multipart-manifest': 'put',
                        'heartbeat': 'on'
                    },
                    headers=headers)
                if put_resp.status_int == 202:
                    body = []
                    put_resp.fix_conditional_response()
                    for chunk in put_resp.response_iter:
                        # Whitespace-only chunks are passed through to
                        # the client; every chunk (whitespace included)
                        # is also collected for the JSON result.
                        if not chunk.strip():
                            if not yielded_anything:
                                yield ('<?xml version="1.0" '
                                       'encoding="UTF-8"?>\n')
                            yielded_anything = True
                            yield chunk
                        body.append(chunk)
                    # NOTE(review): joining with a str separator assumes
                    # the chunks are str -- confirm for Python 3.
                    body = json.loads(''.join(body))
                    if body['Response Status'] != '201 Created':
                        raise InvalidRequest(
                            status=body['Response Status'],
                            msg='\n'.join(': '.join(err)
                                          for err in body['Errors']))
            except BadSwiftRequest as e:
                msg = str(e)
                expected_msg = ('too small; each segment must be '
                                'at least 1 byte')
                if expected_msg in msg:
                    # FIXME: AWS S3 allows a smaller object than 5 MB if
                    # there is only one part. Use a COPY request to copy
                    # the part object from the segments container instead.
                    raise EntityTooSmall(msg)
                else:
                    raise

            # clean up the multipart-upload record
            obj = '%s/%s' % (req.object_name, upload_id)
            try:
                req.get_response(self.app, 'DELETE', container, obj)
            except NoSuchKey:
                # We know that this existed long enough for us to HEAD
                pass

            result_elem = Element('CompleteMultipartUploadResult')

            # NOTE: boto with sig v4 appends port to HTTP_HOST value at
            # the request header when the port is non default value and it
            # makes req.host_url like as http://localhost:8080:8080/path
            # that obviously invalid. Probably it should be resolved at
            # swift.common.swob though, tentatively we are parsing and
            # reconstructing the correct host_url info here.
            # in detail, https://github.com/boto/boto/pull/3513
            parsed_url = urlparse(req.host_url)
            host_url = '%s://%s' % (parsed_url.scheme, parsed_url.hostname)
            if parsed_url.port:
                host_url += ':%s' % parsed_url.port

            SubElement(result_elem, 'Location').text = host_url + req.path
            SubElement(result_elem, 'Bucket').text = req.container_name
            SubElement(result_elem, 'Key').text = req.object_name
            SubElement(result_elem, 'ETag').text = '"%s"' % s3_etag
            # ``resp`` here is the HTTPOk assigned below: it is rebound
            # before this generator first runs.
            resp.headers.pop('ETag', None)
            if yielded_anything:
                yield '\n'
            yield tostring(result_elem,
                           xml_declaration=not yielded_anything)
        except ErrorResponse as err_resp:
            if yielded_anything:
                # The XML declaration has already been sent, so the error
                # document must not repeat it.
                err_resp.xml_declaration = False
                yield '\n'
            else:
                # Oh good, we can still change HTTP status code, too!
                resp.status = err_resp.status
            for chunk in err_resp({}, lambda *a: None):
                yield chunk

    resp = HTTPOk()  # assume we're good for now... but see above!
    resp.app_iter = reiterate(response_iter())
    resp.content_type = "application/xml"

    return resp
def GET(self, req):
    """
    Handles List Parts.

    Every object stored under ``<key>/<upload id>/`` in the segments
    container is listed; the part-number marker and ``max-parts`` limit
    are then applied here, and the surviving parts are reported in a
    ``ListPartsResult`` document.
    """
    def is_after_marker(entry):
        # Keep only entries whose last path component is an integer
        # greater than the requested part-number marker.
        try:
            number = int(os.path.basename(entry['name']))
        except ValueError:
            return False
        return number > part_num_marker

    def part_number_of(entry):
        return int(entry['name'].split('/')[-1])

    encoding_type = req.params.get('encoding-type')
    if encoding_type is not None and encoding_type != 'url':
        raise InvalidArgument(
            'encoding-type', encoding_type,
            'Invalid Encoding Method specified in Request')

    upload_id = req.params['uploadId']
    _get_upload_info(req, self.app, upload_id)

    maxparts = req.get_validated_param(
        'max-parts', DEFAULT_MAX_PARTS_LISTING,
        self.conf.max_parts_listing)
    part_num_marker = req.get_validated_param(
        'part-number-marker', 0)

    query = {
        'format': 'json',
        'prefix': '%s/%s/' % (req.object_name, upload_id),
        'delimiter': '/',
        'marker': '',
    }
    container = req.container_name + MULTIUPLOAD_SUFFIX

    # Because the parts are out of order in Swift, we list up to the
    # maximum number of parts and then apply the marker and limit options.
    listed = []
    while True:
        page_resp = req.get_response(
            self.app, container=container, obj='', query=query)
        page = json.loads(page_resp.body)
        if not page:
            break
        listed.extend(page)
        query['marker'] = page[-1]['name']

    # If the caller requested a list starting at a specific part number,
    # construct a sub-set of the object list.
    parts = sorted((entry for entry in listed if is_after_marker(entry)),
                   key=part_number_of)

    truncated = len(parts) > maxparts
    if truncated:
        parts = parts[:maxparts]
    # TODO: We have to retrieve object list again when truncated is True
    # and some objects filtered by invalid name because there could be no
    # enough objects for limit defined by maxparts.

    last_part = os.path.basename(parts[-1]['name']) if parts else 0

    key_name = req.object_name
    if encoding_type == 'url':
        key_name = quote(key_name)

    result_elem = Element('ListPartsResult')
    SubElement(result_elem, 'Bucket').text = req.container_name
    SubElement(result_elem, 'Key').text = key_name
    SubElement(result_elem, 'UploadId').text = upload_id

    # The requesting user is reported as both initiator and owner.
    for role in ('Initiator', 'Owner'):
        role_elem = SubElement(result_elem, role)
        SubElement(role_elem, 'ID').text = req.user_id
        SubElement(role_elem, 'DisplayName').text = req.user_id

    SubElement(result_elem, 'StorageClass').text = 'STANDARD'
    SubElement(result_elem, 'PartNumberMarker').text = str(part_num_marker)
    SubElement(result_elem, 'NextPartNumberMarker').text = str(last_part)
    SubElement(result_elem, 'MaxParts').text = str(maxparts)
    if 'encoding-type' in req.params:
        SubElement(result_elem, 'EncodingType').text = \
            req.params['encoding-type']
    if truncated:
        SubElement(result_elem, 'IsTruncated').text = 'true'
    else:
        SubElement(result_elem, 'IsTruncated').text = 'false'

    for part in parts:
        part_elem = SubElement(result_elem, 'Part')
        SubElement(part_elem, 'PartNumber').text = \
            part['name'].split('/')[-1]
        SubElement(part_elem, 'LastModified').text = \
            part['last_modified'][:-3] + 'Z'
        SubElement(part_elem, 'ETag').text = '"%s"' % part['hash']
        SubElement(part_elem, 'Size').text = str(part['bytes'])

    return HTTPOk(body=tostring(result_elem),
                  content_type='application/xml')
def GET(self, req):
    """
    Handles List Multipart Uploads

    Lists the entries of the bucket's multipart-upload container
    (``req.container_name + MULTIUPLOAD_SUFFIX``), drops the per-part
    segment objects, optionally rolls keys up into common prefixes, and
    renders a ``ListMultipartUploadsResult`` XML document.

    Query parameters read from ``req.params``: ``encoding-type``,
    ``key-marker``, ``upload-id-marker``, ``max-uploads``, ``prefix`` and
    ``delimiter``.

    :param req: the incoming request
    :returns: HTTPOk carrying the XML body (``application/xml``)
    :raises InvalidArgument: when ``encoding-type`` is present and is
                             anything other than 'url'
    """
    def separate_uploads(uploads, prefix, delimiter):
        """
        separate_uploads will separate uploads into non_delimited_uploads
        (a subset of uploads) and common_prefixes according to the
        specified delimiter. non_delimited_uploads is a list of uploads
        whose keys do not contain the delimiter after the prefix.
        common_prefixes is a sorted list of the distinct prefixes up to
        and including the first delimiter found after the prefix.

        i.e. if the '/' delimiter is specified and the uploads consist of
        ['foo', 'foo/bar'], this function will return (['foo'], ['foo/']).

        :param uploads: A list of upload dictionaries
        :param prefix: A string of prefix reserved on the upload path.
                       (i.e. the delimiter must be searched behind the
                       prefix)
        :param delimiter: A string of delimiter to split the path in each
                          upload

        :return (non_delimited_uploads, common_prefixes)
        """
        if six.PY2:
            (prefix, delimiter) = utf8encode(prefix, delimiter)
        non_delimited_uploads = []
        common_prefixes = set()
        for upload in uploads:
            key = upload['key']
            end = key.find(delimiter, len(prefix))
            if end >= 0:
                common_prefix = key[:end + len(delimiter)]
                common_prefixes.add(common_prefix)
            else:
                non_delimited_uploads.append(upload)
        return non_delimited_uploads, sorted(common_prefixes)

    encoding_type = req.params.get('encoding-type')
    if encoding_type is not None and encoding_type != 'url':
        err_msg = 'Invalid Encoding Method specified in Request'
        raise InvalidArgument('encoding-type', encoding_type, err_msg)

    keymarker = req.params.get('key-marker', '')
    uploadid = req.params.get('upload-id-marker', '')
    maxuploads = req.get_validated_param(
        'max-uploads', DEFAULT_MAX_UPLOADS, DEFAULT_MAX_UPLOADS)

    # Ask for one entry more than requested so that truncation can be
    # detected below.
    query = {
        'format': 'json',
        'limit': maxuploads + 1,
    }

    if uploadid and keymarker:
        query['marker'] = '%s/%s' % (keymarker, uploadid)
    elif keymarker:
        query['marker'] = '%s/~' % (keymarker)
    if 'prefix' in req.params:
        query['prefix'] = req.params['prefix']

    container = req.container_name + MULTIUPLOAD_SUFFIX
    try:
        resp = req.get_response(self.app, container=container, query=query)
        objects = json.loads(resp.body)
    except NoSuchBucket:
        # Assume NoSuchBucket as no uploads
        objects = []

    def object_to_upload(object_info):
        # Listing names have the form '<key>/<upload_id>'.
        obj, upid = object_info['name'].rsplit('/', 1)
        obj_dict = {'key': obj,
                    'upload_id': upid,
                    'last_modified': object_info['last_modified']}
        return obj_dict

    # uploads is a list consists of dict, {key, upload_id, last_modified}
    # Note that pattern matcher will drop whole segments objects like as
    # object_name/upload_id/1.
    pattern = re.compile('/[0-9]+$')
    uploads = [object_to_upload(obj) for obj in objects
               if pattern.search(obj.get('name', '')) is None]

    prefixes = []
    if 'delimiter' in req.params:
        prefix = req.params.get('prefix', '')
        delimiter = req.params['delimiter']
        uploads, prefixes = separate_uploads(uploads, prefix, delimiter)

    if len(uploads) > maxuploads:
        uploads = uploads[:maxuploads]
        truncated = True
    else:
        truncated = False

    nextkeymarker = ''
    nextuploadmarker = ''
    # Report the last returned upload as the continuation point whenever
    # anything is returned. A single-element page (e.g. max-uploads=1 on
    # a truncated listing) must carry markers too, otherwise the client
    # has nothing to resume from.
    if uploads:
        nextuploadmarker = uploads[-1]['upload_id']
        nextkeymarker = uploads[-1]['key']

    result_elem = Element('ListMultipartUploadsResult')
    SubElement(result_elem, 'Bucket').text = req.container_name
    SubElement(result_elem, 'KeyMarker').text = keymarker
    SubElement(result_elem, 'UploadIdMarker').text = uploadid
    SubElement(result_elem, 'NextKeyMarker').text = nextkeymarker
    SubElement(result_elem, 'NextUploadIdMarker').text = nextuploadmarker
    if 'delimiter' in req.params:
        SubElement(result_elem, 'Delimiter').text = req.params['delimiter']
    if 'prefix' in req.params:
        SubElement(result_elem, 'Prefix').text = req.params['prefix']
    SubElement(result_elem, 'MaxUploads').text = str(maxuploads)
    if encoding_type is not None:
        SubElement(result_elem, 'EncodingType').text = encoding_type
    SubElement(result_elem, 'IsTruncated').text = \
        'true' if truncated else 'false'

    # TODO: don't show uploads which are initiated before this bucket is
    # created.
    for u in uploads:
        upload_elem = SubElement(result_elem, 'Upload')
        name = u['key']
        if encoding_type == 'url':
            name = quote(name)
        SubElement(upload_elem, 'Key').text = name
        SubElement(upload_elem, 'UploadId').text = u['upload_id']
        # Initiator and Owner are both reported as the requesting user.
        initiator_elem = SubElement(upload_elem, 'Initiator')
        SubElement(initiator_elem, 'ID').text = req.user_id
        SubElement(initiator_elem, 'DisplayName').text = req.user_id
        owner_elem = SubElement(upload_elem, 'Owner')
        SubElement(owner_elem, 'ID').text = req.user_id
        SubElement(owner_elem, 'DisplayName').text = req.user_id
        SubElement(upload_elem, 'StorageClass').text = 'STANDARD'
        SubElement(upload_elem, 'Initiated').text = \
            u['last_modified'][:-3] + 'Z'

    for p in prefixes:
        elem = SubElement(result_elem, 'CommonPrefixes')
        SubElement(elem, 'Prefix').text = p

    body = tostring(result_elem)

    return HTTPOk(body=body, content_type='application/xml')
def GET(self, req):
    """
    Handle GET Bucket (List Objects) request

    Depending on the query parameters this produces a ``ListVersionsResult``
    (``versions`` present) or a ``ListBucketResult`` in either the
    list-type 2 or the original flavour.

    :param req: the incoming request
    :returns: HTTPOk carrying the XML body (``application/xml``)
    :raises InvalidArgument: for an ``encoding-type`` other than 'url', or
                             for ``version-id-marker`` without
                             ``key-marker`` on a versions listing
    """
    listing_cap = self.conf.max_bucket_listing
    # TODO: Separate max_bucket_listing and default_bucket_listing
    # The validated request value is echoed back in MaxKeys; the listing
    # itself never asks for more than the configured cap.
    tag_max_keys = req.get_validated_param('max-keys', listing_cap)
    max_keys = min(tag_max_keys, listing_cap)

    encoding_type = req.params.get('encoding-type')
    if encoding_type not in (None, 'url'):
        raise InvalidArgument(
            'encoding-type', encoding_type,
            'Invalid Encoding Method specified in Request')

    query = {'format': 'json', 'limit': max_keys + 1}
    for passthrough in ('prefix', 'delimiter'):
        if passthrough in req.params:
            query[passthrough] = req.params[passthrough]

    fetch_owner = False
    if 'versions' in req.params:
        listing_type = 'object-versions'
        if 'key-marker' in req.params:
            query['marker'] = req.params['key-marker']
        elif 'version-id-marker' in req.params:
            raise InvalidArgument(
                'version-id-marker', req.params['version-id-marker'],
                'A version-id marker cannot be specified without '
                'a key marker.')
    elif int(req.params.get('list-type', '1')) == 2:
        listing_type = 'version-2'
        if 'start-after' in req.params:
            query['marker'] = req.params['start-after']
        # continuation-token overrides start-after
        if 'continuation-token' in req.params:
            query['marker'] = b64decode(req.params['continuation-token'])
        if 'fetch-owner' in req.params:
            fetch_owner = config_true_value(req.params['fetch-owner'])
    else:
        listing_type = 'version-1'
        if 'marker' in req.params:
            query['marker'] = req.params['marker']

    resp = req.get_response(self.app, query=query)
    listing = json.loads(resp.body)

    # in order to judge that truncated is valid, check whether
    # max_keys + 1 th element exists in swift.
    is_truncated = max_keys > 0 and len(listing) > max_keys
    objects = listing[:max_keys]

    versioned = listing_type == 'object-versions'
    if versioned:
        elem = Element('ListVersionsResult')
        SubElement(elem, 'Name').text = req.container_name
        SubElement(elem, 'Prefix').text = req.params.get('prefix')
        SubElement(elem, 'KeyMarker').text = req.params.get('key-marker')
        SubElement(elem, 'VersionIdMarker').text = \
            req.params.get('version-id-marker')
        if is_truncated:
            last = objects[-1]
            for field in ('name', 'subdir'):
                if field in last:
                    SubElement(elem, 'NextKeyMarker').text = last[field]
            SubElement(elem, 'NextVersionIdMarker').text = 'null'
    else:
        elem = Element('ListBucketResult')
        SubElement(elem, 'Name').text = req.container_name
        SubElement(elem, 'Prefix').text = req.params.get('prefix')
        if listing_type == 'version-1':
            SubElement(elem, 'Marker').text = req.params.get('marker')
            if is_truncated and 'delimiter' in req.params:
                last = objects[-1]
                if 'name' in last:
                    next_marker = last['name']
                else:
                    next_marker = last['subdir']
                if encoding_type == 'url':
                    next_marker = quote(next_marker)
                SubElement(elem, 'NextMarker').text = next_marker
        elif listing_type == 'version-2':
            if is_truncated:
                last = objects[-1]
                for field in ('name', 'subdir'):
                    if field in last:
                        SubElement(elem, 'NextContinuationToken').text = \
                            b64encode(last[field].encode('utf8'))
            if 'continuation-token' in req.params:
                SubElement(elem, 'ContinuationToken').text = \
                    req.params['continuation-token']
            if 'start-after' in req.params:
                SubElement(elem, 'StartAfter').text = \
                    req.params['start-after']
            SubElement(elem, 'KeyCount').text = str(len(objects))

    SubElement(elem, 'MaxKeys').text = str(tag_max_keys)

    if 'delimiter' in req.params:
        SubElement(elem, 'Delimiter').text = req.params['delimiter']

    if encoding_type == 'url':
        SubElement(elem, 'EncodingType').text = encoding_type

    SubElement(elem, 'IsTruncated').text = \
        'true' if is_truncated else 'false'

    # First pass: real objects (entries without a 'subdir' key).
    for entry in objects:
        if 'subdir' in entry:
            continue
        key = entry['name']
        if encoding_type == 'url':
            key = quote(key.encode('utf-8'))

        if versioned:
            contents = SubElement(elem, 'Version')
            SubElement(contents, 'Key').text = key
            SubElement(contents, 'VersionId').text = 'null'
            SubElement(contents, 'IsLatest').text = 'true'
        else:
            contents = SubElement(elem, 'Contents')
            SubElement(contents, 'Key').text = key
        SubElement(contents, 'LastModified').text = \
            entry['last_modified'][:-3] + 'Z'

        if 's3_etag' in entry:
            # New-enough MUs are already in the right format
            etag = entry['s3_etag']
        elif 'slo_etag' in entry:
            # SLOs may be in something *close* to the MU format
            etag = '"%s-N"' % entry['slo_etag'].strip('"')
        else:
            # Normal objects just use the MD5
            # This also catches sufficiently-old SLOs, but we have
            # no way to identify those from container listings
            etag = '"%s"' % entry['hash']
        SubElement(contents, 'ETag').text = etag
        SubElement(contents, 'Size').text = str(entry['bytes'])

        if fetch_owner or listing_type != 'version-2':
            owner = SubElement(contents, 'Owner')
            SubElement(owner, 'ID').text = req.user_id
            SubElement(owner, 'DisplayName').text = req.user_id
        SubElement(contents, 'StorageClass').text = 'STANDARD'

    # Second pass: rolled-up prefixes, emitted after all the objects.
    for entry in objects:
        if 'subdir' not in entry:
            continue
        common_prefixes = SubElement(elem, 'CommonPrefixes')
        subdir = entry['subdir']
        if encoding_type == 'url':
            subdir = quote(subdir.encode('utf-8'))
        SubElement(common_prefixes, 'Prefix').text = subdir

    return HTTPOk(body=tostring(elem), content_type='application/xml')
def POST(self, req):
    """
    Handles Complete Multipart Upload.

    Validates the ``CompleteMultipartUpload`` XML body, builds a manifest
    with one entry per listed part, and returns a response whose body is
    produced lazily by a generator. The generator PUTs the manifest
    (``multipart-manifest=put`` with ``heartbeat=on``), deletes the upload
    marker object and finally yields the completion document.

    :param req: the incoming request; ``uploadId`` is read from its params
    :returns: HTTPOk with ``application/xml`` content type. Errors that
              occur while the body is being generated are rendered into
              the body; the status is only changed if nothing has been
              yielded yet.
    :raises InvalidRequest: if the request body is empty
    :raises BadDigest: if a ``content-md5`` header was sent and the ETag
                       header does not match the MD5 of the body
    :raises InvalidPartOrder: if part numbers are not strictly increasing
    :raises InvalidPart: if a part ETag is not 32 lowercase hex characters
    :raises MalformedXML: if the body fails XML parsing or validation
    """
    upload_id = req.params['uploadId']
    resp = _get_upload_info(req, self.app, upload_id)
    headers = {
        'Accept': 'application/json',
        sysmeta_header('object', 'upload-id'): upload_id
    }
    # Carry the metadata found on the upload-info response over to the
    # final object; 11 == len('x-amz-meta-').
    for key, val in resp.headers.items():
        _key = key.lower()
        if _key.startswith('x-amz-meta-'):
            headers['x-object-meta-' + _key[11:]] = val

    hct_header = sysmeta_header('object', 'has-content-type')
    if resp.sysmeta_headers.get(hct_header) == 'yes':
        content_type = resp.sysmeta_headers.get(
            sysmeta_header('object', 'content-type'))
    elif hct_header in resp.sysmeta_headers:
        # has-content-type is present but false, so no content type was
        # set on initial upload. In that case, we won't set one on our
        # PUT request. Swift will end up guessing one based on the
        # object name.
        content_type = None
    else:
        content_type = resp.headers.get('Content-Type')

    if content_type:
        headers['Content-Type'] = content_type

    container = req.container_name + MULTIUPLOAD_SUFFIX
    # Accumulates the binary MD5 digests of the parts, in order; its hex
    # digest becomes the first half of the S3-style ETag below.
    s3_etag_hasher = md5(usedforsecurity=False)
    manifest = []
    previous_number = 0
    try:
        xml = req.xml(MAX_COMPLETE_UPLOAD_BODY_SIZE)
        if not xml:
            raise InvalidRequest(msg='You must specify at least one part')
        if 'content-md5' in req.headers:
            # If an MD5 was provided, we need to verify it.
            # Note that S3Request already took care of translating to ETag
            if req.headers['etag'] != md5(
                    xml, usedforsecurity=False).hexdigest():
                raise BadDigest(content_md5=req.headers['content-md5'])
            # We're only interested in the body here, in the
            # multipart-upload controller -- *don't* let it get
            # plumbed down to the object-server
            del req.headers['etag']

        complete_elem = fromstring(xml, 'CompleteMultipartUpload',
                                   self.logger)
        for part_elem in complete_elem.iterchildren('Part'):
            part_number = int(part_elem.find('./PartNumber').text)

            # Part numbers must be strictly increasing (and, since
            # previous_number starts at 0, at least 1).
            if part_number <= previous_number:
                raise InvalidPartOrder(upload_id=upload_id)
            previous_number = part_number

            etag = normalize_etag(part_elem.find('./ETag').text)
            # Only a 32-character lowercase hex string is accepted.
            if len(etag) != 32 or any(c not in '0123456789abcdef'
                                      for c in etag):
                raise InvalidPart(upload_id=upload_id,
                                  part_number=part_number)

            manifest.append({
                'path':
                '/%s/%s/%s/%d' % (wsgi_to_str(container), wsgi_to_str(
                    req.object_name), upload_id, part_number),
                'etag':
                etag
            })
            s3_etag_hasher.update(binascii.a2b_hex(etag))
    except (XMLSyntaxError, DocumentInvalid):
        # NB: our schema definitions catch uploads with no parts here
        raise MalformedXML()
    except ErrorResponse:
        # Already an S3 error response; propagate untouched.
        raise
    except Exception as e:
        # Anything unexpected is logged and then re-raised unchanged.
        self.logger.error(e)
        raise

    # ETag format: '<hex md5 of the concatenated part digests>-<count>'.
    s3_etag = '%s-%d' % (s3_etag_hasher.hexdigest(), len(manifest))
    s3_etag_header = sysmeta_header('object', 'etag')
    if resp.sysmeta_headers.get(s3_etag_header) == s3_etag:
        # This header should only already be present if the upload marker
        # has been cleaned up and the current target uses the same
        # upload-id; assuming the segments to use haven't changed, the work
        # is already done
        return HTTPOk(body=_make_complete_body(req, s3_etag, False),
                      content_type='application/xml')
    headers[s3_etag_header] = s3_etag
    # Leave base header value blank; SLO will populate
    c_etag = '; s3_etag=%s' % s3_etag
    headers[get_container_update_override_key('etag')] = c_etag

    too_small_message = ('s3api requires that each segment be at least '
                         '%d bytes' % self.conf.min_segment_size)

    def size_checker(manifest):
        # Check the size of each segment except the last and make sure
        # they are all more than the minimum upload chunk size.
        # Note that we need to use the *internal* keys, since we're
        # looking at the manifest that's about to be written.
        # Returns a list of (segment name, message) pairs, one per
        # undersized segment.
        return [(item['name'], too_small_message)
                for item in manifest[:-1]
                if item and item['bytes'] < self.conf.min_segment_size]

    # Registered in the request environ under the SLO manifest hook key.
    req.environ['swift.callback.slo_manifest_hook'] = size_checker
    start_time = time.time()

    def response_iter():
        # NB: XML requires that the XML declaration, if present, be at the
        # very start of the document. Clients *will* call us out on not
        # being valid XML if we pass through whitespace before it.
        # Track whether we've sent anything yet so we can yield out that
        # declaration *first*
        yielded_anything = False

        try:
            try:
                # TODO: add support for versioning
                put_resp = req.get_response(self.app,
                                            'PUT',
                                            body=json.dumps(manifest),
                                            query={
                                                'multipart-manifest': 'put',
                                                'heartbeat': 'on'
                                            },
                                            headers=headers)
                if put_resp.status_int == 202:
                    # The real outcome is inside the JSON body (see the
                    # 'Response Status' check below). Whitespace-only
                    # chunks are passed through to the client;
                    # everything else is buffered for parsing.
                    body = []
                    put_resp.fix_conditional_response()
                    for chunk in put_resp.response_iter:
                        if not chunk.strip():
                            if time.time() - start_time < 10:
                                # Include some grace period to keep
                                # ceph-s3tests happy
                                continue
                            if not yielded_anything:
                                yield (b'<?xml version="1.0" '
                                       b'encoding="UTF-8"?>\n')
                            yielded_anything = True
                            yield chunk
                            continue
                        body.append(chunk)
                    body = json.loads(b''.join(body))
                    if body['Response Status'] != '201 Created':
                        # Map the first recognised per-segment error to
                        # its S3 error; otherwise report all of them.
                        for seg, err in body['Errors']:
                            if err == too_small_message:
                                raise EntityTooSmall()
                            elif err in ('Etag Mismatch', '404 Not Found'):
                                raise InvalidPart(upload_id=upload_id)
                        raise InvalidRequest(
                            status=body['Response Status'],
                            msg='\n'.join(': '.join(err)
                                          for err in body['Errors']))
            except BadSwiftRequest as e:
                # Same error mapping as above, but matched against the
                # text of the exception.
                msg = str(e)
                if too_small_message in msg:
                    raise EntityTooSmall(msg)
                elif ', Etag Mismatch' in msg:
                    raise InvalidPart(upload_id=upload_id)
                elif ', 404 Not Found' in msg:
                    raise InvalidPart(upload_id=upload_id)
                else:
                    raise

            # clean up the multipart-upload record
            obj = '%s/%s' % (req.object_name, upload_id)
            try:
                req.get_response(self.app, 'DELETE', container, obj)
            except NoSuchKey:
                # The important thing is that we wrote out a tombstone to
                # make sure the marker got cleaned up. If it's already
                # gone (e.g., because of concurrent completes or a retried
                # complete), so much the better.
                pass

            yield _make_complete_body(req, s3_etag, yielded_anything)
        except ErrorResponse as err_resp:
            if yielded_anything:
                # Part of the body is already out, so the declaration
                # must not be emitted a second time.
                err_resp.xml_declaration = False
                yield b'\n'
            else:
                # Oh good, we can still change HTTP status code, too!
                # ``resp`` is the enclosing function's variable, which is
                # rebound to the HTTPOk below before this generator is
                # even created.
                resp.status = err_resp.status
            for chunk in err_resp({}, lambda *a: None):
                yield chunk

    resp = HTTPOk()  # assume we're good for now... but see above!
    resp.app_iter = reiterate(response_iter())
    resp.content_type = "application/xml"

    return resp
def POST(self, req):
    """
    Handles Complete Multipart Upload.

    Validates the ``CompleteMultipartUpload`` XML body, builds a manifest
    with one entry per listed part, and returns a response whose body is
    produced lazily by a generator. The generator PUTs the manifest
    (``multipart-manifest=put`` with ``heartbeat=on``), deletes the upload
    marker object and finally yields a ``CompleteMultipartUploadResult``
    document.

    :param req: the incoming request; ``uploadId`` is read from its params
    :returns: HTTPOk with ``application/xml`` content type. Errors that
              occur while the body is being generated are rendered into
              the body; the status is only changed if nothing has been
              yielded yet.
    :raises InvalidRequest: if the request body is empty
    :raises BadDigest: if a ``content-md5`` header was sent and the ETag
                       header does not match the MD5 of the body
    :raises InvalidPartOrder: if part numbers are not strictly increasing
    :raises InvalidPart: if a part ETag is not 32 lowercase hex characters
    :raises MalformedXML: if the body fails XML parsing or validation
    """
    upload_id = req.params['uploadId']
    resp = _get_upload_info(req, self.app, upload_id)
    headers = {'Accept': 'application/json'}
    # Carry the metadata found on the upload-info response over to the
    # final object; 11 == len('x-amz-meta-').
    for key, val in resp.headers.items():
        _key = key.lower()
        if _key.startswith('x-amz-meta-'):
            headers['x-object-meta-' + _key[11:]] = val

    hct_header = sysmeta_header('object', 'has-content-type')
    if resp.sysmeta_headers.get(hct_header) == 'yes':
        content_type = resp.sysmeta_headers.get(
            sysmeta_header('object', 'content-type'))
    elif hct_header in resp.sysmeta_headers:
        # has-content-type is present but false, so no content type was
        # set on initial upload. In that case, we won't set one on our
        # PUT request. Swift will end up guessing one based on the
        # object name.
        content_type = None
    else:
        content_type = resp.headers.get('Content-Type')

    if content_type:
        headers['Content-Type'] = content_type

    container = req.container_name + MULTIUPLOAD_SUFFIX
    # Accumulates the binary MD5 digests of the parts, in order; its hex
    # digest becomes the first half of the S3-style ETag below.
    s3_etag_hasher = md5()
    manifest = []
    previous_number = 0
    try:
        xml = req.xml(MAX_COMPLETE_UPLOAD_BODY_SIZE)
        if not xml:
            raise InvalidRequest(msg='You must specify at least one part')
        if 'content-md5' in req.headers:
            # If an MD5 was provided, we need to verify it.
            # Note that S3Request already took care of translating to ETag
            if req.headers['etag'] != md5(xml).hexdigest():
                raise BadDigest(content_md5=req.headers['content-md5'])
            # We're only interested in the body here, in the
            # multipart-upload controller -- *don't* let it get
            # plumbed down to the object-server
            del req.headers['etag']

        complete_elem = fromstring(
            xml, 'CompleteMultipartUpload', self.logger)
        for part_elem in complete_elem.iterchildren('Part'):
            part_number = int(part_elem.find('./PartNumber').text)

            # Part numbers must be strictly increasing (and, since
            # previous_number starts at 0, at least 1).
            if part_number <= previous_number:
                raise InvalidPartOrder(upload_id=upload_id)
            previous_number = part_number

            etag = part_elem.find('./ETag').text
            if len(etag) >= 2 and etag[0] == '"' and etag[-1] == '"':
                # strip double quotes
                etag = etag[1:-1]
            # Only a 32-character lowercase hex string is accepted.
            if len(etag) != 32 or any(c not in '0123456789abcdef'
                                      for c in etag):
                raise InvalidPart(upload_id=upload_id,
                                  part_number=part_number)

            manifest.append({
                'path': '/%s/%s/%s/%d' % (
                    container, req.object_name, upload_id, part_number),
                'etag': etag})
            s3_etag_hasher.update(binascii.a2b_hex(etag))
    except (XMLSyntaxError, DocumentInvalid):
        # NB: our schema definitions catch uploads with no parts here
        raise MalformedXML()
    except ErrorResponse:
        # Already an S3 error response; propagate untouched.
        raise
    except Exception as e:
        # Anything unexpected is logged and then re-raised unchanged.
        self.logger.error(e)
        raise

    # ETag format: '<hex md5 of the concatenated part digests>-<count>'.
    s3_etag = '%s-%d' % (s3_etag_hasher.hexdigest(), len(manifest))
    headers[sysmeta_header('object', 'etag')] = s3_etag
    # Leave base header value blank; SLO will populate
    c_etag = '; s3_etag=%s' % s3_etag
    headers['X-Object-Sysmeta-Container-Update-Override-Etag'] = c_etag

    too_small_message = ('s3api requires that each segment be at least '
                         '%d bytes' % self.conf.min_segment_size)

    def size_checker(manifest):
        # Check the size of each segment except the last and make sure
        # they are all more than the minimum upload chunk size.
        # Note that we need to use the *internal* keys, since we're
        # looking at the manifest that's about to be written.
        # Returns a list of (segment name, message) pairs, one per
        # undersized segment.
        return [
            (item['name'], too_small_message)
            for item in manifest[:-1]
            if item and item['bytes'] < self.conf.min_segment_size]

    # Registered in the request environ under the SLO manifest hook key.
    req.environ['swift.callback.slo_manifest_hook'] = size_checker
    start_time = time.time()

    def response_iter():
        # NB: XML requires that the XML declaration, if present, be at the
        # very start of the document. Clients *will* call us out on not
        # being valid XML if we pass through whitespace before it.
        # Track whether we've sent anything yet so we can yield out that
        # declaration *first*
        yielded_anything = False

        try:
            try:
                # TODO: add support for versioning
                put_resp = req.get_response(
                    self.app, 'PUT', body=json.dumps(manifest),
                    query={'multipart-manifest': 'put',
                           'heartbeat': 'on'},
                    headers=headers)
                if put_resp.status_int == 202:
                    # The real outcome is inside the JSON body (see the
                    # 'Response Status' check below). Whitespace-only
                    # chunks are passed through to the client;
                    # everything else is buffered for parsing.
                    body = []
                    put_resp.fix_conditional_response()
                    for chunk in put_resp.response_iter:
                        if not chunk.strip():
                            if time.time() - start_time < 10:
                                # Include some grace period to keep
                                # ceph-s3tests happy
                                continue
                            if not yielded_anything:
                                yield (b'<?xml version="1.0" '
                                       b'encoding="UTF-8"?>\n')
                            yielded_anything = True
                            yield chunk
                            continue
                        body.append(chunk)
                    body = json.loads(b''.join(body))
                    if body['Response Status'] != '201 Created':
                        # Map the first recognised per-segment error to
                        # its S3 error; otherwise report all of them.
                        for seg, err in body['Errors']:
                            if err == too_small_message:
                                raise EntityTooSmall()
                            elif err in ('Etag Mismatch', '404 Not Found'):
                                raise InvalidPart(upload_id=upload_id)
                        raise InvalidRequest(
                            status=body['Response Status'],
                            msg='\n'.join(': '.join(err)
                                          for err in body['Errors']))
            except BadSwiftRequest as e:
                # Same error mapping as above, but matched against the
                # text of the exception.
                msg = str(e)
                if too_small_message in msg:
                    raise EntityTooSmall(msg)
                elif ', Etag Mismatch' in msg:
                    raise InvalidPart(upload_id=upload_id)
                elif ', 404 Not Found' in msg:
                    raise InvalidPart(upload_id=upload_id)
                else:
                    raise

            # clean up the multipart-upload record
            obj = '%s/%s' % (req.object_name, upload_id)
            try:
                req.get_response(self.app, 'DELETE', container, obj)
            except NoSuchKey:
                # We know that this existed long enough for us to HEAD
                pass

            result_elem = Element('CompleteMultipartUploadResult')

            # NOTE: boto with sig v4 appends port to HTTP_HOST value at
            # the request header when the port is non default value and it
            # makes req.host_url like as http://localhost:8080:8080/path
            # that obviously invalid. Probably it should be resolved at
            # swift.common.swob though, tentatively we are parsing and
            # reconstructing the correct host_url info here.
            # in detail, https://github.com/boto/boto/pull/3513
            parsed_url = urlparse(req.host_url)
            host_url = '%s://%s' % (parsed_url.scheme, parsed_url.hostname)
            # Why are we doing our own port parsing? Because py3 decided
            # to start raising ValueErrors on access after parsing such
            # an invalid port
            # Drop any 'user@' prefix and bracketed IPv6 host, then keep
            # only the first ':'-separated field after the host.
            netloc = parsed_url.netloc.split('@')[-1].split(']')[-1]
            if ':' in netloc:
                port = netloc.split(':', 2)[1]
                host_url += ':%s' % port

            SubElement(result_elem, 'Location').text = host_url + req.path
            SubElement(result_elem, 'Bucket').text = req.container_name
            SubElement(result_elem, 'Key').text = req.object_name
            SubElement(result_elem, 'ETag').text = '"%s"' % s3_etag
            resp.headers.pop('ETag', None)
            if yielded_anything:
                yield b'\n'
            # The XML declaration is only emitted here if it was not
            # already sent ahead of the pass-through chunks.
            yield tostring(result_elem,
                           xml_declaration=not yielded_anything)
        except ErrorResponse as err_resp:
            if yielded_anything:
                # Part of the body is already out, so the declaration
                # must not be emitted a second time.
                err_resp.xml_declaration = False
                yield b'\n'
            else:
                # Oh good, we can still change HTTP status code, too!
                # ``resp`` is the enclosing function's variable, which is
                # rebound to the HTTPOk below before this generator is
                # even created.
                resp.status = err_resp.status
            for chunk in err_resp({}, lambda *a: None):
                yield chunk

    resp = HTTPOk()  # assume we're good for now... but see above!
    resp.app_iter = reiterate(response_iter())
    resp.content_type = "application/xml"

    return resp