def response_iter(): # NB: XML requires that the XML declaration, if present, be at the # very start of the document. Clients *will* call us out on not # being valid XML if we pass through whitespace before it. # Track whether we've sent anything yet so we can yield out that # declaration *first* yielded_anything = False try: try: # TODO: add support for versioning put_resp = req.get_response(self.app, 'PUT', body=json.dumps(manifest), query={ 'multipart-manifest': 'put', 'heartbeat': 'on' }, headers=headers) if put_resp.status_int == 202: body = [] put_resp.fix_conditional_response() for chunk in put_resp.response_iter: if not chunk.strip(): if time.time() - start_time < 10: # Include some grace period to keep # ceph-s3tests happy continue if not yielded_anything: yield (b'<?xml version="1.0" ' b'encoding="UTF-8"?>\n') yielded_anything = True yield chunk continue body.append(chunk) body = json.loads(b''.join(body)) if body['Response Status'] != '201 Created': for seg, err in body['Errors']: if err == too_small_message: raise EntityTooSmall() elif err in ('Etag Mismatch', '404 Not Found'): raise InvalidPart(upload_id=upload_id) raise InvalidRequest( status=body['Response Status'], msg='\n'.join(': '.join(err) for err in body['Errors'])) except BadSwiftRequest as e: msg = str(e) if too_small_message in msg: raise EntityTooSmall(msg) elif ', Etag Mismatch' in msg: raise InvalidPart(upload_id=upload_id) elif ', 404 Not Found' in msg: raise InvalidPart(upload_id=upload_id) else: raise # clean up the multipart-upload record obj = '%s/%s' % (req.object_name, upload_id) try: req.get_response(self.app, 'DELETE', container, obj) except NoSuchKey: # The important thing is that we wrote out a tombstone to # make sure the marker got cleaned up. If it's already # gone (e.g., because of concurrent completes or a retried # complete), so much the better. pass yield _make_complete_body(req, s3_etag, yielded_anything) except ErrorResponse as err_resp: if yielded_anything: err_resp.xml_declaration = False yield b'\n' else: # Oh good, we can still change HTTP status code, too! resp.status = err_resp.status for chunk in err_resp({}, lambda *a: None): yield chunk
def response_iter(): # NB: XML requires that the XML declaration, if present, be at the # very start of the document. Clients *will* call us out on not # being valid XML if we pass through whitespace before it. # Track whether we've sent anything yet so we can yield out that # declaration *first* yielded_anything = False try: try: # TODO: add support for versioning put_resp = req.get_response(self.app, 'PUT', body=json.dumps(manifest), query={ 'multipart-manifest': 'put', 'heartbeat': 'on' }, headers=headers) if put_resp.status_int == 202: body = [] put_resp.fix_conditional_response() for chunk in put_resp.response_iter: if not chunk.strip(): if time.time() - start_time < 10: # Include some grace period to keep # ceph-s3tests happy continue if not yielded_anything: yield (b'<?xml version="1.0" ' b'encoding="UTF-8"?>\n') yielded_anything = True yield chunk continue body.append(chunk) body = json.loads(b''.join(body)) if body['Response Status'] != '201 Created': for seg, err in body['Errors']: if err == too_small_message: raise EntityTooSmall() elif err in ('Etag Mismatch', '404 Not Found'): raise InvalidPart(upload_id=upload_id) raise InvalidRequest( status=body['Response Status'], msg='\n'.join(': '.join(err) for err in body['Errors'])) except BadSwiftRequest as e: msg = str(e) if too_small_message in msg: raise EntityTooSmall(msg) elif ', Etag Mismatch' in msg: raise InvalidPart(upload_id=upload_id) elif ', 404 Not Found' in msg: raise InvalidPart(upload_id=upload_id) else: raise # clean up the multipart-upload record obj = '%s/%s' % (req.object_name, upload_id) try: req.get_response(self.app, 'DELETE', container, obj) except NoSuchKey: # We know that this existed long enough for us to HEAD pass result_elem = Element('CompleteMultipartUploadResult') # NOTE: boto with sig v4 appends port to HTTP_HOST value at # the request header when the port is non default value and it # makes req.host_url like as http://localhost:8080:8080/path # that obviously invalid. Probably it should be resolved at # swift.common.swob though, tentatively we are parsing and # reconstructing the correct host_url info here. # in detail, https://github.com/boto/boto/pull/3513 parsed_url = urlparse(req.host_url) host_url = '%s://%s' % (parsed_url.scheme, parsed_url.hostname) # Why are we doing our own port parsing? Because py3 decided # to start raising ValueErrors on access after parsing such # an invalid port netloc = parsed_url.netloc.split('@')[-1].split(']')[-1] if ':' in netloc: port = netloc.split(':', 2)[1] host_url += ':%s' % port SubElement(result_elem, 'Location').text = host_url + req.path SubElement(result_elem, 'Bucket').text = req.container_name SubElement(result_elem, 'Key').text = req.object_name SubElement(result_elem, 'ETag').text = '"%s"' % s3_etag resp.headers.pop('ETag', None) if yielded_anything: yield b'\n' yield tostring(result_elem, xml_declaration=not yielded_anything) except ErrorResponse as err_resp: if yielded_anything: err_resp.xml_declaration = False yield b'\n' else: # Oh good, we can still change HTTP status code, too! resp.status = err_resp.status for chunk in err_resp({}, lambda *a: None): yield chunk
def response_iter(): # NB: XML requires that the XML declaration, if present, be at the # very start of the document. Clients *will* call us out on not # being valid XML if we pass through whitespace before it. # Track whether we've sent anything yet so we can yield out that # declaration *first* yielded_anything = False try: try: # TODO: add support for versioning put_resp = req.get_response(self.app, 'PUT', body=json.dumps(manifest), query={ 'multipart-manifest': 'put', 'heartbeat': 'on' }, headers=headers) if put_resp.status_int == 202: body = [] put_resp.fix_conditional_response() for chunk in put_resp.response_iter: if not chunk.strip(): if not yielded_anything: yield ('<?xml version="1.0" ' 'encoding="UTF-8"?>\n') yielded_anything = True yield chunk body.append(chunk) body = json.loads(''.join(body)) if body['Response Status'] != '201 Created': raise InvalidRequest( status=body['Response Status'], msg='\n'.join(': '.join(err) for err in body['Errors'])) except BadSwiftRequest as e: msg = str(e) expected_msg = ('too small; each segment must be ' 'at least 1 byte') if expected_msg in msg: # FIXME: AWS S3 allows a smaller object than 5 MB if # there is only one part. Use a COPY request to copy # the part object from the segments container instead. raise EntityTooSmall(msg) else: raise # clean up the multipart-upload record obj = '%s/%s' % (req.object_name, upload_id) try: req.get_response(self.app, 'DELETE', container, obj) except NoSuchKey: # We know that this existed long enough for us to HEAD pass result_elem = Element('CompleteMultipartUploadResult') # NOTE: boto with sig v4 appends port to HTTP_HOST value at # the request header when the port is non default value and it # makes req.host_url like as http://localhost:8080:8080/path # that obviously invalid. Probably it should be resolved at # swift.common.swob though, tentatively we are parsing and # reconstructing the correct host_url info here. # in detail, https://github.com/boto/boto/pull/3513 parsed_url = urlparse(req.host_url) host_url = '%s://%s' % (parsed_url.scheme, parsed_url.hostname) if parsed_url.port: host_url += ':%s' % parsed_url.port SubElement(result_elem, 'Location').text = host_url + req.path SubElement(result_elem, 'Bucket').text = req.container_name SubElement(result_elem, 'Key').text = req.object_name SubElement(result_elem, 'ETag').text = '"%s"' % s3_etag resp.headers.pop('ETag', None) if yielded_anything: yield '\n' yield tostring(result_elem, xml_declaration=not yielded_anything) except ErrorResponse as err_resp: if yielded_anything: err_resp.xml_declaration = False yield '\n' else: # Oh good, we can still change HTTP status code, too! resp.status = err_resp.status for chunk in err_resp({}, lambda *a: None): yield chunk
def POST(self, req): """ Handles Complete Multipart Upload. """ upload_id = req.params['uploadId'] resp = _get_upload_info(req, self.app, upload_id) headers = {} for key, val in resp.headers.iteritems(): _key = key.lower() if _key.startswith('x-amz-meta-'): headers['x-object-meta-' + _key[11:]] = val hct_header = sysmeta_header('object', 'has-content-type') if resp.sysmeta_headers.get(hct_header) == 'yes': content_type = resp.sysmeta_headers.get( sysmeta_header('object', 'content-type')) elif hct_header in resp.sysmeta_headers: # has-content-type is present but false, so no content type was # set on initial upload. In that case, we won't set one on our # PUT request. Swift will end up guessing one based on the # object name. content_type = None else: content_type = resp.headers.get('Content-Type') if content_type: headers['Content-Type'] = content_type # Query for the objects in the segments area to make sure it completed query = { 'format': 'json', 'prefix': '%s/%s/' % (req.object_name, upload_id), 'delimiter': '/' } container = req.container_name + MULTIUPLOAD_SUFFIX resp = req.get_response(self.app, 'GET', container, '', query=query) objinfo = json.loads(resp.body) objtable = dict((o['name'], { 'path': '/'.join(['', container, o['name']]), 'etag': o['hash'], 'size_bytes': o['bytes'] }) for o in objinfo) manifest = [] previous_number = 0 try: xml = req.xml(MAX_COMPLETE_UPLOAD_BODY_SIZE) if not xml: raise InvalidRequest(msg='You must specify at least one part') complete_elem = fromstring(xml, 'CompleteMultipartUpload', self.logger) for part_elem in complete_elem.iterchildren('Part'): part_number = int(part_elem.find('./PartNumber').text) if part_number <= previous_number: raise InvalidPartOrder(upload_id=upload_id) previous_number = part_number etag = part_elem.find('./ETag').text if len(etag) >= 2 and etag[0] == '"' and etag[-1] == '"': # strip double quotes etag = etag[1:-1] info = objtable.get("%s/%s/%s" % (req.object_name, upload_id, part_number)) if info is None or info['etag'] != etag: raise InvalidPart(upload_id=upload_id, part_number=part_number) info['size_bytes'] = int(info['size_bytes']) manifest.append(info) except (XMLSyntaxError, DocumentInvalid): raise MalformedXML() except ErrorResponse: raise except Exception as e: self.logger.error(e) raise # Check the size of each segment except the last and make sure they are # all more than the minimum upload chunk size for info in manifest[:-1]: if info['size_bytes'] < self.conf.min_segment_size: raise EntityTooSmall() try: # TODO: add support for versioning if manifest: resp = req.get_response(self.app, 'PUT', body=json.dumps(manifest), query={'multipart-manifest': 'put'}, headers=headers) else: # the upload must have consisted of a single zero-length part # just write it directly resp = req.get_response(self.app, 'PUT', body='', headers=headers) except BadSwiftRequest as e: msg = str(e) expected_msg = 'too small; each segment must be at least 1 byte' if expected_msg in msg: # FIXME: AWS S3 allows a smaller object than 5 MB if there is # only one part. Use a COPY request to copy the part object # from the segments container instead. raise EntityTooSmall(msg) else: raise # clean up the multipart-upload record obj = '%s/%s' % (req.object_name, upload_id) try: req.get_response(self.app, 'DELETE', container, obj) except NoSuchKey: pass # We know that this existed long enough for us to HEAD result_elem = Element('CompleteMultipartUploadResult') # NOTE: boto with sig v4 appends port to HTTP_HOST value at the # request header when the port is non default value and it makes # req.host_url like as http://localhost:8080:8080/path # that obviously invalid. Probably it should be resolved at # swift.common.swob though, tentatively we are parsing and # reconstructing the correct host_url info here. # in detail, https://github.com/boto/boto/pull/3513 parsed_url = urlparse(req.host_url) host_url = '%s://%s' % (parsed_url.scheme, parsed_url.hostname) if parsed_url.port: host_url += ':%s' % parsed_url.port SubElement(result_elem, 'Location').text = host_url + req.path SubElement(result_elem, 'Bucket').text = req.container_name SubElement(result_elem, 'Key').text = req.object_name SubElement(result_elem, 'ETag').text = resp.etag resp.body = tostring(result_elem) resp.status = 200 resp.content_type = "application/xml" return resp
def POST(self, req): """ Handles Complete Multipart Upload. """ upload_id = req.params['uploadId'] resp = _get_upload_info(req, self.app, upload_id) headers = {'Accept': 'application/json'} for key, val in resp.headers.items(): _key = key.lower() if _key.startswith('x-amz-meta-'): headers['x-object-meta-' + _key[11:]] = val hct_header = sysmeta_header('object', 'has-content-type') if resp.sysmeta_headers.get(hct_header) == 'yes': content_type = resp.sysmeta_headers.get( sysmeta_header('object', 'content-type')) elif hct_header in resp.sysmeta_headers: # has-content-type is present but false, so no content type was # set on initial upload. In that case, we won't set one on our # PUT request. Swift will end up guessing one based on the # object name. content_type = None else: content_type = resp.headers.get('Content-Type') if content_type: headers['Content-Type'] = content_type # Query for the objects in the segments area to make sure it completed query = { 'format': 'json', 'prefix': '%s/%s/' % (req.object_name, upload_id), 'delimiter': '/' } container = req.container_name + MULTIUPLOAD_SUFFIX resp = req.get_response(self.app, 'GET', container, '', query=query) objinfo = json.loads(resp.body) objtable = dict((o['name'], { 'path': '/'.join(['', container, o['name']]), 'etag': o['hash'], 'size_bytes': o['bytes'] }) for o in objinfo) s3_etag_hasher = md5() manifest = [] previous_number = 0 try: xml = req.xml(MAX_COMPLETE_UPLOAD_BODY_SIZE) if not xml: raise InvalidRequest(msg='You must specify at least one part') complete_elem = fromstring(xml, 'CompleteMultipartUpload', self.logger) for part_elem in complete_elem.iterchildren('Part'): part_number = int(part_elem.find('./PartNumber').text) if part_number <= previous_number: raise InvalidPartOrder(upload_id=upload_id) previous_number = part_number etag = part_elem.find('./ETag').text if len(etag) >= 2 and etag[0] == '"' and etag[-1] == '"': # strip double quotes etag = etag[1:-1] info = objtable.get("%s/%s/%s" % (req.object_name, upload_id, part_number)) if info is None or info['etag'] != etag: raise InvalidPart(upload_id=upload_id, part_number=part_number) s3_etag_hasher.update(etag.decode('hex')) info['size_bytes'] = int(info['size_bytes']) manifest.append(info) except (XMLSyntaxError, DocumentInvalid): # NB: our schema definitions catch uploads with no parts here raise MalformedXML() except ErrorResponse: raise except Exception as e: self.logger.error(e) raise s3_etag = '%s-%d' % (s3_etag_hasher.hexdigest(), len(manifest)) headers[sysmeta_header('object', 'etag')] = s3_etag # Leave base header value blank; SLO will populate c_etag = '; s3_etag=%s' % s3_etag headers['X-Object-Sysmeta-Container-Update-Override-Etag'] = c_etag # Check the size of each segment except the last and make sure they are # all more than the minimum upload chunk size for info in manifest[:-1]: if info['size_bytes'] < self.conf.min_segment_size: raise EntityTooSmall() def response_iter(): # NB: XML requires that the XML declaration, if present, be at the # very start of the document. Clients *will* call us out on not # being valid XML if we pass through whitespace before it. # Track whether we've sent anything yet so we can yield out that # declaration *first* yielded_anything = False try: try: # TODO: add support for versioning put_resp = req.get_response(self.app, 'PUT', body=json.dumps(manifest), query={ 'multipart-manifest': 'put', 'heartbeat': 'on' }, headers=headers) if put_resp.status_int == 202: body = [] put_resp.fix_conditional_response() for chunk in put_resp.response_iter: if not chunk.strip(): if not yielded_anything: yield ('<?xml version="1.0" ' 'encoding="UTF-8"?>\n') yielded_anything = True yield chunk body.append(chunk) body = json.loads(''.join(body)) if body['Response Status'] != '201 Created': raise InvalidRequest( status=body['Response Status'], msg='\n'.join(': '.join(err) for err in body['Errors'])) except BadSwiftRequest as e: msg = str(e) expected_msg = ('too small; each segment must be ' 'at least 1 byte') if expected_msg in msg: # FIXME: AWS S3 allows a smaller object than 5 MB if # there is only one part. Use a COPY request to copy # the part object from the segments container instead. raise EntityTooSmall(msg) else: raise # clean up the multipart-upload record obj = '%s/%s' % (req.object_name, upload_id) try: req.get_response(self.app, 'DELETE', container, obj) except NoSuchKey: # We know that this existed long enough for us to HEAD pass result_elem = Element('CompleteMultipartUploadResult') # NOTE: boto with sig v4 appends port to HTTP_HOST value at # the request header when the port is non default value and it # makes req.host_url like as http://localhost:8080:8080/path # that obviously invalid. Probably it should be resolved at # swift.common.swob though, tentatively we are parsing and # reconstructing the correct host_url info here. # in detail, https://github.com/boto/boto/pull/3513 parsed_url = urlparse(req.host_url) host_url = '%s://%s' % (parsed_url.scheme, parsed_url.hostname) if parsed_url.port: host_url += ':%s' % parsed_url.port SubElement(result_elem, 'Location').text = host_url + req.path SubElement(result_elem, 'Bucket').text = req.container_name SubElement(result_elem, 'Key').text = req.object_name SubElement(result_elem, 'ETag').text = '"%s"' % s3_etag resp.headers.pop('ETag', None) if yielded_anything: yield '\n' yield tostring(result_elem, xml_declaration=not yielded_anything) except ErrorResponse as err_resp: if yielded_anything: err_resp.xml_declaration = False yield '\n' else: # Oh good, we can still change HTTP status code, too! resp.status = err_resp.status for chunk in err_resp({}, lambda *a: None): yield chunk resp = HTTPOk() # assume we're good for now... but see above! resp.app_iter = reiterate(response_iter()) resp.content_type = "application/xml" return resp