def _app_call(self, env): """ Ensures start_response has been called before returning. """ self._response_status = None self._response_headers = None self._response_exc_info = None resp = self.app(env, self._start_response) # if start_response has not been called, iterate until we've got a # non-empty chunk, by which time the app *should* have called it if self._response_status is None: resp = reiterate(resp) return resp
def call_application(self, application): """ Calls the application with this request's environment. Returns the status, headers, and app_iter for the response as a tuple. :param application: the WSGI application to call """ output = [] captured = [] def start_response(status, headers, exc_info=None): captured[:] = [status, headers, exc_info] return output.append app_iter = application(self.environ, start_response) if not app_iter: app_iter = output if not captured: app_iter = reiterate(app_iter) return (captured[0], captured[1], app_iter)
def POST(self, req): """ Handles Complete Multipart Upload. """ upload_id = req.params['uploadId'] resp = _get_upload_info(req, self.app, upload_id) headers = { 'Accept': 'application/json', sysmeta_header('object', 'upload-id'): upload_id } for key, val in resp.headers.items(): _key = key.lower() if _key.startswith('x-amz-meta-'): headers['x-object-meta-' + _key[11:]] = val hct_header = sysmeta_header('object', 'has-content-type') if resp.sysmeta_headers.get(hct_header) == 'yes': content_type = resp.sysmeta_headers.get( sysmeta_header('object', 'content-type')) elif hct_header in resp.sysmeta_headers: # has-content-type is present but false, so no content type was # set on initial upload. In that case, we won't set one on our # PUT request. Swift will end up guessing one based on the # object name. content_type = None else: content_type = resp.headers.get('Content-Type') if content_type: headers['Content-Type'] = content_type container = req.container_name + MULTIUPLOAD_SUFFIX s3_etag_hasher = md5(usedforsecurity=False) manifest = [] previous_number = 0 try: xml = req.xml(MAX_COMPLETE_UPLOAD_BODY_SIZE) if not xml: raise InvalidRequest(msg='You must specify at least one part') if 'content-md5' in req.headers: # If an MD5 was provided, we need to verify it. # Note that S3Request already took care of translating to ETag if req.headers['etag'] != md5( xml, usedforsecurity=False).hexdigest(): raise BadDigest(content_md5=req.headers['content-md5']) # We're only interested in the body here, in the # multipart-upload controller -- *don't* let it get # plumbed down to the object-server del req.headers['etag'] complete_elem = fromstring(xml, 'CompleteMultipartUpload', self.logger) for part_elem in complete_elem.iterchildren('Part'): part_number = int(part_elem.find('./PartNumber').text) if part_number <= previous_number: raise InvalidPartOrder(upload_id=upload_id) previous_number = part_number etag = normalize_etag(part_elem.find('./ETag').text) if len(etag) != 32 or any(c not in '0123456789abcdef' for c in etag): raise InvalidPart(upload_id=upload_id, part_number=part_number) manifest.append({ 'path': '/%s/%s/%s/%d' % (wsgi_to_str(container), wsgi_to_str( req.object_name), upload_id, part_number), 'etag': etag }) s3_etag_hasher.update(binascii.a2b_hex(etag)) except (XMLSyntaxError, DocumentInvalid): # NB: our schema definitions catch uploads with no parts here raise MalformedXML() except ErrorResponse: raise except Exception as e: self.logger.error(e) raise s3_etag = '%s-%d' % (s3_etag_hasher.hexdigest(), len(manifest)) s3_etag_header = sysmeta_header('object', 'etag') if resp.sysmeta_headers.get(s3_etag_header) == s3_etag: # This header should only already be present if the upload marker # has been cleaned up and the current target uses the same # upload-id; assuming the segments to use haven't changed, the work # is already done return HTTPOk(body=_make_complete_body(req, s3_etag, False), content_type='application/xml') headers[s3_etag_header] = s3_etag # Leave base header value blank; SLO will populate c_etag = '; s3_etag=%s' % s3_etag headers[get_container_update_override_key('etag')] = c_etag too_small_message = ('s3api requires that each segment be at least ' '%d bytes' % self.conf.min_segment_size) def size_checker(manifest): # Check the size of each segment except the last and make sure # they are all more than the minimum upload chunk size. # Note that we need to use the *internal* keys, since we're # looking at the manifest that's about to be written. return [(item['name'], too_small_message) for item in manifest[:-1] if item and item['bytes'] < self.conf.min_segment_size] req.environ['swift.callback.slo_manifest_hook'] = size_checker start_time = time.time() def response_iter(): # NB: XML requires that the XML declaration, if present, be at the # very start of the document. Clients *will* call us out on not # being valid XML if we pass through whitespace before it. # Track whether we've sent anything yet so we can yield out that # declaration *first* yielded_anything = False try: try: # TODO: add support for versioning put_resp = req.get_response(self.app, 'PUT', body=json.dumps(manifest), query={ 'multipart-manifest': 'put', 'heartbeat': 'on' }, headers=headers) if put_resp.status_int == 202: body = [] put_resp.fix_conditional_response() for chunk in put_resp.response_iter: if not chunk.strip(): if time.time() - start_time < 10: # Include some grace period to keep # ceph-s3tests happy continue if not yielded_anything: yield (b'<?xml version="1.0" ' b'encoding="UTF-8"?>\n') yielded_anything = True yield chunk continue body.append(chunk) body = json.loads(b''.join(body)) if body['Response Status'] != '201 Created': for seg, err in body['Errors']: if err == too_small_message: raise EntityTooSmall() elif err in ('Etag Mismatch', '404 Not Found'): raise InvalidPart(upload_id=upload_id) raise InvalidRequest( status=body['Response Status'], msg='\n'.join(': '.join(err) for err in body['Errors'])) except BadSwiftRequest as e: msg = str(e) if too_small_message in msg: raise EntityTooSmall(msg) elif ', Etag Mismatch' in msg: raise InvalidPart(upload_id=upload_id) elif ', 404 Not Found' in msg: raise InvalidPart(upload_id=upload_id) else: raise # clean up the multipart-upload record obj = '%s/%s' % (req.object_name, upload_id) try: req.get_response(self.app, 'DELETE', container, obj) except NoSuchKey: # The important thing is that we wrote out a tombstone to # make sure the marker got cleaned up. If it's already # gone (e.g., because of concurrent completes or a retried # complete), so much the better. pass yield _make_complete_body(req, s3_etag, yielded_anything) except ErrorResponse as err_resp: if yielded_anything: err_resp.xml_declaration = False yield b'\n' else: # Oh good, we can still change HTTP status code, too! resp.status = err_resp.status for chunk in err_resp({}, lambda *a: None): yield chunk resp = HTTPOk() # assume we're good for now... but see above! resp.app_iter = reiterate(response_iter()) resp.content_type = "application/xml" return resp
def _perform_subrequest(self, orig_env, attributes, fp, keys): """ Performs the subrequest and returns the response. :param orig_env: The WSGI environment dict; will only be used to form a new env for the subrequest. :param attributes: dict of the attributes of the form so far. :param fp: The file-like object containing the request body. :param keys: The account keys to validate the signature with. :returns: (status_line, headers_list) """ if not keys: raise FormUnauthorized('invalid signature') try: max_file_size = int(attributes.get('max_file_size') or 0) except ValueError: raise FormInvalid('max_file_size not an integer') subenv = make_pre_authed_env(orig_env, 'PUT', agent=None, swift_source='FP') if 'QUERY_STRING' in subenv: del subenv['QUERY_STRING'] subenv['HTTP_TRANSFER_ENCODING'] = 'chunked' subenv['wsgi.input'] = _CappedFileLikeObject(fp, max_file_size) if not subenv['PATH_INFO'].endswith('/') and \ subenv['PATH_INFO'].count('/') < 4: subenv['PATH_INFO'] += '/' subenv['PATH_INFO'] += str_to_wsgi( attributes['filename'] or 'filename') if 'x_delete_at' in attributes: try: subenv['HTTP_X_DELETE_AT'] = int(attributes['x_delete_at']) except ValueError: raise FormInvalid('x_delete_at not an integer: ' 'Unix timestamp required.') if 'x_delete_after' in attributes: try: subenv['HTTP_X_DELETE_AFTER'] = int( attributes['x_delete_after']) except ValueError: raise FormInvalid('x_delete_after not an integer: ' 'Number of seconds required.') if 'content-type' in attributes: subenv['CONTENT_TYPE'] = \ attributes['content-type'] or 'application/octet-stream' if 'content-encoding' in attributes: subenv['HTTP_CONTENT_ENCODING'] = attributes['content-encoding'] try: if int(attributes.get('expires') or 0) < time(): raise FormUnauthorized('form expired') except ValueError: raise FormInvalid('expired not an integer') hmac_body = '%s\n%s\n%s\n%s\n%s' % ( wsgi_to_str(orig_env['PATH_INFO']), attributes.get('redirect') or '', attributes.get('max_file_size') or '0', attributes.get('max_file_count') or '0', attributes.get('expires') or '0') if six.PY3: hmac_body = hmac_body.encode('utf-8') has_valid_sig = False for key in keys: # Encode key like in swift.common.utls.get_hmac. if not isinstance(key, six.binary_type): key = key.encode('utf8') sig = hmac.new(key, hmac_body, sha1).hexdigest() if streq_const_time(sig, (attributes.get('signature') or 'invalid')): has_valid_sig = True if not has_valid_sig: raise FormUnauthorized('invalid signature') substatus = [None] subheaders = [None] wsgi_input = subenv['wsgi.input'] def _start_response(status, headers, exc_info=None): if wsgi_input.file_size_exceeded: raise EOFError("max_file_size exceeded") substatus[0] = status subheaders[0] = headers # reiterate to ensure the response started, # but drop any data on the floor close_if_possible(reiterate(self.app(subenv, _start_response))) return substatus[0], subheaders[0]
def POST(self, req): """ Handles Complete Multipart Upload. """ upload_id = req.params['uploadId'] resp = _get_upload_info(req, self.app, upload_id) headers = {'Accept': 'application/json'} for key, val in resp.headers.items(): _key = key.lower() if _key.startswith('x-amz-meta-'): headers['x-object-meta-' + _key[11:]] = val hct_header = sysmeta_header('object', 'has-content-type') if resp.sysmeta_headers.get(hct_header) == 'yes': content_type = resp.sysmeta_headers.get( sysmeta_header('object', 'content-type')) elif hct_header in resp.sysmeta_headers: # has-content-type is present but false, so no content type was # set on initial upload. In that case, we won't set one on our # PUT request. Swift will end up guessing one based on the # object name. content_type = None else: content_type = resp.headers.get('Content-Type') if content_type: headers['Content-Type'] = content_type container = req.container_name + MULTIUPLOAD_SUFFIX s3_etag_hasher = md5() manifest = [] previous_number = 0 try: xml = req.xml(MAX_COMPLETE_UPLOAD_BODY_SIZE) if not xml: raise InvalidRequest(msg='You must specify at least one part') if 'content-md5' in req.headers: # If an MD5 was provided, we need to verify it. # Note that S3Request already took care of translating to ETag if req.headers['etag'] != md5(xml).hexdigest(): raise BadDigest(content_md5=req.headers['content-md5']) # We're only interested in the body here, in the # multipart-upload controller -- *don't* let it get # plumbed down to the object-server del req.headers['etag'] complete_elem = fromstring(xml, 'CompleteMultipartUpload', self.logger) for part_elem in complete_elem.iterchildren('Part'): part_number = int(part_elem.find('./PartNumber').text) if part_number <= previous_number: raise InvalidPartOrder(upload_id=upload_id) previous_number = part_number etag = normalize_etag(part_elem.find('./ETag').text) if len(etag) != 32 or any(c not in '0123456789abcdef' for c in etag): raise InvalidPart(upload_id=upload_id, part_number=part_number) manifest.append({ 'path': '/%s/%s/%s/%d' % (container, req.object_name, upload_id, part_number), 'etag': etag }) s3_etag_hasher.update(binascii.a2b_hex(etag)) except (XMLSyntaxError, DocumentInvalid): # NB: our schema definitions catch uploads with no parts here raise MalformedXML() except ErrorResponse: raise except Exception as e: self.logger.error(e) raise s3_etag = '%s-%d' % (s3_etag_hasher.hexdigest(), len(manifest)) headers[sysmeta_header('object', 'etag')] = s3_etag # Leave base header value blank; SLO will populate c_etag = '; s3_etag=%s' % s3_etag headers[get_container_update_override_key('etag')] = c_etag too_small_message = ('s3api requires that each segment be at least ' '%d bytes' % self.conf.min_segment_size) def size_checker(manifest): # Check the size of each segment except the last and make sure # they are all more than the minimum upload chunk size. # Note that we need to use the *internal* keys, since we're # looking at the manifest that's about to be written. return [(item['name'], too_small_message) for item in manifest[:-1] if item and item['bytes'] < self.conf.min_segment_size] req.environ['swift.callback.slo_manifest_hook'] = size_checker start_time = time.time() def response_iter(): # NB: XML requires that the XML declaration, if present, be at the # very start of the document. Clients *will* call us out on not # being valid XML if we pass through whitespace before it. # Track whether we've sent anything yet so we can yield out that # declaration *first* yielded_anything = False try: try: # TODO: add support for versioning put_resp = req.get_response(self.app, 'PUT', body=json.dumps(manifest), query={ 'multipart-manifest': 'put', 'heartbeat': 'on' }, headers=headers) if put_resp.status_int == 202: body = [] put_resp.fix_conditional_response() for chunk in put_resp.response_iter: if not chunk.strip(): if time.time() - start_time < 10: # Include some grace period to keep # ceph-s3tests happy continue if not yielded_anything: yield (b'<?xml version="1.0" ' b'encoding="UTF-8"?>\n') yielded_anything = True yield chunk continue body.append(chunk) body = json.loads(b''.join(body)) if body['Response Status'] != '201 Created': for seg, err in body['Errors']: if err == too_small_message: raise EntityTooSmall() elif err in ('Etag Mismatch', '404 Not Found'): raise InvalidPart(upload_id=upload_id) raise InvalidRequest( status=body['Response Status'], msg='\n'.join(': '.join(err) for err in body['Errors'])) except BadSwiftRequest as e: msg = str(e) if too_small_message in msg: raise EntityTooSmall(msg) elif ', Etag Mismatch' in msg: raise InvalidPart(upload_id=upload_id) elif ', 404 Not Found' in msg: raise InvalidPart(upload_id=upload_id) else: raise # clean up the multipart-upload record obj = '%s/%s' % (req.object_name, upload_id) try: req.get_response(self.app, 'DELETE', container, obj) except NoSuchKey: # We know that this existed long enough for us to HEAD pass result_elem = Element('CompleteMultipartUploadResult') # NOTE: boto with sig v4 appends port to HTTP_HOST value at # the request header when the port is non default value and it # makes req.host_url like as http://localhost:8080:8080/path # that obviously invalid. Probably it should be resolved at # swift.common.swob though, tentatively we are parsing and # reconstructing the correct host_url info here. # in detail, https://github.com/boto/boto/pull/3513 parsed_url = urlparse(req.host_url) host_url = '%s://%s' % (parsed_url.scheme, parsed_url.hostname) # Why are we doing our own port parsing? Because py3 decided # to start raising ValueErrors on access after parsing such # an invalid port netloc = parsed_url.netloc.split('@')[-1].split(']')[-1] if ':' in netloc: port = netloc.split(':', 2)[1] host_url += ':%s' % port SubElement(result_elem, 'Location').text = host_url + req.path SubElement(result_elem, 'Bucket').text = req.container_name SubElement(result_elem, 'Key').text = req.object_name SubElement(result_elem, 'ETag').text = '"%s"' % s3_etag resp.headers.pop('ETag', None) if yielded_anything: yield b'\n' yield tostring(result_elem, xml_declaration=not yielded_anything) except ErrorResponse as err_resp: if yielded_anything: err_resp.xml_declaration = False yield b'\n' else: # Oh good, we can still change HTTP status code, too! resp.status = err_resp.status for chunk in err_resp({}, lambda *a: None): yield chunk resp = HTTPOk() # assume we're good for now... but see above! resp.app_iter = reiterate(response_iter()) resp.content_type = "application/xml" return resp
def iter_response(iterable): iterator = reiterate(iterable) content_length = None for h, v in start_response_args[0][1]: if h.lower() == 'content-length': content_length = int(v) break elif h.lower() == 'transfer-encoding': break else: if isinstance(iterator, list): content_length = sum(len(i) for i in iterator) start_response_args[0][1].append( ('Content-Length', str(content_length))) req = Request(env) method = self.method_from_req(req) if method == 'HEAD': content_length = 0 if content_length is not None: iterator = enforce_byte_count(iterator, content_length) wire_status_int = int(start_response_args[0][0].split(' ', 1)[0]) resp_headers = dict(start_response_args[0][1]) start_response(*start_response_args[0]) # Log timing information for time-to-first-byte (GET requests only) ttfb = 0.0 if method == 'GET': policy_index = get_policy_index(req.headers, resp_headers) metric_name = self.statsd_metric_name(req, wire_status_int, method) metric_name_policy = self.statsd_metric_name_policy( req, wire_status_int, method, policy_index) ttfb = time.time() - start_time if metric_name: self.access_logger.timing( metric_name + '.first-byte.timing', ttfb * 1000) if metric_name_policy: self.access_logger.timing( metric_name_policy + '.first-byte.timing', ttfb * 1000) bytes_sent = 0 client_disconnect = False start_status = wire_status_int try: for chunk in iterator: bytes_sent += len(chunk) yield chunk except StopIteration: # iterator was depleted return except GeneratorExit: # generator was closed before we finished client_disconnect = True raise except Exception: start_status = 500 raise finally: status_int = status_int_for_logging(start_status, client_disconnect) self.log_request(req, status_int, input_proxy.bytes_received, bytes_sent, start_time, time.time(), resp_headers=resp_headers, ttfb=ttfb, wire_status_int=wire_status_int) iterator.close()
def _perform_subrequest(self, orig_env, attributes, fp, keys): """ Performs the subrequest and returns the response. :param orig_env: The WSGI environment dict; will only be used to form a new env for the subrequest. :param attributes: dict of the attributes of the form so far. :param fp: The file-like object containing the request body. :param keys: The account keys to validate the signature with. :returns: (status_line, headers_list) """ if not keys: raise FormUnauthorized('invalid signature') try: max_file_size = int(attributes.get('max_file_size') or 0) except ValueError: raise FormInvalid('max_file_size not an integer') subenv = make_pre_authed_env(orig_env, 'PUT', agent=None, swift_source='FP') if 'QUERY_STRING' in subenv: del subenv['QUERY_STRING'] subenv['HTTP_TRANSFER_ENCODING'] = 'chunked' subenv['wsgi.input'] = _CappedFileLikeObject(fp, max_file_size) if not subenv['PATH_INFO'].endswith('/') and \ subenv['PATH_INFO'].count('/') < 4: subenv['PATH_INFO'] += '/' subenv['PATH_INFO'] += attributes['filename'] or 'filename' if 'x_delete_at' in attributes: try: subenv['HTTP_X_DELETE_AT'] = int(attributes['x_delete_at']) except ValueError: raise FormInvalid('x_delete_at not an integer: ' 'Unix timestamp required.') if 'x_delete_after' in attributes: try: subenv['HTTP_X_DELETE_AFTER'] = int( attributes['x_delete_after']) except ValueError: raise FormInvalid('x_delete_after not an integer: ' 'Number of seconds required.') if 'content-type' in attributes: subenv['CONTENT_TYPE'] = \ attributes['content-type'] or 'application/octet-stream' if 'content-encoding' in attributes: subenv['HTTP_CONTENT_ENCODING'] = attributes['content-encoding'] try: if int(attributes.get('expires') or 0) < time(): raise FormUnauthorized('form expired') except ValueError: raise FormInvalid('expired not an integer') hmac_body = '%s\n%s\n%s\n%s\n%s' % ( orig_env['PATH_INFO'], attributes.get('redirect') or '', attributes.get('max_file_size') or '0', attributes.get('max_file_count') or '0', attributes.get('expires') or '0') if six.PY3: hmac_body = hmac_body.encode('utf-8') has_valid_sig = False for key in keys: sig = hmac.new(key, hmac_body, sha1).hexdigest() if streq_const_time(sig, (attributes.get('signature') or 'invalid')): has_valid_sig = True if not has_valid_sig: raise FormUnauthorized('invalid signature') substatus = [None] subheaders = [None] wsgi_input = subenv['wsgi.input'] def _start_response(status, headers, exc_info=None): if wsgi_input.file_size_exceeded: raise EOFError("max_file_size exceeded") substatus[0] = status subheaders[0] = headers # reiterate to ensure the response started, # but drop any data on the floor close_if_possible(reiterate(self.app(subenv, _start_response))) return substatus[0], subheaders[0]
def POST(self, req): """ Handles Complete Multipart Upload. """ upload_id = req.params['uploadId'] resp = _get_upload_info(req, self.app, upload_id) headers = {'Accept': 'application/json'} for key, val in resp.headers.items(): _key = key.lower() if _key.startswith('x-amz-meta-'): headers['x-object-meta-' + _key[11:]] = val hct_header = sysmeta_header('object', 'has-content-type') if resp.sysmeta_headers.get(hct_header) == 'yes': content_type = resp.sysmeta_headers.get( sysmeta_header('object', 'content-type')) elif hct_header in resp.sysmeta_headers: # has-content-type is present but false, so no content type was # set on initial upload. In that case, we won't set one on our # PUT request. Swift will end up guessing one based on the # object name. content_type = None else: content_type = resp.headers.get('Content-Type') if content_type: headers['Content-Type'] = content_type # Query for the objects in the segments area to make sure it completed query = { 'format': 'json', 'prefix': '%s/%s/' % (req.object_name, upload_id), 'delimiter': '/' } container = req.container_name + MULTIUPLOAD_SUFFIX resp = req.get_response(self.app, 'GET', container, '', query=query) objinfo = json.loads(resp.body) objtable = dict((o['name'], { 'path': '/'.join(['', container, o['name']]), 'etag': o['hash'], 'size_bytes': o['bytes'] }) for o in objinfo) s3_etag_hasher = md5() manifest = [] previous_number = 0 try: xml = req.xml(MAX_COMPLETE_UPLOAD_BODY_SIZE) if not xml: raise InvalidRequest(msg='You must specify at least one part') complete_elem = fromstring(xml, 'CompleteMultipartUpload', self.logger) for part_elem in complete_elem.iterchildren('Part'): part_number = int(part_elem.find('./PartNumber').text) if part_number <= previous_number: raise InvalidPartOrder(upload_id=upload_id) previous_number = part_number etag = part_elem.find('./ETag').text if len(etag) >= 2 and etag[0] == '"' and etag[-1] == '"': # strip double quotes etag = etag[1:-1] info = objtable.get("%s/%s/%s" % (req.object_name, upload_id, part_number)) if info is None or info['etag'] != etag: raise InvalidPart(upload_id=upload_id, part_number=part_number) s3_etag_hasher.update(etag.decode('hex')) info['size_bytes'] = int(info['size_bytes']) manifest.append(info) except (XMLSyntaxError, DocumentInvalid): # NB: our schema definitions catch uploads with no parts here raise MalformedXML() except ErrorResponse: raise except Exception as e: self.logger.error(e) raise s3_etag = '%s-%d' % (s3_etag_hasher.hexdigest(), len(manifest)) headers[sysmeta_header('object', 'etag')] = s3_etag # Leave base header value blank; SLO will populate c_etag = '; s3_etag=%s' % s3_etag headers['X-Object-Sysmeta-Container-Update-Override-Etag'] = c_etag # Check the size of each segment except the last and make sure they are # all more than the minimum upload chunk size for info in manifest[:-1]: if info['size_bytes'] < self.conf.min_segment_size: raise EntityTooSmall() def response_iter(): # NB: XML requires that the XML declaration, if present, be at the # very start of the document. Clients *will* call us out on not # being valid XML if we pass through whitespace before it. # Track whether we've sent anything yet so we can yield out that # declaration *first* yielded_anything = False try: try: # TODO: add support for versioning put_resp = req.get_response(self.app, 'PUT', body=json.dumps(manifest), query={ 'multipart-manifest': 'put', 'heartbeat': 'on' }, headers=headers) if put_resp.status_int == 202: body = [] put_resp.fix_conditional_response() for chunk in put_resp.response_iter: if not chunk.strip(): if not yielded_anything: yield ('<?xml version="1.0" ' 'encoding="UTF-8"?>\n') yielded_anything = True yield chunk body.append(chunk) body = json.loads(''.join(body)) if body['Response Status'] != '201 Created': raise InvalidRequest( status=body['Response Status'], msg='\n'.join(': '.join(err) for err in body['Errors'])) except BadSwiftRequest as e: msg = str(e) expected_msg = ('too small; each segment must be ' 'at least 1 byte') if expected_msg in msg: # FIXME: AWS S3 allows a smaller object than 5 MB if # there is only one part. Use a COPY request to copy # the part object from the segments container instead. raise EntityTooSmall(msg) else: raise # clean up the multipart-upload record obj = '%s/%s' % (req.object_name, upload_id) try: req.get_response(self.app, 'DELETE', container, obj) except NoSuchKey: # We know that this existed long enough for us to HEAD pass result_elem = Element('CompleteMultipartUploadResult') # NOTE: boto with sig v4 appends port to HTTP_HOST value at # the request header when the port is non default value and it # makes req.host_url like as http://localhost:8080:8080/path # that obviously invalid. Probably it should be resolved at # swift.common.swob though, tentatively we are parsing and # reconstructing the correct host_url info here. # in detail, https://github.com/boto/boto/pull/3513 parsed_url = urlparse(req.host_url) host_url = '%s://%s' % (parsed_url.scheme, parsed_url.hostname) if parsed_url.port: host_url += ':%s' % parsed_url.port SubElement(result_elem, 'Location').text = host_url + req.path SubElement(result_elem, 'Bucket').text = req.container_name SubElement(result_elem, 'Key').text = req.object_name SubElement(result_elem, 'ETag').text = '"%s"' % s3_etag resp.headers.pop('ETag', None) if yielded_anything: yield '\n' yield tostring(result_elem, xml_declaration=not yielded_anything) except ErrorResponse as err_resp: if yielded_anything: err_resp.xml_declaration = False yield '\n' else: # Oh good, we can still change HTTP status code, too! resp.status = err_resp.status for chunk in err_resp({}, lambda *a: None): yield chunk resp = HTTPOk() # assume we're good for now... but see above! resp.app_iter = reiterate(response_iter()) resp.content_type = "application/xml" return resp
def POST(self, req): """ Handles Complete Multipart Upload. """ upload_id = req.params['uploadId'] resp = _get_upload_info(req, self.app, upload_id) headers = {'Accept': 'application/json'} for key, val in resp.headers.items(): _key = key.lower() if _key.startswith('x-amz-meta-'): headers['x-object-meta-' + _key[11:]] = val hct_header = sysmeta_header('object', 'has-content-type') if resp.sysmeta_headers.get(hct_header) == 'yes': content_type = resp.sysmeta_headers.get( sysmeta_header('object', 'content-type')) elif hct_header in resp.sysmeta_headers: # has-content-type is present but false, so no content type was # set on initial upload. In that case, we won't set one on our # PUT request. Swift will end up guessing one based on the # object name. content_type = None else: content_type = resp.headers.get('Content-Type') if content_type: headers['Content-Type'] = content_type container = req.container_name + MULTIUPLOAD_SUFFIX s3_etag_hasher = md5() manifest = [] previous_number = 0 try: xml = req.xml(MAX_COMPLETE_UPLOAD_BODY_SIZE) if not xml: raise InvalidRequest(msg='You must specify at least one part') if 'content-md5' in req.headers: # If an MD5 was provided, we need to verify it. # Note that S3Request already took care of translating to ETag if req.headers['etag'] != md5(xml).hexdigest(): raise BadDigest(content_md5=req.headers['content-md5']) # We're only interested in the body here, in the # multipart-upload controller -- *don't* let it get # plumbed down to the object-server del req.headers['etag'] complete_elem = fromstring( xml, 'CompleteMultipartUpload', self.logger) for part_elem in complete_elem.iterchildren('Part'): part_number = int(part_elem.find('./PartNumber').text) if part_number <= previous_number: raise InvalidPartOrder(upload_id=upload_id) previous_number = part_number etag = part_elem.find('./ETag').text if len(etag) >= 2 and etag[0] == '"' and etag[-1] == '"': # strip double quotes etag = etag[1:-1] if len(etag) != 32 or any(c not in '0123456789abcdef' for c in etag): raise InvalidPart(upload_id=upload_id, part_number=part_number) manifest.append({ 'path': '/%s/%s/%s/%d' % ( container, req.object_name, upload_id, part_number), 'etag': etag}) s3_etag_hasher.update(binascii.a2b_hex(etag)) except (XMLSyntaxError, DocumentInvalid): # NB: our schema definitions catch uploads with no parts here raise MalformedXML() except ErrorResponse: raise except Exception as e: self.logger.error(e) raise s3_etag = '%s-%d' % (s3_etag_hasher.hexdigest(), len(manifest)) headers[sysmeta_header('object', 'etag')] = s3_etag # Leave base header value blank; SLO will populate c_etag = '; s3_etag=%s' % s3_etag headers['X-Object-Sysmeta-Container-Update-Override-Etag'] = c_etag too_small_message = ('s3api requires that each segment be at least ' '%d bytes' % self.conf.min_segment_size) def size_checker(manifest): # Check the size of each segment except the last and make sure # they are all more than the minimum upload chunk size. # Note that we need to use the *internal* keys, since we're # looking at the manifest that's about to be written. return [ (item['name'], too_small_message) for item in manifest[:-1] if item and item['bytes'] < self.conf.min_segment_size] req.environ['swift.callback.slo_manifest_hook'] = size_checker start_time = time.time() def response_iter(): # NB: XML requires that the XML declaration, if present, be at the # very start of the document. Clients *will* call us out on not # being valid XML if we pass through whitespace before it. # Track whether we've sent anything yet so we can yield out that # declaration *first* yielded_anything = False try: try: # TODO: add support for versioning put_resp = req.get_response( self.app, 'PUT', body=json.dumps(manifest), query={'multipart-manifest': 'put', 'heartbeat': 'on'}, headers=headers) if put_resp.status_int == 202: body = [] put_resp.fix_conditional_response() for chunk in put_resp.response_iter: if not chunk.strip(): if time.time() - start_time < 10: # Include some grace period to keep # ceph-s3tests happy continue if not yielded_anything: yield (b'<?xml version="1.0" ' b'encoding="UTF-8"?>\n') yielded_anything = True yield chunk continue body.append(chunk) body = json.loads(b''.join(body)) if body['Response Status'] != '201 Created': for seg, err in body['Errors']: if err == too_small_message: raise EntityTooSmall() elif err in ('Etag Mismatch', '404 Not Found'): raise InvalidPart(upload_id=upload_id) raise InvalidRequest( status=body['Response Status'], msg='\n'.join(': '.join(err) for err in body['Errors'])) except BadSwiftRequest as e: msg = str(e) if too_small_message in msg: raise EntityTooSmall(msg) elif ', Etag Mismatch' in msg: raise InvalidPart(upload_id=upload_id) elif ', 404 Not Found' in msg: raise InvalidPart(upload_id=upload_id) else: raise # clean up the multipart-upload record obj = '%s/%s' % (req.object_name, upload_id) try: req.get_response(self.app, 'DELETE', container, obj) except NoSuchKey: # We know that this existed long enough for us to HEAD pass result_elem = Element('CompleteMultipartUploadResult') # NOTE: boto with sig v4 appends port to HTTP_HOST value at # the request header when the port is non default value and it # makes req.host_url like as http://localhost:8080:8080/path # that obviously invalid. Probably it should be resolved at # swift.common.swob though, tentatively we are parsing and # reconstructing the correct host_url info here. # in detail, https://github.com/boto/boto/pull/3513 parsed_url = urlparse(req.host_url) host_url = '%s://%s' % (parsed_url.scheme, parsed_url.hostname) # Why are we doing our own port parsing? Because py3 decided # to start raising ValueErrors on access after parsing such # an invalid port netloc = parsed_url.netloc.split('@')[-1].split(']')[-1] if ':' in netloc: port = netloc.split(':', 2)[1] host_url += ':%s' % port SubElement(result_elem, 'Location').text = host_url + req.path SubElement(result_elem, 'Bucket').text = req.container_name SubElement(result_elem, 'Key').text = req.object_name SubElement(result_elem, 'ETag').text = '"%s"' % s3_etag resp.headers.pop('ETag', None) if yielded_anything: yield b'\n' yield tostring(result_elem, xml_declaration=not yielded_anything) except ErrorResponse as err_resp: if yielded_anything: err_resp.xml_declaration = False yield b'\n' else: # Oh good, we can still change HTTP status code, too! resp.status = err_resp.status for chunk in err_resp({}, lambda *a: None): yield chunk resp = HTTPOk() # assume we're good for now... but see above! resp.app_iter = reiterate(response_iter()) resp.content_type = "application/xml" return resp