def direct_put_object(node, part, account, container, name, contents, content_length=None, etag=None, content_type=None, headers=None, conn_timeout=5, response_timeout=15, chunk_size=65535): """ Put object directly from the object server. :param node: node dictionary from the ring :param part: partition the container is on :param account: account name :param container: container name :param name: object name :param contents: an iterable or string to read object data from :param content_length: value to send as content-length header :param etag: etag of contents :param content_type: value to send as content-type header :param headers: additional headers to include in the request :param conn_timeout: timeout in seconds for establishing the connection :param response_timeout: timeout in seconds for getting the response :param chunk_size: if defined, chunk size of data to send. :returns: etag from the server response :raises ClientException: HTTP PUT request failed """ path = _make_path(account, container, name) if headers is None: headers = {} if etag: headers['ETag'] = normalize_etag(etag) if content_type is not None: headers['Content-Type'] = content_type else: headers['Content-Type'] = 'application/octet-stream' # Incase the caller want to insert an object with specific age add_ts = 'X-Timestamp' not in headers resp = _make_req(node, part, 'PUT', path, gen_headers(headers, add_ts=add_ts), 'Object', conn_timeout, response_timeout, contents=contents, content_length=content_length, chunk_size=chunk_size) return normalize_etag(resp.getheader('etag'))
def handle_put(self, req, start_response): self._check_headers(req) keys = self.get_keys(req.environ, required=['object', 'container']) self.encrypt_user_metadata(req, keys) enc_input_proxy = EncInputWrapper(self.crypto, keys, req, self.logger) req.environ['wsgi.input'] = enc_input_proxy resp = self._app_call(req.environ) # If an etag is in the response headers and a plaintext etag was # calculated, then overwrite the response value with the plaintext etag # provided it matches the ciphertext etag. If it does not match then do # not overwrite and allow the response value to return to client. mod_resp_headers = self._response_headers if (is_success(self._get_status_int()) and enc_input_proxy.plaintext_md5): plaintext_etag = enc_input_proxy.plaintext_md5.hexdigest() ciphertext_etag = enc_input_proxy.ciphertext_md5.hexdigest() mod_resp_headers = [ (h, v if (h.lower() != 'etag' or normalize_etag(v) != ciphertext_etag) else plaintext_etag) for h, v in mod_resp_headers ] start_response(self._response_status, mod_resp_headers, self._response_exc_info) return resp
def GETorHEAD(self, req): had_match = False for match_header in ('if-match', 'if-none-match'): if match_header not in req.headers: continue had_match = True for value in list_from_csv(req.headers[match_header]): value = normalize_etag(value) if value.endswith('-N'): # Deal with fake S3-like etags for SLOs uploaded via Swift req.headers[match_header] += ', ' + value[:-2] if had_match: # Update where to look update_etag_is_at_header(req, sysmeta_header('object', 'etag')) resp = req.get_response(self.app) if req.method == 'HEAD': resp.app_iter = None for key in ('content-type', 'content-language', 'expires', 'cache-control', 'content-disposition', 'content-encoding'): if 'response-' + key in req.params: resp.headers[key] = req.params['response-' + key] return resp
def _validate_and_prep_request_headers(req): """ Validate that the value from x-symlink-target header is well formatted and that the x-symlink-target-etag header (if present) does not contain problematic characters. We assume the caller ensures that x-symlink-target header is present in req.headers. :param req: HTTP request object :returns: a tuple, the full versioned path to the object (as a WSGI string) and the X-Symlink-Target-Etag header value which may be None :raise: HTTPPreconditionFailed if x-symlink-target value is not well formatted. :raise: HTTPBadRequest if the x-symlink-target value points to the request path. :raise: HTTPBadRequest if the x-symlink-target-etag value contains a semicolon, double-quote, or backslash. """ # N.B. check_path_header doesn't assert the leading slash and # copy middleware may accept the format. In the symlink, API # says apparently to use "container/object" format so add the # validation first, here. error_body = 'X-Symlink-Target header must be of the form ' \ '<container name>/<object name>' if wsgi_unquote(req.headers[TGT_OBJ_SYMLINK_HDR]).startswith('/'): raise HTTPPreconditionFailed(body=error_body, request=req, content_type='text/plain') # check container and object format container, obj = check_path_header(req, TGT_OBJ_SYMLINK_HDR, 2, error_body) req.headers[TGT_OBJ_SYMLINK_HDR] = wsgi_quote('%s/%s' % (container, obj)) # Check account format if it exists account = check_account_format( req, wsgi_unquote(req.headers[TGT_ACCT_SYMLINK_HDR])) \ if TGT_ACCT_SYMLINK_HDR in req.headers else None # Extract request path _junk, req_acc, req_cont, req_obj = req.split_path(4, 4, True) if account: req.headers[TGT_ACCT_SYMLINK_HDR] = wsgi_quote(account) else: account = req_acc # Check if symlink targets the symlink itself or not if (account, container, obj) == (req_acc, req_cont, req_obj): raise HTTPBadRequest(body='Symlink cannot target itself', request=req, content_type='text/plain') etag = normalize_etag(req.headers.get(TGT_ETAG_SYMLINK_HDR, None)) if etag and any(c in etag for c in ';"\\'): # See cgi.parse_header for why the above chars are problematic raise HTTPBadRequest(body='Bad %s format' % TGT_ETAG_SYMLINK_HDR.title(), request=req, content_type='text/plain') if not (etag or req.headers.get('Content-Type')): req.headers['Content-Type'] = 'application/symlink' return '/v1/%s/%s/%s' % (account, container, obj), etag
def test_async_updates_after_PUT_and_POST(self): # verify correct update values when PUT update and POST updates are # missed but then async updates are sent cpart, cnodes = self.container_ring.get_nodes(self.account, 'c1') client.put_container(self.url, self.token, 'c1', headers={'X-Storage-Policy': self.policy.name}) # PUT and POST to object while one container server is stopped so that # we force async updates to it kill_server((cnodes[0]['ip'], cnodes[0]['port']), self.ipport2server) content = u'stuff' client.put_object(self.url, self.token, 'c1', 'o1', contents=content, content_type='test/ctype') meta = client.head_object(self.url, self.token, 'c1', 'o1') int_client = self.make_internal_client() int_client.set_object_metadata( self.account, 'c1', 'o1', {'X-Object-Meta-Fruit': 'Tomato'}) self.assertEqual( 'Tomato', int_client.get_object_metadata(self.account, 'c1', 'o1') ['x-object-meta-fruit']) # sanity # re-start the container server and assert that it does not yet know # about the object start_server((cnodes[0]['ip'], cnodes[0]['port']), self.ipport2server) self.assertFalse(direct_client.direct_get_container( cnodes[0], cpart, self.account, 'c1')[1]) # Run the object-updaters to send the async pendings Manager(['object-updater']).once() # check the re-started container server got same update as others. # we cannot assert the actual etag value because it may be encrypted listing_etags = set() for cnode in cnodes: listing = direct_client.direct_get_container( cnode, cpart, self.account, 'c1')[1] self.assertEqual(1, len(listing)) self.assertEqual(len(content), listing[0]['bytes']) self.assertEqual('test/ctype', listing[0]['content_type']) listing_etags.add(listing[0]['hash']) self.assertEqual(1, len(listing_etags)) # check that listing meta returned to client is consistent with object # meta returned to client hdrs, listing = client.get_container(self.url, self.token, 'c1') self.assertEqual(1, len(listing)) self.assertEqual('o1', listing[0]['name']) self.assertEqual(len(content), listing[0]['bytes']) self.assertEqual(normalize_etag(meta['etag']), listing[0]['hash']) self.assertEqual('test/ctype', listing[0]['content_type'])
def GETorHEAD(self, req): had_match = False for match_header in ('if-match', 'if-none-match'): if match_header not in req.headers: continue had_match = True for value in list_from_csv(req.headers[match_header]): value = normalize_etag(value) if value.endswith('-N'): # Deal with fake S3-like etags for SLOs uploaded via Swift req.headers[match_header] += ', ' + value[:-2] if had_match: # Update where to look update_etag_is_at_header(req, sysmeta_header('object', 'etag')) object_name = req.object_name version_id = version_id_param(req) query = {} if version_id is None else {'version-id': version_id} if version_id not in ('null', None): container_info = req.get_container_info(self.app) if not container_info.get( 'sysmeta', {}).get('versions-container', ''): # Versioning has never been enabled raise NoSuchVersion(object_name, version_id) cors_rule = None if req.headers.get('Origin'): cors_rule = get_cors(self.app, req, req.method, req.headers.get('Origin')) resp = req.get_response(self.app, query=query) if req.method == 'HEAD': resp.app_iter = None if 'x-amz-meta-deleted' in resp.headers: raise NoSuchKey(object_name) for key in ('content-type', 'content-language', 'expires', 'cache-control', 'content-disposition', 'content-encoding'): if 'response-' + key in req.params: resp.headers[key] = req.params['response-' + key] if cors_rule is not None: cors_fill_headers(req, resp, cors_rule) return resp
def test_async_update_after_PUT(self): cpart, cnodes = self.container_ring.get_nodes(self.account, 'c1') client.put_container(self.url, self.token, 'c1', headers={'X-Storage-Policy': self.policy.name}) # put an object while one container server is stopped so that we force # an async update to it kill_server((cnodes[0]['ip'], cnodes[0]['port']), self.ipport2server) content = u'stuff' client.put_object(self.url, self.token, 'c1', 'o1', contents=content, content_type='test/ctype') meta = client.head_object(self.url, self.token, 'c1', 'o1') # re-start the container server and assert that it does not yet know # about the object start_server((cnodes[0]['ip'], cnodes[0]['port']), self.ipport2server) self.assertFalse(direct_client.direct_get_container( cnodes[0], cpart, self.account, 'c1')[1]) # Run the object-updaters to be sure updates are done Manager(['object-updater']).once() # check the re-started container server got same update as others. # we cannot assert the actual etag value because it may be encrypted listing_etags = set() for cnode in cnodes: listing = direct_client.direct_get_container( cnode, cpart, self.account, 'c1')[1] self.assertEqual(1, len(listing)) self.assertEqual(len(content), listing[0]['bytes']) self.assertEqual('test/ctype', listing[0]['content_type']) listing_etags.add(listing[0]['hash']) self.assertEqual(1, len(listing_etags)) # check that listing meta returned to client is consistent with object # meta returned to client hdrs, listing = client.get_container(self.url, self.token, 'c1') self.assertEqual(1, len(listing)) self.assertEqual('o1', listing[0]['name']) self.assertEqual(len(content), listing[0]['bytes']) self.assertEqual(normalize_etag(meta['etag']), listing[0]['hash']) self.assertEqual('test/ctype', listing[0]['content_type'])
def GETorHEAD(self, req): had_match = False for match_header in ('if-match', 'if-none-match'): if match_header not in req.headers: continue had_match = True for value in list_from_csv(req.headers[match_header]): value = normalize_etag(value) if value.endswith('-N'): # Deal with fake S3-like etags for SLOs uploaded via Swift req.headers[match_header] += ', ' + value[:-2] if had_match: # Update where to look update_etag_is_at_header(req, sysmeta_header('object', 'etag')) object_name = req.object_name version_id = req.params.get('versionId') if version_id not in ('null', None) and \ 'object_versioning' not in get_swift_info(): raise S3NotImplemented() query = {} if version_id is None else {'version-id': version_id} resp = req.get_response(self.app, query=query) if req.method == 'HEAD': resp.app_iter = None if 'x-amz-meta-deleted' in resp.headers: raise NoSuchKey(object_name) for key in ('content-type', 'content-language', 'expires', 'cache-control', 'content-disposition', 'content-encoding'): if 'response-' + key in req.params: resp.headers[key] = req.params['response-' + key] return resp
def POST(self, req): """ Handles Complete Multipart Upload. """ upload_id = req.params['uploadId'] resp = _get_upload_info(req, self.app, upload_id) headers = { 'Accept': 'application/json', sysmeta_header('object', 'upload-id'): upload_id } for key, val in resp.headers.items(): _key = key.lower() if _key.startswith('x-amz-meta-'): headers['x-object-meta-' + _key[11:]] = val hct_header = sysmeta_header('object', 'has-content-type') if resp.sysmeta_headers.get(hct_header) == 'yes': content_type = resp.sysmeta_headers.get( sysmeta_header('object', 'content-type')) elif hct_header in resp.sysmeta_headers: # has-content-type is present but false, so no content type was # set on initial upload. In that case, we won't set one on our # PUT request. Swift will end up guessing one based on the # object name. content_type = None else: content_type = resp.headers.get('Content-Type') if content_type: headers['Content-Type'] = content_type container = req.container_name + MULTIUPLOAD_SUFFIX s3_etag_hasher = md5(usedforsecurity=False) manifest = [] previous_number = 0 try: xml = req.xml(MAX_COMPLETE_UPLOAD_BODY_SIZE) if not xml: raise InvalidRequest(msg='You must specify at least one part') if 'content-md5' in req.headers: # If an MD5 was provided, we need to verify it. # Note that S3Request already took care of translating to ETag if req.headers['etag'] != md5( xml, usedforsecurity=False).hexdigest(): raise BadDigest(content_md5=req.headers['content-md5']) # We're only interested in the body here, in the # multipart-upload controller -- *don't* let it get # plumbed down to the object-server del req.headers['etag'] complete_elem = fromstring(xml, 'CompleteMultipartUpload', self.logger) for part_elem in complete_elem.iterchildren('Part'): part_number = int(part_elem.find('./PartNumber').text) if part_number <= previous_number: raise InvalidPartOrder(upload_id=upload_id) previous_number = part_number etag = normalize_etag(part_elem.find('./ETag').text) if len(etag) != 32 or any(c not in '0123456789abcdef' for c in etag): raise InvalidPart(upload_id=upload_id, part_number=part_number) manifest.append({ 'path': '/%s/%s/%s/%d' % (wsgi_to_str(container), wsgi_to_str( req.object_name), upload_id, part_number), 'etag': etag }) s3_etag_hasher.update(binascii.a2b_hex(etag)) except (XMLSyntaxError, DocumentInvalid): # NB: our schema definitions catch uploads with no parts here raise MalformedXML() except ErrorResponse: raise except Exception as e: self.logger.error(e) raise s3_etag = '%s-%d' % (s3_etag_hasher.hexdigest(), len(manifest)) s3_etag_header = sysmeta_header('object', 'etag') if resp.sysmeta_headers.get(s3_etag_header) == s3_etag: # This header should only already be present if the upload marker # has been cleaned up and the current target uses the same # upload-id; assuming the segments to use haven't changed, the work # is already done return HTTPOk(body=_make_complete_body(req, s3_etag, False), content_type='application/xml') headers[s3_etag_header] = s3_etag # Leave base header value blank; SLO will populate c_etag = '; s3_etag=%s' % s3_etag headers[get_container_update_override_key('etag')] = c_etag too_small_message = ('s3api requires that each segment be at least ' '%d bytes' % self.conf.min_segment_size) def size_checker(manifest): # Check the size of each segment except the last and make sure # they are all more than the minimum upload chunk size. # Note that we need to use the *internal* keys, since we're # looking at the manifest that's about to be written. return [(item['name'], too_small_message) for item in manifest[:-1] if item and item['bytes'] < self.conf.min_segment_size] req.environ['swift.callback.slo_manifest_hook'] = size_checker start_time = time.time() def response_iter(): # NB: XML requires that the XML declaration, if present, be at the # very start of the document. Clients *will* call us out on not # being valid XML if we pass through whitespace before it. # Track whether we've sent anything yet so we can yield out that # declaration *first* yielded_anything = False try: try: # TODO: add support for versioning put_resp = req.get_response(self.app, 'PUT', body=json.dumps(manifest), query={ 'multipart-manifest': 'put', 'heartbeat': 'on' }, headers=headers) if put_resp.status_int == 202: body = [] put_resp.fix_conditional_response() for chunk in put_resp.response_iter: if not chunk.strip(): if time.time() - start_time < 10: # Include some grace period to keep # ceph-s3tests happy continue if not yielded_anything: yield (b'<?xml version="1.0" ' b'encoding="UTF-8"?>\n') yielded_anything = True yield chunk continue body.append(chunk) body = json.loads(b''.join(body)) if body['Response Status'] != '201 Created': for seg, err in body['Errors']: if err == too_small_message: raise EntityTooSmall() elif err in ('Etag Mismatch', '404 Not Found'): raise InvalidPart(upload_id=upload_id) raise InvalidRequest( status=body['Response Status'], msg='\n'.join(': '.join(err) for err in body['Errors'])) except BadSwiftRequest as e: msg = str(e) if too_small_message in msg: raise EntityTooSmall(msg) elif ', Etag Mismatch' in msg: raise InvalidPart(upload_id=upload_id) elif ', 404 Not Found' in msg: raise InvalidPart(upload_id=upload_id) else: raise # clean up the multipart-upload record obj = '%s/%s' % (req.object_name, upload_id) try: req.get_response(self.app, 'DELETE', container, obj) except NoSuchKey: # The important thing is that we wrote out a tombstone to # make sure the marker got cleaned up. If it's already # gone (e.g., because of concurrent completes or a retried # complete), so much the better. pass yield _make_complete_body(req, s3_etag, yielded_anything) except ErrorResponse as err_resp: if yielded_anything: err_resp.xml_declaration = False yield b'\n' else: # Oh good, we can still change HTTP status code, too! resp.status = err_resp.status for chunk in err_resp({}, lambda *a: None): yield chunk resp = HTTPOk() # assume we're good for now... but see above! resp.app_iter = reiterate(response_iter()) resp.content_type = "application/xml" return resp
def container_sync_row(self, row, sync_to, user_key, broker, info, realm, realm_key): """ Sends the update the row indicates to the sync_to container. Update can be either delete or put. :param row: The updated row in the local database triggering the sync update. :param sync_to: The URL to the remote container. :param user_key: The X-Container-Sync-Key to use when sending requests to the other container. :param broker: The local container database broker. :param info: The get_info result from the local container database broker. :param realm: The realm from self.realms_conf, if there is one. If None, fallback to using the older allowed_sync_hosts way of syncing. :param realm_key: The realm key from self.realms_conf, if there is one. If None, fallback to using the older allowed_sync_hosts way of syncing. :returns: True on success """ try: start_time = time() # extract last modified time from the created_at value ts_data, ts_ctype, ts_meta = decode_timestamps(row['created_at']) if row['deleted']: # when sync'ing a deleted object, use ts_data - this is the # timestamp of the source tombstone try: headers = {'x-timestamp': ts_data.internal} self._update_sync_to_headers(row['name'], sync_to, user_key, realm, realm_key, 'DELETE', headers) delete_object(sync_to, name=row['name'], headers=headers, proxy=self.select_http_proxy(), logger=self.logger, timeout=self.conn_timeout) except ClientException as err: if err.http_status != HTTP_NOT_FOUND: raise self.container_deletes += 1 self.container_stats['deletes'] += 1 self.logger.increment('deletes') self.logger.timing_since('deletes.timing', start_time) else: # when sync'ing a live object, use ts_meta - this is the time # at which the source object was last modified by a PUT or POST if self._object_in_remote_container(row['name'], sync_to, user_key, realm, realm_key, ts_meta): return True exc = None # look up for the newest one; the symlink=get query-string has # no effect unless symlinks are enabled in the internal client # in which case it ensures that symlink objects retain their # symlink property when sync'd. headers_out = { 'X-Newest': True, 'X-Backend-Storage-Policy-Index': str(info['storage_policy_index']) } try: source_obj_status, headers, body = \ self.swift.get_object(info['account'], info['container'], row['name'], headers=headers_out, acceptable_statuses=(2, 4), params={'symlink': 'get'}) except (Exception, UnexpectedResponse, Timeout) as err: headers = {} body = None exc = err timestamp = Timestamp(headers.get('x-timestamp', 0)) if timestamp < ts_meta: if exc: raise exc raise Exception( _('Unknown exception trying to GET: ' '%(account)r %(container)r %(object)r'), { 'account': info['account'], 'container': info['container'], 'object': row['name'] }) for key in ('date', 'last-modified'): if key in headers: del headers[key] if 'etag' in headers: headers['etag'] = normalize_etag(headers['etag']) if 'content-type' in headers: headers['content-type'] = clean_content_type( headers['content-type']) self._update_sync_to_headers(row['name'], sync_to, user_key, realm, realm_key, 'PUT', headers) put_object(sync_to, name=row['name'], headers=headers, contents=FileLikeIter(body), proxy=self.select_http_proxy(), logger=self.logger, timeout=self.conn_timeout) self.container_puts += 1 self.container_stats['puts'] += 1 self.container_stats['bytes'] += row['size'] self.logger.increment('puts') self.logger.timing_since('puts.timing', start_time) except ClientException as err: if err.http_status == HTTP_UNAUTHORIZED: self.logger.info( _('Unauth %(sync_from)r => %(sync_to)r'), { 'sync_from': '%s/%s' % (quote(info['account']), quote(info['container'])), 'sync_to': sync_to }) elif err.http_status == HTTP_NOT_FOUND: self.logger.info( _('Not found %(sync_from)r => %(sync_to)r \ - object %(obj_name)r'), { 'sync_from': '%s/%s' % (quote(info['account']), quote(info['container'])), 'sync_to': sync_to, 'obj_name': row['name'] }) else: self.logger.exception(_('ERROR Syncing %(db_file)s %(row)s'), { 'db_file': str(broker), 'row': row }) self.container_failures += 1 self.logger.increment('failures') return False except (Exception, Timeout) as err: self.logger.exception(_('ERROR Syncing %(db_file)s %(row)s'), { 'db_file': str(broker), 'row': row }) self.container_failures += 1 self.logger.increment('failures') return False return True
def get_or_head_response(self, req, x_object_manifest): ''' :param req: user's request :param x_object_manifest: as unquoted, native string ''' response_headers = self._response_headers container, obj_prefix = x_object_manifest.split('/', 1) version, account, _junk = req.split_path(2, 3, True) version = wsgi_to_str(version) account = wsgi_to_str(account) error_response, segments = self._get_container_listing( req, version, account, container, obj_prefix) if error_response: return error_response have_complete_listing = len(segments) < \ constraints.CONTAINER_LISTING_LIMIT first_byte = last_byte = None actual_content_length = None content_length_for_swob_range = None if req.range and len(req.range.ranges) == 1: content_length_for_swob_range = sum(o['bytes'] for o in segments) # This is a hack to handle suffix byte ranges (e.g. "bytes=-5"), # which we can't honor unless we have a complete listing. _junk, range_end = req.range.ranges_for_length(float("inf"))[0] # If this is all the segments, we know whether or not this # range request is satisfiable. # # Alternately, we may not have all the segments, but this range # falls entirely within the first page's segments, so we know # that it is satisfiable. if (have_complete_listing or range_end < content_length_for_swob_range): byteranges = req.range.ranges_for_length( content_length_for_swob_range) if not byteranges: headers = {'Accept-Ranges': 'bytes'} if have_complete_listing: headers['Content-Range'] = 'bytes */%d' % ( content_length_for_swob_range, ) return HTTPRequestedRangeNotSatisfiable( request=req, headers=headers) first_byte, last_byte = byteranges[0] # For some reason, swob.Range.ranges_for_length adds 1 to the # last byte's position. last_byte -= 1 actual_content_length = last_byte - first_byte + 1 else: # The range may or may not be satisfiable, but we can't tell # based on just one page of listing, and we're not going to go # get more pages because that would use up too many resources, # so we ignore the Range header and return the whole object. actual_content_length = None content_length_for_swob_range = None req.range = None else: req.range = None response_headers = [ (h, v) for h, v in response_headers if h.lower() not in ("content-length", "content-range")] if content_length_for_swob_range is not None: # Here, we have to give swob a big-enough content length so that # it can compute the actual content length based on the Range # header. This value will not be visible to the client; swob will # substitute its own Content-Length. # # Note: if the manifest points to at least CONTAINER_LISTING_LIMIT # segments, this may be less than the sum of all the segments' # sizes. However, it'll still be greater than the last byte in the # Range header, so it's good enough for swob. response_headers.append(('Content-Length', str(content_length_for_swob_range))) elif have_complete_listing: actual_content_length = sum(o['bytes'] for o in segments) response_headers.append(('Content-Length', str(actual_content_length))) if have_complete_listing: response_headers = [(h, v) for h, v in response_headers if h.lower() != "etag"] etag = md5() for seg_dict in segments: etag.update(normalize_etag(seg_dict['hash']).encode('utf8')) response_headers.append(('Etag', '"%s"' % etag.hexdigest())) app_iter = None if req.method == 'GET': listing_iter = RateLimitedIterator( self._segment_listing_iterator( req, version, account, container, obj_prefix, segments, first_byte=first_byte, last_byte=last_byte), self.dlo.rate_limit_segments_per_sec, limit_after=self.dlo.rate_limit_after_segment) app_iter = SegmentedIterable( req, self.dlo.app, listing_iter, ua_suffix="DLO MultipartGET", swift_source="DLO", name=req.path, logger=self.logger, max_get_time=self.dlo.max_get_time, response_body_length=actual_content_length) try: app_iter.validate_first_segment() except HTTPException as err_resp: return err_resp except (SegmentError, ListingIterError): return HTTPConflict(request=req) resp = Response(request=req, headers=response_headers, conditional_response=True, app_iter=app_iter) return resp
def POST(self, req): """ Handles Complete Multipart Upload. """ upload_id = req.params['uploadId'] resp = _get_upload_info(req, self.app, upload_id) headers = {'Accept': 'application/json'} for key, val in resp.headers.items(): _key = key.lower() if _key.startswith('x-amz-meta-'): headers['x-object-meta-' + _key[11:]] = val hct_header = sysmeta_header('object', 'has-content-type') if resp.sysmeta_headers.get(hct_header) == 'yes': content_type = resp.sysmeta_headers.get( sysmeta_header('object', 'content-type')) elif hct_header in resp.sysmeta_headers: # has-content-type is present but false, so no content type was # set on initial upload. In that case, we won't set one on our # PUT request. Swift will end up guessing one based on the # object name. content_type = None else: content_type = resp.headers.get('Content-Type') if content_type: headers['Content-Type'] = content_type container = req.container_name + MULTIUPLOAD_SUFFIX s3_etag_hasher = md5() manifest = [] previous_number = 0 try: xml = req.xml(MAX_COMPLETE_UPLOAD_BODY_SIZE) if not xml: raise InvalidRequest(msg='You must specify at least one part') if 'content-md5' in req.headers: # If an MD5 was provided, we need to verify it. # Note that S3Request already took care of translating to ETag if req.headers['etag'] != md5(xml).hexdigest(): raise BadDigest(content_md5=req.headers['content-md5']) # We're only interested in the body here, in the # multipart-upload controller -- *don't* let it get # plumbed down to the object-server del req.headers['etag'] complete_elem = fromstring(xml, 'CompleteMultipartUpload', self.logger) for part_elem in complete_elem.iterchildren('Part'): part_number = int(part_elem.find('./PartNumber').text) if part_number <= previous_number: raise InvalidPartOrder(upload_id=upload_id) previous_number = part_number etag = normalize_etag(part_elem.find('./ETag').text) if len(etag) != 32 or any(c not in '0123456789abcdef' for c in etag): raise InvalidPart(upload_id=upload_id, part_number=part_number) manifest.append({ 'path': '/%s/%s/%s/%d' % (container, req.object_name, upload_id, part_number), 'etag': etag }) s3_etag_hasher.update(binascii.a2b_hex(etag)) except (XMLSyntaxError, DocumentInvalid): # NB: our schema definitions catch uploads with no parts here raise MalformedXML() except ErrorResponse: raise except Exception as e: self.logger.error(e) raise s3_etag = '%s-%d' % (s3_etag_hasher.hexdigest(), len(manifest)) headers[sysmeta_header('object', 'etag')] = s3_etag # Leave base header value blank; SLO will populate c_etag = '; s3_etag=%s' % s3_etag headers[get_container_update_override_key('etag')] = c_etag too_small_message = ('s3api requires that each segment be at least ' '%d bytes' % self.conf.min_segment_size) def size_checker(manifest): # Check the size of each segment except the last and make sure # they are all more than the minimum upload chunk size. # Note that we need to use the *internal* keys, since we're # looking at the manifest that's about to be written. return [(item['name'], too_small_message) for item in manifest[:-1] if item and item['bytes'] < self.conf.min_segment_size] req.environ['swift.callback.slo_manifest_hook'] = size_checker start_time = time.time() def response_iter(): # NB: XML requires that the XML declaration, if present, be at the # very start of the document. Clients *will* call us out on not # being valid XML if we pass through whitespace before it. # Track whether we've sent anything yet so we can yield out that # declaration *first* yielded_anything = False try: try: # TODO: add support for versioning put_resp = req.get_response(self.app, 'PUT', body=json.dumps(manifest), query={ 'multipart-manifest': 'put', 'heartbeat': 'on' }, headers=headers) if put_resp.status_int == 202: body = [] put_resp.fix_conditional_response() for chunk in put_resp.response_iter: if not chunk.strip(): if time.time() - start_time < 10: # Include some grace period to keep # ceph-s3tests happy continue if not yielded_anything: yield (b'<?xml version="1.0" ' b'encoding="UTF-8"?>\n') yielded_anything = True yield chunk continue body.append(chunk) body = json.loads(b''.join(body)) if body['Response Status'] != '201 Created': for seg, err in body['Errors']: if err == too_small_message: raise EntityTooSmall() elif err in ('Etag Mismatch', '404 Not Found'): raise InvalidPart(upload_id=upload_id) raise InvalidRequest( status=body['Response Status'], msg='\n'.join(': '.join(err) for err in body['Errors'])) except BadSwiftRequest as e: msg = str(e) if too_small_message in msg: raise EntityTooSmall(msg) elif ', Etag Mismatch' in msg: raise InvalidPart(upload_id=upload_id) elif ', 404 Not Found' in msg: raise InvalidPart(upload_id=upload_id) else: raise # clean up the multipart-upload record obj = '%s/%s' % (req.object_name, upload_id) try: req.get_response(self.app, 'DELETE', container, obj) except NoSuchKey: # We know that this existed long enough for us to HEAD pass result_elem = Element('CompleteMultipartUploadResult') # NOTE: boto with sig v4 appends port to HTTP_HOST value at # the request header when the port is non default value and it # makes req.host_url like as http://localhost:8080:8080/path # that obviously invalid. Probably it should be resolved at # swift.common.swob though, tentatively we are parsing and # reconstructing the correct host_url info here. # in detail, https://github.com/boto/boto/pull/3513 parsed_url = urlparse(req.host_url) host_url = '%s://%s' % (parsed_url.scheme, parsed_url.hostname) # Why are we doing our own port parsing? Because py3 decided # to start raising ValueErrors on access after parsing such # an invalid port netloc = parsed_url.netloc.split('@')[-1].split(']')[-1] if ':' in netloc: port = netloc.split(':', 2)[1] host_url += ':%s' % port SubElement(result_elem, 'Location').text = host_url + req.path SubElement(result_elem, 'Bucket').text = req.container_name SubElement(result_elem, 'Key').text = req.object_name SubElement(result_elem, 'ETag').text = '"%s"' % s3_etag resp.headers.pop('ETag', None) if yielded_anything: yield b'\n' yield tostring(result_elem, xml_declaration=not yielded_anything) except ErrorResponse as err_resp: if yielded_anything: err_resp.xml_declaration = False yield b'\n' else: # Oh good, we can still change HTTP status code, too! resp.status = err_resp.status for chunk in err_resp({}, lambda *a: None): yield chunk resp = HTTPOk() # assume we're good for now... but see above! resp.app_iter = reiterate(response_iter()) resp.content_type = "application/xml" return resp
def _assert_object_metadata_matches_listing(self, listing, metadata): self.assertEqual(listing['bytes'], int(metadata['content-length'])) self.assertEqual(listing['hash'], normalize_etag(metadata['etag'])) self.assertEqual(listing['content_type'], metadata['content-type']) modified = Timestamp(metadata['x-timestamp']).isoformat self.assertEqual(listing['last_modified'], modified)
def GET(self, req): """ Handle GET Bucket (List Objects) request """ max_keys = req.get_validated_param('max-keys', self.conf.max_bucket_listing) # TODO: Separate max_bucket_listing and default_bucket_listing tag_max_keys = max_keys max_keys = min(max_keys, self.conf.max_bucket_listing) encoding_type = req.params.get('encoding-type') if encoding_type is not None and encoding_type != 'url': err_msg = 'Invalid Encoding Method specified in Request' raise InvalidArgument('encoding-type', encoding_type, err_msg) query = { 'format': 'json', 'limit': max_keys + 1, } if 'prefix' in req.params: query.update({'prefix': req.params['prefix']}) if 'delimiter' in req.params: query.update({'delimiter': req.params['delimiter']}) fetch_owner = False if 'versions' in req.params: listing_type = 'object-versions' if 'key-marker' in req.params: query.update({'marker': req.params['key-marker']}) elif 'version-id-marker' in req.params: err_msg = ('A version-id marker cannot be specified without ' 'a key marker.') raise InvalidArgument('version-id-marker', req.params['version-id-marker'], err_msg) elif int(req.params.get('list-type', '1')) == 2: listing_type = 'version-2' if 'start-after' in req.params: query.update({'marker': req.params['start-after']}) # continuation-token overrides start-after if 'continuation-token' in req.params: decoded = b64decode(req.params['continuation-token']) if not six.PY2: decoded = decoded.decode('utf8') query.update({'marker': decoded}) if 'fetch-owner' in req.params: fetch_owner = config_true_value(req.params['fetch-owner']) else: listing_type = 'version-1' if 'marker' in req.params: query.update({'marker': req.params['marker']}) resp = req.get_response(self.app, query=query) objects = json.loads(resp.body) # in order to judge that truncated is valid, check whether # max_keys + 1 th element exists in swift. is_truncated = max_keys > 0 and len(objects) > max_keys objects = objects[:max_keys] if listing_type == 'object-versions': elem = Element('ListVersionsResult') SubElement(elem, 'Name').text = req.container_name SubElement(elem, 'Prefix').text = req.params.get('prefix') SubElement(elem, 'KeyMarker').text = req.params.get('key-marker') SubElement( elem, 'VersionIdMarker').text = req.params.get('version-id-marker') if is_truncated: if 'name' in objects[-1]: SubElement(elem, 'NextKeyMarker').text = \ objects[-1]['name'] if 'subdir' in objects[-1]: SubElement(elem, 'NextKeyMarker').text = \ objects[-1]['subdir'] SubElement(elem, 'NextVersionIdMarker').text = 'null' else: elem = Element('ListBucketResult') SubElement(elem, 'Name').text = req.container_name SubElement(elem, 'Prefix').text = req.params.get('prefix') if listing_type == 'version-1': SubElement(elem, 'Marker').text = req.params.get('marker') if is_truncated and 'delimiter' in req.params: if 'name' in objects[-1]: name = objects[-1]['name'] else: name = objects[-1]['subdir'] if encoding_type == 'url': name = quote(name.encode('utf-8')) SubElement(elem, 'NextMarker').text = name elif listing_type == 'version-2': if is_truncated: if 'name' in objects[-1]: SubElement(elem, 'NextContinuationToken').text = \ b64encode(objects[-1]['name'].encode('utf-8')) if 'subdir' in objects[-1]: SubElement(elem, 'NextContinuationToken').text = \ b64encode(objects[-1]['subdir'].encode('utf-8')) if 'continuation-token' in req.params: SubElement(elem, 'ContinuationToken').text = \ req.params['continuation-token'] if 'start-after' in req.params: SubElement(elem, 'StartAfter').text = \ req.params['start-after'] SubElement(elem, 'KeyCount').text = str(len(objects)) SubElement(elem, 'MaxKeys').text = str(tag_max_keys) if 'delimiter' in req.params: SubElement(elem, 'Delimiter').text = req.params['delimiter'] if encoding_type == 'url': SubElement(elem, 'EncodingType').text = encoding_type SubElement(elem, 'IsTruncated').text = \ 'true' if is_truncated else 'false' for o in objects: if 'subdir' not in o: name = o['name'] if encoding_type == 'url': name = quote(name.encode('utf-8')) if listing_type == 'object-versions': contents = SubElement(elem, 'Version') SubElement(contents, 'Key').text = name SubElement(contents, 'VersionId').text = 'null' SubElement(contents, 'IsLatest').text = 'true' else: contents = SubElement(elem, 'Contents') SubElement(contents, 'Key').text = name SubElement(contents, 'LastModified').text = \ o['last_modified'][:-3] + 'Z' if 's3_etag' in o: # New-enough MUs are already in the right format etag = o['s3_etag'] elif 'slo_etag' in o: # SLOs may be in something *close* to the MU format etag = '"%s-N"' % swob.normalize_etag(o['slo_etag']) else: etag = o['hash'] if len(etag) < 2 or etag[::len(etag) - 1] != '""': # Normal objects just use the MD5 etag = '"%s"' % o['hash'] # This also catches sufficiently-old SLOs, but we have # no way to identify those from container listings # Otherwise, somebody somewhere (proxyfs, maybe?) made this # look like an RFC-compliant ETag; we don't need to # quote-wrap. SubElement(contents, 'ETag').text = etag SubElement(contents, 'Size').text = str(o['bytes']) if fetch_owner or listing_type != 'version-2': owner = SubElement(contents, 'Owner') SubElement(owner, 'ID').text = req.user_id SubElement(owner, 'DisplayName').text = req.user_id SubElement(contents, 'StorageClass').text = 'STANDARD' for o in objects: if 'subdir' in o: common_prefixes = SubElement(elem, 'CommonPrefixes') name = o['subdir'] if encoding_type == 'url': name = quote(name.encode('utf-8')) SubElement(common_prefixes, 'Prefix').text = name body = tostring(elem) return HTTPOk(body=body, content_type='application/xml')