def POST(self, req):
    """
    Handles Delete Multiple Objects.

    Reads the ``Delete`` XML document from the request body, HEADs the
    bucket, then issues one DELETE per listed key through a
    ``StreamingPile``. Per-key outcomes are collected into a
    ``DeleteResult`` element, which becomes the body of the returned
    ``HTTPOk``.

    :param req: the request being handled; its ``xml()``, ``check_md5()``
                and ``get_response()`` methods are used
    :returns: ``HTTPOk`` carrying the serialized ``DeleteResult``, or the
              body built by ``self._gen_error_body`` when the bucket HEAD
              is denied
    :raises MissingRequestBodyError: if the request body is empty
    :raises UserKeyMustBeSpecified: if an ``Object`` entry has an empty key
    :raises MalformedXML: if the body does not parse/validate, or lists
                          more keys than ``max_multi_delete_objects``
    :raises S3NotImplemented: if any entry names a specific version
    """
    # AWS limits multideletes to 1000 keys and swift limits object names
    # to 1024 bytes (by default); double that product to allow some slop,
    # but never read more than 10 MiB however the operator configured it.
    name_budget = \
        2 * self.conf.max_multi_delete_objects * MAX_OBJECT_NAME_LENGTH
    max_body_size = min(name_budget, 10 * 1024 * 1024)

    def iter_targets(delete_elem):
        # One (key, version-or-None) pair per <Object> child.
        for obj_elem in delete_elem.iterchildren('Object'):
            key = obj_elem.find('./Key').text
            if not key:
                raise UserKeyMustBeSpecified()
            version_elem = obj_elem.find('./VersionId')
            if version_elem is None:
                yield key, None
            else:
                yield key, version_elem.text

    try:
        xml = req.xml(max_body_size)
        if not xml:
            raise MissingRequestBodyError()
        req.check_md5(xml)

        delete_elem = fromstring(xml, 'Delete', self.logger)
        quiet_elem = delete_elem.find('./Quiet')
        self.quiet = (quiet_elem is not None and
                      quiet_elem.text.lower() == 'true')

        delete_list = list(iter_targets(delete_elem))
        if len(delete_list) > self.conf.max_multi_delete_objects:
            raise MalformedXML()
    except (XMLSyntaxError, DocumentInvalid):
        raise MalformedXML()
    except ErrorResponse:
        # Already an S3-style error; let it through untouched.
        raise
    except Exception as e:
        self.logger.error(e)
        raise

    result_elem = Element('DeleteResult')

    # check bucket existence
    try:
        req.get_response(self.app, 'HEAD')
    except AccessDenied as error:
        return HTTPOk(
            body=self._gen_error_body(error, result_elem, delete_list))

    for _key, version in delete_list:
        if version is not None:
            # TODO: support deleting specific versions of objects
            raise S3NotImplemented()

    def delete_one(base_req, key, version):
        # Returns (key, None) on success or (key, {'code', 'message'}).
        # Each delete gets its own shallow request copy and environ copy.
        sub_req = copy.copy(base_req)
        sub_req.environ = copy.copy(base_req.environ)
        sub_req.object_name = key

        try:
            query = sub_req.gen_multipart_manifest_delete_query(self.app)
            resp = sub_req.get_response(
                self.app, method='DELETE', query=query,
                headers={'Accept': 'application/json'})
            if query:
                # Have to read the response to actually do the SLO delete
                try:
                    outcome = json.loads(resp.body)
                    slo_errors = outcome['Errors']
                    if slo_errors:
                        # NB: bulk includes 404s in "Number Not Found",
                        # not "Errors"
                        lines = [outcome['Response Status']]
                        lines += ['%s: %s' % (name, status)
                                  for name, status in slo_errors]
                        failure = {'code': 'SLODeleteError',
                                   'message': '\n'.join(lines)}
                        return key, failure
                except (ValueError, TypeError, KeyError):
                    # The log keeps the full response ...
                    self.logger.exception(
                        'Could not parse SLO delete response: %r',
                        resp.body)
                    # ... while the client gets something more generic.
                    failure = {'code': 'SLODeleteError',
                               'message': 'Unexpected swift response'}
                    return key, failure
        except NoSuchKey:
            # A missing key is reported as deleted.
            pass
        except ErrorResponse as e:
            return key, {'code': e.__class__.__name__, 'message': e._msg}
        return key, None

    work = ((req, key, version) for key, version in delete_list)
    with StreamingPile(self.conf.multi_delete_concurrency) as pile:
        for key, failure in pile.asyncstarmap(delete_one, work):
            if failure:
                error_elem = SubElement(result_elem, 'Error')
                SubElement(error_elem, 'Key').text = key
                SubElement(error_elem, 'Code').text = failure['code']
                SubElement(error_elem, 'Message').text = failure['message']
            elif not self.quiet:
                deleted_elem = SubElement(result_elem, 'Deleted')
                SubElement(deleted_elem, 'Key').text = key

    return HTTPOk(body=tostring(result_elem))
def handle_delete_iter(self, req, objs_to_delete=None,
                       user_agent='BulkDelete', swift_source='BD',
                       out_content_type='text/plain'):
    """
    A generator that can be assigned to a swob Response's app_iter which,
    when iterated over, will delete the objects specified in request body.
    Whitespace is yielded whenever more than ``self.yield_frequency``
    seconds have passed since the last yield; once processing finishes a
    final chunk built by ``get_response_body`` is yielded, which can be
    parsed to determine success.

    :params req: a swob Request
    :params objs_to_delete: a list of dictionaries that specifies the
        (native string) objects to be deleted. If None, uses
        self.get_objs_to_delete to query request.
    :params user_agent: appended to the original agent string on each
        DELETE subrequest
    :params swift_source: passed as ``swift_source`` to each subrequest
    :params out_content_type: format of the final response body; a falsy
        value results in an HTTPNotAcceptable status being reported
    """
    last_yield = time()
    wants_xml = out_content_type and out_content_type.endswith('/xml')
    # First keep-alive chunk: the XML declaration for XML output,
    # otherwise a single space.
    heartbeat = \
        b'<?xml version="1.0" encoding="UTF-8"?>\n' if wants_xml else b' '
    separator = b''
    failed_files = []
    resp_dict = {
        'Response Status': HTTPOk().status,
        'Response Body': '',
        'Number Deleted': 0,
        'Number Not Found': 0,
    }
    req.environ['eventlet.minimum_write_chunk_size'] = 0
    try:
        if not out_content_type:
            raise HTTPNotAcceptable(request=req)

        try:
            vrs, account, _junk = req.split_path(2, 3, True)
        except ValueError:
            raise HTTPNotFound(request=req)
        vrs, account = wsgi_to_str(vrs), wsgi_to_str(account)

        incoming_format = req.headers.get('Content-Type')
        if incoming_format:
            if not incoming_format.startswith('text/plain'):
                # For now only accept newline separated object names
                raise HTTPNotAcceptable(request=req)

        if objs_to_delete is None:
            objs_to_delete = self.get_objs_to_delete(req)
        failed_file_response = {'type': HTTPBadRequest}

        def is_object(name):
            return '/' in name.strip('/')

        def is_container(name):
            return '/' not in name.strip('/')

        def delete_filter(predicate, entries):
            # Lazily yields (name, path, version_id) for entries accepted
            # by ``predicate``; entries that cannot be attempted are
            # recorded in resp_dict / failed_files instead.
            for entry in entries:
                name = entry['name']
                if not name or not predicate(name):
                    continue
                if entry.get('error'):
                    entry_error = entry['error']
                    if entry_error['code'] == HTTP_NOT_FOUND:
                        resp_dict['Number Not Found'] += 1
                    else:
                        failed_files.append([
                            wsgi_quote(str_to_wsgi(name)),
                            entry_error['message']])
                    continue
                path = '/'.join(['', vrs, account, name.lstrip('/')])
                if not constraints.check_utf8(path):
                    failed_files.append([
                        wsgi_quote(str_to_wsgi(name)),
                        HTTPPreconditionFailed().status])
                    continue
                yield (name, path, entry.get('version_id'))

        def objs_then_containers(entries):
            # process all objects first, followed by containers
            for predicate in (is_object, is_container):
                yield delete_filter(predicate, entries)

        def do_delete(obj_name, delete_path, version_id):
            # Issue one DELETE subrequest; the trailing 0 is the retry
            # value handed to _process_delete.
            sub_req = make_subrequest(
                req.environ, method='DELETE',
                path=wsgi_quote(str_to_wsgi(delete_path)),
                headers={'X-Auth-Token': req.headers.get('X-Auth-Token')},
                body='', agent='%(orig)s ' + user_agent,
                swift_source=swift_source)
            sub_req.params = \
                {} if version_id is None else {'version-id': version_id}
            return (sub_req.get_response(self.app), obj_name, 0)

        with StreamingPile(self.delete_concurrency) as pile:
            for batch in objs_then_containers(objs_to_delete):
                for resp, obj_name, retry in pile.asyncstarmap(
                        do_delete, batch):
                    if last_yield + self.yield_frequency < time():
                        last_yield = time()
                        yield heartbeat
                        heartbeat = b' '
                        separator = b'\r\n\r\n'
                    self._process_delete(
                        resp, pile, obj_name, resp_dict, failed_files,
                        failed_file_response, retry)
                    if len(failed_files) < self.max_failed_deletes:
                        continue
                    # Abort, but drain off the in-progress deletes
                    for resp, obj_name, retry in pile:
                        if last_yield + self.yield_frequency < time():
                            last_yield = time()
                            yield heartbeat
                            heartbeat = b' '
                            separator = b'\r\n\r\n'
                        # Don't pass in the pile, as we shouldn't retry
                        self._process_delete(
                            resp, None, obj_name, resp_dict, failed_files,
                            failed_file_response, retry)
                    raise HTTPBadRequest('Max delete failures exceeded')

        if failed_files:
            resp_dict['Response Status'] = \
                failed_file_response['type']().status
        elif not resp_dict['Number Deleted'] and \
                not resp_dict['Number Not Found']:
            # Nothing was deleted and nothing was reported missing.
            resp_dict['Response Status'] = HTTPBadRequest().status
            resp_dict['Response Body'] = 'Invalid bulk delete.'

    except HTTPException as err:
        resp_dict['Response Status'] = err.status
        resp_dict['Response Body'] = err.body.decode('utf-8')
    except Exception:
        self.logger.exception('Error in bulk delete.')
        resp_dict['Response Status'] = HTTPServerError().status

    yield separator + get_response_body(
        out_content_type, resp_dict, failed_files, 'delete')
def handle_multipart_put(self, req, start_response):
    """
    Will handle the PUT of a SLO manifest.
    Heads every object in manifest to check if is valid and if so will
    save a manifest generated from the user input. Uses WSGIContext to
    call self and start_response and returns a WSGI iterator.

    :params req: a swob.Request with an obj in path
    :params start_response: the WSGI start_response callable; it is
                            wrapped so that any Etag response header is
                            replaced by the quoted SLO etag
    :returns: the WSGI iterator produced by calling ``self.app``
    :raises: HttpException on errors
    """
    try:
        vrs, account, _container, _obj = req.split_path(1, 4, True)
    except ValueError:
        return self.app(req.environ, start_response)

    # Only compare against the limit when a length was actually sent:
    # a chunked upload has no Content-Length, and ordering None against
    # an int raises TypeError on Python 3.
    content_length = req.content_length
    if content_length is not None and \
            content_length > self.max_manifest_size:
        raise HTTPRequestEntityTooLarge(
            "Manifest File > %d bytes" % self.max_manifest_size)
    if req.headers.get('X-Copy-From'):
        raise HTTPMethodNotAllowed(
            'Multipart Manifest PUTs cannot be COPY requests')
    if content_length is None and \
            req.headers.get('transfer-encoding', '').lower() != 'chunked':
        raise HTTPLengthRequired(request=req)
    parsed_data = parse_and_validate_input(
        req.body_file.read(self.max_manifest_size),
        req.path)
    problem_segments = []

    if len(parsed_data) > self.max_manifest_segments:
        raise HTTPRequestEntityTooLarge(
            'Number of segments must be <= %d' %
            self.max_manifest_segments)
    total_size = 0
    out_content_type = req.accept.best_match(ACCEPTABLE_FORMATS)
    if not out_content_type:
        out_content_type = 'text/plain'

    # A path may be listed several times (e.g. with different ranges);
    # remember every manifest position so each path is HEADed only once.
    path2indices = defaultdict(list)
    for index, seg_dict in enumerate(parsed_data):
        path2indices[seg_dict['path']].append(index)

    def do_head(obj_name):
        obj_path = '/'.join(['', vrs, account,
                             get_valid_utf8_str(obj_name).lstrip('/')])

        sub_req = make_subrequest(
            req.environ, path=obj_path + '?',  # kill the query string
            method='HEAD',
            headers={'x-auth-token': req.headers.get('x-auth-token')},
            agent='%(orig)s SLO MultipartPUT', swift_source='SLO')
        return obj_name, sub_req.get_response(self)

    def validate_seg_dict(seg_dict, head_seg_resp):
        # Returns (segment_length, manifest_entry); any problem found is
        # appended to problem_segments. Take the name from the segment
        # itself rather than from a loop variable of the enclosing scope.
        obj_name = seg_dict['path']
        if not head_seg_resp.is_success:
            problem_segments.append([quote(obj_name),
                                     head_seg_resp.status])
            return 0, None

        segment_length = head_seg_resp.content_length
        if seg_dict.get('range'):
            # Since we now know the length, we can normalize the
            # range. We know that there is exactly one range
            # requested since we checked that earlier in
            # parse_and_validate_input().
            ranges = seg_dict['range'].ranges_for_length(
                head_seg_resp.content_length)

            if not ranges:
                problem_segments.append([quote(obj_name),
                                         'Unsatisfiable Range'])
            elif ranges == [(0, head_seg_resp.content_length)]:
                # Just one range, and it exactly matches the object.
                # Why'd we do this again?
                del seg_dict['range']
                segment_length = head_seg_resp.content_length
            else:
                rng = ranges[0]
                seg_dict['range'] = '%d-%d' % (rng[0], rng[1] - 1)
                segment_length = rng[1] - rng[0]

        if segment_length < 1:
            problem_segments.append(
                [quote(obj_name),
                 'Too small; each segment must be at least 1 byte.'])
        if seg_dict['size_bytes'] is not None and \
                seg_dict['size_bytes'] != head_seg_resp.content_length:
            problem_segments.append([quote(obj_name), 'Size Mismatch'])
        if seg_dict['etag'] is not None and \
                seg_dict['etag'] != head_seg_resp.etag:
            problem_segments.append([quote(obj_name), 'Etag Mismatch'])
        if head_seg_resp.last_modified:
            last_modified = head_seg_resp.last_modified
        else:
            # shouldn't happen
            last_modified = datetime.now()

        last_modified_formatted = \
            last_modified.strftime('%Y-%m-%dT%H:%M:%S.%f')
        seg_data = {'name': '/' + seg_dict['path'].lstrip('/'),
                    'bytes': head_seg_resp.content_length,
                    'hash': head_seg_resp.etag,
                    'content_type': head_seg_resp.content_type,
                    'last_modified': last_modified_formatted}
        if seg_dict.get('range'):
            seg_data['range'] = seg_dict['range']
        if config_true_value(
                head_seg_resp.headers.get('X-Static-Large-Object')):
            seg_data['sub_slo'] = True
        return segment_length, seg_data

    def etag_bytes(text):
        # hashlib only accepts bytes-like input; Python 2 native strings
        # already are bytes and pass through unchanged.
        if isinstance(text, bytes):
            return text
        return text.encode('ascii')

    # Filled by index because HEAD responses need not arrive in manifest
    # order.
    data_for_storage = [None] * len(parsed_data)
    with StreamingPile(self.concurrency) as pile:
        for obj_name, resp in pile.asyncstarmap(do_head, (
                (path, ) for path in path2indices)):
            for i in path2indices[obj_name]:
                segment_length, seg_data = validate_seg_dict(
                    parsed_data[i], resp)
                data_for_storage[i] = seg_data
                total_size += segment_length

    if problem_segments:
        resp_body = get_response_body(
            out_content_type, {}, problem_segments)
        raise HTTPBadRequest(resp_body, content_type=out_content_type)

    slo_etag = md5()
    for seg_data in data_for_storage:
        if seg_data.get('range'):
            slo_etag.update(etag_bytes(
                '%s:%s;' % (seg_data['hash'], seg_data['range'])))
        else:
            slo_etag.update(etag_bytes(seg_data['hash']))

    slo_etag = slo_etag.hexdigest()

    req.headers.update({
        SYSMETA_SLO_ETAG: slo_etag,
        SYSMETA_SLO_SIZE: total_size,
        'X-Static-Large-Object': 'True',
    })

    json_data = json.dumps(data_for_storage)
    if six.PY3:
        json_data = json_data.encode('utf-8')
    req.body = json_data

    env = req.environ
    if not env.get('CONTENT_TYPE'):
        guessed_type, _junk = mimetypes.guess_type(req.path_info)
        env['CONTENT_TYPE'] = guessed_type or 'application/octet-stream'
    env['swift.content_type_overridden'] = True
    env['CONTENT_TYPE'] += ";swift_bytes=%d" % total_size

    def start_response_wrapper(status, headers, exc_info=None):
        # Replace the first Etag header, if any, with the quoted SLO etag.
        for i, (header, _value) in enumerate(headers):
            if header.lower() == 'etag':
                headers[i] = ('Etag', '"%s"' % slo_etag)
                break
        return start_response(status, headers, exc_info)

    return self.app(env, start_response_wrapper)
def POST(self, req):
    """
    Handles Delete Multiple Objects.

    Parses the ``Delete`` XML document from the request body, HEADs the
    bucket, then issues one DELETE per listed key through a
    ``StreamingPile``. When the DELETE carries a multipart-manifest
    delete query, the JSON response body is read and inspected so that
    the delete is actually carried out and any errors it reports are
    surfaced to the client.

    :param req: the request being handled; its ``xml()``, ``check_md5()``
                and ``get_response()`` methods are used
    :returns: ``HTTPOk`` whose body is the serialized ``DeleteResult``
              element, or the body built by ``self._gen_error_body`` when
              the bucket HEAD is denied
    :raises MissingRequestBodyError: if the request body is empty
    :raises UserKeyMustBeSpecified: if an ``Object`` entry has an empty key
    :raises MalformedXML: if the body does not parse/validate, or lists
                          more keys than ``max_multi_delete_objects``
    :raises S3NotImplemented: if any entry names a specific version
    """
    # Imported here so the response parsing below does not rely on a
    # module-level ``json`` name being available.
    import json

    def object_key_iter(elem):
        # Yield a (key, version-or-None) pair for every <Object> child.
        for obj in elem.iterchildren('Object'):
            key = obj.find('./Key').text
            if not key:
                raise UserKeyMustBeSpecified()
            version = obj.find('./VersionId')
            if version is not None:
                version = version.text

            yield key, version

    max_body_size = min(
        # FWIW, AWS limits multideletes to 1000 keys, and swift limits
        # object names to 1024 bytes (by default). Add a factor of two to
        # allow some slop.
        2 * self.conf.max_multi_delete_objects * MAX_OBJECT_NAME_LENGTH,
        # But, don't let operators shoot themselves in the foot
        10 * 1024 * 1024)

    try:
        xml = req.xml(max_body_size)
        if not xml:
            raise MissingRequestBodyError()

        req.check_md5(xml)
        elem = fromstring(xml, 'Delete', self.logger)

        quiet = elem.find('./Quiet')
        if quiet is not None and quiet.text.lower() == 'true':
            self.quiet = True
        else:
            self.quiet = False

        delete_list = list(object_key_iter(elem))
        if len(delete_list) > self.conf.max_multi_delete_objects:
            raise MalformedXML()
    except (XMLSyntaxError, DocumentInvalid):
        raise MalformedXML()
    except ErrorResponse:
        raise
    except Exception as e:
        self.logger.error(e)
        raise

    elem = Element('DeleteResult')

    # check bucket existence
    try:
        req.get_response(self.app, 'HEAD')
    except AccessDenied as error:
        body = self._gen_error_body(error, elem, delete_list)
        return HTTPOk(body=body)

    if any(version is not None for _key, version in delete_list):
        # TODO: support deleting specific versions of objects
        raise S3NotImplemented()

    def do_delete(base_req, key, version):
        # Returns (key, None) on success, or (key, {'code', 'message'})
        # describing the failure. Each delete works on its own shallow
        # copy of the request and of its environ.
        req = copy.copy(base_req)
        req.environ = copy.copy(base_req.environ)
        req.object_name = key

        try:
            query = req.gen_multipart_manifest_delete_query(self.app)
            resp = req.get_response(self.app, method='DELETE', query=query,
                                    headers={'Accept': 'application/json'})
            # Have to read the response to actually do the SLO delete
            if query:
                try:
                    delete_result = json.loads(resp.body)
                    if delete_result['Errors']:
                        # NB: bulk includes 404s in "Number Not Found",
                        # not "Errors"
                        msg_parts = [delete_result['Response Status']]
                        msg_parts.extend(
                            '%s: %s' % (obj, status)
                            for obj, status in delete_result['Errors'])
                        return key, {'code': 'SLODeleteError',
                                     'message': '\n'.join(msg_parts)}
                    # else, all good
                except (ValueError, TypeError, KeyError):
                    # Logs get all the gory details
                    self.logger.exception(
                        'Could not parse SLO delete response: %r',
                        resp.body)
                    # Client gets something more generic
                    return key, {'code': 'SLODeleteError',
                                 'message': 'Unexpected swift response'}
        except NoSuchKey:
            # A key that is already gone is reported as deleted.
            pass
        except ErrorResponse as e:
            return key, {'code': e.__class__.__name__, 'message': e._msg}
        return key, None

    with StreamingPile(self.conf.multi_delete_concurrency) as pile:
        for key, err in pile.asyncstarmap(do_delete, (
                (req, key, version) for key, version in delete_list)):
            if err:
                error = SubElement(elem, 'Error')
                SubElement(error, 'Key').text = key
                SubElement(error, 'Code').text = err['code']
                SubElement(error, 'Message').text = err['message']
            elif not self.quiet:
                deleted = SubElement(elem, 'Deleted')
                SubElement(deleted, 'Key').text = key

    body = tostring(elem)

    return HTTPOk(body=body)