def initialize(self, hdrs=None, parms=None):
    """
    HEAD the object and cache its attributes on this instance.

    Populates content_type, metadata, etag, size and last_modified from
    the response headers.

    :param hdrs: optional extra request headers
    :param parms: optional query parameters
    :returns: False if the object has no name or the server returned 404,
              True on success
    :raises ResponseError: on any other non-success status
    """
    if hdrs is None:
        hdrs = {}
    if parms is None:
        parms = {}
    if not self.name:
        return False

    status = self.conn.make_request('HEAD', self.path, hdrs=hdrs,
                                    parms=parms)
    if status == 404:
        return False
    elif not is_success(status):
        raise ResponseError(self.conn.response, 'HEAD',
                            self.conn.make_path(self.path))
    for hdr, val in self.conn.response.getheaders():
        hdr = wsgi_to_str(hdr).lower()
        val = wsgi_to_str(val)
        if hdr == 'content-type':
            self.content_type = val
        if hdr.startswith('x-object-meta-'):
            # strip the 'x-object-meta-' prefix (14 characters)
            self.metadata[hdr[14:]] = val
        if hdr == 'etag':
            self.etag = val
        if hdr == 'content-length':
            self.size = int(val)
        if hdr == 'last-modified':
            self.last_modified = val
    return True
def _update_metadata(self, req, broker, req_timestamp):
    """
    Store account sys/user metadata from the request headers on the broker.

    Each qualifying header becomes a (value, timestamp) pair keyed by the
    native-string header name.
    """
    metadata = {}
    for hdr, hdr_value in req.headers.items():
        if is_sys_or_user_meta('account', hdr):
            metadata[wsgi_to_str(hdr)] = (
                wsgi_to_str(hdr_value), req_timestamp.internal)
    if not metadata:
        return
    broker.update_metadata(metadata, validate_metadata=True)
def _build_list_bucket_result_type_two(self, req, objects, encoding_type,
                                       tag_max_keys, is_truncated):
    """
    Build the XML body for an S3 ListObjectsV2 (list-type=2) response.

    :param req: the S3 request
    :param objects: listing entries; the last one seeds
                    NextContinuationToken when truncated
    :param encoding_type: 'url' to URL-encode names, or None
    :param tag_max_keys: value to report as MaxKeys
    :param is_truncated: whether more results remain
    :returns: the populated ListBucketResult element
    """
    elem = self._build_base_listing_element(req, encoding_type)
    if is_truncated:
        # the continuation token is the base64 of the last entry's name
        if 'name' in objects[-1]:
            SubElement(elem, 'NextContinuationToken').text = \
                b64encode(objects[-1]['name'].encode('utf8'))
        if 'subdir' in objects[-1]:
            SubElement(elem, 'NextContinuationToken').text = \
                b64encode(objects[-1]['subdir'].encode('utf8'))
    if 'continuation-token' in req.params:
        SubElement(elem, 'ContinuationToken').text = \
            swob.wsgi_to_str(req.params['continuation-token'])
    start_after = swob.wsgi_to_str(req.params.get('start-after'))
    if start_after is not None:
        if encoding_type == 'url':
            start_after = quote(start_after)
        SubElement(elem, 'StartAfter').text = start_after
    SubElement(elem, 'KeyCount').text = str(len(objects))
    SubElement(elem, 'MaxKeys').text = str(tag_max_keys)
    delimiter = swob.wsgi_to_str(req.params.get('delimiter'))
    if delimiter:
        if encoding_type == 'url':
            delimiter = quote(delimiter)
        SubElement(elem, 'Delimiter').text = delimiter
    if encoding_type == 'url':
        SubElement(elem, 'EncodingType').text = encoding_type
    SubElement(elem, 'IsTruncated').text = \
        'true' if is_truncated else 'false'
    return elem
def _build_versions_result(self, req, objects, encoding_type,
                           tag_max_keys, is_truncated):
    """
    Build the XML body for an S3 ListObjectVersions response.

    :param req: the S3 request
    :param objects: listing entries; the last one seeds NextKeyMarker /
                    NextVersionIdMarker when truncated
    :param encoding_type: 'url' to URL-encode names, or None
    :param tag_max_keys: value to report as MaxKeys
    :param is_truncated: whether more results remain
    :returns: the populated ListVersionsResult element
    """
    elem = Element('ListVersionsResult')
    SubElement(elem, 'Name').text = req.container_name
    prefix = swob.wsgi_to_str(req.params.get('prefix'))
    if prefix and encoding_type == 'url':
        prefix = quote(prefix)
    SubElement(elem, 'Prefix').text = prefix
    key_marker = swob.wsgi_to_str(req.params.get('key-marker'))
    if key_marker and encoding_type == 'url':
        key_marker = quote(key_marker)
    SubElement(elem, 'KeyMarker').text = key_marker
    SubElement(elem, 'VersionIdMarker').text = swob.wsgi_to_str(
        req.params.get('version-id-marker'))
    if is_truncated:
        if 'name' in objects[-1]:
            SubElement(elem, 'NextKeyMarker').text = \
                objects[-1]['name']
            # entries without a version get the literal 'null' version id
            SubElement(elem, 'NextVersionIdMarker').text = \
                objects[-1].get('version') or 'null'
        if 'subdir' in objects[-1]:
            SubElement(elem, 'NextKeyMarker').text = \
                objects[-1]['subdir']
            SubElement(elem, 'NextVersionIdMarker').text = 'null'
    SubElement(elem, 'MaxKeys').text = str(tag_max_keys)
    delimiter = swob.wsgi_to_str(req.params.get('delimiter'))
    if delimiter is not None:
        if encoding_type == 'url':
            delimiter = quote(delimiter)
        SubElement(elem, 'Delimiter').text = delimiter
    if encoding_type == 'url':
        SubElement(elem, 'EncodingType').text = encoding_type
    SubElement(elem, 'IsTruncated').text = \
        'true' if is_truncated else 'false'
    return elem
def _build_list_bucket_result_type_one(self, req, objects, encoding_type,
                                       tag_max_keys, is_truncated):
    """
    Build the XML body for an S3 ListObjects (v1) response.

    :param req: the S3 request
    :param objects: listing entries; the last one seeds NextMarker when
                    truncated and a delimiter was given
    :param encoding_type: 'url' to URL-encode names, or None
    :param tag_max_keys: value to report as MaxKeys
    :param is_truncated: whether more results remain
    :returns: the populated ListBucketResult element
    """
    elem = self._build_base_listing_element(req, encoding_type)
    marker = swob.wsgi_to_str(req.params.get('marker'))
    if marker and encoding_type == 'url':
        marker = quote(marker)
    SubElement(elem, 'Marker').text = marker
    if is_truncated and 'delimiter' in req.params:
        if 'name' in objects[-1]:
            name = objects[-1]['name']
        else:
            name = objects[-1]['subdir']
        if encoding_type == 'url':
            name = quote(name.encode('utf-8'))
        SubElement(elem, 'NextMarker').text = name
    # XXX: really? no NextMarker when no delimiter??
    SubElement(elem, 'MaxKeys').text = str(tag_max_keys)
    delimiter = swob.wsgi_to_str(req.params.get('delimiter'))
    if delimiter:
        if encoding_type == 'url':
            delimiter = quote(delimiter)
        SubElement(elem, 'Delimiter').text = delimiter
    if encoding_type == 'url':
        SubElement(elem, 'EncodingType').text = encoding_type
    SubElement(elem, 'IsTruncated').text = \
        'true' if is_truncated else 'false'
    return elem
def check_metadata(req, target_type):
    """
    Check metadata sent in the request headers.  This should only check
    that the metadata in the request given is valid.  Checks against
    account/container overall metadata should be forwarded on to its
    respective server to be checked.

    :param req: request object
    :param target_type: str: one of: object, container, or account: indicates
                        which type the target storage for the metadata is
    :returns: HTTPBadRequest with bad metadata otherwise None
    """
    target_type = target_type.lower()
    prefix = 'x-%s-meta-' % target_type
    meta_count = 0
    meta_size = 0
    for key, value in req.headers.items():
        # any header (metadata or not) may be rejected for being too long
        if (isinstance(value, six.string_types)
                and len(value) > MAX_HEADER_SIZE):
            return HTTPBadRequest(body=b'Header value too long: %s' %
                                  wsgi_to_bytes(key[:MAX_META_NAME_LENGTH]),
                                  request=req, content_type='text/plain')
        if not key.lower().startswith(prefix):
            continue
        # from here on, key is the metadata name with the prefix stripped
        key = key[len(prefix):]
        if not key:
            return HTTPBadRequest(body='Metadata name cannot be empty',
                                  request=req, content_type='text/plain')
        bad_key = not check_utf8(wsgi_to_str(key))
        bad_value = value and not check_utf8(wsgi_to_str(value))
        # UTF-8 validity is only enforced for account/container metadata
        if target_type in ('account', 'container') and (bad_key or bad_value):
            return HTTPBadRequest(body='Metadata must be valid UTF-8',
                                  request=req, content_type='text/plain')
        meta_count += 1
        meta_size += len(key) + len(value)
        if len(key) > MAX_META_NAME_LENGTH:
            return HTTPBadRequest(
                body=wsgi_to_bytes('Metadata name too long: %s%s' % (
                    prefix, key)),
                request=req, content_type='text/plain')
        if len(value) > MAX_META_VALUE_LENGTH:
            return HTTPBadRequest(
                body=wsgi_to_bytes('Metadata value longer than %d: %s%s' % (
                    MAX_META_VALUE_LENGTH, prefix, key)),
                request=req, content_type='text/plain')
    # aggregate limits across all metadata headers
    if meta_count > MAX_META_COUNT:
        return HTTPBadRequest(
            body='Too many metadata items; max %d' % MAX_META_COUNT,
            request=req, content_type='text/plain')
    if meta_size > MAX_META_OVERALL_SIZE:
        return HTTPBadRequest(
            body='Total metadata too large; max %d' % MAX_META_OVERALL_SIZE,
            request=req, content_type='text/plain')
    return None
def auth_callback_same_container(req):
    """
    Authorize the request only when it targets the expected account and
    container; otherwise return 401.
    """
    try:
        _ver, acc, con, _rest = req.split_path(3, 4, True)
    except ValueError:
        return HTTPUnauthorized(request=req)
    same_target = (wsgi_to_str(acc) == account_to_match
                   and wsgi_to_str(con) == container_to_match)
    if same_target:
        return None
    return HTTPUnauthorized(request=req)
def _update_metadata(self, req, broker, req_timestamp, method):
    """
    Store container metadata from the request headers on the broker.

    Headers in save_headers plus container sys/user metadata are kept,
    each as a (value, timestamp) pair keyed by native-string header name.
    """
    metadata = {
        wsgi_to_str(key): (wsgi_to_str(value), req_timestamp.internal)
        for key, value in req.headers.items()
        if key.lower() in self.save_headers
        or is_sys_or_user_meta('container', key)}
    if metadata:
        if 'X-Container-Sync-To' in metadata:
            # a new or changed sync target invalidates the sync points
            if 'X-Container-Sync-To' not in broker.metadata or \
                    metadata['X-Container-Sync-To'][0] != \
                    broker.metadata['X-Container-Sync-To'][0]:
                broker.set_x_container_sync_points(-1, -1)
        broker.update_metadata(metadata, validate_metadata=True)
        self._update_sync_store(broker, method)
def _update_metadata(self, req, broker, req_timestamp, method):
    """
    Store container metadata from the request headers on the broker.

    Headers in save_headers plus container sys/user metadata are kept,
    each as a (value, timestamp) pair keyed by native-string header name.
    """
    metadata = {
        wsgi_to_str(hdr): (wsgi_to_str(hdr_value), req_timestamp.internal)
        for hdr, hdr_value in req.headers.items()
        if hdr.lower() in self.save_headers
        or is_sys_or_user_meta('container', hdr)}
    if not metadata:
        return
    if 'X-Container-Sync-To' in metadata:
        # a new or changed sync target invalidates the sync points
        if 'X-Container-Sync-To' not in broker.metadata or \
                metadata['X-Container-Sync-To'][0] != \
                broker.metadata['X-Container-Sync-To'][0]:
            broker.set_x_container_sync_points(-1, -1)
    broker.update_metadata(metadata, validate_metadata=True)
    self._update_sync_store(broker, method)
def __call__(self, env, start_response):
    """
    WSGI entry point: validate the path, dispatch to the handler named
    after the request method, and log the request.
    """
    start_time = time.time()
    req = Request(env)
    self.logger.txn_id = req.headers.get('x-trans-id', None)
    if not check_utf8(wsgi_to_str(req.path_info), internal=True):
        res = HTTPPreconditionFailed(body='Invalid UTF8 or contains NULL')
    else:
        try:
            # disallow methods which have not been marked 'public'
            if req.method not in self.allowed_methods:
                res = HTTPMethodNotAllowed()
            else:
                res = getattr(self, req.method)(req)
        except HTTPException as error_response:
            res = error_response
        except (Exception, Timeout):
            self.logger.exception(
                _('ERROR __call__ error with %(method)s %(path)s '), {
                    'method': req.method, 'path': req.path})
            res = HTTPInternalServerError(body=traceback.format_exc())
    if self.log_requests:
        trans_time = time.time() - start_time
        log_message = get_log_line(req, res, trans_time, '',
                                   self.log_format,
                                   self.anonymization_method,
                                   self.anonymization_salt)
        # REPLICATE traffic is chatty, so log it at debug level only
        if req.method.upper() == 'REPLICATE':
            self.logger.debug(log_message)
        else:
            self.logger.info(log_message)
    return res(env, start_response)
def __call__(self, env, start_response):
    """
    WSGI entry point: validate the path, dispatch to the handler named
    after the request method, and log the request (including the
    x-container-timestamp response header when present).
    """
    start_time = time.time()
    req = Request(env)
    self.logger.txn_id = req.headers.get('x-trans-id', None)
    if not check_utf8(wsgi_to_str(req.path_info)):
        res = HTTPPreconditionFailed(body='Invalid UTF8 or contains NULL')
    else:
        try:
            # disallow methods which are not publicly accessible
            if req.method not in self.allowed_methods:
                res = HTTPMethodNotAllowed()
            else:
                res = getattr(self, req.method)(req)
        except HTTPException as error_response:
            res = error_response
        except (Exception, Timeout):
            self.logger.exception(_('ERROR __call__ error with %(method)s'
                                    ' %(path)s '),
                                  {'method': req.method, 'path': req.path})
            res = HTTPInternalServerError(body=traceback.format_exc())
    if self.log_requests:
        trans_time = time.time() - start_time
        additional_info = ''
        if res.headers.get('x-container-timestamp') is not None:
            additional_info += 'x-container-timestamp: %s' % \
                res.headers['x-container-timestamp']
        log_msg = get_log_line(req, res, trans_time, additional_info,
                               self.log_format, self.anonymization_method,
                               self.anonymization_salt)
        # REPLICATE traffic is chatty, so log it at debug level only
        if req.method.upper() == 'REPLICATE':
            self.logger.debug(log_msg)
        else:
            self.logger.info(log_msg)
    return res(env, start_response)
def handle_request(self, req, start_response):
    """
    Take a GET or HEAD request, and if it is for a dynamic large object
    manifest, return an appropriate response.

    Otherwise, simply pass it through.
    """
    update_ignore_range_header(req, 'X-Object-Manifest')
    resp_iter = self._app_call(req.environ)

    # make sure this response is for a dynamic large object manifest
    for header, value in self._response_headers:
        if (header.lower() == 'x-object-manifest'):
            content_length = self._response_header_value('content-length')
            if content_length is not None and int(content_length) < 1024:
                # Go ahead and consume small bodies
                drain_and_close(resp_iter)
            close_if_possible(resp_iter)
            response = self.get_or_head_response(
                req, wsgi_to_str(wsgi_unquote(value)))
            return response(req.environ, start_response)
    # Not a dynamic large object manifest; just pass it through.
    start_response(self._response_status,
                   self._response_headers,
                   self._response_exc_info)
    return resp_iter
def extract_acl_and_report_errors(self, req):
    """
    Return a user-readable string indicating the errors in the input ACL,
    or None if there are no errors.

    On success, the ACL header is moved into the account's internal
    sys-meta header as a side effect.
    """
    acl_header = 'x-account-access-control'
    acl_data = wsgi_to_str(req.headers.get(acl_header))
    result = parse_acl(version=2, data=acl_data)
    if result is None:
        return 'Syntax error in input (%r)' % acl_data

    tempauth_acl_keys = 'admin read-write read-only'.split()
    for key in result:
        # While it is possible to construct auth systems that collaborate
        # on ACLs, TempAuth is not such an auth system.  At this point,
        # it thinks it is authoritative.
        if key not in tempauth_acl_keys:
            return "Key %s not recognized" % json.dumps(key)

    for key in tempauth_acl_keys:
        if key not in result:
            continue
        if not isinstance(result[key], list):
            return "Value for key %s must be a list" % json.dumps(key)
        for grantee in result[key]:
            if not isinstance(grantee, six.string_types):
                return "Elements of %s list must be strings" % json.dumps(
                    key)

    # Everything looks fine, no errors found
    internal_hdr = get_sys_meta_prefix('account') + 'core-access-control'
    req.headers[internal_hdr] = req.headers.pop(acl_header)
    return None
def get_objs_to_delete(self, req):
    """
    Will populate objs_to_delete with data from request input.

    :params req: a Swob request
    :returns: a list of the contents of req.body when separated by newline.
    :raises HTTPException: on failures
    """
    line = b''
    data_remaining = True
    objs_to_delete = []

    # a body length must be known up front, either explicitly or chunked
    if req.content_length is None and \
            req.headers.get('transfer-encoding', '').lower() != 'chunked':
        raise HTTPLengthRequired(request=req)

    while data_remaining:
        if b'\n' in line:
            obj_to_delete, line = line.split(b'\n', 1)
            if six.PY2:
                obj_to_delete = wsgi_unquote(obj_to_delete.strip())
            else:
                # yeah, all this chaining is pretty terrible...
                # but it gets even worse trying to use UTF-8 and
                # errors='surrogateescape' when dealing with terrible
                # input like b'\xe2%98\x83'
                obj_to_delete = wsgi_to_str(wsgi_unquote(
                    bytes_to_wsgi(obj_to_delete.strip())))
            objs_to_delete.append({'name': obj_to_delete})
        else:
            data = req.body_file.read(self.max_path_length)
            if data:
                line += data
            else:
                data_remaining = False
                # handle the final (unterminated) line, if any
                if six.PY2:
                    obj_to_delete = wsgi_unquote(line.strip())
                else:
                    obj_to_delete = wsgi_to_str(wsgi_unquote(
                        bytes_to_wsgi(line.strip())))
                if obj_to_delete:
                    objs_to_delete.append({'name': obj_to_delete})
        if len(objs_to_delete) > self.max_deletes_per_request:
            raise HTTPRequestEntityTooLarge(
                'Maximum Bulk Deletes: %d per request' %
                self.max_deletes_per_request)
        if len(line) > self.max_path_length * 2:
            raise HTTPBadRequest('Invalid File Name')
    return objs_to_delete
def get_objs_to_delete(self, req):
    """
    Will populate objs_to_delete with data from request input.

    :params req: a Swob request
    :returns: a list of the contents of req.body when separated by newline.
    :raises HTTPException: on failures
    """
    line = b''
    data_remaining = True
    objs_to_delete = []

    # a body length must be known up front, either explicitly or chunked
    if req.content_length is None and \
            req.headers.get('transfer-encoding', '').lower() != 'chunked':
        raise HTTPLengthRequired(request=req)

    while data_remaining:
        if b'\n' in line:
            obj_to_delete, line = line.split(b'\n', 1)
            if six.PY2:
                obj_to_delete = wsgi_unquote(obj_to_delete.strip())
            else:
                # yeah, all this chaining is pretty terrible...
                # but it gets even worse trying to use UTF-8 and
                # errors='surrogateescape' when dealing with terrible
                # input like b'\xe2%98\x83'
                obj_to_delete = wsgi_to_str(
                    wsgi_unquote(bytes_to_wsgi(obj_to_delete.strip())))
            objs_to_delete.append({'name': obj_to_delete})
        else:
            data = req.body_file.read(self.max_path_length)
            if data:
                line += data
            else:
                data_remaining = False
                # handle the final (unterminated) line, if any
                if six.PY2:
                    obj_to_delete = wsgi_unquote(line.strip())
                else:
                    obj_to_delete = wsgi_to_str(
                        wsgi_unquote(bytes_to_wsgi(line.strip())))
                if obj_to_delete:
                    objs_to_delete.append({'name': obj_to_delete})
        if len(objs_to_delete) > self.max_deletes_per_request:
            raise HTTPRequestEntityTooLarge(
                'Maximum Bulk Deletes: %d per request' %
                self.max_deletes_per_request)
        if len(line) > self.max_path_length * 2:
            raise HTTPBadRequest('Invalid File Name')
    return objs_to_delete
def setUp(self):
    """Stub out internal_client and build a fake expirer task queue."""
    global not_sleep

    self.old_loadapp = internal_client.loadapp
    self.old_sleep = internal_client.sleep

    internal_client.loadapp = lambda *a, **kw: None
    internal_client.sleep = not_sleep

    self.rcache = mkdtemp()
    self.conf = {'recon_cache_path': self.rcache}
    self.logger = debug_logger('test-expirer')

    self.ts = make_timestamp_iter()
    self.past_time = str(int(time() - 86400))
    self.future_time = str(int(time() + 86400))
    # Dummy task queue for test
    self.fake_swift = FakeInternalClient({
        '.expiring_objects': {
            # this task container will be checked
            self.past_time: [
                # tasks ready for execution
                self.past_time + '-a0/c0/o0',
                self.past_time + '-a1/c1/o1',
                self.past_time + '-a2/c2/o2',
                self.past_time + '-a3/c3/o3',
                self.past_time + '-a4/c4/o4',
                self.past_time + '-a5/c5/o5',
                self.past_time + '-a6/c6/o6',
                self.past_time + '-a7/c7/o7',
                # task objects for unicode test
                self.past_time + u'-a8/c8/o8\u2661',
                self.past_time + u'-a9/c9/o9\xf8',
                # this task will be skipped
                self.future_time + '-a10/c10/o10'],
            # this task container will be skipped
            self.future_time: [self.future_time + '-a11/c11/o11']}
    })
    self.expirer = expirer.ObjectExpirer(self.conf, logger=self.logger,
                                         swift=self.fake_swift)

    # target object paths which should be expirerd now
    self.expired_target_path_list = [
        swob.wsgi_to_str(tgt) for tgt in (
            'a0/c0/o0', 'a1/c1/o1', 'a2/c2/o2', 'a3/c3/o3', 'a4/c4/o4',
            'a5/c5/o5', 'a6/c6/o6', 'a7/c7/o7',
            'a8/c8/o8\xe2\x99\xa1', 'a9/c9/o9\xc3\xb8',
        )
    ]
def _build_base_listing_element(self, req, encoding_type):
    """
    Create a ListBucketResult element with Name and Prefix populated,
    shared by the v1 and v2 listing builders.
    """
    result = Element('ListBucketResult')
    SubElement(result, 'Name').text = req.container_name
    prefix = swob.wsgi_to_str(req.params.get('prefix'))
    if encoding_type == 'url' and prefix:
        prefix = quote(prefix)
    SubElement(result, 'Prefix').text = prefix
    return result
def _get_path_parts(self, env):
    """
    Return the account, container and object name for the request,
    if it's an object request and one of the configured methods;
    otherwise, None is returned.

    :param env: The WSGI environment for the request.
    :returns: (Account str, container str, object str) or
              (None, None, None).
    """
    no_match = (None, None, None)
    if env['REQUEST_METHOD'] not in self.conf['methods']:
        return no_match
    try:
        ver, acc, cont, obj = split_path(env['PATH_INFO'], 4, 4, True)
    except ValueError:
        return no_match
    # only v1 object paths (with a non-empty object name) qualify
    if ver != 'v1' or not obj.strip('/'):
        return no_match
    return (wsgi_to_str(acc), wsgi_to_str(cont), wsgi_to_str(obj))
def check_object_creation(req, object_name):
    """
    Check to ensure that everything is alright about an object
    to be created.

    :param req: HTTP request object
    :param object_name: name of object to be created
    :returns: HTTPRequestEntityTooLarge -- the object is too large
    :returns: HTTPLengthRequired -- missing content-length header and not
                                    a chunked request
    :returns: HTTPBadRequest -- missing or bad content-type header, or
                                bad metadata
    :returns: HTTPNotImplemented -- unsupported transfer-encoding header value
    """
    try:
        ml = req.message_length()
    except ValueError as e:
        return HTTPBadRequest(request=req, content_type='text/plain',
                              body=str(e).encode('ascii'))
    except AttributeError as e:
        # message_length raises AttributeError for unsupported
        # transfer-encoding values
        return HTTPNotImplemented(request=req, content_type='text/plain',
                                  body=str(e).encode('ascii'))
    if ml is not None and ml > MAX_FILE_SIZE:
        return HTTPRequestEntityTooLarge(body=b'Your request is too large.',
                                         request=req,
                                         content_type='text/plain')
    if req.content_length is None and \
            req.headers.get('transfer-encoding') != 'chunked':
        return HTTPLengthRequired(body=b'Missing Content-Length header.',
                                  request=req,
                                  content_type='text/plain')

    if len(object_name) > MAX_OBJECT_NAME_LENGTH:
        return HTTPBadRequest(body=b'Object name length of %d longer than %d'
                              % (len(object_name), MAX_OBJECT_NAME_LENGTH),
                              request=req, content_type='text/plain')

    if 'Content-Type' not in req.headers:
        return HTTPBadRequest(request=req, content_type='text/plain',
                              body=b'No content type')

    try:
        req = check_delete_headers(req)
    except HTTPException as e:
        return HTTPBadRequest(request=req, body=e.body,
                              content_type='text/plain')

    if not check_utf8(wsgi_to_str(req.headers['Content-Type'])):
        return HTTPBadRequest(request=req, body=b'Invalid Content-Type',
                              content_type='text/plain')
    return check_metadata(req, 'object')
def auth_callback_same_account(req):
    """
    Authorize the request only when it targets the expected account;
    otherwise return 401.
    """
    try:
        _ver, acc, _rest = req.split_path(2, 3, True)
    except ValueError:
        return HTTPUnauthorized(request=req)
    if wsgi_to_str(acc) != account_to_match:
        return HTTPUnauthorized(request=req)
    return None
def header_fields(self, required_fields, optional_fields=None):
    """
    Extract fields from the last response's headers.

    :param required_fields: iterable of (return_key, header) pairs that
                            must be present; a missing header raises
                            ValueError
    :param optional_fields: iterable of (return_key, header) pairs that
                            are included only when present
    :returns: dict mapping return_key to the header value (int-converted
              for known integer headers)
    """
    if optional_fields is None:
        optional_fields = ()

    def is_int_header(header):
        # per-policy usage headers plus the well-known count/size headers
        if header.startswith('x-account-storage-policy-') and \
                header.endswith(('-bytes-used', '-object-count')):
            return True
        return header in (
            'content-length',
            'x-account-container-count',
            'x-account-object-count',
            'x-account-bytes-used',
            'x-container-object-count',
            'x-container-bytes-used',
        )

    # NB: on py2, headers are always lower; on py3, they match the bytes
    # on the wire
    headers = dict((wsgi_to_str(h).lower(), wsgi_to_str(v))
                   for h, v in self.conn.response.getheaders())
    ret = {}

    for return_key, header in required_fields:
        if header not in headers:
            raise ValueError("%s was not found in response headers: %r" %
                             (header, headers))

        if is_int_header(header):
            ret[return_key] = int(headers[header])
        else:
            ret[return_key] = headers[header]

    for return_key, header in optional_fields:
        if header not in headers:
            continue
        if is_int_header(header):
            ret[return_key] = int(headers[header])
        else:
            ret[return_key] = headers[header]

    return ret
def _parse_request_options(self, req, max_keys):
    """
    Parse listing parameters from an S3 bucket-listing request.

    :param req: the S3 request
    :param max_keys: the client's (validated) max-keys value
    :returns: (encoding_type, query dict for the Swift backend,
               listing_type of 'object-versions'/'version-2'/'version-1',
               fetch_owner flag)
    :raises InvalidArgument: on a bad encoding-type, version-id-marker
                             without key-marker, or malformed version id
    """
    encoding_type = req.params.get('encoding-type')
    if encoding_type is not None and encoding_type != 'url':
        err_msg = 'Invalid Encoding Method specified in Request'
        raise InvalidArgument('encoding-type', encoding_type, err_msg)

    # in order to judge that truncated is valid, check whether
    # max_keys + 1 th element exists in swift.
    query = {
        'limit': max_keys + 1,
    }
    if 'prefix' in req.params:
        query['prefix'] = swob.wsgi_to_str(req.params['prefix'])
    if 'delimiter' in req.params:
        query['delimiter'] = swob.wsgi_to_str(req.params['delimiter'])
    fetch_owner = False
    if 'versions' in req.params:
        # ListObjectVersions request
        query['versions'] = swob.wsgi_to_str(req.params['versions'])
        listing_type = 'object-versions'
        version_marker = swob.wsgi_to_str(
            req.params.get('version-id-marker'))
        if 'key-marker' in req.params:
            query['marker'] = swob.wsgi_to_str(req.params['key-marker'])
            if version_marker is not None:
                if version_marker != 'null':
                    try:
                        Timestamp(version_marker)
                    except ValueError:
                        raise InvalidArgument(
                            'version-id-marker', version_marker,
                            'Invalid version id specified')
                query['version_marker'] = version_marker
        elif version_marker is not None:
            # version-id-marker requires key-marker to be meaningful
            err_msg = ('A version-id marker cannot be specified without '
                       'a key marker.')
            raise InvalidArgument('version-id-marker',
                                  version_marker, err_msg)
    elif int(req.params.get('list-type', '1')) == 2:
        # ListObjectsV2 request
        listing_type = 'version-2'
        if 'start-after' in req.params:
            query['marker'] = swob.wsgi_to_str(req.params['start-after'])
        # continuation-token overrides start-after
        if 'continuation-token' in req.params:
            decoded = b64decode(req.params['continuation-token'])
            if not six.PY2:
                decoded = decoded.decode('utf8')
            query['marker'] = decoded
        if 'fetch-owner' in req.params:
            fetch_owner = config_true_value(req.params['fetch-owner'])
    else:
        # ListObjects (v1) request
        listing_type = 'version-1'
        if 'marker' in req.params:
            query['marker'] = swob.wsgi_to_str(req.params['marker'])

    return encoding_type, query, listing_type, fetch_owner
def setUp(self):
    """Stub out internal_client and build a fake expirer task queue."""
    global not_sleep

    self.old_loadapp = internal_client.loadapp
    self.old_sleep = internal_client.sleep

    internal_client.loadapp = lambda *a, **kw: None
    internal_client.sleep = not_sleep

    self.rcache = mkdtemp()
    self.conf = {'recon_cache_path': self.rcache}
    self.logger = debug_logger('test-expirer')

    self.ts = make_timestamp_iter()
    self.past_time = str(int(time() - 86400))
    self.future_time = str(int(time() + 86400))
    # Dummy task queue for test
    self.fake_swift = FakeInternalClient({
        '.expiring_objects': {
            # this task container will be checked
            self.past_time: [
                # tasks ready for execution
                self.past_time + '-a0/c0/o0',
                self.past_time + '-a1/c1/o1',
                self.past_time + '-a2/c2/o2',
                self.past_time + '-a3/c3/o3',
                self.past_time + '-a4/c4/o4',
                self.past_time + '-a5/c5/o5',
                self.past_time + '-a6/c6/o6',
                self.past_time + '-a7/c7/o7',
                # task objects for unicode test
                self.past_time + u'-a8/c8/o8\u2661',
                self.past_time + u'-a9/c9/o9\xf8',
                # this task will be skipped
                self.future_time + '-a10/c10/o10'],
            # this task container will be skipped
            self.future_time: [
                self.future_time + '-a11/c11/o11']}
    })
    self.expirer = expirer.ObjectExpirer(self.conf, logger=self.logger,
                                         swift=self.fake_swift)

    # target object paths which should be expirerd now
    self.expired_target_path_list = [
        swob.wsgi_to_str(tgt) for tgt in (
            'a0/c0/o0', 'a1/c1/o1', 'a2/c2/o2', 'a3/c3/o3', 'a4/c4/o4',
            'a5/c5/o5', 'a6/c6/o6', 'a7/c7/o7',
            'a8/c8/o8\xe2\x99\xa1', 'a9/c9/o9\xc3\xb8',
        )
    ]
def check_object_creation(req, object_name):
    """
    Check to ensure that everything is alright about an object
    to be created.

    :param req: HTTP request object
    :param object_name: name of object to be created
    :returns: HTTPRequestEntityTooLarge -- the object is too large
    :returns: HTTPLengthRequired -- missing content-length header and not
                                    a chunked request
    :returns: HTTPBadRequest -- missing or bad content-type header, or
                                bad metadata
    :returns: HTTPNotImplemented -- unsupported transfer-encoding header value
    """
    try:
        ml = req.message_length()
    except ValueError as e:
        # use bytes bodies consistently (this function previously mixed
        # str and bytes, unlike its sibling implementation)
        return HTTPBadRequest(request=req, content_type='text/plain',
                              body=str(e).encode('ascii'))
    except AttributeError as e:
        # message_length raises AttributeError for unsupported
        # transfer-encoding values
        return HTTPNotImplemented(request=req, content_type='text/plain',
                                  body=str(e).encode('ascii'))
    if ml is not None and ml > MAX_FILE_SIZE:
        return HTTPRequestEntityTooLarge(body=b'Your request is too large.',
                                         request=req,
                                         content_type='text/plain')
    if req.content_length is None and \
            req.headers.get('transfer-encoding') != 'chunked':
        return HTTPLengthRequired(body=b'Missing Content-Length header.',
                                  request=req,
                                  content_type='text/plain')

    if len(object_name) > MAX_OBJECT_NAME_LENGTH:
        return HTTPBadRequest(body=b'Object name length of %d longer than %d'
                              % (len(object_name), MAX_OBJECT_NAME_LENGTH),
                              request=req, content_type='text/plain')

    if 'Content-Type' not in req.headers:
        return HTTPBadRequest(request=req, content_type='text/plain',
                              body=b'No content type')

    try:
        req = check_delete_headers(req)
    except HTTPException as e:
        return HTTPBadRequest(request=req, body=e.body,
                              content_type='text/plain')

    if not check_utf8(wsgi_to_str(req.headers['Content-Type'])):
        return HTTPBadRequest(request=req, body=b'Invalid Content-Type',
                              content_type='text/plain')
    return check_metadata(req, 'object')
def read(self, size=-1, offset=0, hdrs=None, buffer=None,
         callback=None, cfg=None, parms=None):
    """
    GET the object, optionally a byte range of it.

    :param size: if > 0, request bytes [offset, offset + size) via a
                 Range header
    :param offset: start of the range when size > 0
    :param hdrs: optional extra request headers (Range is added in place)
    :param buffer: optional writable object; when given, the body is
                   streamed into it in 8 KiB chunks and None is returned
    :param callback: optional callable(transferred, self.size) invoked
                     after each chunk written to buffer
    :param cfg: optional connection config dict
    :param parms: optional query parameters
    :returns: the response body bytes, or None when buffer is used
    :raises ResponseError: on a non-success status
    """
    if cfg is None:
        cfg = {}
    if parms is None:
        parms = {}
    if size > 0:
        range_string = 'bytes=%d-%d' % (offset, (offset + size) - 1)
        if hdrs:
            hdrs['Range'] = range_string
        else:
            hdrs = {'Range': range_string}

    status = self.conn.make_request('GET', self.path, hdrs=hdrs,
                                    cfg=cfg, parms=parms)

    if not is_success(status):
        raise ResponseError(self.conn.response, 'GET',
                            self.conn.make_path(self.path))

    for hdr, val in self.conn.response.getheaders():
        if hdr.lower() == 'content-type':
            self.content_type = wsgi_to_str(val)
        if hdr.lower() == 'content-range':
            self.content_range = val

    if hasattr(buffer, 'write'):
        scratch = self.conn.response.read(8192)
        transferred = 0

        while len(scratch) > 0:
            buffer.write(scratch)
            transferred += len(scratch)
            if callable(callback):
                callback(transferred, self.size)
            scratch = self.conn.response.read(8192)
        return None
    else:
        return self.conn.response.read()
def get_controller(self, req):
    """
    Get the controller to handle a request.

    :param req: the request
    :returns: tuple of (controller class, path dictionary)

    :raises ValueError: (thrown by split_path) if given invalid path
    """
    if req.path == '/info':
        d = dict(version=None,
                 expose_info=self.expose_info,
                 disallowed_sections=self.disallowed_sections,
                 admin_key=self.admin_key)
        return InfoController, d

    version, account, container, obj = split_path(wsgi_to_str(req.path),
                                                  1, 4, True)
    d = dict(version=version,
             account_name=account,
             container_name=container,
             object_name=obj)
    if account and not valid_api_version(version):
        raise APIVersionError('Invalid path')
    if obj and container and account:
        info = get_container_info(req.environ, self)
        if is_server_error(info.get('status')):
            raise HTTPServiceUnavailable(request=req)
        policy_index = req.headers.get('X-Backend-Storage-Policy-Index',
                                       info['storage_policy'])
        policy = POLICIES.get_by_index(policy_index)
        if not policy:
            # This indicates that a new policy has been created,
            # with rings, deployed, released (i.e. deprecated =
            # False), used by a client to create a container via
            # another proxy that was restarted after the policy
            # was released, and is now cached - all before this
            # worker was HUPed to stop accepting new
            # connections.  There should never be an "unknown"
            # index - but when there is - it's probably operator
            # error and hopefully temporary.
            raise HTTPServiceUnavailable('Unknown Storage Policy')
        return self.obj_controller_router[policy], d
    elif container and account:
        return ContainerController, d
    elif account and not container and not obj:
        return AccountController, d
    return None, d
def split_and_validate_path(request, minsegs=1, maxsegs=None,
                            rest_with_last=False):
    """
    Utility function to split and validate the request path.

    :returns: result of :meth:`~swift.common.utils.split_path` if
              everything's okay, as native strings
    :raises HTTPBadRequest: if something's not okay
    """
    try:
        segments = request.split_path(minsegs, maxsegs, rest_with_last)
        # first two segments are device and partition
        validate_device_partition(segments[0], segments[1])
        return list(map(wsgi_to_str, segments))
    except ValueError as err:
        raise HTTPBadRequest(body=str(err), request=request,
                             content_type='text/plain')
def get_controller(self, req):
    """
    Get the controller to handle a request.

    :param req: the request
    :returns: tuple of (controller class, path dictionary)

    :raises ValueError: (thrown by split_path) if given invalid path
    """
    if req.path == '/info':
        d = dict(version=None,
                 expose_info=self.expose_info,
                 disallowed_sections=self.disallowed_sections,
                 admin_key=self.admin_key)
        return InfoController, d

    version, account, container, obj = split_path(
        wsgi_to_str(req.path), 1, 4, True)
    d = dict(version=version,
             account_name=account,
             container_name=container,
             object_name=obj)
    if account and not valid_api_version(version):
        raise APIVersionError('Invalid path')
    if obj and container and account:
        info = get_container_info(req.environ, self)
        policy_index = req.headers.get('X-Backend-Storage-Policy-Index',
                                       info['storage_policy'])
        policy = POLICIES.get_by_index(policy_index)
        if not policy:
            # This indicates that a new policy has been created,
            # with rings, deployed, released (i.e. deprecated =
            # False), used by a client to create a container via
            # another proxy that was restarted after the policy
            # was released, and is now cached - all before this
            # worker was HUPed to stop accepting new
            # connections.  There should never be an "unknown"
            # index - but when there is - it's probably operator
            # error and hopefully temporary.
            raise HTTPServiceUnavailable('Unknown Storage Policy')
        return self.obj_controller_router[policy], d
    elif container and account:
        return ContainerController, d
    elif account and not container and not obj:
        return AccountController, d
    return None, d
def verify_v1_keys_for_path(self, wsgi_path, expected_keys, key_id=None):
    """
    Exercise the keymaster (writing v1 meta) for each request method and
    verify the key callback returns consistent v1 keys for the path.

    :param wsgi_path: WSGI-string path below /v1 to test
    :param expected_keys: key names expected from the callback
    :param key_id: optional key_id passed to the callback
    :returns: the keys observed for the PUT request
    """
    put_keys = None
    self.app.meta_version_to_write = '1'
    for method, resp_class, status in (
            ('PUT', swob.HTTPCreated, '201'),
            ('POST', swob.HTTPAccepted, '202'),
            ('GET', swob.HTTPOk, '200'),
            ('HEAD', swob.HTTPNoContent, '204')):
        resp_headers = {}
        self.swift.register(
            method, '/v1' + wsgi_path, resp_class, resp_headers, b'')
        req = Request.blank(
            '/v1' + wsgi_path, environ={'REQUEST_METHOD': method})
        start_response, calls = capture_start_response()
        self.app(req.environ, start_response)
        self.assertEqual(1, len(calls))
        self.assertTrue(calls[0][0].startswith(status))
        self.assertNotIn('swift.crypto.override', req.environ)
        self.assertIn(CRYPTO_KEY_CALLBACK, req.environ,
                      '%s not set in env' % CRYPTO_KEY_CALLBACK)
        keys = req.environ.get(CRYPTO_KEY_CALLBACK)(key_id=key_id)
        self.assertIn('id', keys)
        id = keys.pop('id')
        path = swob.wsgi_to_str(wsgi_path)
        if '//' in path:
            # v1 meta drops everything up to and including a double slash
            path = path[path.index('//') + 1:]
        if six.PY2:
            self.assertEqual(path, id['path'])
        else:
            self.assertEqual(swob.str_to_wsgi(path), id['path'])
        self.assertEqual('1', id['v'])
        keys.pop('all_ids')
        self.assertListEqual(sorted(expected_keys), sorted(keys.keys()),
                             '%s %s got keys %r, but expected %r' %
                             (method, path, keys.keys(), expected_keys))
        if put_keys is not None:
            # check all key sets were consistent for this path
            self.assertDictEqual(put_keys, keys)
        else:
            put_keys = keys
    self.app.meta_version_to_write = '2'  # Clean up after ourselves
    return put_keys
def handle_request(self, req, start_response):
    """
    Take a GET or HEAD request, and if it is for a dynamic large object
    manifest, return an appropriate response. Otherwise, simply pass it
    through.
    """
    app_iter = self._app_call(req.environ)

    # Look for the header that marks a dynamic large object manifest.
    manifest_value = next(
        (val for hdr, val in self._response_headers
         if hdr.lower() == 'x-object-manifest'),
        None)
    if manifest_value is not None:
        # It's a DLO manifest; build the segmented response instead of
        # passing the manifest body through.
        close_if_possible(app_iter)
        manifest_resp = self.get_or_head_response(
            req, wsgi_to_str(wsgi_unquote(manifest_value)))
        return manifest_resp(req.environ, start_response)

    # Not a dynamic large object manifest; just pass it through.
    start_response(self._response_status,
                   self._response_headers,
                   self._response_exc_info)
    return app_iter
def __call__(self, env, start_response):
    """
    WSGI entry point: dispatch key-requiring requests through a
    KeyMasterContext; everything else goes straight to the wrapped app.
    """
    req = Request(env)
    try:
        path_parts = [wsgi_to_str(component)
                      for component in req.split_path(2, 4, True)]
    except ValueError:
        # Path doesn't look like /v/a[/c[/o]]; not ours to handle.
        return self.app(env, start_response)

    if req.method not in ('PUT', 'POST', 'GET', 'HEAD'):
        # anything else
        return self.app(env, start_response)

    # handle only those request methods that may require keys
    context = KeyMasterContext(
        self, *path_parts[1:],
        meta_version_to_write=self.meta_version_to_write)
    try:
        return context.handle_request(req, start_response)
    except HTTPException as err_resp:
        return err_resp(env, start_response)
def _start_response(status, headers, exc_info=None):
    # Wrapper around the caller's start_response that rewrites the
    # Content-Disposition and Expires headers on successful GET/HEAD
    # responses.  NOTE(review): relies on closure variables from the
    # enclosing scope (env, inline_disposition, filename,
    # disposition_format, temp_url_expires, start_response) — not
    # visible here; confirm against the enclosing function.
    headers = self._clean_outgoing_headers(headers)
    if env['REQUEST_METHOD'] in ('GET', 'HEAD') and status[0] == '2':
        # figure out the right value for content-disposition
        # 1) use the value from the query string
        # 2) use the value from the object metadata
        # 3) use the object name (default)
        out_headers = []
        existing_disposition = None
        for h, v in headers:
            if h.lower() != 'content-disposition':
                out_headers.append((h, v))
            else:
                # remember it, but drop it from the passthrough set;
                # we append the final value exactly once below
                existing_disposition = v
        if inline_disposition:
            if filename:
                disposition_value = disposition_format('inline',
                                                       filename)
            else:
                disposition_value = 'inline'
        elif filename:
            disposition_value = disposition_format('attachment',
                                                   filename)
        elif existing_disposition:
            disposition_value = existing_disposition
        else:
            # default: attach using the last path component
            name = basename(wsgi_to_str(env['PATH_INFO']).rstrip('/'))
            disposition_value = disposition_format('attachment', name)
        # this is probably just paranoia, I couldn't actually get a
        # newline into existing_disposition
        value = disposition_value.replace('\n', '%0A')
        out_headers.append(('Content-Disposition', value))

        # include Expires header for better cache-control
        out_headers.append(('Expires', strftime(
            "%a, %d %b %Y %H:%M:%S GMT",
            gmtime(temp_url_expires))))
        headers = out_headers
    return start_response(status, headers, exc_info)
def POST(self, req):
    """
    Handles Complete Multipart Upload.

    Validates the client-supplied part list, writes an SLO manifest via
    the wrapped app (with heartbeating), cleans up the upload marker, and
    streams back an S3-style CompleteMultipartUploadResult (or error)
    document.

    :param req: an S3-style request object
    :returns: a streaming HTTPOk response whose final status may be
              overridden by the response generator (see below)
    """
    upload_id = req.params['uploadId']
    resp = _get_upload_info(req, self.app, upload_id)
    headers = {'Accept': 'application/json',
               sysmeta_header('object', 'upload-id'): upload_id}
    # Carry the user metadata from the upload marker over to the final
    # object.
    for key, val in resp.headers.items():
        _key = key.lower()
        if _key.startswith('x-amz-meta-'):
            headers['x-object-meta-' + _key[11:]] = val

    hct_header = sysmeta_header('object', 'has-content-type')
    if resp.sysmeta_headers.get(hct_header) == 'yes':
        content_type = resp.sysmeta_headers.get(
            sysmeta_header('object', 'content-type'))
    elif hct_header in resp.sysmeta_headers:
        # has-content-type is present but false, so no content type was
        # set on initial upload. In that case, we won't set one on our
        # PUT request. Swift will end up guessing one based on the
        # object name.
        content_type = None
    else:
        content_type = resp.headers.get('Content-Type')
    if content_type:
        headers['Content-Type'] = content_type

    container = req.container_name + MULTIUPLOAD_SUFFIX
    # S3-style multipart ETag: md5 of the binary part MD5s, suffixed
    # with the part count.
    s3_etag_hasher = md5(usedforsecurity=False)
    manifest = []
    previous_number = 0
    try:
        xml = req.xml(MAX_COMPLETE_UPLOAD_BODY_SIZE)
        if not xml:
            raise InvalidRequest(msg='You must specify at least one part')
        if 'content-md5' in req.headers:
            # If an MD5 was provided, we need to verify it.
            # Note that S3Request already took care of translating to ETag
            if req.headers['etag'] != md5(
                    xml, usedforsecurity=False).hexdigest():
                raise BadDigest(content_md5=req.headers['content-md5'])
            # We're only interested in the body here, in the
            # multipart-upload controller -- *don't* let it get
            # plumbed down to the object-server
            del req.headers['etag']

        complete_elem = fromstring(
            xml, 'CompleteMultipartUpload', self.logger)
        for part_elem in complete_elem.iterchildren('Part'):
            part_number = int(part_elem.find('./PartNumber').text)
            # Parts must be listed in strictly ascending order.
            if part_number <= previous_number:
                raise InvalidPartOrder(upload_id=upload_id)
            previous_number = part_number
            etag = normalize_etag(part_elem.find('./ETag').text)
            # Each part ETag must be a plain 32-hex-digit MD5.
            if len(etag) != 32 or any(c not in '0123456789abcdef'
                                      for c in etag):
                raise InvalidPart(upload_id=upload_id,
                                  part_number=part_number)
            manifest.append({
                'path': '/%s/%s/%s/%d' % (
                    wsgi_to_str(container),
                    wsgi_to_str(req.object_name),
                    upload_id, part_number),
                'etag': etag})
            s3_etag_hasher.update(binascii.a2b_hex(etag))
    except (XMLSyntaxError, DocumentInvalid):
        # NB: our schema definitions catch uploads with no parts here
        raise MalformedXML()
    except ErrorResponse:
        raise
    except Exception as e:
        self.logger.error(e)
        raise

    s3_etag = '%s-%d' % (s3_etag_hasher.hexdigest(), len(manifest))
    s3_etag_header = sysmeta_header('object', 'etag')
    if resp.sysmeta_headers.get(s3_etag_header) == s3_etag:
        # This header should only already be present if the upload marker
        # has been cleaned up and the current target uses the same
        # upload-id; assuming the segments to use haven't changed, the
        # work is already done
        return HTTPOk(body=_make_complete_body(req, s3_etag, False),
                      content_type='application/xml')
    headers[s3_etag_header] = s3_etag
    # Leave base header value blank; SLO will populate
    c_etag = '; s3_etag=%s' % s3_etag
    headers[get_container_update_override_key('etag')] = c_etag

    too_small_message = ('s3api requires that each segment be at least '
                         '%d bytes' % self.conf.min_segment_size)

    def size_checker(manifest):
        # Check the size of each segment except the last and make sure
        # they are all more than the minimum upload chunk size.
        # Note that we need to use the *internal* keys, since we're
        # looking at the manifest that's about to be written.
        return [(item['name'], too_small_message)
                for item in manifest[:-1]
                if item and item['bytes'] < self.conf.min_segment_size]

    req.environ['swift.callback.slo_manifest_hook'] = size_checker
    start_time = time.time()

    def response_iter():
        # NB: XML requires that the XML declaration, if present, be at
        # the very start of the document. Clients *will* call us out on
        # not being valid XML if we pass through whitespace before it.
        # Track whether we've sent anything yet so we can yield out that
        # declaration *first*
        yielded_anything = False
        try:
            try:
                # TODO: add support for versioning
                put_resp = req.get_response(
                    self.app, 'PUT', body=json.dumps(manifest),
                    query={'multipart-manifest': 'put',
                           'heartbeat': 'on'},
                    headers=headers)
                if put_resp.status_int == 202:
                    # Heartbeating PUT: whitespace chunks keep the
                    # connection alive; the JSON result trails at the end.
                    body = []
                    put_resp.fix_conditional_response()
                    for chunk in put_resp.response_iter:
                        if not chunk.strip():
                            if time.time() - start_time < 10:
                                # Include some grace period to keep
                                # ceph-s3tests happy
                                continue
                            if not yielded_anything:
                                yield (b'<?xml version="1.0" '
                                       b'encoding="UTF-8"?>\n')
                            yielded_anything = True
                            yield chunk
                            continue
                        body.append(chunk)
                    body = json.loads(b''.join(body))
                    if body['Response Status'] != '201 Created':
                        # Map SLO's per-segment errors onto S3 errors.
                        for seg, err in body['Errors']:
                            if err == too_small_message:
                                raise EntityTooSmall()
                            elif err in ('Etag Mismatch',
                                         '404 Not Found'):
                                raise InvalidPart(upload_id=upload_id)
                        raise InvalidRequest(
                            status=body['Response Status'],
                            msg='\n'.join(': '.join(err)
                                          for err in body['Errors']))
            except BadSwiftRequest as e:
                msg = str(e)
                if too_small_message in msg:
                    raise EntityTooSmall(msg)
                elif ', Etag Mismatch' in msg:
                    raise InvalidPart(upload_id=upload_id)
                elif ', 404 Not Found' in msg:
                    raise InvalidPart(upload_id=upload_id)
                else:
                    raise

            # clean up the multipart-upload record
            obj = '%s/%s' % (req.object_name, upload_id)
            try:
                req.get_response(self.app, 'DELETE', container, obj)
            except NoSuchKey:
                # The important thing is that we wrote out a tombstone to
                # make sure the marker got cleaned up. If it's already
                # gone (e.g., because of concurrent completes or a
                # retried complete), so much the better.
                pass

            yield _make_complete_body(req, s3_etag, yielded_anything)
        except ErrorResponse as err_resp:
            if yielded_anything:
                err_resp.xml_declaration = False
                yield b'\n'
            else:
                # Oh good, we can still change HTTP status code, too!
                # (resp below hasn't started its WSGI response yet, since
                # this generator is only consumed after resp is built.)
                resp.status = err_resp.status
            for chunk in err_resp({}, lambda *a: None):
                yield chunk

    resp = HTTPOk()  # assume we're good for now... but see above!
    resp.app_iter = reiterate(response_iter())
    resp.content_type = "application/xml"

    return resp
def handle_request(self, req):
    """
    Entry point for proxy server. Should return a WSGI-style callable
    (such as swob.Response).

    :param req: swob.Request object
    :returns: a WSGI callable; client errors (bad length, bad UTF-8,
              unroutable path, denied host/method) are returned directly,
              and any unhandled exception becomes an HTTPServerError
    """
    try:
        self.logger.set_statsd_prefix('proxy-server')
        if req.content_length and req.content_length < 0:
            self.logger.increment('errors')
            return HTTPBadRequest(request=req,
                                  body='Invalid Content-Length')
        try:
            if not check_utf8(wsgi_to_str(req.path_info)):
                self.logger.increment('errors')
                return HTTPPreconditionFailed(
                    request=req, body='Invalid UTF8 or contains NULL')
        except UnicodeError:
            self.logger.increment('errors')
            return HTTPPreconditionFailed(
                request=req, body='Invalid UTF8 or contains NULL')

        try:
            controller, path_parts = self.get_controller(req)
        except APIVersionError:
            self.logger.increment('errors')
            return HTTPBadRequest(request=req)
        except ValueError:
            self.logger.increment('errors')
            return HTTPNotFound(request=req)
        if not controller:
            self.logger.increment('errors')
            return HTTPPreconditionFailed(request=req, body='Bad URL')
        if self.deny_host_headers and \
                req.host.split(':')[0] in self.deny_host_headers:
            return HTTPForbidden(request=req, body='Invalid host header')

        self.logger.set_statsd_prefix('proxy-server.' +
                                      controller.server_type.lower())
        controller = controller(self, **path_parts)
        if 'swift.trans_id' not in req.environ:
            # if this wasn't set by an earlier middleware, set it now
            trans_id_suffix = self.trans_id_suffix
            trans_id_extra = req.headers.get('x-trans-id-extra')
            if trans_id_extra:
                trans_id_suffix += '-' + trans_id_extra[:32]
            trans_id = generate_trans_id(trans_id_suffix)
            req.environ['swift.trans_id'] = trans_id
            self.logger.txn_id = trans_id
        req.headers['x-trans-id'] = req.environ['swift.trans_id']
        controller.trans_id = req.environ['swift.trans_id']
        self.logger.client_ip = get_remote_client(req)

        allowed_methods = controller.allowed_methods
        if config_true_value(req.headers.get(
                'X-Backend-Allow-Private-Methods', False)):
            # internal clients may opt in to the controller's
            # private (replication-type) methods
            allowed_methods = set(allowed_methods).union(
                controller.private_methods)
        if req.method not in allowed_methods:
            return HTTPMethodNotAllowed(request=req, headers={
                'Allow': ', '.join(allowed_methods)})
        handler = getattr(controller, req.method)
        old_authorize = None
        if 'swift.authorize' in req.environ:
            # We call authorize before the handler, always. If authorized,
            # we remove the swift.authorize hook so isn't ever called
            # again. If not authorized, we return the denial unless the
            # controller's method indicates it'd like to gather more
            # information and try again later.
            resp = req.environ['swift.authorize'](req)
            if not resp:
                # No resp means authorized, no delayed recheck required.
                old_authorize = req.environ['swift.authorize']
            else:
                # Response indicates denial, but we might delay the denial
                # and recheck later. If not delayed, return the error now.
                if not getattr(handler, 'delay_denial', None):
                    return resp
        # Save off original request method (GET, POST, etc.) in case it
        # gets mutated during handling.  This way logging can display the
        # method the client actually sent.
        req.environ.setdefault('swift.orig_req_method', req.method)
        try:
            if old_authorize:
                req.environ.pop('swift.authorize', None)
            return handler(req)
        finally:
            # restore the hook so delayed-denial rechecks still work
            if old_authorize:
                req.environ['swift.authorize'] = old_authorize
    except HTTPException as error_response:
        return error_response
    except (Exception, Timeout):
        self.logger.exception(_('ERROR Unhandled exception in request'))
        return HTTPServerError(request=req)
def handle_request(self, req):
    """
    Entry point for proxy server. Should return a WSGI-style callable
    (such as swob.Response).

    :param req: swob.Request object
    :returns: a WSGI callable; client errors (bad length, bad UTF-8,
              unroutable path, denied host/method) are returned directly,
              and any unhandled exception becomes an HTTPServerError
    """
    try:
        self.logger.set_statsd_prefix('proxy-server')
        if req.content_length and req.content_length < 0:
            self.logger.increment('errors')
            return HTTPBadRequest(request=req,
                                  body='Invalid Content-Length')
        try:
            # reserved (null-prefixed) names are only valid for
            # internal clients
            if not check_utf8(wsgi_to_str(req.path_info),
                              internal=req.allow_reserved_names):
                self.logger.increment('errors')
                return HTTPPreconditionFailed(
                    request=req, body='Invalid UTF8 or contains NULL')
        except UnicodeError:
            self.logger.increment('errors')
            return HTTPPreconditionFailed(
                request=req, body='Invalid UTF8 or contains NULL')

        try:
            controller, path_parts = self.get_controller(req)
        except APIVersionError:
            self.logger.increment('errors')
            return HTTPBadRequest(request=req)
        except ValueError:
            self.logger.increment('errors')
            return HTTPNotFound(request=req)
        if not controller:
            self.logger.increment('errors')
            return HTTPPreconditionFailed(request=req, body='Bad URL')
        if self.deny_host_headers and \
                req.host.split(':')[0] in self.deny_host_headers:
            return HTTPForbidden(request=req, body='Invalid host header')

        self.logger.set_statsd_prefix('proxy-server.' +
                                      controller.server_type.lower())
        controller = controller(self, **path_parts)
        if 'swift.trans_id' not in req.environ:
            # if this wasn't set by an earlier middleware, set it now
            trans_id_suffix = self.trans_id_suffix
            trans_id_extra = req.headers.get('x-trans-id-extra')
            if trans_id_extra:
                trans_id_suffix += '-' + trans_id_extra[:32]
            trans_id = generate_trans_id(trans_id_suffix)
            req.environ['swift.trans_id'] = trans_id
            self.logger.txn_id = trans_id
        req.headers['x-trans-id'] = req.environ['swift.trans_id']
        controller.trans_id = req.environ['swift.trans_id']
        self.logger.client_ip = get_remote_client(req)

        allowed_methods = controller.allowed_methods
        if config_true_value(
                req.headers.get('X-Backend-Allow-Private-Methods',
                                False)):
            # internal clients may opt in to the controller's
            # private (replication-type) methods
            allowed_methods = set(allowed_methods).union(
                controller.private_methods)
        if req.method not in allowed_methods:
            return HTTPMethodNotAllowed(
                request=req,
                headers={'Allow': ', '.join(allowed_methods)})
        handler = getattr(controller, req.method)
        old_authorize = None
        if 'swift.authorize' in req.environ:
            # We call authorize before the handler, always. If authorized,
            # we remove the swift.authorize hook so isn't ever called
            # again. If not authorized, we return the denial unless the
            # controller's method indicates it'd like to gather more
            # information and try again later.
            resp = req.environ['swift.authorize'](req)
            if not resp:
                # No resp means authorized, no delayed recheck required.
                old_authorize = req.environ['swift.authorize']
            else:
                # Response indicates denial, but we might delay the denial
                # and recheck later. If not delayed, return the error now.
                if not getattr(handler, 'delay_denial', None):
                    return resp
        # Save off original request method (GET, POST, etc.) in case it
        # gets mutated during handling.  This way logging can display the
        # method the client actually sent.
        req.environ.setdefault('swift.orig_req_method', req.method)
        try:
            if old_authorize:
                req.environ.pop('swift.authorize', None)
            return handler(req)
        finally:
            # restore the hook so delayed-denial rechecks still work
            if old_authorize:
                req.environ['swift.authorize'] = old_authorize
    except HTTPException as error_response:
        return error_response
    except (Exception, Timeout):
        self.logger.exception(_('ERROR Unhandled exception in request'))
        return HTTPServerError(request=req)
def PUT(self, req):
    """
    Handle HTTP PUT request.

    Covers three cases: recording an object row in the container DB,
    merging shard ranges (x-backend-record-type: shard), and creating or
    updating the container itself.

    :param req: swob.Request object
    :returns: HTTPCreated/HTTPAccepted on success, or an appropriate
              error response
    """
    drive, part, account, container, obj = get_obj_name_and_placement(req)
    req_timestamp = valid_timestamp(req)
    if 'x-container-sync-to' in req.headers:
        err, sync_to, realm, realm_key = validate_sync_to(
            req.headers['x-container-sync-to'], self.allowed_sync_hosts,
            self.realms_conf)
        if err:
            return HTTPBadRequest(err)
    try:
        check_drive(self.root, drive, self.mount_check)
    except ValueError:
        return HTTPInsufficientStorage(drive=drive, request=req)
    if not self.check_free_space(drive):
        return HTTPInsufficientStorage(drive=drive, request=req)
    requested_policy_index = self.get_and_validate_policy_index(req)
    broker = self._get_container_broker(drive, part, account, container)
    if obj:     # put container object
        # obj put expects the policy_index header, default is for
        # legacy support during upgrade.
        obj_policy_index = requested_policy_index or 0
        self._maybe_autocreate(
            broker, req_timestamp, account, obj_policy_index)
        # redirect if a shard exists for this object name
        response = self._redirect_to_shard(req, broker, obj)
        if response:
            return response

        broker.put_object(obj, req_timestamp.internal,
                          int(req.headers['x-size']),
                          wsgi_to_str(req.headers['x-content-type']),
                          wsgi_to_str(req.headers['x-etag']), 0,
                          obj_policy_index,
                          wsgi_to_str(req.headers.get(
                              'x-content-type-timestamp')),
                          wsgi_to_str(req.headers.get('x-meta-timestamp')))
        return HTTPCreated(request=req)

    record_type = req.headers.get('x-backend-record-type', '').lower()
    if record_type == RECORD_TYPE_SHARD:
        try:
            # validate incoming data...
            shard_ranges = [ShardRange.from_dict(sr)
                            for sr in json.loads(req.body)]
        except (ValueError, KeyError, TypeError) as err:
            return HTTPBadRequest('Invalid body: %r' % err)
        created = self._maybe_autocreate(
            broker, req_timestamp, account, requested_policy_index)
        self._update_metadata(req, broker, req_timestamp, 'PUT')
        if shard_ranges:
            # TODO: consider writing the shard ranges into the pending
            # file, but if so ensure an all-or-none semantic for the write
            broker.merge_shard_ranges(shard_ranges)
    else:   # put container
        if requested_policy_index is None:
            # use the default index sent by the proxy if available
            new_container_policy = req.headers.get(
                'X-Backend-Storage-Policy-Default', int(POLICIES.default))
        else:
            new_container_policy = requested_policy_index
        created = self._update_or_create(req, broker,
                                         req_timestamp.internal,
                                         new_container_policy,
                                         requested_policy_index)
        self._update_metadata(req, broker, req_timestamp, 'PUT')
        # tell the account server about this container; a non-None
        # response here is an error to return to the client
        resp = self.account_update(req, account, container, broker)
        if resp:
            return resp
    if created:
        return HTTPCreated(request=req,
                           headers={'x-backend-storage-policy-index':
                                    broker.storage_policy_index})
    else:
        return HTTPAccepted(request=req,
                            headers={'x-backend-storage-policy-index':
                                     broker.storage_policy_index})
def authorize(self, req):
    """
    Returns None if the request is authorized to continue or a standard
    WSGI response callable if not.

    Checks, in order: reseller-prefix ownership, ACL-header syntax,
    reseller admin, account admin, container-sync key, OPTIONS,
    referrer/group ACLs, and finally X-Account-Access-Control account
    ACLs.  The order is significant — earlier grants short-circuit later
    checks.
    """
    try:
        _junk, account, container, obj = req.split_path(1, 4, True)
    except ValueError:
        self.logger.increment('errors')
        return HTTPNotFound(request=req)

    if self._get_account_prefix(account) is None:
        self.logger.debug("Account name: %s doesn't start with "
                          "reseller_prefix(s): %s."
                          % (account, ','.join(self.reseller_prefixes)))
        return self.denied_response(req)

    # At this point, TempAuth is convinced that it is authoritative.
    # If you are sending an ACL header, it must be syntactically valid
    # according to TempAuth's rules for ACL syntax.
    acl_data = req.headers.get('x-account-access-control')
    if acl_data is not None:
        error = self.extract_acl_and_report_errors(req)
        if error:
            msg = 'X-Account-Access-Control invalid: %s\n\nInput: %s\n' % (
                error, acl_data)
            headers = [('Content-Type', 'text/plain; charset=UTF-8')]
            return HTTPBadRequest(request=req, headers=headers, body=msg)

    user_groups = (req.remote_user or '').split(',')
    # by convention the second group is the account:user name
    account_user = user_groups[1] if len(user_groups) > 1 else None

    if '.reseller_admin' in user_groups and \
            account not in self.reseller_prefixes and \
            not self._dot_account(account):
        req.environ['swift_owner'] = True
        self.logger.debug("User %s has reseller admin authorizing."
                          % account_user)
        return None

    if wsgi_to_str(account) in user_groups and \
            (req.method not in ('DELETE', 'PUT') or container):
        # The user is admin for the account and is not trying to do an
        # account DELETE or PUT
        account_prefix = self._get_account_prefix(account)
        require_group = self.account_rules.get(account_prefix).get(
            'require_group')
        if require_group and require_group in user_groups:
            req.environ['swift_owner'] = True
            self.logger.debug("User %s has admin and %s group."
                              " Authorizing." % (account_user,
                                                 require_group))
            return None
        elif not require_group:
            req.environ['swift_owner'] = True
            self.logger.debug("User %s has admin authorizing."
                              % account_user)
            return None

    if (req.environ.get('swift_sync_key')
            and (req.environ['swift_sync_key'] ==
                 req.headers.get('x-container-sync-key', None))
            and 'x-timestamp' in req.headers):
        self.logger.debug("Allow request with container sync-key: %s."
                          % req.environ['swift_sync_key'])
        return None

    if req.method == 'OPTIONS':
        # allow OPTIONS requests to proceed as normal
        self.logger.debug("Allow OPTIONS request.")
        return None

    referrers, groups = parse_acl(getattr(req, 'acl', None))

    if referrer_allowed(req.referer, referrers):
        # referrer ACLs grant object access; listings additionally
        # require the .rlistings group
        if obj or '.rlistings' in groups:
            self.logger.debug("Allow authorizing %s via referer ACL."
                              % req.referer)
            return None

    for user_group in user_groups:
        if user_group in groups:
            self.logger.debug("User %s allowed in ACL: %s authorizing."
                              % (account_user, user_group))
            return None

    # Check for access via X-Account-Access-Control
    acct_acls = self.account_acls(req)
    if acct_acls:
        # At least one account ACL is set in this account's sysmeta data,
        # so we should see whether this user is authorized by the ACLs.
        user_group_set = set(user_groups)
        if user_group_set.intersection(acct_acls['admin']):
            req.environ['swift_owner'] = True
            self.logger.debug('User %s allowed by X-Account-Access-Control'
                              ' (admin)' % account_user)
            return None
        if (user_group_set.intersection(acct_acls['read-write']) and
                (container or req.method in ('GET', 'HEAD'))):
            # The RW ACL allows all operations to containers/objects, but
            # only GET/HEAD to accounts (and OPTIONS, above)
            self.logger.debug('User %s allowed by X-Account-Access-Control'
                              ' (read-write)' % account_user)
            return None
        if (user_group_set.intersection(acct_acls['read-only']) and
                req.method in ('GET', 'HEAD')):
            self.logger.debug('User %s allowed by X-Account-Access-Control'
                              ' (read-only)' % account_user)
            return None

    return self.denied_response(req)
def get_or_head_response(self, req, x_object_manifest):
    '''
    Build the segmented GET/HEAD response for a DLO manifest.

    Fetches (one page of) the segment container listing, works out
    whether a single-range request can be honored, computes
    Content-Length/Etag when the listing is complete, and for GET wires
    up a rate-limited SegmentedIterable as the response body.

    :param req: user's request
    :param x_object_manifest: as unquoted, native string
    '''
    response_headers = self._response_headers

    container, obj_prefix = x_object_manifest.split('/', 1)

    version, account, _junk = req.split_path(2, 3, True)
    version = wsgi_to_str(version)
    account = wsgi_to_str(account)
    error_response, segments = self._get_container_listing(
        req, version, account, container, obj_prefix)
    if error_response:
        return error_response
    # fewer than a full page of results means we saw every segment
    have_complete_listing = len(segments) < \
        constraints.CONTAINER_LISTING_LIMIT

    first_byte = last_byte = None
    actual_content_length = None
    content_length_for_swob_range = None
    if req.range and len(req.range.ranges) == 1:
        content_length_for_swob_range = sum(o['bytes'] for o in segments)

        # This is a hack to handle suffix byte ranges (e.g. "bytes=-5"),
        # which we can't honor unless we have a complete listing.
        _junk, range_end = req.range.ranges_for_length(float("inf"))[0]

        # If this is all the segments, we know whether or not this
        # range request is satisfiable.
        #
        # Alternately, we may not have all the segments, but this range
        # falls entirely within the first page's segments, so we know
        # that it is satisfiable.
        if (have_complete_listing
                or range_end < content_length_for_swob_range):
            byteranges = req.range.ranges_for_length(
                content_length_for_swob_range)
            if not byteranges:
                headers = {'Accept-Ranges': 'bytes'}
                if have_complete_listing:
                    headers['Content-Range'] = 'bytes */%d' % (
                        content_length_for_swob_range, )
                return HTTPRequestedRangeNotSatisfiable(
                    request=req, headers=headers)
            first_byte, last_byte = byteranges[0]
            # For some reason, swob.Range.ranges_for_length adds 1 to the
            # last byte's position.
            last_byte -= 1
            actual_content_length = last_byte - first_byte + 1
        else:
            # The range may or may not be satisfiable, but we can't tell
            # based on just one page of listing, and we're not going to
            # go get more pages because that would use up too many
            # resources, so we ignore the Range header and return the
            # whole object.
            actual_content_length = None
            content_length_for_swob_range = None
            req.range = None
    # strip stale length/range headers from the backend response
    response_headers = [
        (h, v) for h, v in response_headers
        if h.lower() not in ("content-length", "content-range")]
    if content_length_for_swob_range is not None:
        # Here, we have to give swob a big-enough content length so that
        # it can compute the actual content length based on the Range
        # header. This value will not be visible to the client; swob will
        # substitute its own Content-Length.
        #
        # Note: if the manifest points to at least
        # CONTAINER_LISTING_LIMIT segments, this may be less than the
        # sum of all the segments' sizes. However, it'll still be
        # greater than the last byte in the Range header, so it's good
        # enough for swob.
        response_headers.append(('Content-Length',
                                 str(content_length_for_swob_range)))
    elif have_complete_listing:
        actual_content_length = sum(o['bytes'] for o in segments)
        response_headers.append(('Content-Length',
                                 str(actual_content_length)))

    if have_complete_listing:
        # DLO Etag is the md5 of the concatenated segment hashes
        response_headers = [(h, v) for h, v in response_headers
                            if h.lower() != "etag"]
        etag = md5()
        for seg_dict in segments:
            etag.update(seg_dict['hash'].strip('"').encode('utf8'))
        response_headers.append(('Etag', '"%s"' % etag.hexdigest()))

    app_iter = None
    if req.method == 'GET':
        listing_iter = RateLimitedIterator(
            self._segment_listing_iterator(
                req, version, account, container, obj_prefix, segments,
                first_byte=first_byte, last_byte=last_byte),
            self.dlo.rate_limit_segments_per_sec,
            limit_after=self.dlo.rate_limit_after_segment)

        app_iter = SegmentedIterable(
            req, self.dlo.app, listing_iter, ua_suffix="DLO MultipartGET",
            swift_source="DLO", name=req.path, logger=self.logger,
            max_get_time=self.dlo.max_get_time,
            response_body_length=actual_content_length)

        try:
            # fail fast (with a real status code) if the first segment
            # can't be fetched
            app_iter.validate_first_segment()
        except (SegmentError, ListingIterError):
            return HTTPConflict(request=req)

    resp = Response(request=req, headers=response_headers,
                    conditional_response=True, app_iter=app_iter)
    return resp
def handle_get_token(self, req):
    """
    Handles the various `request for token and service end point(s)`
    calls. There are various formats to support the various auth servers
    in the past. Examples::

        GET <auth-prefix>/v1/<act>/auth
            X-Auth-User: <act>:<usr>  or  X-Storage-User: <usr>
            X-Auth-Key: <key>         or  X-Storage-Pass: <key>
        GET <auth-prefix>/auth
            X-Auth-User: <act>:<usr>  or  X-Storage-User: <act>:<usr>
            X-Auth-Key: <key>         or  X-Storage-Pass: <key>
        GET <auth-prefix>/v1.0
            X-Auth-User: <act>:<usr>  or  X-Storage-User: <act>:<usr>
            X-Auth-Key: <key>         or  X-Storage-Pass: <key>

    On successful authentication, the response will have X-Auth-Token
    and X-Storage-Token set to the token to use with Swift and
    X-Storage-URL set to the URL to the default Swift cluster to use.

    :param req: The swob.Request to process.
    :returns: swob.Response, 2xx on success with data set as explained
              above.
    """
    # Validate the request info
    try:
        pathsegs = split_path(req.path_info, 1, 3, True)
    except ValueError:
        self.logger.increment('errors')
        return HTTPNotFound(request=req)
    if pathsegs[0] == 'v1' and pathsegs[2] == 'auth':
        # account is in the path; user may be bare or account:user
        account = pathsegs[1]
        user = req.headers.get('x-storage-user')
        if not user:
            user = req.headers.get('x-auth-user')
            if not user or ':' not in user:
                self.logger.increment('token_denied')
                auth = 'Swift realm="%s"' % account
                return HTTPUnauthorized(request=req,
                                        headers={'Www-Authenticate':
                                                 auth})
            account2, user = user.split(':', 1)
            # account in the header must match the one in the path
            if wsgi_to_str(account) != account2:
                self.logger.increment('token_denied')
                auth = 'Swift realm="%s"' % account
                return HTTPUnauthorized(request=req,
                                        headers={'Www-Authenticate':
                                                 auth})
        key = req.headers.get('x-storage-pass')
        if not key:
            key = req.headers.get('x-auth-key')
    elif pathsegs[0] in ('auth', 'v1.0'):
        # account comes from the account:user header value
        user = req.headers.get('x-auth-user')
        if not user:
            user = req.headers.get('x-storage-user')
        if not user or ':' not in user:
            self.logger.increment('token_denied')
            auth = 'Swift realm="unknown"'
            return HTTPUnauthorized(request=req,
                                    headers={'Www-Authenticate': auth})
        account, user = user.split(':', 1)
        key = req.headers.get('x-auth-key')
        if not key:
            key = req.headers.get('x-storage-pass')
    else:
        return HTTPBadRequest(request=req)
    if not all((account, user, key)):
        self.logger.increment('token_denied')
        realm = account or 'unknown'
        return HTTPUnauthorized(request=req,
                                headers={'Www-Authenticate':
                                         'Swift realm="%s"' % realm})
    # Authenticate user
    account_user = account + ':' + user
    if account_user not in self.users:
        self.logger.increment('token_denied')
        auth = 'Swift realm="%s"' % account
        return HTTPUnauthorized(request=req,
                                headers={'Www-Authenticate': auth})
    if self.users[account_user]['key'] != key:
        self.logger.increment('token_denied')
        auth = 'Swift realm="unknown"'
        return HTTPUnauthorized(request=req,
                                headers={'Www-Authenticate': auth})
    account_id = self.users[account_user]['url'].rsplit('/', 1)[-1]
    # Get memcache client
    memcache_client = cache_from_env(req.environ)
    if not memcache_client:
        raise Exception('Memcache required')
    # See if a token already exists and hasn't expired
    token = None
    memcache_user_key = '%s/user/%s' % (self.reseller_prefix,
                                        account_user)
    candidate_token = memcache_client.get(memcache_user_key)
    if candidate_token:
        memcache_token_key = \
            '%s/token/%s' % (self.reseller_prefix, candidate_token)
        cached_auth_data = memcache_client.get(memcache_token_key)
        if cached_auth_data:
            expires, old_groups = cached_auth_data
            old_groups = [group.encode('utf8') if six.PY2 else group
                          for group in old_groups.split(',')]
            new_groups = self._get_user_groups(account, account_user,
                                               account_id)
            # only reuse the token if it's live and the user's group
            # membership hasn't changed since it was cached
            if expires > time() and \
                    set(old_groups) == set(new_groups.split(',')):
                token = candidate_token
    # Create a new token if one didn't exist
    if not token:
        # Generate new token
        token = '%stk%s' % (self.reseller_prefix, uuid4().hex)
        expires = time() + self.token_life
        groups = self._get_user_groups(account, account_user,
                                       account_id)
        # Save token
        memcache_token_key = '%s/token/%s' % (self.reseller_prefix,
                                              token)
        memcache_client.set(memcache_token_key, (expires, groups),
                            time=float(expires - time()))
        # Record the token with the user info for future use.
        memcache_user_key = \
            '%s/user/%s' % (self.reseller_prefix, account_user)
        memcache_client.set(memcache_user_key, token,
                            time=float(expires - time()))
    resp = Response(request=req, headers={
        'x-auth-token': token, 'x-storage-token': token,
        'x-auth-token-expires': str(int(expires - time()))})
    url = self.users[account_user]['url'].replace('$HOST', resp.host_url)
    if self.storage_url_scheme != 'default':
        url = self.storage_url_scheme + ':' + url.split(':', 1)[1]
    resp.headers['x-storage-url'] = url
    return resp
def _listing(self, env, start_response, prefix=None):
    """
    Sends an HTML object listing to the remote client.

    Behavior depends on the container's web-listings settings: if
    listings are disabled a 404 page (possibly mentioning the missing
    index file) is returned; otherwise a delimiter-based container GET
    is issued and rendered as an HTML table.

    :param env: The original WSGI environment dict.
    :param start_response: The original WSGI start_response hook.
    :param prefix: Any prefix desired for the container listing.
    """
    # NOTE(review): cgi.escape was removed in Python 3.8; on py3 this
    # presumably relies on a compat shim or an older interpreter — confirm.
    label = wsgi_to_str(env['PATH_INFO'])
    if self._listings_label:
        # assumes PATH_INFO looks like /v1/<acct>/<container>/...;
        # groups[4:] keeps only the path below the container — TODO confirm
        groups = wsgi_to_str(env['PATH_INFO']).split('/')
        label = '{0}/{1}'.format(self._listings_label,
                                 '/'.join(groups[4:]))
    if not config_true_value(self._listings):
        # Listings disabled: emit a static 404 explanation page.
        body = '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 ' \
            'Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">\n' \
            '<html>\n' \
            '<head>\n' \
            '<title>Listing of %s</title>\n' % cgi.escape(label)
        if self._listings_css:
            body += ' <link rel="stylesheet" type="text/css" ' \
                'href="%s" />\n' % self._build_css_path(prefix or '')
        else:
            body += ' <style type="text/css">\n' \
                ' h1 {font-size: 1em; font-weight: bold;}\n' \
                ' p {font-size: 2}\n' \
                ' </style>\n'
        body += '</head>\n<body>' \
            ' <h1>Web Listing Disabled</h1>' \
            ' <p>The owner of this web site has disabled web listing.' \
            ' <p>If you are the owner of this web site, you can enable' \
            ' web listing by setting X-Container-Meta-Web-Listings.</p>'
        if self._index:
            body += '<h1>Index File Not Found</h1>' \
                ' <p>The owner of this web site has set ' \
                ' <b>X-Container-Meta-Web-Index: %s</b>. ' \
                ' However, this file is not found.</p>' % self._index
        body += ' </body>\n</html>\n'
        resp = HTTPNotFound(body=body)(env, self._start_response)
        return self._error_response(resp, env, start_response)
    # Listings enabled: issue a delimiter container GET as a subrequest.
    tmp_env = make_env(
        env, 'GET', '/%s/%s/%s' % (
            self.version, self.account, self.container),
        self.agent, swift_source='SW')
    tmp_env['QUERY_STRING'] = 'delimiter=/'
    if prefix:
        tmp_env['QUERY_STRING'] += '&prefix=%s' % wsgi_quote(prefix)
    else:
        prefix = ''
    resp = self._app_call(tmp_env)
    if not is_success(self._get_status_int()):
        return self._error_response(resp, env, start_response)
    listing = None
    body = b''.join(resp)
    if body:
        listing = json.loads(body)
    if not listing:
        # Empty container (or empty prefix match) renders as 404.
        resp = HTTPNotFound()(env, self._start_response)
        return self._error_response(resp, env, start_response)
    headers = {'Content-Type': 'text/html; charset=UTF-8'}
    body = '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 ' \
        'Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">\n' \
        '<html>\n' \
        ' <head>\n' \
        ' <title>Listing of %s</title>\n' % \
        cgi.escape(label)
    if self._listings_css:
        body += ' <link rel="stylesheet" type="text/css" ' \
            'href="%s" />\n' % (self._build_css_path(prefix))
    else:
        body += ' <style type="text/css">\n' \
            ' h1 {font-size: 1em; font-weight: bold;}\n' \
            ' th {text-align: left; padding: 0px 1em 0px 1em;}\n' \
            ' td {padding: 0px 1em 0px 1em;}\n' \
            ' a {text-decoration: none;}\n' \
            ' </style>\n'
    body += ' </head>\n' \
        ' <body>\n' \
        ' <h1 id="title">Listing of %s</h1>\n' \
        ' <table id="listing">\n' \
        ' <tr id="heading">\n' \
        ' <th class="colname">Name</th>\n' \
        ' <th class="colsize">Size</th>\n' \
        ' <th class="coldate">Date</th>\n' \
        ' </tr>\n' % cgi.escape(label)
    if prefix:
        # Link back to the parent "directory" when listing a prefix.
        body += ' <tr id="parent" class="item">\n' \
            ' <td class="colname"><a href="../">../</a></td>\n' \
            ' <td class="colsize"> </td>\n' \
            ' <td class="coldate"> </td>\n' \
            ' </tr>\n'
    # Subdirectories first, then objects (two passes over the listing).
    for item in listing:
        if 'subdir' in item:
            # py2 returns unicode from json; encode for byte-oriented quote.
            subdir = item['subdir'] if six.PY3 else \
                item['subdir'].encode('utf-8')
            if prefix:
                subdir = subdir[len(prefix):]
            body += ' <tr class="item subdir">\n' \
                ' <td class="colname"><a href="%s">%s</a></td>\n' \
                ' <td class="colsize"> </td>\n' \
                ' <td class="coldate"> </td>\n' \
                ' </tr>\n' % \
                (quote(subdir), cgi.escape(subdir))
    for item in listing:
        if 'name' in item:
            name = item['name'] if six.PY3 else \
                item['name'].encode('utf-8')
            if prefix:
                name = name[len(prefix):]
            content_type = item['content_type'] if six.PY3 else \
                item['content_type'].encode('utf-8')
            bytes = human_readable(item['bytes'])
            # Drop fractional seconds and the ISO 'T' separator for display.
            last_modified = (
                cgi.escape(item['last_modified'] if six.PY3 else
                           item['last_modified'].encode('utf-8')).
                split('.')[0].replace('T', ' '))
            body += ' <tr class="item %s">\n' \
                ' <td class="colname"><a href="%s">%s</a></td>\n' \
                ' <td class="colsize">%s</td>\n' \
                ' <td class="coldate">%s</td>\n' \
                ' </tr>\n' % \
                (' '.join('type-' + cgi.escape(t.lower(), quote=True)
                          for t in content_type.split('/')),
                 quote(name), cgi.escape(name),
                 bytes, last_modified)
    body += ' </table>\n' \
        ' </body>\n' \
        '</html>\n'
    resp = Response(headers=headers, body=body)
    return resp(env, start_response)
def handle_object(self, env, start_response):
    """
    Handles a possible static web request for an object. This object could
    resolve into an index or listing request.

    The method first tries the object itself; a directory-marker object
    (content-type == the configured dir type, length <= 1) is treated as
    not found so that index/listing resolution can proceed.

    :param env: The original WSGI environment dict.
    :param start_response: The original WSGI start_response hook.
    """
    tmp_env = dict(env)
    tmp_env['HTTP_USER_AGENT'] = \
        '%s StaticWeb' % env.get('HTTP_USER_AGENT')
    tmp_env['swift.source'] = 'SW'
    # _app_call records status/headers/exc_info on self._response_* —
    # the branches below depend on that side effect.
    resp = self._app_call(tmp_env)
    status_int = self._get_status_int()
    self._get_container_info(env)
    if is_success(status_int) or is_redirection(status_int):
        # Treat directory marker objects as not found
        if not self._dir_type:
            self._dir_type = 'application/directory'
        content_length = self._response_header_value('content-length')
        content_length = int(content_length) if content_length else 0
        if self._response_header_value('content-type') == self._dir_type \
                and content_length <= 1:
            status_int = HTTP_NOT_FOUND
        else:
            # Plain object hit: pass the backend response straight through.
            start_response(self._response_status, self._response_headers,
                           self._response_exc_info)
            return resp
    if status_int != HTTP_NOT_FOUND:
        # Retaining the previous code's behavior of not using custom error
        # pages for non-404 errors.
        self._error = None
        return self._error_response(resp, env, start_response)
    if not self._listings and not self._index:
        # Nothing configured to fall back on; return the 404 as-is.
        start_response(self._response_status, self._response_headers,
                       self._response_exc_info)
        return resp
    status_int = HTTP_NOT_FOUND
    if self._index:
        # Try <path>/<index-file> as a sub-GET.
        tmp_env = dict(env)
        tmp_env['HTTP_USER_AGENT'] = \
            '%s StaticWeb' % env.get('HTTP_USER_AGENT')
        tmp_env['swift.source'] = 'SW'
        if not tmp_env['PATH_INFO'].endswith('/'):
            tmp_env['PATH_INFO'] += '/'
        tmp_env['PATH_INFO'] += self._index
        resp = self._app_call(tmp_env)
        status_int = self._get_status_int()
        if is_success(status_int) or is_redirection(status_int):
            if not env['PATH_INFO'].endswith('/'):
                # Index exists but the client asked without a trailing
                # slash; redirect so relative links resolve correctly.
                return self._redirect_with_slash(env, start_response)
            start_response(self._response_status, self._response_headers,
                           self._response_exc_info)
            return resp
    if status_int == HTTP_NOT_FOUND:
        if not env['PATH_INFO'].endswith('/'):
            # Probe the container for anything under "<obj>/"; if present,
            # the request names a pseudo-directory and gets a slash redirect.
            tmp_env = make_env(
                env, 'GET', '/%s/%s/%s' % (
                    self.version, self.account, self.container),
                self.agent, swift_source='SW')
            tmp_env['QUERY_STRING'] = 'limit=1&delimiter=/&prefix=%s' % (
                quote(wsgi_to_str(self.obj) + '/'), )
            resp = self._app_call(tmp_env)
            body = b''.join(resp)
            if not is_success(self._get_status_int()) or not body or \
                    not json.loads(body):
                resp = HTTPNotFound()(env, self._start_response)
                return self._error_response(resp, env, start_response)
            return self._redirect_with_slash(env, start_response)
        return self._listing(env, start_response, self.obj)
def PUT(self, req):
    """
    Handle HTTP PUT request.

    A container-server PUT is one of three distinct operations,
    dispatched on the request path and headers:

    * an object path records an object update in the container DB
      (see :meth:`_put_object`);
    * a container path with ``X-Backend-Record-Type: shard`` merges
      shard ranges into the container DB (see :meth:`_put_shard_ranges`);
    * any other container path creates or updates the container itself
      (see :meth:`_put_container`).

    :param req: swob Request
    :returns: a swob response object
    """
    drive, part, account, container, obj = split_and_validate_path(
        req, 4, 5, True)
    req_timestamp = valid_timestamp(req)
    if 'x-container-sync-to' in req.headers:
        # Reject a malformed sync-to target up front; a bad value is a
        # client error, not something to persist.
        err, sync_to, realm, realm_key = validate_sync_to(
            req.headers['x-container-sync-to'], self.allowed_sync_hosts,
            self.realms_conf)
        if err:
            return HTTPBadRequest(err)
    try:
        check_drive(self.root, drive, self.mount_check)
    except ValueError:
        return HTTPInsufficientStorage(drive=drive, request=req)
    if not self.check_free_space(drive):
        return HTTPInsufficientStorage(drive=drive, request=req)
    requested_policy_index = self.get_and_validate_policy_index(req)
    broker = self._get_container_broker(drive, part, account, container)
    if obj:
        # put container object
        return self._put_object(req, broker, account, obj, req_timestamp,
                                requested_policy_index)

    record_type = req.headers.get('x-backend-record-type', '').lower()
    if record_type == RECORD_TYPE_SHARD:
        created, error_resp = self._put_shard_ranges(
            req, broker, account, req_timestamp, requested_policy_index)
        if error_resp:
            return error_resp
    else:
        # put container
        created = self._put_container(
            req, broker, req_timestamp, requested_policy_index)
    resp = self.account_update(req, account, container, broker)
    if resp:
        return resp
    if created:
        return HTTPCreated(request=req,
                           headers={'x-backend-storage-policy-index':
                                    broker.storage_policy_index})
    else:
        return HTTPAccepted(request=req,
                            headers={'x-backend-storage-policy-index':
                                     broker.storage_policy_index})

def _put_object(self, req, broker, account, obj, req_timestamp,
                requested_policy_index):
    """
    Record a single object update in the container DB, autocreating the
    container if necessary. Returns a redirect response when the object
    name now belongs to a shard, else 201 Created.
    """
    # obj put expects the policy_index header, default is for
    # legacy support during upgrade.
    obj_policy_index = requested_policy_index or 0
    self._maybe_autocreate(broker, req_timestamp, account,
                           obj_policy_index)
    # redirect if a shard exists for this object name
    response = self._redirect_to_shard(req, broker, obj)
    if response:
        return response
    broker.put_object(obj, req_timestamp.internal,
                      int(req.headers['x-size']),
                      wsgi_to_str(req.headers['x-content-type']),
                      wsgi_to_str(req.headers['x-etag']), 0,
                      obj_policy_index,
                      wsgi_to_str(req.headers.get(
                          'x-content-type-timestamp')),
                      wsgi_to_str(req.headers.get('x-meta-timestamp')))
    return HTTPCreated(request=req)

def _put_shard_ranges(self, req, broker, account, req_timestamp,
                      requested_policy_index):
    """
    Merge shard ranges from the request body into the container DB.

    :returns: a ``(created, error_resp)`` tuple; ``error_resp`` is a
        400 response when the body is not valid shard-range JSON,
        otherwise None.
    """
    try:
        # validate incoming data...
        shard_ranges = [ShardRange.from_dict(sr)
                        for sr in json.loads(req.body)]
    except (ValueError, KeyError, TypeError) as err:
        return False, HTTPBadRequest('Invalid body: %r' % err)
    created = self._maybe_autocreate(broker, req_timestamp, account,
                                     requested_policy_index)
    self._update_metadata(req, broker, req_timestamp, 'PUT')
    if shard_ranges:
        # TODO: consider writing the shard ranges into the pending
        # file, but if so ensure an all-or-none semantic for the write
        broker.merge_shard_ranges(shard_ranges)
    return created, None

def _put_container(self, req, broker, req_timestamp,
                   requested_policy_index):
    """
    Create or update the container itself.

    :returns: True if a new container DB was created, else False.
    """
    if requested_policy_index is None:
        # use the default index sent by the proxy if available
        new_container_policy = req.headers.get(
            'X-Backend-Storage-Policy-Default', int(POLICIES.default))
    else:
        new_container_policy = requested_policy_index
    created = self._update_or_create(req, broker,
                                     req_timestamp.internal,
                                     new_container_policy,
                                     requested_policy_index)
    self._update_metadata(req, broker, req_timestamp, 'PUT')
    return created
def handle_delete_iter(self, req, objs_to_delete=None,
                       user_agent='BulkDelete', swift_source='BD',
                       out_content_type='text/plain'):
    """
    A generator that can be assigned to a swob Response's app_iter which,
    when iterated over, will delete the objects specified in request body.
    Will occasionally yield whitespace while request is being processed.
    When the request is completed will yield a response body that can be
    parsed to determine success. See above documentation for details.

    :params req: a swob Request
    :params objs_to_delete: a list of dictionaries that specifies the
        (native string) objects to be deleted. If None, uses
        self.get_objs_to_delete to query request.
    """
    last_yield = time()
    if out_content_type and out_content_type.endswith('/xml'):
        # XML keepalive bytes must still parse once the real body follows.
        to_yield = b'<?xml version="1.0" encoding="UTF-8"?>\n'
    else:
        to_yield = b' '
    separator = b''
    failed_files = []
    resp_dict = {'Response Status': HTTPOk().status,
                 'Response Body': '',
                 'Number Deleted': 0,
                 'Number Not Found': 0}
    # Force small keepalive yields to be flushed to the client
    # immediately (eventlet buffering knob — presumably; confirm).
    req.environ['eventlet.minimum_write_chunk_size'] = 0
    try:
        if not out_content_type:
            raise HTTPNotAcceptable(request=req)

        try:
            vrs, account, _junk = req.split_path(2, 3, True)
        except ValueError:
            raise HTTPNotFound(request=req)
        vrs = wsgi_to_str(vrs)
        account = wsgi_to_str(account)
        incoming_format = req.headers.get('Content-Type')
        if incoming_format and \
                not incoming_format.startswith('text/plain'):
            # For now only accept newline separated object names
            raise HTTPNotAcceptable(request=req)
        if objs_to_delete is None:
            objs_to_delete = self.get_objs_to_delete(req)
        failed_file_response = {'type': HTTPBadRequest}

        def delete_filter(predicate, objs_to_delete):
            # Yield (name, path) pairs for entries matching predicate;
            # entries that already carry an error are accounted for here
            # (404s counted, others recorded as failures) and skipped.
            for obj_to_delete in objs_to_delete:
                obj_name = obj_to_delete['name']
                if not obj_name:
                    continue
                if not predicate(obj_name):
                    continue
                if obj_to_delete.get('error'):
                    if obj_to_delete['error']['code'] == HTTP_NOT_FOUND:
                        resp_dict['Number Not Found'] += 1
                    else:
                        failed_files.append([
                            wsgi_quote(str_to_wsgi(obj_name)),
                            obj_to_delete['error']['message']])
                    continue
                delete_path = '/'.join(['', vrs, account,
                                        obj_name.lstrip('/')])
                if not constraints.check_utf8(delete_path):
                    failed_files.append(
                        [wsgi_quote(str_to_wsgi(obj_name)),
                         HTTPPreconditionFailed().status])
                    continue
                yield (obj_name, delete_path)

        def objs_then_containers(objs_to_delete):
            # process all objects first
            yield delete_filter(lambda name: '/' in name.strip('/'),
                                objs_to_delete)
            # followed by containers
            yield delete_filter(lambda name: '/' not in name.strip('/'),
                                objs_to_delete)

        def do_delete(obj_name, delete_path):
            # Issue the actual DELETE as a subrequest; retry count starts
            # at 0 (third tuple element).
            delete_obj_req = make_subrequest(
                req.environ, method='DELETE',
                path=wsgi_quote(str_to_wsgi(delete_path)),
                headers={'X-Auth-Token': req.headers.get('X-Auth-Token')},
                body='', agent='%(orig)s ' + user_agent,
                swift_source=swift_source)
            return (delete_obj_req.get_response(self.app), obj_name, 0)

        with StreamingPile(self.delete_concurrency) as pile:
            for names_to_delete in objs_then_containers(objs_to_delete):
                for resp, obj_name, retry in pile.asyncstarmap(
                        do_delete, names_to_delete):
                    if last_yield + self.yield_frequency < time():
                        # Emit keepalive whitespace so the client
                        # connection doesn't time out mid-bulk.
                        last_yield = time()
                        yield to_yield
                        to_yield, separator = b' ', b'\r\n\r\n'
                    self._process_delete(resp, pile, obj_name,
                                         resp_dict, failed_files,
                                         failed_file_response, retry)
                    if len(failed_files) >= self.max_failed_deletes:
                        # Abort, but drain off the in-progress deletes
                        for resp, obj_name, retry in pile:
                            if last_yield + self.yield_frequency < time():
                                last_yield = time()
                                yield to_yield
                                to_yield, separator = b' ', b'\r\n\r\n'
                            # Don't pass in the pile, as we shouldn't retry
                            self._process_delete(
                                resp, None, obj_name, resp_dict,
                                failed_files, failed_file_response,
                                retry)
                        msg = 'Max delete failures exceeded'
                        raise HTTPBadRequest(msg)

        if failed_files:
            resp_dict['Response Status'] = \
                failed_file_response['type']().status
        elif not (resp_dict['Number Deleted'] or
                  resp_dict['Number Not Found']):
            # Nothing was deleted and nothing was even found: the request
            # body named no valid work.
            resp_dict['Response Status'] = HTTPBadRequest().status
            resp_dict['Response Body'] = 'Invalid bulk delete.'
    except HTTPException as err:
        resp_dict['Response Status'] = err.status
        resp_dict['Response Body'] = err.body.decode('utf-8')
    except Exception:
        self.logger.exception('Error in bulk delete.')
        resp_dict['Response Status'] = HTTPServerError().status
    # Final chunk: separator (set once keepalive bytes were sent) plus
    # the machine-parseable result body.
    yield separator + get_response_body(out_content_type,
                                        resp_dict, failed_files, 'delete')
def handle_extract_iter(self, req, compress_type,
                        out_content_type='text/plain'):
    """
    A generator that can be assigned to a swob Response's app_iter which,
    when iterated over, will extract and PUT the objects pulled from the
    request body. Will occasionally yield whitespace while request is
    being processed. When the request is completed will yield a response
    body that can be parsed to determine success. See above documentation
    for details.

    :params req: a swob Request
    :params compress_type: specifying the compression type of the tar.
        Accepts '', 'gz', or 'bz2'
    """
    resp_dict = {'Response Status': HTTPCreated().status,
                 'Response Body': '', 'Number Files Created': 0}
    failed_files = []
    last_yield = time()
    if out_content_type and out_content_type.endswith('/xml'):
        # XML keepalive bytes must still parse once the real body follows.
        to_yield = b'<?xml version="1.0" encoding="UTF-8"?>\n'
    else:
        to_yield = b' '
    separator = b''
    containers_accessed = set()
    # Force small keepalive yields to be flushed to the client
    # immediately (eventlet buffering knob — presumably; confirm).
    req.environ['eventlet.minimum_write_chunk_size'] = 0
    try:
        if not out_content_type:
            raise HTTPNotAcceptable(request=req)

        if req.content_length is None and \
                req.headers.get('transfer-encoding',
                                '').lower() != 'chunked':
            raise HTTPLengthRequired(request=req)
        try:
            vrs, account, extract_base = req.split_path(2, 3, True)
        except ValueError:
            raise HTTPNotFound(request=req)
        extract_base = extract_base or ''
        extract_base = extract_base.rstrip('/')
        # 'r|' opens the tar in non-seekable stream mode, required since
        # the source is the request body.
        tar = tarfile.open(mode='r|' + compress_type,
                           fileobj=req.body_file)
        failed_response_type = HTTPBadRequest
        containers_created = 0
        while True:
            if last_yield + self.yield_frequency < time():
                # Emit keepalive whitespace so the client connection
                # doesn't time out during a long extraction.
                last_yield = time()
                yield to_yield
                to_yield, separator = b' ', b'\r\n\r\n'
            tar_info = tar.next()
            if tar_info is None or \
                    len(failed_files) >= self.max_failed_extractions:
                break
            if tar_info.isfile():
                obj_path = tar_info.name
                if not six.PY2:
                    # tarfile gives a str; round-trip through bytes to
                    # get a WSGI-style (latin-1-safe) path string.
                    obj_path = obj_path.encode('utf-8', 'surrogateescape')
                    obj_path = bytes_to_wsgi(obj_path)
                if obj_path.startswith('./'):
                    obj_path = obj_path[2:]
                obj_path = obj_path.lstrip('/')
                if extract_base:
                    obj_path = extract_base + '/' + obj_path
                if '/' not in obj_path:
                    continue  # ignore base level file

                destination = '/'.join(
                    ['', vrs, account, obj_path])
                container = obj_path.split('/', 1)[0]
                if not constraints.check_utf8(wsgi_to_str(destination)):
                    failed_files.append(
                        [wsgi_quote(obj_path[:self.max_path_length]),
                         HTTPPreconditionFailed().status])
                    continue
                if tar_info.size > constraints.MAX_FILE_SIZE:
                    failed_files.append([
                        wsgi_quote(obj_path[:self.max_path_length]),
                        HTTPRequestEntityTooLarge().status])
                    continue
                container_failure = None
                if container not in containers_accessed:
                    # First time we see this container: try to create it,
                    # bounded by max_containers per request.
                    cont_path = '/'.join(['', vrs, account, container])
                    try:
                        if self.create_container(req, cont_path):
                            containers_created += 1
                            if containers_created > self.max_containers:
                                raise HTTPBadRequest(
                                    'More than %d containers to create '
                                    'from tar.' % self.max_containers)
                    except CreateContainerError as err:
                        # the object PUT to this container still may
                        # succeed if acls are set
                        container_failure = [
                            wsgi_quote(cont_path[:self.max_path_length]),
                            err.status]
                        if err.status_int == HTTP_UNAUTHORIZED:
                            raise HTTPUnauthorized(request=req)
                    except ValueError:
                        failed_files.append([
                            wsgi_quote(obj_path[:self.max_path_length]),
                            HTTPBadRequest().status])
                        continue

                tar_file = tar.extractfile(tar_info)
                create_headers = {
                    'Content-Length': tar_info.size,
                    'X-Auth-Token': req.headers.get('X-Auth-Token'),
                }

                # PUT the member as an object, streaming directly from
                # the tar file handle via wsgi.input.
                create_obj_req = make_subrequest(
                    req.environ, method='PUT',
                    path=wsgi_quote(destination),
                    headers=create_headers,
                    agent='%(orig)s BulkExpand', swift_source='EA')
                create_obj_req.environ['wsgi.input'] = tar_file

                for pax_key, pax_value in tar_info.pax_headers.items():
                    header_name = pax_key_to_swift_header(pax_key)
                    if header_name:
                        # Both pax_key and pax_value are unicode
                        # strings; the key is already UTF-8 encoded, but
                        # we still have to encode the value.
                        create_obj_req.headers[header_name] = \
                            pax_value.encode("utf-8")

                resp = create_obj_req.get_response(self.app)
                containers_accessed.add(container)
                if resp.is_success:
                    resp_dict['Number Files Created'] += 1
                else:
                    if container_failure:
                        failed_files.append(container_failure)
                    if resp.status_int == HTTP_UNAUTHORIZED:
                        failed_files.append([
                            wsgi_quote(obj_path[:self.max_path_length]),
                            HTTPUnauthorized().status])
                        raise HTTPUnauthorized(request=req)
                    if resp.status_int // 100 == 5:
                        # Any server error downgrades the overall status
                        # to 502 rather than 400.
                        failed_response_type = HTTPBadGateway
                    failed_files.append([
                        wsgi_quote(obj_path[:self.max_path_length]),
                        resp.status])

        if failed_files:
            resp_dict['Response Status'] = failed_response_type().status
        elif not resp_dict['Number Files Created']:
            resp_dict['Response Status'] = HTTPBadRequest().status
            resp_dict['Response Body'] = 'Invalid Tar File: No Valid Files'
    except HTTPException as err:
        resp_dict['Response Status'] = err.status
        resp_dict['Response Body'] = err.body.decode('utf-8')
    except (tarfile.TarError, zlib.error) as tar_error:
        resp_dict['Response Status'] = HTTPBadRequest().status
        resp_dict['Response Body'] = 'Invalid Tar File: %s' % tar_error
    except Exception:
        self.logger.exception('Error in extract archive.')
        resp_dict['Response Status'] = HTTPServerError().status

    # Final chunk: separator (set once keepalive bytes were sent) plus
    # the machine-parseable result body.
    yield separator + get_response_body(
        out_content_type, resp_dict, failed_files, 'extract')