def handle_object(self, env, start_response):
    """
    Handles a possible static web request for an object. This object
    could resolve into an index or listing request.

    :param env: The original WSGI environment dict.
    :param start_response: The original WSGI start_response hook.
    """
    tmp_env = dict(env)
    tmp_env['HTTP_USER_AGENT'] = \
        '%s StaticWeb' % env.get('HTTP_USER_AGENT')
    resp = self._app_call(tmp_env)
    status_int = self._get_status_int()
    if is_success(status_int) or is_redirection(status_int):
        start_response(self._response_status, self._response_headers,
                       self._response_exc_info)
        return resp
    if status_int != HTTP_NOT_FOUND:
        return self._error_response(resp, env, start_response)
    self._get_container_info(env)
    if not self._listings and not self._index:
        return self.app(env, start_response)
    status_int = HTTP_NOT_FOUND
    if self._index:
        tmp_env = dict(env)
        tmp_env['HTTP_USER_AGENT'] = \
            '%s StaticWeb' % env.get('HTTP_USER_AGENT')
        if tmp_env['PATH_INFO'][-1] != '/':
            tmp_env['PATH_INFO'] += '/'
        tmp_env['PATH_INFO'] += self._index
        resp = self._app_call(tmp_env)
        status_int = self._get_status_int()
        if is_success(status_int) or is_redirection(status_int):
            if env['PATH_INFO'][-1] != '/':
                resp = HTTPMovedPermanently(
                    location=env['PATH_INFO'] + '/')
                self._log_response(env, resp.status_int)
                return resp(env, start_response)
            start_response(self._response_status, self._response_headers,
                           self._response_exc_info)
            return resp
    if status_int == HTTP_NOT_FOUND:
        if env['PATH_INFO'][-1] != '/':
            tmp_env = make_pre_authed_env(
                env, 'GET', '/%s/%s/%s' % (
                    self.version, self.account, self.container),
                self.agent)
            tmp_env['QUERY_STRING'] = \
                'limit=1&format=json&delimiter=/&prefix=%s' % \
                quote(self.obj + '/')
            resp = self._app_call(tmp_env)
            body = ''.join(resp)
            if not is_success(self._get_status_int()) or not body or \
                    not json.loads(body):
                resp = HTTPNotFound()(env, self._start_response)
                return self._error_response(resp, env, start_response)
            resp = HTTPMovedPermanently(location=env['PATH_INFO'] + '/')
            self._log_response(env, resp.status_int)
            return resp(env, start_response)
        return self._listing(env, start_response, self.obj)

def handle_put(self, req, start_response):
    self._check_headers(req)
    keys = self.get_keys(req.environ, required=['object', 'container'])
    self.encrypt_user_metadata(req, keys)

    enc_input_proxy = EncInputWrapper(self.crypto, keys, req, self.logger)
    req.environ['wsgi.input'] = enc_input_proxy

    resp = self._app_call(req.environ)

    # If an etag is in the response headers and a plaintext etag was
    # calculated, then overwrite the response value with the plaintext
    # etag provided it matches the ciphertext etag. If it does not match
    # then do not overwrite and allow the response value to return to
    # client.
    mod_resp_headers = self._response_headers
    if (is_success(self._get_status_int()) and
            enc_input_proxy.plaintext_md5):
        plaintext_etag = enc_input_proxy.plaintext_md5.hexdigest()
        ciphertext_etag = enc_input_proxy.ciphertext_md5.hexdigest()
        mod_resp_headers = [
            (h, v if (h.lower() != 'etag' or
                      v.strip('"') != ciphertext_etag)
             else plaintext_etag)
            for h, v in mod_resp_headers]

    start_response(self._response_status, mod_resp_headers,
                   self._response_exc_info)
    return resp

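# A minimal, self-contained sketch of the etag-rewrite comprehension above,
# using made-up digests (not part of the middleware itself): only an 'Etag'
# header whose value matches the ciphertext digest is swapped for the
# plaintext digest; every other header passes through untouched.
plaintext_etag = 'd41d8cd98f00b204e9800998ecf8427e'
ciphertext_etag = '900150983cd24fb0d6963f7d28e17f72'
resp_headers = [('Content-Length', '0'),
                ('Etag', '"%s"' % ciphertext_etag)]
mod_resp_headers = [
    (h, v if (h.lower() != 'etag' or v.strip('"') != ciphertext_etag)
     else plaintext_etag)
    for h, v in resp_headers]
assert mod_resp_headers == [('Content-Length', '0'),
                            ('Etag', plaintext_etag)]
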
def _listing_pages_iter(self, lcontainer, lprefix, env):
    lpartition, lnodes = self.app.container_ring.get_nodes(
        self.account_name, lcontainer)
    marker = ''
    while True:
        lreq = Request.blank('i will be overridden by env', environ=env)
        # Don't quote PATH_INFO, by WSGI spec
        lreq.environ['PATH_INFO'] = \
            '/%s/%s' % (self.account_name, lcontainer)
        lreq.environ['REQUEST_METHOD'] = 'GET'
        lreq.environ['QUERY_STRING'] = \
            'format=json&prefix=%s&marker=%s' % (quote(lprefix),
                                                 quote(marker))
        lnodes = self.app.sort_nodes(lnodes)
        lresp = self.GETorHEAD_base(
            lreq, _('Container'), lpartition, lnodes, lreq.path_info,
            len(lnodes))
        if 'swift.authorize' in env:
            lreq.acl = lresp.headers.get('x-container-read')
            aresp = env['swift.authorize'](lreq)
            if aresp:
                raise ListingIterNotAuthorized(aresp)
        if lresp.status_int == HTTP_NOT_FOUND:
            raise ListingIterNotFound()
        elif not is_success(lresp.status_int):
            raise ListingIterError()
        if not lresp.body:
            break
        sublisting = json.loads(lresp.body)
        if not sublisting:
            break
        marker = sublisting[-1]['name'].encode('utf-8')
        yield sublisting

def GETorHEAD(self, req):
    """Handler for HTTP GET/HEAD requests."""
    partition, nodes = self.app.account_ring.get_nodes(self.account_name)
    resp = self.GETorHEAD_base(
        req, _('Account'), self.app.account_ring, partition,
        req.path_info.rstrip('/'))
    if resp.status_int == HTTP_NOT_FOUND and self.app.account_autocreate:
        if len(self.account_name) > MAX_ACCOUNT_NAME_LENGTH:
            resp = HTTPBadRequest(request=req)
            resp.body = 'Account name length of %d longer than %d' % \
                        (len(self.account_name), MAX_ACCOUNT_NAME_LENGTH)
            return resp
        headers = self.generate_request_headers(req)
        resp = self.make_requests(
            Request.blank('/v1/' + self.account_name),
            self.app.account_ring, partition, 'PUT',
            '/' + self.account_name, [headers] * len(nodes))
        if not is_success(resp.status_int):
            self.app.logger.warning('Could not autocreate account %r' %
                                    self.account_name)
            return resp
        resp = self.GETorHEAD_base(
            req, _('Account'), self.app.account_ring, partition,
            req.path_info.rstrip('/'))
    return resp

def _connect_put_node(self, nodes, part, path, headers,
                      logger_thread_locals):
    """Method for a file PUT connect"""
    self.app.logger.thread_locals = logger_thread_locals
    for node in nodes:
        try:
            start_time = time.time()
            with ConnectionTimeout(self.app.conn_timeout):
                conn = http_connect(
                    node['ip'], node['port'], node['device'], part,
                    'PUT', path, headers)
            self.app.set_node_timing(node, time.time() - start_time)
            with Timeout(self.app.node_timeout):
                resp = conn.getexpect()
            if resp.status == HTTP_CONTINUE:
                conn.resp = None
                conn.node = node
                return conn
            elif is_success(resp.status):
                conn.resp = resp
                conn.node = node
                return conn
            elif resp.status == HTTP_INSUFFICIENT_STORAGE:
                self.error_limit(node)
        except (Exception, Timeout):
            # catch Timeout explicitly; a bare except would also swallow
            # it, but hides real coding errors
            self.exception_occurred(
                node, _('Object'),
                _('Expect: 100-continue on %s') % path)

def _error_response(self, response, env, start_response):
    """
    Sends the error response to the remote client, possibly resolving a
    custom error response body based on x-container-meta-web-error.

    :param response: The error response we should default to sending.
    :param env: The original request WSGI environment.
    :param start_response: The WSGI start_response hook.
    """
    if not self._error:
        start_response(self._response_status, self._response_headers,
                       self._response_exc_info)
        return response
    save_response_status = self._response_status
    save_response_headers = self._response_headers
    save_response_exc_info = self._response_exc_info
    resp = self._app_call(make_pre_authed_env(
        env, 'GET', '/%s/%s/%s/%s%s' % (
            self.version, self.account, self.container,
            self._get_status_int(), self._error),
        self.agent, swift_source='SW'))
    if is_success(self._get_status_int()):
        start_response(save_response_status, self._response_headers,
                       self._response_exc_info)
        return resp
    start_response(save_response_status, save_response_headers,
                   save_response_exc_info)
    return response

def handle_container(self, env, start_response):
    """
    Handles a possible static web request for a container.

    :param env: The original WSGI environment dict.
    :param start_response: The original WSGI start_response hook.
    """
    self._get_container_info(env)
    if not self._listings and not self._index:
        if config_true_value(env.get('HTTP_X_WEB_MODE', 'f')):
            return HTTPNotFound()(env, start_response)
        return self.app(env, start_response)
    if env['PATH_INFO'][-1] != '/':
        resp = HTTPMovedPermanently(
            location=(env['PATH_INFO'] + '/'))
        return resp(env, start_response)
    if not self._index:
        return self._listing(env, start_response)
    tmp_env = dict(env)
    tmp_env['HTTP_USER_AGENT'] = \
        '%s StaticWeb' % env.get('HTTP_USER_AGENT')
    tmp_env['swift.source'] = 'SW'
    tmp_env['PATH_INFO'] += self._index
    resp = self._app_call(tmp_env)
    status_int = self._get_status_int()
    if status_int == HTTP_NOT_FOUND:
        return self._listing(env, start_response)
    elif not is_success(status_int) and not is_redirection(status_int):
        # note: this must be 'and', not 'or'; with 'or' every successful
        # index response would be treated as an error
        return self._error_response(resp, env, start_response)
    start_response(self._response_status, self._response_headers,
                   self._response_exc_info)
    return resp

def POST(self, req):
    """HTTP POST request handler."""
    account_partition, accounts = \
        self.app.account_ring.get_nodes(self.account_name)
    headers = {'X-Timestamp': normalize_timestamp(time.time()),
               'X-Trans-Id': self.trans_id,
               'Connection': 'close'}
    self.transfer_headers(req.headers, headers)
    resp = self.make_requests(
        self.account_name, req, self.app.account_ring,
        account_partition, 'POST', req.path_info,
        [headers] * len(accounts))
    if resp.status_int == HTTP_NOT_FOUND and self.app.account_autocreate:
        if len(self.account_name) > MAX_ACCOUNT_NAME_LENGTH:
            respbody = 'Account name length of %d longer than %d' % \
                       (len(self.account_name), MAX_ACCOUNT_NAME_LENGTH)
            return jresponse('-1', respbody, req, 400)
        resp = self.make_requests(
            self.account_name, Request.blank('/v1/' + self.account_name),
            self.app.account_ring, account_partition, 'PUT',
            '/' + self.account_name, [headers] * len(accounts))
        if not is_success(resp.status_int):
            self.app.logger.warning('Could not autocreate account %r' %
                                    self.account_name)
            return resp
    return resp

def META(self, req):
    """Handler for HTTP META requests."""
    partition, nodes = self.app.account_ring.get_nodes(self.account_name)
    shuffle(nodes)
    resp = self.META_base(req, _('Account'), partition, nodes,
                          req.path_info.rstrip('/'), len(nodes))
    if resp.status_int == HTTP_NOT_FOUND and self.app.account_autocreate:
        if len(self.account_name) > MAX_ACCOUNT_NAME_LENGTH:
            respbody = 'Account name length of %d longer than %d' % \
                       (len(self.account_name), MAX_ACCOUNT_NAME_LENGTH)
            return jresponse('-1', respbody, req, 400)
        headers = {'X-Timestamp': normalize_timestamp(time.time()),
                   'X-Trans-Id': self.trans_id,
                   'Connection': 'close'}
        resp = self.make_requests(
            self.account_name, Request.blank('/v1/' + self.account_name),
            self.app.account_ring, partition, 'PUT',
            '/' + self.account_name, [headers] * len(nodes))
        if not is_success(resp.status_int):
            self.app.logger.warning('Could not autocreate account %r' %
                                    self.account_name)
            return resp
        resp = self.META_base(req, _('Account'), partition, nodes,
                              req.path_info.rstrip('/'), len(nodes))
    return resp

def POST(self, req):
    """HTTP POST request handler."""
    error_response = check_metadata(req, 'account')
    if error_response:
        return error_response
    account_partition, accounts = \
        self.app.account_ring.get_nodes(self.account_name)
    headers = {'X-Timestamp': normalize_timestamp(time.time()),
               'X-Trans-Id': self.trans_id,
               'Connection': 'close'}
    self.transfer_headers(req.headers, headers)
    if self.app.memcache:
        self.app.memcache.delete('account%s' % req.path_info.rstrip('/'))
    resp = self.make_requests(
        req, self.app.account_ring, account_partition, 'POST',
        req.path_info, [headers] * len(accounts))
    if resp.status_int == HTTP_NOT_FOUND and self.app.account_autocreate:
        if len(self.account_name) > MAX_ACCOUNT_NAME_LENGTH:
            resp = HTTPBadRequest(request=req)
            resp.body = 'Account name length of %d longer than %d' % \
                        (len(self.account_name), MAX_ACCOUNT_NAME_LENGTH)
            return resp
        resp = self.make_requests(
            Request.blank('/v1/' + self.account_name),
            self.app.account_ring, account_partition, 'PUT',
            '/' + self.account_name, [headers] * len(accounts))
        if not is_success(resp.status_int):
            self.app.logger.warning('Could not autocreate account %r' %
                                    self.account_name)
            return resp
    return resp

def _fetch_sub_slo_segments(self, req, version, acc, con, obj):
    """
    Fetch the submanifest, parse it, and return it.
    Raise exception on failures.
    """
    sub_req = make_subrequest(
        req.environ, path='/'.join(['', version, acc, con, obj]),
        method='GET',
        headers={'x-auth-token': req.headers.get('x-auth-token')},
        agent='%(orig)s SLO MultipartGET', swift_source='SLO')
    sub_resp = sub_req.get_response(self.slo.app)

    if not is_success(sub_resp.status_int):
        close_if_possible(sub_resp.app_iter)
        raise ListingIterError(
            'ERROR: while fetching %s, GET of submanifest %s '
            'failed with status %d' % (req.path, sub_req.path,
                                       sub_resp.status_int))

    try:
        with closing_if_possible(sub_resp.app_iter):
            return json.loads(''.join(sub_resp.app_iter))
    except ValueError as err:
        raise ListingIterError(
            'ERROR: while fetching %s, JSON-decoding of submanifest %s '
            'failed with %s' % (req.path, sub_req.path, err))

def GETorHEAD(self, env, start_response):
    app_iter = self._app_call(env)
    status = self._get_status_int()
    headers = dict(self._response_headers)

    if is_success(status):
        if 'QUERY_STRING' in env:
            args = dict(urlparse.parse_qsl(env['QUERY_STRING'], 1))
        else:
            args = {}
        if 'acl' in args:
            return get_acl(self.account_name)

        new_hdrs = {}
        for key, val in headers.iteritems():
            _key = key.lower()
            if _key.startswith('x-object-meta-'):
                new_hdrs['x-amz-meta-' + key[14:]] = val
            elif _key in ('content-length', 'content-type',
                          'content-range', 'content-encoding',
                          'etag', 'last-modified'):
                new_hdrs[key] = val
        return Response(status=status, headers=new_hdrs,
                        app_iter=app_iter)
    elif status == HTTP_UNAUTHORIZED:
        return get_err_response('AccessDenied')
    elif status == HTTP_NOT_FOUND:
        return get_err_response('NoSuchKey')
    else:
        return get_err_response('InvalidURI')

def get_container_info(self, app):
    """
    get_container_info will return a result dict of get_container_info
    from the backend Swift.

    :returns: a dictionary of container info from
              swift.controllers.base.get_container_info
    :raises: NoSuchBucket when the container doesn't exist
    :raises: InternalError when the request failed without 404
    """
    if self.is_authenticated:
        # If we have already authenticated, we can use the resolved
        # account name (e.g. AUTH_xxx) directly, for efficiency.
        sw_req = self.to_swift_req(app, self.container_name, None)
        info = get_container_info(sw_req.environ, app)
        if is_success(info['status']):
            return info
        elif info['status'] == 404:
            raise NoSuchBucket(self.container_name)
        else:
            raise InternalError(
                'unexpected status code %d' % info['status'])
    else:
        # Otherwise we do a naive HEAD request with the authentication.
        resp = self.get_response(app, 'HEAD', self.container_name, '')
        return headers_to_container_info(
            resp.sw_headers, resp.status_int)  # pylint: disable-msg=E1101

def object_update(self, node, part, op, obj, headers_out):
    """
    Perform the object update to the container

    :param node: node dictionary from the container ring
    :param part: partition that holds the container
    :param op: operation performed (ex: 'PUT' or 'DELETE')
    :param obj: object name being updated
    :param headers_out: headers to send with the update
    """
    try:
        with ConnectionTimeout(self.conn_timeout):
            conn = http_connect(node['ip'], node['port'], node['device'],
                                part, op, obj, headers_out)
        with Timeout(self.node_timeout):
            resp = conn.getresponse()
            resp.read()
            success = is_success(resp.status)
            if not success:
                self.logger.debug(
                    _('Error code %(status)d is returned from remote '
                      'server %(ip)s: %(port)s / %(device)s'),
                    {'status': resp.status, 'ip': node['ip'],
                     'port': node['port'], 'device': node['device']})
            return (success, node['id'])
    except (Exception, Timeout):
        self.logger.exception(_('ERROR with remote server '
                                '%(ip)s:%(port)s/%(device)s'), node)
    return HTTP_INTERNAL_SERVER_ERROR, node['id']

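# Hedged usage sketch (not from the updater module): drive object_update()
# across the container ring. 'updater' stands in for an already-constructed
# updater instance, and the ring path and names are illustrative.
import time
from swift.common.ring import Ring
from swift.common.utils import normalize_timestamp

container_ring = Ring('/etc/swift/container.ring.gz')
part, nodes = container_ring.get_nodes('AUTH_test', 'mycontainer')
headers_out = {'X-Timestamp': normalize_timestamp(time.time()),
               'X-Size': '0',
               'X-Content-Type': 'text/plain',
               'X-Etag': 'd41d8cd98f00b204e9800998ecf8427e'}
results = [updater.object_update(node, part, 'PUT',
                                 '/AUTH_test/mycontainer/obj', headers_out)
           for node in nodes]
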
def _get_direct_account_container(path, stype, node, part,
                                  marker=None, limit=None,
                                  prefix=None, delimiter=None,
                                  conn_timeout=5, response_timeout=15):
    """Base helper for direct account and container GETs.

    Do not use directly; use direct_get_account or direct_get_container
    instead.
    """
    qs = 'format=json'
    if marker:
        qs += '&marker=%s' % quote(marker)
    if limit:
        qs += '&limit=%d' % limit
    if prefix:
        qs += '&prefix=%s' % quote(prefix)
    if delimiter:
        qs += '&delimiter=%s' % quote(delimiter)
    with Timeout(conn_timeout):
        conn = http_connect(node['ip'], node['port'], node['device'], part,
                            'GET', path, query_string=qs,
                            headers=gen_headers())
    with Timeout(response_timeout):
        resp = conn.getresponse()
    if not is_success(resp.status):
        resp.read()
        raise DirectClientException(stype, 'GET', node, part, path, resp)

    resp_headers = HeaderKeyDict()
    for header, value in resp.getheaders():
        resp_headers[header] = value
    if resp.status == HTTP_NO_CONTENT:
        resp.read()
        return resp_headers, []
    return resp_headers, json.loads(resp.read())

def direct_post_object(node, part, account, container, name, headers,
                       conn_timeout=5, response_timeout=15):
    """
    Direct update to object metadata on object server.

    :param node: node dictionary from the ring
    :param part: partition the container is on
    :param account: account name
    :param container: container name
    :param name: object name
    :param headers: headers to store as metadata
    :param conn_timeout: timeout in seconds for establishing the connection
    :param response_timeout: timeout in seconds for getting the response
    :raises ClientException: HTTP POST request failed
    """
    path = '/%s/%s/%s' % (account, container, name)
    with Timeout(conn_timeout):
        conn = http_connect(node['ip'], node['port'], node['device'], part,
                            'POST', path,
                            headers=gen_headers(headers, True))
    with Timeout(response_timeout):
        resp = conn.getresponse()
        resp.read()
    if not is_success(resp.status):
        raise DirectClientException('Object', 'POST',
                                    node, part, path, resp)

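# Hedged usage sketch (requires a reachable object server; the ring path and
# names are illustrative): set one piece of object metadata on every primary
# node. gen_headers(headers, True) inside direct_post_object supplies the
# X-Timestamp.
from swift.common.ring import Ring

object_ring = Ring('/etc/swift/object.ring.gz')
part, nodes = object_ring.get_nodes('AUTH_test', 'photos', 'cat.jpg')
for node in nodes:
    direct_post_object(node, part, 'AUTH_test', 'photos', 'cat.jpg',
                       {'X-Object-Meta-Reviewed': 'yes'})
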
def direct_delete_object(node, part, account, container, obj,
                         conn_timeout=5, response_timeout=15,
                         headers=None):
    """
    Delete object directly from the object server.

    :param node: node dictionary from the ring
    :param part: partition the container is on
    :param account: account name
    :param container: container name
    :param obj: object name
    :param conn_timeout: timeout in seconds for establishing the connection
    :param response_timeout: timeout in seconds for getting the response
    :returns: response from server
    """
    if headers is None:
        headers = {}

    headers = gen_headers(headers, add_ts='x-timestamp' not in (
        k.lower() for k in headers))

    path = '/%s/%s/%s' % (account, container, obj)
    with Timeout(conn_timeout):
        conn = http_connect(node['ip'], node['port'], node['device'], part,
                            'DELETE', path, headers=headers)
    with Timeout(response_timeout):
        resp = conn.getresponse()
        resp.read()
    if not is_success(resp.status):
        raise DirectClientException('Object', 'DELETE',
                                    node, part, path, resp)

def direct_head_container(node, part, account, container, conn_timeout=5,
                          response_timeout=15):
    """
    Request container information directly from the container server.

    :param node: node dictionary from the ring
    :param part: partition the container is on
    :param account: account name
    :param container: container name
    :param conn_timeout: timeout in seconds for establishing the connection
    :param response_timeout: timeout in seconds for getting the response
    :returns: a dict containing the response's headers in a HeaderKeyDict
    """
    path = '/%s/%s' % (account, container)
    with Timeout(conn_timeout):
        conn = http_connect(node['ip'], node['port'], node['device'], part,
                            'HEAD', path, headers=gen_headers())
    with Timeout(response_timeout):
        resp = conn.getresponse()
        resp.read()
    if not is_success(resp.status):
        raise DirectClientException('Container', 'HEAD',
                                    node, part, path, resp)
    resp_headers = HeaderKeyDict()
    for header, value in resp.getheaders():
        resp_headers[header] = value
    return resp_headers

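# Hedged usage sketch (requires a reachable container server; the ring path
# is illustrative): HEAD a container on its first primary node and read the
# object count from the returned HeaderKeyDict.
from swift.common.ring import Ring

container_ring = Ring('/etc/swift/container.ring.gz')
part, nodes = container_ring.get_nodes('AUTH_test', 'photos')
hdrs = direct_head_container(nodes[0], part, 'AUTH_test', 'photos')
print(hdrs.get('X-Container-Object-Count'))
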
def direct_head_object(node, part, account, container, obj,
                       conn_timeout=5, response_timeout=15, headers=None):
    """
    Request object information directly from the object server.

    :param node: node dictionary from the ring
    :param part: partition the container is on
    :param account: account name
    :param container: container name
    :param obj: object name
    :param conn_timeout: timeout in seconds for establishing the connection
    :param response_timeout: timeout in seconds for getting the response
    :param headers: dict to be passed into HTTPConnection headers
    :returns: a dict containing the response's headers in a HeaderKeyDict
    """
    if headers is None:
        headers = {}

    headers = gen_headers(headers)

    path = '/%s/%s/%s' % (account, container, obj)
    with Timeout(conn_timeout):
        conn = http_connect(node['ip'], node['port'], node['device'], part,
                            'HEAD', path, headers=headers)
    with Timeout(response_timeout):
        resp = conn.getresponse()
        resp.read()
    if not is_success(resp.status):
        raise DirectClientException('Object', 'HEAD',
                                    node, part, path, resp)
    resp_headers = HeaderKeyDict()
    for header, value in resp.getheaders():
        resp_headers[header] = value
    return resp_headers

def direct_delete_object(node, part, account, container, obj,
                         conn_timeout=5, response_timeout=15,
                         headers=None):
    """
    Delete object directly from the object server.

    :param node: node dictionary from the ring
    :param part: partition the container is on
    :param account: account name
    :param container: container name
    :param obj: object name
    :param conn_timeout: timeout in seconds for establishing the connection
    :param response_timeout: timeout in seconds for getting the response
    :returns: response from server
    """
    # use None instead of a mutable {} default, which would be shared
    # (and mutated) across calls
    if headers is None:
        headers = {}
    path = '/%s/%s/%s' % (account, container, obj)
    headers['X-Timestamp'] = normalize_timestamp(time())
    with Timeout(conn_timeout):
        conn = http_connect(node['ip'], node['port'], node['device'], part,
                            'DELETE', path, headers)
    with Timeout(response_timeout):
        resp = conn.getresponse()
        resp.read()
    if not is_success(resp.status):
        raise ClientException(
            'Object server %s:%s direct DELETE %s gave status %s' %
            (node['ip'], node['port'],
             repr('/%s/%s%s' % (node['device'], part, path)),
             resp.status),
            http_host=node['ip'], http_port=node['port'],
            http_device=node['device'], http_status=resp.status,
            http_reason=resp.reason)

def direct_head_container(node, part, account, container, conn_timeout=5,
                          response_timeout=15):
    """
    Request container information directly from the container server.

    :param node: node dictionary from the ring
    :param part: partition the container is on
    :param account: account name
    :param container: container name
    :param conn_timeout: timeout in seconds for establishing the connection
    :param response_timeout: timeout in seconds for getting the response
    :returns: a dict containing the response's headers (all header names
              will be lowercase)
    """
    path = '/%s/%s' % (account, container)
    with Timeout(conn_timeout):
        conn = http_connect(node['ip'], node['port'], node['device'], part,
                            'HEAD', path)
    with Timeout(response_timeout):
        resp = conn.getresponse()
        resp.read()
    if not is_success(resp.status):
        raise ClientException(
            'Container server %s:%s direct HEAD %s gave status %s' %
            (node['ip'], node['port'],
             repr('/%s/%s%s' % (node['device'], part, path)),
             resp.status),
            http_host=node['ip'], http_port=node['port'],
            http_device=node['device'], http_status=resp.status,
            http_reason=resp.reason)
    resp_headers = {}
    for header, value in resp.getheaders():
        resp_headers[header.lower()] = value
    return resp_headers

def initialize(self, hdrs=None, parms=None):
    if hdrs is None:
        hdrs = {}
    if parms is None:
        parms = {}
    if not self.name:
        return False

    status = self.conn.make_request('HEAD', self.path, hdrs=hdrs,
                                    parms=parms)
    if status == 404:
        return False
    elif not is_success(status):
        raise ResponseError(self.conn.response, 'HEAD',
                            self.conn.make_path(self.path))

    for hdr in self.conn.response.getheaders():
        if hdr[0].lower() == 'content-type':
            self.content_type = hdr[1]
        if hdr[0].lower().startswith('x-object-meta-'):
            self.metadata[hdr[0][14:]] = hdr[1]
        if hdr[0].lower() == 'etag':
            self.etag = hdr[1]
        if hdr[0].lower() == 'content-length':
            self.size = int(hdr[1])
        if hdr[0].lower() == 'last-modified':
            self.last_modified = hdr[1]
    return True

def __call__(self, req):
    # We only want to step in on object DELETE requests
    if req.method != "DELETE":
        return self.app
    try:
        vrs, acc, con, obj = req.split_path(4, 4, rest_with_last=True)
    except ValueError:
        # not an object request
        return self.app

    # Okay, this is definitely an object DELETE request; let's see if
    # it's one we want to step in for.
    if self.is_trash(con) and self.block_trash_deletes:
        return swob.HTTPMethodNotAllowed(
            content_type="text/plain",
            body=("Attempted to delete from a trash container, but "
                  "block_trash_deletes is enabled\n"))
    elif not self.should_save_copy(req.environ, con, obj):
        return self.app

    trash_container = self.trash_prefix + con
    copy_status, copy_headers, copy_body = self.copy_object(
        req, trash_container, obj)

    if copy_status == 404:
        self.create_trash_container(req, vrs, acc, trash_container)
        copy_status, copy_headers, copy_body = self.copy_object(
            req, trash_container, obj)
    elif not http.is_success(copy_status):
        # other error; propagate this to the client
        return swob.Response(body=friendly_error(copy_body),
                             status=copy_status,
                             headers=copy_headers)
    return self.app

def create(self, env, vrs, account, container, versions=None):
    """
    Perform a container PUT request

    :param env: WSGI environment for original request
    :param vrs: API version, e.g. "v1"
    :param account: account in which to create the container
    :param container: container name
    :param versions: value for X-Versions-Location header
                     (for container versioning)

    :returns: None
    :raises: HTTPException on failure (non-2xx response)
    """
    env = env.copy()
    env['REQUEST_METHOD'] = 'PUT'
    env["PATH_INFO"] = "/%s/%s/%s" % (vrs, account, container)
    if versions:
        env['HTTP_X_VERSIONS_LOCATION'] = versions

    resp_iter = self._app_call(env)
    # The body of a PUT response is either empty or very short (e.g.
    # error message), so we can get away with slurping the whole thing.
    body = ''.join(resp_iter)
    close_if_possible(resp_iter)

    status_int = int(self._response_status.split(' ', 1)[0])
    if not http.is_success(status_int):
        raise swob.HTTPException(
            status=self._response_status,
            headers=self._response_headers,
            body=friendly_error(body))

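# Hedged usage sketch: how create() might back the trash flow above. The
# 'controller' and request names are assumptions for illustration; create()
# raises swob.HTTPException on any non-2xx container PUT, so a caller can
# treat "no exception" as "container now exists".
try:
    controller.create(req.environ, 'v1', 'AUTH_test', 'trash-photos')
except swob.HTTPException as err:
    # creation failed; swob exceptions double as responses, so the
    # middleware could return this directly to the client
    resp = err
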
def handle_get(self, req, start_response):
    app_resp = self._app_call(req.environ)

    if is_success(self._get_status_int()):
        # only decrypt body of 2xx responses
        out_content_type = get_listing_content_type(req)
        if out_content_type == 'application/json':
            handler = self.process_json_resp
            keys = self.get_decryption_keys(req)
        elif out_content_type.endswith('/xml'):
            handler = self.process_xml_resp
            keys = self.get_decryption_keys(req)
        else:
            handler = keys = None

        if handler and keys:
            try:
                app_resp = handler(keys['container'], app_resp)
            except EncryptionException as err:
                msg = "Error decrypting container listing"
                self.logger.error(_('%(msg)s: %(err)s') %
                                  {'msg': msg, 'err': err})
                raise HTTPInternalServerError(
                    body=msg, content_type='text/plain')

    start_response(self._response_status, self._response_headers,
                   self._response_exc_info)
    return app_resp

def _listing_pages_iter(self, account_name, lcontainer, lprefix, env):
    marker = ''
    while True:
        lreq = make_pre_authed_request(
            env, method='GET', swift_source='VW',
            path='/v1/%s/%s' % (account_name, lcontainer))
        lreq.environ['QUERY_STRING'] = \
            'format=json&prefix=%s&marker=%s' % (quote(lprefix),
                                                 quote(marker))
        lresp = lreq.get_response(self.app)
        if not is_success(lresp.status_int):
            if lresp.status_int == HTTP_NOT_FOUND:
                raise ListingIterNotFound()
            elif is_client_error(lresp.status_int):
                raise HTTPPreconditionFailed()
            else:
                raise ListingIterError()
        if not lresp.body:
            break
        sublisting = json.loads(lresp.body)
        if not sublisting:
            break
        marker = sublisting[-1]['name'].encode('utf-8')
        yield sublisting

def POST(self, req):
    """HTTP POST request handler."""
    error_response = check_metadata(req, 'account')
    if error_response:
        return error_response
    account_partition, accounts = \
        self.app.account_ring.get_nodes(self.account_name)
    headers = self.generate_request_headers(req, transfer=True)
    if self.app.memcache:
        self.app.memcache.delete(
            get_account_memcache_key(self.account_name))
    resp = self.make_requests(
        req, self.app.account_ring, account_partition, 'POST',
        req.path_info, [headers] * len(accounts))
    if resp.status_int == HTTP_NOT_FOUND and self.app.account_autocreate:
        if len(self.account_name) > MAX_ACCOUNT_NAME_LENGTH:
            resp = HTTPBadRequest(request=req)
            resp.body = 'Account name length of %d longer than %d' % \
                        (len(self.account_name), MAX_ACCOUNT_NAME_LENGTH)
            return resp
        resp = self.make_requests(
            Request.blank('/v1/' + self.account_name),
            self.app.account_ring, account_partition, 'PUT',
            '/' + self.account_name, [headers] * len(accounts))
        if not is_success(resp.status_int):
            self.app.logger.warning('Could not autocreate account %r' %
                                    self.account_name)
            return resp
    return resp

def _fetch_sub_slo_segments(self, req, version, acc, con, obj):
    """
    Fetch the submanifest, parse it, and return it.
    Raise exception on failures.
    """
    sub_req = req.copy_get()
    sub_req.range = None
    sub_req.environ['PATH_INFO'] = '/'.join(['', version, acc, con, obj])
    sub_req.environ['swift.source'] = 'SLO'
    sub_req.user_agent = "%s SLO MultipartGET" % sub_req.user_agent
    sub_resp = sub_req.get_response(self.slo.app)

    if not is_success(sub_resp.status_int):
        raise ListingIterError(
            'ERROR: while fetching %s, GET of submanifest %s '
            'failed with status %d' % (req.path, sub_req.path,
                                       sub_resp.status_int))

    try:
        with closing_if_possible(sub_resp.app_iter):
            return json.loads(''.join(sub_resp.app_iter))
    except ValueError as err:
        raise ListingIterError(
            'ERROR: while fetching %s, JSON-decoding of submanifest %s '
            'failed with %s' % (req.path, sub_req.path, err))

def direct_post_object(node, part, account, container, name, headers,
                       conn_timeout=5, response_timeout=15):
    """
    Direct update to object metadata on object server.

    :param node: node dictionary from the ring
    :param part: partition the container is on
    :param account: account name
    :param container: container name
    :param name: object name
    :param headers: headers to store as metadata
    :param conn_timeout: timeout in seconds for establishing the connection
    :param response_timeout: timeout in seconds for getting the response
    :raises ClientException: HTTP POST request failed
    """
    path = '/%s/%s/%s' % (account, container, name)
    headers['X-Timestamp'] = normalize_timestamp(time())
    with Timeout(conn_timeout):
        conn = http_connect(node['ip'], node['port'], node['device'], part,
                            'POST', path, headers=headers)
    with Timeout(response_timeout):
        resp = conn.getresponse()
        resp.read()
    if not is_success(resp.status):
        raise ClientException(
            'Object server %s:%s direct POST %s gave status %s' %
            (node['ip'], node['port'],
             repr('/%s/%s%s' % (node['device'], part, path)),
             resp.status),
            http_host=node['ip'], http_port=node['port'],
            http_device=node['device'], http_status=resp.status,
            http_reason=resp.reason)

def write(self, data=b'', hdrs=None, parms=None, callback=None, cfg=None,
          return_resp=False):
    if hdrs is None:
        hdrs = {}
    if parms is None:
        parms = {}
    if cfg is None:
        cfg = {}
    block_size = 2 ** 20

    if all(hasattr(data, attr) for attr in ('flush', 'seek', 'fileno')):
        try:
            data.flush()
            data.seek(0)
        except IOError:
            pass
        self.size = int(os.fstat(data.fileno())[6])
    else:
        data = io.BytesIO(data)
        self.size = data.seek(0, os.SEEK_END)
        data.seek(0)

    headers = self.make_headers(cfg=cfg)
    headers.update(hdrs)

    def try_request():
        # rewind to be ready for another attempt
        data.seek(0)
        self.conn.put_start(self.path, hdrs=headers, parms=parms, cfg=cfg)

        transferred = 0
        for buff in iter(lambda: data.read(block_size), b''):
            self.conn.put_data(buff)
            transferred += len(buff)
            if callable(callback):
                callback(transferred, self.size)
        self.conn.put_end()
        return self.conn.response

    try:
        self.response = self.conn.request_with_retry(try_request)
    except RequestError as e:
        raise ResponseError(self.conn.response, 'PUT',
                            self.conn.make_path(self.path),
                            details=str(e))
    if not is_success(self.response.status):
        raise ResponseError(self.conn.response, 'PUT',
                            self.conn.make_path(self.path))

    try:
        data.seek(0)
    except IOError:
        pass
    self.md5 = self.compute_md5sum(data)

    if return_resp:
        return self.conn.response
    return True

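# Hedged usage sketch ('f' stands in for an instance of the functional-test
# file class this method belongs to; the callback is illustrative): upload a
# byte string and report transfer progress as each block is sent.
def report(transferred, total):
    print('%d/%d bytes sent' % (transferred, total))

f.write(b'hello world', hdrs={'Content-Type': 'text/plain'},
        callback=report)
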
def direct_put_object(node, part, account, container, name, contents,
                      content_length=None, etag=None, content_type=None,
                      headers=None, conn_timeout=5, response_timeout=15,
                      chunk_size=65535):
    """
    Put object directly to the object server.

    :param node: node dictionary from the ring
    :param part: partition the container is on
    :param account: account name
    :param container: container name
    :param name: object name
    :param contents: an iterable or string to read object data from
    :param content_length: value to send as content-length header
    :param etag: etag of contents
    :param content_type: value to send as content-type header
    :param headers: additional headers to include in the request
    :param conn_timeout: timeout in seconds for establishing the connection
    :param response_timeout: timeout in seconds for getting the response
    :param chunk_size: if defined, chunk size of data to send.
    :returns: etag from the server response
    """
    path = '/%s/%s/%s' % (account, container, name)
    if headers is None:
        headers = {}
    if etag:
        headers['ETag'] = etag.strip('"')
    if content_length is not None:
        headers['Content-Length'] = str(content_length)
    else:
        for n, v in headers.iteritems():
            if n.lower() == 'content-length':
                content_length = int(v)
    if content_type is not None:
        headers['Content-Type'] = content_type
    else:
        headers['Content-Type'] = 'application/octet-stream'
    if not contents:
        headers['Content-Length'] = '0'
    if isinstance(contents, basestring):
        contents = [contents]
    # In case the caller wants to insert an object with a specific
    # timestamp, only generate one if X-Timestamp isn't already set.
    add_ts = 'X-Timestamp' not in headers

    if content_length is None:
        headers['Transfer-Encoding'] = 'chunked'

    with Timeout(conn_timeout):
        conn = http_connect(node['ip'], node['port'], node['device'], part,
                            'PUT', path,
                            headers=gen_headers(headers, add_ts))

    contents_f = FileLikeIter(contents)

    if content_length is None:
        chunk = contents_f.read(chunk_size)
        while chunk:
            conn.send('%x\r\n%s\r\n' % (len(chunk), chunk))
            chunk = contents_f.read(chunk_size)
        conn.send('0\r\n\r\n')
    else:
        left = content_length
        while left > 0:
            size = chunk_size
            if size > left:
                size = left
            chunk = contents_f.read(size)
            if not chunk:
                break
            conn.send(chunk)
            left -= len(chunk)

    with Timeout(response_timeout):
        resp = conn.getresponse()
        resp.read()
    if not is_success(resp.status):
        raise ClientException(
            'Object server %s:%s direct PUT %s gave status %s' %
            (node['ip'], node['port'],
             repr('/%s/%s%s' % (node['device'], part, path)),
             resp.status),
            http_host=node['ip'], http_port=node['port'],
            http_device=node['device'], http_status=resp.status,
            http_reason=resp.reason)
    return resp.getheader('etag').strip('"')

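# Hedged usage sketch (requires a reachable object server; the ring path is
# illustrative): PUT a short string body to each primary node and confirm
# the servers all computed the same etag.
from swift.common.ring import Ring

object_ring = Ring('/etc/swift/object.ring.gz')
part, nodes = object_ring.get_nodes('AUTH_test', 'photos', 'note.txt')
etags = set(direct_put_object(node, part, 'AUTH_test', 'photos',
                              'note.txt', 'hello',
                              content_type='text/plain')
            for node in nodes)
assert len(etags) == 1
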
def _requests_to_bytes_iter(self):
    # Take the requests out of self._coalesce_requests, actually make
    # the requests, and generate the bytes from the responses.
    #
    # Yields 2-tuples (segment-name, byte-chunk). The segment name is
    # used for logging.
    for data_or_req, seg_etag, seg_size in self._coalesce_requests():
        if isinstance(data_or_req, bytes):
            # ugly, awful overloading
            yield ('data segment', data_or_req)
            continue
        seg_req = data_or_req
        seg_resp = seg_req.get_response(self.app)
        if not is_success(seg_resp.status_int):
            close_if_possible(seg_resp.app_iter)
            raise SegmentError(
                'While processing manifest %s, '
                'got %d while retrieving %s' %
                (self.name, seg_resp.status_int, seg_req.path))
        elif ((seg_etag and (seg_resp.etag != seg_etag)) or
              (seg_size and (seg_resp.content_length != seg_size) and
               not seg_req.range)):
            # The content-length check is for security reasons. Seems
            # possible that an attacker could upload a >1mb object and
            # then replace it with a much smaller object with same
            # etag. Then create a big nested SLO that calls that
            # object many times which would hammer our obj servers. If
            # this is a range request, don't check content-length
            # because it won't match.
            close_if_possible(seg_resp.app_iter)
            raise SegmentError(
                'Object segment no longer valid: '
                '%(path)s etag: %(r_etag)s != %(s_etag)s or '
                '%(r_size)s != %(s_size)s.' %
                {'path': seg_req.path, 'r_etag': seg_resp.etag,
                 'r_size': seg_resp.content_length,
                 's_etag': seg_etag, 's_size': seg_size})
        else:
            self.current_resp = seg_resp

        seg_hash = None
        if seg_resp.etag and not seg_req.headers.get('Range'):
            # Only calculate the MD5 if we can use it to validate
            seg_hash = hashlib.md5()

        document_iters = maybe_multipart_byteranges_to_document_iters(
            seg_resp.app_iter, seg_resp.headers['Content-Type'])

        for chunk in itertools.chain.from_iterable(document_iters):
            if seg_hash:
                seg_hash.update(chunk)
            yield (seg_req.path, chunk)
        close_if_possible(seg_resp.app_iter)

        if seg_hash and seg_hash.hexdigest() != seg_resp.etag:
            raise SegmentError(
                "Bad MD5 checksum in %(name)s for %(seg)s: headers had"
                " %(etag)s, but object MD5 was actually %(actual)s" %
                {'seg': seg_req.path, 'etag': seg_resp.etag,
                 'name': self.name, 'actual': seg_hash.hexdigest()})

def _fetch_and_merge_shard_ranges(self, http, broker):
    response = http.replicate('get_shard_ranges')
    if is_success(response.status):
        broker.merge_shard_ranges(json.loads(response.data))

def PUT(self, req):
    """HTTP PUT request handler."""
    container_info = self.container_info(
        self.account_name, self.container_name,
        account_autocreate=self.app.account_autocreate)
    container_partition = container_info['partition']
    containers = container_info['nodes']
    req.acl = container_info['write_acl']
    req.environ['swift_sync_key'] = container_info['sync_key']
    object_versions = container_info['versions']
    if 'swift.authorize' in req.environ:
        aresp = req.environ['swift.authorize'](req)
        if aresp:
            return aresp
    if not containers:
        return HTTPNotFound(request=req)
    if 'x-delete-after' in req.headers:
        try:
            x_delete_after = int(req.headers['x-delete-after'])
        except ValueError:
            return HTTPBadRequest(request=req,
                                  content_type='text/plain',
                                  body='Non-integer X-Delete-After')
        req.headers['x-delete-at'] = '%d' % (time.time() + x_delete_after)
    if 'x-delete-at' in req.headers:
        try:
            x_delete_at = int(req.headers['x-delete-at'])
            if x_delete_at < time.time():
                return HTTPBadRequest(body='X-Delete-At in past',
                                      request=req,
                                      content_type='text/plain')
        except ValueError:
            return HTTPBadRequest(request=req,
                                  content_type='text/plain',
                                  body='Non-integer X-Delete-At')
        delete_at_container = str(
            x_delete_at / self.app.expiring_objects_container_divisor *
            self.app.expiring_objects_container_divisor)
        delete_at_part, delete_at_nodes = \
            self.app.container_ring.get_nodes(
                self.app.expiring_objects_account, delete_at_container)
    else:
        delete_at_part = delete_at_nodes = None
    partition, nodes = self.app.object_ring.get_nodes(
        self.account_name, self.container_name, self.object_name)
    # do a HEAD request for container sync and checking object versions
    if 'x-timestamp' in req.headers or \
            (object_versions and not
             req.environ.get('swift_versioned_copy')):
        hreq = Request.blank(req.path_info,
                             headers={'X-Newest': 'True'},
                             environ={'REQUEST_METHOD': 'HEAD'})
        hresp = self.GETorHEAD_base(hreq, _('Object'), partition, nodes,
                                    hreq.path_info, len(nodes))
    # Used by container sync feature
    if 'x-timestamp' in req.headers:
        try:
            req.headers['X-Timestamp'] = \
                normalize_timestamp(float(req.headers['x-timestamp']))
            if hresp.environ and 'swift_x_timestamp' in hresp.environ \
                    and float(hresp.environ['swift_x_timestamp']) >= \
                    float(req.headers['x-timestamp']):
                return HTTPAccepted(request=req)
        except ValueError:
            return HTTPBadRequest(
                request=req, content_type='text/plain',
                body='X-Timestamp should be a UNIX timestamp float '
                     'value; was %r' % req.headers['x-timestamp'])
    else:
        req.headers['X-Timestamp'] = normalize_timestamp(time.time())
    # Sometimes the 'content-type' header exists, but is set to None.
    content_type_manually_set = True
    if not req.headers.get('content-type'):
        guessed_type, _junk = mimetypes.guess_type(req.path_info)
        req.headers['Content-Type'] = guessed_type or \
            'application/octet-stream'
        content_type_manually_set = False
    error_response = check_object_creation(req, self.object_name)
    if error_response:
        return error_response
    if object_versions and not req.environ.get('swift_versioned_copy'):
        is_manifest = 'x-object-manifest' in req.headers or \
                      'x-object-manifest' in hresp.headers
        if hresp.status_int != HTTP_NOT_FOUND and not is_manifest:
            # This is a version manifest and needs to be handled
            # differently. First copy the existing data to a new
            # object, then write the data from this request to the
            # version manifest object.
            lcontainer = object_versions.split('/')[0]
            prefix_len = '%03x' % len(self.object_name)
            lprefix = prefix_len + self.object_name + '/'
            ts_source = hresp.environ.get('swift_x_timestamp')
            if ts_source is None:
                ts_source = time.mktime(time.strptime(
                    hresp.headers['last-modified'],
                    '%a, %d %b %Y %H:%M:%S GMT'))
            new_ts = normalize_timestamp(ts_source)
            vers_obj_name = lprefix + new_ts
            copy_headers = {
                'Destination': '%s/%s' % (lcontainer, vers_obj_name)}
            copy_environ = {'REQUEST_METHOD': 'COPY',
                            'swift_versioned_copy': True}
            copy_req = Request.blank(req.path_info,
                                     headers=copy_headers,
                                     environ=copy_environ)
            copy_resp = self.COPY(copy_req)
            if is_client_error(copy_resp.status_int):
                # missing container or bad permissions
                return HTTPPreconditionFailed(request=req)
            elif not is_success(copy_resp.status_int):
                # could not copy the data, bail
                return HTTPServiceUnavailable(request=req)
    reader = req.environ['wsgi.input'].read
    data_source = iter(lambda: reader(self.app.client_chunk_size), '')
    source_header = req.headers.get('X-Copy-From')
    source_resp = None
    if source_header:
        source_header = unquote(source_header)
        acct = req.path_info.split('/', 2)[1]
        if isinstance(acct, unicode):
            acct = acct.encode('utf-8')
        if not source_header.startswith('/'):
            source_header = '/' + source_header
        source_header = '/' + acct + source_header
        try:
            src_container_name, src_obj_name = \
                source_header.split('/', 3)[2:]
        except ValueError:
            return HTTPPreconditionFailed(
                request=req,
                body='X-Copy-From header must be of the form'
                     '<container name>/<object name>')
        source_req = req.copy_get()
        source_req.path_info = source_header
        source_req.headers['X-Newest'] = 'true'
        orig_obj_name = self.object_name
        orig_container_name = self.container_name
        self.object_name = src_obj_name
        self.container_name = src_container_name
        source_resp = self.GET(source_req)
        if source_resp.status_int >= HTTP_MULTIPLE_CHOICES:
            return source_resp
        self.object_name = orig_obj_name
        self.container_name = orig_container_name
        new_req = Request.blank(req.path_info,
                                environ=req.environ,
                                headers=req.headers)
        data_source = source_resp.app_iter
        new_req.content_length = source_resp.content_length
        if new_req.content_length is None:
            # This indicates a transfer-encoding: chunked source object,
            # which currently only happens because there are more than
            # CONTAINER_LISTING_LIMIT segments in a segmented object. In
            # this case, we're going to refuse to do the server-side
            # copy.
            return HTTPRequestEntityTooLarge(request=req)
        new_req.etag = source_resp.etag
        # we no longer need the X-Copy-From header
        del new_req.headers['X-Copy-From']
        if not content_type_manually_set:
            new_req.headers['Content-Type'] = \
                source_resp.headers['Content-Type']
        if new_req.headers.get('x-fresh-metadata', 'false').lower() \
                not in TRUE_VALUES:
            for k, v in source_resp.headers.items():
                if k.lower().startswith('x-object-meta-'):
                    new_req.headers[k] = v
        for k, v in req.headers.items():
            if k.lower().startswith('x-object-meta-'):
                new_req.headers[k] = v
        req = new_req
    node_iter = self.iter_nodes(partition, nodes, self.app.object_ring)
    pile = GreenPile(len(nodes))
    for container in containers:
        nheaders = dict(req.headers.iteritems())
        nheaders['Connection'] = 'close'
        nheaders['X-Container-Host'] = '%(ip)s:%(port)s' % container
        nheaders['X-Container-Partition'] = container_partition
        nheaders['X-Container-Device'] = container['device']
        nheaders['Expect'] = '100-continue'
        if delete_at_nodes:
            node = delete_at_nodes.pop(0)
            nheaders['X-Delete-At-Host'] = '%(ip)s:%(port)s' % node
            nheaders['X-Delete-At-Partition'] = delete_at_part
            nheaders['X-Delete-At-Device'] = node['device']
        pile.spawn(self._connect_put_node, node_iter, partition,
                   req.path_info, nheaders,
                   self.app.logger.thread_locals)
    conns = [conn for conn in pile if conn]
    if len(conns) <= len(nodes) / 2:
        self.app.logger.error(
            _('Object PUT returning 503, %(conns)s/%(nodes)s '
              'required connections'),
            {'conns': len(conns), 'nodes': len(nodes) // 2 + 1})
        return HTTPServiceUnavailable(request=req)
    chunked = req.headers.get('transfer-encoding')
    bytes_transferred = 0
    try:
        with ContextPool(len(nodes)) as pool:
            for conn in conns:
                conn.failed = False
                conn.queue = Queue(self.app.put_queue_depth)
                pool.spawn(self._send_file, conn, req.path)
            while True:
                with ChunkReadTimeout(self.app.client_timeout):
                    try:
                        chunk = next(data_source)
                    except StopIteration:
                        if chunked:
                            [conn.queue.put('0\r\n\r\n')
                             for conn in conns]
                        break
                bytes_transferred += len(chunk)
                if bytes_transferred > MAX_FILE_SIZE:
                    return HTTPRequestEntityTooLarge(request=req)
                for conn in list(conns):
                    if not conn.failed:
                        conn.queue.put(
                            '%x\r\n%s\r\n' % (len(chunk), chunk)
                            if chunked else chunk)
                    else:
                        conns.remove(conn)
                if len(conns) <= len(nodes) / 2:
                    self.app.logger.error(
                        _('Object PUT exceptions during send, '
                          '%(conns)s/%(nodes)s required connections'),
                        {'conns': len(conns),
                         'nodes': len(nodes) / 2 + 1})
                    return HTTPServiceUnavailable(request=req)
            for conn in conns:
                if conn.queue.unfinished_tasks:
                    conn.queue.join()
        conns = [conn for conn in conns if not conn.failed]
    except ChunkReadTimeout as err:
        self.app.logger.warn(
            _('ERROR Client read timeout (%ss)'), err.seconds)
        self.app.logger.increment('client_timeouts')
        return HTTPRequestTimeout(request=req)
    # collect the final status from each connection; statuses must be
    # initialized here alongside the other per-node result lists
    statuses = []
    reasons = []
    bodies = []
    etags = set()
    for conn in conns:
        try:
            with Timeout(self.app.node_timeout):
                response = conn.getresponse()
                statuses.append(response.status)
                reasons.append(response.reason)
                bodies.append(response.read())
                if response.status >= HTTP_INTERNAL_SERVER_ERROR:
                    self.error_occurred(
                        conn.node,
                        _('ERROR %(status)d %(body)s From Object '
                          'Server re: %(path)s') %
                        {'status': response.status,
                         'body': bodies[-1][:1024],
                         'path': req.path})
                elif is_success(response.status):
                    etags.add(response.getheader('etag').strip('"'))
        except (Exception, Timeout):
            self.exception_occurred(
                conn.node, _('Object'),
                _('Trying to get final status of PUT to %s') % req.path)
    if len(etags) > 1:
        self.app.logger.error(
            _('Object servers returned %s mismatched etags'),
            len(etags))
        return HTTPServerError(request=req)
    etag = len(etags) and etags.pop() or None
    while len(statuses) < len(nodes):
        statuses.append(HTTP_SERVICE_UNAVAILABLE)
        reasons.append('')
        bodies.append('')
    resp = self.best_response(req, statuses, reasons, bodies,
                              _('Object PUT'), etag=etag)

def updates(self):
    """
    Handles the UPDATES step of an SSYNC request.

    Receives a set of PUT and DELETE subrequests that will be routed to
    the object server itself for processing. These contain the
    information requested by the MISSING_CHECK step.

    The PUT and DELETE subrequests are formatted pretty much exactly
    like regular HTTP requests, excepting the HTTP version on the first
    request line.

    The process is generally:

        1. Sender sends `:UPDATES: START` and begins sending the
           PUT and DELETE subrequests.

        2. Receiver gets `:UPDATES: START` and begins routing the
           subrequests to the object server.

        3. Sender sends `:UPDATES: END`.

        4. Receiver gets `:UPDATES: END` and sends `:UPDATES: START`
           and `:UPDATES: END` (assuming no errors).

        5. Sender gets `:UPDATES: START` and `:UPDATES: END`.

    If too many subrequests fail, as configured by
    replication_failure_threshold and replication_failure_ratio, the
    receiver will hang up the request early so as to not waste any more
    time.

    At step 4, the receiver will send back an error if there were any
    failures (that didn't cause a hangup due to the above thresholds)
    so the sender knows the whole transfer was not entirely a success.
    This is so the sender knows if it can remove an out of place
    partition, for example.
    """
    with exceptions.MessageTimeout(self.app.client_timeout,
                                   'updates start'):
        line = self.fp.readline(self.app.network_chunk_size)
    if line.strip() != ':UPDATES: START':
        raise Exception('Looking for :UPDATES: START got %r'
                        % line[:1024])
    successes = 0
    failures = 0
    while True:
        with exceptions.MessageTimeout(self.app.client_timeout,
                                       'updates line'):
            line = self.fp.readline(self.app.network_chunk_size)
        if not line or line.strip() == ':UPDATES: END':
            break
        # Read first line METHOD PATH of subrequest.
        method, path = line.strip().split(' ', 1)
        subreq = swob.Request.blank(
            '/%s/%s%s' % (self.device, self.partition, path),
            environ={'REQUEST_METHOD': method})
        # Read header lines.
        content_length = None
        replication_headers = []
        while True:
            with exceptions.MessageTimeout(self.app.client_timeout):
                line = self.fp.readline(self.app.network_chunk_size)
            if not line:
                raise Exception(
                    'Got no headers for %s %s' % (method, path))
            line = line.strip()
            if not line:
                break
            header, value = line.split(':', 1)
            header = header.strip().lower()
            value = value.strip()
            subreq.headers[header] = value
            if header != 'etag':
                # make sure ssync doesn't cause 'Etag' to be added to
                # obj metadata in addition to 'ETag' which object
                # server sets (note capitalization)
                replication_headers.append(header)
            if header == 'content-length':
                content_length = int(value)
        # Establish subrequest body, if needed.
        if method in ('DELETE', 'POST'):
            if content_length not in (None, 0):
                raise Exception('%s subrequest with content-length %s'
                                % (method, path))
        elif method == 'PUT':
            if content_length is None:
                raise Exception(
                    'No content-length sent for %s %s' % (method, path))

            def subreq_iter():
                left = content_length
                while left > 0:
                    with exceptions.MessageTimeout(
                            self.app.client_timeout, 'updates content'):
                        chunk = self.fp.read(
                            min(left, self.app.network_chunk_size))
                    if not chunk:
                        raise Exception(
                            'Early termination for %s %s'
                            % (method, path))
                    left -= len(chunk)
                    yield chunk
            subreq.environ['wsgi.input'] = utils.FileLikeIter(
                subreq_iter())
        else:
            raise Exception('Invalid subrequest method %s' % method)
        subreq.headers['X-Backend-Storage-Policy-Index'] = \
            int(self.policy)
        subreq.headers['X-Backend-Replication'] = 'True'
        if self.node_index is not None:
            # primary node should not 409 if it has a non-primary
            # fragment
            subreq.headers['X-Backend-Ssync-Frag-Index'] = \
                self.node_index
        if replication_headers:
            subreq.headers['X-Backend-Replication-Headers'] = \
                ' '.join(replication_headers)
        # Route subrequest and translate response.
        resp = subreq.get_response(self.app)
        if http.is_success(resp.status_int) or \
                resp.status_int == http.HTTP_NOT_FOUND:
            successes += 1
        else:
            failures += 1
        if failures >= self.app.replication_failure_threshold and (
                not successes or
                float(failures) / successes >
                self.app.replication_failure_ratio):
            raise Exception('Too many %d failures to %d successes'
                            % (failures, successes))
        # The subreq may have failed, but we want to read the rest of
        # the body from the remote side so we can continue on with the
        # next subreq.
        for junk in subreq.environ['wsgi.input']:
            pass
    if failures:
        raise swob.HTTPInternalServerError(
            'ERROR: With :UPDATES: %d failures to %d successes'
            % (failures, successes))
    yield ':UPDATES: START\r\n'
    yield ':UPDATES: END\r\n'

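# Illustrative sketch of one UPDATES exchange, reconstructed from the
# docstring and parsing code above (not a verbatim capture). Each
# subrequest is a "METHOD PATH" line plus header lines terminated by a
# blank line; a PUT is followed by exactly Content-Length bytes of body,
# while a DELETE carries no body:
#
#   sender -> receiver:
#     :UPDATES: START
#     PUT /a/c/o
#     Content-Length: 5
#     X-Timestamp: 1400000000.00000
#
#     hello
#     DELETE /a/c/stale
#     X-Timestamp: 1400000001.00000
#
#     :UPDATES: END
#
#   receiver -> sender (no failures):
#     :UPDATES: START
#     :UPDATES: END
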
def _GET_using_cache(self, req):
    # It may be possible to fulfil the request from cache: we only reach
    # here if request record_type is 'shard' or 'auto', so if the
    # container state is 'sharded' then look for cached shard ranges.
    # However, if X-Newest is true then we always fetch from the backend
    # servers.
    get_newest = config_true_value(req.headers.get('x-newest', False))
    if get_newest:
        self.app.logger.debug(
            'Skipping shard cache lookup (x-newest) for %s',
            req.path_qs)
        info = None
    else:
        info = _get_info_from_caches(self.app, req.environ,
                                     self.account_name,
                                     self.container_name)
    if (info and is_success(info['status']) and
            info.get('sharding_state') == 'sharded'):
        # container is sharded so we may have the shard ranges cached
        headers = headers_from_container_info(info)
        if headers:
            # only use cached values if all required headers available
            infocache = req.environ.setdefault('swift.infocache', {})
            memcache = cache_from_env(req.environ, True)
            cache_key = get_cache_key(self.account_name,
                                      self.container_name,
                                      shard='listing')
            cached_ranges = infocache.get(cache_key)
            if cached_ranges is None and memcache:
                cached_ranges = memcache.get(cache_key)
            if cached_ranges is not None:
                infocache[cache_key] = tuple(cached_ranges)
                # shard ranges can be returned from cache
                self.app.logger.debug(
                    'Found %d shards in cache for %s',
                    len(cached_ranges), req.path_qs)
                headers.update({'x-backend-record-type': 'shard',
                                'x-backend-cached-results': 'true'})
                shard_range_body = self._filter_resp_shard_ranges(
                    req, cached_ranges)
                # mimic GetOrHeadHandler.get_working_response...
                # note: server sets charset with content_type but proxy
                # GETorHEAD_base does not, so don't set it here either
                resp = Response(request=req, body=shard_range_body)
                update_headers(resp, headers)
                resp.last_modified = math.ceil(
                    float(headers['x-put-timestamp']))
                resp.environ['swift_x_timestamp'] = headers.get(
                    'x-timestamp')
                resp.accept_ranges = 'bytes'
                resp.content_type = 'application/json'
                return resp

    # The request was not fulfilled from cache so send to the backend
    # server, but instruct the backend server to ignore name constraints
    # in request params if returning shard ranges so that the response
    # can potentially be cached. Only do this if the container state is
    # 'sharded'. We don't attempt to cache shard ranges for a 'sharding'
    # container as they may include the container itself as a 'gap
    # filler' for shard ranges that have not yet cleaved; listings from
    # 'gap filler' shard ranges are likely to become stale as the
    # container continues to cleave objects to its shards and caching
    # them is therefore more likely to result in stale or incomplete
    # listings on subsequent container GETs.
    req.headers['x-backend-override-shard-name-filter'] = 'sharded'
    resp = self._GETorHEAD_from_backend(req)

    sharding_state = resp.headers.get('x-backend-sharding-state',
                                      '').lower()
    resp_record_type = resp.headers.get('x-backend-record-type',
                                        '').lower()
    complete_listing = config_true_value(resp.headers.pop(
        'x-backend-override-shard-name-filter', False))
    # given that we sent 'x-backend-override-shard-name-filter=sharded'
    # we should only receive back
    # 'x-backend-override-shard-name-filter=true' if the sharding state
    # is 'sharded', but check them both anyway...
    if (resp_record_type == 'shard' and sharding_state == 'sharded' and
            complete_listing):
        # backend returned unfiltered listing state shard ranges so
        # parse them and replace response body with filtered listing
        cache_key = get_cache_key(self.account_name,
                                  self.container_name, shard='listing')
        data = self._parse_listing_response(req, resp)
        backend_shard_ranges = self._parse_shard_ranges(req, data, resp)
        if backend_shard_ranges is not None:
            cached_ranges = [dict(sr) for sr in backend_shard_ranges]
            if resp.headers.get('x-backend-sharding-state') == 'sharded':
                # cache in infocache even if no shard ranges returned;
                # this is unexpected but use that result for this
                # request
                infocache = req.environ.setdefault('swift.infocache', {})
                infocache[cache_key] = tuple(cached_ranges)
                memcache = cache_from_env(req.environ, True)
                if memcache and cached_ranges:
                    # cache in memcache only if shard ranges as expected
                    self.app.logger.debug(
                        'Caching %d shards for %s',
                        len(cached_ranges), req.path_qs)
                    memcache.set(
                        cache_key, cached_ranges,
                        time=self.app.recheck_listing_shard_ranges)
            # filter returned shard ranges according to request
            # constraints
            resp.body = self._filter_resp_shard_ranges(
                req, cached_ranges)
    return resp

def _make_req(node, part, method, path, headers, stype,
              conn_timeout=5, response_timeout=15, send_timeout=15,
              contents=None, content_length=None, chunk_size=65535):
    """
    Make request to backend storage node.
    (i.e. 'Account', 'Container', 'Object')

    :param node: a node dict from a ring
    :param part: an integer, the partition number
    :param method: a string, the HTTP method (e.g. 'PUT', 'DELETE', etc)
    :param path: a string, the request path
    :param headers: a dict, header name => value
    :param stype: a string, describing the type of service
    :param conn_timeout: timeout while waiting for connection; default
        is 5 seconds
    :param response_timeout: timeout while waiting for response; default
        is 15 seconds
    :param send_timeout: timeout for sending request body; default is
        15 seconds
    :param contents: an iterable or string to read object data from
    :param content_length: value to send as content-length header
    :param chunk_size: if defined, chunk size of data to send
    :returns: an HTTPResponse object
    :raises DirectClientException: if the response status is not 2xx
    :raises eventlet.Timeout: if either conn_timeout or response_timeout
        is exceeded
    """
    if contents is not None:
        if content_length is not None:
            headers['Content-Length'] = str(content_length)
        else:
            for n, v in headers.items():
                if n.lower() == 'content-length':
                    content_length = int(v)
        if not contents:
            headers['Content-Length'] = '0'
        if isinstance(contents, six.string_types):
            contents = [contents]
        if content_length is None:
            headers['Transfer-Encoding'] = 'chunked'

    with Timeout(conn_timeout):
        conn = http_connect(node['ip'], node['port'], node['device'], part,
                            method, path, headers=headers)

    if contents is not None:
        contents_f = FileLikeIter(contents)

        with Timeout(send_timeout):
            if content_length is None:
                chunk = contents_f.read(chunk_size)
                while chunk:
                    conn.send(b'%x\r\n%s\r\n' % (len(chunk), chunk))
                    chunk = contents_f.read(chunk_size)
                conn.send(b'0\r\n\r\n')
            else:
                left = content_length
                while left > 0:
                    size = chunk_size
                    if size > left:
                        size = left
                    chunk = contents_f.read(size)
                    if not chunk:
                        break
                    conn.send(chunk)
                    left -= len(chunk)

    with Timeout(response_timeout):
        resp = conn.getresponse()
        resp.read()
    if not is_success(resp.status):
        raise DirectClientException(stype, method, node, part, path, resp)
    return resp

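# Hedged usage sketch (requires a reachable container server; the ring path
# is illustrative): create a container by sending a bare PUT through
# _make_req, which raises DirectClientException on any non-2xx status.
from swift.common.ring import Ring

container_ring = Ring('/etc/swift/container.ring.gz')
part, nodes = container_ring.get_nodes('AUTH_test', 'newcontainer')
resp = _make_req(nodes[0], part, 'PUT', '/AUTH_test/newcontainer',
                 gen_headers(add_ts=True), 'Container')
print(resp.status)
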
def direct_get_object(node, part, account, container, obj, conn_timeout=5,
                      response_timeout=15, resp_chunk_size=None,
                      headers=None):
    """
    Get object directly from the object server.

    :param node: node dictionary from the ring
    :param part: partition the object is on
    :param account: account name
    :param container: container name
    :param obj: object name
    :param conn_timeout: timeout in seconds for establishing the connection
    :param response_timeout: timeout in seconds for getting the response
    :param resp_chunk_size: if defined, chunk size of data to read.
    :param headers: dict to be passed into HTTPConnection headers
    :returns: a tuple of (response headers, the object's contents) The
              response headers will be a dict and all header names will be
              lowercase.
    """
    if headers is None:
        headers = {}

    path = '/%s/%s/%s' % (account, container, obj)
    with Timeout(conn_timeout):
        conn = http_connect(node['ip'], node['port'], node['device'], part,
                            'GET', path, headers=gen_headers(headers))
    with Timeout(response_timeout):
        resp = conn.getresponse()
    if not is_success(resp.status):
        resp.read()
        raise ClientException(
            'Object server %s:%s direct GET %s gave status %s' %
            (node['ip'], node['port'],
             repr('/%s/%s%s' % (node['device'], part, path)),
             resp.status),
            http_host=node['ip'], http_port=node['port'],
            http_device=node['device'], http_status=resp.status,
            http_reason=resp.reason)
    if resp_chunk_size:
        def _object_body():
            buf = resp.read(resp_chunk_size)
            while buf:
                yield buf
                buf = resp.read(resp_chunk_size)
        object_body = _object_body()
    else:
        object_body = resp.read()
    resp_headers = {}
    for header, value in resp.getheaders():
        resp_headers[header.lower()] = value
    return resp_headers, object_body
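For example, a caller might stream an object body in fixed-size chunks; the node dict and names below are assumptions, and passing resp_chunk_size makes the second element of the returned tuple a generator instead of a fully buffered body.

node = {'ip': '127.0.0.1', 'port': 6200, 'device': 'sdb1'}  # assumed node
resp_headers, body = direct_get_object(
    node, 17, 'AUTH_test', 'photos', 'cat.jpg', resp_chunk_size=65536)
with open('cat.jpg', 'wb') as f:
    for chunk in body:  # a generator when resp_chunk_size is set
        f.write(chunk)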
def account_update(self, req, account, container, broker):
    """
    Update the account server(s) with latest container info.

    :param req: swob.Request object
    :param account: account name
    :param container: container name
    :param broker: container DB broker object
    :returns: if all the account requests return a 404 error code,
              HTTPNotFound response object,
              if the account cannot be updated due to a malformed header,
              an HTTPBadRequest response object,
              otherwise None.
    """
    account_hosts = [h.strip() for h in
                     req.headers.get('X-Account-Host', '').split(',')]
    account_devices = [d.strip() for d in
                       req.headers.get('X-Account-Device', '').split(',')]
    account_partition = req.headers.get('X-Account-Partition', '')

    if len(account_hosts) != len(account_devices):
        # This shouldn't happen unless there's a bug in the proxy,
        # but if there is, we want to know about it.
        self.logger.error(_(
            'ERROR Account update failed: different '
            'numbers of hosts and devices in request: '
            '"%s" vs "%s"') %
            (req.headers.get('X-Account-Host', ''),
             req.headers.get('X-Account-Device', '')))
        return HTTPBadRequest(req=req)

    if account_partition:
        updates = zip(account_hosts, account_devices)
    else:
        updates = []

    account_404s = 0

    for account_host, account_device in updates:
        account_ip, account_port = account_host.rsplit(':', 1)
        new_path = '/' + '/'.join([account, container])
        info = broker.get_info()
        account_headers = HeaderKeyDict({
            'x-put-timestamp': info['put_timestamp'],
            'x-delete-timestamp': info['delete_timestamp'],
            'x-object-count': info['object_count'],
            'x-bytes-used': info['bytes_used'],
            'x-trans-id': req.headers.get('x-trans-id', '-'),
            'user-agent': 'container-server %s' % os.getpid(),
            'referer': req.as_referer()})
        if req.headers.get('x-account-override-deleted', 'no').lower() == \
                'yes':
            account_headers['x-account-override-deleted'] = 'yes'
        try:
            with ConnectionTimeout(self.conn_timeout):
                conn = http_connect(
                    account_ip, account_port, account_device,
                    account_partition, 'PUT', new_path, account_headers)
            with Timeout(self.node_timeout):
                account_response = conn.getresponse()
                account_response.read()
                if account_response.status == HTTP_NOT_FOUND:
                    account_404s += 1
                elif not is_success(account_response.status):
                    self.logger.error(_(
                        'ERROR Account update failed '
                        'with %(ip)s:%(port)s/%(device)s (will retry '
                        'later): Response %(status)s %(reason)s'),
                        {'ip': account_ip, 'port': account_port,
                         'device': account_device,
                         'status': account_response.status,
                         'reason': account_response.reason})
        except (Exception, Timeout):
            self.logger.exception(_(
                'ERROR account update failed with '
                '%(ip)s:%(port)s/%(device)s (will retry later)'),
                {'ip': account_ip, 'port': account_port,
                 'device': account_device})

    if updates and account_404s == len(updates):
        return HTTPNotFound(req=req)
    else:
        return None
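The fan-out above pairs X-Account-Host entries with X-Account-Device entries positionally, which is why mismatched list lengths are rejected with a 400. The pairing, isolated:

hosts = '10.0.0.1:6202, 10.0.0.2:6202'
devices = 'sda1, sdb1'
updates = list(zip((h.strip() for h in hosts.split(',')),
                   (d.strip() for d in devices.split(','))))
# -> [('10.0.0.1:6202', 'sda1'), ('10.0.0.2:6202', 'sdb1')]
for host, device in updates:
    ip, port = host.rsplit(':', 1)  # '10.0.0.1:6202' -> ('10.0.0.1', '6202')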
def async_update(self, op, account, container, obj, host, partition, contdevice, headers_out, objdevice): """ Sends or saves an async update. :param op: operation performed (ex: 'PUT', or 'DELETE') :param account: account name for the object :param container: container name for the object :param obj: object name :param host: host that the container is on :param partition: partition that the container is on :param contdevice: device name that the container is on :param headers_out: dictionary of headers to send in the container request :param objdevice: device name that the object is in """ full_path = '/%s/%s/%s' % (account, container, obj) if all([host, partition, contdevice]): try: with ConnectionTimeout(self.conn_timeout): ip, port = host.rsplit(':', 1) conn = http_connect(ip, port, contdevice, partition, op, full_path, headers_out) with Timeout(self.node_timeout): response = conn.getresponse() response.read() if is_success(response.status): return else: self.logger.error( _('ERROR Container update failed ' '(saving for async update later): %(status)d ' 'response from %(ip)s:%(port)s/%(dev)s'), { 'status': response.status, 'ip': ip, 'port': port, 'dev': contdevice }) except (Exception, Timeout): self.logger.exception( _('ERROR container update failed with ' '%(ip)s:%(port)s/%(dev)s (saving for async update later)' ), { 'ip': ip, 'port': port, 'dev': contdevice }) async_dir = os.path.join(self.devices, objdevice, ASYNCDIR) ohash = hash_path(account, container, obj) self.logger.increment('async_pendings') write_pickle( { 'op': op, 'account': account, 'container': container, 'obj': obj, 'headers': headers_out }, os.path.join( async_dir, ohash[-3:], ohash + '-' + normalize_timestamp(headers_out['x-timestamp'])), os.path.join(self.devices, objdevice, 'tmp'))
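On failure the update is pickled to disk under a path derived from the object's hash. The sketch below shows only the layout; Swift's real hash_path also mixes in the cluster's configured hash path prefix/suffix, so this stand-in is illustrative, not compatible:

import hashlib
import os

def pending_file_path(devices, objdevice, account, container, obj,
                      timestamp, async_dir='async_pending'):
    # simplified stand-in for swift.common.utils.hash_path
    ohash = hashlib.md5(
        ('/%s/%s/%s' % (account, container, obj)).encode('utf8')
    ).hexdigest()
    # e.g. /srv/node/sdb1/async_pending/a2f/<hash>-<timestamp>
    return os.path.join(devices, objdevice, async_dir,
                        ohash[-3:], ohash + '-' + timestamp)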
def _handle_object_versions(self, req):
    """
    This method handles versioning of objects in containers that
    have the feature enabled.

    When a new PUT request is sent, the proxy checks for previous
    versions of that same object name. If found, it is copied to a
    different container and the new version is stored in its place.

    This method was added as part of the PUT method refactoring and
    the functionality is expected to be moved to middleware
    """
    container_info = self.container_info(self.account_name,
                                         self.container_name, req)
    policy_index = req.headers.get('X-Backend-Storage-Policy-Index',
                                   container_info['storage_policy'])
    obj_ring = self.app.get_object_ring(policy_index)
    partition, nodes = obj_ring.get_nodes(self.account_name,
                                          self.container_name,
                                          self.object_name)
    object_versions = container_info['versions']

    # do a HEAD request for checking object versions
    if object_versions and not req.environ.get('swift_versioned_copy'):
        # make sure proxy-server uses the right policy index
        _headers = {'X-Backend-Storage-Policy-Index': policy_index,
                    'X-Newest': 'True'}
        hreq = Request.blank(req.path_info, headers=_headers,
                             environ={'REQUEST_METHOD': 'HEAD'})
        hresp = self.GETorHEAD_base(
            hreq, _('Object'), obj_ring, partition,
            hreq.swift_entity_path)

        is_manifest = 'X-Object-Manifest' in req.headers or \
                      'X-Object-Manifest' in hresp.headers
        if hresp.status_int != HTTP_NOT_FOUND and not is_manifest:
            # An object already exists at this name and it is not a DLO
            # manifest, so archive the current data to the versions
            # container before this request overwrites it.
            lcontainer = object_versions.split('/')[0]
            prefix_len = '%03x' % len(self.object_name)
            lprefix = prefix_len + self.object_name + '/'
            ts_source = hresp.environ.get('swift_x_timestamp')
            if ts_source is None:
                ts_source = time.mktime(time.strptime(
                    hresp.headers['last-modified'],
                    '%a, %d %b %Y %H:%M:%S GMT'))
            new_ts = Timestamp(ts_source).internal
            vers_obj_name = lprefix + new_ts
            copy_headers = {
                'Destination': '%s/%s' % (lcontainer, vers_obj_name)}
            copy_environ = {'REQUEST_METHOD': 'COPY',
                            'swift_versioned_copy': True}
            copy_req = Request.blank(req.path_info, headers=copy_headers,
                                     environ=copy_environ)
            copy_resp = self.COPY(copy_req)
            if is_client_error(copy_resp.status_int):
                # missing container or bad permissions
                raise HTTPPreconditionFailed(request=req)
            elif not is_success(copy_resp.status_int):
                # could not copy the data, bail
                raise HTTPServiceUnavailable(request=req)
def _internal_iter(self): start_time = time.time() bytes_left = self.response_body_length try: for seg_path, seg_etag, seg_size, first_byte, last_byte \ in self.listing_iter: if time.time() - start_time > self.max_get_time: raise SegmentError('ERROR: While processing manifest %s, ' 'max LO GET time of %ds exceeded' % (self.name, self.max_get_time)) # Make sure that the segment is a plain old object, not some # flavor of large object, so that we can check its MD5. path = seg_path + '?multipart-manifest=get' seg_req = make_subrequest( self.req.environ, path=path, method='GET', headers={ 'x-auth-token': self.req.headers.get('x-auth-token') }, agent=('%(orig)s ' + self.ua_suffix), swift_source=self.swift_source) if first_byte is not None or last_byte is not None: seg_req.headers['Range'] = "bytes=%s-%s" % ( # The 0 is to avoid having a range like "bytes=-10", # which actually means the *last* 10 bytes. '0' if first_byte is None else first_byte, '' if last_byte is None else last_byte) seg_resp = seg_req.get_response(self.app) if not is_success(seg_resp.status_int): close_if_possible(seg_resp.app_iter) raise SegmentError( 'ERROR: While processing manifest %s, ' 'got %d while retrieving %s' % (self.name, seg_resp.status_int, seg_path)) elif ((seg_etag and (seg_resp.etag != seg_etag)) or (seg_size and (seg_resp.content_length != seg_size) and not seg_req.range)): # The content-length check is for security reasons. Seems # possible that an attacker could upload a >1mb object and # then replace it with a much smaller object with same # etag. Then create a big nested SLO that calls that # object many times which would hammer our obj servers. If # this is a range request, don't check content-length # because it won't match. close_if_possible(seg_resp.app_iter) raise SegmentError( 'Object segment no longer valid: ' '%(path)s etag: %(r_etag)s != %(s_etag)s or ' '%(r_size)s != %(s_size)s.' % { 'path': seg_req.path, 'r_etag': seg_resp.etag, 'r_size': seg_resp.content_length, 's_etag': seg_etag, 's_size': seg_size }) else: self.current_resp = seg_resp seg_hash = hashlib.md5() for chunk in seg_resp.app_iter: seg_hash.update(chunk) if bytes_left is None: yield chunk elif bytes_left >= len(chunk): yield chunk bytes_left -= len(chunk) else: yield chunk[:bytes_left] bytes_left -= len(chunk) close_if_possible(seg_resp.app_iter) raise SegmentError( 'Too many bytes for %(name)s; truncating in ' '%(seg)s with %(left)d bytes left' % { 'name': self.name, 'seg': seg_req.path, 'left': bytes_left }) close_if_possible(seg_resp.app_iter) if seg_resp.etag and seg_hash.hexdigest() != seg_resp.etag \ and first_byte is None and last_byte is None: raise SegmentError( "Bad MD5 checksum in %(name)s for %(seg)s: headers had" " %(etag)s, but object MD5 was actually %(actual)s" % { 'seg': seg_req.path, 'etag': seg_resp.etag, 'name': self.name, 'actual': seg_hash.hexdigest() }) if bytes_left: raise SegmentError( 'Not enough bytes for %s; closing connection' % self.name) except (ListingIterError, SegmentError): self.logger.exception( _('ERROR: An error occurred ' 'while retrieving segments')) raise finally: if self.current_resp: close_if_possible(self.current_resp.app_iter)
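The bytes_left bookkeeping above caps the concatenated segment data at the length the manifest declared. The same pattern in isolation, with illustrative names:

def limit_bytes(chunks, budget):
    # yield at most `budget` bytes; a None budget means unlimited
    for chunk in chunks:
        if budget is None:
            yield chunk
        elif budget >= len(chunk):
            yield chunk
            budget -= len(chunk)
        else:
            yield chunk[:budget]
            raise ValueError('segment produced more bytes than declared')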
def _adjust_put_response(self, req, additional_resp_headers): if is_success(self._get_status_int()): for header, value in additional_resp_headers.items(): self._response_headers.append((header, value))
def DELETE(self, req):
    """HTTP DELETE request handler."""
    container_info = self.container_info(self.account_name,
                                         self.container_name, req)
    policy_index = req.headers.get('X-Backend-Storage-Policy-Index',
                                   container_info['storage_policy'])
    obj_ring = self.app.get_object_ring(policy_index)
    # pass the policy index to storage nodes via req header
    req.headers['X-Backend-Storage-Policy-Index'] = policy_index
    container_partition = container_info['partition']
    containers = container_info['nodes']
    req.acl = container_info['write_acl']
    req.environ['swift_sync_key'] = container_info['sync_key']
    object_versions = container_info['versions']
    if object_versions:
        # this is a version manifest and needs to be handled differently
        object_versions = unquote(object_versions)
        lcontainer = object_versions.split('/')[0]
        prefix_len = '%03x' % len(self.object_name)
        lprefix = prefix_len + self.object_name + '/'
        item_list = []
        try:
            for _item in self._listing_iter(lcontainer, lprefix,
                                            req.environ):
                item_list.append(_item)
        except ListingIterNotFound:
            # no worries, last_item is None
            pass
        except ListingIterNotAuthorized as err:
            return err.aresp
        except ListingIterError:
            return HTTPServerError(request=req)

        while len(item_list) > 0:
            previous_version = item_list.pop()
            # there are older versions so copy the previous version to the
            # current object and delete the previous version
            orig_container = self.container_name
            orig_obj = self.object_name
            self.container_name = lcontainer
            self.object_name = previous_version['name'].encode('utf-8')

            copy_path = '/v1/' + self.account_name + '/' + \
                        self.container_name + '/' + self.object_name

            copy_headers = {'X-Newest': 'True',
                            'Destination': orig_container + '/' + orig_obj}
            copy_environ = {'REQUEST_METHOD': 'COPY',
                            'swift_versioned_copy': True}
            creq = Request.blank(copy_path, headers=copy_headers,
                                 environ=copy_environ)
            copy_resp = self.COPY(creq)
            if copy_resp.status_int == HTTP_NOT_FOUND:
                # the version isn't there so we'll try with previous
                self.container_name = orig_container
                self.object_name = orig_obj
                continue
            if is_client_error(copy_resp.status_int):
                # some user error, maybe permissions
                return HTTPPreconditionFailed(request=req)
            elif not is_success(copy_resp.status_int):
                # could not copy the data, bail
                return HTTPServiceUnavailable(request=req)
            # reset these because the COPY changed them
            self.container_name = lcontainer
            self.object_name = previous_version['name'].encode('utf-8')
            new_del_req = Request.blank(copy_path, environ=req.environ)
            container_info = self.container_info(self.account_name,
                                                 self.container_name, req)
            policy_idx = container_info['storage_policy']
            obj_ring = self.app.get_object_ring(policy_idx)
            # pass the policy index to storage nodes via req header
            new_del_req.headers['X-Backend-Storage-Policy-Index'] = \
                policy_idx
            container_partition = container_info['partition']
            containers = container_info['nodes']
            new_del_req.acl = container_info['write_acl']
            new_del_req.path_info = copy_path
            req = new_del_req
            # remove 'X-If-Delete-At', since it is not for the older copy
            if 'X-If-Delete-At' in req.headers:
                del req.headers['X-If-Delete-At']
            break
    if 'swift.authorize' in req.environ:
        aresp = req.environ['swift.authorize'](req)
        if aresp:
            return aresp
    if not containers:
        return HTTPNotFound(request=req)
    partition, nodes = obj_ring.get_nodes(self.account_name,
                                          self.container_name,
                                          self.object_name)
    # Used by container sync feature
    if 'x-timestamp' in req.headers:
        try:
            req_timestamp = Timestamp(req.headers['X-Timestamp'])
        except ValueError:
            return HTTPBadRequest(
                request=req, content_type='text/plain',
                body='X-Timestamp should be a UNIX timestamp float value; '
                     'was %r' % req.headers['x-timestamp'])
        req.headers['X-Timestamp'] = req_timestamp.internal
    else:
        req.headers['X-Timestamp'] = Timestamp(time.time()).internal
    headers = self._backend_requests(req, len(nodes), container_partition,
                                     containers)
    # When deleting objects treat a 404 status as 204.
    status_overrides = {404: 204}
    resp = self.make_requests(req, obj_ring, partition, 'DELETE',
                              req.swift_entity_path, headers,
                              overrides=status_overrides)
    return resp
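The 404 -> 204 override makes object DELETE idempotent from the client's point of view: a replica that never stored the object still reads as a successful delete. A toy illustration of the override table:

status_overrides = {404: 204}
backend_statuses = [204, 404, 404]
effective = [status_overrides.get(s, s) for s in backend_statuses]
# effective == [204, 204, 204], which quorums to 204 No Content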
def headers(self): if is_success(self.status_int): self._headers.update(self.base_headers) return self._headers
def process_container(self, dbfile): """ Process a container, and update the information in the account. :param dbfile: container DB to process """ start_time = time.time() broker = ContainerBroker(dbfile, logger=self.logger) info = broker.get_info() # Don't send updates if the container was auto-created since it # definitely doesn't have up to date statistics. if Timestamp(info['put_timestamp']) <= 0: return if self.account_suppressions.get(info['account'], 0) > time.time(): return if info['put_timestamp'] > info['reported_put_timestamp'] or \ info['delete_timestamp'] > info['reported_delete_timestamp'] \ or info['object_count'] != info['reported_object_count'] or \ info['bytes_used'] != info['reported_bytes_used']: container = '/%s/%s' % (info['account'], info['container']) part, nodes = self.get_account_ring().get_nodes(info['account']) events = [ spawn(self.container_report, node, part, container, info['put_timestamp'], info['delete_timestamp'], info['object_count'], info['bytes_used'], info['storage_policy_index']) for node in nodes ] successes = 0 for event in events: if is_success(event.wait()): successes += 1 if successes >= quorum_size(len(events)): self.logger.increment('successes') self.successes += 1 self.logger.debug( _('Update report sent for %(container)s %(dbfile)s'), { 'container': container, 'dbfile': dbfile }) broker.reported(info['put_timestamp'], info['delete_timestamp'], info['object_count'], info['bytes_used']) else: self.logger.increment('failures') self.failures += 1 self.logger.debug( _('Update report failed for %(container)s %(dbfile)s'), { 'container': container, 'dbfile': dbfile }) self.account_suppressions[info['account']] = until = \ time.time() + self.account_suppression_time if self.new_account_suppressions: print(info['account'], until, file=self.new_account_suppressions) # Only track timing data for attempted updates: self.logger.timing_since('timing', start_time) else: self.logger.increment('no_changes') self.no_changes += 1
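The report above only counts as delivered when a majority of account replicas accept it. Swift's quorum for replicated rings is a simple majority; a local stand-in of the check:

def quorum_size(n):
    # majority quorum, as used for replicated account/container rings
    return (n // 2) + 1

statuses = [204, 204, 500]
successes = sum(1 for s in statuses if 200 <= s < 300)
assert successes >= quorum_size(len(statuses))  # 2 of 3 is enough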
def __call__(self, env, start_response):
    def my_start_response(status, headers, exc_info=None):
        self.status = status
        self.headers = list(headers)
        self.exc_info = exc_info

    self.env = env
    self.start_response = start_response
    # If request was already processed by autosync
    # (here or at the original cluster where it first hit)
    if 'HTTP_X_ORIG_CLUSTER' in env:
        print >> sys.stderr, 'HTTP_X_ORIG_CLUSTER found!'
        if self.override_auth:
            env['swift_owner'] = True
        return self.app(env, start_response)
    # If it is a local call or a tempurl object call
    if 'swift.authorize_override' in env:
        return self.app(env, start_response)
    # Get placement parameters
    if 'swift.my_cluster' in env:
        self.my_cluster = env['swift.my_cluster']
    else:
        self.my_cluster = self.default_my_cluster
    if 'swift.placement' in env:
        placement = env['swift.placement']
    else:
        placement = self.default_placement or self.my_cluster
    if not self.my_cluster or not placement:
        return self.app(env, start_response)
    self.req = Request(env)
    # For now we support only placement here and in one other place
    if self.my_cluster not in placement:
        return HTTPInternalServerError(request=self.req)
        # return self.redirect()
    peers = [p for p in placement if p != self.my_cluster]
    if len(peers) != 1:
        return HTTPInternalServerError(request=self.req)
    # This request needs to be handled locally
    try:
        (version, account, container, obj) = \
            self.req.split_path(2, 4, True)
    except ValueError:
        return self.app(env, start_response)
    if obj or self.req.method in ('OPTIONS', 'GET', 'HEAD'):
        # business as usual - I will serve the request locally and be done
        # TBD, in case of 404 returned from GET object, try a remote copy?
        return self.app(env, start_response)
    # Let's see, it's either PUT, POST or DELETE account/container.
    # Otherwise said - 'we need to change the account/container'
    # both here and with peers...
    # As part of any container creation/modification (PUT/POST):
    # Create a new key to protect the container communication from now
    # and until the next time the container is updated.
    # Note that a race may occur with container-sync daemons, resulting
    # in container-sync failing due to misaligned keys.
    # Changing the keys per update helps support changes in the placement
    # and can serve as a simple mechanism for replacing container sync
    # keys. If this turns out to be an issue, we may extract and reuse
    # the same key for the duration of the container's existence.
    if container and self.req.method in ['POST', 'PUT']:
        key = ''.join(choice(self.keychars) for x in range(64))
        # Add the key to the env when calling the local cluster
        env['HTTP_X_CONTAINER_SYNC_KEY'] = key
        # Set the container replica of the local cluster to sync to the
        # last cluster in the list of peers
        sync_to_peer = peers[-1]  # Sync to the prev peer
        sync_to = sync_to_peer + self.env['PATH_INFO']
        env['HTTP_X_CONTAINER_SYNC_TO'] = sync_to
    else:
        key = None  # Signals that there are no Container-Sync headers

    # Try locally; if we fail and it is not a DELETE, respond with a
    # failure.
    resp_data = self.app(self.env, my_start_response)
    data = ''.join(iter(resp_data))
    if hasattr(resp_data, 'close'):
        resp_data.close()
    resp_status_int = int(self.status[:3])
    # Failure at local cluster during anything but DELETE... abandon ship
    if not is_success(resp_status_int) and self.req.method != 'DELETE':
        # Don't even try the peers
        start_response(self.status, self.headers, self.exc_info)
        return data

    # Call peers and accumulate responses
    try:
        # Note that key is None if not during container PUT/POST
        resps = self.send_to_peers(peers, key)
        # Append the local cluster response
        resps.append((resp_status_int, self.headers, data))
    except Exception:
        return HTTPServiceUnavailable(request=self.req)

    resp = None
    if self.req.method == 'DELETE':
        # Special treatment for DELETE - respond with the best we have
        resp = self.highest_response(resps, swap={'404': '1'})
    else:
        # PUT/POST - respond only if all succeeded
        if self.all_success(resps):
            resp = self.highest_response(resps)
        else:
            # PUT/POST with local success and remote failure
            resp = HTTPServiceUnavailable(request=self.req)
    return resp(env, start_response)
def _listing_pages_iter(self, account_name, lcontainer, lprefix, env, marker='', end_marker='', reverse=True): '''Get "pages" worth of objects that start with a prefix. The optional keyword arguments ``marker``, ``end_marker``, and ``reverse`` are used similar to how they are for containers. We're either coming: - directly from ``_listing_iter``, in which case none of the optional args are specified, or - from ``_in_proxy_reverse_listing``, in which case ``reverse`` is ``False`` and both ``marker`` and ``end_marker`` are specified (although they may still be blank). ''' while True: lreq = make_pre_authed_request(env, method='GET', swift_source='VW', path='/v1/%s/%s' % (account_name, lcontainer)) lreq.environ['QUERY_STRING'] = \ 'format=json&prefix=%s&marker=%s' % ( quote(lprefix), quote(marker)) if end_marker: lreq.environ['QUERY_STRING'] += '&end_marker=%s' % ( quote(end_marker)) if reverse: lreq.environ['QUERY_STRING'] += '&reverse=on' lresp = lreq.get_response(self.app) if not is_success(lresp.status_int): if lresp.status_int == HTTP_NOT_FOUND: raise ListingIterNotFound() elif is_client_error(lresp.status_int): raise HTTPPreconditionFailed() else: raise ListingIterError() if not lresp.body: break sublisting = json.loads(lresp.body) if not sublisting: break # When using the ``reverse`` param, check that the listing is # actually reversed first_item = sublisting[0]['name'].encode('utf-8') last_item = sublisting[-1]['name'].encode('utf-8') page_is_after_marker = marker and first_item > marker if reverse and (first_item < last_item or page_is_after_marker): # Apparently there's at least one pre-2.6.0 container server yield self._in_proxy_reverse_listing(account_name, lcontainer, lprefix, env, marker, sublisting) return marker = last_item yield sublisting
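Listing pagination here is marker-driven: each page's last name becomes the marker for the next request, so pages resume exactly where the previous one ended. A minimal in-memory sketch of the same loop:

def paged(names, page_size):
    # names must be sorted, as container listings are
    marker = ''
    while True:
        page = [n for n in names if n > marker][:page_size]
        if not page:
            break
        marker = page[-1]
        yield page

list(paged(['a', 'b', 'c', 'd', 'e'], 2))
# -> [['a', 'b'], ['c', 'd'], ['e']]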
def _load_next_segment(self):
    """
    Loads the self.segment_iter with the next object segment's contents.

    :raises: StopIteration when there are no more object segments or
             segment no longer matches SLO manifest specifications.
    """
    try:
        self.ratelimit_index += 1
        self.segment_dict = self.segment_peek or next(self.listing)
        self.segment_peek = None
        if self.container is None:
            container, obj = \
                self.segment_dict['name'].lstrip('/').split('/', 1)
        else:
            container, obj = self.container, self.segment_dict['name']
        partition = self.controller.app.object_ring.get_part(
            self.controller.account_name, container, obj)
        path = '/%s/%s/%s' % (self.controller.account_name, container,
                              obj)
        req = Request.blank(path)
        if self.seek:
            req.range = 'bytes=%s-' % self.seek
            self.seek = 0
        if not self.is_slo and self.ratelimit_index > \
                self.controller.app.rate_limit_after_segment:
            sleep(max(self.next_get_time - time.time(), 0))
        self.next_get_time = time.time() + \
            1.0 / self.controller.app.rate_limit_segments_per_sec
        resp = self.controller.GETorHEAD_base(
            req, _('Object'), self.controller.app.object_ring, partition,
            path)
        if self.is_slo and resp.status_int == HTTP_NOT_FOUND:
            raise SloSegmentError(_(
                'Could not load object segment %(path)s:'
                ' %(status)s') % {'path': path,
                                  'status': resp.status_int})
        if not is_success(resp.status_int):
            raise Exception(_(
                'Could not load object segment %(path)s:'
                ' %(status)s') % {'path': path,
                                  'status': resp.status_int})
        if self.is_slo:
            if resp.etag != self.segment_dict['hash']:
                raise SloSegmentError(_(
                    'Object segment no longer valid: '
                    '%(path)s etag: %(r_etag)s != %(s_etag)s.' %
                    {'path': path, 'r_etag': resp.etag,
                     's_etag': self.segment_dict['hash']}))
            if 'X-Static-Large-Object' in resp.headers:
                raise SloSegmentError(_(
                    'SLO can not be made of other SLOs: %s' % path))
        self.segment_iter = resp.app_iter
        # See NOTE: swift_conn at top of file about this.
        self.segment_iter_swift_conn = getattr(resp, 'swift_conn', None)
    except StopIteration:
        raise
    except SloSegmentError as err:
        if not getattr(err, 'swift_logged', False):
            self.controller.app.logger.error(_(
                'ERROR: While processing manifest '
                '/%(acc)s/%(cont)s/%(obj)s, %(err)s'),
                {'acc': self.controller.account_name,
                 'cont': self.controller.container_name,
                 'obj': self.controller.object_name, 'err': err})
            err.swift_logged = True
            self.response.status_int = HTTP_CONFLICT
        raise StopIteration('Invalid manifest segment')
def _requests_to_bytes_iter(self):
    # Take the requests out of self._coalesce_requests, actually make
    # the requests, and generate the bytes from the responses.
    #
    # Yields 2-tuples (segment-name, byte-chunk). The segment name is
    # used for logging.
    for data_or_req, seg_etag, seg_size in self._coalesce_requests():
        if isinstance(data_or_req, bytes):
            # ugly, awful overloading
            yield ('data segment', data_or_req)
            continue
        seg_req = data_or_req
        seg_resp = seg_req.get_response(self.app)
        if not is_success(seg_resp.status_int):
            # Error body should be short
            body = seg_resp.body
            if not six.PY2:
                body = body.decode('utf8')
            msg = 'While processing manifest %s, got %d (%s) ' \
                'while retrieving %s' % (
                    self.name, seg_resp.status_int,
                    body if len(body) <= 60 else body[:57] + '...',
                    seg_req.path)
            if is_server_error(seg_resp.status_int):
                self.logger.error(msg)
                raise HTTPServiceUnavailable(
                    request=seg_req, content_type='text/plain')
            raise SegmentError(msg)
        elif ((seg_etag and (seg_resp.etag != seg_etag)) or
                (seg_size and (seg_resp.content_length != seg_size) and
                 not seg_req.range)):
            # The content-length check is for security reasons. Seems
            # possible that an attacker could upload a >1mb object and
            # then replace it with a much smaller object with same
            # etag. Then create a big nested SLO that calls that
            # object many times which would hammer our obj servers. If
            # this is a range request, don't check content-length
            # because it won't match.
            close_if_possible(seg_resp.app_iter)
            raise SegmentError(
                'Object segment no longer valid: '
                '%(path)s etag: %(r_etag)s != %(s_etag)s or '
                '%(r_size)s != %(s_size)s.' %
                {'path': seg_req.path, 'r_etag': seg_resp.etag,
                 'r_size': seg_resp.content_length,
                 's_etag': seg_etag, 's_size': seg_size})
        else:
            self.current_resp = seg_resp

        resp_len = 0
        seg_hash = None
        if seg_resp.etag and not seg_req.headers.get('Range'):
            # Only calculate the MD5 if we can use it to validate
            seg_hash = hashlib.md5()

        document_iters = maybe_multipart_byteranges_to_document_iters(
            seg_resp.app_iter,
            seg_resp.headers['Content-Type'])

        for chunk in itertools.chain.from_iterable(document_iters):
            if seg_hash:
                seg_hash.update(chunk)
            resp_len += len(chunk)
            yield (seg_req.path, chunk)
        close_if_possible(seg_resp.app_iter)

        if seg_hash:
            if resp_len != seg_resp.content_length:
                raise SegmentError(
                    "Bad response length for %(seg)s as part of %(name)s: "
                    "headers had %(from_headers)s, but response length "
                    "was actually %(actual)s" %
                    {'seg': seg_req.path,
                     'from_headers': seg_resp.content_length,
                     'name': self.name, 'actual': resp_len})
            if seg_hash.hexdigest() != seg_resp.etag:
                raise SegmentError(
                    "Bad MD5 checksum for %(seg)s as part of %(name)s: "
                    "headers had %(etag)s, but object MD5 was actually "
                    "%(actual)s" %
                    {'seg': seg_req.path, 'etag': seg_resp.etag,
                     'name': self.name,
                     'actual': seg_hash.hexdigest()})
def handle_obj_versions_put(self, req, object_versions,
                            object_name, policy_index):
    ret = None

    # do a HEAD request to check object versions
    _headers = {'X-Newest': 'True',
                'X-Backend-Storage-Policy-Index': policy_index,
                'x-auth-token': req.headers.get('x-auth-token')}

    # make a pre_auth request in case the user has write access
    # to container, but not READ. This was allowed in previous version
    # (i.e., before middleware) so keeping the same behavior here
    head_req = make_pre_authed_request(
        req.environ, path=req.path_info,
        headers=_headers, method='HEAD', swift_source='VW')
    hresp = head_req.get_response(self.app)

    is_dlo_manifest = 'X-Object-Manifest' in req.headers or \
                      'X-Object-Manifest' in hresp.headers

    # if there's an existing object, then copy it to
    # X-Versions-Location
    if is_success(hresp.status_int) and not is_dlo_manifest:
        lcontainer = object_versions.split('/')[0]
        prefix_len = '%03x' % len(object_name)
        lprefix = prefix_len + object_name + '/'
        ts_source = hresp.environ.get('swift_x_timestamp')
        if ts_source is None:
            ts_source = time.mktime(time.strptime(
                hresp.headers['last-modified'],
                '%a, %d %b %Y %H:%M:%S GMT'))
        new_ts = Timestamp(ts_source).internal
        vers_obj_name = lprefix + new_ts
        copy_headers = {
            'Destination': '%s/%s' % (lcontainer, vers_obj_name),
            'x-auth-token': req.headers.get('x-auth-token')}

        # COPY implementation sets X-Newest to True when it internally
        # does a GET on the source object, so we don't have to
        # explicitly set it in the request headers here.
        copy_req = make_pre_authed_request(
            req.environ, path=req.path_info,
            headers=copy_headers, method='COPY', swift_source='VW')
        copy_resp = copy_req.get_response(self.app)

        if is_success(copy_resp.status_int):
            # successfully copied the existing object to the versions
            # container; return None and handle the original request
            ret = None
        elif is_client_error(copy_resp.status_int):
            # missing container or bad permissions
            ret = HTTPPreconditionFailed(request=req)
        else:
            # could not copy the data, bail
            ret = HTTPServiceUnavailable(request=req)
    else:
        if hresp.status_int == HTTP_NOT_FOUND or is_dlo_manifest:
            # nothing to version
            # return None and handle original request
            ret = None
        else:
            # if not HTTP_NOT_FOUND, return error immediately
            ret = hresp

    return ret
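Archived versions are named <length-prefix><object-name>/<timestamp>, where the prefix is the object name's length as three hex digits so that per-object versions group and sort correctly in listings. A small sketch of the naming:

def version_obj_name(object_name, internal_timestamp):
    prefix_len = '%03x' % len(object_name)
    return prefix_len + object_name + '/' + internal_timestamp

version_obj_name('photo.jpg', '0000001234.56789')
# -> '009photo.jpg/0000001234.56789'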
def all_success(self, resps): for resp in resps: if not is_success(resp[0]): return False return True
def _internal_iter(self):
    bytes_left = self.response_body_length
    try:
        for seg_req, seg_etag, seg_size in self._coalesce_requests():
            seg_resp = seg_req.get_response(self.app)
            if not is_success(seg_resp.status_int):
                close_if_possible(seg_resp.app_iter)
                raise SegmentError(
                    'ERROR: While processing manifest %s, '
                    'got %d while retrieving %s' %
                    (self.name, seg_resp.status_int, seg_req.path))
            elif ((seg_etag and (seg_resp.etag != seg_etag)) or
                    (seg_size and (seg_resp.content_length != seg_size) and
                     not seg_req.range)):
                # The content-length check is for security reasons. Seems
                # possible that an attacker could upload a >1mb object and
                # then replace it with a much smaller object with same
                # etag. Then create a big nested SLO that calls that
                # object many times which would hammer our obj servers. If
                # this is a range request, don't check content-length
                # because it won't match.
                close_if_possible(seg_resp.app_iter)
                raise SegmentError(
                    'Object segment no longer valid: '
                    '%(path)s etag: %(r_etag)s != %(s_etag)s or '
                    '%(r_size)s != %(s_size)s.' %
                    {'path': seg_req.path, 'r_etag': seg_resp.etag,
                     'r_size': seg_resp.content_length,
                     's_etag': seg_etag, 's_size': seg_size})
            else:
                self.current_resp = seg_resp

            seg_hash = None
            if seg_resp.etag and not seg_req.headers.get('Range'):
                # Only calculate the MD5 if we can use it to validate
                seg_hash = hashlib.md5()

            document_iters = maybe_multipart_byteranges_to_document_iters(
                seg_resp.app_iter,
                seg_resp.headers['Content-Type'])

            for chunk in itertools.chain.from_iterable(document_iters):
                if seg_hash:
                    seg_hash.update(chunk)

                if bytes_left is None:
                    yield chunk
                elif bytes_left >= len(chunk):
                    yield chunk
                    bytes_left -= len(chunk)
                else:
                    yield chunk[:bytes_left]
                    bytes_left -= len(chunk)
                    close_if_possible(seg_resp.app_iter)
                    raise SegmentError(
                        'Too many bytes for %(name)s; truncating in '
                        '%(seg)s with %(left)d bytes left' %
                        {'name': self.name, 'seg': seg_req.path,
                         'left': bytes_left})
            close_if_possible(seg_resp.app_iter)

            if seg_hash and seg_hash.hexdigest() != seg_resp.etag:
                raise SegmentError(
                    "Bad MD5 checksum in %(name)s for %(seg)s: headers had"
                    " %(etag)s, but object MD5 was actually %(actual)s" %
                    {'seg': seg_req.path, 'etag': seg_resp.etag,
                     'name': self.name, 'actual': seg_hash.hexdigest()})

        if bytes_left:
            raise SegmentError(
                'Not enough bytes for %s; closing connection' % self.name)
    except (ListingIterError, SegmentError):
        self.logger.exception(_('ERROR: An error occurred '
                                'while retrieving segments'))
        raise
    finally:
        if self.current_resp:
            close_if_possible(self.current_resp.app_iter)
def handle_obj_versions_delete(self, req, object_versions, account_name, container_name, object_name): lcontainer = object_versions.split('/')[0] prefix_len = '%03x' % len(object_name) lprefix = prefix_len + object_name + '/' item_iter = self._listing_iter(account_name, lcontainer, lprefix, req) authed = False for previous_version in item_iter: if not authed: # we're about to start making COPY requests - need to # validate the write access to the versioned container if 'swift.authorize' in req.environ: container_info = get_container_info( req.environ, self.app) req.acl = container_info.get('write_acl') aresp = req.environ['swift.authorize'](req) if aresp: return aresp authed = True # there are older versions so copy the previous version to the # current object and delete the previous version prev_obj_name = previous_version['name'].encode('utf-8') copy_path = '/v1/' + account_name + '/' + \ lcontainer + '/' + prev_obj_name copy_headers = {'X-Newest': 'True', 'Destination': container_name + '/' + object_name, 'x-auth-token': req.headers.get('x-auth-token')} copy_req = make_pre_authed_request( req.environ, path=copy_path, headers=copy_headers, method='COPY', swift_source='VW') copy_resp = copy_req.get_response(self.app) # if the version isn't there, keep trying with previous version if copy_resp.status_int == HTTP_NOT_FOUND: continue if not is_success(copy_resp.status_int): if is_client_error(copy_resp.status_int): # some user error, maybe permissions return HTTPPreconditionFailed(request=req) else: # could not copy the data, bail return HTTPServiceUnavailable(request=req) # reset these because the COPY changed them new_del_req = make_pre_authed_request( req.environ, path=copy_path, method='DELETE', swift_source='VW') req = new_del_req # remove 'X-If-Delete-At', since it is not for the older copy if 'X-If-Delete-At' in req.headers: del req.headers['X-If-Delete-At'] break # handle DELETE request here in case it was modified return req.get_response(self.app)
def GET(self, env, start_response): """ Handle GET Bucket (List Objects) request """ if 'QUERY_STRING' in env: args = dict(urlparse.parse_qsl(env['QUERY_STRING'], 1)) else: args = {} if 'max-keys' in args: if args.get('max-keys').isdigit() is False: return get_err_response('InvalidArgument') if 'uploads' in args: # Pass it through, the s3multi upload helper will handle it. return self.app(env, start_response) max_keys = min(int(args.get('max-keys', MAX_BUCKET_LISTING)), MAX_BUCKET_LISTING) if 'acl' not in args: #acl request sent with format=json etc confuses swift env['QUERY_STRING'] = 'format=json&limit=%s' % (max_keys + 1) if 'marker' in args: env['QUERY_STRING'] += '&marker=%s' % quote(args['marker']) if 'prefix' in args: env['QUERY_STRING'] += '&prefix=%s' % quote(args['prefix']) if 'delimiter' in args: env['QUERY_STRING'] += '&delimiter=%s' % quote(args['delimiter']) body_iter = self._app_call(env) status = self._get_status_int() headers = dict(self._response_headers) if is_success(status) and 'acl' in args: return get_acl(self.account_name, headers) if 'versioning' in args: # Just report there is no versioning configured here. body = '<VersioningConfiguration %s/>' % AWS_XML_NS2 # S3 responses don't seem to have any Content-Type header on these # responses, but if they did, surely it would be application/xml return Response(body=body, content_type="application/xml") if status != HTTP_OK: if status in (HTTP_UNAUTHORIZED, HTTP_FORBIDDEN): return get_err_response('AccessDenied') elif status == HTTP_NOT_FOUND: return get_err_response('NoSuchBucket') else: return get_err_response('InvalidURI') if 'location' in args: body = '%s<LocationConstraint %s' % (XML_HEADER, AWS_XML_NS2) if self.location == 'US': body += '/>' else: body += ('>%s</LocationConstraint>' % self.location) return Response(body=body, content_type='application/xml') if 'logging' in args: # logging disabled body = '%s<BucketLoggingStatus %s />' % (XML_HEADER, AWS_XML_NS) return Response(body=body, content_type='application/xml') objects = loads(''.join(list(body_iter))) body = ( '%s<ListBucketResult %s>' '<Prefix>%s</Prefix>' '<Marker>%s</Marker>' '<Delimiter>%s</Delimiter>' '<IsTruncated>%s</IsTruncated>' '<MaxKeys>%s</MaxKeys>' '<Name>%s</Name>' '%s' '%s' '</ListBucketResult>' % (XML_HEADER, AWS_XML_NS3, xml_escape(args.get( 'prefix', '')), xml_escape(args.get('marker', '')), xml_escape(args.get('delimiter', '')), 'true' if max_keys > 0 and len(objects) == (max_keys + 1) else 'false', max_keys, xml_escape(self.container_name), "".join([ '<Contents><Key>%s</Key>' '<LastModified>%sZ</LastModified>' '<ETag>%s</ETag><Size>%s</Size>' '<StorageClass>STANDARD</StorageClass>' '<Owner><ID>%s</ID><DisplayName>' '%s</DisplayName></Owner></Contents>' % (xml_escape(unquote(i['name'])), i['last_modified'][:-3], i['hash'], i['bytes'], self.account_name, self.account_name) for i in objects[:max_keys] if 'subdir' not in i ]), "".join([ '<CommonPrefixes><Prefix>%s</Prefix>' '</CommonPrefixes>' % xml_escape(i['subdir']) for i in objects[:max_keys] if 'subdir' in i ]))) return Response(body=body, content_type='application/xml')
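The listing above asks the backend for max_keys + 1 rows so it can compute IsTruncated without a second round trip, then renders at most max_keys of them. The clamp and truncation test, isolated (the constant value and the helper are illustrative):

MAX_BUCKET_LISTING = 1000  # assumed module constant

def truncated_listing(rows, requested_max_keys):
    # rows should be fetched with limit=max_keys + 1
    max_keys = min(requested_max_keys, MAX_BUCKET_LISTING)
    is_truncated = len(rows) > max_keys
    return rows[:max_keys], is_truncated

visible, is_truncated = truncated_listing(['a', 'b', 'c'], 2)
# visible == ['a', 'b'], is_truncated == True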
def write(self, data=b'', hdrs=None, parms=None, callback=None, cfg=None, return_resp=False): if hdrs is None: hdrs = {} if parms is None: parms = {} if cfg is None: cfg = {} block_size = 2**20 if all(hasattr(data, attr) for attr in ('flush', 'seek', 'fileno')): try: data.flush() data.seek(0) except IOError: pass self.size = int(os.fstat(data.fileno())[6]) else: data = io.BytesIO(data) self.size = data.seek(0, os.SEEK_END) data.seek(0) headers = self.make_headers(cfg=cfg) headers.update(hdrs) def try_request(): # rewind to be ready for another attempt data.seek(0) self.conn.put_start(self.path, hdrs=headers, parms=parms, cfg=cfg) transferred = 0 for buff in iter(lambda: data.read(block_size), b''): self.conn.put_data(buff) transferred += len(buff) if callable(callback): callback(transferred, self.size) self.conn.put_end() return self.conn.response try: self.response = self.conn.request_with_retry(try_request) except RequestError as e: raise ResponseError(self.conn.response, 'PUT', self.conn.make_path(self.path), details=str(e)) if not is_success(self.response.status): raise ResponseError(self.conn.response, 'PUT', self.conn.make_path(self.path)) try: data.seek(0) except IOError: pass self.md5 = self.compute_md5sum(data) if return_resp: return self.conn.response return True
def async_update(self, op, account, container, obj, host, partition,
                 contdevice, headers_out, objdevice):
    """
    In Openstack Swift, this method is called by:
        * container_update (a no-op in gluster-swift)
        * delete_at_update (to PUT objects into .expiring_objects account)

    Swift's version of async_update only sends the request to the
    container-server to PUT the object. The container-server calls the
    container_update method, which makes an entry for the object in its
    database. No actual object is created on disk.

    But in gluster-swift container_update is a no-op, so we'll have to
    PUT an actual object. We override async_update to create a container
    first and then the corresponding "tracker object" which tracks
    expired objects scheduled for deletion.
    """
    headers_out['user-agent'] = 'obj-server %s' % os.getpid()
    if all([host, partition, contdevice]):
        # PUT the container. Send request directly to container-server
        container_path = '/%s/%s' % (account, container)
        try:
            with ConnectionTimeout(self.conn_timeout):
                ip, port = host.rsplit(':', 1)
                conn = http_connect(ip, port, contdevice, partition, op,
                                    container_path, headers_out)
            with Timeout(self.node_timeout):
                response = conn.getresponse()
                response.read()
                if not is_success(response.status):
                    self.logger.error(_(
                        'async_update : '
                        'ERROR Container update failed :%(status)d '
                        'response from %(ip)s:%(port)s/%(dev)s'),
                        {'status': response.status, 'ip': ip,
                         'port': port, 'dev': contdevice})
                    return
        except (Exception, Timeout):
            self.logger.exception(_(
                'async_update : '
                'ERROR Container update failed :%(ip)s:%(port)s/%(dev)s'),
                {'ip': ip, 'port': port, 'dev': contdevice})

        # PUT the tracker object. Send request directly to object-server
        object_path = '/%s/%s/%s' % (account, container, obj)
        headers_out['Content-Length'] = 0
        headers_out['Content-Type'] = 'text/plain'
        try:
            with ConnectionTimeout(self.conn_timeout):
                # FIXME: Assuming that get_nodes returns single node
                part, nodes = self.get_object_ring().get_nodes(
                    account, container, obj)
                ip = nodes[0]['ip']
                port = nodes[0]['port']
                objdevice = nodes[0]['device']
                conn = http_connect(ip, port, objdevice, partition, op,
                                    object_path, headers_out)
            with Timeout(self.node_timeout):
                response = conn.getresponse()
                response.read()
                if is_success(response.status):
                    return
                else:
                    self.logger.error(_(
                        'async_update : '
                        'ERROR Object PUT failed : %(status)d '
                        'response from %(ip)s:%(port)s/%(dev)s'),
                        {'status': response.status, 'ip': ip,
                         'port': port, 'dev': objdevice})
        except (Exception, Timeout):
            self.logger.exception(_(
                'async_update : '
                'ERROR Object PUT failed :%(ip)s:%(port)s/%(dev)s'),
                {'ip': ip, 'port': port, 'dev': objdevice})
    return
def _fetch_and_merge_shard_ranges(self, http, broker): with Timeout(self.node_timeout): response = http.replicate('get_shard_ranges') if response and is_success(response.status): broker.merge_shard_ranges(json.loads( response.data.decode('ascii')))