def fast_forward(self, num_bytes):
    """
    Skip num_bytes into the current range.

    Rewrites the ``Range`` entry of ``self.backend_headers`` so that the
    next backend request starts where the previous one left off.

    :param num_bytes: the number of bytes that have already been read on
                      this request
    :raises NotImplementedError: if this is a multirange request
    :raises ValueError: if invalid range header
    :raises HTTPRequestedRangeNotSatisfiable: if no bytes of the range
                                              remain after skipping
                                              num_bytes
    """
    if 'Range' not in self.backend_headers:
        # No range was requested, so simply resume from the offset.
        self.backend_headers['Range'] = 'bytes=%d-' % num_bytes
        return

    req_range = Range(self.backend_headers['Range'])
    if len(req_range.ranges) > 1:
        raise NotImplementedError()

    begin, end = req_range.ranges[0]
    if begin is None:
        # this is a -50 range req (last 50 bytes of file); "end" holds the
        # suffix length, so consuming bytes shrinks it.
        end -= num_bytes
        if end <= 0:
            # Nothing is left of the suffix; a zero or negative suffix
            # length cannot be expressed as a valid Range header.
            raise HTTPRequestedRangeNotSatisfiable()
    else:
        begin += num_bytes
        # Compare against None explicitly: an end of 0 ("bytes=0-0") is a
        # real bound and must not be treated as an open-ended range.
        if end is not None and begin > end:
            raise HTTPRequestedRangeNotSatisfiable()

    req_range.ranges = [(begin, end)]
    self.backend_headers['Range'] = str(req_range)
def _manifest_get_response(self, req, content_length, response_headers,
                           segments):
    """
    Build the GET response for a manifest whose body is assembled from
    its segments.

    :param req: the client request
    :param content_length: total length used to resolve the request's
                           byte ranges and set on the response
    :param response_headers: headers to place on the response
    :param segments: segment data passed to the segment listing iterator
    :returns: a 416 response when the Range header matches nothing, a 409
              response when validating the first segment fails, otherwise
              the Response wrapping the segmented iterator
    """
    self.first_byte, self.last_byte = None, None
    if req.range:
        matched = req.range.ranges_for_length(content_length)
        matched_count = len(matched)
        if matched_count == 0:
            return HTTPRequestedRangeNotSatisfiable(request=req)
        if matched_count == 1:
            self.first_byte, self.last_byte = matched[0]
            # For some reason, swob.Range.ranges_for_length adds 1 to the
            # last byte's position.
            self.last_byte -= 1
        else:
            # More than one range matched: drop the Range header.
            req.range = None

    ver, account, _junk = req.split_path(3, 3, rest_with_last=True)
    raw_listing = self._segment_listing_iterator(
        req, ver, account, segments)
    throttled_listing = RateLimitedIterator(
        raw_listing,
        self.slo.rate_limit_segments_per_sec,
        limit_after=self.slo.rate_limit_after_segment)

    # self._segment_listing_iterator gives us 3-tuples of (segment dict,
    # start byte, end byte), but SegmentedIterable wants (obj path, etag,
    # size, start byte, end byte), so we clean that up here
    segment_tuples = (
        ("/{ver}/{acc}/{conobj}".format(
            ver=ver, acc=account, conobj=entry['name'].lstrip('/')),
         entry['hash'],
         int(entry['bytes']),
         seg_start,
         seg_end)
        for entry, seg_start, seg_end in throttled_listing)

    body_iter = SegmentedIterable(
        req, self.slo.app, segment_tuples,
        name=req.path,
        logger=self.slo.logger,
        ua_suffix="SLO MultipartGET",
        swift_source="SLO",
        max_get_time=self.slo.max_get_time)

    try:
        body_iter.validate_first_segment()
    except (ListingIterError, SegmentError):
        # Copy from the SLO explanation in top of this file.
        # If any of the segments from the manifest are not found or
        # their Etag/Content Length no longer match the connection
        # will drop. In this case a 409 Conflict will be logged in
        # the proxy logs and the user will receive incomplete results.
        return HTTPConflict(request=req)

    response = Response(
        request=req,
        content_length=content_length,
        headers=response_headers,
        conditional_response=True,
        app_iter=body_iter)
    if req.range:
        # Remove the Etag header from ranged responses.
        response.headers.pop('Etag')
    return response
def _manifest_get_response(self, req, content_length, response_headers,
                           segments):
    """
    Build the GET response for a manifest whose body is assembled from
    its segments.

    :param req: the client request
    :param content_length: total length used to resolve the request's
                           byte ranges and set on the response
    :param response_headers: headers to place on the response
    :param segments: segment data passed to the segment listing iterator
    :returns: a 416 response when the Range header matches nothing,
              otherwise the Response wrapping the segmented iterator
    """
    self.first_byte, self.last_byte = None, None
    if req.range:
        matched = req.range.ranges_for_length(content_length)
        matched_count = len(matched)
        if matched_count == 0:
            return HTTPRequestedRangeNotSatisfiable(request=req)
        if matched_count == 1:
            self.first_byte, self.last_byte = matched[0]
            # For some reason, swob.Range.ranges_for_length adds 1 to the
            # last byte's position.
            self.last_byte -= 1
        else:
            # More than one range matched: drop the Range header.
            req.range = None

    ver, account, _junk = req.split_path(3, 3, rest_with_last=True)
    raw_listing = self._segment_listing_iterator(
        req, ver, account, segments)
    throttled_listing = RateLimitedIterator(
        raw_listing,
        self.slo.rate_limit_segments_per_sec,
        limit_after=self.slo.rate_limit_after_segment)

    # self._segment_listing_iterator gives us 3-tuples of (segment dict,
    # start byte, end byte), but SegmentedIterable wants (obj path, etag,
    # size, start byte, end byte), so we clean that up here
    segment_tuples = (
        ("/{ver}/{acc}/{conobj}".format(
            ver=ver, acc=account, conobj=entry['name'].lstrip('/')),
         entry['hash'],
         int(entry['bytes']),
         seg_start,
         seg_end)
        for entry, seg_start, seg_end in throttled_listing)

    body_iter = SegmentedIterable(
        req, self.slo.app, segment_tuples,
        name=req.path,
        logger=self.slo.logger,
        ua_suffix="SLO MultipartGET",
        swift_source="SLO",
        max_get_time=self.slo.max_get_time)

    response = Response(
        request=req,
        content_length=content_length,
        headers=response_headers,
        conditional_response=True,
        app_iter=body_iter)
    if req.range:
        # Remove the Etag header from ranged responses.
        response.headers.pop('Etag')
    return response
def GET(self):
    """
    GET handler on object-server

    :returns: the unmodified backend response when it is not successful
              or when the storlet has to run on the proxy, otherwise the
              result of applying the storlet to the backend response
    :raises HTTPRequestedRangeNotSatisfiable: if the request carries a
        Range header without an accompanying X-Storlet-Range header
    """
    # The proxy may add a Range header in the case
    # where the execution is to be done on proxy only
    # (and X-Storlet-Range header exists)
    # Hence we allow having a Range header ONLY
    # if there is also X-Storlet-Range
    # Otherwise, running a Storlet together with
    # The HTTP Range header is not allowed
    if self.is_range_request and not self.is_storlet_range_request:
        raise HTTPRequestedRangeNotSatisfiable(
            b'Storlet execution with range header is not supported',
            request=self.request)

    orig_resp = self.request.get_response(self.app)
    if not orig_resp.is_success:
        return orig_resp

    # TODO(takashi): not sure manifest file should not be run with storlet
    # Every condition is evaluated up front, before any() inspects them.
    proxy_side_conditions = (
        self.execute_on_proxy,
        self.execute_range_on_proxy,
        self.is_slo_get_request,
        self.is_slo_response(orig_resp),
    )

    if not any(proxy_side_conditions):
        # We apply here the Storlet:
        self.logger.debug(
            'storlet_handler: invocation over %s to be executed locally'
            % self.request.path)
        return self.apply_storlet(orig_resp)

    # Storlet must be invoked on proxy as it is:
    # either an SLO
    # or storlet-range-request
    # or proxy only mode
    self.logger.debug(
        'storlet_handler: invocation over %s to be executed on proxy'
        % self.request.path)
    return orig_resp
def get_or_head_response(self, req, x_object_manifest,
                         response_headers=None):
    """
    Build the GET or HEAD response for an object assembled from the
    segments listed under the manifest's container/prefix.

    :param req: the client request
    :param x_object_manifest: "<container>/<object prefix>" string, both
                              parts URL-quoted
    :param response_headers: list of (header, value) pairs; defaults to
                             self._response_headers
    :returns: the container-listing error response if there was one, a
              416 response for an unsatisfiable single range, a 409
              response if validating the first segment fails, otherwise
              the assembled Response
    """
    if response_headers is None:
        response_headers = self._response_headers

    quoted_container, quoted_prefix = x_object_manifest.split('/', 1)
    container = unquote(quoted_container)
    obj_prefix = unquote(quoted_prefix)

    version, account, _junk = req.split_path(2, 3, True)
    error_response, segments = self._get_container_listing(
        req, version, account, container, obj_prefix)
    if error_response:
        return error_response

    have_complete_listing = (
        len(segments) < constraints.CONTAINER_LISTING_LIMIT)

    first_byte = last_byte = None
    actual_content_length = None
    content_length_for_swob_range = None
    if req.range and len(req.range.ranges) == 1:
        listed_bytes = sum(seg['bytes'] for seg in segments)

        # This is a hack to handle suffix byte ranges (e.g. "bytes=-5"),
        # which we can't honor unless we have a complete listing.
        _junk, range_end = req.range.ranges_for_length(float("inf"))[0]

        # If this is all the segments, we know whether or not this
        # range request is satisfiable.
        #
        # Alternately, we may not have all the segments, but this range
        # falls entirely within the first page's segments, so we know
        # that it is satisfiable.
        if have_complete_listing or range_end < listed_bytes:
            byteranges = req.range.ranges_for_length(listed_bytes)
            if not byteranges:
                headers = {'Accept-Ranges': 'bytes'}
                if have_complete_listing:
                    headers['Content-Range'] = 'bytes */%d' % (
                        listed_bytes, )
                return HTTPRequestedRangeNotSatisfiable(
                    request=req, headers=headers)
            first_byte, last_byte = byteranges[0]
            # For some reason, swob.Range.ranges_for_length adds 1 to the
            # last byte's position.
            last_byte -= 1
            actual_content_length = last_byte - first_byte + 1
            content_length_for_swob_range = listed_bytes
        else:
            # The range may or may not be satisfiable, but we can't tell
            # based on just one page of listing, and we're not going to go
            # get more pages because that would use up too many resources,
            # so we ignore the Range header and return the whole object.
            req.range = None

    response_headers = [
        (name, value) for name, value in response_headers
        if name.lower() not in ("content-length", "content-range")]

    if content_length_for_swob_range is not None:
        # Here, we have to give swob a big-enough content length so that
        # it can compute the actual content length based on the Range
        # header. This value will not be visible to the client; swob will
        # substitute its own Content-Length.
        #
        # Note: if the manifest points to at least CONTAINER_LISTING_LIMIT
        # segments, this may be less than the sum of all the segments'
        # sizes. However, it'll still be greater than the last byte in the
        # Range header, so it's good enough for swob.
        response_headers.append(
            ('Content-Length', str(content_length_for_swob_range)))
    elif have_complete_listing:
        actual_content_length = sum(seg['bytes'] for seg in segments)
        response_headers.append(
            ('Content-Length', str(actual_content_length)))

    if have_complete_listing:
        # Replace any Etag header with the md5 of the segments' hashes.
        response_headers = [
            (name, value) for name, value in response_headers
            if name.lower() != "etag"]
        etag = md5()
        for seg_dict in segments:
            etag.update(seg_dict['hash'].strip('"'))
        response_headers.append(('Etag', '"%s"' % etag.hexdigest()))

    app_iter = None
    if req.method == 'GET':
        listing_iter = RateLimitedIterator(
            self._segment_listing_iterator(
                req, version, account, container, obj_prefix, segments,
                first_byte=first_byte, last_byte=last_byte),
            self.dlo.rate_limit_segments_per_sec,
            limit_after=self.dlo.rate_limit_after_segment)

        app_iter = SegmentedIterable(
            req, self.dlo.app, listing_iter,
            ua_suffix="DLO MultipartGET",
            swift_source="DLO",
            name=req.path,
            logger=self.logger,
            max_get_time=self.dlo.max_get_time,
            response_body_length=actual_content_length)

        try:
            app_iter.validate_first_segment()
        except (SegmentError, ListingIterError):
            return HTTPConflict(request=req)

    return Response(request=req, headers=response_headers,
                    conditional_response=True, app_iter=app_iter)
class CdnHandler(OriginBase):
    """
    Serves GET/HEAD requests arriving on CDN URLs by mapping the URL's
    hash to an account/container and proxying the request to Swift.
    """

    def __init__(self, app, conf, logger):
        """
        :param app: the WSGI app that Swift requests are sent to
        :param conf: configuration mapping; reads 'max_cdn_file_size',
                     'allowed_origin_remote_ips' (comma separated) and
                     'incoming_url_regex' (mapping of name -> regex)
        :param logger: logger used by this handler
        :raises InvalidConfiguration: if 'incoming_url_regex' is missing
                                      or empty
        """
        OriginBase.__init__(self, app, conf, logger)
        self.logger = logger
        self.max_cdn_file_size = int(
            conf.get('max_cdn_file_size', 10 * 1024**3))
        self.allowed_origin_remote_ips = []
        remote_ips = conf.get('allowed_origin_remote_ips')
        if remote_ips:
            self.allowed_origin_remote_ips = [
                ip.strip() for ip in remote_ips.split(',') if ip.strip()]
        if not conf.get('incoming_url_regex'):
            raise InvalidConfiguration('Invalid config for CdnHandler')
        # Only the patterns matter; the config keys are just labels.
        self.cdn_regexes = [
            re.compile(val) for val in conf['incoming_url_regex'].values()]

    def _getCacheHeaders(self, ttl):
        """
        Return Expires / Cache-Control headers for a lifetime of ``ttl``
        seconds from now.
        """
        return {
            'Expires': strftime("%a, %d %b %Y %H:%M:%S GMT",
                                gmtime(time() + ttl)),
            'Cache-Control': 'max-age=%d, public' % ttl,
        }

    def _getCdnHeaders(self, req):
        """
        Return the headers for the request sent to Swift: fixed
        X-Web-Mode / User-Agent values plus the conditional and range
        headers copied from the client request when present.
        """
        headers = {'X-Web-Mode': 'True', 'User-Agent': 'SOS Origin'}
        for header in ['If-Modified-Since', 'If-Match', 'Range', 'If-Range']:
            if header in req.headers:
                headers[header] = req.headers[header]
        return headers

    def handle_request(self, env, req):
        """
        Handle an incoming CDN request.

        :param env: the WSGI environment; 'swift.cdn_hash',
                    'swift.cdn_object_name' and 'swift.cdn_authorize' are
                    honored when set by earlier middleware
        :param req: the client request
        :returns: the response to send to the client
        :raises OriginRequestNotAllowed: if an allow-list of remote IPs is
            configured and the request's remote address is not in it
        """
        if req.method not in ('GET', 'HEAD'):
            headers = self._getCacheHeaders(CACHE_BAD_URL)
            return HTTPMethodNotAllowed(request=req, headers=headers)
        if self.allowed_origin_remote_ips and \
                req.remote_addr not in self.allowed_origin_remote_ips:
            raise OriginRequestNotAllowed(
                'SOS Origin: Remote IP %s not allowed' % req.remote_addr)

        # allow earlier middleware to override hash and obj_name
        hsh = env.get('swift.cdn_hash')
        object_name = env.get('swift.cdn_object_name')
        if hsh is None or object_name is None:
            for regex in self.cdn_regexes:
                match_obj = regex.match(req.url)
                if match_obj:
                    match_dict = match_obj.groupdict()
                    if not hsh:
                        hsh = match_dict.get('hash')
                    if not object_name:
                        object_name = match_dict.get('object_name')
                    break

        if not hsh:
            self.logger.debug('Hash %s not found in %s' % (hsh, req.url))
            headers = self._getCacheHeaders(CACHE_BAD_URL)
            return HTTPNotFound(request=req, headers=headers)
        if '-' in hsh:
            # Keep only what follows the first dash.
            hsh = hsh.split('-', 1)[1]
        try:
            cdn_obj_path = self.get_hsh_obj_path(hsh)
        except ValueError as e:
            self.logger.debug('get_hsh_obj_path error: %s' % e)
            headers = self._getCacheHeaders(CACHE_BAD_URL)
            return HTTPBadRequest(request=req, headers=headers)

        hash_data = self.get_cdn_data(env, cdn_obj_path)
        if hash_data and hash_data.cdn_enabled:
            # this is a cdn enabled container, proxy req to swift
            account = hash_data.account.encode('utf-8')
            if env.get('swift.cdn_authorize'):
                auth_resp, ttl = env['swift.cdn_authorize'](env, account)
                if auth_resp:
                    return auth_resp(request=req,
                                     headers=self._getCacheHeaders(ttl))
            container = hash_data.container.encode('utf-8')
            swift_path = quote('/v1/%s/%s/' % (account, container))
            if object_name:
                swift_path += object_name
            headers = self._getCdnHeaders(req)
            env['swift.source'] = 'SOS'
            resp = make_pre_authed_request(
                env, req.method, swift_path, headers=headers,
                agent='SwiftOrigin', swift_source='SOS').get_response(
                    self.app)

            if resp.status_int == 301 and 'Location' in resp.headers:
                loc_parsed = urlparse(resp.headers['Location'])
                acc_cont_path = '/v1/%s/%s' % (account, container)
                if loc_parsed.path.startswith(acc_cont_path):
                    # Strip the account/container prefix from the path
                    # before handing the redirect to the client.
                    sos_loc = loc_parsed.path[len(acc_cont_path):]
                    resp = SosResponse(
                        headers=self._getCacheHeaders(hash_data.ttl))
                    resp.headers['Location'] = sos_loc
                    resp.status = 301
                    return resp
                # Not inside an exception handler, so log a plain error
                # rather than logger.exception (no traceback to attach).
                self.logger.error(
                    'Unexpected Location header '
                    'returned. %s does not begin with expected '
                    'path: %s' % (loc_parsed.geturl(), acc_cont_path))
                return HTTPInternalServerError('Unexpected Relocation')

            if resp.status_int == 304:
                return HTTPNotModified(
                    request=req,
                    headers=self._getCacheHeaders(hash_data.ttl))
            if resp.status_int == 416:
                return HTTPRequestedRangeNotSatisfiable(
                    request=req, headers=self._getCacheHeaders(CACHE_404))
            if resp.status_int // 100 == 2 or resp.status_int == 404:
                # A response without a Content-Length is passed through;
                # the explicit None test avoids an int/None comparison.
                if resp.content_length is not None and \
                        resp.content_length > self.max_cdn_file_size:
                    return HTTPBadRequest(
                        request=req,
                        headers=self._getCacheHeaders(CACHE_404))
                cdn_resp = Response(request=req, app_iter=resp.app_iter)
                cdn_resp.status = resp.status_int
                cdn_resp.headers.update(resp.headers)
                if resp.status_int == 404:
                    cdn_resp.headers.update(
                        self._getCacheHeaders(CACHE_404))
                else:
                    cdn_resp.headers.update(
                        self._getCacheHeaders(hash_data.ttl))
                return cdn_resp
            self.logger.error('Unexpected response from '
                              'Swift: %s, %s: %s' %
                              (resp.status, swift_path, resp.body[:40]))
        return HTTPNotFound(request=req,
                            headers=self._getCacheHeaders(CACHE_404))