def _fetch_sub_slo_segments(self, req, version, acc, con, obj):
    """
    Issue a GET subrequest for a submanifest and return its decoded JSON.

    :param req: the request being served; its environ and x-auth-token
                header are handed to the subrequest
    :param version: version component of the submanifest path
    :param acc: account component of the submanifest path
    :param con: container component of the submanifest path
    :param obj: object component of the submanifest path
    :returns: the result of ``json.loads`` on the joined response body
    :raises ListingIterError: if the GET is not successful, or if the
                              body cannot be JSON-decoded
    """
    manifest_path = '/'.join(['', version, acc, con, obj])
    auth_headers = {'x-auth-token': req.headers.get('x-auth-token')}
    manifest_req = make_subrequest(
        req.environ, path=manifest_path, method='GET',
        headers=auth_headers, agent='%(orig)s SLO MultipartGET',
        swift_source='SLO')
    manifest_resp = manifest_req.get_response(self.slo.app)

    if not is_success(manifest_resp.status_int):
        # The body of a failed GET is not used; release it before raising.
        close_if_possible(manifest_resp.app_iter)
        get_failure = ('ERROR: while fetching %s, GET of submanifest %s '
                       'failed with status %d')
        raise ListingIterError(get_failure % (
            req.path, manifest_req.path, manifest_resp.status_int))

    try:
        with closing_if_possible(manifest_resp.app_iter):
            raw_body = ''.join(manifest_resp.app_iter)
            return json.loads(raw_body)
    except ValueError as err:
        decode_failure = ('ERROR: while fetching %s, JSON-decoding of '
                          'submanifest %s failed with %s')
        raise ListingIterError(decode_failure % (
            req.path, manifest_req.path, err))
def _listing_pages_iter(self, account_name, lcontainer, lprefix, env):
    """
    Yield successive pages of a reversed JSON container listing.

    Each page is requested with ``format=json``, ``reverse=on``, the given
    prefix, and a marker set to the name of the last entry of the previous
    page (empty for the first page). Iteration stops on the first page
    whose body is empty or decodes to an empty listing.

    :param account_name: account owning the container
    :param lcontainer: container to list
    :param lprefix: object-name prefix used to filter the listing
    :param env: WSGI environment the pre-authed requests are built from
    :returns: a generator of decoded listing pages
    :raises ListingIterNotFound: if a listing request returns 404
    :raises HTTPPreconditionFailed: on any other client-error status
    :raises ListingIterError: on any other unsuccessful status
    """
    marker = ''
    while True:
        lreq = make_pre_authed_request(
            env, method='GET', swift_source='VW',
            path='/v1/%s/%s' % (account_name, lcontainer))
        lreq.environ['QUERY_STRING'] = \
            'format=json&prefix=%s&reverse=on&marker=%s' % (
                quote(lprefix), quote(marker))
        lresp = lreq.get_response(self.app)
        if not is_success(lresp.status_int):
            # The body of a failed listing is never read, so release the
            # response iterator before raising instead of leaking it.
            close_if_possible(lresp.app_iter)
            if lresp.status_int == HTTP_NOT_FOUND:
                raise ListingIterNotFound()
            elif is_client_error(lresp.status_int):
                raise HTTPPreconditionFailed()
            else:
                raise ListingIterError()

        if not lresp.body:
            break

        sublisting = json.loads(lresp.body)
        if not sublisting:
            break
        # The next page resumes from the last name seen on this one.
        marker = sublisting[-1]['name'].encode('utf-8')
        yield sublisting
def _listing_pages_iter(self, lcontainer, lprefix, env):
    """
    Yield successive pages of a JSON container listing for a prefix.

    Every page is fetched through ``self.GETorHEAD_base`` using a request
    built from ``env``. When ``env`` carries a ``swift.authorize`` callable
    it is invoked for each page with the container's read ACL attached to
    the request.

    :param lcontainer: container to list, within ``self.account_name``
    :param lprefix: object-name prefix used to filter the listing
    :param env: WSGI environment the listing requests are built from
    :returns: a generator of decoded listing pages
    :raises ListingIterNotAuthorized: if the authorize callable returns a
                                      truthy response
    :raises ListingIterNotFound: if a listing request returns 404
    :raises ListingIterError: on any other unsuccessful status
    """
    part = self.app.container_ring.get_part(self.account_name, lcontainer)
    marker = ''
    while True:
        lreq = Request.blank('i will be overridden by env', environ=env)
        # PATH_INFO is deliberately left unquoted, per the WSGI spec
        container_path = '/v1/%s/%s' % (self.account_name, lcontainer)
        lreq.environ['PATH_INFO'] = container_path
        lreq.environ['REQUEST_METHOD'] = 'GET'
        query = 'format=json&prefix=%s&marker=%s' % (
            quote(lprefix), quote(marker))
        lreq.environ['QUERY_STRING'] = query

        lresp = self.GETorHEAD_base(
            lreq, _('Container'), self.app.container_ring, part,
            lreq.swift_entity_path)

        if 'swift.authorize' in env:
            lreq.acl = lresp.headers.get('x-container-read')
            denial = env['swift.authorize'](lreq)
            if denial:
                raise ListingIterNotAuthorized(denial)

        if lresp.status_int == HTTP_NOT_FOUND:
            raise ListingIterNotFound()
        if not is_success(lresp.status_int):
            raise ListingIterError()

        # An empty body or an empty decoded page both end the listing.
        if not lresp.body:
            return
        page = json.loads(lresp.body)
        if not page:
            return

        marker = page[-1]['name'].encode('utf-8')
        yield page
def _segment_listing_iterator(self, req, version, account, container,
                              prefix, segments, first_byte=None,
                              last_byte=None):
    """
    Yield one tuple per segment that overlaps the requested byte range.

    Each yielded item is ``(obj path, etag, size, first byte, last byte)``
    where etag and size are always ``None``. The first/last byte values
    are ``None`` when the range does not cut into that end of the
    segment. Further listing pages are fetched with
    ``self._get_container_listing`` while the current page is full and
    bytes are still needed.

    :param req: request passed through to ``_get_container_listing``
    :param version: version component of each segment path
    :param account: account component of each segment path
    :param container: container component of each segment path
    :param prefix: prefix passed through to ``_get_container_listing``
    :param segments: first page of the listing; entries are indexed by
                     ``'bytes'`` and ``'name'``
    :param first_byte: offset of the first wanted byte; None means 0
    :param last_byte: offset of the last wanted byte; None means no limit
    :raises ListingIterError: if fetching a later listing page fails
    """
    # It's sort of hokey that this thing takes in the first page of
    # segments as an argument, but we need to compute the etag and content
    # length from the first page, and it's better to have a hokey
    # interface than to make redundant requests.
    if first_byte is None:
        first_byte = 0
    if last_byte is None:
        last_byte = float("inf")

    while True:
        for segment in segments:
            seg_length = int(segment['bytes'])

            if first_byte >= seg_length:
                # don't need any bytes from this segment
                first_byte = max(first_byte - seg_length, -1)
                last_byte = max(last_byte - seg_length, -1)
                continue
            elif last_byte < 0:
                # no bytes are needed from this or any future segment
                break

            seg_name = segment['name']
            if isinstance(seg_name, unicode):
                seg_name = seg_name.encode("utf-8")

            # (obj path, etag, size, first byte, last byte)
            yield ("/" + "/".join((version, account, container,
                                   seg_name)),
                   # We deliberately omit the etag and size here;
                   # SegmentedIterable will check size and etag if
                   # specified, but we don't want it to. DLOs only care
                   # that the objects' names match the specified prefix.
                   None,
                   None,
                   (None if first_byte <= 0 else first_byte),
                   (None if last_byte >= seg_length - 1 else last_byte))

            # Re-base both offsets so they are relative to the start of
            # the next segment.
            first_byte = max(first_byte - seg_length, -1)
            last_byte = max(last_byte - seg_length, -1)

        if len(segments) < CONTAINER_LISTING_LIMIT:
            # a short page means that we're done with the listing
            break
        elif last_byte < 0:
            break

        marker = segments[-1]['name']
        error_response, segments = self._get_container_listing(
            req, version, account, container, prefix, marker)
        if error_response:
            # we've already started sending the response body to the
            # client, so all we can do is raise an exception to make the
            # WSGI server close the connection early. Release the failed
            # listing response first so it is not leaked.
            close_if_possible(error_response.app_iter)
            raise ListingIterError(
                "Got status %d listing container /%s/%s" %
                (error_response.status_int, account, container))
def _listing_pages_iter(self, account_name, lcontainer, lprefix,
                        req, marker='', end_marker='', reverse=True):
    '''Get "pages" worth of objects that start with a prefix.

    The optional keyword arguments ``marker``, ``end_marker``, and
    ``reverse`` are used similar to how they are for containers. Callers
    are either:

       - ``_listing_iter``, which passes none of the optional args, or
       - ``_in_proxy_reverse_listing``, which passes ``reverse=False``
         together with both ``marker`` and ``end_marker`` (either of
         which may still be blank).

    If a reversed listing was requested but the returned page is not
    actually reversed, a single value obtained from
    ``_in_proxy_reverse_listing`` is yielded and iteration ends.

    :raises ListingIterNotFound: if a listing request returns 404
    :raises HTTPPreconditionFailed: on any other client-error status
    :raises ListingIterError: on any other unsuccessful status
    '''
    while True:
        container_path = wsgi_quote(
            '/v1/%s/%s' % (account_name, lcontainer))
        lreq = make_pre_authed_request(
            req.environ, method='GET', swift_source='VW',
            path=container_path)

        # Assemble the whole query string, then install it in one go.
        query = 'prefix=%s&marker=%s' % (wsgi_quote(lprefix),
                                         wsgi_quote(marker))
        if end_marker:
            query += '&end_marker=%s' % (wsgi_quote(end_marker))
        if reverse:
            query += '&reverse=on'
        lreq.environ['QUERY_STRING'] = query

        lresp = lreq.get_response(self.app)
        if not is_success(lresp.status_int):
            close_if_possible(lresp.app_iter)
            if lresp.status_int == HTTP_NOT_FOUND:
                raise ListingIterNotFound()
            if is_client_error(lresp.status_int):
                raise HTTPPreconditionFailed(request=req)
            raise ListingIterError()

        # An empty body or an empty decoded page both end the listing.
        if not lresp.body:
            return
        page = json.loads(lresp.body)
        if not page:
            return

        # When using the ``reverse`` param, check that the listing is
        # actually reversed
        head_name = bytes_to_wsgi(page[0]['name'].encode('utf-8'))
        tail_name = bytes_to_wsgi(page[-1]['name'].encode('utf-8'))
        starts_past_marker = marker and head_name > marker
        not_reversed = head_name < tail_name or starts_past_marker
        if reverse and not_reversed:
            # Apparently there's at least one pre-2.6.0 container server
            yield self._in_proxy_reverse_listing(
                account_name, lcontainer, lprefix, req, marker, page)
            return

        marker = tail_name
        yield page
def _byterange_listing_iterator(self, req, version, account, segments,
                                first_byte, last_byte,
                                cached_fetch_sub_slo_segments,
                                recursion_depth=1):
    """
    Yield ``(seg_dict, first byte, last byte)`` for each needed segment.

    Walks ``segments`` in order, skipping entries that lie entirely before
    ``first_byte`` and stopping once ``last_byte`` has been passed.
    Entries flagged ``sub_slo`` are expanded by recursing into the
    submanifest returned by ``cached_fetch_sub_slo_segments``.

    :param req: request passed through to the submanifest fetcher
    :param version: passed through to the submanifest fetcher
    :param account: passed through to the submanifest fetcher
    :param segments: iterable of segment dicts; a unicode ``'name'`` is
                     replaced in place by its UTF-8 encoding when the
                     segment is yielded
    :param first_byte: offset of the first wanted byte within ``segments``
    :param last_byte: offset of the last wanted byte within ``segments``
    :param cached_fetch_sub_slo_segments: callable taking
        ``(req, version, account, container, obj)`` and returning the
        submanifest's segments
    :param recursion_depth: current submanifest nesting level
    :raises ListingIterError: if a submanifest is needed at or beyond
                              ``self.max_slo_recursion_depth``
    """
    previous_sub_path = None
    for entry in segments:
        entry_length = self._segment_length(entry)

        # Entries wholly before first_byte fall straight through to the
        # offset adjustment at the bottom of the loop.
        if first_byte < entry_length:
            if last_byte < 0:
                # no bytes are needed from this or any future segment
                return

            declared_range = entry.get('range')
            if declared_range is not None:
                # We already validated and supplied concrete values
                # for the range on upload
                lo, hi = map(int, declared_range.split('-'))
            else:
                lo, hi = 0, entry_length - 1

            if not config_true_value(entry.get('sub_slo')):
                if isinstance(entry['name'], six.text_type):
                    entry['name'] = entry['name'].encode("utf-8")
                yield (entry,
                       max(0, first_byte) + lo,
                       min(hi, lo + last_byte))
            else:
                # do this check here so that we can avoid fetching this
                # last manifest before raising the exception
                if recursion_depth >= self.max_slo_recursion_depth:
                    raise ListingIterError("Max recursion depth exceeded")

                sub_path = get_valid_utf8_str(entry['name'])
                sub_cont, sub_obj = split_path(sub_path, 2, 2, True)
                # Consecutive references to the same submanifest reuse
                # the segments fetched for the previous entry.
                if previous_sub_path != sub_path:
                    sub_segments = cached_fetch_sub_slo_segments(
                        req, version, account, sub_cont, sub_obj)
                previous_sub_path = sub_path

                # Use the existing machinery to slice into the sub-SLO,
                # with first/last byte made relative to the sub-SLO.
                nested = self._byterange_listing_iterator(
                    req, version, account, sub_segments,
                    lo + max(0, first_byte),
                    min(hi, lo + last_byte),
                    cached_fetch_sub_slo_segments,
                    recursion_depth=recursion_depth + 1)
                for nested_entry, nested_first, nested_last in nested:
                    yield nested_entry, nested_first, nested_last

        first_byte -= entry_length
        last_byte -= entry_length
def _segment_listing_iterator(self, req, version, account, container,
                              prefix, segments, first_byte=None,
                              last_byte=None):
    '''
    Yield a dict with ``path``, ``first_byte`` and ``last_byte`` keys for
    every segment overlapping the requested byte range.

    :param req: upstream request
    :param version: native
    :param account: native
    :param container: native
    :param prefix: native
    :param segments: array of dicts, with native strings
    :param first_byte: number
    :param last_byte: number
    :raises ListingIterError: if fetching a later listing page fails
    '''
    # Taking the first page of segments as an argument is an awkward
    # interface, but the etag and content length have to be computed from
    # that page, and this avoids requesting it a second time.
    first_byte = 0 if first_byte is None else first_byte
    last_byte = float("inf") if last_byte is None else last_byte

    while True:
        for segment in segments:
            size = int(segment['bytes'])

            # Segments wholly before first_byte skip straight to the
            # offset adjustment below.
            if first_byte < size:
                if last_byte < 0:
                    # no bytes are needed from this or any future segment
                    break

                name = segment['name']
                if six.PY2:
                    name = name.encode("utf-8")

                # Etag and size are deliberately left out: SegmentedIterable
                # would check them if given, and DLOs only care that the
                # objects' names match the specified prefix.
                # SegmentedIterable will instead check that the data read
                # from each segment matches the response headers.
                yield {
                    'path': "/".join(
                        ["", version, account, container, name]),
                    'first_byte': None if first_byte <= 0 else first_byte,
                    'last_byte': (None if last_byte >= size - 1
                                  else last_byte),
                }

            first_byte = max(first_byte - size, -1)
            last_byte = max(last_byte - size, -1)

        # A short page means the listing is exhausted; a negative
        # last_byte means the requested range has been fully covered.
        if (len(segments) < constraints.CONTAINER_LISTING_LIMIT
                or last_byte < 0):
            break

        marker = segments[-1]['name']
        error_response, segments = self._get_container_listing(
            req, version, account, container, prefix, marker)
        if error_response:
            # The response body is already being sent to the client, so
            # the only option left is to raise and make the WSGI server
            # close the connection early.
            close_if_possible(error_response.app_iter)
            listing_failure = "Got status %d listing container /%s/%s"
            raise ListingIterError(listing_failure % (
                error_response.status_int, account, container))
def _segment_listing_iterator(self, req, version, account, segments,
                              recursion_depth=1):
    """
    Yield ``(seg_dict, first byte, last byte)`` for each needed segment.

    The wanted byte range is read from, and tracked in,
    ``self.first_byte`` and ``self.last_byte``; both attributes are
    modified as the generator advances. Entries flagged ``sub_slo`` are
    expanded by fetching the submanifest with
    ``self._fetch_sub_slo_segments`` and recursing into it.

    :param req: request passed through to ``_fetch_sub_slo_segments``
    :param version: passed through to ``_fetch_sub_slo_segments``
    :param account: passed through to ``_fetch_sub_slo_segments``
    :param segments: sequence of segment dicts (iterated more than once);
                     a unicode ``'name'`` is replaced in place by its
                     UTF-8 encoding when the segment is yielded
    :param recursion_depth: current submanifest nesting level
    :raises ListingIterError: if a submanifest is needed at or beyond
                              ``self.max_slo_recursion_depth``
    """
    for seg_dict in segments:
        if config_true_value(seg_dict.get('sub_slo')):
            override_bytes_from_content_type(seg_dict,
                                             logger=self.slo.logger)

    # We handle the range stuff here so that we can be smart about
    # skipping unused submanifests. For example, if our first segment is a
    # submanifest referencing 50 MiB total, but start_byte falls in
    # the 51st MiB, then we can avoid fetching the first submanifest.
    #
    # If we were to make SegmentedIterable handle all the range
    # calculations, we would be unable to make this optimization.
    total_length = sum(self._segment_length(seg) for seg in segments)
    if self.first_byte is None:
        self.first_byte = 0
    if self.last_byte is None:
        self.last_byte = total_length - 1

    # Path of the most recently expanded submanifest, so that consecutive
    # references to the same one reuse the already-fetched sub_segments.
    last_sub_path = None
    for seg_dict in segments:
        seg_length = self._segment_length(seg_dict)
        if self.first_byte >= seg_length:
            # don't need any bytes from this segment
            self.first_byte -= seg_length
            self.last_byte -= seg_length
            continue

        if self.last_byte < 0:
            # no bytes are needed from this or any future segment
            break

        seg_range = seg_dict.get('range')
        if seg_range is None:
            range_start, range_end = 0, seg_length - 1
        else:
            # We already validated and supplied concrete values
            # for the range on upload
            range_start, range_end = map(int, seg_range.split('-'))

        if config_true_value(seg_dict.get('sub_slo')):
            # do this check here so that we can avoid fetching this last
            # manifest before raising the exception
            if recursion_depth >= self.max_slo_recursion_depth:
                raise ListingIterError("Max recursion depth exceeded")

            sub_path = get_valid_utf8_str(seg_dict['name'])
            sub_cont, sub_obj = split_path(sub_path, 2, 2, True)
            if last_sub_path != sub_path:
                sub_segments = self._fetch_sub_slo_segments(
                    req, version, account, sub_cont, sub_obj)
            last_sub_path = sub_path

            # Use the existing machinery to slice into the sub-SLO.
            # This requires that we save off our current state, and
            # restore at the other end.
            orig_start, orig_end = self.first_byte, self.last_byte
            # Re-express the wanted range relative to the sub-SLO,
            # clamped to the range this entry selects from it.
            self.first_byte = range_start + max(0, self.first_byte)
            self.last_byte = min(range_end, range_start + self.last_byte)

            for sub_seg_dict, sb, eb in self._segment_listing_iterator(
                    req, version, account, sub_segments,
                    recursion_depth=recursion_depth + 1):
                yield sub_seg_dict, sb, eb

            # Restore the first/last state
            self.first_byte, self.last_byte = orig_start, orig_end
        else:
            if isinstance(seg_dict['name'], six.text_type):
                seg_dict['name'] = seg_dict['name'].encode("utf-8")
            yield (seg_dict,
                   max(0, self.first_byte) + range_start,
                   min(range_end, range_start + self.last_byte))

        # Re-base both offsets so they are relative to the start of the
        # next entry at this level.
        self.first_byte -= seg_length
        self.last_byte -= seg_length
def _segment_listing_iterator(self, req, version, account, segments,
                              recursion_depth=1):
    """
    Yield ``(seg_dict, first byte, last byte)`` for each needed segment.

    The wanted byte range lives in ``self.first_byte`` and
    ``self.last_byte``, which are consumed (decremented) as segments are
    skipped or yielded. A yielded first/last byte is ``None`` when the
    range does not cut into that end of the segment. Entries flagged
    ``sub_slo`` are expanded by fetching the submanifest with
    ``self._fetch_sub_slo_segments`` and recursing into it.

    :param req: request passed through to ``_fetch_sub_slo_segments``
    :param version: passed through to ``_fetch_sub_slo_segments``
    :param account: passed through to ``_fetch_sub_slo_segments``
    :param segments: sequence of segment dicts (iterated more than once);
                     a unicode ``'name'`` is replaced in place by its
                     UTF-8 encoding when the segment is yielded
    :param recursion_depth: current submanifest nesting level
    :raises ListingIterError: if a submanifest is needed at or beyond
                              ``self.max_slo_recursion_depth``
    """
    for entry in segments:
        if config_true_value(entry.get('sub_slo')):
            override_bytes_from_content_type(entry, logger=self.slo.logger)

    # The range is resolved here, not in SegmentedIterable, so that unused
    # submanifests can be skipped without being fetched: if the first
    # segment is a submanifest referencing 50 MiB in total but
    # self.first_byte falls in the 51st MiB, that submanifest is never
    # requested.
    manifest_size = sum(int(entry['bytes']) for entry in segments)
    if self.first_byte is None:
        self.first_byte = 0
    if self.last_byte is None:
        self.last_byte = manifest_size - 1

    for entry in segments:
        entry_size = int(entry['bytes'])

        if self.first_byte >= entry_size:
            # don't need any bytes from this segment
            self.first_byte = max(self.first_byte - entry_size, -1)
            self.last_byte = max(self.last_byte - entry_size, -1)
            continue

        if self.last_byte < 0:
            # no bytes are needed from this or any future segment
            break

        if not config_true_value(entry.get('sub_slo')):
            if isinstance(entry['name'], unicode):
                entry['name'] = entry['name'].encode("utf-8")
            range_first = (None if self.first_byte <= 0
                           else self.first_byte)
            range_last = (None if self.last_byte >= entry_size - 1
                          else self.last_byte)
            yield (entry, range_first, range_last)
            # Only plain segments consume the offsets at this level; for
            # a submanifest the recursive call has already advanced the
            # shared self.first_byte / self.last_byte.
            self.first_byte = max(self.first_byte - entry_size, -1)
            self.last_byte = max(self.last_byte - entry_size, -1)
            continue

        # do this check here so that we can avoid fetching this last
        # manifest before raising the exception
        if recursion_depth >= self.max_slo_recursion_depth:
            raise ListingIterError("Max recursion depth exceeded")

        sub_path = get_valid_utf8_str(entry['name'])
        sub_cont, sub_obj = split_path(sub_path, 2, 2, True)
        sub_segments = self._fetch_sub_slo_segments(
            req, version, account, sub_cont, sub_obj)
        nested = self._segment_listing_iterator(
            req, version, account, sub_segments,
            recursion_depth=recursion_depth + 1)
        for nested_entry, nested_first, nested_last in nested:
            yield nested_entry, nested_first, nested_last