def get_or_head_response(self, req, resp_headers, resp_iter):
    """
    Answer a GET or HEAD of an SLO manifest.

    Drains the manifest body, totals up the segment sizes, derives the
    manifest-level Etag, and hands off to the GET/HEAD response builders.
    """
    with closing_if_possible(resp_iter):
        manifest_body = "".join(resp_iter)
    try:
        segments = json.loads(manifest_body)
    except ValueError:
        # Unparseable manifest: respond as if it had no segments.
        segments = []

    manifest_etag = md5()
    total_size = 0
    for segment in segments:
        seg_range = segment.get("range")
        if seg_range:
            manifest_etag.update("%s:%s;" % (segment["hash"], seg_range))
        else:
            manifest_etag.update(segment["hash"])
        if config_true_value(segment.get("sub_slo")):
            # Sub-SLO entries carry their real byte count in the
            # content type; extract it before summing.
            override_bytes_from_content_type(segment,
                                             logger=self.slo.logger)
        total_size += self._segment_length(segment)

    headers = [(name, value) for name, value in resp_headers
               if name.lower() not in ("etag", "content-length")]
    headers.append(("Content-Length", str(total_size)))
    headers.append(("Etag", '"%s"' % manifest_etag.hexdigest()))

    if req.method == "HEAD":
        return self._manifest_head_response(req, headers)
    return self._manifest_get_response(
        req, total_size, headers, segments)
def get_or_head_response(self, req, resp_headers, resp_iter):
    """
    Build the GET or HEAD response for an SLO manifest.

    Reads the segment listing, derives the manifest's aggregate Etag and
    Content-Length, and delegates to the GET/HEAD response builders.
    """
    segments = self._get_manifest_read(resp_iter)
    etag = md5()
    content_length = 0
    for seg_dict in segments:
        if seg_dict.get('range'):
            # Range segments hash in as "<etag>:<range>;".
            etag.update('%s:%s;' % (seg_dict['hash'], seg_dict['range']))
        else:
            etag.update(seg_dict['hash'])
        if config_true_value(seg_dict.get('sub_slo')):
            # Pull the sub-SLO's real byte count out of its content type
            # before summing segment lengths.
            override_bytes_from_content_type(
                seg_dict, logger=self.slo.logger)
        content_length += self._segment_length(seg_dict)
    # Drop the backend Etag/Content-Length; substitute manifest-level ones.
    response_headers = [(h, v) for h, v in resp_headers
                        if h.lower() not in ('etag', 'content-length')]
    response_headers.append(('Content-Length', str(content_length)))
    response_headers.append(('Etag', '"%s"' % etag.hexdigest()))
    if req.method == 'HEAD':
        return self._manifest_head_response(req, response_headers)
    else:
        return self._manifest_get_response(
            req, content_length, response_headers, segments)
def update_data_record(self, record, versions=False):
    """
    Convert an object entry record into a container-listing dict.

    :param record: object entry record (a dict)
    :param versions: when true, include the record's version id
    :returns: a listing dict, or a {'subdir': ...} placeholder
    """
    if 'subdir' in record:
        return {'subdir': record['name']}

    props = record.get('properties', {})
    # Encryption middleware stashes the plaintext etag in properties.
    override_key = 'x-object-sysmeta-container-update-override-etag'
    if override_key in props:
        hash_ = props[override_key]
    else:
        hash_ = record.get('hash')
        if hash_ is not None:
            hash_ = hash_.lower()

    response = {
        'name': record['name'],
        'bytes': record['size'],
        'hash': hash_ if hash_ else '',
        'last_modified': Timestamp(record['mtime']).isoformat,
        'content_type': record.get('mime_type',
                                   'application/octet-stream'),
    }
    if record.get('deleted', False):
        response['content_type'] = DELETE_MARKER_CONTENT_TYPE
    if versions:
        response['version'] = record.get('version', 'null')
    override_bytes_from_content_type(response)
    return response
def update_data_record(self, record):
    """
    Perform any mutations to container listing records that are common
    to all serialization formats, and returns it as a dict.

    Converts created time to iso timestamp.
    Replaces size with 'swift_bytes' content type parameter.

    :params record: object entry record
    :returns: modified record
    """
    if isinstance(record, ShardRange):
        # A ShardRange renders directly as a dict; its timestamp stands
        # in for the creation time.
        created = record.timestamp
        response = dict(record)
    else:
        (name, created, size, content_type, etag) = record[:5]
        # On py2 names arrive as encoded bytes; decode for the listing.
        name_ = name.decode('utf8') if six.PY2 else name
        if content_type is None:
            # No content type: this entry is a delimiter placeholder.
            return {'subdir': name_}
        response = {
            'bytes': size, 'hash': etag, 'name': name_,
            'content_type': content_type
        }
        override_bytes_from_content_type(response, logger=self.logger)
    response['last_modified'] = Timestamp(created).isoformat
    return response
def get_or_head_response(self, req, resp_headers, resp_iter):
    """
    Build the response for a GET or HEAD of an SLO manifest.

    Reads the JSON manifest out of ``resp_iter``, computes the
    manifest's aggregate Etag and Content-Length from its segments, and
    delegates to the appropriate GET/HEAD response builder.
    """
    # Consume the whole manifest body, closing the iterator if it
    # supports close() so the backend connection is released.
    with closing_if_possible(resp_iter):
        resp_body = ''.join(resp_iter)
    try:
        segments = json.loads(resp_body)
    except ValueError:
        # Malformed manifest body: treat it as having no segments.
        segments = []
    etag = md5()
    content_length = 0
    for seg_dict in segments:
        if seg_dict.get('range'):
            # Range segments hash in as "<etag>:<range>;".
            etag.update('%s:%s;' % (seg_dict['hash'], seg_dict['range']))
        else:
            etag.update(seg_dict['hash'])
        if config_true_value(seg_dict.get('sub_slo')):
            # Pull the sub-SLO's real byte count out of its content type
            # before summing segment lengths.
            override_bytes_from_content_type(
                seg_dict, logger=self.slo.logger)
        content_length += self._segment_length(seg_dict)
    # Replace the backend Etag/Content-Length with manifest-level values.
    response_headers = [(h, v) for h, v in resp_headers
                        if h.lower() not in ('etag', 'content-length')]
    response_headers.append(('Content-Length', str(content_length)))
    response_headers.append(('Etag', '"%s"' % etag.hexdigest()))
    if req.method == 'HEAD':
        return self._manifest_head_response(req, response_headers)
    else:
        return self._manifest_get_response(
            req, content_length, response_headers, segments)
def get_or_head_response(self, req, resp_headers, resp_iter):
    """
    Serve a GET or HEAD of an SLO manifest: parse the segment listing,
    total up the sizes, compute the combined Etag, and delegate.
    """
    body = ''.join(resp_iter)
    try:
        segments = json.loads(body)
    except ValueError:
        # An unparseable manifest is treated as empty.
        segments = []

    manifest_etag = md5()
    total_bytes = 0
    for segment in segments:
        manifest_etag.update(segment['hash'])
        if config_true_value(segment.get('sub_slo')):
            override_bytes_from_content_type(segment,
                                             logger=self.slo.logger)
        total_bytes += int(segment['bytes'])

    headers = []
    for name, value in resp_headers:
        if name.lower() not in ('etag', 'content-length'):
            headers.append((name, value))
    headers.append(('Content-Length', str(total_bytes)))
    headers.append(('Etag', '"%s"' % manifest_etag.hexdigest()))

    if req.method == 'HEAD':
        return self._manifest_head_response(req, headers)
    return self._manifest_get_response(req, total_bytes, headers, segments)
def update_data_record(self, record, list_meta=False):
    """
    Perform any mutations to container listing records that are common
    to all serialization formats, and return the result as a dict.

    Converts created time to an ISO timestamp and moves the size into
    the 'swift_bytes' content-type parameter where applicable.

    :param record: (name, created, size, content_type, etag, metadata)
    :param list_meta: when true, decode and attach the record's metadata
    :returns: the record as a dict, or a {'subdir': name} placeholder
    """
    name, created, size, content_type, etag, metadata = record
    if content_type is None:
        # No content type: this entry is a delimiter placeholder.
        return {'subdir': name}

    response = {
        'name': name,
        'bytes': size,
        'hash': etag,
        'content_type': content_type,
    }
    if list_meta:
        decoded = json.loads(metadata)
        utf8encodekeys(decoded)
        response['metadata'] = decoded

    stamp = datetime.utcfromtimestamp(float(created)).isoformat()
    # python isoformat() doesn't include msecs when zero; pad so the
    # timestamps are a fixed width.
    if len(stamp) < len("1970-01-01T00:00:00.000000"):
        stamp += ".000000"
    response['last_modified'] = stamp

    override_bytes_from_content_type(response, logger=self.logger)
    return response
def update_data_record(self, record):
    """
    Perform the mutations to container listing records common to every
    serialization format and return the record as a dict.

    The created time is converted to an ISO timestamp; sizes stored in
    the 'swift_bytes' content-type parameter are applied.

    :params record: object entry record, or a ShardRange
    :returns: modified record
    """
    if isinstance(record, ShardRange):
        # ShardRanges already behave like a mapping of listing keys.
        response = dict(record)
        created = record.timestamp
    else:
        name, created, size, content_type, etag = record[:5]
        if six.PY2:
            name = name.decode('utf8')
        if content_type is None:
            # No content type: this entry is a delimiter placeholder.
            return {'subdir': name}
        response = {'name': name, 'bytes': size, 'hash': etag,
                    'content_type': content_type}
        override_bytes_from_content_type(response, logger=self.logger)
    response['last_modified'] = Timestamp(created).isoformat
    return response
def get_or_head_response(self, req, resp_headers, resp_iter):
    """
    Build the GET or HEAD response for an SLO manifest.

    Prefers the precomputed etag and size stored in sysmeta headers;
    only when one of them is missing does it walk the segment listing
    to compute the missing value(s).
    """
    segments = self._get_manifest_read(resp_iter)
    slo_etag = None
    content_length = None
    response_headers = []
    for header, value in resp_headers:
        lheader = header.lower()
        if lheader not in ('etag', 'content-length'):
            # Pass everything through except the backend's own
            # Etag/Content-Length; manifest-level values go on below.
            response_headers.append((header, value))

        if lheader == SYSMETA_SLO_ETAG:
            slo_etag = value
        elif lheader == SYSMETA_SLO_SIZE:
            # it's from sysmeta, so we don't worry about non-integer
            # values here
            content_length = int(value)

    # Prep to calculate content_length & etag if necessary
    if slo_etag is None:
        calculated_etag = md5()
    if content_length is None:
        calculated_content_length = 0

    for seg_dict in segments:
        # Decode any inlined data; it's important that we do this *before*
        # calculating the segment length and etag
        if 'data' in seg_dict:
            seg_dict['raw_data'] = base64.b64decode(seg_dict.pop('data'))

        if slo_etag is None:
            if 'raw_data' in seg_dict:
                # Inlined data segments hash in by their own md5 digest.
                calculated_etag.update(
                    md5(seg_dict['raw_data']).hexdigest())
            elif seg_dict.get('range'):
                calculated_etag.update(
                    '%s:%s;' % (seg_dict['hash'], seg_dict['range']))
            else:
                calculated_etag.update(seg_dict['hash'])

        if content_length is None:
            if config_true_value(seg_dict.get('sub_slo')):
                override_bytes_from_content_type(
                    seg_dict, logger=self.slo.logger)
            calculated_content_length += self._segment_length(seg_dict)

    if slo_etag is None:
        slo_etag = calculated_etag.hexdigest()
    if content_length is None:
        content_length = calculated_content_length

    response_headers.append(('Content-Length', str(content_length)))
    response_headers.append(('Etag', '"%s"' % slo_etag))

    if req.method == 'HEAD':
        return self._manifest_head_response(req, response_headers)
    else:
        return self._manifest_get_response(req, content_length,
                                           response_headers, segments)
def _segment_listing_iterator(self, req, version, account, segments,
                              recursion_depth=1):
    """
    Yield (seg_dict, start_byte, end_byte) tuples for the segments
    needed to satisfy the byterange described by self.first_byte and
    self.last_byte, recursing into sub-SLO manifests as required.

    :raises ListingIterError: if manifests are nested deeper than
        self.max_slo_recursion_depth
    """
    for seg_dict in segments:
        if config_true_value(seg_dict.get('sub_slo')):
            # Sub-SLO entries carry their real byte count in the
            # content type; extract it before the length math below.
            override_bytes_from_content_type(seg_dict,
                                             logger=self.slo.logger)

    # We handle the range stuff here so that we can be smart about
    # skipping unused submanifests. For example, if our first segment is a
    # submanifest referencing 50 MiB total, but self.first_byte falls in
    # the 51st MiB, then we can avoid fetching the first submanifest.
    #
    # If we were to make SegmentedIterable handle all the range
    # calculations, we would be unable to make this optimization.
    total_length = sum(int(seg['bytes']) for seg in segments)
    if self.first_byte is None:
        self.first_byte = 0
    if self.last_byte is None:
        self.last_byte = total_length - 1

    for seg_dict in segments:
        seg_length = int(seg_dict['bytes'])
        if self.first_byte >= seg_length:
            # don't need any bytes from this segment
            self.first_byte = max(self.first_byte - seg_length, -1)
            self.last_byte = max(self.last_byte - seg_length, -1)
            continue
        if self.last_byte < 0:
            # no bytes are needed from this or any future segment
            break
        if config_true_value(seg_dict.get('sub_slo')):
            # do this check here so that we can avoid fetching this last
            # manifest before raising the exception
            if recursion_depth >= self.max_slo_recursion_depth:
                raise ListingIterError("Max recursion depth exceeded")
            sub_path = get_valid_utf8_str(seg_dict['name'])
            sub_cont, sub_obj = split_path(sub_path, 2, 2, True)
            sub_segments = self._fetch_sub_slo_segments(
                req, version, account, sub_cont, sub_obj)
            for sub_seg_dict, sb, eb in self._segment_listing_iterator(
                    req, version, account, sub_segments,
                    recursion_depth=recursion_depth + 1):
                yield sub_seg_dict, sb, eb
        else:
            # py2-only: listing names may be unicode; downstream expects
            # encoded bytes.
            if isinstance(seg_dict['name'], unicode):
                seg_dict['name'] = seg_dict['name'].encode("utf-8")
            seg_length = int(seg_dict['bytes'])
            # None start/end means "no trimming needed on this side".
            yield (seg_dict,
                   (None if self.first_byte <= 0 else self.first_byte),
                   (None if self.last_byte >= seg_length - 1 else
                    self.last_byte))
            self.first_byte = max(self.first_byte - seg_length, -1)
            self.last_byte = max(self.last_byte - seg_length, -1)
def update_data_record(self, record):
    """
    Convert an object entry record into the dict used for container
    listings; subdir placeholders pass through as {'subdir': name}.
    """
    if 'subdir' in record:
        return {'subdir': record['name']}

    sysmeta = extract_sysmeta(record.get('system_metadata', None))
    response = {'name': record['name']}
    response['bytes'] = record['size']
    response['hash'] = record['hash'].lower()
    response['last_modified'] = Timestamp(record['ctime']).isoformat
    response['content_type'] = sysmeta.get('mime-type',
                                           'application/octet-stream')
    override_bytes_from_content_type(response)
    return response
def update_data_record(self, record, versions=False):
    """
    Convert an object record into a container-listing dict.

    :param record: object entry record (a dict)
    :param versions: when true, include the record's version id
    :returns: a listing dict, or a {'subdir': ...} placeholder
    """
    if 'subdir' in record:
        return {'subdir': record['name']}

    content_type = record.get('mime_type', 'application/octet-stream')
    if record.get('deleted', False):
        # Delete markers get a sentinel content type.
        content_type = DELETE_MARKER_CONTENT_TYPE

    response = {
        'name': record['name'],
        'bytes': record['size'],
        'hash': record['hash'].lower(),
        'last_modified': Timestamp(record['ctime']).isoformat,
        'content_type': content_type,
    }
    if versions:
        response['version'] = record.get('version', 'null')
    override_bytes_from_content_type(response)
    return response
def get_or_head_response(self, req, resp_headers, resp_iter):
    """
    Return a GET or HEAD response for an SLO manifest.

    Prefers the precomputed etag/size stored in sysmeta headers; falls
    back to walking the manifest's segments when either is missing.

    :param req: the incoming request for the manifest
    :param resp_headers: (name, value) header pairs from the backend
    :param resp_iter: iterable yielding the manifest body
    :returns: the response built by the GET/HEAD helper
    """
    segments = self._get_manifest_read(resp_iter)
    slo_etag = None
    content_length = None
    response_headers = []
    for header, value in resp_headers:
        lheader = header.lower()
        if lheader == SYSMETA_SLO_ETAG:
            slo_etag = value
        elif lheader == SYSMETA_SLO_SIZE:
            # The sysmeta value was written by this middleware, so it is
            # a well-formed integer. Previously it was kept as a str,
            # leaking a string content_length to callers; convert it.
            content_length = int(value)
        elif lheader not in ('etag', 'content-length'):
            response_headers.append((header, value))

    if slo_etag is None or content_length is None:
        # Fall back to computing both from the segment listing.
        etag = md5()
        content_length = 0
        for seg_dict in segments:
            if seg_dict.get('range'):
                # Range segments hash in as "<etag>:<range>;".
                etag.update('%s:%s;' % (seg_dict['hash'],
                                        seg_dict['range']))
            else:
                etag.update(seg_dict['hash'])

            if config_true_value(seg_dict.get('sub_slo')):
                # Pull the sub-SLO's real byte count out of its
                # content type before summing segment lengths.
                override_bytes_from_content_type(
                    seg_dict, logger=self.slo.logger)
            content_length += self._segment_length(seg_dict)
        slo_etag = etag.hexdigest()

    response_headers.append(('Content-Length', str(content_length)))
    response_headers.append(('Etag', '"%s"' % slo_etag))

    if req.method == 'HEAD':
        return self._manifest_head_response(req, response_headers)
    else:
        return self._manifest_get_response(
            req, content_length, response_headers, segments)
def get_or_head_response(self, req, resp_headers, resp_iter):
    """
    Serve a GET or HEAD of an SLO manifest.

    Uses the etag and total size recorded in sysmeta when available;
    otherwise recomputes them from the segment listing.

    :param req: the incoming request for the manifest
    :param resp_headers: (name, value) header pairs from the backend
    :param resp_iter: iterable yielding the manifest body
    :returns: the response built by the GET/HEAD helper
    """
    segments = self._get_manifest_read(resp_iter)
    slo_etag = None
    content_length = None
    response_headers = []
    for header, value in resp_headers:
        lheader = header.lower()
        if lheader == SYSMETA_SLO_ETAG:
            slo_etag = value
        elif lheader == SYSMETA_SLO_SIZE:
            # Sysmeta values were written by this middleware, so the
            # value is a well-formed integer; convert it so callers
            # receive an int rather than a str.
            content_length = int(value)
        elif lheader not in ('etag', 'content-length'):
            response_headers.append((header, value))
    if slo_etag is None or content_length is None:
        # One of the sysmeta values is missing; recompute from segments.
        etag = md5()
        content_length = 0
        for seg_dict in segments:
            if seg_dict.get('range'):
                etag.update('%s:%s;' % (seg_dict['hash'],
                                        seg_dict['range']))
            else:
                etag.update(seg_dict['hash'])
            if config_true_value(seg_dict.get('sub_slo')):
                # Sub-SLO byte counts live in the content type.
                override_bytes_from_content_type(seg_dict,
                                                 logger=self.slo.logger)
            content_length += self._segment_length(seg_dict)
        slo_etag = etag.hexdigest()
    response_headers.append(('Content-Length', str(content_length)))
    response_headers.append(('Etag', '"%s"' % slo_etag))
    if req.method == 'HEAD':
        return self._manifest_head_response(req, response_headers)
    else:
        return self._manifest_get_response(req, content_length,
                                           response_headers, segments)
def _segment_listing_iterator(self, req, version, account, segments,
                              byteranges):
    """
    Yield segment info for each requested byterange of this manifest,
    expanding sub-SLOs via the per-byterange iterator.

    :param byteranges: list of (first_byte, last_byte) pairs; an empty
        list means the whole object
    """
    for seg_dict in segments:
        if config_true_value(seg_dict.get('sub_slo')):
            # Sub-SLO entries carry their real byte count in the
            # content type; extract it before the length math below.
            override_bytes_from_content_type(seg_dict,
                                             logger=self.slo.logger)

    # We handle the range stuff here so that we can be smart about
    # skipping unused submanifests. For example, if our first segment is a
    # submanifest referencing 50 MiB total, but start_byte falls in
    # the 51st MiB, then we can avoid fetching the first submanifest.
    #
    # If we were to make SegmentedIterable handle all the range
    # calculations, we would be unable to make this optimization.
    total_length = sum(self._segment_length(seg) for seg in segments)
    if not byteranges:
        byteranges = [(0, total_length - 1)]

    # Cache segments from sub-SLOs in case more than one byterange
    # includes data from a particular sub-SLO. We only cache a few sets
    # of segments so that a malicious user cannot build a giant SLO tree
    # and then GET it to run the proxy out of memory.
    #
    # LRUCache is a little awkward to use this way, but it beats doing
    # things manually.
    #
    # 20 is sort of an arbitrary choice; it's twice our max recursion
    # depth, so we know this won't expand memory requirements by too
    # much.
    cached_fetch_sub_slo_segments = \
        LRUCache(maxsize=20)(self._fetch_sub_slo_segments)

    for first_byte, last_byte in byteranges:
        byterange_listing_iter = self._byterange_listing_iterator(
            req, version, account, segments, first_byte, last_byte,
            cached_fetch_sub_slo_segments)
        for seg_info in byterange_listing_iter:
            yield seg_info
def update_data_record(self, record, versions=False):
    """
    Convert an object record into a container-listing dict.

    The etag may be replaced by a value the encryption middleware
    stored in the record's properties.

    :param record: object entry record (a dict)
    :param versions: when true, include the record's version id
    :returns: a listing dict, or a {'subdir': ...} placeholder
    """
    if 'subdir' in record:
        return {'subdir': record['name']}

    props = record.get('properties', {})
    # Encryption middleware stashes the plaintext etag here.
    override_key = 'x-object-sysmeta-container-update-override-etag'
    if override_key in props:
        hash_ = props[override_key]
    else:
        hash_ = record['hash'].lower()

    response = {
        'name': record['name'],
        'bytes': record['size'],
        'hash': hash_,
        'last_modified': Timestamp(record['ctime']).isoformat,
        'content_type': record.get('mime_type',
                                   'application/octet-stream'),
    }
    if record.get('deleted', False):
        response['content_type'] = DELETE_MARKER_CONTENT_TYPE
    if versions:
        response['version'] = record.get('version', 'null')
    override_bytes_from_content_type(response)
    return response
def _segment_listing_iterator(self, req, version, account, segments,
                              recursion_depth=1):
    """
    Yield (seg_dict, start_byte, end_byte) tuples for every segment
    needed to satisfy the byterange described by self.first_byte and
    self.last_byte, recursing into sub-SLO manifests as required.

    :raises ListingIterError: if manifests are nested deeper than
        self.max_slo_recursion_depth
    """
    for seg_dict in segments:
        if config_true_value(seg_dict.get('sub_slo')):
            override_bytes_from_content_type(seg_dict,
                                             logger=self.slo.logger)

    # We handle the range stuff here so that we can be smart about
    # skipping unused submanifests. For example, if our first segment is a
    # submanifest referencing 50 MiB total, but start_byte falls in
    # the 51st MiB, then we can avoid fetching the first submanifest.
    #
    # If we were to make SegmentedIterable handle all the range
    # calculations, we would be unable to make this optimization.
    total_length = sum(self._segment_length(seg) for seg in segments)
    if self.first_byte is None:
        self.first_byte = 0
    if self.last_byte is None:
        self.last_byte = total_length - 1

    last_sub_path = None
    for seg_dict in segments:
        seg_length = self._segment_length(seg_dict)

        if self.first_byte >= seg_length:
            # don't need any bytes from this segment
            self.first_byte -= seg_length
            self.last_byte -= seg_length
            continue

        if self.last_byte < 0:
            # no bytes are needed from this or any future segment
            break

        # Renamed from ``range`` so we don't shadow the builtin
        # (matches the sibling implementation of this method).
        seg_range = seg_dict.get('range')
        if seg_range is None:
            range_start, range_end = 0, seg_length - 1
        else:
            # We already validated and supplied concrete values
            # for the range on upload
            range_start, range_end = map(int, seg_range.split('-'))

        if config_true_value(seg_dict.get('sub_slo')):
            # do this check here so that we can avoid fetching this last
            # manifest before raising the exception
            if recursion_depth >= self.max_slo_recursion_depth:
                raise ListingIterError("Max recursion depth exceeded")

            sub_path = get_valid_utf8_str(seg_dict['name'])
            sub_cont, sub_obj = split_path(sub_path, 2, 2, True)
            # Consecutive segments often reference the same sub-SLO;
            # only re-fetch when the path changes.
            if last_sub_path != sub_path:
                sub_segments = self._fetch_sub_slo_segments(
                    req, version, account, sub_cont, sub_obj)
            last_sub_path = sub_path

            # Use the existing machinery to slice into the sub-SLO.
            # This requires that we save off our current state, and
            # restore at the other end.
            orig_start, orig_end = self.first_byte, self.last_byte
            self.first_byte = range_start + max(0, self.first_byte)
            self.last_byte = min(range_end, range_start + self.last_byte)

            for sub_seg_dict, sb, eb in self._segment_listing_iterator(
                    req, version, account, sub_segments,
                    recursion_depth=recursion_depth + 1):
                yield sub_seg_dict, sb, eb

            # Restore the first/last state
            self.first_byte, self.last_byte = orig_start, orig_end
        else:
            if isinstance(seg_dict['name'], six.text_type):
                seg_dict['name'] = seg_dict['name'].encode("utf-8")
            yield (seg_dict,
                   max(0, self.first_byte) + range_start,
                   min(range_end, range_start + self.last_byte))

        self.first_byte -= seg_length
        self.last_byte -= seg_length
def _segment_listing_iterator(self, req, version, account, segments,
                              recursion_depth=1):
    """
    Yield (seg_dict, start_byte, end_byte) tuples for every segment
    needed to satisfy the byterange described by self.first_byte and
    self.last_byte, recursing into sub-SLO manifests as required.

    :raises ListingIterError: if manifests are nested deeper than
        self.max_slo_recursion_depth
    """
    for seg_dict in segments:
        if config_true_value(seg_dict.get('sub_slo')):
            # Sub-SLO entries carry their real byte count in the
            # content type; extract it before the length math below.
            override_bytes_from_content_type(seg_dict,
                                             logger=self.slo.logger)

    # We handle the range stuff here so that we can be smart about
    # skipping unused submanifests. For example, if our first segment is a
    # submanifest referencing 50 MiB total, but start_byte falls in
    # the 51st MiB, then we can avoid fetching the first submanifest.
    #
    # If we were to make SegmentedIterable handle all the range
    # calculations, we would be unable to make this optimization.
    total_length = sum(self._segment_length(seg) for seg in segments)
    if self.first_byte is None:
        self.first_byte = 0
    if self.last_byte is None:
        self.last_byte = total_length - 1

    last_sub_path = None
    for seg_dict in segments:
        seg_length = self._segment_length(seg_dict)

        if self.first_byte >= seg_length:
            # don't need any bytes from this segment
            self.first_byte -= seg_length
            self.last_byte -= seg_length
            continue

        if self.last_byte < 0:
            # no bytes are needed from this or any future segment
            break

        seg_range = seg_dict.get('range')
        if seg_range is None:
            seg_range = seg_dict.get('range')
            range_start, range_end = 0, seg_length - 1
        else:
            # We already validated and supplied concrete values
            # for the range on upload
            range_start, range_end = map(int, seg_range.split('-'))

        if config_true_value(seg_dict.get('sub_slo')):
            # do this check here so that we can avoid fetching this last
            # manifest before raising the exception
            if recursion_depth >= self.max_slo_recursion_depth:
                raise ListingIterError("Max recursion depth exceeded")

            sub_path = get_valid_utf8_str(seg_dict['name'])
            sub_cont, sub_obj = split_path(sub_path, 2, 2, True)
            # Consecutive segments often reference the same sub-SLO;
            # only re-fetch when the path changes.
            if last_sub_path != sub_path:
                sub_segments = self._fetch_sub_slo_segments(
                    req, version, account, sub_cont, sub_obj)
            last_sub_path = sub_path

            # Use the existing machinery to slice into the sub-SLO.
            # This requires that we save off our current state, and
            # restore at the other end.
            orig_start, orig_end = self.first_byte, self.last_byte
            self.first_byte = range_start + max(0, self.first_byte)
            self.last_byte = min(range_end, range_start + self.last_byte)

            for sub_seg_dict, sb, eb in self._segment_listing_iterator(
                    req, version, account, sub_segments,
                    recursion_depth=recursion_depth + 1):
                yield sub_seg_dict, sb, eb

            # Restore the first/last state
            self.first_byte, self.last_byte = orig_start, orig_end
        else:
            # py2-only: listing names may be unicode text; downstream
            # expects encoded bytes.
            if isinstance(seg_dict['name'], six.text_type):
                seg_dict['name'] = seg_dict['name'].encode("utf-8")
            yield (seg_dict,
                   max(0, self.first_byte) + range_start,
                   min(range_end, range_start + self.last_byte))

        self.first_byte -= seg_length
        self.last_byte -= seg_length