def _gen_head_range_resp(self, req_range, resp):
    """
    Build a ranged HEAD response out of a plain HEAD response.

    Swift doesn't handle the Range header for HEAD requests, while S3
    returns a ranged HEAD response when the range value satisfies the
    conditions described in:
    - http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35

    :param req_range: value of the request's Range header
    :param resp: the HEAD response; it is modified in place when exactly
                 one range applies
    :returns: ``resp`` (possibly rewritten to a partial-content response)
    :raises InvalidRange: if ``ranges_for_length`` returns an empty list
    """
    object_size = int(resp.headers.get('Content-Length'))

    try:
        parsed = Range(req_range)
    except ValueError:
        # An unparsable Range header is ignored.
        return resp

    spans = parsed.ranges_for_length(object_size)
    if spans == []:
        raise InvalidRange()

    if not spans or len(spans) != 1:
        # TODO: It is necessary to confirm whether need to respond to
        #       multi-part response.(e.g. bytes=0-10,20-30)
        return resp

    first, stop = spans[0]
    resp.headers['Content-Range'] = \
        content_range_header_value(first, stop, object_size)
    resp.headers['Content-Length'] = (stop - first)
    resp.status = HTTP_PARTIAL_CONTENT
    return resp
def fast_forward(self, num_bytes):
    """
    Will skip num_bytes into the current ranges.

    :params num_bytes: the number of bytes that have already been read on
                       this request. This will change the Range header
                       so that the next req will start where it left off.

    :raises NotImplementedError: if this is a multirange request
    :raises ValueError: if invalid range header
    :raises HTTPRequestedRangeNotSatisfiable: if begin + num_bytes
                                              > end of range, or if more
                                              bytes were consumed than a
                                              suffix range asked for
    """
    if 'Range' not in self.backend_headers:
        # No range so far: simply resume from the given offset.
        self.backend_headers['Range'] = 'bytes=%d-' % num_bytes
        return

    req_range = Range(self.backend_headers['Range'])
    if len(req_range.ranges) > 1:
        raise NotImplementedError()

    begin, end = req_range.ranges.pop()
    if begin is None:
        # this is a -50 range req (last 50 bytes of file)
        end -= num_bytes
        if end < 0:
            # A negative suffix length cannot be expressed as a Range.
            raise HTTPRequestedRangeNotSatisfiable()
    else:
        begin += num_bytes
        # Compare against None explicitly: an end offset of 0 is a real
        # bound (e.g. "bytes=0-0"), not an open-ended range.
        if end is not None and begin > end:
            raise HTTPRequestedRangeNotSatisfiable()

    req_range.ranges = [(begin, end)]
    self.backend_headers['Range'] = str(req_range)
def _gen_head_range_resp(self, req_range, resp):
    """
    Swift doesn't handle Range header for HEAD requests.
    So, this method generates HEAD range response from HEAD response.
    S3 return HEAD range response, if the value of range satisfies the
    conditions which are described in the following document.
    - http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35

    :param req_range: value of the request's Range header
    :param resp: the HEAD response; modified in place when exactly one
                 range applies
    :returns: ``resp``
    :raises InvalidRange: if ``ranges_for_length`` returns an empty list
    """
    # int() instead of long(): long() is gone on Python 3, and on Python 2
    # int() already promotes to long when the value needs it.
    length = int(resp.headers.get('Content-Length'))

    try:
        content_range = Range(req_range)
    except ValueError:
        # An unparsable Range header is ignored.
        return resp

    ranges = content_range.ranges_for_length(length)
    if ranges == []:
        raise InvalidRange()
    elif ranges:
        if len(ranges) == 1:
            start, end = ranges[0]
            resp.headers['Content-Range'] = \
                content_range_header_value(start, end, length)
            resp.headers['Content-Length'] = (end - start)
            resp.status = HTTP_PARTIAL_CONTENT
            return resp
        else:
            # TODO: It is necessary to confirm whether need to respond to
            #       multi-part response.(e.g. bytes=0-10,20-30)
            pass

    return resp
def PUT(self, req):
    """
    Handles Upload Part and Upload Part Copy.

    :param req: the incoming request; its container name, object name and
                headers are rewritten in place before it is forwarded
    :returns: the backend response, with status forced to 200
    :raises InvalidArgument: if ``uploadId`` is missing, or if the copy
                             source range is malformed or does not fit
                             the source object
    """
    if 'uploadId' not in req.params:
        raise InvalidArgument('ResourceType', 'partNumber',
                              'Unexpected query string parameter')

    part_number = self.parse_part_number(req)
    upload_id = req.params['uploadId']
    _check_upload_info(req, self.app, upload_id)

    # Parts are stored under <object>/<upload id>/<part number> in the
    # container carrying the multi-upload suffix.
    req.container_name += MULTIUPLOAD_SUFFIX
    req.object_name = '%s/%s/%d' % (req.object_name, upload_id,
                                    part_number)

    req_timestamp = S3Timestamp.now()
    req.headers['X-Timestamp'] = req_timestamp.internal
    source_resp = req.check_copy_source(self.app)

    copy_with_range = ('X-Amz-Copy-Source' in req.headers and
                       'X-Amz-Copy-Source-Range' in req.headers)
    if copy_with_range:
        rng = req.headers['X-Amz-Copy-Source-Range']

        try:
            rng_obj = Range(rng)
        except ValueError:
            rng_obj = None
        # Exactly one byte range is accepted.
        if rng_obj is None or len(rng_obj.ranges) != 1:
            err_msg = ('The x-amz-copy-source-range value must be of the '
                       'form bytes=first-last where first and last are '
                       'the zero-based offsets of the first and last '
                       'bytes to copy')
            raise InvalidArgument('x-amz-source-range', rng, err_msg)

        source_size = int(source_resp.headers['Content-Length'])
        if not rng_obj.ranges_for_length(source_size):
            err_msg = ('Range specified is not valid for source object '
                       'of size: %s' % source_size)
            raise InvalidArgument('x-amz-source-range', rng, err_msg)

        # Hand the range over as a plain Range header.
        req.headers['Range'] = rng
        del req.headers['X-Amz-Copy-Source-Range']

    resp = req.get_response(self.app)

    if 'X-Amz-Copy-Source' in req.headers:
        resp.append_copy_resp_body(req.controller_name,
                                   req_timestamp.s3xmlformat)

    resp.status = 200
    return resp
def parse_input(raw_data):
    """
    Parse a JSON manifest body into a list of segment dictionaries.

    Each entry must carry the keys 'path', 'etag' and 'size_bytes', may
    carry 'range', and its path must contain a '/' after leading slashes
    are removed. A truthy 'range' value is replaced in place by
    ``Range('bytes=<value>')``.

    :param raw_data: the JSON text to parse
    :raises: HTTPException on parse errors
    :returns: a list of dictionaries on success
    """
    try:
        manifest = json.loads(raw_data)
    except ValueError:
        raise HTTPBadRequest("Manifest must be valid json.")

    required = {'path', 'etag', 'size_bytes'}
    permitted = required | {'range'}
    try:
        for segment in manifest:
            present = set(segment)
            if not required <= present or not present <= permitted:
                raise HTTPBadRequest('Invalid SLO Manifest File')
            if '/' not in segment['path'].lstrip('/'):
                raise HTTPBadRequest('Invalid SLO Manifest File')

            if not segment.get('range'):
                continue
            try:
                segment['range'] = Range('bytes=%s' % segment['range'])
            except ValueError:
                raise HTTPBadRequest('Invalid SLO Manifest File')
    except (AttributeError, TypeError):
        # Wrongly-typed manifest structure (non-iterable body, non-string
        # path, ...) is reported as an invalid manifest.
        raise HTTPBadRequest('Invalid SLO Manifest File')
    return manifest
def _get_storlet_invocation_options(self, req):
    """
    Extend the parent's invocation options with range parameters.

    When the request is a storlet request with a single input range, the
    range is passed to the storlet gateway as 'range_start' and
    'range_end', so that range handling keeps zero copy.

    :param req: the request carrying the X-Storlet-Range header
    :returns: the options dictionary
    """
    options = super(StorletObjectHandler, self).\
        _get_storlet_invocation_options(req)

    single_range = (self.is_storlet_range_request and
                    not self.is_storlet_multiple_range_request)
    if not single_range:
        return options

    srange = Range(req.headers['X-Storlet-Range'])
    first, last = srange.ranges[0]
    options['range_start'] = first
    # The end byte of an HTTP Range is included, so add 1 to the end
    # cursor to get a general range (include start, exclude end).
    options['range_end'] = last + 1
    return options
def PUT(self, req):
    """
    Handles Upload Part and Upload Part Copy.

    :param req: the incoming request; its container name, object name and
                headers are rewritten in place before it is forwarded
    :returns: the backend response, with status forced to 200
    :raises InvalidArgument: if ``uploadId`` is missing, if ``partNumber``
                             is missing, not an integer or out of bounds,
                             or if the copy source range is malformed or
                             does not fit the source object
    """
    if 'uploadId' not in req.params:
        raise InvalidArgument('ResourceType', 'partNumber',
                              'Unexpected query string parameter')

    # Read partNumber with .get(): indexing would raise KeyError a second
    # time while building the error, masking the InvalidArgument.
    raw_part_number = req.params.get('partNumber')
    max_part_num = self.conf.max_upload_part_num
    try:
        part_number = int(raw_part_number)
    except (TypeError, ValueError):
        part_number = None
    if part_number is None or not 1 <= part_number <= max_part_num:
        err_msg = 'Part number must be an integer between 1 and %d,' \
                  ' inclusive' % max_part_num
        raise InvalidArgument('partNumber', raw_part_number, err_msg)

    upload_id = req.params['uploadId']
    _get_upload_info(req, self.app, upload_id)

    # Parts are stored under <object>/<upload id>/<part number> in the
    # container carrying the multi-upload suffix.
    req.container_name += MULTIUPLOAD_SUFFIX
    req.object_name = '%s/%s/%d' % (req.object_name, upload_id,
                                    part_number)

    req_timestamp = S3Timestamp.now()
    req.headers['X-Timestamp'] = req_timestamp.internal
    source_resp = req.check_copy_source(self.app)
    if 'X-Amz-Copy-Source' in req.headers and \
            'X-Amz-Copy-Source-Range' in req.headers:
        rng = req.headers['X-Amz-Copy-Source-Range']

        header_valid = True
        try:
            rng_obj = Range(rng)
            # Exactly one byte range is accepted.
            if len(rng_obj.ranges) != 1:
                header_valid = False
        except ValueError:
            header_valid = False
        if not header_valid:
            err_msg = ('The x-amz-copy-source-range value must be of the '
                       'form bytes=first-last where first and last are '
                       'the zero-based offsets of the first and last '
                       'bytes to copy')
            raise InvalidArgument('x-amz-source-range', rng, err_msg)

        source_size = int(source_resp.headers['Content-Length'])
        if not rng_obj.ranges_for_length(source_size):
            err_msg = ('Range specified is not valid for source object '
                       'of size: %s' % source_size)
            raise InvalidArgument('x-amz-source-range', rng, err_msg)

        # Hand the range over as a plain Range header.
        req.headers['Range'] = rng
        del req.headers['X-Amz-Copy-Source-Range']
    if 'X-Amz-Copy-Source' in req.headers:
        # Clear some problematic headers that might be on the source
        req.headers.update({
            sysmeta_header('object', 'etag'): '',
            'X-Object-Sysmeta-Swift3-Etag': '',  # for legacy data
            'X-Object-Sysmeta-Slo-Etag': '',
            'X-Object-Sysmeta-Slo-Size': '',
            get_container_update_override_key('etag'): '',
        })
    resp = req.get_response(self.app)

    if 'X-Amz-Copy-Source' in req.headers:
        resp.append_copy_resp_body(req.controller_name,
                                   req_timestamp.s3xmlformat)

    resp.status = 200
    return resp
def _coalesce_requests(self):
    """
    Turn the segment listing into (request-or-data, etag, size) tuples.

    Consecutive listing entries that refer to the same segment path are
    merged into one multi-range GET when the combined Range is accepted
    by ``Range.ranges_for_length``. Entries carrying 'raw_data' are
    yielded as a slice of that data with a ``None`` etag.

    On ``ListingIterError`` the request still pending is yielded before
    the error is re-raised.
    """
    held_req = held_etag = held_size = None
    try:
        for entry in self.listing_iter:
            if 'raw_data' in entry:
                # Inline data: flush whatever is pending, then emit the
                # requested slice of the data itself.
                if held_req:
                    yield held_req, held_etag, held_size

                raw = entry['raw_data']
                data_slice = raw[entry['first_byte']:entry['last_byte'] + 1]
                yield data_slice, None, len(entry['raw_data'])
                held_req = held_etag = held_size = None
                continue

            seg_path = entry['path']
            seg_etag = entry.get('hash')
            seg_size = entry.get('bytes')
            first_byte = entry['first_byte']
            last_byte = entry['last_byte']
            if seg_size is not None:
                seg_size = int(seg_size)
            first_byte = first_byte or 0
            if last_byte is None:
                go_to_end = True
            else:
                go_to_end = (seg_size is not None and
                             last_byte == seg_size - 1)

            # The "multipart-manifest=get" query param ensures that the
            # segment is a plain old object, not some flavor of large
            # object; therefore, its etag is its MD5sum and hence we can
            # check it.
            seg_req = make_subrequest(
                self.req.environ,
                path=seg_path + '?multipart-manifest=get',
                method='GET',
                headers={
                    'x-auth-token': self.req.headers.get('x-auth-token')
                },
                agent=('%(orig)s ' + self.ua_suffix),
                swift_source=self.swift_source)

            seg_req_rangeval = None
            if first_byte != 0 or not go_to_end:
                if go_to_end:
                    range_tail = ''
                else:
                    range_tail = last_byte
                seg_req_rangeval = "%s-%s" % (first_byte, range_tail)
                seg_req.headers['Range'] = "bytes=" + seg_req_rangeval

            # We can only coalesce if paths match and we know the segment
            # size (so we can check that the ranges will be allowed)
            if held_req and held_req.path == seg_req.path and \
                    seg_size is not None:
                # Make a new Range string so that we don't goof up the
                # existing one in case of invalid ranges. Note that a
                # range set with too many individual byteranges is
                # invalid, so we can combine N valid byteranges and 1
                # valid byterange and get an invalid range set.
                if held_req.range:
                    merged = str(held_req.range)
                else:
                    merged = "bytes=0-%d" % (seg_size - 1)

                if seg_req.range:
                    merged += "," + seg_req_rangeval
                else:
                    merged += ",0-%d" % (seg_size - 1)

                if Range(merged).ranges_for_length(seg_size):
                    # Good news! We can coalesce the requests
                    held_req.headers['Range'] = merged
                    continue
                # else, Too many ranges, or too much backtracking, or ...

            if held_req:
                yield held_req, held_etag, held_size
            held_req = seg_req
            held_etag = seg_etag
            held_size = seg_size

    except ListingIterError:
        e_type, e_value, e_traceback = sys.exc_info()
        if held_req:
            yield held_req, held_etag, held_size
        six.reraise(e_type, e_value, e_traceback)

    if held_req:
        yield held_req, held_etag, held_size
def _check_slo_segment(seg_index, seg_dict, vrs, account, req_path):
    """
    Validate one manifest entry, normalizing it in place.

    'size_bytes' is converted to an integer and 'range' to a Range object
    as soon as each of them parses.

    :returns: the error message for the first failed check, or None
    """
    if not isinstance(seg_dict, dict):
        return "Index %d: not a JSON object" % seg_index

    missing_keys = sorted(k for k in REQUIRED_SLO_KEYS
                          if k not in seg_dict)
    if missing_keys:
        return "Index %d: missing keys %s" % (
            seg_index,
            ", ".join('"%s"' % (mk, ) for mk in missing_keys))

    extraneous_keys = sorted(k for k in seg_dict
                             if k not in ALLOWED_SLO_KEYS)
    if extraneous_keys:
        return "Index %d: extraneous keys %s" % (
            seg_index,
            ", ".join('"%s"' % (ek, ) for ek in extraneous_keys))

    if not isinstance(seg_dict['path'], six.string_types):
        return "Index %d: \"path\" must be a string" % seg_index

    etag = seg_dict['etag']
    if etag is not None and not isinstance(etag, six.string_types):
        return "Index %d: \"etag\" must be a string or null" % seg_index

    if '/' not in seg_dict['path'].strip('/'):
        return (
            "Index %d: path does not refer to an object. Path must be of "
            "the form /container/object." % seg_index)

    seg_size = seg_dict['size_bytes']
    if seg_size is not None:
        try:
            seg_size = int(seg_size)
        except (TypeError, ValueError):
            return "Index %d: invalid size_bytes" % seg_index
        seg_dict['size_bytes'] = seg_size
        if seg_size < 1:
            return ("Index %d: too small; each segment must be "
                    "at least 1 byte."
                    % (seg_index, ))

    obj_path = '/'.join(['', vrs, account, seg_dict['path'].lstrip('/')])
    if req_path == quote(obj_path):
        return ("Index %d: manifest must not include itself as a segment"
                % seg_index)

    if not seg_dict.get('range'):
        return None

    try:
        seg_dict['range'] = Range('bytes=%s' % seg_dict['range'])
    except ValueError:
        return "Index %d: invalid range" % seg_index

    if len(seg_dict['range'].ranges) > 1:
        return ("Index %d: multiple ranges (only one allowed)"
                % seg_index)

    # If the user *told* us the object's size, we can check range
    # satisfiability right now. If they lied about the size, we'll
    # fail that validation later.
    if (seg_size is not None and
            len(seg_dict['range'].ranges_for_length(seg_size)) != 1):
        return "Index %d: unsatisfiable range" % seg_index

    return None


def parse_and_validate_input(req_body, req_path):
    """
    Given a request body, parses it and returns a list of dictionaries.

    The output structure is nearly the same as the input structure, but it
    is not an exact copy. Given a valid input dictionary `d_in`, its
    corresponding output dictionary `d_out` will be as follows:

      * d_out['etag'] == d_in['etag']

      * d_out['path'] == d_in['path']

      * d_in['size_bytes'] can be a string ("12") or an integer (12), but
        d_out['size_bytes'] is an integer.

      * (optional) d_in['range'] is a string of the form "M-N", "M-", or
        "-N", where M and N are non-negative integers. d_out['range'] is the
        corresponding swob.Range object. If d_in does not have a key
        'range', neither will d_out.

    Every entry is checked; the messages of all failing entries are
    reported together, one per line.

    :raises: HTTPException on parse errors or semantic errors (e.g. bogus
        JSON structure, syntactically invalid ranges)

    :returns: a list of dictionaries on success
    """
    try:
        parsed_data = json.loads(req_body)
    except ValueError:
        raise HTTPBadRequest("Manifest must be valid JSON.\n")

    if not isinstance(parsed_data, list):
        raise HTTPBadRequest("Manifest must be a list.\n")

    # If we got here, req_path refers to an object, so this won't ever raise
    # ValueError.
    vrs, account, _junk = split_path(req_path, 3, 3, True)

    errors = []
    for seg_index, seg_dict in enumerate(parsed_data):
        problem = _check_slo_segment(seg_index, seg_dict, vrs, account,
                                     req_path)
        if problem is not None:
            errors.append(problem)

    if errors:
        error_message = "".join(e + "\n" for e in errors)
        raise HTTPBadRequest(error_message,
                             headers={"Content-Type": "text/plain"})

    return parsed_data
def _coalesce_requests(self):
    """
    Turn the segment listing into (request, etag, size) tuples.

    Consecutive listing entries that refer to the same segment path are
    merged into one multi-range GET when the combined Range is accepted
    by ``Range.ranges_for_length``.

    :raises SegmentError: if more than ``self.max_get_time`` seconds have
                          passed since this generator started

    On ``ListingIterError`` the request still pending is yielded before
    the error is re-raised.
    """
    start_time = time.time()
    pending_req = None
    pending_etag = None
    pending_size = None
    try:
        for seg_path, seg_etag, seg_size, first_byte, last_byte \
                in self.listing_iter:
            first_byte = first_byte or 0
            go_to_end = last_byte is None or (
                seg_size is not None and last_byte == seg_size - 1)
            if time.time() - start_time > self.max_get_time:
                raise SegmentError(
                    'ERROR: While processing manifest %s, '
                    'max LO GET time of %ds exceeded' %
                    (self.name, self.max_get_time))
            # Make sure that the segment is a plain old object, not some
            # flavor of large object, so that we can check its MD5.
            path = seg_path + '?multipart-manifest=get'
            seg_req = make_subrequest(
                self.req.environ, path=path, method='GET',
                headers={'x-auth-token': self.req.headers.get(
                    'x-auth-token')},
                agent=('%(orig)s ' + self.ua_suffix),
                swift_source=self.swift_source)
            if first_byte != 0 or not go_to_end:
                seg_req.headers['Range'] = "bytes=%s-%s" % (
                    first_byte, '' if go_to_end else last_byte)

            # We can only coalesce if paths match and we know the segment
            # size (so we can check that the ranges will be allowed)
            if pending_req and pending_req.path == seg_req.path and \
                    seg_size is not None:
                whole_segment = '0-%s' % (seg_size - 1)
                # A request for the whole segment carries no Range header;
                # indexing headers['Range'] would raise KeyError, so fall
                # back to the explicit full range.
                seg_range = seg_req.headers.get('Range')
                if seg_range is None:
                    seg_rangeval = whole_segment
                else:
                    seg_rangeval = seg_range.split('bytes=')[1]
                new_range = '%s,%s' % (
                    pending_req.headers.get(
                        'Range', 'bytes=' + whole_segment),
                    seg_rangeval)
                if Range(new_range).ranges_for_length(seg_size):
                    # Good news! We can coalesce the requests
                    pending_req.headers['Range'] = new_range
                    continue
                # else, Too many ranges, or too much backtracking, or ...

            if pending_req:
                yield pending_req, pending_etag, pending_size
            pending_req = seg_req
            pending_etag = seg_etag
            pending_size = seg_size

    except ListingIterError:
        e_type, e_value, e_traceback = sys.exc_info()
        if time.time() - start_time > self.max_get_time:
            raise SegmentError(
                'ERROR: While processing manifest %s, '
                'max LO GET time of %ds exceeded' %
                (self.name, self.max_get_time))
        if pending_req:
            yield pending_req, pending_etag, pending_size
        six.reraise(e_type, e_value, e_traceback)

    if time.time() - start_time > self.max_get_time:
        raise SegmentError(
            'ERROR: While processing manifest %s, '
            'max LO GET time of %ds exceeded' %
            (self.name, self.max_get_time))
    if pending_req:
        yield pending_req, pending_etag, pending_size
def PUT(self, req):
    """
    Handles Upload Part and Upload Part Copy.

    The response carries an 'x-oss-hash-crc64ecma' header computed over
    the request body.

    :param req: the incoming request; its container name, object name and
                headers are rewritten in place before it is forwarded
    :returns: the backend response, with status forced to 200
    :raises InvalidArgument: if ``uploadId`` is missing, if ``partNumber``
                             is missing, not an integer or out of bounds,
                             or if the copy source range is malformed or
                             does not fit the source object
    """
    if 'uploadId' not in req.params:
        raise InvalidArgument('ResourceType', 'partNumber',
                              'Unexpected query string parameter')

    # Read partNumber with .get(): indexing would raise KeyError a second
    # time while building the error, masking the InvalidArgument.
    raw_part_number = req.params.get('partNumber')
    max_part_num = CONF.max_upload_part_num
    try:
        part_number = int(raw_part_number)
    except (TypeError, ValueError):
        part_number = None
    if part_number is None or not 1 <= part_number <= max_part_num:
        err_msg = 'Part number must be an integer between 1 and %d,' \
                  ' inclusive' % max_part_num
        raise InvalidArgument('partNumber', raw_part_number, err_msg)

    # NOTE(review): the body is captured before the request is forwarded;
    # for Upload Part Copy this is the request body, not the copied data
    # -- confirm that is the intended checksum input.
    data = req.body
    upload_id = req.params['uploadId']
    _check_upload_info(req, self.app, upload_id)

    # Parts are stored under <object>/<upload id>/<part number> in the
    # container carrying the multi-upload suffix.
    req.container_name += MULTIUPLOAD_SUFFIX
    req.object_name = '%s/%s/%d' % (req.object_name, upload_id,
                                    part_number)

    req_timestamp = OssTimestamp.now()
    req.headers['X-Timestamp'] = req_timestamp.internal
    source_resp = req.check_copy_source(self.app)
    if 'x-oss-copy-source' in req.headers and \
            'x-oss-copy-source-range' in req.headers:
        rng = req.headers['x-oss-copy-source-range']

        header_valid = True
        try:
            rng_obj = Range(rng)
            # Exactly one byte range is accepted.
            if len(rng_obj.ranges) != 1:
                header_valid = False
        except ValueError:
            header_valid = False
        if not header_valid:
            err_msg = ('The x-oss-copy-source-range value must be of the '
                       'form bytes=first-last where first and last are '
                       'the zero-based offsets of the first and last '
                       'bytes to copy')
            raise InvalidArgument('x-oss-source-range', rng, err_msg)

        source_size = int(source_resp.headers['Content-Length'])
        if not rng_obj.ranges_for_length(source_size):
            err_msg = ('Range specified is not valid for source object '
                       'of size: %s' % source_size)
            raise InvalidArgument('x-oss-source-range', rng, err_msg)

        # Hand the range over as a plain range header.
        req.headers['range'] = rng
        del req.headers['x-oss-copy-source-range']
    resp = req.get_response(self.app)

    # Plain integer literals: the "L" suffix is a syntax error on
    # Python 3, and Python 2 promotes large ints to long by itself.
    do_crc64 = crcmod.mkCrcFun(0x142F0E1EBA9EA3693, initCrc=0,
                               xorOut=0xffffffffffffffff, rev=True)
    if 'x-oss-copy-source' in req.headers:
        resp.append_copy_resp_body(req.controller_name,
                                   req_timestamp.ossxmlformat)

    resp.status = 200
    resp.headers['x-oss-hash-crc64ecma'] = do_crc64(data)
    return resp
def PUT(self, req):
    """
    Handles Upload Part and Upload Part Copy.

    :param req: the incoming request; its container name, object name and
                headers are rewritten in place before it is forwarded
    :returns: the backend response, with status forced to 200
    :raises InvalidArgument: if ``uploadId`` is missing, if ``partNumber``
                             is missing, not an integer or out of bounds,
                             or if the copy source range is malformed or
                             does not fit the source object
    """
    if 'uploadId' not in req.params:
        raise InvalidArgument('ResourceType', 'partNumber',
                              'Unexpected query string parameter')

    # Read partNumber with .get(): indexing would raise KeyError a second
    # time while building the error, masking the InvalidArgument.
    raw_part_number = req.params.get('partNumber')
    max_part_num = CONF.max_upload_part_num
    try:
        part_number = int(raw_part_number)
    except (TypeError, ValueError):
        part_number = None
    if part_number is None or not 1 <= part_number <= max_part_num:
        err_msg = 'Part number must be an integer between 1 and %d,' \
                  ' inclusive' % max_part_num
        raise InvalidArgument('partNumber', raw_part_number, err_msg)

    upload_id = req.params['uploadId']
    _check_upload_info(req, self.app, upload_id)

    # Parts are stored under <object>/<upload id>/<part number> in the
    # container carrying the multi-upload suffix.
    req.container_name += MULTIUPLOAD_SUFFIX
    req.object_name = '%s/%s/%d' % (req.object_name, upload_id,
                                    part_number)

    req_timestamp = S3Timestamp.now()
    req.headers['X-Timestamp'] = req_timestamp.internal
    source_resp = req.check_copy_source(self.app)
    if 'X-Amz-Copy-Source' in req.headers and \
            'X-Amz-Copy-Source-Range' in req.headers:
        rng = req.headers['X-Amz-Copy-Source-Range']

        header_valid = True
        try:
            rng_obj = Range(rng)
            # Exactly one byte range is accepted.
            if len(rng_obj.ranges) != 1:
                header_valid = False
        except ValueError:
            header_valid = False
        if not header_valid:
            err_msg = ('The x-amz-copy-source-range value must be of the '
                       'form bytes=first-last where first and last are '
                       'the zero-based offsets of the first and last '
                       'bytes to copy')
            raise InvalidArgument('x-amz-source-range', rng, err_msg)

        source_size = int(source_resp.headers['Content-Length'])
        if not rng_obj.ranges_for_length(source_size):
            err_msg = ('Range specified is not valid for source object '
                       'of size: %s' % source_size)
            raise InvalidArgument('x-amz-source-range', rng, err_msg)

        # Hand the range over as a plain Range header.
        req.headers['Range'] = rng
        del req.headers['X-Amz-Copy-Source-Range']
    resp = req.get_response(self.app)

    if 'X-Amz-Copy-Source' in req.headers:
        resp.append_copy_resp_body(req.controller_name,
                                   req_timestamp.s3xmlformat)

    resp.status = 200
    return resp
def is_storlet_multiple_range_request(self):
    """
    Tell whether the X-Storlet-Range header names more than one range.

    :returns: False when this is not a storlet range request; otherwise
              whether the parsed header holds more than one range
    """
    if self.is_storlet_range_request:
        header_value = self.request.headers['X-Storlet-Range']
        parsed = Range(header_value)
        return len(parsed.ranges) > 1
    return False