def _byte_counting_iter(self): # Checks that we give the client the right number of bytes. Raises # SegmentError if the number of bytes is wrong. bytes_left = self.response_body_length for seg_name, chunk in self._requests_to_bytes_iter(): if bytes_left is None: yield chunk elif bytes_left >= len(chunk): yield chunk bytes_left -= len(chunk) else: yield chunk[:bytes_left] bytes_left -= len(chunk) raise SegmentError( 'Too many bytes for %(name)s; truncating in ' '%(seg)s with %(left)d bytes left' % { 'name': self.name, 'seg': seg_name, 'left': bytes_left }) if bytes_left: raise SegmentError('Not enough bytes for %s; closing connection' % self.name)
def _time_limited_iter(self): # Makes sure a GET response doesn't take more than self.max_get_time # seconds to process. Raises an exception if things take too long. start_time = time.time() for chunk in self._byte_counting_iter(): now = time.time() yield chunk if now - start_time > self.max_get_time: raise SegmentError('While processing manifest %s, ' 'max LO GET time of %ds exceeded' % (self.name, self.max_get_time))
def _internal_iter(self):
    """Fetch each coalesced segment request and yield response bytes.

    Verifies segment status, etag and size, enforces the expected total
    body length, and logs (or re-raises) listing/segment errors; the
    last response is always closed in the ``finally`` clause.
    """
    remaining = self.response_body_length
    try:
        for seg_req, seg_etag, seg_size in self._coalesce_requests():
            seg_resp = seg_req.get_response(self.app)
            if not is_success(seg_resp.status_int):
                close_if_possible(seg_resp.app_iter)
                raise SegmentError(
                    'While processing manifest %s, '
                    'got %d while retrieving %s' %
                    (self.name, seg_resp.status_int, seg_req.path))

            etag_mismatch = seg_etag and (seg_resp.etag != seg_etag)
            # The content-length check is for security reasons. Seems
            # possible that an attacker could upload a >1mb object and
            # then replace it with a much smaller object with same
            # etag. Then create a big nested SLO that calls that
            # object many times which would hammer our obj servers. If
            # this is a range request, don't check content-length
            # because it won't match.
            size_mismatch = (seg_size and
                             (seg_resp.content_length != seg_size) and
                             not seg_req.range)
            if etag_mismatch or size_mismatch:
                close_if_possible(seg_resp.app_iter)
                raise SegmentError(
                    'Object segment no longer valid: '
                    '%(path)s etag: %(r_etag)s != %(s_etag)s or '
                    '%(r_size)s != %(s_size)s.' %
                    {'path': seg_req.path, 'r_etag': seg_resp.etag,
                     'r_size': seg_resp.content_length,
                     's_etag': seg_etag, 's_size': seg_size})
            self.current_resp = seg_resp

            checksummer = None
            if seg_resp.etag and not seg_req.headers.get('Range'):
                # Only calculate the MD5 if it we can use it to validate
                checksummer = hashlib.md5()

            document_iters = maybe_multipart_byteranges_to_document_iters(
                seg_resp.app_iter,
                seg_resp.headers['Content-Type'])

            for chunk in itertools.chain.from_iterable(document_iters):
                if checksummer:
                    checksummer.update(chunk)
                if remaining is None:
                    yield chunk
                elif len(chunk) <= remaining:
                    yield chunk
                    remaining -= len(chunk)
                else:
                    # Yield what was promised, then give up on this GET.
                    yield chunk[:remaining]
                    remaining -= len(chunk)
                    close_if_possible(seg_resp.app_iter)
                    raise SegmentError(
                        'Too many bytes for %(name)s; truncating in '
                        '%(seg)s with %(left)d bytes left' % {
                            'name': self.name, 'seg': seg_req.path,
                            'left': remaining})
            close_if_possible(seg_resp.app_iter)

            if checksummer and checksummer.hexdigest() != seg_resp.etag:
                raise SegmentError(
                    "Bad MD5 checksum in %(name)s for %(seg)s: headers had"
                    " %(etag)s, but object MD5 was actually %(actual)s" %
                    {'seg': seg_req.path, 'etag': seg_resp.etag,
                     'name': self.name,
                     'actual': checksummer.hexdigest()})
        if remaining:
            raise SegmentError(
                'Not enough bytes for %s; closing connection' % self.name)
    except (ListingIterError, SegmentError) as err:
        self.logger.error(err)
        # Errors after the first segment was already validated are
        # swallowed here; re-raise only pre-validation failures.
        if not self.validated_first_segment:
            raise
    finally:
        if self.current_resp:
            close_if_possible(self.current_resp.app_iter)
def _requests_to_bytes_iter(self): # Take the requests out of self._coalesce_requests, actually make # the requests, and generate the bytes from the responses. # # Yields 2-tuples (segment-name, byte-chunk). The segment name is # used for logging. for data_or_req, seg_etag, seg_size in self._coalesce_requests(): if isinstance(data_or_req, bytes): # ugly, awful overloading yield ('data segment', data_or_req) continue seg_req = data_or_req seg_resp = seg_req.get_response(self.app) if not is_success(seg_resp.status_int): close_if_possible(seg_resp.app_iter) raise SegmentError( 'While processing manifest %s, ' 'got %d while retrieving %s' % (self.name, seg_resp.status_int, seg_req.path)) elif ( (seg_etag and (seg_resp.etag != seg_etag)) or (seg_size and (seg_resp.content_length != seg_size) and not seg_req.range)): # The content-length check is for security reasons. Seems # possible that an attacker could upload a >1mb object and # then replace it with a much smaller object with same # etag. Then create a big nested SLO that calls that # object many times which would hammer our obj servers. If # this is a range request, don't check content-length # because it won't match. close_if_possible(seg_resp.app_iter) raise SegmentError( 'Object segment no longer valid: ' '%(path)s etag: %(r_etag)s != %(s_etag)s or ' '%(r_size)s != %(s_size)s.' 
% { 'path': seg_req.path, 'r_etag': seg_resp.etag, 'r_size': seg_resp.content_length, 's_etag': seg_etag, 's_size': seg_size }) else: self.current_resp = seg_resp seg_hash = None if seg_resp.etag and not seg_req.headers.get('Range'): # Only calculate the MD5 if it we can use it to validate seg_hash = hashlib.md5() document_iters = maybe_multipart_byteranges_to_document_iters( seg_resp.app_iter, seg_resp.headers['Content-Type']) for chunk in itertools.chain.from_iterable(document_iters): if seg_hash: seg_hash.update(chunk) yield (seg_req.path, chunk) close_if_possible(seg_resp.app_iter) if seg_hash and seg_hash.hexdigest() != seg_resp.etag: raise SegmentError( "Bad MD5 checksum in %(name)s for %(seg)s: headers had" " %(etag)s, but object MD5 was actually %(actual)s" % { 'seg': seg_req.path, 'etag': seg_resp.etag, 'name': self.name, 'actual': seg_hash.hexdigest() })
def _coalesce_requests(self):
    """Turn the manifest listing into (request, etag, size) tuples.

    Consecutive listing entries that reference the same object are
    merged into one ranged GET whenever the combined Range header is
    still valid, cutting down on backend requests. Enforces
    ``self.max_get_time`` while iterating the listing.
    """
    start_time = time.time()
    pending_req = pending_etag = pending_size = None
    try:
        for seg_path, seg_etag, seg_size, first_byte, last_byte \
                in self.listing_iter:
            first_byte = first_byte or 0
            go_to_end = last_byte is None or (
                seg_size is not None and last_byte == seg_size - 1)
            if time.time() - start_time > self.max_get_time:
                raise SegmentError(
                    'While processing manifest %s, '
                    'max LO GET time of %ds exceeded' %
                    (self.name, self.max_get_time))
            # The "multipart-manifest=get" query param ensures that the
            # segment is a plain old object, not some flavor of large
            # object; therefore, its etag is its MD5sum and hence we can
            # check it.
            path = seg_path + '?multipart-manifest=get'
            seg_req = make_subrequest(
                self.req.environ, path=path, method='GET',
                headers={'x-auth-token': self.req.headers.get(
                    'x-auth-token')},
                agent=('%(orig)s ' + self.ua_suffix),
                swift_source=self.swift_source)

            seg_req_rangeval = None
            if first_byte != 0 or not go_to_end:
                seg_req_rangeval = "%s-%s" % (
                    first_byte, '' if go_to_end else last_byte)
                seg_req.headers['Range'] = "bytes=" + seg_req_rangeval

            # We can only coalesce if paths match and we know the segment
            # size (so we can check that the ranges will be allowed)
            if (pending_req and pending_req.path == seg_req.path and
                    seg_size is not None):
                # Build a fresh Range string so the pending request's
                # header is untouched if the combination turns out to be
                # invalid. Note that a range set with too many individual
                # byteranges is invalid, so combining N valid byteranges
                # with one more valid byterange can yield an invalid set.
                if pending_req.range:
                    combined = str(pending_req.range)
                else:
                    combined = "bytes=0-%d" % (seg_size - 1)
                if seg_req.range:
                    combined += "," + seg_req_rangeval
                else:
                    combined += ",0-%d" % (seg_size - 1)

                if Range(combined).ranges_for_length(seg_size):
                    # Good news! We can coalesce the requests
                    pending_req.headers['Range'] = combined
                    continue
                # else, Too many ranges, or too much backtracking, or ...

            if pending_req:
                yield pending_req, pending_etag, pending_size
            pending_req, pending_etag, pending_size = (
                seg_req, seg_etag, seg_size)
    except ListingIterError:
        e_type, e_value, e_traceback = sys.exc_info()
        if time.time() - start_time > self.max_get_time:
            raise SegmentError(
                'While processing manifest %s, '
                'max LO GET time of %ds exceeded' %
                (self.name, self.max_get_time))
        # Flush whatever we had pending before propagating the listing
        # failure, so the client gets as much data as possible.
        if pending_req:
            yield pending_req, pending_etag, pending_size
        six.reraise(e_type, e_value, e_traceback)

    if time.time() - start_time > self.max_get_time:
        raise SegmentError(
            'While processing manifest %s, '
            'max LO GET time of %ds exceeded' %
            (self.name, self.max_get_time))
    if pending_req:
        yield pending_req, pending_etag, pending_size
def _coalesce_requests(self):
    """Turn the manifest listing into (request, etag, size) tuples.

    Consecutive listing entries that reference the same object are
    merged into one ranged GET when the combined Range header is still
    valid. Enforces ``self.max_get_time`` while iterating the listing.

    Raises SegmentError when the time limit is exceeded; re-raises
    ListingIterError after flushing any pending request.
    """
    start_time = time.time()
    pending_req = None
    pending_etag = None
    pending_size = None
    try:
        for seg_path, seg_etag, seg_size, first_byte, last_byte \
                in self.listing_iter:
            first_byte = first_byte or 0
            go_to_end = last_byte is None or (
                seg_size is not None and last_byte == seg_size - 1)
            if time.time() - start_time > self.max_get_time:
                raise SegmentError(
                    'ERROR: While processing manifest %s, '
                    'max LO GET time of %ds exceeded' %
                    (self.name, self.max_get_time))
            # Make sure that the segment is a plain old object, not some
            # flavor of large object, so that we can check its MD5.
            path = seg_path + '?multipart-manifest=get'
            seg_req = make_subrequest(
                self.req.environ, path=path, method='GET',
                headers={'x-auth-token': self.req.headers.get(
                    'x-auth-token')},
                agent=('%(orig)s ' + self.ua_suffix),
                swift_source=self.swift_source)
            if first_byte != 0 or not go_to_end:
                seg_req.headers['Range'] = "bytes=%s-%s" % (
                    first_byte, '' if go_to_end else last_byte)

            # We can only coalesce if paths match and we know the segment
            # size (so we can check that the ranges will be allowed)
            if pending_req and pending_req.path == seg_req.path and \
                    seg_size is not None:
                # BUG FIX: the new segment may cover the whole object and
                # thus have no Range header; use .get with a full-object
                # default (as is already done for the pending request)
                # instead of seg_req.headers['Range'], which raised
                # KeyError in that case.
                whole_obj_range = 'bytes=0-%s' % (seg_size - 1)
                new_range = '%s,%s' % (
                    pending_req.headers.get('Range', whole_obj_range),
                    seg_req.headers.get(
                        'Range', whole_obj_range).split('bytes=')[1])
                if Range(new_range).ranges_for_length(seg_size):
                    # Good news! We can coalesce the requests
                    pending_req.headers['Range'] = new_range
                    continue
                # else, Too many ranges, or too much backtracking, or ...
            if pending_req:
                yield pending_req, pending_etag, pending_size
            pending_req = seg_req
            pending_etag = seg_etag
            pending_size = seg_size
    except ListingIterError:
        e_type, e_value, e_traceback = sys.exc_info()
        if time.time() - start_time > self.max_get_time:
            raise SegmentError(
                'ERROR: While processing manifest %s, '
                'max LO GET time of %ds exceeded' %
                (self.name, self.max_get_time))
        # Flush any pending request before propagating the listing
        # failure, so the client gets as much data as possible.
        if pending_req:
            yield pending_req, pending_etag, pending_size
        six.reraise(e_type, e_value, e_traceback)

    if time.time() - start_time > self.max_get_time:
        raise SegmentError(
            'ERROR: While processing manifest %s, '
            'max LO GET time of %ds exceeded' %
            (self.name, self.max_get_time))
    if pending_req:
        yield pending_req, pending_etag, pending_size
def _internal_iter(self):
    """Fetch every segment from the listing and yield its bytes.

    Validates segment status/etag/size, verifies the MD5 of
    whole-object segments, enforces the expected body length, and logs
    then re-raises listing/segment errors.
    """
    start_time = time.time()
    remaining = self.response_body_length
    try:
        for seg_path, seg_etag, seg_size, first_byte, last_byte \
                in self.listing_iter:
            if time.time() - start_time > self.max_get_time:
                raise SegmentError(
                    'ERROR: While processing manifest %s, '
                    'max LO GET time of %ds exceeded' %
                    (self.name, self.max_get_time))
            # Make sure that the segment is a plain old object, not some
            # flavor of large object, so that we can check its MD5.
            path = seg_path + '?multipart-manifest=get'
            seg_req = make_subrequest(
                self.req.environ, path=path, method='GET',
                headers={'x-auth-token': self.req.headers.get(
                    'x-auth-token')},
                agent=('%(orig)s ' + self.ua_suffix),
                swift_source=self.swift_source)
            if first_byte is not None or last_byte is not None:
                seg_req.headers['Range'] = "bytes=%s-%s" % (
                    # The 0 is to avoid having a range like "bytes=-10",
                    # which actually means the *last* 10 bytes.
                    '0' if first_byte is None else first_byte,
                    '' if last_byte is None else last_byte)

            seg_resp = seg_req.get_response(self.app)
            if not is_success(seg_resp.status_int):
                close_if_possible(seg_resp.app_iter)
                raise SegmentError(
                    'ERROR: While processing manifest %s, '
                    'got %d while retrieving %s' %
                    (self.name, seg_resp.status_int, seg_path))

            etag_mismatch = seg_etag and (seg_resp.etag != seg_etag)
            # The content-length check is for security reasons. Seems
            # possible that an attacker could upload a >1mb object and
            # then replace it with a much smaller object with same
            # etag. Then create a big nested SLO that calls that
            # object many times which would hammer our obj servers. If
            # this is a range request, don't check content-length
            # because it won't match.
            size_mismatch = (seg_size and
                             (seg_resp.content_length != seg_size) and
                             not seg_req.range)
            if etag_mismatch or size_mismatch:
                close_if_possible(seg_resp.app_iter)
                raise SegmentError(
                    'Object segment no longer valid: '
                    '%(path)s etag: %(r_etag)s != %(s_etag)s or '
                    '%(r_size)s != %(s_size)s.' %
                    {'path': seg_req.path, 'r_etag': seg_resp.etag,
                     'r_size': seg_resp.content_length,
                     's_etag': seg_etag, 's_size': seg_size})

            hasher = hashlib.md5()
            for chunk in seg_resp.app_iter:
                hasher.update(chunk)
                if remaining is None:
                    yield chunk
                elif len(chunk) <= remaining:
                    yield chunk
                    remaining -= len(chunk)
                else:
                    # Yield what was promised, then give up on this GET.
                    yield chunk[:remaining]
                    remaining -= len(chunk)
                    close_if_possible(seg_resp.app_iter)
                    raise SegmentError(
                        'Too many bytes for %(name)s; truncating in '
                        '%(seg)s with %(left)d bytes left' % {
                            'name': self.name, 'seg': seg_req.path,
                            'left': remaining})
            close_if_possible(seg_resp.app_iter)

            # Only whole-object GETs can be checked against the etag.
            whole_object = first_byte is None and last_byte is None
            if (seg_resp.etag and whole_object and
                    hasher.hexdigest() != seg_resp.etag):
                raise SegmentError(
                    "Bad MD5 checksum in %(name)s for %(seg)s: headers had"
                    " %(etag)s, but object MD5 was actually %(actual)s" %
                    {'seg': seg_req.path, 'etag': seg_resp.etag,
                     'name': self.name, 'actual': hasher.hexdigest()})
        if remaining:
            raise SegmentError(
                'Not enough bytes for %s; closing connection' % self.name)
    except (ListingIterError, SegmentError):
        self.logger.exception(
            _('ERROR: An error occurred '
              'while retrieving segments'))
        raise
def __iter__(self):
    """Yield the bytes of every segment in the manifest, in order.

    Enforces the max GET time, validates each segment's status, etag
    and size, verifies whole-object MD5s, and checks the total byte
    count against the response's Content-Length.

    Raises SegmentError on validation failures; a ListingIterError is
    re-raised after optionally yielding a placeholder byte (see below).
    """
    start_time = time.time()
    have_yielded_data = False

    # Expected body length; used to detect segments that grew or shrank
    # after the manifest was validated.
    if self.response and self.response.content_length:
        bytes_left = int(self.response.content_length)
    else:
        bytes_left = None

    try:
        for seg_path, seg_etag, seg_size, first_byte, last_byte \
                in self.listing_iter:
            if time.time() - start_time > self.max_get_time:
                raise SegmentError(
                    'ERROR: While processing manifest %s, '
                    'max LO GET time of %ds exceeded' %
                    (self.name, self.max_get_time))
            # Make sure that the segment is a plain old object, not some
            # flavor of large object, so that we can check its MD5.
            path = seg_path + '?multipart-manifest=get'
            seg_req = make_subrequest(
                self.req.environ, path=path, method='GET',
                headers={'x-auth-token': self.req.headers.get(
                    'x-auth-token')},
                agent=('%(orig)s ' + self.ua_suffix),
                swift_source=self.swift_source)
            if first_byte is not None or last_byte is not None:
                seg_req.headers['Range'] = "bytes=%s-%s" % (
                    # The 0 is to avoid having a range like "bytes=-10",
                    # which actually means the *last* 10 bytes.
                    '0' if first_byte is None else first_byte,
                    '' if last_byte is None else last_byte)

            seg_resp = seg_req.get_response(self.app)
            if not is_success(seg_resp.status_int):
                close_if_possible(seg_resp.app_iter)
                raise SegmentError(
                    'ERROR: While processing manifest %s, '
                    'got %d while retrieving %s' %
                    (self.name, seg_resp.status_int, seg_path))
            elif ((seg_etag and (seg_resp.etag != seg_etag)) or
                  (seg_size and (seg_resp.content_length != seg_size) and
                   not seg_req.range)):
                # The content-length check is for security reasons. Seems
                # possible that an attacker could upload a >1mb object and
                # then replace it with a much smaller object with same
                # etag. Then create a big nested SLO that calls that
                # object many times which would hammer our obj servers. If
                # this is a range request, don't check content-length
                # because it won't match.
                close_if_possible(seg_resp.app_iter)
                raise SegmentError(
                    'Object segment no longer valid: '
                    '%(path)s etag: %(r_etag)s != %(s_etag)s or '
                    '%(r_size)s != %(s_size)s.' %
                    {'path': seg_req.path, 'r_etag': seg_resp.etag,
                     'r_size': seg_resp.content_length,
                     's_etag': seg_etag, 's_size': seg_size})

            seg_hash = hashlib.md5()
            for chunk in seg_resp.app_iter:
                seg_hash.update(chunk)
                have_yielded_data = True
                if bytes_left is None:
                    yield chunk
                elif bytes_left >= len(chunk):
                    yield chunk
                    bytes_left -= len(chunk)
                else:
                    yield chunk[:bytes_left]
                    bytes_left -= len(chunk)
                    close_if_possible(seg_resp.app_iter)
                    raise SegmentError(
                        'Too many bytes for %(name)s; truncating in '
                        '%(seg)s with %(left)d bytes left' % {
                            'name': self.name, 'seg': seg_req.path,
                            'left': bytes_left})
            close_if_possible(seg_resp.app_iter)

            if seg_resp.etag and seg_hash.hexdigest() != seg_resp.etag \
                    and first_byte is None and last_byte is None:
                raise SegmentError(
                    "Bad MD5 checksum in %(name)s for %(seg)s: headers had"
                    " %(etag)s, but object MD5 was actually %(actual)s" %
                    {'seg': seg_req.path, 'etag': seg_resp.etag,
                     'name': self.name,
                     'actual': seg_hash.hexdigest()})

        if bytes_left:
            raise SegmentError(
                'Not enough bytes for %s; closing connection' % self.name)
    except ListingIterError as err:
        # I have to save this error because yielding the ' ' below clears
        # the exception from the current stack frame.
        excinfo = sys.exc_info()
        self.logger.exception('ERROR: While processing manifest %s, %s',
                              self.name, err)
        # Normally, exceptions before any data has been yielded will
        # cause Eventlet to send a 5xx response. In this particular
        # case of ListingIterError we don't want that and we'd rather
        # just send the normal 2xx response and then hang up early
        # since 5xx codes are often used to judge Service Level
        # Agreements and this ListingIterError indicates the user has
        # created an invalid condition.
        if not have_yielded_data:
            yield ' '
        # BUG FIX: the original ``raise excinfo`` raised the
        # (type, value, traceback) tuple itself rather than the saved
        # exception; re-raise it properly.
        six.reraise(*excinfo)
    except SegmentError as err:
        self.logger.exception(err)
        # This doesn't actually change the response status (we're too
        # late for that), but this does make it to the logs.
        if self.response:
            self.response.status = HTTP_SERVICE_UNAVAILABLE
        raise
def _requests_to_bytes_iter(self): # Take the requests out of self._coalesce_requests, actually make # the requests, and generate the bytes from the responses. # # Yields 2-tuples (segment-name, byte-chunk). The segment name is # used for logging. for data_or_req, seg_etag, seg_size in self._coalesce_requests(): if isinstance(data_or_req, bytes): # ugly, awful overloading yield ('data segment', data_or_req) continue seg_req = data_or_req seg_resp = seg_req.get_response(self.app) if not is_success(seg_resp.status_int): # Error body should be short body = seg_resp.body if not six.PY2: body = body.decode('utf8') msg = 'While processing manifest %s, got %d (%s) ' \ 'while retrieving %s' % ( self.name, seg_resp.status_int, body if len(body) <= 60 else body[:57] + '...', seg_req.path) if is_server_error(seg_resp.status_int): self.logger.error(msg) raise HTTPServiceUnavailable(request=seg_req, content_type='text/plain') raise SegmentError(msg) elif ( (seg_etag and (seg_resp.etag != seg_etag)) or (seg_size and (seg_resp.content_length != seg_size) and not seg_req.range)): # The content-length check is for security reasons. Seems # possible that an attacker could upload a >1mb object and # then replace it with a much smaller object with same # etag. Then create a big nested SLO that calls that # object many times which would hammer our obj servers. If # this is a range request, don't check content-length # because it won't match. close_if_possible(seg_resp.app_iter) raise SegmentError( 'Object segment no longer valid: ' '%(path)s etag: %(r_etag)s != %(s_etag)s or ' '%(r_size)s != %(s_size)s.' 
% { 'path': seg_req.path, 'r_etag': seg_resp.etag, 'r_size': seg_resp.content_length, 's_etag': seg_etag, 's_size': seg_size }) else: self.current_resp = seg_resp resp_len = 0 seg_hash = None if seg_resp.etag and not seg_req.headers.get('Range'): # Only calculate the MD5 if it we can use it to validate seg_hash = md5(usedforsecurity=False) document_iters = maybe_multipart_byteranges_to_document_iters( seg_resp.app_iter, seg_resp.headers['Content-Type']) for chunk in itertools.chain.from_iterable(document_iters): if seg_hash: seg_hash.update(chunk) resp_len += len(chunk) yield (seg_req.path, chunk) close_if_possible(seg_resp.app_iter) if seg_hash: if resp_len != seg_resp.content_length: raise SegmentError( "Bad response length for %(seg)s as part of %(name)s: " "headers had %(from_headers)s, but response length " "was actually %(actual)s" % { 'seg': seg_req.path, 'from_headers': seg_resp.content_length, 'name': self.name, 'actual': resp_len }) if seg_hash.hexdigest() != seg_resp.etag: raise SegmentError( "Bad MD5 checksum for %(seg)s as part of %(name)s: " "headers had %(etag)s, but object MD5 was actually " "%(actual)s" % { 'seg': seg_req.path, 'etag': seg_resp.etag, 'name': self.name, 'actual': seg_hash.hexdigest() })