def _requests_to_bytes_iter(self): # Take the requests out of self._coalesce_requests, actually make # the requests, and generate the bytes from the responses. # # Yields 2-tuples (segment-name, byte-chunk). The segment name is # used for logging. for data_or_req, seg_etag, seg_size in self._coalesce_requests(): if isinstance(data_or_req, bytes): # ugly, awful overloading yield ('data segment', data_or_req) continue seg_req = data_or_req seg_resp = seg_req.get_response(self.app) if not is_success(seg_resp.status_int): close_if_possible(seg_resp.app_iter) raise SegmentError( 'While processing manifest %s, ' 'got %d while retrieving %s' % (self.name, seg_resp.status_int, seg_req.path)) elif ( (seg_etag and (seg_resp.etag != seg_etag)) or (seg_size and (seg_resp.content_length != seg_size) and not seg_req.range)): # The content-length check is for security reasons. Seems # possible that an attacker could upload a >1mb object and # then replace it with a much smaller object with same # etag. Then create a big nested SLO that calls that # object many times which would hammer our obj servers. If # this is a range request, don't check content-length # because it won't match. close_if_possible(seg_resp.app_iter) raise SegmentError( 'Object segment no longer valid: ' '%(path)s etag: %(r_etag)s != %(s_etag)s or ' '%(r_size)s != %(s_size)s.' % { 'path': seg_req.path, 'r_etag': seg_resp.etag, 'r_size': seg_resp.content_length, 's_etag': seg_etag, 's_size': seg_size }) else: self.current_resp = seg_resp seg_hash = None if seg_resp.etag and not seg_req.headers.get('Range'): # Only calculate the MD5 if it we can use it to validate seg_hash = hashlib.md5() document_iters = maybe_multipart_byteranges_to_document_iters( seg_resp.app_iter, seg_resp.headers['Content-Type']) for chunk in itertools.chain.from_iterable(document_iters): if seg_hash: seg_hash.update(chunk) yield (seg_req.path, chunk) close_if_possible(seg_resp.app_iter) if seg_hash and seg_hash.hexdigest() != seg_resp.etag: raise SegmentError( "Bad MD5 checksum in %(name)s for %(seg)s: headers had" " %(etag)s, but object MD5 was actually %(actual)s" % { 'seg': seg_req.path, 'etag': seg_resp.etag, 'name': self.name, 'actual': seg_hash.hexdigest() })
def _internal_iter(self): bytes_left = self.response_body_length try: for seg_req, seg_etag, seg_size in self._coalesce_requests(): seg_resp = seg_req.get_response(self.app) if not is_success(seg_resp.status_int): close_if_possible(seg_resp.app_iter) raise SegmentError( 'While processing manifest %s, ' 'got %d while retrieving %s' % (self.name, seg_resp.status_int, seg_req.path)) elif ((seg_etag and (seg_resp.etag != seg_etag)) or (seg_size and (seg_resp.content_length != seg_size) and not seg_req.range)): # The content-length check is for security reasons. Seems # possible that an attacker could upload a >1mb object and # then replace it with a much smaller object with same # etag. Then create a big nested SLO that calls that # object many times which would hammer our obj servers. If # this is a range request, don't check content-length # because it won't match. close_if_possible(seg_resp.app_iter) raise SegmentError( 'Object segment no longer valid: ' '%(path)s etag: %(r_etag)s != %(s_etag)s or ' '%(r_size)s != %(s_size)s.' % { 'path': seg_req.path, 'r_etag': seg_resp.etag, 'r_size': seg_resp.content_length, 's_etag': seg_etag, 's_size': seg_size }) else: self.current_resp = seg_resp seg_hash = None if seg_resp.etag and not seg_req.headers.get('Range'): # Only calculate the MD5 if it we can use it to validate seg_hash = hashlib.md5() document_iters = maybe_multipart_byteranges_to_document_iters( seg_resp.app_iter, seg_resp.headers['Content-Type']) for chunk in itertools.chain.from_iterable(document_iters): if seg_hash: seg_hash.update(chunk) if bytes_left is None: yield chunk elif bytes_left >= len(chunk): yield chunk bytes_left -= len(chunk) else: yield chunk[:bytes_left] bytes_left -= len(chunk) close_if_possible(seg_resp.app_iter) raise SegmentError( 'Too many bytes for %(name)s; truncating in ' '%(seg)s with %(left)d bytes left' % { 'name': self.name, 'seg': seg_req.path, 'left': bytes_left }) close_if_possible(seg_resp.app_iter) if seg_hash and seg_hash.hexdigest() != seg_resp.etag: raise SegmentError( "Bad MD5 checksum in %(name)s for %(seg)s: headers had" " %(etag)s, but object MD5 was actually %(actual)s" % { 'seg': seg_req.path, 'etag': seg_resp.etag, 'name': self.name, 'actual': seg_hash.hexdigest() }) if bytes_left: raise SegmentError( 'Not enough bytes for %s; closing connection' % self.name) except (ListingIterError, SegmentError) as err: self.logger.error(err) if not self.validated_first_segment: raise finally: if self.current_resp: close_if_possible(self.current_resp.app_iter)
def _requests_to_bytes_iter(self): # Take the requests out of self._coalesce_requests, actually make # the requests, and generate the bytes from the responses. # # Yields 2-tuples (segment-name, byte-chunk). The segment name is # used for logging. for data_or_req, seg_etag, seg_size in self._coalesce_requests(): if isinstance(data_or_req, bytes): # ugly, awful overloading yield ('data segment', data_or_req) continue seg_req = data_or_req seg_resp = seg_req.get_response(self.app) if not is_success(seg_resp.status_int): # Error body should be short body = seg_resp.body if not six.PY2: body = body.decode('utf8') msg = 'While processing manifest %s, got %d (%s) ' \ 'while retrieving %s' % ( self.name, seg_resp.status_int, body if len(body) <= 60 else body[:57] + '...', seg_req.path) if is_server_error(seg_resp.status_int): self.logger.error(msg) raise HTTPServiceUnavailable(request=seg_req, content_type='text/plain') raise SegmentError(msg) elif ( (seg_etag and (seg_resp.etag != seg_etag)) or (seg_size and (seg_resp.content_length != seg_size) and not seg_req.range)): # The content-length check is for security reasons. Seems # possible that an attacker could upload a >1mb object and # then replace it with a much smaller object with same # etag. Then create a big nested SLO that calls that # object many times which would hammer our obj servers. If # this is a range request, don't check content-length # because it won't match. close_if_possible(seg_resp.app_iter) raise SegmentError( 'Object segment no longer valid: ' '%(path)s etag: %(r_etag)s != %(s_etag)s or ' '%(r_size)s != %(s_size)s.' % { 'path': seg_req.path, 'r_etag': seg_resp.etag, 'r_size': seg_resp.content_length, 's_etag': seg_etag, 's_size': seg_size }) else: self.current_resp = seg_resp resp_len = 0 seg_hash = None if seg_resp.etag and not seg_req.headers.get('Range'): # Only calculate the MD5 if it we can use it to validate seg_hash = md5(usedforsecurity=False) document_iters = maybe_multipart_byteranges_to_document_iters( seg_resp.app_iter, seg_resp.headers['Content-Type']) for chunk in itertools.chain.from_iterable(document_iters): if seg_hash: seg_hash.update(chunk) resp_len += len(chunk) yield (seg_req.path, chunk) close_if_possible(seg_resp.app_iter) if seg_hash: if resp_len != seg_resp.content_length: raise SegmentError( "Bad response length for %(seg)s as part of %(name)s: " "headers had %(from_headers)s, but response length " "was actually %(actual)s" % { 'seg': seg_req.path, 'from_headers': seg_resp.content_length, 'name': self.name, 'actual': resp_len }) if seg_hash.hexdigest() != seg_resp.etag: raise SegmentError( "Bad MD5 checksum for %(seg)s as part of %(name)s: " "headers had %(etag)s, but object MD5 was actually " "%(actual)s" % { 'seg': seg_req.path, 'etag': seg_resp.etag, 'name': self.name, 'actual': seg_hash.hexdigest() })
def _internal_iter(self): bytes_left = self.response_body_length try: for seg_req, seg_etag, seg_size in self._coalesce_requests(): seg_resp = seg_req.get_response(self.app) if not is_success(seg_resp.status_int): close_if_possible(seg_resp.app_iter) raise SegmentError( 'While processing manifest %s, ' 'got %d while retrieving %s' % (self.name, seg_resp.status_int, seg_req.path)) elif ((seg_etag and (seg_resp.etag != seg_etag)) or (seg_size and (seg_resp.content_length != seg_size) and not seg_req.range)): # The content-length check is for security reasons. Seems # possible that an attacker could upload a >1mb object and # then replace it with a much smaller object with same # etag. Then create a big nested SLO that calls that # object many times which would hammer our obj servers. If # this is a range request, don't check content-length # because it won't match. close_if_possible(seg_resp.app_iter) raise SegmentError( 'Object segment no longer valid: ' '%(path)s etag: %(r_etag)s != %(s_etag)s or ' '%(r_size)s != %(s_size)s.' % {'path': seg_req.path, 'r_etag': seg_resp.etag, 'r_size': seg_resp.content_length, 's_etag': seg_etag, 's_size': seg_size}) else: self.current_resp = seg_resp seg_hash = None if seg_resp.etag and not seg_req.headers.get('Range'): # Only calculate the MD5 if it we can use it to validate seg_hash = hashlib.md5() document_iters = maybe_multipart_byteranges_to_document_iters( seg_resp.app_iter, seg_resp.headers['Content-Type']) for chunk in itertools.chain.from_iterable(document_iters): if seg_hash: seg_hash.update(chunk) if bytes_left is None: yield chunk elif bytes_left >= len(chunk): yield chunk bytes_left -= len(chunk) else: yield chunk[:bytes_left] bytes_left -= len(chunk) close_if_possible(seg_resp.app_iter) raise SegmentError( 'Too many bytes for %(name)s; truncating in ' '%(seg)s with %(left)d bytes left' % {'name': self.name, 'seg': seg_req.path, 'left': bytes_left}) close_if_possible(seg_resp.app_iter) if seg_hash and seg_hash.hexdigest() != seg_resp.etag: raise SegmentError( "Bad MD5 checksum in %(name)s for %(seg)s: headers had" " %(etag)s, but object MD5 was actually %(actual)s" % {'seg': seg_req.path, 'etag': seg_resp.etag, 'name': self.name, 'actual': seg_hash.hexdigest()}) if bytes_left: raise SegmentError( 'Not enough bytes for %s; closing connection' % self.name) except (ListingIterError, SegmentError) as err: self.logger.error(err) if not self.validated_first_segment: raise finally: if self.current_resp: close_if_possible(self.current_resp.app_iter)
def _requests_to_bytes_iter(self): # Take the requests out of self._coalesce_requests, actually make # the requests, and generate the bytes from the responses. # # Yields 2-tuples (segment-name, byte-chunk). The segment name is # used for logging. for data_or_req, seg_etag, seg_size in self._coalesce_requests(): if isinstance(data_or_req, bytes): # ugly, awful overloading yield ('data segment', data_or_req) continue seg_req = data_or_req seg_resp = seg_req.get_response(self.app) if not is_success(seg_resp.status_int): close_if_possible(seg_resp.app_iter) raise SegmentError( 'While processing manifest %s, ' 'got %d while retrieving %s' % (self.name, seg_resp.status_int, seg_req.path)) elif ((seg_etag and (seg_resp.etag != seg_etag)) or (seg_size and (seg_resp.content_length != seg_size) and not seg_req.range)): # The content-length check is for security reasons. Seems # possible that an attacker could upload a >1mb object and # then replace it with a much smaller object with same # etag. Then create a big nested SLO that calls that # object many times which would hammer our obj servers. If # this is a range request, don't check content-length # because it won't match. close_if_possible(seg_resp.app_iter) raise SegmentError( 'Object segment no longer valid: ' '%(path)s etag: %(r_etag)s != %(s_etag)s or ' '%(r_size)s != %(s_size)s.' % {'path': seg_req.path, 'r_etag': seg_resp.etag, 'r_size': seg_resp.content_length, 's_etag': seg_etag, 's_size': seg_size}) else: self.current_resp = seg_resp seg_hash = None if seg_resp.etag and not seg_req.headers.get('Range'): # Only calculate the MD5 if it we can use it to validate seg_hash = hashlib.md5() document_iters = maybe_multipart_byteranges_to_document_iters( seg_resp.app_iter, seg_resp.headers['Content-Type']) for chunk in itertools.chain.from_iterable(document_iters): if seg_hash: seg_hash.update(chunk) yield (seg_req.path, chunk) close_if_possible(seg_resp.app_iter) if seg_hash and seg_hash.hexdigest() != seg_resp.etag: raise SegmentError( "Bad MD5 checksum in %(name)s for %(seg)s: headers had" " %(etag)s, but object MD5 was actually %(actual)s" % {'seg': seg_req.path, 'etag': seg_resp.etag, 'name': self.name, 'actual': seg_hash.hexdigest()})