Example #1
    def _byte_counting_iter(self):
        # Checks that we give the client the right number of bytes. Raises
        # SegmentError if the number of bytes is wrong.
        bytes_left = self.response_body_length

        for seg_name, chunk in self._requests_to_bytes_iter():
            if bytes_left is None:
                yield chunk
            elif bytes_left >= len(chunk):
                yield chunk
                bytes_left -= len(chunk)
            else:
                yield chunk[:bytes_left]
                bytes_left -= len(chunk)
                raise SegmentError(
                    'Too many bytes for %(name)s; truncating in '
                    '%(seg)s with %(left)d bytes left' % {
                        'name': self.name,
                        'seg': seg_name,
                        'left': bytes_left
                    })

        if bytes_left:
            raise SegmentError('Not enough bytes for %s; closing connection' %
                               self.name)
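The pattern above is easy to exercise in isolation; the following is a minimal standalone sketch of the same byte-counting idea, with a toy SegmentError and an in-memory chunk list standing in for the middleware's own classes:

class SegmentError(Exception):
    pass


def byte_counting_iter(named_chunks, expected_length):
    # named_chunks: iterable of (segment_name, bytes chunk) pairs, as in
    # _requests_to_bytes_iter(); expected_length: int, or None to skip
    # the check entirely.
    bytes_left = expected_length
    for seg_name, chunk in named_chunks:
        if bytes_left is None:
            yield chunk
        elif bytes_left >= len(chunk):
            yield chunk
            bytes_left -= len(chunk)
        else:
            # Give the client only what it was promised, then fail loudly.
            yield chunk[:bytes_left]
            raise SegmentError('Too many bytes; truncating in %s' % seg_name)
    if bytes_left:
        raise SegmentError('Not enough bytes; closing connection')


# Exactly 8 bytes promised and delivered: no error.
assert list(byte_counting_iter([('a', b'12345'), ('b', b'678')], 8)) \
    == [b'12345', b'678']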
Example #2
    def _time_limited_iter(self):
        # Makes sure a GET response doesn't take more than self.max_get_time
        # seconds to process. Raises an exception if things take too long.
        start_time = time.time()
        for chunk in self._byte_counting_iter():
            now = time.time()
            yield chunk
            if now - start_time > self.max_get_time:
                raise SegmentError('While processing manifest %s, '
                                   'max LO GET time of %ds exceeded' %
                                   (self.name, self.max_get_time))
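The same guard can be written as a free-standing wrapper around any chunk iterator; a small sketch, using time.monotonic() and a generic TimeoutError rather than the middleware's clock and SegmentError:

import time


def time_limited_iter(chunk_iter, max_seconds):
    # Pass chunks through, but abort once the elapsed time exceeds
    # max_seconds. As in _time_limited_iter() above, the clock is sampled
    # *before* each yield, so the time a consumer spends on a chunk only
    # shows up in the following iteration's check.
    start = time.monotonic()
    for chunk in chunk_iter:
        now = time.monotonic()
        yield chunk
        if now - start > max_seconds:
            raise TimeoutError('chunk stream exceeded %ds' % max_seconds)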
Example #3
    def _internal_iter(self):
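        # Fetch each coalesced segment request, check the response against
        # the manifest's expected etag/size, and stream the bytes while
        # enforcing the advertised response body length.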
        bytes_left = self.response_body_length

        try:
            for seg_req, seg_etag, seg_size in self._coalesce_requests():
                seg_resp = seg_req.get_response(self.app)
                if not is_success(seg_resp.status_int):
                    close_if_possible(seg_resp.app_iter)
                    raise SegmentError(
                        'While processing manifest %s, '
                        'got %d while retrieving %s' %
                        (self.name, seg_resp.status_int, seg_req.path))

                elif ((seg_etag and (seg_resp.etag != seg_etag))
                      or (seg_size and (seg_resp.content_length != seg_size)
                          and not seg_req.range)):
                    # The content-length check is for security reasons. Seems
                    # possible that an attacker could upload a >1mb object and
                    # then replace it with a much smaller object with same
                    # etag. Then create a big nested SLO that calls that
                    # object many times which would hammer our obj servers. If
                    # this is a range request, don't check content-length
                    # because it won't match.
                    close_if_possible(seg_resp.app_iter)
                    raise SegmentError(
                        'Object segment no longer valid: '
                        '%(path)s etag: %(r_etag)s != %(s_etag)s or '
                        '%(r_size)s != %(s_size)s.' % {
                            'path': seg_req.path,
                            'r_etag': seg_resp.etag,
                            'r_size': seg_resp.content_length,
                            's_etag': seg_etag,
                            's_size': seg_size
                        })
                else:
                    self.current_resp = seg_resp

                seg_hash = None
                if seg_resp.etag and not seg_req.headers.get('Range'):
                    # Only calculate the MD5 if we can use it to validate
                    seg_hash = hashlib.md5()

                document_iters = maybe_multipart_byteranges_to_document_iters(
                    seg_resp.app_iter, seg_resp.headers['Content-Type'])

                for chunk in itertools.chain.from_iterable(document_iters):
                    if seg_hash:
                        seg_hash.update(chunk)

                    if bytes_left is None:
                        yield chunk
                    elif bytes_left >= len(chunk):
                        yield chunk
                        bytes_left -= len(chunk)
                    else:
                        yield chunk[:bytes_left]
                        bytes_left -= len(chunk)
                        close_if_possible(seg_resp.app_iter)
                        raise SegmentError(
                            'Too many bytes for %(name)s; truncating in '
                            '%(seg)s with %(left)d bytes left' % {
                                'name': self.name,
                                'seg': seg_req.path,
                                'left': bytes_left
                            })
                close_if_possible(seg_resp.app_iter)

                if seg_hash and seg_hash.hexdigest() != seg_resp.etag:
                    raise SegmentError(
                        "Bad MD5 checksum in %(name)s for %(seg)s: headers had"
                        " %(etag)s, but object MD5 was actually %(actual)s" % {
                            'seg': seg_req.path,
                            'etag': seg_resp.etag,
                            'name': self.name,
                            'actual': seg_hash.hexdigest()
                        })

            if bytes_left:
                raise SegmentError(
                    'Not enough bytes for %s; closing connection' % self.name)
        except (ListingIterError, SegmentError) as err:
            self.logger.error(err)
            if not self.validated_first_segment:
                raise
        finally:
            if self.current_resp:
                close_if_possible(self.current_resp.app_iter)
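close_if_possible() appears at every early exit above because abandoning a WSGI app_iter without closing it can leak the backend connection. Roughly, the helper is just a guarded close() call; a sketch of the idea, not necessarily Swift's exact implementation:

def close_if_possible(maybe_iterable):
    # Generators and WSGI app_iters often hold sockets or file handles
    # open until close() is called, so call it if the object provides one.
    close_method = getattr(maybe_iterable, 'close', None)
    if callable(close_method):
        return close_method()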
Example #4
    def _requests_to_bytes_iter(self):
        # Take the requests out of self._coalesce_requests, actually make
        # the requests, and generate the bytes from the responses.
        #
        # Yields 2-tuples (segment-name, byte-chunk). The segment name is
        # used for logging.
        for data_or_req, seg_etag, seg_size in self._coalesce_requests():
            if isinstance(data_or_req, bytes):  # ugly, awful overloading
                yield ('data segment', data_or_req)
                continue
            seg_req = data_or_req
            seg_resp = seg_req.get_response(self.app)
            if not is_success(seg_resp.status_int):
                close_if_possible(seg_resp.app_iter)
                raise SegmentError(
                    'While processing manifest %s, '
                    'got %d while retrieving %s' %
                    (self.name, seg_resp.status_int, seg_req.path))

            elif (
                (seg_etag and (seg_resp.etag != seg_etag)) or
                (seg_size and
                 (seg_resp.content_length != seg_size) and not seg_req.range)):
                # The content-length check is for security reasons. Seems
                # possible that an attacker could upload a >1mb object and
                # then replace it with a much smaller object with same
                # etag. Then create a big nested SLO that calls that
                # object many times which would hammer our obj servers. If
                # this is a range request, don't check content-length
                # because it won't match.
                close_if_possible(seg_resp.app_iter)
                raise SegmentError(
                    'Object segment no longer valid: '
                    '%(path)s etag: %(r_etag)s != %(s_etag)s or '
                    '%(r_size)s != %(s_size)s.' % {
                        'path': seg_req.path,
                        'r_etag': seg_resp.etag,
                        'r_size': seg_resp.content_length,
                        's_etag': seg_etag,
                        's_size': seg_size
                    })
            else:
                self.current_resp = seg_resp

            seg_hash = None
            if seg_resp.etag and not seg_req.headers.get('Range'):
                # Only calculate the MD5 if we can use it to validate
                seg_hash = hashlib.md5()

            document_iters = maybe_multipart_byteranges_to_document_iters(
                seg_resp.app_iter, seg_resp.headers['Content-Type'])

            for chunk in itertools.chain.from_iterable(document_iters):
                if seg_hash:
                    seg_hash.update(chunk)
                yield (seg_req.path, chunk)
            close_if_possible(seg_resp.app_iter)

            if seg_hash and seg_hash.hexdigest() != seg_resp.etag:
                raise SegmentError(
                    "Bad MD5 checksum in %(name)s for %(seg)s: headers had"
                    " %(etag)s, but object MD5 was actually %(actual)s" % {
                        'seg': seg_req.path,
                        'etag': seg_resp.etag,
                        'name': self.name,
                        'actual': seg_hash.hexdigest()
                    })
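The "ugly, awful overloading" note above refers to the producer sometimes yielding raw bytes (an inline data segment) instead of a sub-request. A hedged sketch of what such a producer might look like; SegReq and the listing-entry keys here are hypothetical stand-ins, not the middleware's real objects:

from collections import namedtuple

SegReq = namedtuple('SegReq', ['path'])  # stand-in for a swob sub-request


def coalesced_items(listing):
    # Inline data segments come through as raw bytes with nothing to
    # validate; object segments come through as request objects paired
    # with their expected etag and size.
    for entry in listing:
        if entry.get('data') is not None:
            yield entry['data'], None, None
        else:
            yield SegReq(entry['path']), entry.get('hash'), entry.get('bytes')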
Example #5
    def _coalesce_requests(self):
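        # Walk the manifest listing, turn each entry into a ranged GET
        # subrequest, and merge adjacent ranges against the same path into
        # one request whenever the combined Range header stays satisfiable.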
        start_time = time.time()
        pending_req = None
        pending_etag = None
        pending_size = None
        try:
            for seg_path, seg_etag, seg_size, first_byte, last_byte \
                    in self.listing_iter:
                first_byte = first_byte or 0
                go_to_end = last_byte is None or (seg_size is not None and
                                                  last_byte == seg_size - 1)
                if time.time() - start_time > self.max_get_time:
                    raise SegmentError('While processing manifest %s, '
                                       'max LO GET time of %ds exceeded' %
                                       (self.name, self.max_get_time))
                # The "multipart-manifest=get" query param ensures that the
                # segment is a plain old object, not some flavor of large
                # object; therefore, its etag is its MD5sum and hence we can
                # check it.
                path = seg_path + '?multipart-manifest=get'
                seg_req = make_subrequest(
                    self.req.environ,
                    path=path,
                    method='GET',
                    headers={
                        'x-auth-token': self.req.headers.get('x-auth-token')
                    },
                    agent=('%(orig)s ' + self.ua_suffix),
                    swift_source=self.swift_source)

                seg_req_rangeval = None
                if first_byte != 0 or not go_to_end:
                    seg_req_rangeval = "%s-%s" % (first_byte, ''
                                                  if go_to_end else last_byte)
                    seg_req.headers['Range'] = "bytes=" + seg_req_rangeval

                # We can only coalesce if paths match and we know the segment
                # size (so we can check that the ranges will be allowed)
                if pending_req and pending_req.path == seg_req.path and \
                        seg_size is not None:

                    # Make a new Range object so that we don't goof up the
                    # existing one in case of invalid ranges. Note that a
                    # range set with too many individual byteranges is
                    # invalid, so we can combine N valid byteranges and 1
                    # valid byterange and get an invalid range set.
                    if pending_req.range:
                        new_range_str = str(pending_req.range)
                    else:
                        new_range_str = "bytes=0-%d" % (seg_size - 1)

                    if seg_req.range:
                        new_range_str += "," + seg_req_rangeval
                    else:
                        new_range_str += ",0-%d" % (seg_size - 1)

                    if Range(new_range_str).ranges_for_length(seg_size):
                        # Good news! We can coalesce the requests
                        pending_req.headers['Range'] = new_range_str
                        continue
                    # else, Too many ranges, or too much backtracking, or ...

                if pending_req:
                    yield pending_req, pending_etag, pending_size
                pending_req = seg_req
                pending_etag = seg_etag
                pending_size = seg_size

        except ListingIterError:
            e_type, e_value, e_traceback = sys.exc_info()
            if time.time() - start_time > self.max_get_time:
                raise SegmentError('While processing manifest %s, '
                                   'max LO GET time of %ds exceeded' %
                                   (self.name, self.max_get_time))
            if pending_req:
                yield pending_req, pending_etag, pending_size
            six.reraise(e_type, e_value, e_traceback)

        if time.time() - start_time > self.max_get_time:
            raise SegmentError('While processing manifest %s, '
                               'max LO GET time of %ds exceeded' %
                               (self.name, self.max_get_time))
        if pending_req:
            yield pending_req, pending_etag, pending_size
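The coalescing decision hinges on ranges_for_length(): the merged header is kept only if it is still satisfiable for the known segment size. A quick usage illustration with the same Range class the code relies on (import path assumed to be swift.common.swob):

from swift.common.swob import Range

# Two adjacent byteranges against a 100-byte segment: satisfiable, so the
# pending request and the new one would be coalesced into a single GET.
print(Range('bytes=0-9,10-19').ranges_for_length(100))

# When ranges_for_length() comes back falsy (too many ranges, too much
# backtracking, or nothing satisfiable), the pending request is yielded
# as-is and the new segment starts a fresh accumulation.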
Example #6
    def _coalesce_requests(self):
        start_time = time.time()
        pending_req = None
        pending_etag = None
        pending_size = None
        try:
            for seg_path, seg_etag, seg_size, first_byte, last_byte \
                    in self.listing_iter:
                first_byte = first_byte or 0
                go_to_end = last_byte is None or (seg_size is not None and
                                                  last_byte == seg_size - 1)
                if time.time() - start_time > self.max_get_time:
                    raise SegmentError('ERROR: While processing manifest %s, '
                                       'max LO GET time of %ds exceeded' %
                                       (self.name, self.max_get_time))
                # Make sure that the segment is a plain old object, not some
                # flavor of large object, so that we can check its MD5.
                path = seg_path + '?multipart-manifest=get'
                seg_req = make_subrequest(
                    self.req.environ,
                    path=path,
                    method='GET',
                    headers={
                        'x-auth-token': self.req.headers.get('x-auth-token')
                    },
                    agent=('%(orig)s ' + self.ua_suffix),
                    swift_source=self.swift_source)

                if first_byte != 0 or not go_to_end:
                    seg_req.headers['Range'] = "bytes=%s-%s" % (
                        first_byte, '' if go_to_end else last_byte)

                # We can only coalesce if paths match and we know the segment
                # size (so we can check that the ranges will be allowed)
                if pending_req and pending_req.path == seg_req.path and \
                        seg_size is not None:
                    new_range = '%s,%s' % (
                        pending_req.headers.get('Range', 'bytes=0-%s' %
                                                (seg_size - 1)),
                        seg_req.headers['Range'].split('bytes=')[1])
                    if Range(new_range).ranges_for_length(seg_size):
                        # Good news! We can coalesce the requests
                        pending_req.headers['Range'] = new_range
                        continue
                    # else, Too many ranges, or too much backtracking, or ...

                if pending_req:
                    yield pending_req, pending_etag, pending_size
                pending_req = seg_req
                pending_etag = seg_etag
                pending_size = seg_size

        except ListingIterError:
            e_type, e_value, e_traceback = sys.exc_info()
            if time.time() - start_time > self.max_get_time:
                raise SegmentError('ERROR: While processing manifest %s, '
                                   'max LO GET time of %ds exceeded' %
                                   (self.name, self.max_get_time))
            if pending_req:
                yield pending_req, pending_etag, pending_size
            six.reraise(e_type, e_value, e_traceback)

        if time.time() - start_time > self.max_get_time:
            raise SegmentError('ERROR: While processing manifest %s, '
                               'max LO GET time of %ds exceeded' %
                               (self.name, self.max_get_time))
        if pending_req:
            yield pending_req, pending_etag, pending_size
Example #7
    def _internal_iter(self):
        start_time = time.time()
        bytes_left = self.response_body_length

        try:
            for seg_path, seg_etag, seg_size, first_byte, last_byte \
                    in self.listing_iter:
                if time.time() - start_time > self.max_get_time:
                    raise SegmentError('ERROR: While processing manifest %s, '
                                       'max LO GET time of %ds exceeded' %
                                       (self.name, self.max_get_time))
                # Make sure that the segment is a plain old object, not some
                # flavor of large object, so that we can check its MD5.
                path = seg_path + '?multipart-manifest=get'
                seg_req = make_subrequest(
                    self.req.environ,
                    path=path,
                    method='GET',
                    headers={
                        'x-auth-token': self.req.headers.get('x-auth-token')
                    },
                    agent=('%(orig)s ' + self.ua_suffix),
                    swift_source=self.swift_source)
                if first_byte is not None or last_byte is not None:
                    seg_req.headers['Range'] = "bytes=%s-%s" % (
                        # The 0 is to avoid having a range like "bytes=-10",
                        # which actually means the *last* 10 bytes.
                        '0' if first_byte is None else first_byte,
                        '' if last_byte is None else last_byte)

                seg_resp = seg_req.get_response(self.app)
                if not is_success(seg_resp.status_int):
                    close_if_possible(seg_resp.app_iter)
                    raise SegmentError(
                        'ERROR: While processing manifest %s, '
                        'got %d while retrieving %s' %
                        (self.name, seg_resp.status_int, seg_path))

                elif ((seg_etag and (seg_resp.etag != seg_etag))
                      or (seg_size and (seg_resp.content_length != seg_size)
                          and not seg_req.range)):
                    # The content-length check is for security reasons. Seems
                    # possible that an attacker could upload a >1mb object and
                    # then replace it with a much smaller object with same
                    # etag. Then create a big nested SLO that calls that
                    # object many times which would hammer our obj servers. If
                    # this is a range request, don't check content-length
                    # because it won't match.
                    close_if_possible(seg_resp.app_iter)
                    raise SegmentError(
                        'Object segment no longer valid: '
                        '%(path)s etag: %(r_etag)s != %(s_etag)s or '
                        '%(r_size)s != %(s_size)s.' % {
                            'path': seg_req.path,
                            'r_etag': seg_resp.etag,
                            'r_size': seg_resp.content_length,
                            's_etag': seg_etag,
                            's_size': seg_size
                        })

                seg_hash = hashlib.md5()
                for chunk in seg_resp.app_iter:
                    seg_hash.update(chunk)
                    if bytes_left is None:
                        yield chunk
                    elif bytes_left >= len(chunk):
                        yield chunk
                        bytes_left -= len(chunk)
                    else:
                        yield chunk[:bytes_left]
                        bytes_left -= len(chunk)
                        close_if_possible(seg_resp.app_iter)
                        raise SegmentError(
                            'Too many bytes for %(name)s; truncating in '
                            '%(seg)s with %(left)d bytes left' % {
                                'name': self.name,
                                'seg': seg_req.path,
                                'left': bytes_left
                            })
                close_if_possible(seg_resp.app_iter)

                if seg_resp.etag and seg_hash.hexdigest() != seg_resp.etag \
                   and first_byte is None and last_byte is None:
                    raise SegmentError(
                        "Bad MD5 checksum in %(name)s for %(seg)s: headers had"
                        " %(etag)s, but object MD5 was actually %(actual)s" % {
                            'seg': seg_req.path,
                            'etag': seg_resp.etag,
                            'name': self.name,
                            'actual': seg_hash.hexdigest()
                        })

            if bytes_left:
                raise SegmentError(
                    'Not enough bytes for %s; closing connection' % self.name)
        except (ListingIterError, SegmentError):
            self.logger.exception(
                _('ERROR: An error occurred '
                  'while retrieving segments'))
            raise
Example #8
    def __iter__(self):
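        # Stream the large object's segments to the client, enforcing the
        # advertised content length, the overall GET time limit, and each
        # segment's expected etag and size along the way.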
        start_time = time.time()
        have_yielded_data = False

        if self.response and self.response.content_length:
            bytes_left = int(self.response.content_length)
        else:
            bytes_left = None

        try:
            for seg_path, seg_etag, seg_size, first_byte, last_byte \
                    in self.listing_iter:
                if time.time() - start_time > self.max_get_time:
                    raise SegmentError('ERROR: While processing manifest %s, '
                                       'max LO GET time of %ds exceeded' %
                                       (self.name, self.max_get_time))
                # Make sure that the segment is a plain old object, not some
                # flavor of large object, so that we can check its MD5.
                path = seg_path + '?multipart-manifest=get'
                seg_req = make_subrequest(
                    self.req.environ,
                    path=path,
                    method='GET',
                    headers={
                        'x-auth-token': self.req.headers.get('x-auth-token')
                    },
                    agent=('%(orig)s ' + self.ua_suffix),
                    swift_source=self.swift_source)
                if first_byte is not None or last_byte is not None:
                    seg_req.headers['Range'] = "bytes=%s-%s" % (
                        # The 0 is to avoid having a range like "bytes=-10",
                        # which actually means the *last* 10 bytes.
                        '0' if first_byte is None else first_byte,
                        '' if last_byte is None else last_byte)

                seg_resp = seg_req.get_response(self.app)
                if not is_success(seg_resp.status_int):
                    close_if_possible(seg_resp.app_iter)
                    raise SegmentError(
                        'ERROR: While processing manifest %s, '
                        'got %d while retrieving %s' %
                        (self.name, seg_resp.status_int, seg_path))

                elif ((seg_etag and (seg_resp.etag != seg_etag))
                      or (seg_size and (seg_resp.content_length != seg_size)
                          and not seg_req.range)):
                    # The content-length check is for security reasons. Seems
                    # possible that an attacker could upload a >1mb object and
                    # then replace it with a much smaller object with same
                    # etag. Then create a big nested SLO that calls that
                    # object many times which would hammer our obj servers. If
                    # this is a range request, don't check content-length
                    # because it won't match.
                    close_if_possible(seg_resp.app_iter)
                    raise SegmentError(
                        'Object segment no longer valid: '
                        '%(path)s etag: %(r_etag)s != %(s_etag)s or '
                        '%(r_size)s != %(s_size)s.' % {
                            'path': seg_req.path,
                            'r_etag': seg_resp.etag,
                            'r_size': seg_resp.content_length,
                            's_etag': seg_etag,
                            's_size': seg_size
                        })

                seg_hash = hashlib.md5()
                for chunk in seg_resp.app_iter:
                    seg_hash.update(chunk)
                    have_yielded_data = True
                    if bytes_left is None:
                        yield chunk
                    elif bytes_left >= len(chunk):
                        yield chunk
                        bytes_left -= len(chunk)
                    else:
                        yield chunk[:bytes_left]
                        bytes_left -= len(chunk)
                        close_if_possible(seg_resp.app_iter)
                        raise SegmentError(
                            'Too many bytes for %(name)s; truncating in '
                            '%(seg)s with %(left)d bytes left' % {
                                'name': self.name,
                                'seg': seg_req.path,
                                'left': bytes_left
                            })
                close_if_possible(seg_resp.app_iter)

                if seg_resp.etag and seg_hash.hexdigest() != seg_resp.etag \
                   and first_byte is None and last_byte is None:
                    raise SegmentError(
                        "Bad MD5 checksum in %(name)s for %(seg)s: headers had"
                        " %(etag)s, but object MD5 was actually %(actual)s" % {
                            'seg': seg_req.path,
                            'etag': seg_resp.etag,
                            'name': self.name,
                            'actual': seg_hash.hexdigest()
                        })

            if bytes_left:
                raise SegmentError(
                    'Not enough bytes for %s; closing connection' % self.name)

        except ListingIterError as err:
            # I have to save this error because yielding the ' ' below clears
            # the exception from the current stack frame.
            excinfo = sys.exc_info()
            self.logger.exception('ERROR: While processing manifest %s, %s',
                                  self.name, err)
            # Normally, exceptions before any data has been yielded will
            # cause Eventlet to send a 5xx response. In this particular
            # case of ListingIterError we don't want that and we'd rather
            # just send the normal 2xx response and then hang up early
            # since 5xx codes are often used to judge Service Level
            # Agreements and this ListingIterError indicates the user has
            # created an invalid condition.
            if not have_yielded_data:
                yield ' '
            six.reraise(*excinfo)
        except SegmentError as err:
            self.logger.exception(err)
            # This doesn't actually change the response status (we're too
            # late for that), but this does make it to the logs.
            if self.response:
                self.response.status = HTTP_SERVICE_UNAVAILABLE
            raise

    def _requests_to_bytes_iter(self):
        # Take the requests out of self._coalesce_requests, actually make
        # the requests, and generate the bytes from the responses.
        #
        # Yields 2-tuples (segment-name, byte-chunk). The segment name is
        # used for logging.
        for data_or_req, seg_etag, seg_size in self._coalesce_requests():
            if isinstance(data_or_req, bytes):  # ugly, awful overloading
                yield ('data segment', data_or_req)
                continue
            seg_req = data_or_req
            seg_resp = seg_req.get_response(self.app)
            if not is_success(seg_resp.status_int):
                # Error body should be short
                body = seg_resp.body
                if not six.PY2:
                    body = body.decode('utf8')
                msg = 'While processing manifest %s, got %d (%s) ' \
                    'while retrieving %s' % (
                        self.name, seg_resp.status_int,
                        body if len(body) <= 60 else body[:57] + '...',
                        seg_req.path)
                if is_server_error(seg_resp.status_int):
                    self.logger.error(msg)
                    raise HTTPServiceUnavailable(request=seg_req,
                                                 content_type='text/plain')
                raise SegmentError(msg)
            elif (
                (seg_etag and (seg_resp.etag != seg_etag)) or
                (seg_size and
                 (seg_resp.content_length != seg_size) and not seg_req.range)):
                # The content-length check is for security reasons. Seems
                # possible that an attacker could upload a >1mb object and
                # then replace it with a much smaller object with same
                # etag. Then create a big nested SLO that calls that
                # object many times which would hammer our obj servers. If
                # this is a range request, don't check content-length
                # because it won't match.
                close_if_possible(seg_resp.app_iter)
                raise SegmentError(
                    'Object segment no longer valid: '
                    '%(path)s etag: %(r_etag)s != %(s_etag)s or '
                    '%(r_size)s != %(s_size)s.' % {
                        'path': seg_req.path,
                        'r_etag': seg_resp.etag,
                        'r_size': seg_resp.content_length,
                        's_etag': seg_etag,
                        's_size': seg_size
                    })
            else:
                self.current_resp = seg_resp

            resp_len = 0
            seg_hash = None
            if seg_resp.etag and not seg_req.headers.get('Range'):
                # Only calculate the MD5 if we can use it to validate
                seg_hash = md5(usedforsecurity=False)

            document_iters = maybe_multipart_byteranges_to_document_iters(
                seg_resp.app_iter, seg_resp.headers['Content-Type'])

            for chunk in itertools.chain.from_iterable(document_iters):
                if seg_hash:
                    seg_hash.update(chunk)
                    resp_len += len(chunk)
                yield (seg_req.path, chunk)
            close_if_possible(seg_resp.app_iter)

            if seg_hash:
                if resp_len != seg_resp.content_length:
                    raise SegmentError(
                        "Bad response length for %(seg)s as part of %(name)s: "
                        "headers had %(from_headers)s, but response length "
                        "was actually %(actual)s" % {
                            'seg': seg_req.path,
                            'from_headers': seg_resp.content_length,
                            'name': self.name,
                            'actual': resp_len
                        })
                if seg_hash.hexdigest() != seg_resp.etag:
                    raise SegmentError(
                        "Bad MD5 checksum for %(seg)s as part of %(name)s: "
                        "headers had %(etag)s, but object MD5 was actually "
                        "%(actual)s" % {
                            'seg': seg_req.path,
                            'etag': seg_resp.etag,
                            'name': self.name,
                            'actual': seg_hash.hexdigest()
                        })
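The post-stream checks in that last loop reduce to "hash and count as you forward, compare once the stream is exhausted". A minimal standalone sketch of the pattern, using plain hashlib rather than the md5(usedforsecurity=False) helper the example calls:

import hashlib


def validating_iter(chunks, expected_md5, expected_length):
    # Forward chunks unchanged while hashing and counting them; only after
    # the stream is exhausted can the totals be compared, which is why the
    # errors below surface at the very end of the body.
    hasher = hashlib.md5()
    total = 0
    for chunk in chunks:
        hasher.update(chunk)
        total += len(chunk)
        yield chunk
    if expected_length is not None and total != expected_length:
        raise ValueError('bad length: expected %d, got %d'
                         % (expected_length, total))
    if expected_md5 is not None and hasher.hexdigest() != expected_md5:
        raise ValueError('bad MD5: expected %s, got %s'
                         % (expected_md5, hasher.hexdigest()))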