Beispiel #1
0
    def fast_forward(self, num_bytes):
        """
        Skip num_bytes into the current range.

        Rewrites the backend ``Range`` header so that the next backend
        request starts where the previous one left off. When no ``Range``
        header is present, an open-ended ``bytes=<num_bytes>-`` header is set.

        :param num_bytes: the number of bytes that have already been read on
                          this request.
        :raises NotImplementedError: if this is a multirange request
        :raises ValueError: if invalid range header (propagated from parsing
                            the header with ``Range``)
        :raises HTTPRequestedRangeNotSatisfiable: if begin + num_bytes
                                                  > end of range, or if more
                                                  bytes were read than a
                                                  suffix range asked for
        """
        if 'Range' not in self.backend_headers:
            self.backend_headers['Range'] = 'bytes=%d-' % num_bytes
            return

        req_range = Range(self.backend_headers['Range'])
        if len(req_range.ranges) > 1:
            raise NotImplementedError()

        begin, end = req_range.ranges[0]
        if begin is None:
            # this is a -50 range req (last 50 bytes of file); ``end`` holds
            # the suffix length, so shrink it by what was already delivered.
            end -= num_bytes
            if end < 0:
                # More bytes were consumed than the suffix covered; do not
                # emit a header with a negative suffix length.
                raise HTTPRequestedRangeNotSatisfiable()
        else:
            begin += num_bytes
            # Compare against None explicitly: an end offset of 0 is a real
            # bound (e.g. "bytes=0-0"), not an open-ended range.
            if end is not None and begin > end:
                raise HTTPRequestedRangeNotSatisfiable()
        req_range.ranges = [(begin, end)]
        self.backend_headers['Range'] = str(req_range)
Beispiel #2
0
    def _manifest_get_response(self, req, content_length, response_headers,
                               segments):
        """
        Build the response for a GET of a segmented (SLO) object.

        Records the requested byte window on ``self.first_byte`` /
        ``self.last_byte`` (both None unless exactly one satisfiable range
        was requested), wires up a rate-limited segment listing and wraps it
        in a SegmentedIterable used as the response body.

        :param req: the incoming request; its ``range`` is cleared when more
                    than one byte range is satisfiable
        :param content_length: total length passed to range resolution and
                               set on the response
        :param response_headers: headers to place on the response
        :param segments: segment information handed to
                         ``self._segment_listing_iterator``
        :returns: HTTPRequestedRangeNotSatisfiable when no requested range is
                  satisfiable, HTTPConflict when validating the first segment
                  raises ListingIterError or SegmentError, otherwise a
                  conditional Response
        """
        self.first_byte = self.last_byte = None
        if req.range:
            satisfiable = req.range.ranges_for_length(content_length)
            range_count = len(satisfiable)
            if range_count == 0:
                return HTTPRequestedRangeNotSatisfiable(request=req)
            if range_count == 1:
                range_start, range_stop = satisfiable[0]
                self.first_byte = range_start
                # ranges_for_length reports the end one past the last byte
                # (per the original author's note), so step back to get an
                # inclusive last-byte index.
                self.last_byte = range_stop - 1
            else:
                # Multiple ranges: drop the Range so the whole object is
                # served.
                req.range = None

        ver, account, _junk = req.split_path(3, 3, rest_with_last=True)

        throttled_listing = RateLimitedIterator(
            self._segment_listing_iterator(req, ver, account, segments),
            self.slo.rate_limit_segments_per_sec,
            limit_after=self.slo.rate_limit_after_segment)

        def _object_path(seg_dict):
            # Absolute path of a segment within this account.
            return "/{ver}/{acc}/{conobj}".format(
                ver=ver, acc=account, conobj=seg_dict['name'].lstrip('/'))

        # The listing yields (segment dict, start byte, end byte); the
        # SegmentedIterable consumes (obj path, etag, size, start byte,
        # end byte), so reshape each entry lazily.
        reshaped_listing = (
            (_object_path(seg), seg['hash'], int(seg['bytes']),
             seg_first, seg_last)
            for seg, seg_first, seg_last in throttled_listing)

        body_iter = SegmentedIterable(
            req, self.slo.app, reshaped_listing,
            name=req.path,
            logger=self.slo.logger,
            ua_suffix="SLO MultipartGET",
            swift_source="SLO",
            max_get_time=self.slo.max_get_time)

        try:
            body_iter.validate_first_segment()
        except (ListingIterError, SegmentError):
            # A segment named by the manifest is missing or no longer
            # matches its recorded Etag/Content-Length; answer with a
            # 409 Conflict instead of starting a body that cannot be
            # completed.
            return HTTPConflict(request=req)

        response = Response(request=req,
                            content_length=content_length,
                            headers=response_headers,
                            conditional_response=True,
                            app_iter=body_iter)
        if req.range:
            # The Etag header is dropped for single-range responses.
            response.headers.pop('Etag')
        return response
Beispiel #3
0
    def _manifest_get_response(self, req, content_length, response_headers,
                               segments):
        """
        Assemble the GET response for a segmented (SLO) object.

        Sets ``self.first_byte`` / ``self.last_byte`` to the inclusive byte
        window when exactly one satisfiable range was requested (None
        otherwise) and returns a conditional Response whose body is a
        SegmentedIterable fed by a rate-limited segment listing.

        :param req: the incoming request; ``req.range`` is reset to None for
                    multi-range requests
        :param content_length: total length used to resolve ranges and set
                               on the response
        :param response_headers: headers for the response
        :param segments: segment information passed to
                         ``self._segment_listing_iterator``
        :returns: HTTPRequestedRangeNotSatisfiable when no range is
                  satisfiable, otherwise the Response
        """
        self.first_byte = None
        self.last_byte = None
        if req.range:
            resolved = req.range.ranges_for_length(content_length)
            how_many = len(resolved)
            if how_many == 0:
                return HTTPRequestedRangeNotSatisfiable(request=req)
            if how_many == 1:
                start, stop = resolved[0]
                self.first_byte = start
                # The resolved end is one past the final byte (per the
                # original author's note); store the inclusive position.
                self.last_byte = stop - 1
            else:
                req.range = None

        ver, account, _junk = req.split_path(3, 3, rest_with_last=True)
        raw_listing = self._segment_listing_iterator(
            req, ver, account, segments)
        paced_listing = RateLimitedIterator(
            raw_listing,
            self.slo.rate_limit_segments_per_sec,
            limit_after=self.slo.rate_limit_after_segment)

        def _path_for(seg_dict):
            # Full object path of one segment under this account.
            return "/{ver}/{acc}/{conobj}".format(
                ver=ver, acc=account, conobj=seg_dict['name'].lstrip('/'))

        # Convert (segment dict, start byte, end byte) entries into the
        # (obj path, etag, size, start byte, end byte) tuples that
        # SegmentedIterable consumes.
        entries = (
            (_path_for(seg), seg['hash'], int(seg['bytes']), lo, hi)
            for seg, lo, hi in paced_listing)

        body = SegmentedIterable(
            req,
            self.slo.app,
            entries,
            name=req.path,
            logger=self.slo.logger,
            ua_suffix="SLO MultipartGET",
            swift_source="SLO",
            max_get_time=self.slo.max_get_time)

        response = Response(request=req,
                            content_length=content_length,
                            headers=response_headers,
                            conditional_response=True,
                            app_iter=body)
        if req.range:
            # Single-range responses do not carry the Etag header.
            response.headers.pop('Etag')
        return response
Beispiel #4
0
    def GET(self):
        """
        Handle a GET request on the object-server.

        A plain HTTP Range header cannot be combined with storlet execution.
        The only accepted case is a Range accompanied by X-Storlet-Range,
        which the proxy may add when execution is to happen on the proxy.

        :raises HTTPRequestedRangeNotSatisfiable: for a range request that is
                                                  not a storlet range request
        :returns: the unmodified backend response when it is unsuccessful or
                  when the storlet must run on the proxy; otherwise the
                  result of applying the storlet to it
        """
        if self.is_range_request and not self.is_storlet_range_request:
            raise HTTPRequestedRangeNotSatisfiable(
                b'Storlet execution with range header is not supported',
                request=self.request)

        backend_resp = self.request.get_response(self.app)
        if not backend_resp.is_success:
            return backend_resp

        # Any one of these means the storlet has to be invoked on the proxy:
        # proxy-only mode, a storlet range request, or an SLO.
        # TODO(takashi): not sure manifest file should not be run with storlet
        proxy_side_reasons = [
            self.execute_on_proxy,
            self.execute_range_on_proxy,
            self.is_slo_get_request,
            self.is_slo_response(backend_resp),
        ]

        if not any(proxy_side_reasons):
            # Nothing forces proxy execution, so run the storlet here.
            self.logger.debug(
                'storlet_handler: invocation over %s to be executed locally'
                % self.request.path)
            return self.apply_storlet(backend_resp)

        self.logger.debug(
            'storlet_handler: invocation over %s to be executed on proxy'
            % self.request.path)
        return backend_resp
Beispiel #5
0
    def get_or_head_response(self,
                             req,
                             x_object_manifest,
                             response_headers=None):
        """
        Build the GET or HEAD response for a dynamic large object (DLO).

        The first page of the segment container listing is fetched, and the
        Content-Length and Etag headers are derived from it where possible.
        For GET requests a rate-limited SegmentedIterable is attached as the
        response body.

        :param req: the incoming request; ``req.range`` is reset to None when
                    a single-range request cannot be judged from the first
                    listing page alone
        :param x_object_manifest: manifest value of the form
                                  ``<container>/<object prefix>``; both parts
                                  are URL-unquoted after splitting on the
                                  first ``/``
        :param response_headers: iterable of (header, value) pairs; defaults
                                 to ``self._response_headers``
        :returns: the error response from the container listing if there is
                  one; HTTPRequestedRangeNotSatisfiable when the single
                  requested range resolves to nothing; HTTPConflict when
                  validating the first segment raises SegmentError or
                  ListingIterError; otherwise a conditional Response
        """
        if response_headers is None:
            response_headers = self._response_headers

        container, obj_prefix = x_object_manifest.split('/', 1)
        container = unquote(container)
        obj_prefix = unquote(obj_prefix)

        version, account, _junk = req.split_path(2, 3, True)
        error_response, segments = self._get_container_listing(
            req, version, account, container, obj_prefix)
        if error_response:
            return error_response
        # Fewer entries than the listing limit means this page is the whole
        # listing.
        have_complete_listing = len(segments) < \
            constraints.CONTAINER_LISTING_LIMIT

        first_byte = last_byte = None
        actual_content_length = None
        content_length_for_swob_range = None
        # Only single-range requests are resolved here; other Range headers
        # leave first_byte/last_byte as None.
        if req.range and len(req.range.ranges) == 1:
            content_length_for_swob_range = sum(o['bytes'] for o in segments)

            # This is a hack to handle suffix byte ranges (e.g. "bytes=-5"),
            # which we can't honor unless we have a complete listing.
            _junk, range_end = req.range.ranges_for_length(float("inf"))[0]

            # If this is all the segments, we know whether or not this
            # range request is satisfiable.
            #
            # Alternately, we may not have all the segments, but this range
            # falls entirely within the first page's segments, so we know
            # that it is satisfiable.
            if (have_complete_listing
                    or range_end < content_length_for_swob_range):
                byteranges = req.range.ranges_for_length(
                    content_length_for_swob_range)
                if not byteranges:
                    headers = {'Accept-Ranges': 'bytes'}
                    # The total size is only reported when the listing is
                    # known to be complete.
                    if have_complete_listing:
                        headers['Content-Range'] = 'bytes */%d' % (
                            content_length_for_swob_range, )
                    return HTTPRequestedRangeNotSatisfiable(request=req,
                                                            headers=headers)
                first_byte, last_byte = byteranges[0]
                # For some reason, swob.Range.ranges_for_length adds 1 to the
                # last byte's position.
                last_byte -= 1
                actual_content_length = last_byte - first_byte + 1
            else:
                # The range may or may not be satisfiable, but we can't tell
                # based on just one page of listing, and we're not going to go
                # get more pages because that would use up too many resources,
                # so we ignore the Range header and return the whole object.
                actual_content_length = None
                content_length_for_swob_range = None
                req.range = None

        # Drop any incoming Content-Length / Content-Range; replacements are
        # computed below.
        response_headers = [(h, v) for h, v in response_headers
                            if h.lower() not in ("content-length",
                                                 "content-range")]

        if content_length_for_swob_range is not None:
            # Here, we have to give swob a big-enough content length so that
            # it can compute the actual content length based on the Range
            # header. This value will not be visible to the client; swob will
            # substitute its own Content-Length.
            #
            # Note: if the manifest points to at least CONTAINER_LISTING_LIMIT
            # segments, this may be less than the sum of all the segments'
            # sizes. However, it'll still be greater than the last byte in the
            # Range header, so it's good enough for swob.
            response_headers.append(
                ('Content-Length', str(content_length_for_swob_range)))
        elif have_complete_listing:
            actual_content_length = sum(o['bytes'] for o in segments)
            response_headers.append(
                ('Content-Length', str(actual_content_length)))

        if have_complete_listing:
            # Replace any existing Etag with an MD5 over the segments'
            # (unquoted) hashes, in listing order.
            response_headers = [(h, v) for h, v in response_headers
                                if h.lower() != "etag"]
            etag = md5()
            for seg_dict in segments:
                etag.update(seg_dict['hash'].strip('"'))
            response_headers.append(('Etag', '"%s"' % etag.hexdigest()))

        # Only GET gets a body iterator; for other methods app_iter stays
        # None.
        app_iter = None
        if req.method == 'GET':
            listing_iter = RateLimitedIterator(
                self._segment_listing_iterator(req,
                                               version,
                                               account,
                                               container,
                                               obj_prefix,
                                               segments,
                                               first_byte=first_byte,
                                               last_byte=last_byte),
                self.dlo.rate_limit_segments_per_sec,
                limit_after=self.dlo.rate_limit_after_segment)

            app_iter = SegmentedIterable(
                req,
                self.dlo.app,
                listing_iter,
                ua_suffix="DLO MultipartGET",
                swift_source="DLO",
                name=req.path,
                logger=self.logger,
                max_get_time=self.dlo.max_get_time,
                response_body_length=actual_content_length)

            try:
                app_iter.validate_first_segment()
            except (SegmentError, ListingIterError):
                return HTTPConflict(request=req)

        resp = Response(request=req,
                        headers=response_headers,
                        conditional_response=True,
                        app_iter=app_iter)

        return resp
Beispiel #6
0
class CdnHandler(OriginBase):
    """
    Serve CDN-facing GET/HEAD requests.

    The incoming URL is matched against the configured regexes to obtain a
    container hash and an object name. The hash is resolved to account and
    container data, and if that container is CDN-enabled the request is
    re-issued against Swift as a pre-authed request. Responses carry
    Expires / Cache-Control headers derived from a TTL.
    """

    def __init__(self, app, conf, logger):
        """
        :param app: passed through to OriginBase
        :param conf: configuration mapping. Reads ``max_cdn_file_size``
                     (default 10 * 1024**3), ``allowed_origin_remote_ips``
                     (comma separated) and ``incoming_url_regex`` (a mapping
                     whose values are regex patterns)
        :param logger: logger used for debug/error messages
        :raises InvalidConfiguration: if ``incoming_url_regex`` is missing or
                                      empty
        """
        OriginBase.__init__(self, app, conf, logger)
        self.logger = logger
        self.max_cdn_file_size = int(
            conf.get('max_cdn_file_size', 10 * 1024**3))
        self.allowed_origin_remote_ips = []
        remote_ips = conf.get('allowed_origin_remote_ips')
        if remote_ips:
            self.allowed_origin_remote_ips = \
                [ip.strip() for ip in remote_ips.split(',') if ip.strip()]
        if not conf.get('incoming_url_regex'):
            raise InvalidConfiguration('Invalid config for CdnHandler')
        # Only the patterns matter; the keys of the mapping are not used.
        self.cdn_regexes = [
            re.compile(pattern)
            for pattern in conf['incoming_url_regex'].values()]

    def _getCacheHeaders(self, ttl):
        """
        Return Expires and Cache-Control headers for a lifetime of ``ttl``
        seconds from now.
        """
        return {
            'Expires': strftime("%a, %d %b %Y %H:%M:%S GMT",
                                gmtime(time() + ttl)),
            'Cache-Control': 'max-age=%d, public' % ttl
        }

    def _getCdnHeaders(self, req):
        """
        Return the headers for the request sent to Swift: fixed X-Web-Mode
        and User-Agent values plus any conditional/range headers present on
        ``req``.
        """
        headers = {'X-Web-Mode': 'True', 'User-Agent': 'SOS Origin'}
        for header in ['If-Modified-Since', 'If-Match', 'Range', 'If-Range']:
            if header in req.headers:
                headers[header] = req.headers[header]
        return headers

    def handle_request(self, env, req):
        """
        Handle one CDN request.

        :param env: WSGI environment. ``swift.cdn_hash`` and
                    ``swift.cdn_object_name`` may be preset by earlier
                    middleware; ``swift.cdn_authorize`` may hold an
                    authorization callable
        :param req: the incoming request
        :raises OriginRequestNotAllowed: if an IP allow-list is configured
                                         and the remote address is not in it
        :returns: a response object (405 for methods other than GET/HEAD,
                  404 when no hash is found or the container is not CDN
                  enabled, 400 for an invalid hash or an over-sized object,
                  otherwise a response derived from the Swift response)
        """
        if req.method not in ('GET', 'HEAD'):
            headers = self._getCacheHeaders(CACHE_BAD_URL)
            return HTTPMethodNotAllowed(request=req, headers=headers)
        if self.allowed_origin_remote_ips and \
                req.remote_addr not in self.allowed_origin_remote_ips:
            raise OriginRequestNotAllowed(
                'SOS Origin: Remote IP %s not allowed' % req.remote_addr)

        # allow earlier middleware to override hash and obj_name
        hsh = env.get('swift.cdn_hash')
        object_name = env.get('swift.cdn_object_name')
        if hsh is None or object_name is None:
            # Only the first matching regex is consulted.
            for regex in self.cdn_regexes:
                match_obj = regex.match(req.url)
                if match_obj:
                    match_dict = match_obj.groupdict()
                    if not hsh:
                        hsh = match_dict.get('hash')
                    if not object_name:
                        object_name = match_dict.get('object_name')
                    break
        if not hsh:
            self.logger.debug('Hash %s not found in %s' % (hsh, req.url))
            headers = self._getCacheHeaders(CACHE_BAD_URL)
            return HTTPNotFound(request=req, headers=headers)
        if '-' in hsh:
            # Keep only the part after the first dash.
            hsh = hsh.split('-', 1)[1]
        try:
            cdn_obj_path = self.get_hsh_obj_path(hsh)
        except ValueError as e:
            self.logger.debug('get_hsh_obj_path error: %s' % e)
            headers = self._getCacheHeaders(CACHE_BAD_URL)
            return HTTPBadRequest(request=req, headers=headers)
        hash_data = self.get_cdn_data(env, cdn_obj_path)
        if hash_data and hash_data.cdn_enabled:
            # this is a cdn enabled container, proxy req to swift
            account = hash_data.account.encode('utf-8')
            if env.get('swift.cdn_authorize'):
                auth_resp, ttl = env['swift.cdn_authorize'](env, account)
                if auth_resp:
                    return auth_resp(request=req,
                                     headers=self._getCacheHeaders(ttl))
            container = hash_data.container.encode('utf-8')
            acc_cont_path = '/v1/%s/%s' % (account, container)
            swift_path = quote(acc_cont_path + '/')
            if object_name:
                swift_path += object_name
            headers = self._getCdnHeaders(req)
            env['swift.source'] = 'SOS'
            resp = make_pre_authed_request(env,
                                           req.method,
                                           swift_path,
                                           headers=headers,
                                           agent='SwiftOrigin',
                                           swift_source='SOS').get_response(
                                               self.app)
            if resp.status_int == 301 and 'Location' in resp.headers:
                # Rewrite the redirect so it is relative to the container
                # instead of exposing the Swift account/container path.
                loc_parsed = urlparse(resp.headers['Location'])
                if loc_parsed.path.startswith(acc_cont_path):
                    sos_loc = loc_parsed.path[len(acc_cont_path):]
                    resp = SosResponse(
                        headers=self._getCacheHeaders(hash_data.ttl))
                    resp.headers['Location'] = sos_loc
                    resp.status = 301
                    return resp
                # No exception is being handled here, so log a plain error
                # rather than logger.exception (which would attach an empty
                # traceback).
                self.logger.error(
                    'Unexpected Location header '
                    'returned.  %s does not begin with expected '
                    'path: %s' % (loc_parsed.geturl(), acc_cont_path))
                return HTTPInternalServerError('Unexpected Relocation')
            if resp.status_int == 304:
                return HTTPNotModified(request=req,
                                       headers=self._getCacheHeaders(
                                           hash_data.ttl))
            if resp.status_int == 416:
                return HTTPRequestedRangeNotSatisfiable(
                    request=req, headers=self._getCacheHeaders(CACHE_404))
            if resp.status_int // 100 == 2 or resp.status_int == 404:
                # A response may carry no Content-Length; only enforce the
                # size cap when a length is actually known.
                if resp.content_length is not None and \
                        resp.content_length > self.max_cdn_file_size:
                    return HTTPBadRequest(
                        request=req, headers=self._getCacheHeaders(CACHE_404))
                cdn_resp = Response(request=req, app_iter=resp.app_iter)
                cdn_resp.status = resp.status_int
                cdn_resp.headers.update(resp.headers)
                if resp.status_int == 404:
                    cdn_resp.headers.update(self._getCacheHeaders(CACHE_404))
                else:
                    cdn_resp.headers.update(
                        self._getCacheHeaders(hash_data.ttl))
                return cdn_resp
            # Any other status falls through to the generic 404 below.
            self.logger.error('Unexpected response from '
                              'Swift: %s, %s: %s' %
                              (resp.status, swift_path, resp.body[:40]))
        return HTTPNotFound(request=req,
                            headers=self._getCacheHeaders(CACHE_404))