Esempio n. 1
0
 def __init__(self,
              app,
              conf,
              max_manifest_segments=DEFAULT_MAX_MANIFEST_SEGMENTS,
              max_manifest_size=DEFAULT_MAX_MANIFEST_SIZE):
     self.conf = conf
     self.app = app
     self.logger = get_logger(conf, log_route='slo')
     self.max_manifest_segments = max_manifest_segments
     self.max_manifest_size = max_manifest_size
     self.max_get_time = int(self.conf.get('max_get_time', 86400))
     self.rate_limit_under_size = int(
         self.conf.get('rate_limit_under_size',
                       DEFAULT_RATE_LIMIT_UNDER_SIZE))
     self.rate_limit_after_segment = int(
         self.conf.get('rate_limit_after_segment', '10'))
     self.rate_limit_segments_per_sec = int(
         self.conf.get('rate_limit_segments_per_sec', '1'))
     self.concurrency = min(1000,
                            max(0, int(self.conf.get('concurrency', '2'))))
     delete_concurrency = int(
         self.conf.get('delete_concurrency', self.concurrency))
     self.bulk_deleter = Bulk(app, {},
                              delete_concurrency=delete_concurrency,
                              logger=self.logger)
Esempio n. 2
0
 def __init__(self, app, conf):
     self.conf = conf
     self.app = app
     self.logger = get_logger(conf, log_route='slo')
     self.max_manifest_segments = int(self.conf.get('max_manifest_segments',
                                      1000))
     self.max_manifest_size = int(self.conf.get('max_manifest_size',
                                  1024 * 1024 * 2))
     self.min_segment_size = int(self.conf.get('min_segment_size',
                                 1024 * 1024))
     self.bulk_deleter = Bulk(app, {})
Esempio n. 3
0
 def __init__(self, app, conf):
     self.conf = conf
     self.app = app
     self.logger = get_logger(conf, log_route="slo")
     self.max_manifest_segments = int(self.conf.get("max_manifest_segments", 1000))
     self.max_manifest_size = int(self.conf.get("max_manifest_size", 1024 * 1024 * 2))
     self.min_segment_size = int(self.conf.get("min_segment_size", 1024 * 1024))
     self.bulk_deleter = Bulk(app, {"max_deletes_per_request": self.max_manifest_segments})
Esempio n. 4
0
 def __init__(self,
              app,
              conf,
              min_segment_size=DEFAULT_MIN_SEGMENT_SIZE,
              max_manifest_segments=DEFAULT_MAX_MANIFEST_SEGMENTS,
              max_manifest_size=DEFAULT_MAX_MANIFEST_SIZE):
     self.conf = conf
     self.app = app
     self.logger = get_logger(conf, log_route='slo')
     self.max_manifest_segments = max_manifest_segments
     self.max_manifest_size = max_manifest_size
     self.min_segment_size = min_segment_size
     self.max_get_time = int(self.conf.get('max_get_time', 86400))
     self.rate_limit_after_segment = int(
         self.conf.get('rate_limit_after_segment', '10'))
     self.rate_limit_segments_per_sec = int(
         self.conf.get('rate_limit_segments_per_sec', '0'))
     self.bulk_deleter = Bulk(app, {}, logger=self.logger)
Esempio n. 5
0
 def __init__(self, app, conf, min_segment_size=DEFAULT_MIN_SEGMENT_SIZE,
              max_manifest_segments=DEFAULT_MAX_MANIFEST_SEGMENTS,
              max_manifest_size=DEFAULT_MAX_MANIFEST_SIZE):
     self.conf = conf
     self.app = app
     self.logger = get_logger(conf, log_route='slo')
     self.max_manifest_segments = max_manifest_segments
     self.max_manifest_size = max_manifest_size
     self.min_segment_size = min_segment_size
     self.max_get_time = int(self.conf.get('max_get_time', 86400))
     self.rate_limit_after_segment = int(self.conf.get(
         'rate_limit_after_segment', '10'))
     self.rate_limit_segments_per_sec = int(self.conf.get(
         'rate_limit_segments_per_sec', '0'))
     self.bulk_deleter = Bulk(app, {}, logger=self.logger)
Esempio n. 6
0
 def __init__(self, app, conf):
     self.conf = conf
     self.app = app
     self.logger = get_logger(conf, log_route='slo')
     self.max_manifest_segments = int(self.conf.get('max_manifest_segments',
                                      1000))
     self.max_manifest_size = int(self.conf.get('max_manifest_size',
                                  1024 * 1024 * 2))
     self.min_segment_size = int(self.conf.get('min_segment_size',
                                 1024 * 1024))
     self.max_get_time = int(self.conf.get('max_get_time', 86400))
     self.rate_limit_after_segment = int(self.conf.get(
         'rate_limit_after_segment', '10'))
     self.rate_limit_segments_per_sec = int(self.conf.get(
         'rate_limit_segments_per_sec', '0'))
     self.bulk_deleter = Bulk(app, {})
Esempio n. 7
0
 def __init__(self, app, conf,
              max_manifest_segments=DEFAULT_MAX_MANIFEST_SEGMENTS,
              max_manifest_size=DEFAULT_MAX_MANIFEST_SIZE):
     self.conf = conf
     self.app = app
     self.logger = get_logger(conf, log_route='slo')
     self.max_manifest_segments = max_manifest_segments
     self.max_manifest_size = max_manifest_size
     self.max_get_time = int(self.conf.get('max_get_time', 86400))
     self.rate_limit_under_size = int(self.conf.get(
         'rate_limit_under_size', DEFAULT_RATE_LIMIT_UNDER_SIZE))
     self.rate_limit_after_segment = int(self.conf.get(
         'rate_limit_after_segment', '10'))
     self.rate_limit_segments_per_sec = int(self.conf.get(
         'rate_limit_segments_per_sec', '1'))
     delete_concurrency = int(self.conf.get('delete_concurrency', '2'))
     self.bulk_deleter = Bulk(
         app, {}, delete_concurrency=delete_concurrency, logger=self.logger)
Esempio n. 8
0
class StaticLargeObject(object):
    """
    StaticLargeObject Middleware

    See above for a full description.

    The proxy logs created for any subrequests made will have swift.source set
    to "SLO".

    :param app: The next WSGI filter or app in the paste.deploy chain.
    :param conf: The configuration dict for the middleware.
    """

    def __init__(self, app, conf, min_segment_size=DEFAULT_MIN_SEGMENT_SIZE,
                 max_manifest_segments=DEFAULT_MAX_MANIFEST_SEGMENTS,
                 max_manifest_size=DEFAULT_MAX_MANIFEST_SIZE):
        self.conf = conf
        self.app = app
        self.logger = get_logger(conf, log_route='slo')
        self.max_manifest_segments = max_manifest_segments
        self.max_manifest_size = max_manifest_size
        self.min_segment_size = min_segment_size
        self.max_get_time = int(self.conf.get('max_get_time', 86400))
        self.rate_limit_after_segment = int(self.conf.get(
            'rate_limit_after_segment', '10'))
        self.rate_limit_segments_per_sec = int(self.conf.get(
            'rate_limit_segments_per_sec', '0'))
        self.bulk_deleter = Bulk(app, {}, logger=self.logger)

    def handle_multipart_get_or_head(self, req, start_response):
        """
        Handles the GET or HEAD of a SLO manifest.

        The response body (only on GET, of course) will consist of the
        concatenation of the segments.

        :params req: a swob.Request with a path referencing an object
        :raises: HttpException on errors
        """
        return SloGetContext(self).handle_slo_get_or_head(req, start_response)

    def copy_hook(self, inner_hook):

        def slo_hook(source_req, source_resp, sink_req):
            x_slo = source_resp.headers.get('X-Static-Large-Object')
            if (config_true_value(x_slo)
                    and source_req.params.get('multipart-manifest') != 'get'
                    and 'swift.post_as_copy' not in source_req.environ):
                source_resp = SloGetContext(self).get_or_head_response(
                    source_req, source_resp.headers.items(),
                    source_resp.app_iter)
            return inner_hook(source_req, source_resp, sink_req)

        return slo_hook

    def handle_multipart_put(self, req, start_response):
        """
        Will handle the PUT of a SLO manifest.
        Heads every object in manifest to check if is valid and if so will
        save a manifest generated from the user input. Uses WSGIContext to
        call self and start_response and returns a WSGI iterator.

        :params req: a swob.Request with an obj in path
        :raises: HttpException on errors
        """
        try:
            vrs, account, container, obj = req.split_path(1, 4, True)
        except ValueError:
            return self.app(req.environ, start_response)
        if req.content_length > self.max_manifest_size:
            raise HTTPRequestEntityTooLarge(
                "Manifest File > %d bytes" % self.max_manifest_size)
        if req.headers.get('X-Copy-From'):
            raise HTTPMethodNotAllowed(
                'Multipart Manifest PUTs cannot be COPY requests')
        if req.content_length is None and \
                req.headers.get('transfer-encoding', '').lower() != 'chunked':
            raise HTTPLengthRequired(request=req)
        parsed_data = parse_and_validate_input(
            req.body_file.read(self.max_manifest_size),
            req.path, self.min_segment_size)
        problem_segments = []

        if len(parsed_data) > self.max_manifest_segments:
            raise HTTPRequestEntityTooLarge(
                'Number of segments must be <= %d' %
                self.max_manifest_segments)
        total_size = 0
        out_content_type = req.accept.best_match(ACCEPTABLE_FORMATS)
        if not out_content_type:
            out_content_type = 'text/plain'
        data_for_storage = []
        slo_etag = md5()
        last_obj_path = None
        for index, seg_dict in enumerate(parsed_data):
            obj_name = seg_dict['path']
            if isinstance(obj_name, six.text_type):
                obj_name = obj_name.encode('utf-8')
            obj_path = '/'.join(['', vrs, account, obj_name.lstrip('/')])

            new_env = req.environ.copy()
            new_env['PATH_INFO'] = obj_path
            new_env['REQUEST_METHOD'] = 'HEAD'
            new_env['swift.source'] = 'SLO'
            del(new_env['wsgi.input'])
            del(new_env['QUERY_STRING'])
            new_env['CONTENT_LENGTH'] = 0
            new_env['HTTP_USER_AGENT'] = \
                '%s MultipartPUT' % req.environ.get('HTTP_USER_AGENT')
            if obj_path != last_obj_path:
                last_obj_path = obj_path
                head_seg_resp = \
                    Request.blank(obj_path, new_env).get_response(self)

            if head_seg_resp.is_success:
                segment_length = head_seg_resp.content_length
                if seg_dict.get('range'):
                    # Since we now know the length, we can normalize the
                    # range. We know that there is exactly one range
                    # requested since we checked that earlier in
                    # parse_and_validate_input().
                    ranges = seg_dict['range'].ranges_for_length(
                        head_seg_resp.content_length)

                    if not ranges:
                        problem_segments.append([quote(obj_name),
                                                 'Unsatisfiable Range'])
                    elif ranges == [(0, head_seg_resp.content_length)]:
                        # Just one range, and it exactly matches the object.
                        # Why'd we do this again?
                        del seg_dict['range']
                        segment_length = head_seg_resp.content_length
                    else:
                        rng = ranges[0]
                        seg_dict['range'] = '%d-%d' % (rng[0], rng[1] - 1)
                        segment_length = rng[1] - rng[0]

                if segment_length < self.min_segment_size and \
                        index < len(parsed_data) - 1:
                    problem_segments.append(
                        [quote(obj_name),
                         'Too small; each segment, except the last, must be '
                         'at least %d bytes.' % self.min_segment_size])
                total_size += segment_length
                if seg_dict['size_bytes'] is not None and \
                        seg_dict['size_bytes'] != head_seg_resp.content_length:
                    problem_segments.append([quote(obj_name), 'Size Mismatch'])
                if seg_dict['etag'] is None or \
                        seg_dict['etag'] == head_seg_resp.etag:
                    if seg_dict.get('range'):
                        slo_etag.update('%s:%s;' % (head_seg_resp.etag,
                                                    seg_dict['range']))
                    else:
                        slo_etag.update(head_seg_resp.etag)
                else:
                    problem_segments.append([quote(obj_name), 'Etag Mismatch'])
                if head_seg_resp.last_modified:
                    last_modified = head_seg_resp.last_modified
                else:
                    # shouldn't happen
                    last_modified = datetime.now()

                last_modified_formatted = \
                    last_modified.strftime('%Y-%m-%dT%H:%M:%S.%f')
                seg_data = {'name': '/' + seg_dict['path'].lstrip('/'),
                            'bytes': head_seg_resp.content_length,
                            'hash': head_seg_resp.etag,
                            'content_type': head_seg_resp.content_type,
                            'last_modified': last_modified_formatted}
                if seg_dict.get('range'):
                    seg_data['range'] = seg_dict['range']

                if config_true_value(
                        head_seg_resp.headers.get('X-Static-Large-Object')):
                    seg_data['sub_slo'] = True
                data_for_storage.append(seg_data)

            else:
                problem_segments.append([quote(obj_name),
                                         head_seg_resp.status])
        if problem_segments:
            resp_body = get_response_body(
                out_content_type, {}, problem_segments)
            raise HTTPBadRequest(resp_body, content_type=out_content_type)
        env = req.environ

        if not env.get('CONTENT_TYPE'):
            guessed_type, _junk = mimetypes.guess_type(req.path_info)
            env['CONTENT_TYPE'] = guessed_type or 'application/octet-stream'
        env['swift.content_type_overridden'] = True
        env['CONTENT_TYPE'] += ";swift_bytes=%d" % total_size
        env['HTTP_X_STATIC_LARGE_OBJECT'] = 'True'
        json_data = json.dumps(data_for_storage)
        if six.PY3:
            json_data = json_data.encode('utf-8')
        env['CONTENT_LENGTH'] = str(len(json_data))
        env['wsgi.input'] = BytesIO(json_data)

        slo_put_context = SloPutContext(self, slo_etag)
        return slo_put_context.handle_slo_put(req, start_response)

    def get_segments_to_delete_iter(self, req):
        """
        A generator function to be used to delete all the segments and
        sub-segments referenced in a manifest.

        :params req: a swob.Request with an SLO manifest in path
        :raises HTTPPreconditionFailed: on invalid UTF8 in request path
        :raises HTTPBadRequest: on too many buffered sub segments and
                                on invalid SLO manifest path
        """
        if not check_utf8(req.path_info):
            raise HTTPPreconditionFailed(
                request=req, body='Invalid UTF8 or contains NULL')
        vrs, account, container, obj = req.split_path(4, 4, True)

        segments = [{
            'sub_slo': True,
            'name': ('/%s/%s' % (container, obj)).decode('utf-8')}]
        while segments:
            if len(segments) > MAX_BUFFERED_SLO_SEGMENTS:
                raise HTTPBadRequest(
                    'Too many buffered slo segments to delete.')
            seg_data = segments.pop(0)
            if seg_data.get('sub_slo'):
                try:
                    segments.extend(
                        self.get_slo_segments(seg_data['name'], req))
                except HTTPException as err:
                    # allow bulk delete response to report errors
                    seg_data['error'] = {'code': err.status_int,
                                         'message': err.body}

                # add manifest back to be deleted after segments
                seg_data['sub_slo'] = False
                segments.append(seg_data)
            else:
                seg_data['name'] = seg_data['name'].encode('utf-8')
                yield seg_data

    def get_slo_segments(self, obj_name, req):
        """
        Performs a swob.Request and returns the SLO manifest's segments.

        :raises HTTPServerError: on unable to load obj_name or
                                 on unable to load the SLO manifest data.
        :raises HTTPBadRequest: on not an SLO manifest
        :raises HTTPNotFound: on SLO manifest not found
        :returns: SLO manifest's segments
        """
        vrs, account, _junk = req.split_path(2, 3, True)
        new_env = req.environ.copy()
        new_env['REQUEST_METHOD'] = 'GET'
        del(new_env['wsgi.input'])
        new_env['QUERY_STRING'] = 'multipart-manifest=get'
        new_env['CONTENT_LENGTH'] = 0
        new_env['HTTP_USER_AGENT'] = \
            '%s MultipartDELETE' % new_env.get('HTTP_USER_AGENT')
        new_env['swift.source'] = 'SLO'
        new_env['PATH_INFO'] = (
            '/%s/%s/%s' % (vrs, account, obj_name.lstrip('/'))
        ).encode('utf-8')
        resp = Request.blank('', new_env).get_response(self.app)

        if resp.is_success:
            if config_true_value(resp.headers.get('X-Static-Large-Object')):
                try:
                    return json.loads(resp.body)
                except ValueError:
                    raise HTTPServerError('Unable to load SLO manifest')
            else:
                raise HTTPBadRequest('Not an SLO manifest')
        elif resp.status_int == HTTP_NOT_FOUND:
            raise HTTPNotFound('SLO manifest not found')
        elif resp.status_int == HTTP_UNAUTHORIZED:
            raise HTTPUnauthorized('401 Unauthorized')
        else:
            raise HTTPServerError('Unable to load SLO manifest or segment.')

    def handle_multipart_delete(self, req):
        """
        Will delete all the segments in the SLO manifest and then, if
        successful, will delete the manifest file.

        :params req: a swob.Request with an obj in path
        :returns: swob.Response whose app_iter set to Bulk.handle_delete_iter
        """
        req.headers['Content-Type'] = None  # Ignore content-type from client
        resp = HTTPOk(request=req)
        out_content_type = req.accept.best_match(ACCEPTABLE_FORMATS)
        if out_content_type:
            resp.content_type = out_content_type
        resp.app_iter = self.bulk_deleter.handle_delete_iter(
            req, objs_to_delete=self.get_segments_to_delete_iter(req),
            user_agent='MultipartDELETE', swift_source='SLO',
            out_content_type=out_content_type)
        return resp

    def __call__(self, env, start_response):
        """
        WSGI entry point
        """
        req = Request(env)
        try:
            vrs, account, container, obj = req.split_path(4, 4, True)
        except ValueError:
            return self.app(env, start_response)

        # install our COPY-callback hook
        env['swift.copy_hook'] = self.copy_hook(
            env.get('swift.copy_hook',
                    lambda src_req, src_resp, sink_req: src_resp))

        try:
            if req.method == 'PUT' and \
                    req.params.get('multipart-manifest') == 'put':
                return self.handle_multipart_put(req, start_response)
            if req.method == 'DELETE' and \
                    req.params.get('multipart-manifest') == 'delete':
                return self.handle_multipart_delete(req)(env, start_response)
            if req.method == 'GET' or req.method == 'HEAD':
                return self.handle_multipart_get_or_head(req, start_response)
            if 'X-Static-Large-Object' in req.headers:
                raise HTTPBadRequest(
                    request=req,
                    body='X-Static-Large-Object is a reserved header. '
                    'To create a static large object add query param '
                    'multipart-manifest=put.')
        except HTTPException as err_resp:
            return err_resp(env, start_response)

        return self.app(env, start_response)
Esempio n. 9
0
class StaticLargeObject(object):
    """
    StaticLargeObject Middleware

    See above for a full description.

    The proxy logs created for any subrequests made will have swift.source set
    to "SLO".

    :param app: The next WSGI filter or app in the paste.deploy chain.
    :param conf: The configuration dict for the middleware.
    """
    def __init__(self,
                 app,
                 conf,
                 max_manifest_segments=DEFAULT_MAX_MANIFEST_SEGMENTS,
                 max_manifest_size=DEFAULT_MAX_MANIFEST_SIZE):
        self.conf = conf
        self.app = app
        self.logger = get_logger(conf, log_route='slo')
        self.max_manifest_segments = max_manifest_segments
        self.max_manifest_size = max_manifest_size
        self.max_get_time = int(self.conf.get('max_get_time', 86400))
        self.rate_limit_under_size = int(
            self.conf.get('rate_limit_under_size',
                          DEFAULT_RATE_LIMIT_UNDER_SIZE))
        self.rate_limit_after_segment = int(
            self.conf.get('rate_limit_after_segment', '10'))
        self.rate_limit_segments_per_sec = int(
            self.conf.get('rate_limit_segments_per_sec', '1'))
        self.concurrency = min(1000,
                               max(0, int(self.conf.get('concurrency', '2'))))
        delete_concurrency = int(
            self.conf.get('delete_concurrency', self.concurrency))
        self.bulk_deleter = Bulk(app, {},
                                 delete_concurrency=delete_concurrency,
                                 logger=self.logger)

    def handle_multipart_get_or_head(self, req, start_response):
        """
        Handles the GET or HEAD of a SLO manifest.

        The response body (only on GET, of course) will consist of the
        concatenation of the segments.

        :params req: a swob.Request with a path referencing an object
        :raises: HttpException on errors
        """
        return SloGetContext(self).handle_slo_get_or_head(req, start_response)

    def handle_multipart_put(self, req, start_response):
        """
        Will handle the PUT of a SLO manifest.
        Heads every object in manifest to check if is valid and if so will
        save a manifest generated from the user input. Uses WSGIContext to
        call self and start_response and returns a WSGI iterator.

        :params req: a swob.Request with an obj in path
        :raises: HttpException on errors
        """
        try:
            vrs, account, container, obj = req.split_path(1, 4, True)
        except ValueError:
            return self.app(req.environ, start_response)
        if req.content_length > self.max_manifest_size:
            raise HTTPRequestEntityTooLarge("Manifest File > %d bytes" %
                                            self.max_manifest_size)
        if req.headers.get('X-Copy-From'):
            raise HTTPMethodNotAllowed(
                'Multipart Manifest PUTs cannot be COPY requests')
        if req.content_length is None and \
                req.headers.get('transfer-encoding', '').lower() != 'chunked':
            raise HTTPLengthRequired(request=req)
        parsed_data = parse_and_validate_input(
            req.body_file.read(self.max_manifest_size), req.path)
        problem_segments = []

        if len(parsed_data) > self.max_manifest_segments:
            raise HTTPRequestEntityTooLarge(
                'Number of segments must be <= %d' %
                self.max_manifest_segments)
        total_size = 0
        out_content_type = req.accept.best_match(ACCEPTABLE_FORMATS)
        if not out_content_type:
            out_content_type = 'text/plain'
        data_for_storage = []
        path2indices = defaultdict(list)
        for index, seg_dict in enumerate(parsed_data):
            path2indices[seg_dict['path']].append(index)

        def do_head(obj_name):
            obj_path = '/'.join(
                ['', vrs, account,
                 get_valid_utf8_str(obj_name).lstrip('/')])

            sub_req = make_subrequest(
                req.environ,
                path=obj_path + '?',  # kill the query string
                method='HEAD',
                headers={'x-auth-token': req.headers.get('x-auth-token')},
                agent='%(orig)s SLO MultipartPUT',
                swift_source='SLO')
            return obj_name, sub_req.get_response(self)

        def validate_seg_dict(seg_dict, head_seg_resp):
            if not head_seg_resp.is_success:
                problem_segments.append(
                    [quote(obj_name), head_seg_resp.status])
                return 0, None

            segment_length = head_seg_resp.content_length
            if seg_dict.get('range'):
                # Since we now know the length, we can normalize the
                # range. We know that there is exactly one range
                # requested since we checked that earlier in
                # parse_and_validate_input().
                ranges = seg_dict['range'].ranges_for_length(
                    head_seg_resp.content_length)

                if not ranges:
                    problem_segments.append(
                        [quote(obj_name), 'Unsatisfiable Range'])
                elif ranges == [(0, head_seg_resp.content_length)]:
                    # Just one range, and it exactly matches the object.
                    # Why'd we do this again?
                    del seg_dict['range']
                    segment_length = head_seg_resp.content_length
                else:
                    rng = ranges[0]
                    seg_dict['range'] = '%d-%d' % (rng[0], rng[1] - 1)
                    segment_length = rng[1] - rng[0]

            if segment_length < 1:
                problem_segments.append([
                    quote(obj_name),
                    'Too small; each segment must be at least 1 byte.'
                ])
            if seg_dict['size_bytes'] is not None and \
                    seg_dict['size_bytes'] != head_seg_resp.content_length:
                problem_segments.append([quote(obj_name), 'Size Mismatch'])
            if seg_dict['etag'] is not None and \
                    seg_dict['etag'] != head_seg_resp.etag:
                problem_segments.append([quote(obj_name), 'Etag Mismatch'])
            if head_seg_resp.last_modified:
                last_modified = head_seg_resp.last_modified
            else:
                # shouldn't happen
                last_modified = datetime.now()

            last_modified_formatted = \
                last_modified.strftime('%Y-%m-%dT%H:%M:%S.%f')
            seg_data = {
                'name': '/' + seg_dict['path'].lstrip('/'),
                'bytes': head_seg_resp.content_length,
                'hash': head_seg_resp.etag,
                'content_type': head_seg_resp.content_type,
                'last_modified': last_modified_formatted
            }
            if seg_dict.get('range'):
                seg_data['range'] = seg_dict['range']
            if config_true_value(
                    head_seg_resp.headers.get('X-Static-Large-Object')):
                seg_data['sub_slo'] = True
            return segment_length, seg_data

        data_for_storage = [None] * len(parsed_data)
        with StreamingPile(self.concurrency) as pile:
            for obj_name, resp in pile.asyncstarmap(
                    do_head, ((path, ) for path in path2indices)):
                for i in path2indices[obj_name]:
                    segment_length, seg_data = validate_seg_dict(
                        parsed_data[i], resp)
                    data_for_storage[i] = seg_data
                    total_size += segment_length

        if problem_segments:
            resp_body = get_response_body(out_content_type, {},
                                          problem_segments)
            raise HTTPBadRequest(resp_body, content_type=out_content_type)

        slo_etag = md5()
        for seg_data in data_for_storage:
            if seg_data.get('range'):
                slo_etag.update('%s:%s;' %
                                (seg_data['hash'], seg_data['range']))
            else:
                slo_etag.update(seg_data['hash'])

        slo_etag = slo_etag.hexdigest()
        req.headers.update({
            SYSMETA_SLO_ETAG: slo_etag,
            SYSMETA_SLO_SIZE: total_size,
            'X-Static-Large-Object': 'True',
        })

        json_data = json.dumps(data_for_storage)
        if six.PY3:
            json_data = json_data.encode('utf-8')
        req.body = json_data

        env = req.environ
        if not env.get('CONTENT_TYPE'):
            guessed_type, _junk = mimetypes.guess_type(req.path_info)
            env['CONTENT_TYPE'] = guessed_type or 'application/octet-stream'
        env['swift.content_type_overridden'] = True
        env['CONTENT_TYPE'] += ";swift_bytes=%d" % total_size

        def start_response_wrapper(status, headers, exc_info=None):
            for i, (header, _value) in enumerate(headers):
                if header.lower() == 'etag':
                    headers[i] = ('Etag', '"%s"' % slo_etag)
                    break
            return start_response(status, headers, exc_info)

        return self.app(env, start_response_wrapper)

    def get_segments_to_delete_iter(self, req):
        """
        A generator function to be used to delete all the segments and
        sub-segments referenced in a manifest.

        :params req: a swob.Request with an SLO manifest in path
        :raises HTTPPreconditionFailed: on invalid UTF8 in request path
        :raises HTTPBadRequest: on too many buffered sub segments and
                                on invalid SLO manifest path
        """
        if not check_utf8(req.path_info):
            raise HTTPPreconditionFailed(request=req,
                                         body='Invalid UTF8 or contains NULL')
        vrs, account, container, obj = req.split_path(4, 4, True)

        segments = [{
            'sub_slo': True,
            'name': ('/%s/%s' % (container, obj)).decode('utf-8')
        }]
        while segments:
            if len(segments) > MAX_BUFFERED_SLO_SEGMENTS:
                raise HTTPBadRequest(
                    'Too many buffered slo segments to delete.')
            seg_data = segments.pop(0)
            if seg_data.get('sub_slo'):
                try:
                    segments.extend(
                        self.get_slo_segments(seg_data['name'], req))
                except HTTPException as err:
                    # allow bulk delete response to report errors
                    seg_data['error'] = {
                        'code': err.status_int,
                        'message': err.body
                    }

                # add manifest back to be deleted after segments
                seg_data['sub_slo'] = False
                segments.append(seg_data)
            else:
                seg_data['name'] = seg_data['name'].encode('utf-8')
                yield seg_data

    def get_slo_segments(self, obj_name, req):
        """
        Performs a swob.Request and returns the SLO manifest's segments.

        :raises HTTPServerError: on unable to load obj_name or
                                 on unable to load the SLO manifest data.
        :raises HTTPBadRequest: on not an SLO manifest
        :raises HTTPNotFound: on SLO manifest not found
        :returns: SLO manifest's segments
        """
        vrs, account, _junk = req.split_path(2, 3, True)
        new_env = req.environ.copy()
        new_env['REQUEST_METHOD'] = 'GET'
        del (new_env['wsgi.input'])
        new_env['QUERY_STRING'] = 'multipart-manifest=get'
        new_env['CONTENT_LENGTH'] = 0
        new_env['HTTP_USER_AGENT'] = \
            '%s MultipartDELETE' % new_env.get('HTTP_USER_AGENT')
        new_env['swift.source'] = 'SLO'
        new_env['PATH_INFO'] = (
            '/%s/%s/%s' % (vrs, account, obj_name.lstrip('/'))).encode('utf-8')
        resp = Request.blank('', new_env).get_response(self.app)

        if resp.is_success:
            if config_true_value(resp.headers.get('X-Static-Large-Object')):
                try:
                    return json.loads(resp.body)
                except ValueError:
                    raise HTTPServerError('Unable to load SLO manifest')
            else:
                raise HTTPBadRequest('Not an SLO manifest')
        elif resp.status_int == HTTP_NOT_FOUND:
            raise HTTPNotFound('SLO manifest not found')
        elif resp.status_int == HTTP_UNAUTHORIZED:
            raise HTTPUnauthorized('401 Unauthorized')
        else:
            raise HTTPServerError('Unable to load SLO manifest or segment.')

    def handle_multipart_delete(self, req):
        """
        Will delete all the segments in the SLO manifest and then, if
        successful, will delete the manifest file.

        :params req: a swob.Request with an obj in path
        :returns: swob.Response whose app_iter set to Bulk.handle_delete_iter
        """
        req.headers['Content-Type'] = None  # Ignore content-type from client
        resp = HTTPOk(request=req)
        out_content_type = req.accept.best_match(ACCEPTABLE_FORMATS)
        if out_content_type:
            resp.content_type = out_content_type
        resp.app_iter = self.bulk_deleter.handle_delete_iter(
            req,
            objs_to_delete=self.get_segments_to_delete_iter(req),
            user_agent='MultipartDELETE',
            swift_source='SLO',
            out_content_type=out_content_type)
        return resp

    def __call__(self, env, start_response):
        """
        WSGI entry point
        """
        if env.get('swift.slo_override'):
            return self.app(env, start_response)

        req = Request(env)
        try:
            vrs, account, container, obj = req.split_path(4, 4, True)
        except ValueError:
            return self.app(env, start_response)

        try:
            if req.method == 'PUT' and \
                    req.params.get('multipart-manifest') == 'put':
                return self.handle_multipart_put(req, start_response)
            if req.method == 'DELETE' and \
                    req.params.get('multipart-manifest') == 'delete':
                return self.handle_multipart_delete(req)(env, start_response)
            if req.method == 'GET' or req.method == 'HEAD':
                return self.handle_multipart_get_or_head(req, start_response)
            if 'X-Static-Large-Object' in req.headers:
                raise HTTPBadRequest(
                    request=req,
                    body='X-Static-Large-Object is a reserved header. '
                    'To create a static large object add query param '
                    'multipart-manifest=put.')
        except HTTPException as err_resp:
            return err_resp(env, start_response)

        return self.app(env, start_response)
Esempio n. 10
0
class StaticLargeObject(object):
    """
    StaticLargeObject Middleware

    See above for a full description.

    The proxy logs created for any subrequests made will have swift.source set
    to "SLO".

    :param app: The next WSGI filter or app in the paste.deploy chain.
    :param conf: The configuration dict for the middleware.
    """

    def __init__(self, app, conf):
        self.conf = conf
        self.app = app
        self.logger = get_logger(conf, log_route='slo')
        self.max_manifest_segments = int(self.conf.get('max_manifest_segments',
                                         1000))
        self.max_manifest_size = int(self.conf.get('max_manifest_size',
                                     1024 * 1024 * 2))
        self.min_segment_size = int(self.conf.get('min_segment_size',
                                    1024 * 1024))
        self.bulk_deleter = Bulk(
            app, {'max_deletes_per_request': self.max_manifest_segments})

    def handle_multipart_put(self, req):
        """
        Will handle the PUT of a SLO manifest.
        Heads every object in manifest to check if is valid and if so will
        save a manifest generated from the user input.

        :params req: a swob.Request with an obj in path
        :raises: HttpException on errors
        """
        try:
            vrs, account, container, obj = req.split_path(1, 4, True)
        except ValueError:
            return self.app
        if req.content_length > self.max_manifest_size:
            raise HTTPRequestEntityTooLarge(
                "Manifest File > %d bytes" % self.max_manifest_size)
        if req.headers.get('X-Copy-From'):
            raise HTTPMethodNotAllowed(
                'Multipart Manifest PUTs cannot be Copy requests')
        if req.content_length is None and \
                req.headers.get('transfer-encoding', '').lower() != 'chunked':
            raise HTTPLengthRequired(request=req)
        parsed_data = parse_input(req.body_file.read(self.max_manifest_size))
        problem_segments = []

        if len(parsed_data) > self.max_manifest_segments:
            raise HTTPRequestEntityTooLarge(
                'Number segments must be <= %d' % self.max_manifest_segments)
        total_size = 0
        out_content_type = req.accept.best_match(ACCEPTABLE_FORMATS)
        if not out_content_type:
            out_content_type = 'text/plain'
        data_for_storage = []
        for index, seg_dict in enumerate(parsed_data):
            obj_path = '/'.join(
                ['', vrs, account, seg_dict['path'].lstrip('/')])
            try:
                seg_size = int(seg_dict['size_bytes'])
            except (ValueError, TypeError):
                raise HTTPBadRequest('Invalid Manifest File')
            if seg_size < self.min_segment_size and \
                    (index == 0 or index < len(parsed_data) - 1):
                raise HTTPBadRequest(
                    'Each segment, except the last, must be larger than '
                    '%d bytes.' % self.min_segment_size)

            new_env = req.environ.copy()
            if isinstance(obj_path, unicode):
                obj_path = obj_path.encode('utf-8')
            new_env['PATH_INFO'] = obj_path
            new_env['REQUEST_METHOD'] = 'HEAD'
            new_env['swift.source'] = 'SLO'
            del(new_env['wsgi.input'])
            del(new_env['QUERY_STRING'])
            new_env['CONTENT_LENGTH'] = 0
            new_env['HTTP_USER_AGENT'] = \
                '%s MultipartPUT' % req.environ.get('HTTP_USER_AGENT')
            head_seg_resp = \
                Request.blank(obj_path, new_env).get_response(self.app)
            if head_seg_resp.status_int // 100 == 2:
                total_size += seg_size
                if seg_size != head_seg_resp.content_length:
                    problem_segments.append([quote(obj_path), 'Size Mismatch'])
                if seg_dict['etag'] != head_seg_resp.etag:
                    problem_segments.append([quote(obj_path), 'Etag Mismatch'])
                if head_seg_resp.last_modified:
                    last_modified = head_seg_resp.last_modified
                else:
                    # shouldn't happen
                    last_modified = datetime.now()

                last_modified_formatted = \
                    last_modified.strftime('%Y-%m-%dT%H:%M:%S.%f')
                data_for_storage.append(
                    {'name': '/' + seg_dict['path'].lstrip('/'),
                     'bytes': seg_size,
                     'hash': seg_dict['etag'],
                     'content_type': head_seg_resp.content_type,
                     'last_modified': last_modified_formatted})

            else:
                problem_segments.append([quote(obj_path),
                                         head_seg_resp.status])
        if problem_segments:
            resp_body = get_response_body(
                out_content_type, {}, problem_segments)
            raise HTTPBadRequest(resp_body, content_type=out_content_type)
        env = req.environ

        if not env.get('CONTENT_TYPE'):
            guessed_type, _junk = mimetypes.guess_type(req.path_info)
            env['CONTENT_TYPE'] = guessed_type or 'application/octet-stream'
        env['swift.content_type_overriden'] = True
        env['CONTENT_TYPE'] += ";swift_bytes=%d" % total_size
        env['HTTP_X_STATIC_LARGE_OBJECT'] = 'True'
        json_data = json.dumps(data_for_storage)
        env['CONTENT_LENGTH'] = str(len(json_data))
        env['wsgi.input'] = StringIO(json_data)
        return self.app

    def handle_multipart_delete(self, req):
        """
        Will delete all the segments in the SLO manifest and then, if
        successful, will delete the manifest file.
        :params req: a swob.Request with an obj in path
        :raises HTTPServerError: on invalid manifest
        :returns: swob.Response on failure, otherwise self.app
        """
        new_env = req.environ.copy()
        new_env['REQUEST_METHOD'] = 'GET'
        del(new_env['wsgi.input'])
        new_env['QUERY_STRING'] = 'multipart-manifest=get'
        new_env['CONTENT_LENGTH'] = 0
        new_env['HTTP_USER_AGENT'] = \
            '%s MultipartDELETE' % req.environ.get('HTTP_USER_AGENT')
        new_env['swift.source'] = 'SLO'
        get_man_resp = \
            Request.blank('', new_env).get_response(self.app)
        if get_man_resp.status_int // 100 == 2:
            if not config_true_value(
                    get_man_resp.headers.get('X-Static-Large-Object')):
                raise HTTPBadRequest('Not an SLO manifest')
            try:
                manifest = json.loads(get_man_resp.body)
            except ValueError:
                raise HTTPServerError('Invalid manifest file')
            delete_resp = self.bulk_deleter.handle_delete(
                req,
                objs_to_delete=[o['name'].encode('utf-8') for o in manifest],
                user_agent='MultipartDELETE', swift_source='SLO')
            if delete_resp.status_int // 100 == 2:
                # delete the manifest file itself
                return self.app
            else:
                return delete_resp
        return get_man_resp

    @wsgify
    def __call__(self, req):
        """
        WSGI entry point
        """
        try:
            vrs, account, container, obj = req.split_path(1, 4, True)
        except ValueError:
            return self.app
        if obj:
            if req.method == 'PUT' and \
                    req.params.get('multipart-manifest') == 'put':
                return self.handle_multipart_put(req)
            if req.method == 'DELETE' and \
                    req.params.get('multipart-manifest') == 'delete':
                return self.handle_multipart_delete(req)
            if 'X-Static-Large-Object' in req.headers:
                raise HTTPBadRequest(
                    request=req,
                    body='X-Static-Large-Object is a reserved header. '
                    'To create a static large object add query param '
                    'multipart-manifest=put.')

        return self.app
Esempio n. 11
0
class StaticLargeObject(object):
    """
    StaticLargeObject Middleware

    See above for a full description.

    The proxy logs created for any subrequests made will have swift.source set
    to "SLO".

    :param app: The next WSGI filter or app in the paste.deploy chain.
    :param conf: The configuration dict for the middleware.
    """
    def __init__(self,
                 app,
                 conf,
                 min_segment_size=DEFAULT_MIN_SEGMENT_SIZE,
                 max_manifest_segments=DEFAULT_MAX_MANIFEST_SEGMENTS,
                 max_manifest_size=DEFAULT_MAX_MANIFEST_SIZE):
        self.conf = conf
        self.app = app
        self.logger = get_logger(conf, log_route='slo')
        self.max_manifest_segments = max_manifest_segments
        self.max_manifest_size = max_manifest_size
        self.min_segment_size = min_segment_size
        self.max_get_time = int(self.conf.get('max_get_time', 86400))
        self.rate_limit_after_segment = int(
            self.conf.get('rate_limit_after_segment', '10'))
        self.rate_limit_segments_per_sec = int(
            self.conf.get('rate_limit_segments_per_sec', '0'))
        self.bulk_deleter = Bulk(app, {}, logger=self.logger)

    def handle_multipart_get_or_head(self, req, start_response):
        """
        Handles the GET or HEAD of a SLO manifest.

        The response body (only on GET, of course) will consist of the
        concatenation of the segments.

        :params req: a swob.Request with a path referencing an object
        :raises: HttpException on errors
        """
        return SloGetContext(self).handle_slo_get_or_head(req, start_response)

    def copy_hook(self, inner_hook):
        def slo_hook(source_req, source_resp, sink_req):
            x_slo = source_resp.headers.get('X-Static-Large-Object')
            if (config_true_value(x_slo)
                    and source_req.params.get('multipart-manifest') != 'get'):
                source_resp = SloGetContext(self).get_or_head_response(
                    source_req, source_resp.headers.items(),
                    source_resp.app_iter)
            return inner_hook(source_req, source_resp, sink_req)

        return slo_hook

    def handle_multipart_put(self, req, start_response):
        """
        Will handle the PUT of a SLO manifest.
        Heads every object in manifest to check if is valid and if so will
        save a manifest generated from the user input. Uses WSGIContext to
        call self and start_response and returns a WSGI iterator.

        :params req: a swob.Request with an obj in path
        :raises: HttpException on errors
        """
        try:
            vrs, account, container, obj = req.split_path(1, 4, True)
        except ValueError:
            return self.app(req.environ, start_response)
        if req.content_length > self.max_manifest_size:
            raise HTTPRequestEntityTooLarge("Manifest File > %d bytes" %
                                            self.max_manifest_size)
        if req.headers.get('X-Copy-From'):
            raise HTTPMethodNotAllowed(
                'Multipart Manifest PUTs cannot be COPY requests')
        if req.content_length is None and \
                req.headers.get('transfer-encoding', '').lower() != 'chunked':
            raise HTTPLengthRequired(request=req)
        parsed_data = parse_input(req.body_file.read(self.max_manifest_size))
        problem_segments = []

        if len(parsed_data) > self.max_manifest_segments:
            raise HTTPRequestEntityTooLarge(
                'Number of segments must be <= %d' %
                self.max_manifest_segments)
        total_size = 0
        out_content_type = req.accept.best_match(ACCEPTABLE_FORMATS)
        if not out_content_type:
            out_content_type = 'text/plain'
        data_for_storage = []
        slo_etag = md5()
        for index, seg_dict in enumerate(parsed_data):
            obj_name = seg_dict['path']
            if isinstance(obj_name, unicode):
                obj_name = obj_name.encode('utf-8')
            obj_path = '/'.join(['', vrs, account, obj_name.lstrip('/')])
            try:
                seg_size = int(seg_dict['size_bytes'])
            except (ValueError, TypeError):
                raise HTTPBadRequest('Invalid Manifest File')
            if seg_size < self.min_segment_size and \
                    (index == 0 or index < len(parsed_data) - 1):
                raise HTTPBadRequest(
                    'Each segment, except the last, must be at least '
                    '%d bytes.' % self.min_segment_size)

            new_env = req.environ.copy()
            new_env['PATH_INFO'] = obj_path
            new_env['REQUEST_METHOD'] = 'HEAD'
            new_env['swift.source'] = 'SLO'
            del (new_env['wsgi.input'])
            del (new_env['QUERY_STRING'])
            new_env['CONTENT_LENGTH'] = 0
            new_env['HTTP_USER_AGENT'] = \
                '%s MultipartPUT' % req.environ.get('HTTP_USER_AGENT')
            head_seg_resp = \
                Request.blank(obj_path, new_env).get_response(self)
            if head_seg_resp.is_success:
                total_size += seg_size
                if seg_size != head_seg_resp.content_length:
                    problem_segments.append([quote(obj_name), 'Size Mismatch'])
                if seg_dict['etag'] == head_seg_resp.etag:
                    slo_etag.update(seg_dict['etag'])
                else:
                    problem_segments.append([quote(obj_name), 'Etag Mismatch'])
                if head_seg_resp.last_modified:
                    last_modified = head_seg_resp.last_modified
                else:
                    # shouldn't happen
                    last_modified = datetime.now()

                last_modified_formatted = \
                    last_modified.strftime('%Y-%m-%dT%H:%M:%S.%f')
                seg_data = {
                    'name': '/' + seg_dict['path'].lstrip('/'),
                    'bytes': seg_size,
                    'hash': seg_dict['etag'],
                    'content_type': head_seg_resp.content_type,
                    'last_modified': last_modified_formatted
                }
                if config_true_value(
                        head_seg_resp.headers.get('X-Static-Large-Object')):
                    seg_data['sub_slo'] = True
                data_for_storage.append(seg_data)

            else:
                problem_segments.append(
                    [quote(obj_name), head_seg_resp.status])
        if problem_segments:
            resp_body = get_response_body(out_content_type, {},
                                          problem_segments)
            raise HTTPBadRequest(resp_body, content_type=out_content_type)
        env = req.environ

        if not env.get('CONTENT_TYPE'):
            guessed_type, _junk = mimetypes.guess_type(req.path_info)
            env['CONTENT_TYPE'] = guessed_type or 'application/octet-stream'
        env['swift.content_type_overridden'] = True
        env['CONTENT_TYPE'] += ";swift_bytes=%d" % total_size
        env['HTTP_X_STATIC_LARGE_OBJECT'] = 'True'
        json_data = json.dumps(data_for_storage)
        env['CONTENT_LENGTH'] = str(len(json_data))
        env['wsgi.input'] = StringIO(json_data)

        slo_put_context = SloPutContext(self, slo_etag)
        return slo_put_context.handle_slo_put(req, start_response)

    def get_segments_to_delete_iter(self, req):
        """
        A generator function to be used to delete all the segments and
        sub-segments referenced in a manifest.

        :params req: a swob.Request with an SLO manifest in path
        :raises HTTPPreconditionFailed: on invalid UTF8 in request path
        :raises HTTPBadRequest: on too many buffered sub segments and
                                on invalid SLO manifest path
        """
        if not check_utf8(req.path_info):
            raise HTTPPreconditionFailed(request=req,
                                         body='Invalid UTF8 or contains NULL')
        vrs, account, container, obj = req.split_path(4, 4, True)

        segments = [{
            'sub_slo': True,
            'name': ('/%s/%s' % (container, obj)).decode('utf-8')
        }]
        while segments:
            if len(segments) > MAX_BUFFERED_SLO_SEGMENTS:
                raise HTTPBadRequest(
                    'Too many buffered slo segments to delete.')
            seg_data = segments.pop(0)
            if seg_data.get('sub_slo'):
                try:
                    segments.extend(
                        self.get_slo_segments(seg_data['name'], req))
                except HTTPException as err:
                    # allow bulk delete response to report errors
                    seg_data['error'] = {
                        'code': err.status_int,
                        'message': err.body
                    }

                # add manifest back to be deleted after segments
                seg_data['sub_slo'] = False
                segments.append(seg_data)
            else:
                seg_data['name'] = seg_data['name'].encode('utf-8')
                yield seg_data

    def get_slo_segments(self, obj_name, req):
        """
        Performs a swob.Request and returns the SLO manifest's segments.

        :raises HTTPServerError: on unable to load obj_name or
                                 on unable to load the SLO manifest data.
        :raises HTTPBadRequest: on not an SLO manifest
        :raises HTTPNotFound: on SLO manifest not found
        :returns: SLO manifest's segments
        """
        vrs, account, _junk = req.split_path(2, 3, True)
        new_env = req.environ.copy()
        new_env['REQUEST_METHOD'] = 'GET'
        del (new_env['wsgi.input'])
        new_env['QUERY_STRING'] = 'multipart-manifest=get'
        new_env['CONTENT_LENGTH'] = 0
        new_env['HTTP_USER_AGENT'] = \
            '%s MultipartDELETE' % new_env.get('HTTP_USER_AGENT')
        new_env['swift.source'] = 'SLO'
        new_env['PATH_INFO'] = (
            '/%s/%s/%s' % (vrs, account, obj_name.lstrip('/'))).encode('utf-8')
        resp = Request.blank('', new_env).get_response(self.app)

        if resp.is_success:
            if config_true_value(resp.headers.get('X-Static-Large-Object')):
                try:
                    return json.loads(resp.body)
                except ValueError:
                    raise HTTPServerError('Unable to load SLO manifest')
            else:
                raise HTTPBadRequest('Not an SLO manifest')
        elif resp.status_int == HTTP_NOT_FOUND:
            raise HTTPNotFound('SLO manifest not found')
        elif resp.status_int == HTTP_UNAUTHORIZED:
            raise HTTPUnauthorized('401 Unauthorized')
        else:
            raise HTTPServerError('Unable to load SLO manifest or segment.')

    def handle_multipart_delete(self, req):
        """
        Will delete all the segments in the SLO manifest and then, if
        successful, will delete the manifest file.

        :params req: a swob.Request with an obj in path
        :returns: swob.Response whose app_iter set to Bulk.handle_delete_iter
        """
        resp = HTTPOk(request=req)
        out_content_type = req.accept.best_match(ACCEPTABLE_FORMATS)
        if out_content_type:
            resp.content_type = out_content_type
        resp.app_iter = self.bulk_deleter.handle_delete_iter(
            req,
            objs_to_delete=self.get_segments_to_delete_iter(req),
            user_agent='MultipartDELETE',
            swift_source='SLO',
            out_content_type=out_content_type)
        return resp

    def __call__(self, env, start_response):
        """
        WSGI entry point
        """
        req = Request(env)
        try:
            vrs, account, container, obj = req.split_path(4, 4, True)
        except ValueError:
            return self.app(env, start_response)

        # install our COPY-callback hook
        env['swift.copy_hook'] = self.copy_hook(
            env.get('swift.copy_hook',
                    lambda src_req, src_resp, sink_req: src_resp))

        try:
            if req.method == 'PUT' and \
                    req.params.get('multipart-manifest') == 'put':
                return self.handle_multipart_put(req, start_response)
            if req.method == 'DELETE' and \
                    req.params.get('multipart-manifest') == 'delete':
                return self.handle_multipart_delete(req)(env, start_response)
            if req.method == 'GET' or req.method == 'HEAD':
                return self.handle_multipart_get_or_head(req, start_response)
            if 'X-Static-Large-Object' in req.headers:
                raise HTTPBadRequest(
                    request=req,
                    body='X-Static-Large-Object is a reserved header. '
                    'To create a static large object add query param '
                    'multipart-manifest=put.')
        except HTTPException as err_resp:
            return err_resp(env, start_response)

        return self.app(env, start_response)
Esempio n. 12
0
class StaticLargeObject(object):
    """
    StaticLargeObject Middleware

    See above for a full description.

    The proxy logs created for any subrequests made will have swift.source set
    to "SLO".

    :param app: The next WSGI filter or app in the paste.deploy chain.
    :param conf: The configuration dict for the middleware.
    """

    def __init__(self, app, conf):
        self.conf = conf
        self.app = app
        self.logger = get_logger(conf, log_route='slo')
        self.max_manifest_segments = int(self.conf.get('max_manifest_segments',
                                         1000))
        self.max_manifest_size = int(self.conf.get('max_manifest_size',
                                     1024 * 1024 * 2))
        self.min_segment_size = int(self.conf.get('min_segment_size',
                                    1024 * 1024))
        self.bulk_deleter = Bulk(app, {})

    def handle_multipart_put(self, req, start_response):
        """
        Will handle the PUT of a SLO manifest.
        Heads every object in manifest to check if is valid and if so will
        save a manifest generated from the user input. Uses WSGIContext to
        call self.app and start_response and returns a WSGI iterator.

        :params req: a swob.Request with an obj in path
        :raises: HttpException on errors
        """
        try:
            vrs, account, container, obj = req.split_path(1, 4, True)
        except ValueError:
            return self.app(req.environ, start_response)
        if req.content_length > self.max_manifest_size:
            raise HTTPRequestEntityTooLarge(
                "Manifest File > %d bytes" % self.max_manifest_size)
        if req.headers.get('X-Copy-From'):
            raise HTTPMethodNotAllowed(
                'Multipart Manifest PUTs cannot be Copy requests')
        if req.content_length is None and \
                req.headers.get('transfer-encoding', '').lower() != 'chunked':
            raise HTTPLengthRequired(request=req)
        parsed_data = parse_input(req.body_file.read(self.max_manifest_size))
        problem_segments = []

        if len(parsed_data) > self.max_manifest_segments:
            raise HTTPRequestEntityTooLarge(
                'Number segments must be <= %d' % self.max_manifest_segments)
        total_size = 0
        out_content_type = req.accept.best_match(ACCEPTABLE_FORMATS)
        if not out_content_type:
            out_content_type = 'text/plain'
        data_for_storage = []
        slo_etag = md5()
        for index, seg_dict in enumerate(parsed_data):
            obj_name = seg_dict['path']
            if isinstance(obj_name, unicode):
                obj_name = obj_name.encode('utf-8')
            obj_path = '/'.join(['', vrs, account, obj_name.lstrip('/')])
            try:
                seg_size = int(seg_dict['size_bytes'])
            except (ValueError, TypeError):
                raise HTTPBadRequest('Invalid Manifest File')
            if seg_size < self.min_segment_size and \
                    (index == 0 or index < len(parsed_data) - 1):
                raise HTTPBadRequest(
                    'Each segment, except the last, must be larger than '
                    '%d bytes.' % self.min_segment_size)

            new_env = req.environ.copy()
            new_env['PATH_INFO'] = obj_path
            new_env['REQUEST_METHOD'] = 'HEAD'
            new_env['swift.source'] = 'SLO'
            del(new_env['wsgi.input'])
            del(new_env['QUERY_STRING'])
            new_env['CONTENT_LENGTH'] = 0
            new_env['HTTP_USER_AGENT'] = \
                '%s MultipartPUT' % req.environ.get('HTTP_USER_AGENT')
            head_seg_resp = \
                Request.blank(obj_path, new_env).get_response(self.app)
            if head_seg_resp.is_success:
                total_size += seg_size
                if seg_size != head_seg_resp.content_length:
                    problem_segments.append([quote(obj_name), 'Size Mismatch'])
                if seg_dict['etag'] == head_seg_resp.etag:
                    slo_etag.update(seg_dict['etag'])
                else:
                    problem_segments.append([quote(obj_name), 'Etag Mismatch'])
                if head_seg_resp.last_modified:
                    last_modified = head_seg_resp.last_modified
                else:
                    # shouldn't happen
                    last_modified = datetime.now()

                last_modified_formatted = \
                    last_modified.strftime('%Y-%m-%dT%H:%M:%S.%f')
                seg_data = {'name': '/' + seg_dict['path'].lstrip('/'),
                            'bytes': seg_size,
                            'hash': seg_dict['etag'],
                            'content_type': head_seg_resp.content_type,
                            'last_modified': last_modified_formatted}
                if config_true_value(
                        head_seg_resp.headers.get('X-Static-Large-Object')):
                    seg_data['sub_slo'] = True
                data_for_storage.append(seg_data)

            else:
                problem_segments.append([quote(obj_name),
                                         head_seg_resp.status])
        if problem_segments:
            resp_body = get_response_body(
                out_content_type, {}, problem_segments)
            raise HTTPBadRequest(resp_body, content_type=out_content_type)
        env = req.environ

        if not env.get('CONTENT_TYPE'):
            guessed_type, _junk = mimetypes.guess_type(req.path_info)
            env['CONTENT_TYPE'] = guessed_type or 'application/octet-stream'
        env['swift.content_type_overriden'] = True
        env['CONTENT_TYPE'] += ";swift_bytes=%d" % total_size
        env['HTTP_X_STATIC_LARGE_OBJECT'] = 'True'
        json_data = json.dumps(data_for_storage)
        env['CONTENT_LENGTH'] = str(len(json_data))
        env['wsgi.input'] = StringIO(json_data)

        slo_context = SloContext(self, slo_etag)
        return slo_context.handle_slo_put(req, start_response)

    def get_segments_to_delete_iter(self, req):
        """
        A generator function to be used to delete all the segments and
        sub-segments referenced in a manifest.

        :params req: a swob.Request with an SLO manifest in path
        :raises HTTPPreconditionFailed: on invalid UTF8 in request path
        :raises HTTPBadRequest: on too many buffered sub segments and
                                on invalid SLO manifest path
        """
        if not check_utf8(req.path_info):
            raise HTTPPreconditionFailed(
                request=req, body='Invalid UTF8 or contains NULL')
        try:
            vrs, account, container, obj = req.split_path(4, 4, True)
        except ValueError:
            raise HTTPBadRequest('Invalid SLO manifiest path')

        segments = [{
            'sub_slo': True,
            'name': ('/%s/%s' % (container, obj)).decode('utf-8')}]
        while segments:
            if len(segments) > MAX_BUFFERED_SLO_SEGMENTS:
                raise HTTPBadRequest(
                    'Too many buffered slo segments to delete.')
            seg_data = segments.pop(0)
            if seg_data.get('sub_slo'):
                try:
                    segments.extend(
                        self.get_slo_segments(seg_data['name'], req))
                except HTTPException as err:
                    # allow bulk delete response to report errors
                    seg_data['error'] = {'code': err.status_int,
                                         'message': err.body}

                # add manifest back to be deleted after segments
                seg_data['sub_slo'] = False
                segments.append(seg_data)
            else:
                seg_data['name'] = seg_data['name'].encode('utf-8')
                yield seg_data

    def get_slo_segments(self, obj_name, req):
        """
        Performs a swob.Request and returns the SLO manifest's segments.

        :raises HTTPServerError: on unable to load obj_name or
                                 on unable to load the SLO manifest data.
        :raises HTTPBadRequest: on not an SLO manifest
        :raises HTTPNotFound: on SLO manifest not found
        :returns: SLO manifest's segments
        """
        vrs, account, _junk = req.split_path(2, 3, True)
        new_env = req.environ.copy()
        new_env['REQUEST_METHOD'] = 'GET'
        del(new_env['wsgi.input'])
        new_env['QUERY_STRING'] = 'multipart-manifest=get'
        new_env['CONTENT_LENGTH'] = 0
        new_env['HTTP_USER_AGENT'] = \
            '%s MultipartDELETE' % new_env.get('HTTP_USER_AGENT')
        new_env['swift.source'] = 'SLO'
        new_env['PATH_INFO'] = (
            '/%s/%s/%s' % (
            vrs, account,
            obj_name.lstrip('/'))).encode('utf-8')
        resp = Request.blank('', new_env).get_response(self.app)

        if resp.is_success:
            if config_true_value(resp.headers.get('X-Static-Large-Object')):
                try:
                    return json.loads(resp.body)
                except ValueError:
                    raise HTTPServerError('Unable to load SLO manifest')
            else:
                raise HTTPBadRequest('Not an SLO manifest')
        elif resp.status_int == HTTP_NOT_FOUND:
            raise HTTPNotFound('SLO manifest not found')
        elif resp.status_int == HTTP_UNAUTHORIZED:
            raise HTTPUnauthorized('401 Unauthorized')
        else:
            raise HTTPServerError('Unable to load SLO manifest or segment.')

    def handle_multipart_delete(self, req):
        """
        Will delete all the segments in the SLO manifest and then, if
        successful, will delete the manifest file.

        :params req: a swob.Request with an obj in path
        :returns: swob.Response whose app_iter set to Bulk.handle_delete_iter
        """
        resp = HTTPOk(request=req)
        out_content_type = req.accept.best_match(ACCEPTABLE_FORMATS)
        if out_content_type:
            resp.content_type = out_content_type
        resp.app_iter = self.bulk_deleter.handle_delete_iter(
            req, objs_to_delete=self.get_segments_to_delete_iter(req),
            user_agent='MultipartDELETE', swift_source='SLO',
            out_content_type=out_content_type)
        return resp

    def __call__(self, env, start_response):
        """
        WSGI entry point
        """
        req = Request(env)
        try:
            vrs, account, container, obj = req.split_path(1, 4, True)
        except ValueError:
            return self.app(env, start_response)
        try:
            if obj:
                if req.method == 'PUT' and \
                        req.params.get('multipart-manifest') == 'put':
                    return self.handle_multipart_put(req, start_response)
                if req.method == 'DELETE' and \
                        req.params.get('multipart-manifest') == 'delete':
                    return self.handle_multipart_delete(req)(env,
                                                             start_response)
                if 'X-Static-Large-Object' in req.headers:
                    raise HTTPBadRequest(
                        request=req,
                        body='X-Static-Large-Object is a reserved header. '
                        'To create a static large object add query param '
                        'multipart-manifest=put.')
        except HTTPException as err_resp:
            return err_resp(env, start_response)

        return self.app(env, start_response)
Esempio n. 13
0
class StaticLargeObject(object):
    

    def __init__(self, app, conf):
        self.conf = conf
        self.app = app
        self.logger = get_logger(conf, log_route='slo')
        
        self.max_manifest_segments = int(self.conf.get('max_manifest_segments',
                                         1000))
        
        self.max_manifest_size = int(self.conf.get('max_manifest_size',
                                     1024 * 1024 * 2))
        self.min_segment_size = int(self.conf.get('min_segment_size',
                                    1024 * 1024))
        self.bulk_deleter = Bulk(
            app, {'max_deletes_per_request': self.max_manifest_segments})

    def handle_multipart_put(self, req):
                
        try:
            vrs, account, container, obj = split_path(req.path,1, 4, True)
        except ValueError:
            return self.app
        if req.content_length > self.max_manifest_size:
            raise HTTPRequestEntityTooLarge(
                "Manifest File > %d bytes" % self.max_manifest_size)
            
        if req.headers.get('X-Copy-From') or req.headers.get('Destination'):
            raise HTTPMethodNotAllowed(
                'Multipart Manifest PUTs cannot be Copy requests')
        if req.content_length is None and \
                req.headers.get('transfer-encoding', '').lower() != 'chunked':
            raise HTTPLengthRequired(request=req)
        parsed_data = parse_input(req.environ['wsgi.input'].read(self.max_manifest_size))
        problem_segments = []

        if len(parsed_data) > self.max_manifest_segments:
            raise HTTPRequestEntityTooLarge(
                'Number segments must be <= %d' % self.max_manifest_segments)
            
        total_size = 0
        out_content_type = 'application/json'
        if not out_content_type:
            out_content_type = 'text/plain'
        data_for_storage = []
        for index, seg_dict in enumerate(parsed_data):
            obj_path = '/'.join(
                ['', vrs, account, seg_dict['path'].lstrip('/')])
            try:
                seg_size = int(seg_dict['size_bytes'])
            except (ValueError, TypeError):
                raise HTTPBadRequest('Invalid Manifest File')
            
            new_env = req.environ.copy()
            if isinstance(obj_path, unicode):
                obj_path = obj_path.encode('utf-8')
            new_env['PATH_INFO'] = obj_path
            new_env['REQUEST_METHOD'] = 'HEAD'
            new_env['swift.source'] = 'SLO'
            del(new_env['wsgi.input'])
            del(new_env['QUERY_STRING'])
            new_env['CONTENT_LENGTH'] = 0
            new_env['HTTP_USER_AGENT'] = \
                '%s MultipartPUT' % req.environ.get('HTTP_USER_AGENT')
            head_seg_resp = \
                Request.blank(obj_path, new_env).get_response(self.app)
                
            if head_seg_resp.status_int // 100 == 2:
                total_size += seg_size
                if seg_size != head_seg_resp.content_length:
                    problem_segments.append([quote(obj_path), 'Size Mismatch'])
                if seg_dict['etag'] != head_seg_resp.etag:
                    problem_segments.append([quote(obj_path), 'Etag Mismatch'])
                                
                data_for_storage.append(
                    {'name': '/' + seg_dict['path'].lstrip('/'),
                     'bytes': seg_size,
                     'hash': seg_dict['etag']})

            else:
                problem_segments.append([quote(obj_path),
                                         head_seg_resp.status])
        if problem_segments:
            resp_body = get_response_body(
                out_content_type, {}, problem_segments)
            raise jresponse('-1','badrequest',req,400,param=resp_body)
        env = req.environ

        
        env['swift.content_type_overriden'] = True
        
        env['HTTP_X_STATIC_LARGE_OBJECT'] = 'True'
        json_data = json.dumps(data_for_storage)
        env['CONTENT_LENGTH'] = str(len(json_data))
        env['wsgi.input'] = StringIO(json_data)
        return self.app

    def handle_multipart_delete(self, req):
        
        new_env = req.environ.copy()
        new_env['REQUEST_METHOD'] = 'GET'
        del(new_env['wsgi.input'])
        new_env['QUERY_STRING'] = 'multipart-manifest=get'
        new_env['CONTENT_LENGTH'] = 0
        new_env['HTTP_USER_AGENT'] = \
            '%s MultipartDELETE' % req.environ.get('HTTP_USER_AGENT')
        new_env['swift.source'] = 'SLO'
        get_man_resp = \
            Request.blank('', new_env).get_response(self.app)
            
        if get_man_resp.status_int // 100 == 2:
            if not config_true_value(
                    get_man_resp.headers.get('X-Static-Large-Object')):
                raise HTTPBadRequest('Not an SLO manifest')
            try:
                manifest = json.loads(get_man_resp.body)
            except ValueError:
                raise HTTPServerError('Invalid manifest file')
            delete_resp = self.bulk_deleter.handle_delete(
                req,
                objs_to_delete=[o['name'].encode('utf-8') for o in manifest],
                user_agent='MultipartDELETE', swift_source='SLO')
            if delete_resp.status_int // 100 == 2:
                # delete the manifest file itself
                return self.app
            else:
                return delete_resp
        return get_man_resp

    @wsgify
    def __call__(self, req):
        """
        WSGI entry point
        """
        try:
            vrs, account, container, obj = split_path(req.path,1, 4, True)
        except ValueError:
            return self.app
        if obj:
            if req.method == 'PUT' and \
                    req.GET.get('multipart-manifest') == 'put':
                return self.handle_multipart_put(req)
            if req.method == 'DELETE' and \
                    req.GET.get('multipart-manifest') == 'delete':
                return self.handle_multipart_delete(req)
            if 'X-Static-Large-Object' in req.headers:
                raise HTTPBadRequest(
                    request=req,
                    body='X-Static-Large-Object is a reserved header. '
                    'To create a static large object add query param '
                    'multipart-manifest=put.')

        return self.app
Esempio n. 14
0
File: slo.py Progetto: pchng/swift
class StaticLargeObject(object):
    """
    StaticLargeObject Middleware

    See above for a full description.

    The proxy logs created for any subrequests made will have swift.source set
    to "SLO".

    :param app: The next WSGI filter or app in the paste.deploy chain.
    :param conf: The configuration dict for the middleware.
    """

    def __init__(
        self,
        app,
        conf,
        min_segment_size=DEFAULT_MIN_SEGMENT_SIZE,
        max_manifest_segments=DEFAULT_MAX_MANIFEST_SEGMENTS,
        max_manifest_size=DEFAULT_MAX_MANIFEST_SIZE,
    ):
        self.conf = conf
        self.app = app
        self.logger = get_logger(conf, log_route="slo")
        self.max_manifest_segments = max_manifest_segments
        self.max_manifest_size = max_manifest_size
        self.min_segment_size = min_segment_size
        self.max_get_time = int(self.conf.get("max_get_time", 86400))
        self.rate_limit_after_segment = int(self.conf.get("rate_limit_after_segment", "10"))
        self.rate_limit_segments_per_sec = int(self.conf.get("rate_limit_segments_per_sec", "0"))
        self.bulk_deleter = Bulk(app, {}, logger=self.logger)

    def handle_multipart_get_or_head(self, req, start_response):
        """
        Handles the GET or HEAD of a SLO manifest.

        The response body (only on GET, of course) will consist of the
        concatenation of the segments.

        :params req: a swob.Request with a path referencing an object
        :raises: HttpException on errors
        """
        return SloGetContext(self).handle_slo_get_or_head(req, start_response)

    def copy_hook(self, inner_hook):
        def slo_hook(source_req, source_resp, sink_req):
            x_slo = source_resp.headers.get("X-Static-Large-Object")
            if (
                config_true_value(x_slo)
                and source_req.params.get("multipart-manifest") != "get"
                and "swift.post_as_copy" not in source_req.environ
            ):
                source_resp = SloGetContext(self).get_or_head_response(
                    source_req, source_resp.headers.items(), source_resp.app_iter
                )
            return inner_hook(source_req, source_resp, sink_req)

        return slo_hook

    def handle_multipart_put(self, req, start_response):
        """
        Will handle the PUT of a SLO manifest.
        Heads every object in manifest to check if is valid and if so will
        save a manifest generated from the user input. Uses WSGIContext to
        call self and start_response and returns a WSGI iterator.

        :params req: a swob.Request with an obj in path
        :raises: HttpException on errors
        """
        try:
            vrs, account, container, obj = req.split_path(1, 4, True)
        except ValueError:
            return self.app(req.environ, start_response)
        if req.content_length > self.max_manifest_size:
            raise HTTPRequestEntityTooLarge("Manifest File > %d bytes" % self.max_manifest_size)
        if req.headers.get("X-Copy-From"):
            raise HTTPMethodNotAllowed("Multipart Manifest PUTs cannot be COPY requests")
        if req.content_length is None and req.headers.get("transfer-encoding", "").lower() != "chunked":
            raise HTTPLengthRequired(request=req)
        parsed_data = parse_input(req.body_file.read(self.max_manifest_size))
        problem_segments = []

        if len(parsed_data) > self.max_manifest_segments:
            raise HTTPRequestEntityTooLarge("Number of segments must be <= %d" % self.max_manifest_segments)
        total_size = 0
        out_content_type = req.accept.best_match(ACCEPTABLE_FORMATS)
        if not out_content_type:
            out_content_type = "text/plain"
        data_for_storage = []
        slo_etag = md5()
        last_obj_path = None
        for index, seg_dict in enumerate(parsed_data):
            obj_name = seg_dict["path"]
            if isinstance(obj_name, unicode):
                obj_name = obj_name.encode("utf-8")
            obj_path = "/".join(["", vrs, account, obj_name.lstrip("/")])
            if req.path == quote(obj_path):
                raise HTTPConflict('Manifest object name "%s" ' "cannot be included in the manifest" % obj_name)
            try:
                seg_size = int(seg_dict["size_bytes"])
            except (ValueError, TypeError):
                if seg_dict["size_bytes"] is None:
                    seg_size = None
                else:
                    raise HTTPBadRequest("Invalid Manifest File")
            if seg_size is not None and seg_size < self.min_segment_size and index < len(parsed_data) - 1:
                raise HTTPBadRequest(
                    "Each segment, except the last, must be at least " "%d bytes." % self.min_segment_size
                )

            new_env = req.environ.copy()
            new_env["PATH_INFO"] = obj_path
            new_env["REQUEST_METHOD"] = "HEAD"
            new_env["swift.source"] = "SLO"
            del (new_env["wsgi.input"])
            del (new_env["QUERY_STRING"])
            new_env["CONTENT_LENGTH"] = 0
            new_env["HTTP_USER_AGENT"] = "%s MultipartPUT" % req.environ.get("HTTP_USER_AGENT")
            if obj_path != last_obj_path:
                last_obj_path = obj_path
                head_seg_resp = Request.blank(obj_path, new_env).get_response(self)

            if head_seg_resp.is_success:
                segment_length = head_seg_resp.content_length
                if seg_dict.get("range"):
                    # Since we now know the length, we can normalize the ranges
                    ranges = seg_dict["range"].ranges_for_length(head_seg_resp.content_length)

                    if not ranges:
                        problem_segments.append([quote(obj_name), "Unsatisfiable Range"])
                    elif len(ranges) > 1:
                        problem_segments.append([quote(obj_name), "Multiple Ranges"])
                    elif ranges == [(0, head_seg_resp.content_length)]:
                        # Just one range, and it exactly matches the object.
                        # Why'd we do this again?
                        seg_dict["range"] = None
                        segment_length = head_seg_resp.content_length
                    else:
                        range = ranges[0]
                        seg_dict["range"] = "%d-%d" % (range[0], range[1] - 1)
                        segment_length = range[1] - range[0]

                if segment_length < self.min_segment_size and index < len(parsed_data) - 1:
                    raise HTTPBadRequest(
                        "Each segment, except the last, must be at least " "%d bytes." % self.min_segment_size
                    )
                total_size += segment_length
                if seg_size is not None and seg_size != head_seg_resp.content_length:
                    problem_segments.append([quote(obj_name), "Size Mismatch"])
                if seg_dict["etag"] is None or seg_dict["etag"] == head_seg_resp.etag:
                    if seg_dict.get("range"):
                        slo_etag.update("%s:%s;" % (head_seg_resp.etag, seg_dict["range"]))
                    else:
                        slo_etag.update(head_seg_resp.etag)
                else:
                    problem_segments.append([quote(obj_name), "Etag Mismatch"])
                if head_seg_resp.last_modified:
                    last_modified = head_seg_resp.last_modified
                else:
                    # shouldn't happen
                    last_modified = datetime.now()

                last_modified_formatted = last_modified.strftime("%Y-%m-%dT%H:%M:%S.%f")
                seg_data = {
                    "name": "/" + seg_dict["path"].lstrip("/"),
                    "bytes": head_seg_resp.content_length,
                    "hash": head_seg_resp.etag,
                    "content_type": head_seg_resp.content_type,
                    "last_modified": last_modified_formatted,
                }
                if seg_dict.get("range"):
                    seg_data["range"] = seg_dict["range"]

                if config_true_value(head_seg_resp.headers.get("X-Static-Large-Object")):
                    seg_data["sub_slo"] = True
                data_for_storage.append(seg_data)

            else:
                problem_segments.append([quote(obj_name), head_seg_resp.status])
        if problem_segments:
            resp_body = get_response_body(out_content_type, {}, problem_segments)
            raise HTTPBadRequest(resp_body, content_type=out_content_type)
        env = req.environ

        if not env.get("CONTENT_TYPE"):
            guessed_type, _junk = mimetypes.guess_type(req.path_info)
            env["CONTENT_TYPE"] = guessed_type or "application/octet-stream"
        env["swift.content_type_overridden"] = True
        env["CONTENT_TYPE"] += ";swift_bytes=%d" % total_size
        env["HTTP_X_STATIC_LARGE_OBJECT"] = "True"
        json_data = json.dumps(data_for_storage)
        if six.PY3:
            json_data = json_data.encode("utf-8")
        env["CONTENT_LENGTH"] = str(len(json_data))
        env["wsgi.input"] = BytesIO(json_data)

        slo_put_context = SloPutContext(self, slo_etag)
        return slo_put_context.handle_slo_put(req, start_response)

    def get_segments_to_delete_iter(self, req):
        """
        A generator function to be used to delete all the segments and
        sub-segments referenced in a manifest.

        :params req: a swob.Request with an SLO manifest in path
        :raises HTTPPreconditionFailed: on invalid UTF8 in request path
        :raises HTTPBadRequest: on too many buffered sub segments and
                                on invalid SLO manifest path
        """
        if not check_utf8(req.path_info):
            raise HTTPPreconditionFailed(request=req, body="Invalid UTF8 or contains NULL")
        vrs, account, container, obj = req.split_path(4, 4, True)

        segments = [{"sub_slo": True, "name": ("/%s/%s" % (container, obj)).decode("utf-8")}]
        while segments:
            if len(segments) > MAX_BUFFERED_SLO_SEGMENTS:
                raise HTTPBadRequest("Too many buffered slo segments to delete.")
            seg_data = segments.pop(0)
            if seg_data.get("sub_slo"):
                try:
                    segments.extend(self.get_slo_segments(seg_data["name"], req))
                except HTTPException as err:
                    # allow bulk delete response to report errors
                    seg_data["error"] = {"code": err.status_int, "message": err.body}

                # add manifest back to be deleted after segments
                seg_data["sub_slo"] = False
                segments.append(seg_data)
            else:
                seg_data["name"] = seg_data["name"].encode("utf-8")
                yield seg_data

    def get_slo_segments(self, obj_name, req):
        """
        Performs a swob.Request and returns the SLO manifest's segments.

        :raises HTTPServerError: on unable to load obj_name or
                                 on unable to load the SLO manifest data.
        :raises HTTPBadRequest: on not an SLO manifest
        :raises HTTPNotFound: on SLO manifest not found
        :returns: SLO manifest's segments
        """
        vrs, account, _junk = req.split_path(2, 3, True)
        new_env = req.environ.copy()
        new_env["REQUEST_METHOD"] = "GET"
        del (new_env["wsgi.input"])
        new_env["QUERY_STRING"] = "multipart-manifest=get"
        new_env["CONTENT_LENGTH"] = 0
        new_env["HTTP_USER_AGENT"] = "%s MultipartDELETE" % new_env.get("HTTP_USER_AGENT")
        new_env["swift.source"] = "SLO"
        new_env["PATH_INFO"] = ("/%s/%s/%s" % (vrs, account, obj_name.lstrip("/"))).encode("utf-8")
        resp = Request.blank("", new_env).get_response(self.app)

        if resp.is_success:
            if config_true_value(resp.headers.get("X-Static-Large-Object")):
                try:
                    return json.loads(resp.body)
                except ValueError:
                    raise HTTPServerError("Unable to load SLO manifest")
            else:
                raise HTTPBadRequest("Not an SLO manifest")
        elif resp.status_int == HTTP_NOT_FOUND:
            raise HTTPNotFound("SLO manifest not found")
        elif resp.status_int == HTTP_UNAUTHORIZED:
            raise HTTPUnauthorized("401 Unauthorized")
        else:
            raise HTTPServerError("Unable to load SLO manifest or segment.")

    def handle_multipart_delete(self, req):
        """
        Will delete all the segments in the SLO manifest and then, if
        successful, will delete the manifest file.

        :params req: a swob.Request with an obj in path
        :returns: swob.Response whose app_iter set to Bulk.handle_delete_iter
        """
        resp = HTTPOk(request=req)
        out_content_type = req.accept.best_match(ACCEPTABLE_FORMATS)
        if out_content_type:
            resp.content_type = out_content_type
        resp.app_iter = self.bulk_deleter.handle_delete_iter(
            req,
            objs_to_delete=self.get_segments_to_delete_iter(req),
            user_agent="MultipartDELETE",
            swift_source="SLO",
            out_content_type=out_content_type,
        )
        return resp

    def __call__(self, env, start_response):
        """
        WSGI entry point
        """
        req = Request(env)
        try:
            vrs, account, container, obj = req.split_path(4, 4, True)
        except ValueError:
            return self.app(env, start_response)

        # install our COPY-callback hook
        env["swift.copy_hook"] = self.copy_hook(
            env.get("swift.copy_hook", lambda src_req, src_resp, sink_req: src_resp)
        )

        try:
            if req.method == "PUT" and req.params.get("multipart-manifest") == "put":
                return self.handle_multipart_put(req, start_response)
            if req.method == "DELETE" and req.params.get("multipart-manifest") == "delete":
                return self.handle_multipart_delete(req)(env, start_response)
            if req.method == "GET" or req.method == "HEAD":
                return self.handle_multipart_get_or_head(req, start_response)
            if "X-Static-Large-Object" in req.headers:
                raise HTTPBadRequest(
                    request=req,
                    body="X-Static-Large-Object is a reserved header. "
                    "To create a static large object add query param "
                    "multipart-manifest=put.",
                )
        except HTTPException as err_resp:
            return err_resp(env, start_response)

        return self.app(env, start_response)
Esempio n. 15
0
class StaticLargeObject(object):
    """
    StaticLargeObject Middleware

    See above for a full description.

    The proxy logs created for any subrequests made will have swift.source set
    to "SLO".

    :param app: The next WSGI filter or app in the paste.deploy chain.
    :param conf: The configuration dict for the middleware.
    """

    def __init__(self, app, conf):
        self.conf = conf
        self.app = app
        self.logger = get_logger(conf, log_route="slo")
        self.max_manifest_segments = int(self.conf.get("max_manifest_segments", 1000))
        self.max_manifest_size = int(self.conf.get("max_manifest_size", 1024 * 1024 * 2))
        self.min_segment_size = int(self.conf.get("min_segment_size", 1024 * 1024))
        self.bulk_deleter = Bulk(app, {"max_deletes_per_request": self.max_manifest_segments})

    def handle_multipart_put(self, req):
        """
        Will handle the PUT of a SLO manifest.
        Heads every object in manifest to check if is valid and if so will
        save a manifest generated from the user input.

        :params req: a swob.Request with an obj in path
        :raises: HttpException on errors
        """
        try:
            vrs, account, container, obj = req.split_path(1, 4, True)
        except ValueError:
            return self.app
        if req.content_length > self.max_manifest_size:
            raise HTTPRequestEntityTooLarge("Manifest File > %d bytes" % self.max_manifest_size)
        if req.headers.get("X-Copy-From"):
            raise HTTPMethodNotAllowed("Multipart Manifest PUTs cannot be Copy requests")
        if req.content_length is None and req.headers.get("transfer-encoding", "").lower() != "chunked":
            raise HTTPLengthRequired(request=req)
        parsed_data = parse_input(req.body_file.read(self.max_manifest_size))
        problem_segments = []

        if len(parsed_data) > self.max_manifest_segments:
            raise HTTPRequestEntityTooLarge("Number segments must be <= %d" % self.max_manifest_segments)
        total_size = 0
        out_content_type = req.accept.best_match(ACCEPTABLE_FORMATS)
        if not out_content_type:
            out_content_type = "text/plain"
        data_for_storage = []
        for index, seg_dict in enumerate(parsed_data):
            obj_path = "/".join(["", vrs, account, seg_dict["path"].lstrip("/")])
            try:
                seg_size = int(seg_dict["size_bytes"])
            except (ValueError, TypeError):
                raise HTTPBadRequest("Invalid Manifest File")
            if seg_size < self.min_segment_size and (index == 0 or index < len(parsed_data) - 1):
                raise HTTPBadRequest(
                    "Each segment, except the last, must be larger than " "%d bytes." % self.min_segment_size
                )

            new_env = req.environ.copy()
            if isinstance(obj_path, unicode):
                obj_path = obj_path.encode("utf-8")
            new_env["PATH_INFO"] = obj_path
            new_env["REQUEST_METHOD"] = "HEAD"
            new_env["swift.source"] = "SLO"
            del (new_env["wsgi.input"])
            del (new_env["QUERY_STRING"])
            new_env["CONTENT_LENGTH"] = 0
            new_env["HTTP_USER_AGENT"] = "%s MultipartPUT" % req.environ.get("HTTP_USER_AGENT")
            head_seg_resp = Request.blank(obj_path, new_env).get_response(self.app)
            if head_seg_resp.is_success:
                total_size += seg_size
                if seg_size != head_seg_resp.content_length:
                    problem_segments.append([quote(obj_path), "Size Mismatch"])
                if seg_dict["etag"] != head_seg_resp.etag:
                    problem_segments.append([quote(obj_path), "Etag Mismatch"])
                if head_seg_resp.last_modified:
                    last_modified = head_seg_resp.last_modified
                else:
                    # shouldn't happen
                    last_modified = datetime.now()

                last_modified_formatted = last_modified.strftime("%Y-%m-%dT%H:%M:%S.%f")
                seg_data = {
                    "name": "/" + seg_dict["path"].lstrip("/"),
                    "bytes": seg_size,
                    "hash": seg_dict["etag"],
                    "content_type": head_seg_resp.content_type,
                    "last_modified": last_modified_formatted,
                }
                if config_true_value(head_seg_resp.headers.get("X-Static-Large-Object")):
                    seg_data["sub_slo"] = True
                data_for_storage.append(seg_data)

            else:
                problem_segments.append([quote(obj_path), head_seg_resp.status])
        if problem_segments:
            resp_body = get_response_body(out_content_type, {}, problem_segments)
            raise HTTPBadRequest(resp_body, content_type=out_content_type)
        env = req.environ

        if not env.get("CONTENT_TYPE"):
            guessed_type, _junk = mimetypes.guess_type(req.path_info)
            env["CONTENT_TYPE"] = guessed_type or "application/octet-stream"
        env["swift.content_type_overriden"] = True
        env["CONTENT_TYPE"] += ";swift_bytes=%d" % total_size
        env["HTTP_X_STATIC_LARGE_OBJECT"] = "True"
        json_data = json.dumps(data_for_storage)
        env["CONTENT_LENGTH"] = str(len(json_data))
        env["wsgi.input"] = StringIO(json_data)
        return self.app

    def get_segments_to_delete_iter(self, req):
        """
        A generator function to be used to delete all the segments and
        sub-segments referenced in a manifest.

        :raises HTTPBadRequest: on sub manifest not manifest anymore or
                                on too many buffered sub segments
        :raises HTTPServerError: on unable to load manifest
        """
        try:
            vrs, account, container, obj = req.split_path(4, 4, True)
        except ValueError:
            raise HTTPBadRequest("Not a SLO manifest")
        sub_segments = [{"sub_slo": True, "name": ("/%s/%s" % (container, obj)).decode("utf-8")}]
        while sub_segments:
            if len(sub_segments) > MAX_BUFFERED_SLO_SEGMENTS:
                raise HTTPBadRequest("Too many buffered slo segments to delete.")
            if sub_segments:
                seg_data = sub_segments.pop(0)
            if seg_data.get("sub_slo"):
                new_env = req.environ.copy()
                new_env["REQUEST_METHOD"] = "GET"
                del (new_env["wsgi.input"])
                new_env["QUERY_STRING"] = "multipart-manifest=get"
                new_env["CONTENT_LENGTH"] = 0
                new_env["HTTP_USER_AGENT"] = "%s MultipartDELETE" % new_env.get("HTTP_USER_AGENT")
                new_env["swift.source"] = "SLO"
                new_env["PATH_INFO"] = ("/%s/%s/%s" % (vrs, account, seg_data["name"].lstrip("/"))).encode("utf-8")
                sub_resp = Request.blank("", new_env).get_response(self.app)
                if sub_resp.is_success:
                    try:
                        # if its still a SLO, load its segments
                        if config_true_value(sub_resp.headers.get("X-Static-Large-Object")):
                            sub_segments.extend(json.loads(sub_resp.body))
                    except ValueError:
                        raise HTTPServerError("Unable to load SLO manifest")
                    # add sub-manifest back to be deleted after sub segments
                    # (even if obj is not a SLO)
                    seg_data["sub_slo"] = False
                    sub_segments.append(seg_data)
                elif sub_resp.status_int != HTTP_NOT_FOUND:
                    # on deletes treat not found as success
                    raise HTTPServerError("Sub SLO unable to load.")
            else:
                yield seg_data["name"].encode("utf-8")

    def handle_multipart_delete(self, req):
        """
        Will delete all the segments in the SLO manifest and then, if
        successful, will delete the manifest file.
        :params req: a swob.Request with an obj in path
        :raises HTTPServerError: on invalid manifest
        :returns: swob.Response whose app_iter set to Bulk.handle_delete_iter
        """
        if not check_utf8(req.path_info):
            raise HTTPPreconditionFailed(request=req, body="Invalid UTF8 or contains NULL")

        resp = HTTPOk(request=req)
        out_content_type = req.accept.best_match(ACCEPTABLE_FORMATS)
        if out_content_type:
            resp.content_type = out_content_type
        resp.app_iter = self.bulk_deleter.handle_delete_iter(
            req,
            objs_to_delete=self.get_segments_to_delete_iter(req),
            user_agent="MultipartDELETE",
            swift_source="SLO",
            out_content_type=out_content_type,
        )
        return resp

    @wsgify
    def __call__(self, req):
        """
        WSGI entry point
        """
        try:
            vrs, account, container, obj = req.split_path(1, 4, True)
        except ValueError:
            return self.app
        if obj:
            if req.method == "PUT" and req.params.get("multipart-manifest") == "put":
                return self.handle_multipart_put(req)
            if req.method == "DELETE" and req.params.get("multipart-manifest") == "delete":
                return self.handle_multipart_delete(req)
            if "X-Static-Large-Object" in req.headers:
                raise HTTPBadRequest(
                    request=req,
                    body="X-Static-Large-Object is a reserved header. "
                    "To create a static large object add query param "
                    "multipart-manifest=put.",
                )

        return self.app