Beispiel #1
0
    def POST(self, req):
        """
        Handles Delete Multiple Objects.

        Parses the ``Delete`` XML document carried by the request, issues one
        DELETE per listed key through a StreamingPile and answers with a
        ``DeleteResult`` document. Successfully deleted keys are only listed
        when the request did not ask for quiet mode.
        """
        def iter_requested_objects(root):
            # One (key, version id or None) pair per <Object> child.
            for node in root.iterchildren('Object'):
                name = node.find('./Key').text
                if not name:
                    raise UserKeyMustBeSpecified()
                version_node = node.find('./VersionId')
                if version_node is None:
                    yield name, None
                else:
                    yield name, version_node.text

        # FWIW, AWS limits multideletes to 1000 keys, and swift limits
        # object names to 1024 bytes (by default). The factor of two allows
        # some slop.
        name_based_limit = (
            2 * self.conf.max_multi_delete_objects * MAX_OBJECT_NAME_LENGTH)
        # But, don't let operators shoot themselves in the foot
        hard_limit = 10 * 1024 * 1024
        max_body_size = min(name_based_limit, hard_limit)

        try:
            xml = req.xml(max_body_size)
            if not xml:
                raise MissingRequestBodyError()

            req.check_md5(xml)
            request_root = fromstring(xml, 'Delete', self.logger)

            quiet_node = request_root.find('./Quiet')
            self.quiet = (quiet_node is not None and
                          quiet_node.text.lower() == 'true')

            delete_list = list(iter_requested_objects(request_root))
            if len(delete_list) > self.conf.max_multi_delete_objects:
                raise MalformedXML()
        except (XMLSyntaxError, DocumentInvalid):
            raise MalformedXML()
        except ErrorResponse:
            raise
        except Exception as e:
            self.logger.error(e)
            raise

        result_root = Element('DeleteResult')

        # check bucket existence
        try:
            req.get_response(self.app, 'HEAD')
        except AccessDenied as error:
            body = self._gen_error_body(error, result_root, delete_list)
            return HTTPOk(body=body)

        for _key, version in delete_list:
            if version is not None:
                # TODO: support deleting specific versions of objects
                raise S3NotImplemented()

        def do_delete(base_req, key, version):
            # Every delete runs against its own shallow copy of the request
            # and of its environ.
            sub_req = copy.copy(base_req)
            sub_req.environ = copy.copy(base_req.environ)
            sub_req.object_name = key

            try:
                query = sub_req.gen_multipart_manifest_delete_query(self.app)
                resp = sub_req.get_response(
                    self.app,
                    method='DELETE',
                    query=query,
                    headers={'Accept': 'application/json'})
                # Have to read the response to actually do the SLO delete
                if query:
                    try:
                        summary = json.loads(resp.body)
                        failures = summary['Errors']
                        if failures:
                            # NB: bulk includes 404s in "Number Not Found",
                            # not "Errors"
                            lines = [summary['Response Status']]
                            lines += ['%s: %s' % (name, status)
                                      for name, status in failures]
                            return key, {
                                'code': 'SLODeleteError',
                                'message': '\n'.join(lines)
                            }
                        # otherwise the SLO delete went through cleanly
                    except (ValueError, TypeError, KeyError):
                        # Logs get all the gory details
                        self.logger.exception(
                            'Could not parse SLO delete response: %r',
                            resp.body)
                        # Client gets something more generic
                        return key, {
                            'code': 'SLODeleteError',
                            'message': 'Unexpected swift response'
                        }
            except NoSuchKey:
                # A key that is already gone is reported as deleted.
                pass
            except ErrorResponse as e:
                return key, {'code': e.__class__.__name__, 'message': e._msg}
            return key, None

        with StreamingPile(self.conf.multi_delete_concurrency) as pile:
            delete_args = ((req, key, version)
                           for key, version in delete_list)
            for key, err in pile.asyncstarmap(do_delete, delete_args):
                if err:
                    error_node = SubElement(result_root, 'Error')
                    for tag, text in (('Key', key),
                                      ('Code', err['code']),
                                      ('Message', err['message'])):
                        SubElement(error_node, tag).text = text
                elif not self.quiet:
                    deleted_node = SubElement(result_root, 'Deleted')
                    SubElement(deleted_node, 'Key').text = key

        return HTTPOk(body=tostring(result_root))
Beispiel #2
0
    def handle_delete_iter(self,
                           req,
                           objs_to_delete=None,
                           user_agent='BulkDelete',
                           swift_source='BD',
                           out_content_type='text/plain'):
        """
        A generator that can be assigned to a swob Response's app_iter which,
        when iterated over, will delete the objects specified in request body.
        Will occasionally yield whitespace while request is being processed.
        When the request is completed will yield a response body that can be
        parsed to determine success. See above documentation for details.

        :param req: a swob Request
        :param objs_to_delete: a list of dictionaries that specifies the
            (native string) objects to be deleted. If None, uses
            self.get_objs_to_delete to query request. Each dictionary is read
            for a 'name' key and, optionally, 'error' (with 'code' and
            'message') and 'version_id' keys. The collection is walked twice
            (objects first, then containers), so it must be re-iterable.
        :param user_agent: suffix appended to the original user agent of
            every DELETE subrequest.
        :param swift_source: swift_source value handed to every DELETE
            subrequest.
        :param out_content_type: content type used to render the final
            response body; a falsy value is reported as HTTPNotAcceptable
            in the response body.
        """
        last_yield = time()
        # The first keep-alive chunk doubles as the XML declaration when the
        # output is XML; every later keep-alive chunk is a single space.
        if out_content_type and out_content_type.endswith('/xml'):
            to_yield = b'<?xml version="1.0" encoding="UTF-8"?>\n'
        else:
            to_yield = b' '
        # Stays empty until a keep-alive chunk has been sent; after that the
        # final body is prefixed with a blank-line separator.
        separator = b''
        # [quoted name, status or message] pairs for entries that failed.
        failed_files = []
        resp_dict = {
            'Response Status': HTTPOk().status,
            'Response Body': '',
            'Number Deleted': 0,
            'Number Not Found': 0
        }
        # NOTE(review): presumably lets eventlet flush the tiny keep-alive
        # chunks right away instead of buffering them -- confirm.
        req.environ['eventlet.minimum_write_chunk_size'] = 0
        try:
            if not out_content_type:
                raise HTTPNotAcceptable(request=req)

            try:
                vrs, account, _junk = req.split_path(2, 3, True)
            except ValueError:
                raise HTTPNotFound(request=req)
            vrs = wsgi_to_str(vrs)
            account = wsgi_to_str(account)

            incoming_format = req.headers.get('Content-Type')
            if incoming_format and \
                    not incoming_format.startswith('text/plain'):
                # For now only accept newline separated object names
                raise HTTPNotAcceptable(request=req)

            if objs_to_delete is None:
                objs_to_delete = self.get_objs_to_delete(req)
            # Mutable holder for the response class used when reporting
            # failures; it is handed to self._process_delete below.
            failed_file_response = {'type': HTTPBadRequest}

            def delete_filter(predicate, objs_to_delete):
                # Lazily yield (name, path, version id) for each entry whose
                # name satisfies ``predicate``. Entries that cannot be
                # deleted are recorded in resp_dict / failed_files as a side
                # effect of iterating, so nothing is tallied until this
                # generator is consumed.
                for obj_to_delete in objs_to_delete:
                    obj_name = obj_to_delete['name']
                    if not obj_name:
                        continue
                    if not predicate(obj_name):
                        continue
                    if obj_to_delete.get('error'):
                        # The entry already carries an error: count it and
                        # do not issue a DELETE for it.
                        if obj_to_delete['error']['code'] == HTTP_NOT_FOUND:
                            resp_dict['Number Not Found'] += 1
                        else:
                            failed_files.append([
                                wsgi_quote(str_to_wsgi(obj_name)),
                                obj_to_delete['error']['message']
                            ])
                        continue
                    delete_path = '/'.join(
                        ['', vrs, account,
                         obj_name.lstrip('/')])
                    if not constraints.check_utf8(delete_path):
                        failed_files.append([
                            wsgi_quote(str_to_wsgi(obj_name)),
                            HTTPPreconditionFailed().status
                        ])
                        continue
                    yield (obj_name, delete_path,
                           obj_to_delete.get('version_id'))

            def objs_then_containers(objs_to_delete):
                # A name that still contains a '/' once its leading and
                # trailing slashes are stripped is treated as an object;
                # anything else is treated as a container.
                # process all objects first
                yield delete_filter(lambda name: '/' in name.strip('/'),
                                    objs_to_delete)
                # followed by containers
                yield delete_filter(lambda name: '/' not in name.strip('/'),
                                    objs_to_delete)

            def do_delete(obj_name, delete_path, version_id):
                # Issue a single DELETE subrequest and return
                # (response, name, retry count); the retry count starts at 0.
                delete_obj_req = make_subrequest(
                    req.environ,
                    method='DELETE',
                    path=wsgi_quote(str_to_wsgi(delete_path)),
                    headers={'X-Auth-Token': req.headers.get('X-Auth-Token')},
                    body='',
                    agent='%(orig)s ' + user_agent,
                    swift_source=swift_source)
                if version_id is None:
                    delete_obj_req.params = {}
                else:
                    delete_obj_req.params = {'version-id': version_id}
                return (delete_obj_req.get_response(self.app), obj_name, 0)

            with StreamingPile(self.delete_concurrency) as pile:
                for names_to_delete in objs_then_containers(objs_to_delete):
                    for resp, obj_name, retry in pile.asyncstarmap(
                            do_delete, names_to_delete):
                        # Send a keep-alive chunk once more than
                        # self.yield_frequency seconds have passed since the
                        # previous one.
                        if last_yield + self.yield_frequency < time():
                            last_yield = time()
                            yield to_yield
                            to_yield, separator = b' ', b'\r\n\r\n'
                        # NOTE(review): _process_delete is not visible here;
                        # presumably it updates resp_dict / failed_files and
                        # may queue a retry on the pile -- confirm.
                        self._process_delete(resp, pile, obj_name, resp_dict,
                                             failed_files,
                                             failed_file_response, retry)
                        if len(failed_files) >= self.max_failed_deletes:
                            # Abort, but drain off the in-progress deletes
                            for resp, obj_name, retry in pile:
                                if last_yield + self.yield_frequency < time():
                                    last_yield = time()
                                    yield to_yield
                                    to_yield, separator = b' ', b'\r\n\r\n'
                                # Don't pass in the pile, as we shouldn't retry
                                self._process_delete(resp, None, obj_name,
                                                     resp_dict, failed_files,
                                                     failed_file_response,
                                                     retry)
                            msg = 'Max delete failures exceeded'
                            raise HTTPBadRequest(msg)

            if failed_files:
                resp_dict['Response Status'] = \
                    failed_file_response['type']().status
            elif not (resp_dict['Number Deleted']
                      or resp_dict['Number Not Found']):
                # Nothing was deleted and nothing was missing, so the
                # request named no usable entries.
                resp_dict['Response Status'] = HTTPBadRequest().status
                resp_dict['Response Body'] = 'Invalid bulk delete.'

        except HTTPException as err:
            # HTTP errors raised above are reported inside the response body
            # rather than propagated to the caller.
            resp_dict['Response Status'] = err.status
            resp_dict['Response Body'] = err.body.decode('utf-8')
        except Exception:
            self.logger.exception('Error in bulk delete.')
            resp_dict['Response Status'] = HTTPServerError().status

        # The final chunk is always the rendered summary, whatever happened
        # above.
        yield separator + get_response_body(out_content_type, resp_dict,
                                            failed_files, 'delete')
Beispiel #3
0
    def handle_multipart_put(self, req, start_response):
        """
        Will handle the PUT of a SLO manifest.

        Heads every object in manifest to check if is valid and if so will
        save a manifest generated from the user input. The stored manifest is
        the JSON list of validated segment descriptions; the request is then
        passed on to the wrapped app with the SLO etag/size headers set.

        :param req: a swob.Request with an obj in path
        :param start_response: the WSGI start_response callable
        :returns: the WSGI iterator returned by the wrapped app
        :raises HTTPRequestEntityTooLarge: if the declared body size exceeds
            self.max_manifest_size or the manifest lists more than
            self.max_manifest_segments segments
        :raises HTTPMethodNotAllowed: if the request carries X-Copy-From
        :raises HTTPLengthRequired: if there is neither a content length nor
            chunked transfer encoding
        :raises HTTPBadRequest: if any segment failed validation; the body
            lists the offending segments
        """
        try:
            vrs, account, container, obj = req.split_path(1, 4, True)
        except ValueError:
            # Not an object path: hand the request through untouched.
            return self.app(req.environ, start_response)
        # content_length is None for chunked uploads; comparing None with an
        # int raises TypeError on Python 3, so only compare a known length.
        if req.content_length is not None and \
                req.content_length > self.max_manifest_size:
            raise HTTPRequestEntityTooLarge("Manifest File > %d bytes" %
                                            self.max_manifest_size)
        if req.headers.get('X-Copy-From'):
            raise HTTPMethodNotAllowed(
                'Multipart Manifest PUTs cannot be COPY requests')
        if req.content_length is None and \
                req.headers.get('transfer-encoding', '').lower() != 'chunked':
            raise HTTPLengthRequired(request=req)
        parsed_data = parse_and_validate_input(
            req.body_file.read(self.max_manifest_size), req.path)
        problem_segments = []

        if len(parsed_data) > self.max_manifest_segments:
            raise HTTPRequestEntityTooLarge(
                'Number of segments must be <= %d' %
                self.max_manifest_segments)
        total_size = 0
        out_content_type = req.accept.best_match(ACCEPTABLE_FORMATS)
        if not out_content_type:
            out_content_type = 'text/plain'
        # Map each distinct segment path to the manifest positions using it,
        # so every path is HEADed only once.
        path2indices = defaultdict(list)
        for index, seg_dict in enumerate(parsed_data):
            path2indices[seg_dict['path']].append(index)

        def do_head(obj_name):
            # HEAD one segment; returns (segment path, response).
            obj_path = '/'.join(
                ['', vrs, account,
                 get_valid_utf8_str(obj_name).lstrip('/')])

            sub_req = make_subrequest(
                req.environ,
                path=obj_path + '?',  # kill the query string
                method='HEAD',
                headers={'x-auth-token': req.headers.get('x-auth-token')},
                agent='%(orig)s SLO MultipartPUT',
                swift_source='SLO')
            return obj_name, sub_req.get_response(self)

        def validate_seg_dict(seg_dict, head_seg_resp):
            # Check one manifest entry against its HEAD response. Returns
            # (segment length, manifest entry), or (0, None) when the HEAD
            # failed. Problems are appended to problem_segments.
            #
            # Take the name from the entry itself instead of relying on the
            # loop variable of the enclosing scope; both hold the same path.
            obj_name = seg_dict['path']
            if not head_seg_resp.is_success:
                problem_segments.append(
                    [quote(obj_name), head_seg_resp.status])
                return 0, None

            segment_length = head_seg_resp.content_length
            if seg_dict.get('range'):
                # Since we now know the length, we can normalize the
                # range. We know that there is exactly one range
                # requested since we checked that earlier in
                # parse_and_validate_input().
                ranges = seg_dict['range'].ranges_for_length(
                    head_seg_resp.content_length)

                if not ranges:
                    problem_segments.append(
                        [quote(obj_name), 'Unsatisfiable Range'])
                elif ranges == [(0, head_seg_resp.content_length)]:
                    # Just one range, and it exactly matches the object.
                    # Why'd we do this again?
                    del seg_dict['range']
                    segment_length = head_seg_resp.content_length
                else:
                    rng = ranges[0]
                    # Stored ranges are inclusive; ranges_for_length() gives
                    # an exclusive end.
                    seg_dict['range'] = '%d-%d' % (rng[0], rng[1] - 1)
                    segment_length = rng[1] - rng[0]

            if segment_length < 1:
                problem_segments.append([
                    quote(obj_name),
                    'Too small; each segment must be at least 1 byte.'
                ])
            if seg_dict['size_bytes'] is not None and \
                    seg_dict['size_bytes'] != head_seg_resp.content_length:
                problem_segments.append([quote(obj_name), 'Size Mismatch'])
            if seg_dict['etag'] is not None and \
                    seg_dict['etag'] != head_seg_resp.etag:
                problem_segments.append([quote(obj_name), 'Etag Mismatch'])
            if head_seg_resp.last_modified:
                last_modified = head_seg_resp.last_modified
            else:
                # shouldn't happen
                last_modified = datetime.now()

            last_modified_formatted = \
                last_modified.strftime('%Y-%m-%dT%H:%M:%S.%f')
            seg_data = {
                'name': '/' + seg_dict['path'].lstrip('/'),
                'bytes': head_seg_resp.content_length,
                'hash': head_seg_resp.etag,
                'content_type': head_seg_resp.content_type,
                'last_modified': last_modified_formatted
            }
            if seg_dict.get('range'):
                seg_data['range'] = seg_dict['range']
            if config_true_value(
                    head_seg_resp.headers.get('X-Static-Large-Object')):
                seg_data['sub_slo'] = True
            return segment_length, seg_data

        # Responses arrive in completion order, so results are written back
        # by manifest index to preserve the user's segment order.
        data_for_storage = [None] * len(parsed_data)
        with StreamingPile(self.concurrency) as pile:
            for obj_name, resp in pile.asyncstarmap(
                    do_head, ((path, ) for path in path2indices)):
                for i in path2indices[obj_name]:
                    segment_length, seg_data = validate_seg_dict(
                        parsed_data[i], resp)
                    data_for_storage[i] = seg_data
                    total_size += segment_length

        if problem_segments:
            resp_body = get_response_body(out_content_type, {},
                                          problem_segments)
            raise HTTPBadRequest(resp_body, content_type=out_content_type)

        slo_etag = md5()
        for seg_data in data_for_storage:
            if seg_data.get('range'):
                etag_part = '%s:%s;' % (seg_data['hash'], seg_data['range'])
            else:
                etag_part = seg_data['hash']
            # Hash objects only accept bytes on Python 3; native py2 strings
            # are already bytes and pass through untouched.
            if not isinstance(etag_part, bytes):
                etag_part = etag_part.encode('ascii')
            slo_etag.update(etag_part)

        slo_etag = slo_etag.hexdigest()
        req.headers.update({
            SYSMETA_SLO_ETAG: slo_etag,
            SYSMETA_SLO_SIZE: total_size,
            'X-Static-Large-Object': 'True',
        })

        # The generated manifest replaces the user-supplied request body.
        json_data = json.dumps(data_for_storage)
        if six.PY3:
            json_data = json_data.encode('utf-8')
        req.body = json_data

        env = req.environ
        if not env.get('CONTENT_TYPE'):
            guessed_type, _junk = mimetypes.guess_type(req.path_info)
            env['CONTENT_TYPE'] = guessed_type or 'application/octet-stream'
        env['swift.content_type_overridden'] = True
        env['CONTENT_TYPE'] += ";swift_bytes=%d" % total_size

        def start_response_wrapper(status, headers, exc_info=None):
            # Replace the Etag header of the response with the quoted SLO
            # etag computed above.
            for i, (header, _value) in enumerate(headers):
                if header.lower() == 'etag':
                    headers[i] = ('Etag', '"%s"' % slo_etag)
                    break
            return start_response(status, headers, exc_info)

        return self.app(env, start_response_wrapper)
Beispiel #4
0
    def POST(self, req):
        """
        Handles Delete Multiple Objects.

        Parses the ``Delete`` XML document carried by the request, issues one
        DELETE per listed key through a StreamingPile and answers with a
        ``DeleteResult`` document. Successfully deleted keys are only listed
        when the request did not ask for quiet mode. When a key is deleted
        with a multipart-manifest delete query, the JSON summary of that
        delete is read and any failure is reported as an ``SLODeleteError``
        entry for the key.
        """
        # Imported locally: only needed to parse the SLO delete summary.
        import json

        def object_key_iter(elem):
            # One (key, version id or None) pair per <Object> child.
            for obj in elem.iterchildren('Object'):
                key = obj.find('./Key').text
                if not key:
                    raise UserKeyMustBeSpecified()
                version = obj.find('./VersionId')
                if version is not None:
                    version = version.text

                yield key, version

        max_body_size = min(
            # FWIW, AWS limits multideletes to 1000 keys, and swift limits
            # object names to 1024 bytes (by default). Add a factor of two to
            # allow some slop.
            2 * self.conf.max_multi_delete_objects * MAX_OBJECT_NAME_LENGTH,
            # But, don't let operators shoot themselves in the foot
            10 * 1024 * 1024)

        try:
            xml = req.xml(max_body_size)
            if not xml:
                raise MissingRequestBodyError()

            req.check_md5(xml)
            elem = fromstring(xml, 'Delete', self.logger)

            quiet = elem.find('./Quiet')
            if quiet is not None and quiet.text.lower() == 'true':
                self.quiet = True
            else:
                self.quiet = False

            delete_list = list(object_key_iter(elem))
            if len(delete_list) > self.conf.max_multi_delete_objects:
                raise MalformedXML()
        except (XMLSyntaxError, DocumentInvalid):
            raise MalformedXML()
        except ErrorResponse:
            raise
        except Exception as e:
            self.logger.error(e)
            raise

        elem = Element('DeleteResult')

        # check bucket existence
        try:
            req.get_response(self.app, 'HEAD')
        except AccessDenied as error:
            body = self._gen_error_body(error, elem, delete_list)
            return HTTPOk(body=body)

        if any(version is not None for _key, version in delete_list):
            # TODO: support deleting specific versions of objects
            raise S3NotImplemented()

        def do_delete(base_req, key, version):
            # Every delete runs against its own shallow copy of the request
            # and of its environ. Returns (key, None) on success or
            # (key, {'code': ..., 'message': ...}) on failure.
            req = copy.copy(base_req)
            req.environ = copy.copy(base_req.environ)
            req.object_name = key

            try:
                query = req.gen_multipart_manifest_delete_query(self.app)
                resp = req.get_response(self.app,
                                        method='DELETE',
                                        query=query,
                                        headers={'Accept': 'application/json'})
                # Have to read the response to actually do the SLO delete;
                # ignoring the body would also hide per-segment failures.
                if query:
                    try:
                        delete_result = json.loads(resp.body)
                        if delete_result['Errors']:
                            # NB: bulk includes 404s in "Number Not Found",
                            # not "Errors"
                            msg_parts = [delete_result['Response Status']]
                            msg_parts.extend(
                                '%s: %s' % (obj, status)
                                for obj, status in delete_result['Errors'])
                            return key, {
                                'code': 'SLODeleteError',
                                'message': '\n'.join(msg_parts)
                            }
                        # else, all good
                    except (ValueError, TypeError, KeyError):
                        # Logs get all the gory details
                        self.logger.exception(
                            'Could not parse SLO delete response: %r',
                            resp.body)
                        # Client gets something more generic
                        return key, {
                            'code': 'SLODeleteError',
                            'message': 'Unexpected swift response'
                        }
            except NoSuchKey:
                # A key that is already gone is reported as deleted.
                pass
            except ErrorResponse as e:
                return key, {'code': e.__class__.__name__, 'message': e._msg}
            return key, None

        with StreamingPile(self.conf.multi_delete_concurrency) as pile:
            for key, err in pile.asyncstarmap(
                    do_delete,
                ((req, key, version) for key, version in delete_list)):
                if err:
                    error = SubElement(elem, 'Error')
                    SubElement(error, 'Key').text = key
                    SubElement(error, 'Code').text = err['code']
                    SubElement(error, 'Message').text = err['message']
                elif not self.quiet:
                    deleted = SubElement(elem, 'Deleted')
                    SubElement(deleted, 'Key').text = key

        body = tostring(elem)

        return HTTPOk(body=body)