Example #1
0
 def _app_call(self, env):
     """
     Ensures start_response has been called before returning.
     """
     self._response_status = None
     self._response_headers = None
     self._response_exc_info = None
     resp = self.app(env, self._start_response)
     # if start_response has not been called, iterate until we've got a
     # non-empty chunk, by which time the app *should* have called it
     if self._response_status is None:
         resp = reiterate(resp)
     return resp
Example #2
0
 def _app_call(self, env):
     """
     Ensures start_response has been called before returning.
     """
     self._response_status = None
     self._response_headers = None
     self._response_exc_info = None
     resp = self.app(env, self._start_response)
     # if start_response has not been called, iterate until we've got a
     # non-empty chunk, by which time the app *should* have called it
     if self._response_status is None:
         resp = reiterate(resp)
     return resp
Example #3
0
File: swob.py Project: leoh0/swift
    def call_application(self, application):
        """
        Calls the application with this request's environment.  Returns the
        status, headers, and app_iter for the response as a tuple.

        :param application: the WSGI application to call
        :returns: a ``(status, headers, app_iter)`` tuple
        :raises RuntimeError: if the application never calls start_response
        """
        output = []
        captured = []

        def start_response(status, headers, exc_info=None):
            # keep only the most recent call; a later call replaces it
            captured[:] = [status, headers, exc_info]
            # chunks passed to the returned write() callable are collected
            return output.append
        app_iter = application(self.environ, start_response)
        if not app_iter:
            # the app returned nothing; use whatever it wrote via write()
            app_iter = output
        if not captured:
            # start_response may be deferred until the iterable is consumed
            app_iter = reiterate(app_iter)
        if not captured:
            # fail with a clear message instead of an IndexError below
            raise RuntimeError('application never called start_response')
        return (captured[0], captured[1], app_iter)
Example #4
0
    def call_application(self, application):
        """
        Calls the application with this request's environment.  Returns the
        status, headers, and app_iter for the response as a tuple.

        :param application: the WSGI application to call
        :returns: a ``(status, headers, app_iter)`` tuple
        :raises RuntimeError: if the application never calls start_response
        """
        output = []
        captured = []

        def start_response(status, headers, exc_info=None):
            # keep only the most recent call; a later call replaces it
            captured[:] = [status, headers, exc_info]
            # chunks passed to the returned write() callable are collected
            return output.append
        app_iter = application(self.environ, start_response)
        if not app_iter:
            # the app returned nothing; use whatever it wrote via write()
            app_iter = output
        if not captured:
            # start_response may be deferred until the iterable is consumed
            app_iter = reiterate(app_iter)
        if not captured:
            # fail with a clear message instead of an IndexError below
            raise RuntimeError('application never called start_response')
        return (captured[0], captured[1], app_iter)
Example #5
0
    def POST(self, req):
        """
        Handles Complete Multipart Upload.

        Reads the CompleteMultipartUpload XML from the request body, checks
        that part numbers are strictly increasing and that every ETag is 32
        lowercase hex characters, and builds a manifest with one entry per
        part.  The manifest is sent as a PUT with ``multipart-manifest=put``
        and ``heartbeat=on``; afterwards the upload record in the
        ``MULTIUPLOAD_SUFFIX`` container is deleted.

        The response body is produced by a generator, so the HTTP status can
        still be switched to an error status as long as nothing has been
        yielded yet.

        :param req: the request being handled
        :returns: an HTTPOk response with an XML body; returned directly
                  (without a new PUT) when the upload info already carries
                  the computed S3 etag
        :raises InvalidRequest: if the request body is empty
        :raises BadDigest: if a supplied Content-MD5 does not match the body
        :raises InvalidPartOrder: if part numbers are not strictly increasing
        :raises InvalidPart: if a part's ETag is not 32 lowercase hex chars
        :raises MalformedXML: if the body fails XML parsing or validation
        """
        upload_id = req.params['uploadId']
        resp = _get_upload_info(req, self.app, upload_id)
        headers = {
            'Accept': 'application/json',
            sysmeta_header('object', 'upload-id'): upload_id
        }
        # Carry user metadata over from the upload info, renaming
        # x-amz-meta-* to x-object-meta-* (11 == len('x-amz-meta-'))
        for key, val in resp.headers.items():
            _key = key.lower()
            if _key.startswith('x-amz-meta-'):
                headers['x-object-meta-' + _key[11:]] = val

        hct_header = sysmeta_header('object', 'has-content-type')
        if resp.sysmeta_headers.get(hct_header) == 'yes':
            content_type = resp.sysmeta_headers.get(
                sysmeta_header('object', 'content-type'))
        elif hct_header in resp.sysmeta_headers:
            # has-content-type is present but false, so no content type was
            # set on initial upload. In that case, we won't set one on our
            # PUT request. Swift will end up guessing one based on the
            # object name.
            content_type = None
        else:
            # no has-content-type marker at all; fall back to the plain
            # Content-Type header of the upload info response
            content_type = resp.headers.get('Content-Type')

        if content_type:
            headers['Content-Type'] = content_type

        container = req.container_name + MULTIUPLOAD_SUFFIX
        # Fed with the raw bytes of every part ETag; its digest becomes the
        # first half of the '<hex>-<part count>' S3 etag below
        s3_etag_hasher = md5(usedforsecurity=False)
        manifest = []
        previous_number = 0
        try:
            xml = req.xml(MAX_COMPLETE_UPLOAD_BODY_SIZE)
            if not xml:
                raise InvalidRequest(msg='You must specify at least one part')
            if 'content-md5' in req.headers:
                # If an MD5 was provided, we need to verify it.
                # Note that S3Request already took care of translating to ETag
                if req.headers['etag'] != md5(
                        xml, usedforsecurity=False).hexdigest():
                    raise BadDigest(content_md5=req.headers['content-md5'])
                # We're only interested in the body here, in the
                # multipart-upload controller -- *don't* let it get
                # plumbed down to the object-server
                del req.headers['etag']

            complete_elem = fromstring(xml, 'CompleteMultipartUpload',
                                       self.logger)
            for part_elem in complete_elem.iterchildren('Part'):
                part_number = int(part_elem.find('./PartNumber').text)

                # part numbers must be strictly increasing (and so > 0)
                if part_number <= previous_number:
                    raise InvalidPartOrder(upload_id=upload_id)
                previous_number = part_number

                etag = normalize_etag(part_elem.find('./ETag').text)
                # only a 32-character lowercase hex string is accepted
                if len(etag) != 32 or any(c not in '0123456789abcdef'
                                          for c in etag):
                    raise InvalidPart(upload_id=upload_id,
                                      part_number=part_number)

                manifest.append({
                    'path':
                    '/%s/%s/%s/%d' %
                    (wsgi_to_str(container), wsgi_to_str(
                        req.object_name), upload_id, part_number),
                    'etag':
                    etag
                })
                s3_etag_hasher.update(binascii.a2b_hex(etag))
        except (XMLSyntaxError, DocumentInvalid):
            # NB: our schema definitions catch uploads with no parts here
            raise MalformedXML()
        except ErrorResponse:
            # already an error response; pass it through without logging
            raise
        except Exception as e:
            # anything unexpected is logged, then re-raised unchanged
            self.logger.error(e)
            raise

        s3_etag = '%s-%d' % (s3_etag_hasher.hexdigest(), len(manifest))
        s3_etag_header = sysmeta_header('object', 'etag')
        if resp.sysmeta_headers.get(s3_etag_header) == s3_etag:
            # This header should only already be present if the upload marker
            # has been cleaned up and the current target uses the same
            # upload-id; assuming the segments to use haven't changed, the work
            # is already done
            return HTTPOk(body=_make_complete_body(req, s3_etag, False),
                          content_type='application/xml')
        headers[s3_etag_header] = s3_etag
        # Leave base header value blank; SLO will populate
        c_etag = '; s3_etag=%s' % s3_etag
        headers[get_container_update_override_key('etag')] = c_etag

        # This exact text is also matched against the errors that come back
        # from the manifest PUT, inside response_iter() below
        too_small_message = ('s3api requires that each segment be at least '
                             '%d bytes' % self.conf.min_segment_size)

        def size_checker(manifest):
            # Check the size of each segment except the last and make sure
            # they are all more than the minimum upload chunk size.
            # Note that we need to use the *internal* keys, since we're
            # looking at the manifest that's about to be written.
            return [(item['name'], too_small_message) for item in manifest[:-1]
                    if item and item['bytes'] < self.conf.min_segment_size]

        req.environ['swift.callback.slo_manifest_hook'] = size_checker
        start_time = time.time()

        def response_iter():
            # NB: XML requires that the XML declaration, if present, be at the
            # very start of the document. Clients *will* call us out on not
            # being valid XML if we pass through whitespace before it.
            # Track whether we've sent anything yet so we can yield out that
            # declaration *first*
            yielded_anything = False

            try:
                try:
                    # TODO: add support for versioning
                    put_resp = req.get_response(self.app,
                                                'PUT',
                                                body=json.dumps(manifest),
                                                query={
                                                    'multipart-manifest':
                                                    'put',
                                                    'heartbeat': 'on'
                                                },
                                                headers=headers)
                    if put_resp.status_int == 202:
                        body = []
                        put_resp.fix_conditional_response()
                        for chunk in put_resp.response_iter:
                            # Whitespace-only chunks are not part of the JSON
                            # result (presumably the keep-alives requested
                            # with heartbeat=on -- confirm against SLO)
                            if not chunk.strip():
                                if time.time() - start_time < 10:
                                    # Include some grace period to keep
                                    # ceph-s3tests happy
                                    continue
                                if not yielded_anything:
                                    yield (b'<?xml version="1.0" '
                                           b'encoding="UTF-8"?>\n')
                                yielded_anything = True
                                yield chunk
                                continue
                            body.append(chunk)
                        # the non-blank chunks together form a JSON document
                        body = json.loads(b''.join(body))
                        if body['Response Status'] != '201 Created':
                            # translate known per-segment errors first; any
                            # other failure becomes a generic InvalidRequest
                            for seg, err in body['Errors']:
                                if err == too_small_message:
                                    raise EntityTooSmall()
                                elif err in ('Etag Mismatch', '404 Not Found'):
                                    raise InvalidPart(upload_id=upload_id)
                            raise InvalidRequest(
                                status=body['Response Status'],
                                msg='\n'.join(': '.join(err)
                                              for err in body['Errors']))
                except BadSwiftRequest as e:
                    # same translation, based on the exception's message text
                    msg = str(e)
                    if too_small_message in msg:
                        raise EntityTooSmall(msg)
                    elif ', Etag Mismatch' in msg:
                        raise InvalidPart(upload_id=upload_id)
                    elif ', 404 Not Found' in msg:
                        raise InvalidPart(upload_id=upload_id)
                    else:
                        raise

                # clean up the multipart-upload record
                obj = '%s/%s' % (req.object_name, upload_id)
                try:
                    req.get_response(self.app, 'DELETE', container, obj)
                except NoSuchKey:
                    # The important thing is that we wrote out a tombstone to
                    # make sure the marker got cleaned up. If it's already
                    # gone (e.g., because of concurrent completes or a retried
                    # complete), so much the better.
                    pass

                yield _make_complete_body(req, s3_etag, yielded_anything)
            except ErrorResponse as err_resp:
                if yielded_anything:
                    # the XML declaration has already been sent, so the error
                    # document must not repeat it
                    err_resp.xml_declaration = False
                    yield b'\n'
                else:
                    # Oh good, we can still change HTTP status code, too!
                    resp.status = err_resp.status
                # call the error response as a WSGI app (empty environ, no-op
                # start_response) just to obtain its body chunks
                for chunk in err_resp({}, lambda *a: None):
                    yield chunk

        # NOTE: this rebinds ``resp``; response_iter() only runs once the
        # body is iterated, so it sees this HTTPOk rather than the upload info
        resp = HTTPOk()  # assume we're good for now... but see above!
        resp.app_iter = reiterate(response_iter())
        resp.content_type = "application/xml"

        return resp
Example #6
0
    def _perform_subrequest(self, orig_env, attributes, fp, keys):
        """
        Runs the PUT subrequest for one uploaded file and reports how the
        wrapped app answered.

        The form's size limit, optional delete-at/delete-after values,
        expiry and HMAC-SHA1 signature are checked before the app is called.

        :param orig_env: The WSGI environment dict; will only be used
                         to form a new env for the subrequest.
        :param attributes: dict of the attributes of the form so far.
        :param fp: The file-like object containing the request body.
        :param keys: The account keys to validate the signature with.
        :returns: (status_line, headers_list)
        :raises FormUnauthorized: if no keys were given, the form has
                                  expired, or no key yields the signature
        :raises FormInvalid: if max_file_size, x_delete_at, x_delete_after
                             or expires is not an integer
        """
        if not keys:
            raise FormUnauthorized('invalid signature')
        try:
            size_limit = int(attributes.get('max_file_size') or 0)
        except ValueError:
            raise FormInvalid('max_file_size not an integer')

        put_env = make_pre_authed_env(
            orig_env, 'PUT', agent=None, swift_source='FP')
        # the subrequest must not carry over the original query string
        put_env.pop('QUERY_STRING', None)
        put_env['HTTP_TRANSFER_ENCODING'] = 'chunked'
        capped_input = _CappedFileLikeObject(fp, size_limit)
        put_env['wsgi.input'] = capped_input

        # Append the file name to the path, adding a '/' separator first
        # when the path has fewer than four slashes and no trailing one
        target_path = put_env['PATH_INFO']
        if target_path.count('/') < 4 and not target_path.endswith('/'):
            target_path += '/'
        put_env['PATH_INFO'] = target_path + str_to_wsgi(
            attributes['filename'] or 'filename')

        if 'x_delete_at' in attributes:
            try:
                put_env['HTTP_X_DELETE_AT'] = int(attributes['x_delete_at'])
            except ValueError:
                raise FormInvalid('x_delete_at not an integer: '
                                  'Unix timestamp required.')
        if 'x_delete_after' in attributes:
            try:
                put_env['HTTP_X_DELETE_AFTER'] = int(
                    attributes['x_delete_after'])
            except ValueError:
                raise FormInvalid('x_delete_after not an integer: '
                                  'Number of seconds required.')
        if 'content-type' in attributes:
            put_env['CONTENT_TYPE'] = (
                attributes['content-type'] or 'application/octet-stream')
        if 'content-encoding' in attributes:
            put_env['HTTP_CONTENT_ENCODING'] = attributes['content-encoding']

        try:
            if int(attributes.get('expires') or 0) < time():
                raise FormUnauthorized('form expired')
        except ValueError:
            raise FormInvalid('expired not an integer')

        # The signed message: path, redirect, max_file_size, max_file_count
        # and expires, one per line
        signed_fields = (
            wsgi_to_str(orig_env['PATH_INFO']),
            attributes.get('redirect') or '',
            attributes.get('max_file_size') or '0',
            attributes.get('max_file_count') or '0',
            attributes.get('expires') or '0')
        hmac_body = '%s\n%s\n%s\n%s\n%s' % signed_fields
        if six.PY3:
            hmac_body = hmac_body.encode('utf-8')

        supplied_sig = attributes.get('signature') or 'invalid'
        has_valid_sig = False
        # every key is tried; the loop does not stop at the first match
        for key in keys:
            # Encode key like in swift.common.utils.get_hmac.
            if isinstance(key, six.binary_type):
                key_bytes = key
            else:
                key_bytes = key.encode('utf8')
            expected_sig = hmac.new(key_bytes, hmac_body, sha1).hexdigest()
            if streq_const_time(expected_sig, supplied_sig):
                has_valid_sig = True
        if not has_valid_sig:
            raise FormUnauthorized('invalid signature')

        # [status, headers] as handed to start_response by the app
        started = [None, None]

        def _start_response(status, headers, exc_info=None):
            if capped_input.file_size_exceeded:
                raise EOFError("max_file_size exceeded")

            started[:] = [status, headers]

        # reiterate to ensure the response started,
        # but drop any data on the floor
        close_if_possible(reiterate(self.app(put_env, _start_response)))
        return started[0], started[1]
Example #7
0
    def POST(self, req):
        """
        Handles Complete Multipart Upload.

        Reads the CompleteMultipartUpload XML from the request body, checks
        that part numbers are strictly increasing and that every ETag is 32
        lowercase hex characters, and builds a manifest with one entry per
        part.  The manifest is sent as a PUT with ``multipart-manifest=put``
        and ``heartbeat=on``; afterwards the upload record in the
        ``MULTIUPLOAD_SUFFIX`` container is deleted and a
        CompleteMultipartUploadResult document is emitted.

        The response body is produced by a generator, so the HTTP status can
        still be switched to an error status as long as nothing has been
        yielded yet.

        :param req: the request being handled
        :returns: an HTTPOk response whose body is generated lazily
        :raises InvalidRequest: if the request body is empty
        :raises BadDigest: if a supplied Content-MD5 does not match the body
        :raises InvalidPartOrder: if part numbers are not strictly increasing
        :raises InvalidPart: if a part's ETag is not 32 lowercase hex chars
        :raises MalformedXML: if the body fails XML parsing or validation
        """
        upload_id = req.params['uploadId']
        resp = _get_upload_info(req, self.app, upload_id)
        headers = {'Accept': 'application/json'}
        # Carry user metadata over from the upload info, renaming
        # x-amz-meta-* to x-object-meta-* (11 == len('x-amz-meta-'))
        for key, val in resp.headers.items():
            _key = key.lower()
            if _key.startswith('x-amz-meta-'):
                headers['x-object-meta-' + _key[11:]] = val

        hct_header = sysmeta_header('object', 'has-content-type')
        if resp.sysmeta_headers.get(hct_header) == 'yes':
            content_type = resp.sysmeta_headers.get(
                sysmeta_header('object', 'content-type'))
        elif hct_header in resp.sysmeta_headers:
            # has-content-type is present but false, so no content type was
            # set on initial upload. In that case, we won't set one on our
            # PUT request. Swift will end up guessing one based on the
            # object name.
            content_type = None
        else:
            # no has-content-type marker at all; fall back to the plain
            # Content-Type header of the upload info response
            content_type = resp.headers.get('Content-Type')

        if content_type:
            headers['Content-Type'] = content_type

        container = req.container_name + MULTIUPLOAD_SUFFIX
        # Fed with the raw bytes of every part ETag; its digest becomes the
        # first half of the '<hex>-<part count>' S3 etag below
        s3_etag_hasher = md5()
        manifest = []
        previous_number = 0
        try:
            xml = req.xml(MAX_COMPLETE_UPLOAD_BODY_SIZE)
            if not xml:
                raise InvalidRequest(msg='You must specify at least one part')
            if 'content-md5' in req.headers:
                # If an MD5 was provided, we need to verify it.
                # Note that S3Request already took care of translating to ETag
                if req.headers['etag'] != md5(xml).hexdigest():
                    raise BadDigest(content_md5=req.headers['content-md5'])
                # We're only interested in the body here, in the
                # multipart-upload controller -- *don't* let it get
                # plumbed down to the object-server
                del req.headers['etag']

            complete_elem = fromstring(xml, 'CompleteMultipartUpload',
                                       self.logger)
            for part_elem in complete_elem.iterchildren('Part'):
                part_number = int(part_elem.find('./PartNumber').text)

                # part numbers must be strictly increasing (and so > 0)
                if part_number <= previous_number:
                    raise InvalidPartOrder(upload_id=upload_id)
                previous_number = part_number

                etag = normalize_etag(part_elem.find('./ETag').text)
                # only a 32-character lowercase hex string is accepted
                if len(etag) != 32 or any(c not in '0123456789abcdef'
                                          for c in etag):
                    raise InvalidPart(upload_id=upload_id,
                                      part_number=part_number)

                manifest.append({
                    'path':
                    '/%s/%s/%s/%d' %
                    (container, req.object_name, upload_id, part_number),
                    'etag':
                    etag
                })
                s3_etag_hasher.update(binascii.a2b_hex(etag))
        except (XMLSyntaxError, DocumentInvalid):
            # NB: our schema definitions catch uploads with no parts here
            raise MalformedXML()
        except ErrorResponse:
            # already an error response; pass it through without logging
            raise
        except Exception as e:
            # anything unexpected is logged, then re-raised unchanged
            self.logger.error(e)
            raise

        s3_etag = '%s-%d' % (s3_etag_hasher.hexdigest(), len(manifest))
        headers[sysmeta_header('object', 'etag')] = s3_etag
        # Leave base header value blank; SLO will populate
        c_etag = '; s3_etag=%s' % s3_etag
        headers[get_container_update_override_key('etag')] = c_etag

        # This exact text is also matched against the errors that come back
        # from the manifest PUT, inside response_iter() below
        too_small_message = ('s3api requires that each segment be at least '
                             '%d bytes' % self.conf.min_segment_size)

        def size_checker(manifest):
            # Check the size of each segment except the last and make sure
            # they are all more than the minimum upload chunk size.
            # Note that we need to use the *internal* keys, since we're
            # looking at the manifest that's about to be written.
            return [(item['name'], too_small_message) for item in manifest[:-1]
                    if item and item['bytes'] < self.conf.min_segment_size]

        req.environ['swift.callback.slo_manifest_hook'] = size_checker
        start_time = time.time()

        def response_iter():
            # NB: XML requires that the XML declaration, if present, be at the
            # very start of the document. Clients *will* call us out on not
            # being valid XML if we pass through whitespace before it.
            # Track whether we've sent anything yet so we can yield out that
            # declaration *first*
            yielded_anything = False

            try:
                try:
                    # TODO: add support for versioning
                    put_resp = req.get_response(self.app,
                                                'PUT',
                                                body=json.dumps(manifest),
                                                query={
                                                    'multipart-manifest':
                                                    'put',
                                                    'heartbeat': 'on'
                                                },
                                                headers=headers)
                    if put_resp.status_int == 202:
                        body = []
                        put_resp.fix_conditional_response()
                        for chunk in put_resp.response_iter:
                            # Whitespace-only chunks are not part of the JSON
                            # result (presumably the keep-alives requested
                            # with heartbeat=on -- confirm against SLO)
                            if not chunk.strip():
                                if time.time() - start_time < 10:
                                    # Include some grace period to keep
                                    # ceph-s3tests happy
                                    continue
                                if not yielded_anything:
                                    yield (b'<?xml version="1.0" '
                                           b'encoding="UTF-8"?>\n')
                                yielded_anything = True
                                yield chunk
                                continue
                            body.append(chunk)
                        # the non-blank chunks together form a JSON document
                        body = json.loads(b''.join(body))
                        if body['Response Status'] != '201 Created':
                            # translate known per-segment errors first; any
                            # other failure becomes a generic InvalidRequest
                            for seg, err in body['Errors']:
                                if err == too_small_message:
                                    raise EntityTooSmall()
                                elif err in ('Etag Mismatch', '404 Not Found'):
                                    raise InvalidPart(upload_id=upload_id)
                            raise InvalidRequest(
                                status=body['Response Status'],
                                msg='\n'.join(': '.join(err)
                                              for err in body['Errors']))
                except BadSwiftRequest as e:
                    # same translation, based on the exception's message text
                    msg = str(e)
                    if too_small_message in msg:
                        raise EntityTooSmall(msg)
                    elif ', Etag Mismatch' in msg:
                        raise InvalidPart(upload_id=upload_id)
                    elif ', 404 Not Found' in msg:
                        raise InvalidPart(upload_id=upload_id)
                    else:
                        raise

                # clean up the multipart-upload record
                obj = '%s/%s' % (req.object_name, upload_id)
                try:
                    req.get_response(self.app, 'DELETE', container, obj)
                except NoSuchKey:
                    # We know that this existed long enough for us to HEAD
                    pass

                result_elem = Element('CompleteMultipartUploadResult')

                # NOTE: boto with sig v4 appends port to HTTP_HOST value at
                # the request header when the port is non default value and it
                # makes req.host_url like as http://localhost:8080:8080/path
                # that obviously invalid. Probably it should be resolved at
                # swift.common.swob though, tentatively we are parsing and
                # reconstructing the correct host_url info here.
                # in detail, https://github.com/boto/boto/pull/3513
                parsed_url = urlparse(req.host_url)
                host_url = '%s://%s' % (parsed_url.scheme, parsed_url.hostname)
                # Why are we doing our own port parsing? Because py3 decided
                # to start raising ValueErrors on access after parsing such
                # an invalid port
                # Drop any 'user@' prefix and anything up to a closing ']'
                # (as found in bracketed IPv6 hosts) before looking for ':'
                netloc = parsed_url.netloc.split('@')[-1].split(']')[-1]
                if ':' in netloc:
                    # take only the first port, ignoring a duplicated one
                    port = netloc.split(':', 2)[1]
                    host_url += ':%s' % port

                SubElement(result_elem, 'Location').text = host_url + req.path
                SubElement(result_elem, 'Bucket').text = req.container_name
                SubElement(result_elem, 'Key').text = req.object_name
                SubElement(result_elem, 'ETag').text = '"%s"' % s3_etag
                resp.headers.pop('ETag', None)
                if yielded_anything:
                    yield b'\n'
                # the declaration was already sent if anything was yielded
                yield tostring(result_elem,
                               xml_declaration=not yielded_anything)
            except ErrorResponse as err_resp:
                if yielded_anything:
                    # the XML declaration has already been sent, so the error
                    # document must not repeat it
                    err_resp.xml_declaration = False
                    yield b'\n'
                else:
                    # Oh good, we can still change HTTP status code, too!
                    resp.status = err_resp.status
                # call the error response as a WSGI app (empty environ, no-op
                # start_response) just to obtain its body chunks
                for chunk in err_resp({}, lambda *a: None):
                    yield chunk

        # NOTE: this rebinds ``resp``; response_iter() only runs once the
        # body is iterated, so it sees this HTTPOk rather than the upload info
        resp = HTTPOk()  # assume we're good for now... but see above!
        resp.app_iter = reiterate(response_iter())
        resp.content_type = "application/xml"

        return resp
Example #8
0
        def iter_response(iterable):
            """
            Generator that relays the response body while collecting what is
            needed to log the request once the body is finished.

            Relies on names from the enclosing scope (not visible here):
            ``start_response_args``, ``start_response``, ``env``,
            ``start_time``, ``input_proxy`` and ``self``.

            :param iterable: the response body returned by the wrapped app
            :returns: yields the body chunks unchanged
            """
            iterator = reiterate(iterable)
            content_length = None
            for h, v in start_response_args[0][1]:
                header_name = h.lower()
                if header_name == 'content-length':
                    content_length = int(v)
                    break
                elif header_name == 'transfer-encoding':
                    # no length is computed or enforced for this response
                    break
            else:
                # neither header present: a list body can be measured up
                # front, so add the missing Content-Length header
                if isinstance(iterator, list):
                    content_length = sum(len(i) for i in iterator)
                    start_response_args[0][1].append(
                        ('Content-Length', str(content_length)))

            req = Request(env)
            method = self.method_from_req(req)
            if method == 'HEAD':
                content_length = 0
            if content_length is not None:
                iterator = enforce_byte_count(iterator, content_length)

            wire_status_int = int(start_response_args[0][0].split(' ', 1)[0])
            resp_headers = dict(start_response_args[0][1])
            start_response(*start_response_args[0])

            # Log timing information for time-to-first-byte (GET requests only)
            ttfb = 0.0
            if method == 'GET':
                policy_index = get_policy_index(req.headers, resp_headers)
                metric_name = self.statsd_metric_name(req, wire_status_int,
                                                      method)
                metric_name_policy = self.statsd_metric_name_policy(
                    req, wire_status_int, method, policy_index)
                ttfb = time.time() - start_time
                if metric_name:
                    self.access_logger.timing(
                        metric_name + '.first-byte.timing', ttfb * 1000)
                if metric_name_policy:
                    self.access_logger.timing(
                        metric_name_policy + '.first-byte.timing', ttfb * 1000)

            bytes_sent = 0
            client_disconnect = False
            start_status = wire_status_int
            try:
                for chunk in iterator:
                    bytes_sent += len(chunk)
                    yield chunk
            except StopIteration:  # iterator was depleted
                return
            except GeneratorExit:  # generator was closed before we finished
                client_disconnect = True
                raise
            except Exception:
                # a failure while streaming is logged as a 500
                start_status = 500
                raise
            finally:
                status_int = status_int_for_logging(start_status,
                                                    client_disconnect)
                self.log_request(req,
                                 status_int,
                                 input_proxy.bytes_received,
                                 bytes_sent,
                                 start_time,
                                 time.time(),
                                 resp_headers=resp_headers,
                                 ttfb=ttfb,
                                 wire_status_int=wire_status_int)
                # The iterator is not always closeable: a plain list is left
                # unwrapped when a transfer-encoding header is present, and
                # calling list.close() would raise AttributeError here.
                close_method = getattr(iterator, 'close', None)
                if callable(close_method):
                    close_method()
Example #9
0
    def _perform_subrequest(self, orig_env, attributes, fp, keys):
        """
        Performs the subrequest and returns the response.

        The form's size limit, optional delete-at/delete-after values,
        expiry and HMAC-SHA1 signature are checked before the app is called.

        :param orig_env: The WSGI environment dict; will only be used
                         to form a new env for the subrequest.
        :param attributes: dict of the attributes of the form so far.
        :param fp: The file-like object containing the request body.
        :param keys: The account keys to validate the signature with;
                     text keys are UTF-8 encoded before use.
        :returns: (status_line, headers_list)
        :raises FormUnauthorized: if no keys were given, the form has
                                  expired, or no key yields the signature
        :raises FormInvalid: if max_file_size, x_delete_at, x_delete_after
                             or expires is not an integer
        """
        if not keys:
            raise FormUnauthorized('invalid signature')
        try:
            max_file_size = int(attributes.get('max_file_size') or 0)
        except ValueError:
            raise FormInvalid('max_file_size not an integer')
        subenv = make_pre_authed_env(orig_env, 'PUT', agent=None,
                                     swift_source='FP')
        # the subrequest must not carry over the original query string
        if 'QUERY_STRING' in subenv:
            del subenv['QUERY_STRING']
        subenv['HTTP_TRANSFER_ENCODING'] = 'chunked'
        subenv['wsgi.input'] = _CappedFileLikeObject(fp, max_file_size)
        # Append the file name to the path, adding a '/' separator first
        # when the path has fewer than four slashes and no trailing one
        if not subenv['PATH_INFO'].endswith('/') and \
                subenv['PATH_INFO'].count('/') < 4:
            subenv['PATH_INFO'] += '/'
        subenv['PATH_INFO'] += attributes['filename'] or 'filename'
        if 'x_delete_at' in attributes:
            try:
                subenv['HTTP_X_DELETE_AT'] = int(attributes['x_delete_at'])
            except ValueError:
                raise FormInvalid('x_delete_at not an integer: '
                                  'Unix timestamp required.')
        if 'x_delete_after' in attributes:
            try:
                subenv['HTTP_X_DELETE_AFTER'] = int(
                    attributes['x_delete_after'])
            except ValueError:
                raise FormInvalid('x_delete_after not an integer: '
                                  'Number of seconds required.')
        if 'content-type' in attributes:
            subenv['CONTENT_TYPE'] = \
                attributes['content-type'] or 'application/octet-stream'
        if 'content-encoding' in attributes:
            subenv['HTTP_CONTENT_ENCODING'] = attributes['content-encoding']
        try:
            if int(attributes.get('expires') or 0) < time():
                raise FormUnauthorized('form expired')
        except ValueError:
            raise FormInvalid('expired not an integer')
        # The signed message: path, redirect, max_file_size, max_file_count
        # and expires, one per line
        hmac_body = '%s\n%s\n%s\n%s\n%s' % (
            orig_env['PATH_INFO'],
            attributes.get('redirect') or '',
            attributes.get('max_file_size') or '0',
            attributes.get('max_file_count') or '0',
            attributes.get('expires') or '0')
        if six.PY3:
            hmac_body = hmac_body.encode('utf-8')

        has_valid_sig = False
        # every key is tried; the loop does not stop at the first match
        for key in keys:
            # hmac.new() needs a bytes key; a text key would raise
            # TypeError on Python 3, so encode it first
            if not isinstance(key, six.binary_type):
                key = key.encode('utf8')
            sig = hmac.new(key, hmac_body, sha1).hexdigest()
            if streq_const_time(sig, (attributes.get('signature') or
                                      'invalid')):
                has_valid_sig = True
        if not has_valid_sig:
            raise FormUnauthorized('invalid signature')

        # one-element lists so the nested callback can store its arguments
        substatus = [None]
        subheaders = [None]

        wsgi_input = subenv['wsgi.input']

        def _start_response(status, headers, exc_info=None):
            if wsgi_input.file_size_exceeded:
                raise EOFError("max_file_size exceeded")

            substatus[0] = status
            subheaders[0] = headers

        # reiterate to ensure the response started,
        # but drop any data on the floor
        close_if_possible(reiterate(self.app(subenv, _start_response)))
        return substatus[0], subheaders[0]
Example #10
0
    def POST(self, req):
        """
        Handles Complete Multipart Upload.

        Validates the client's part list against a listing of the segments
        container, then writes the manifest with a heartbeating PUT
        (``multipart-manifest=put``) and removes the upload record.  The
        PUT, the cleanup and the XML result are all produced lazily by a
        generator that becomes the response body.

        :param req: the request being handled; its ``uploadId`` parameter
                    names the upload to complete and its body holds the
                    ``CompleteMultipartUpload`` XML document
        :returns: an ``HTTPOk`` response of type ``application/xml``.  An
                  ``ErrorResponse`` raised inside the generator before
                  anything was yielded replaces the response status; once
                  output has started the error is appended to the body
        :raises InvalidRequest: if the request has no body
        :raises InvalidPartOrder: if part numbers are not strictly increasing
        :raises InvalidPart: if a part is missing from the listing or its
                             ETag does not match the listing
        :raises MalformedXML: if the body fails to parse or validate
        :raises EntityTooSmall: if any part but the last is smaller than
                                ``self.conf.min_segment_size``
        """
        # Function-scope import: needed to turn hex ETags into raw digest
        # bytes in a way that works on both Python 2 and Python 3.
        import binascii

        upload_id = req.params['uploadId']
        resp = _get_upload_info(req, self.app, upload_id)
        headers = {'Accept': 'application/json'}
        # Carry user metadata recorded on the upload over to the final object.
        for key, val in resp.headers.items():
            _key = key.lower()
            if _key.startswith('x-amz-meta-'):
                headers['x-object-meta-' + _key[11:]] = val

        hct_header = sysmeta_header('object', 'has-content-type')
        if resp.sysmeta_headers.get(hct_header) == 'yes':
            content_type = resp.sysmeta_headers.get(
                sysmeta_header('object', 'content-type'))
        elif hct_header in resp.sysmeta_headers:
            # has-content-type is present but false, so no content type was
            # set on initial upload. In that case, we won't set one on our
            # PUT request. Swift will end up guessing one based on the
            # object name.
            content_type = None
        else:
            content_type = resp.headers.get('Content-Type')

        if content_type:
            headers['Content-Type'] = content_type

        # Query for the objects in the segments area to make sure it completed
        query = {
            'format': 'json',
            'prefix': '%s/%s/' % (req.object_name, upload_id),
            'delimiter': '/'
        }

        container = req.container_name + MULTIUPLOAD_SUFFIX
        resp = req.get_response(self.app, 'GET', container, '', query=query)
        objinfo = json.loads(resp.body)
        # Index the listing by object name so each requested part can be
        # looked up directly.
        objtable = {
            o['name']: {
                'path': '/'.join(['', container, o['name']]),
                'etag': o['hash'],
                'size_bytes': o['bytes'],
            } for o in objinfo}

        s3_etag_hasher = md5()
        manifest = []
        previous_number = 0
        try:
            xml = req.xml(MAX_COMPLETE_UPLOAD_BODY_SIZE)
            if not xml:
                raise InvalidRequest(msg='You must specify at least one part')

            complete_elem = fromstring(xml, 'CompleteMultipartUpload',
                                       self.logger)
            for part_elem in complete_elem.iterchildren('Part'):
                part_number = int(part_elem.find('./PartNumber').text)

                if part_number <= previous_number:
                    raise InvalidPartOrder(upload_id=upload_id)
                previous_number = part_number

                # An empty <ETag/> element has a ``text`` of None; treat it
                # as an empty string so it is rejected as InvalidPart below
                # instead of raising TypeError from len().
                etag = part_elem.find('./ETag').text or ''
                if len(etag) >= 2 and etag[0] == '"' and etag[-1] == '"':
                    # strip double quotes
                    etag = etag[1:-1]

                info = objtable.get("%s/%s/%s" %
                                    (req.object_name, upload_id, part_number))
                if info is None or info['etag'] != etag:
                    raise InvalidPart(upload_id=upload_id,
                                      part_number=part_number)

                # The S3-style ETag is a digest over the parts' raw digests.
                # ``str.decode('hex')`` only exists on Python 2.
                s3_etag_hasher.update(binascii.a2b_hex(etag))
                info['size_bytes'] = int(info['size_bytes'])
                manifest.append(info)
        except (XMLSyntaxError, DocumentInvalid):
            # NB: our schema definitions catch uploads with no parts here
            raise MalformedXML()
        except ErrorResponse:
            raise
        except Exception as e:
            self.logger.error(e)
            raise

        s3_etag = '%s-%d' % (s3_etag_hasher.hexdigest(), len(manifest))
        headers[sysmeta_header('object', 'etag')] = s3_etag
        # Leave base header value blank; SLO will populate
        c_etag = '; s3_etag=%s' % s3_etag
        headers['X-Object-Sysmeta-Container-Update-Override-Etag'] = c_etag

        # Check the size of each segment except the last and make sure they are
        # all more than the minimum upload chunk size
        for info in manifest[:-1]:
            if info['size_bytes'] < self.conf.min_segment_size:
                raise EntityTooSmall()

        def response_iter():
            # NB: XML requires that the XML declaration, if present, be at the
            # very start of the document. Clients *will* call us out on not
            # being valid XML if we pass through whitespace before it.
            # Track whether we've sent anything yet so we can yield out that
            # declaration *first*
            yielded_anything = False

            try:
                try:
                    # TODO: add support for versioning
                    put_resp = req.get_response(self.app,
                                                'PUT',
                                                body=json.dumps(manifest),
                                                query={
                                                    'multipart-manifest':
                                                    'put',
                                                    'heartbeat': 'on'
                                                },
                                                headers=headers)
                    if put_resp.status_int == 202:
                        body = []
                        put_resp.fix_conditional_response()
                        for chunk in put_resp.response_iter:
                            if not chunk.strip():
                                # Whitespace-only chunk: pass it straight
                                # through to the client, preceded by the XML
                                # declaration the first time.
                                if not yielded_anything:
                                    yield (b'<?xml version="1.0" '
                                           b'encoding="UTF-8"?>\n')
                                yielded_anything = True
                                yield chunk
                            # Whitespace chunks are collected too; that is
                            # harmless because the JSON parser ignores
                            # surrounding whitespace.
                            body.append(chunk)
                        body = json.loads(b''.join(body))
                        if body['Response Status'] != '201 Created':
                            raise InvalidRequest(
                                status=body['Response Status'],
                                msg='\n'.join(': '.join(err)
                                              for err in body['Errors']))
                except BadSwiftRequest as e:
                    msg = str(e)
                    expected_msg = ('too small; each segment must be '
                                    'at least 1 byte')
                    if expected_msg in msg:
                        # FIXME: AWS S3 allows a smaller object than 5 MB if
                        # there is only one part.  Use a COPY request to copy
                        # the part object from the segments container instead.
                        raise EntityTooSmall(msg)
                    else:
                        raise

                # clean up the multipart-upload record
                obj = '%s/%s' % (req.object_name, upload_id)
                try:
                    req.get_response(self.app, 'DELETE', container, obj)
                except NoSuchKey:
                    # We know that this existed long enough for us to HEAD
                    pass

                result_elem = Element('CompleteMultipartUploadResult')

                # NOTE: boto with sig v4 appends port to HTTP_HOST value at
                # the request header when the port is non default value and it
                # makes req.host_url like as http://localhost:8080:8080/path
                # that obviously invalid. Probably it should be resolved at
                # swift.common.swob though, tentatively we are parsing and
                # reconstructing the correct host_url info here.
                # in detail, https://github.com/boto/boto/pull/3513
                parsed_url = urlparse(req.host_url)
                host_url = '%s://%s' % (parsed_url.scheme, parsed_url.hostname)
                if parsed_url.port:
                    host_url += ':%s' % parsed_url.port

                SubElement(result_elem, 'Location').text = host_url + req.path
                SubElement(result_elem, 'Bucket').text = req.container_name
                SubElement(result_elem, 'Key').text = req.object_name
                SubElement(result_elem, 'ETag').text = '"%s"' % s3_etag
                # ``resp`` here is the HTTPOk created below: the closure
                # looks the name up when the generator runs, not when it is
                # defined.
                resp.headers.pop('ETag', None)
                if yielded_anything:
                    yield b'\n'
                # Only emit the XML declaration if the heartbeat path has
                # not already sent one.
                yield tostring(result_elem,
                               xml_declaration=not yielded_anything)
            except ErrorResponse as err_resp:
                if yielded_anything:
                    # Too late to change the status line; append the error
                    # document to what has already been sent.
                    err_resp.xml_declaration = False
                    yield b'\n'
                else:
                    # Oh good, we can still change HTTP status code, too!
                    resp.status = err_resp.status
                # Render the error as a WSGI app with a throwaway
                # start_response and forward its body.
                for chunk in err_resp({}, lambda *a: None):
                    yield chunk

        resp = HTTPOk()  # assume we're good for now... but see above!
        resp.app_iter = reiterate(response_iter())
        resp.content_type = "application/xml"

        return resp
Example #11
0
    def POST(self, req):
        """
        Handles Complete Multipart Upload.

        Builds a manifest from the client's part list, registers a
        size-checking hook in the request environment, then writes the
        manifest with a heartbeating PUT (``multipart-manifest=put``) and
        removes the upload record.  The PUT, the cleanup and the XML result
        are all produced lazily by a generator that becomes the response
        body.

        :param req: the request being handled; its ``uploadId`` parameter
                    names the upload to complete and its body holds the
                    ``CompleteMultipartUpload`` XML document
        :returns: an ``HTTPOk`` response of type ``application/xml``.  An
                  ``ErrorResponse`` raised inside the generator before
                  anything was yielded replaces the response status; once
                  output has started the error is appended to the body
        :raises InvalidRequest: if the request has no body
        :raises BadDigest: if a Content-MD5 was supplied and does not match
                           the body
        :raises InvalidPartOrder: if part numbers are not strictly increasing
        :raises InvalidPart: if a part's ETag is missing or is not 32
                             lowercase hex digits
        :raises MalformedXML: if the body fails to parse or validate
        """
        upload_id = req.params['uploadId']
        resp = _get_upload_info(req, self.app, upload_id)
        headers = {'Accept': 'application/json'}
        # Carry user metadata recorded on the upload over to the final object.
        for key, val in resp.headers.items():
            _key = key.lower()
            if _key.startswith('x-amz-meta-'):
                headers['x-object-meta-' + _key[11:]] = val

        hct_header = sysmeta_header('object', 'has-content-type')
        if resp.sysmeta_headers.get(hct_header) == 'yes':
            content_type = resp.sysmeta_headers.get(
                sysmeta_header('object', 'content-type'))
        elif hct_header in resp.sysmeta_headers:
            # has-content-type is present but false, so no content type was
            # set on initial upload. In that case, we won't set one on our
            # PUT request. Swift will end up guessing one based on the
            # object name.
            content_type = None
        else:
            content_type = resp.headers.get('Content-Type')

        if content_type:
            headers['Content-Type'] = content_type

        container = req.container_name + MULTIUPLOAD_SUFFIX
        s3_etag_hasher = md5()
        manifest = []
        previous_number = 0
        try:
            xml = req.xml(MAX_COMPLETE_UPLOAD_BODY_SIZE)
            if not xml:
                raise InvalidRequest(msg='You must specify at least one part')
            if 'content-md5' in req.headers:
                # If an MD5 was provided, we need to verify it.
                # Note that S3Request already took care of translating to ETag
                if req.headers['etag'] != md5(xml).hexdigest():
                    raise BadDigest(content_md5=req.headers['content-md5'])
                # We're only interested in the body here, in the
                # multipart-upload controller -- *don't* let it get
                # plumbed down to the object-server
                del req.headers['etag']

            complete_elem = fromstring(
                xml, 'CompleteMultipartUpload', self.logger)
            for part_elem in complete_elem.iterchildren('Part'):
                part_number = int(part_elem.find('./PartNumber').text)

                if part_number <= previous_number:
                    raise InvalidPartOrder(upload_id=upload_id)
                previous_number = part_number

                # An empty <ETag/> element has a ``text`` of None; treat it
                # as an empty string so the length check below rejects it as
                # InvalidPart instead of len() raising TypeError.
                etag = part_elem.find('./ETag').text or ''
                if len(etag) >= 2 and etag[0] == '"' and etag[-1] == '"':
                    # strip double quotes
                    etag = etag[1:-1]
                # Must look like an MD5 hex digest before it is unhexlified.
                if len(etag) != 32 or any(c not in '0123456789abcdef'
                                          for c in etag):
                    raise InvalidPart(upload_id=upload_id,
                                      part_number=part_number)

                manifest.append({
                    'path': '/%s/%s/%s/%d' % (
                        container, req.object_name, upload_id, part_number),
                    'etag': etag})
                # The S3-style ETag is a digest over the parts' raw digests.
                s3_etag_hasher.update(binascii.a2b_hex(etag))
        except (XMLSyntaxError, DocumentInvalid):
            # NB: our schema definitions catch uploads with no parts here
            raise MalformedXML()
        except ErrorResponse:
            raise
        except Exception as e:
            self.logger.error(e)
            raise

        s3_etag = '%s-%d' % (s3_etag_hasher.hexdigest(), len(manifest))
        headers[sysmeta_header('object', 'etag')] = s3_etag
        # Leave base header value blank; SLO will populate
        c_etag = '; s3_etag=%s' % s3_etag
        headers['X-Object-Sysmeta-Container-Update-Override-Etag'] = c_etag

        # Doubles as a marker: the error handling below recognises
        # undersized-segment failures by matching this exact text.
        too_small_message = ('s3api requires that each segment be at least '
                             '%d bytes' % self.conf.min_segment_size)

        def size_checker(manifest):
            # Check the size of each segment except the last and make sure
            # they are all more than the minimum upload chunk size.
            # Note that we need to use the *internal* keys, since we're
            # looking at the manifest that's about to be written.
            # Returns a list of (segment name, message) pairs, empty when
            # every checked segment is large enough.
            return [
                (item['name'], too_small_message)
                for item in manifest[:-1]
                if item and item['bytes'] < self.conf.min_segment_size]

        # NOTE(review): presumably invoked by the SLO middleware while it
        # validates the manifest PUT -- confirm against the hook's consumer.
        req.environ['swift.callback.slo_manifest_hook'] = size_checker
        start_time = time.time()

        def response_iter():
            # NB: XML requires that the XML declaration, if present, be at the
            # very start of the document. Clients *will* call us out on not
            # being valid XML if we pass through whitespace before it.
            # Track whether we've sent anything yet so we can yield out that
            # declaration *first*
            yielded_anything = False

            try:
                try:
                    # TODO: add support for versioning
                    put_resp = req.get_response(
                        self.app, 'PUT', body=json.dumps(manifest),
                        query={'multipart-manifest': 'put',
                               'heartbeat': 'on'},
                        headers=headers)
                    if put_resp.status_int == 202:
                        body = []
                        put_resp.fix_conditional_response()
                        for chunk in put_resp.response_iter:
                            if not chunk.strip():
                                # Whitespace-only chunk: swallowed during the
                                # grace period, otherwise passed through to
                                # the client.
                                if time.time() - start_time < 10:
                                    # Include some grace period to keep
                                    # ceph-s3tests happy
                                    continue
                                if not yielded_anything:
                                    yield (b'<?xml version="1.0" '
                                           b'encoding="UTF-8"?>\n')
                                yielded_anything = True
                                yield chunk
                                continue
                            body.append(chunk)
                        body = json.loads(b''.join(body))
                        if body['Response Status'] != '201 Created':
                            # Map recognised per-segment errors onto their
                            # specific error responses; anything else falls
                            # through to a generic InvalidRequest.
                            for _seg, err in body['Errors']:
                                if err == too_small_message:
                                    raise EntityTooSmall()
                                elif err in ('Etag Mismatch', '404 Not Found'):
                                    raise InvalidPart(upload_id=upload_id)
                            raise InvalidRequest(
                                status=body['Response Status'],
                                msg='\n'.join(': '.join(err)
                                              for err in body['Errors']))
                except BadSwiftRequest as e:
                    # Same mapping for failures reported without heartbeats.
                    msg = str(e)
                    if too_small_message in msg:
                        raise EntityTooSmall(msg)
                    elif ', Etag Mismatch' in msg:
                        raise InvalidPart(upload_id=upload_id)
                    elif ', 404 Not Found' in msg:
                        raise InvalidPart(upload_id=upload_id)
                    else:
                        raise

                # clean up the multipart-upload record
                obj = '%s/%s' % (req.object_name, upload_id)
                try:
                    req.get_response(self.app, 'DELETE', container, obj)
                except NoSuchKey:
                    # We know that this existed long enough for us to HEAD
                    pass

                result_elem = Element('CompleteMultipartUploadResult')

                # NOTE: boto with sig v4 appends port to HTTP_HOST value at
                # the request header when the port is non default value and it
                # makes req.host_url like as http://localhost:8080:8080/path
                # that obviously invalid. Probably it should be resolved at
                # swift.common.swob though, tentatively we are parsing and
                # reconstructing the correct host_url info here.
                # in detail, https://github.com/boto/boto/pull/3513
                parsed_url = urlparse(req.host_url)
                host_url = '%s://%s' % (parsed_url.scheme, parsed_url.hostname)
                # Why are we doing our own port parsing? Because py3 decided
                # to start raising ValueErrors on access after parsing such
                # an invalid port
                # Drop any "user@" prefix and any bracketed IPv6 host so that
                # only the part holding the port is left.
                netloc = parsed_url.netloc.split('@')[-1].split(']')[-1]
                if ':' in netloc:
                    port = netloc.split(':', 2)[1]
                    host_url += ':%s' % port

                SubElement(result_elem, 'Location').text = host_url + req.path
                SubElement(result_elem, 'Bucket').text = req.container_name
                SubElement(result_elem, 'Key').text = req.object_name
                SubElement(result_elem, 'ETag').text = '"%s"' % s3_etag
                # ``resp`` here is the HTTPOk created below: the closure
                # looks the name up when the generator runs, not when it is
                # defined.
                resp.headers.pop('ETag', None)
                if yielded_anything:
                    yield b'\n'
                # Only emit the XML declaration if the heartbeat path has
                # not already sent one.
                yield tostring(result_elem,
                               xml_declaration=not yielded_anything)
            except ErrorResponse as err_resp:
                if yielded_anything:
                    # Too late to change the status line; append the error
                    # document to what has already been sent.
                    err_resp.xml_declaration = False
                    yield b'\n'
                else:
                    # Oh good, we can still change HTTP status code, too!
                    resp.status = err_resp.status
                # Render the error as a WSGI app with a throwaway
                # start_response and forward its body.
                for chunk in err_resp({}, lambda *a: None):
                    yield chunk

        resp = HTTPOk()  # assume we're good for now... but see above!
        resp.app_iter = reiterate(response_iter())
        resp.content_type = "application/xml"

        return resp