コード例 #1
0
def direct_put_object(node,
                      part,
                      account,
                      container,
                      name,
                      contents,
                      content_length=None,
                      etag=None,
                      content_type=None,
                      headers=None,
                      conn_timeout=5,
                      response_timeout=15,
                      chunk_size=65535):
    """
    Put object directly from the object server.

    :param node: node dictionary from the ring
    :param part: partition the container is on
    :param account: account name
    :param container: container name
    :param name: object name
    :param contents: an iterable or string to read object data from
    :param content_length: value to send as content-length header
    :param etag: etag of contents
    :param content_type: value to send as content-type header
    :param headers: additional headers to include in the request
    :param conn_timeout: timeout in seconds for establishing the connection
    :param response_timeout: timeout in seconds for getting the response
    :param chunk_size: if defined, chunk size of data to send.
    :returns: etag from the server response
    :raises ClientException: HTTP PUT request failed
    """

    path = _make_path(account, container, name)
    if headers is None:
        headers = {}
    if etag:
        headers['ETag'] = normalize_etag(etag)
    if content_type is not None:
        headers['Content-Type'] = content_type
    else:
        headers['Content-Type'] = 'application/octet-stream'
    # Incase the caller want to insert an object with specific age
    add_ts = 'X-Timestamp' not in headers

    resp = _make_req(node,
                     part,
                     'PUT',
                     path,
                     gen_headers(headers, add_ts=add_ts),
                     'Object',
                     conn_timeout,
                     response_timeout,
                     contents=contents,
                     content_length=content_length,
                     chunk_size=chunk_size)

    return normalize_etag(resp.getheader('etag'))
コード例 #2
0
ファイル: encrypter.py プロジェクト: KirillArkhipov1/swift-1
    def handle_put(self, req, start_response):
        self._check_headers(req)
        keys = self.get_keys(req.environ, required=['object', 'container'])
        self.encrypt_user_metadata(req, keys)

        enc_input_proxy = EncInputWrapper(self.crypto, keys, req, self.logger)
        req.environ['wsgi.input'] = enc_input_proxy

        resp = self._app_call(req.environ)

        # If an etag is in the response headers and a plaintext etag was
        # calculated, then overwrite the response value with the plaintext etag
        # provided it matches the ciphertext etag. If it does not match then do
        # not overwrite and allow the response value to return to client.
        mod_resp_headers = self._response_headers
        if (is_success(self._get_status_int())
                and enc_input_proxy.plaintext_md5):
            plaintext_etag = enc_input_proxy.plaintext_md5.hexdigest()
            ciphertext_etag = enc_input_proxy.ciphertext_md5.hexdigest()
            mod_resp_headers = [
                (h, v if
                 (h.lower() != 'etag'
                  or normalize_etag(v) != ciphertext_etag) else plaintext_etag)
                for h, v in mod_resp_headers
            ]

        start_response(self._response_status, mod_resp_headers,
                       self._response_exc_info)
        return resp
コード例 #3
0
ファイル: obj.py プロジェクト: irispastal/swift
    def GETorHEAD(self, req):
        had_match = False
        for match_header in ('if-match', 'if-none-match'):
            if match_header not in req.headers:
                continue
            had_match = True
            for value in list_from_csv(req.headers[match_header]):
                value = normalize_etag(value)
                if value.endswith('-N'):
                    # Deal with fake S3-like etags for SLOs uploaded via Swift
                    req.headers[match_header] += ', ' + value[:-2]

        if had_match:
            # Update where to look
            update_etag_is_at_header(req, sysmeta_header('object', 'etag'))

        resp = req.get_response(self.app)

        if req.method == 'HEAD':
            resp.app_iter = None

        for key in ('content-type', 'content-language', 'expires',
                    'cache-control', 'content-disposition',
                    'content-encoding'):
            if 'response-' + key in req.params:
                resp.headers[key] = req.params['response-' + key]

        return resp
コード例 #4
0
def _validate_and_prep_request_headers(req):
    """
    Validate that the value from x-symlink-target header is well formatted
    and that the x-symlink-target-etag header (if present) does not contain
    problematic characters. We assume the caller ensures that
    x-symlink-target header is present in req.headers.

    :param req: HTTP request object
    :returns: a tuple, the full versioned path to the object (as a WSGI string)
              and the X-Symlink-Target-Etag header value which may be None
    :raise: HTTPPreconditionFailed if x-symlink-target value
            is not well formatted.
    :raise: HTTPBadRequest if the x-symlink-target value points to the request
            path.
    :raise: HTTPBadRequest if the x-symlink-target-etag value contains
            a semicolon, double-quote, or backslash.
    """
    # N.B. check_path_header doesn't assert the leading slash and
    # copy middleware may accept the format. In the symlink, API
    # says apparently to use "container/object" format so add the
    # validation first, here.
    error_body = 'X-Symlink-Target header must be of the form ' \
                 '<container name>/<object name>'
    if wsgi_unquote(req.headers[TGT_OBJ_SYMLINK_HDR]).startswith('/'):
        raise HTTPPreconditionFailed(body=error_body,
                                     request=req,
                                     content_type='text/plain')

    # check container and object format
    container, obj = check_path_header(req, TGT_OBJ_SYMLINK_HDR, 2, error_body)
    req.headers[TGT_OBJ_SYMLINK_HDR] = wsgi_quote('%s/%s' % (container, obj))

    # Check account format if it exists
    account = check_account_format(
        req, wsgi_unquote(req.headers[TGT_ACCT_SYMLINK_HDR])) \
        if TGT_ACCT_SYMLINK_HDR in req.headers else None

    # Extract request path
    _junk, req_acc, req_cont, req_obj = req.split_path(4, 4, True)

    if account:
        req.headers[TGT_ACCT_SYMLINK_HDR] = wsgi_quote(account)
    else:
        account = req_acc

    # Check if symlink targets the symlink itself or not
    if (account, container, obj) == (req_acc, req_cont, req_obj):
        raise HTTPBadRequest(body='Symlink cannot target itself',
                             request=req,
                             content_type='text/plain')
    etag = normalize_etag(req.headers.get(TGT_ETAG_SYMLINK_HDR, None))
    if etag and any(c in etag for c in ';"\\'):
        # See cgi.parse_header for why the above chars are problematic
        raise HTTPBadRequest(body='Bad %s format' %
                             TGT_ETAG_SYMLINK_HDR.title(),
                             request=req,
                             content_type='text/plain')
    if not (etag or req.headers.get('Content-Type')):
        req.headers['Content-Type'] = 'application/symlink'
    return '/v1/%s/%s/%s' % (account, container, obj), etag
コード例 #5
0
    def test_async_updates_after_PUT_and_POST(self):
        # verify correct update values when PUT update and POST updates are
        # missed but then async updates are sent
        cpart, cnodes = self.container_ring.get_nodes(self.account, 'c1')
        client.put_container(self.url, self.token, 'c1',
                             headers={'X-Storage-Policy':
                                      self.policy.name})

        # PUT and POST to object while one container server is stopped so that
        # we force async updates to it
        kill_server((cnodes[0]['ip'], cnodes[0]['port']), self.ipport2server)
        content = u'stuff'
        client.put_object(self.url, self.token, 'c1', 'o1', contents=content,
                          content_type='test/ctype')
        meta = client.head_object(self.url, self.token, 'c1', 'o1')

        int_client = self.make_internal_client()
        int_client.set_object_metadata(
            self.account, 'c1', 'o1', {'X-Object-Meta-Fruit': 'Tomato'})
        self.assertEqual(
            'Tomato',
            int_client.get_object_metadata(self.account, 'c1', 'o1')
            ['x-object-meta-fruit'])  # sanity

        # re-start the container server and assert that it does not yet know
        # about the object
        start_server((cnodes[0]['ip'], cnodes[0]['port']), self.ipport2server)
        self.assertFalse(direct_client.direct_get_container(
            cnodes[0], cpart, self.account, 'c1')[1])

        # Run the object-updaters to send the async pendings
        Manager(['object-updater']).once()

        # check the re-started container server got same update as others.
        # we cannot assert the actual etag value because it may be encrypted
        listing_etags = set()
        for cnode in cnodes:
            listing = direct_client.direct_get_container(
                cnode, cpart, self.account, 'c1')[1]
            self.assertEqual(1, len(listing))
            self.assertEqual(len(content), listing[0]['bytes'])
            self.assertEqual('test/ctype', listing[0]['content_type'])
            listing_etags.add(listing[0]['hash'])
        self.assertEqual(1, len(listing_etags))

        # check that listing meta returned to client is consistent with object
        # meta returned to client
        hdrs, listing = client.get_container(self.url, self.token, 'c1')
        self.assertEqual(1, len(listing))
        self.assertEqual('o1', listing[0]['name'])
        self.assertEqual(len(content), listing[0]['bytes'])
        self.assertEqual(normalize_etag(meta['etag']), listing[0]['hash'])
        self.assertEqual('test/ctype', listing[0]['content_type'])
コード例 #6
0
    def GETorHEAD(self, req):
        had_match = False
        for match_header in ('if-match', 'if-none-match'):
            if match_header not in req.headers:
                continue
            had_match = True
            for value in list_from_csv(req.headers[match_header]):
                value = normalize_etag(value)
                if value.endswith('-N'):
                    # Deal with fake S3-like etags for SLOs uploaded via Swift
                    req.headers[match_header] += ', ' + value[:-2]

        if had_match:
            # Update where to look
            update_etag_is_at_header(req, sysmeta_header('object', 'etag'))

        object_name = req.object_name
        version_id = version_id_param(req)

        query = {} if version_id is None else {'version-id': version_id}
        if version_id not in ('null', None):
            container_info = req.get_container_info(self.app)
            if not container_info.get(
                    'sysmeta', {}).get('versions-container', ''):
                # Versioning has never been enabled
                raise NoSuchVersion(object_name, version_id)

        cors_rule = None
        if req.headers.get('Origin'):
            cors_rule = get_cors(self.app, req, req.method,
                                 req.headers.get('Origin'))
        resp = req.get_response(self.app, query=query)

        if req.method == 'HEAD':
            resp.app_iter = None

        if 'x-amz-meta-deleted' in resp.headers:
            raise NoSuchKey(object_name)

        for key in ('content-type', 'content-language', 'expires',
                    'cache-control', 'content-disposition',
                    'content-encoding'):
            if 'response-' + key in req.params:
                resp.headers[key] = req.params['response-' + key]

        if cors_rule is not None:
            cors_fill_headers(req, resp, cors_rule)
        return resp
コード例 #7
0
    def test_async_update_after_PUT(self):
        cpart, cnodes = self.container_ring.get_nodes(self.account, 'c1')
        client.put_container(self.url, self.token, 'c1',
                             headers={'X-Storage-Policy':
                                      self.policy.name})

        # put an object while one container server is stopped so that we force
        # an async update to it
        kill_server((cnodes[0]['ip'], cnodes[0]['port']), self.ipport2server)
        content = u'stuff'
        client.put_object(self.url, self.token, 'c1', 'o1', contents=content,
                          content_type='test/ctype')
        meta = client.head_object(self.url, self.token, 'c1', 'o1')

        # re-start the container server and assert that it does not yet know
        # about the object
        start_server((cnodes[0]['ip'], cnodes[0]['port']), self.ipport2server)
        self.assertFalse(direct_client.direct_get_container(
            cnodes[0], cpart, self.account, 'c1')[1])

        # Run the object-updaters to be sure updates are done
        Manager(['object-updater']).once()

        # check the re-started container server got same update as others.
        # we cannot assert the actual etag value because it may be encrypted
        listing_etags = set()
        for cnode in cnodes:
            listing = direct_client.direct_get_container(
                cnode, cpart, self.account, 'c1')[1]
            self.assertEqual(1, len(listing))
            self.assertEqual(len(content), listing[0]['bytes'])
            self.assertEqual('test/ctype', listing[0]['content_type'])
            listing_etags.add(listing[0]['hash'])
        self.assertEqual(1, len(listing_etags))

        # check that listing meta returned to client is consistent with object
        # meta returned to client
        hdrs, listing = client.get_container(self.url, self.token, 'c1')
        self.assertEqual(1, len(listing))
        self.assertEqual('o1', listing[0]['name'])
        self.assertEqual(len(content), listing[0]['bytes'])
        self.assertEqual(normalize_etag(meta['etag']), listing[0]['hash'])
        self.assertEqual('test/ctype', listing[0]['content_type'])
コード例 #8
0
    def GETorHEAD(self, req):
        had_match = False
        for match_header in ('if-match', 'if-none-match'):
            if match_header not in req.headers:
                continue
            had_match = True
            for value in list_from_csv(req.headers[match_header]):
                value = normalize_etag(value)
                if value.endswith('-N'):
                    # Deal with fake S3-like etags for SLOs uploaded via Swift
                    req.headers[match_header] += ', ' + value[:-2]

        if had_match:
            # Update where to look
            update_etag_is_at_header(req, sysmeta_header('object', 'etag'))

        object_name = req.object_name
        version_id = req.params.get('versionId')
        if version_id not in ('null', None) and \
                'object_versioning' not in get_swift_info():
            raise S3NotImplemented()
        query = {} if version_id is None else {'version-id': version_id}
        resp = req.get_response(self.app, query=query)

        if req.method == 'HEAD':
            resp.app_iter = None

        if 'x-amz-meta-deleted' in resp.headers:
            raise NoSuchKey(object_name)

        for key in ('content-type', 'content-language', 'expires',
                    'cache-control', 'content-disposition',
                    'content-encoding'):
            if 'response-' + key in req.params:
                resp.headers[key] = req.params['response-' + key]

        return resp
コード例 #9
0
    def POST(self, req):
        """
        Handles Complete Multipart Upload.
        """
        upload_id = req.params['uploadId']
        resp = _get_upload_info(req, self.app, upload_id)
        headers = {
            'Accept': 'application/json',
            sysmeta_header('object', 'upload-id'): upload_id
        }
        for key, val in resp.headers.items():
            _key = key.lower()
            if _key.startswith('x-amz-meta-'):
                headers['x-object-meta-' + _key[11:]] = val

        hct_header = sysmeta_header('object', 'has-content-type')
        if resp.sysmeta_headers.get(hct_header) == 'yes':
            content_type = resp.sysmeta_headers.get(
                sysmeta_header('object', 'content-type'))
        elif hct_header in resp.sysmeta_headers:
            # has-content-type is present but false, so no content type was
            # set on initial upload. In that case, we won't set one on our
            # PUT request. Swift will end up guessing one based on the
            # object name.
            content_type = None
        else:
            content_type = resp.headers.get('Content-Type')

        if content_type:
            headers['Content-Type'] = content_type

        container = req.container_name + MULTIUPLOAD_SUFFIX
        s3_etag_hasher = md5(usedforsecurity=False)
        manifest = []
        previous_number = 0
        try:
            xml = req.xml(MAX_COMPLETE_UPLOAD_BODY_SIZE)
            if not xml:
                raise InvalidRequest(msg='You must specify at least one part')
            if 'content-md5' in req.headers:
                # If an MD5 was provided, we need to verify it.
                # Note that S3Request already took care of translating to ETag
                if req.headers['etag'] != md5(
                        xml, usedforsecurity=False).hexdigest():
                    raise BadDigest(content_md5=req.headers['content-md5'])
                # We're only interested in the body here, in the
                # multipart-upload controller -- *don't* let it get
                # plumbed down to the object-server
                del req.headers['etag']

            complete_elem = fromstring(xml, 'CompleteMultipartUpload',
                                       self.logger)
            for part_elem in complete_elem.iterchildren('Part'):
                part_number = int(part_elem.find('./PartNumber').text)

                if part_number <= previous_number:
                    raise InvalidPartOrder(upload_id=upload_id)
                previous_number = part_number

                etag = normalize_etag(part_elem.find('./ETag').text)
                if len(etag) != 32 or any(c not in '0123456789abcdef'
                                          for c in etag):
                    raise InvalidPart(upload_id=upload_id,
                                      part_number=part_number)

                manifest.append({
                    'path':
                    '/%s/%s/%s/%d' %
                    (wsgi_to_str(container), wsgi_to_str(
                        req.object_name), upload_id, part_number),
                    'etag':
                    etag
                })
                s3_etag_hasher.update(binascii.a2b_hex(etag))
        except (XMLSyntaxError, DocumentInvalid):
            # NB: our schema definitions catch uploads with no parts here
            raise MalformedXML()
        except ErrorResponse:
            raise
        except Exception as e:
            self.logger.error(e)
            raise

        s3_etag = '%s-%d' % (s3_etag_hasher.hexdigest(), len(manifest))
        s3_etag_header = sysmeta_header('object', 'etag')
        if resp.sysmeta_headers.get(s3_etag_header) == s3_etag:
            # This header should only already be present if the upload marker
            # has been cleaned up and the current target uses the same
            # upload-id; assuming the segments to use haven't changed, the work
            # is already done
            return HTTPOk(body=_make_complete_body(req, s3_etag, False),
                          content_type='application/xml')
        headers[s3_etag_header] = s3_etag
        # Leave base header value blank; SLO will populate
        c_etag = '; s3_etag=%s' % s3_etag
        headers[get_container_update_override_key('etag')] = c_etag

        too_small_message = ('s3api requires that each segment be at least '
                             '%d bytes' % self.conf.min_segment_size)

        def size_checker(manifest):
            # Check the size of each segment except the last and make sure
            # they are all more than the minimum upload chunk size.
            # Note that we need to use the *internal* keys, since we're
            # looking at the manifest that's about to be written.
            return [(item['name'], too_small_message) for item in manifest[:-1]
                    if item and item['bytes'] < self.conf.min_segment_size]

        req.environ['swift.callback.slo_manifest_hook'] = size_checker
        start_time = time.time()

        def response_iter():
            # NB: XML requires that the XML declaration, if present, be at the
            # very start of the document. Clients *will* call us out on not
            # being valid XML if we pass through whitespace before it.
            # Track whether we've sent anything yet so we can yield out that
            # declaration *first*
            yielded_anything = False

            try:
                try:
                    # TODO: add support for versioning
                    put_resp = req.get_response(self.app,
                                                'PUT',
                                                body=json.dumps(manifest),
                                                query={
                                                    'multipart-manifest':
                                                    'put',
                                                    'heartbeat': 'on'
                                                },
                                                headers=headers)
                    if put_resp.status_int == 202:
                        body = []
                        put_resp.fix_conditional_response()
                        for chunk in put_resp.response_iter:
                            if not chunk.strip():
                                if time.time() - start_time < 10:
                                    # Include some grace period to keep
                                    # ceph-s3tests happy
                                    continue
                                if not yielded_anything:
                                    yield (b'<?xml version="1.0" '
                                           b'encoding="UTF-8"?>\n')
                                yielded_anything = True
                                yield chunk
                                continue
                            body.append(chunk)
                        body = json.loads(b''.join(body))
                        if body['Response Status'] != '201 Created':
                            for seg, err in body['Errors']:
                                if err == too_small_message:
                                    raise EntityTooSmall()
                                elif err in ('Etag Mismatch', '404 Not Found'):
                                    raise InvalidPart(upload_id=upload_id)
                            raise InvalidRequest(
                                status=body['Response Status'],
                                msg='\n'.join(': '.join(err)
                                              for err in body['Errors']))
                except BadSwiftRequest as e:
                    msg = str(e)
                    if too_small_message in msg:
                        raise EntityTooSmall(msg)
                    elif ', Etag Mismatch' in msg:
                        raise InvalidPart(upload_id=upload_id)
                    elif ', 404 Not Found' in msg:
                        raise InvalidPart(upload_id=upload_id)
                    else:
                        raise

                # clean up the multipart-upload record
                obj = '%s/%s' % (req.object_name, upload_id)
                try:
                    req.get_response(self.app, 'DELETE', container, obj)
                except NoSuchKey:
                    # The important thing is that we wrote out a tombstone to
                    # make sure the marker got cleaned up. If it's already
                    # gone (e.g., because of concurrent completes or a retried
                    # complete), so much the better.
                    pass

                yield _make_complete_body(req, s3_etag, yielded_anything)
            except ErrorResponse as err_resp:
                if yielded_anything:
                    err_resp.xml_declaration = False
                    yield b'\n'
                else:
                    # Oh good, we can still change HTTP status code, too!
                    resp.status = err_resp.status
                for chunk in err_resp({}, lambda *a: None):
                    yield chunk

        resp = HTTPOk()  # assume we're good for now... but see above!
        resp.app_iter = reiterate(response_iter())
        resp.content_type = "application/xml"

        return resp
コード例 #10
0
ファイル: sync.py プロジェクト: irispastal/swift
    def container_sync_row(self, row, sync_to, user_key, broker, info, realm,
                           realm_key):
        """
        Sends the update the row indicates to the sync_to container.
        Update can be either delete or put.

        :param row: The updated row in the local database triggering the sync
                    update.
        :param sync_to: The URL to the remote container.
        :param user_key: The X-Container-Sync-Key to use when sending requests
                         to the other container.
        :param broker: The local container database broker.
        :param info: The get_info result from the local container database
                     broker.
        :param realm: The realm from self.realms_conf, if there is one.
            If None, fallback to using the older allowed_sync_hosts
            way of syncing.
        :param realm_key: The realm key from self.realms_conf, if there
            is one. If None, fallback to using the older
            allowed_sync_hosts way of syncing.
        :returns: True on success
        """
        try:
            start_time = time()
            # extract last modified time from the created_at value
            ts_data, ts_ctype, ts_meta = decode_timestamps(row['created_at'])
            if row['deleted']:
                # when sync'ing a deleted object, use ts_data - this is the
                # timestamp of the source tombstone
                try:
                    headers = {'x-timestamp': ts_data.internal}
                    self._update_sync_to_headers(row['name'], sync_to,
                                                 user_key, realm, realm_key,
                                                 'DELETE', headers)
                    delete_object(sync_to,
                                  name=row['name'],
                                  headers=headers,
                                  proxy=self.select_http_proxy(),
                                  logger=self.logger,
                                  timeout=self.conn_timeout)
                except ClientException as err:
                    if err.http_status != HTTP_NOT_FOUND:
                        raise
                self.container_deletes += 1
                self.container_stats['deletes'] += 1
                self.logger.increment('deletes')
                self.logger.timing_since('deletes.timing', start_time)
            else:
                # when sync'ing a live object, use ts_meta - this is the time
                # at which the source object was last modified by a PUT or POST
                if self._object_in_remote_container(row['name'], sync_to,
                                                    user_key, realm, realm_key,
                                                    ts_meta):
                    return True
                exc = None
                # look up for the newest one; the symlink=get query-string has
                # no effect unless symlinks are enabled in the internal client
                # in which case it ensures that symlink objects retain their
                # symlink property when sync'd.
                headers_out = {
                    'X-Newest':
                    True,
                    'X-Backend-Storage-Policy-Index':
                    str(info['storage_policy_index'])
                }
                try:
                    source_obj_status, headers, body = \
                        self.swift.get_object(info['account'],
                                              info['container'], row['name'],
                                              headers=headers_out,
                                              acceptable_statuses=(2, 4),
                                              params={'symlink': 'get'})

                except (Exception, UnexpectedResponse, Timeout) as err:
                    headers = {}
                    body = None
                    exc = err
                timestamp = Timestamp(headers.get('x-timestamp', 0))
                if timestamp < ts_meta:
                    if exc:
                        raise exc
                    raise Exception(
                        _('Unknown exception trying to GET: '
                          '%(account)r %(container)r %(object)r'), {
                              'account': info['account'],
                              'container': info['container'],
                              'object': row['name']
                          })
                for key in ('date', 'last-modified'):
                    if key in headers:
                        del headers[key]
                if 'etag' in headers:
                    headers['etag'] = normalize_etag(headers['etag'])
                if 'content-type' in headers:
                    headers['content-type'] = clean_content_type(
                        headers['content-type'])
                self._update_sync_to_headers(row['name'], sync_to, user_key,
                                             realm, realm_key, 'PUT', headers)
                put_object(sync_to,
                           name=row['name'],
                           headers=headers,
                           contents=FileLikeIter(body),
                           proxy=self.select_http_proxy(),
                           logger=self.logger,
                           timeout=self.conn_timeout)
                self.container_puts += 1
                self.container_stats['puts'] += 1
                self.container_stats['bytes'] += row['size']
                self.logger.increment('puts')
                self.logger.timing_since('puts.timing', start_time)
        except ClientException as err:
            if err.http_status == HTTP_UNAUTHORIZED:
                self.logger.info(
                    _('Unauth %(sync_from)r => %(sync_to)r'), {
                        'sync_from':
                        '%s/%s' %
                        (quote(info['account']), quote(info['container'])),
                        'sync_to':
                        sync_to
                    })
            elif err.http_status == HTTP_NOT_FOUND:
                self.logger.info(
                    _('Not found %(sync_from)r => %(sync_to)r \
                      - object %(obj_name)r'), {
                        'sync_from':
                        '%s/%s' %
                        (quote(info['account']), quote(info['container'])),
                        'sync_to':
                        sync_to,
                        'obj_name':
                        row['name']
                    })
            else:
                self.logger.exception(_('ERROR Syncing %(db_file)s %(row)s'), {
                    'db_file': str(broker),
                    'row': row
                })
            self.container_failures += 1
            self.logger.increment('failures')
            return False
        except (Exception, Timeout) as err:
            self.logger.exception(_('ERROR Syncing %(db_file)s %(row)s'), {
                'db_file': str(broker),
                'row': row
            })
            self.container_failures += 1
            self.logger.increment('failures')
            return False
        return True
コード例 #11
0
ファイル: dlo.py プロジェクト: this-pama/swift-1
    def get_or_head_response(self, req, x_object_manifest):
        '''
        :param req: user's request
        :param x_object_manifest: as unquoted, native string
        '''
        response_headers = self._response_headers

        container, obj_prefix = x_object_manifest.split('/', 1)

        version, account, _junk = req.split_path(2, 3, True)
        version = wsgi_to_str(version)
        account = wsgi_to_str(account)
        error_response, segments = self._get_container_listing(
            req, version, account, container, obj_prefix)
        if error_response:
            return error_response
        have_complete_listing = len(segments) < \
            constraints.CONTAINER_LISTING_LIMIT

        first_byte = last_byte = None
        actual_content_length = None
        content_length_for_swob_range = None
        if req.range and len(req.range.ranges) == 1:
            content_length_for_swob_range = sum(o['bytes'] for o in segments)

            # This is a hack to handle suffix byte ranges (e.g. "bytes=-5"),
            # which we can't honor unless we have a complete listing.
            _junk, range_end = req.range.ranges_for_length(float("inf"))[0]

            # If this is all the segments, we know whether or not this
            # range request is satisfiable.
            #
            # Alternately, we may not have all the segments, but this range
            # falls entirely within the first page's segments, so we know
            # that it is satisfiable.
            if (have_complete_listing
               or range_end < content_length_for_swob_range):
                byteranges = req.range.ranges_for_length(
                    content_length_for_swob_range)
                if not byteranges:
                    headers = {'Accept-Ranges': 'bytes'}
                    if have_complete_listing:
                        headers['Content-Range'] = 'bytes */%d' % (
                            content_length_for_swob_range, )
                    return HTTPRequestedRangeNotSatisfiable(
                        request=req, headers=headers)
                first_byte, last_byte = byteranges[0]
                # For some reason, swob.Range.ranges_for_length adds 1 to the
                # last byte's position.
                last_byte -= 1
                actual_content_length = last_byte - first_byte + 1
            else:
                # The range may or may not be satisfiable, but we can't tell
                # based on just one page of listing, and we're not going to go
                # get more pages because that would use up too many resources,
                # so we ignore the Range header and return the whole object.
                actual_content_length = None
                content_length_for_swob_range = None
                req.range = None
        else:
            req.range = None

        response_headers = [
            (h, v) for h, v in response_headers
            if h.lower() not in ("content-length", "content-range")]

        if content_length_for_swob_range is not None:
            # Here, we have to give swob a big-enough content length so that
            # it can compute the actual content length based on the Range
            # header. This value will not be visible to the client; swob will
            # substitute its own Content-Length.
            #
            # Note: if the manifest points to at least CONTAINER_LISTING_LIMIT
            # segments, this may be less than the sum of all the segments'
            # sizes. However, it'll still be greater than the last byte in the
            # Range header, so it's good enough for swob.
            response_headers.append(('Content-Length',
                                     str(content_length_for_swob_range)))
        elif have_complete_listing:
            actual_content_length = sum(o['bytes'] for o in segments)
            response_headers.append(('Content-Length',
                                     str(actual_content_length)))

        if have_complete_listing:
            response_headers = [(h, v) for h, v in response_headers
                                if h.lower() != "etag"]
            etag = md5()
            for seg_dict in segments:
                etag.update(normalize_etag(seg_dict['hash']).encode('utf8'))
            response_headers.append(('Etag', '"%s"' % etag.hexdigest()))

        app_iter = None
        if req.method == 'GET':
            listing_iter = RateLimitedIterator(
                self._segment_listing_iterator(
                    req, version, account, container, obj_prefix, segments,
                    first_byte=first_byte, last_byte=last_byte),
                self.dlo.rate_limit_segments_per_sec,
                limit_after=self.dlo.rate_limit_after_segment)

            app_iter = SegmentedIterable(
                req, self.dlo.app, listing_iter, ua_suffix="DLO MultipartGET",
                swift_source="DLO", name=req.path, logger=self.logger,
                max_get_time=self.dlo.max_get_time,
                response_body_length=actual_content_length)

            try:
                app_iter.validate_first_segment()
            except HTTPException as err_resp:
                return err_resp
            except (SegmentError, ListingIterError):
                return HTTPConflict(request=req)

        resp = Response(request=req, headers=response_headers,
                        conditional_response=True,
                        app_iter=app_iter)

        return resp
コード例 #12
0
    def POST(self, req):
        """
        Handles Complete Multipart Upload.
        """
        upload_id = req.params['uploadId']
        resp = _get_upload_info(req, self.app, upload_id)
        headers = {'Accept': 'application/json'}
        for key, val in resp.headers.items():
            _key = key.lower()
            if _key.startswith('x-amz-meta-'):
                headers['x-object-meta-' + _key[11:]] = val

        hct_header = sysmeta_header('object', 'has-content-type')
        if resp.sysmeta_headers.get(hct_header) == 'yes':
            content_type = resp.sysmeta_headers.get(
                sysmeta_header('object', 'content-type'))
        elif hct_header in resp.sysmeta_headers:
            # has-content-type is present but false, so no content type was
            # set on initial upload. In that case, we won't set one on our
            # PUT request. Swift will end up guessing one based on the
            # object name.
            content_type = None
        else:
            content_type = resp.headers.get('Content-Type')

        if content_type:
            headers['Content-Type'] = content_type

        container = req.container_name + MULTIUPLOAD_SUFFIX
        s3_etag_hasher = md5()
        manifest = []
        previous_number = 0
        try:
            xml = req.xml(MAX_COMPLETE_UPLOAD_BODY_SIZE)
            if not xml:
                raise InvalidRequest(msg='You must specify at least one part')
            if 'content-md5' in req.headers:
                # If an MD5 was provided, we need to verify it.
                # Note that S3Request already took care of translating to ETag
                if req.headers['etag'] != md5(xml).hexdigest():
                    raise BadDigest(content_md5=req.headers['content-md5'])
                # We're only interested in the body here, in the
                # multipart-upload controller -- *don't* let it get
                # plumbed down to the object-server
                del req.headers['etag']

            complete_elem = fromstring(xml, 'CompleteMultipartUpload',
                                       self.logger)
            for part_elem in complete_elem.iterchildren('Part'):
                part_number = int(part_elem.find('./PartNumber').text)

                if part_number <= previous_number:
                    raise InvalidPartOrder(upload_id=upload_id)
                previous_number = part_number

                etag = normalize_etag(part_elem.find('./ETag').text)
                if len(etag) != 32 or any(c not in '0123456789abcdef'
                                          for c in etag):
                    raise InvalidPart(upload_id=upload_id,
                                      part_number=part_number)

                manifest.append({
                    'path':
                    '/%s/%s/%s/%d' %
                    (container, req.object_name, upload_id, part_number),
                    'etag':
                    etag
                })
                s3_etag_hasher.update(binascii.a2b_hex(etag))
        except (XMLSyntaxError, DocumentInvalid):
            # NB: our schema definitions catch uploads with no parts here
            raise MalformedXML()
        except ErrorResponse:
            raise
        except Exception as e:
            self.logger.error(e)
            raise

        s3_etag = '%s-%d' % (s3_etag_hasher.hexdigest(), len(manifest))
        headers[sysmeta_header('object', 'etag')] = s3_etag
        # Leave base header value blank; SLO will populate
        c_etag = '; s3_etag=%s' % s3_etag
        headers[get_container_update_override_key('etag')] = c_etag

        too_small_message = ('s3api requires that each segment be at least '
                             '%d bytes' % self.conf.min_segment_size)

        def size_checker(manifest):
            # Check the size of each segment except the last and make sure
            # they are all more than the minimum upload chunk size.
            # Note that we need to use the *internal* keys, since we're
            # looking at the manifest that's about to be written.
            return [(item['name'], too_small_message) for item in manifest[:-1]
                    if item and item['bytes'] < self.conf.min_segment_size]

        req.environ['swift.callback.slo_manifest_hook'] = size_checker
        start_time = time.time()

        def response_iter():
            # NB: XML requires that the XML declaration, if present, be at the
            # very start of the document. Clients *will* call us out on not
            # being valid XML if we pass through whitespace before it.
            # Track whether we've sent anything yet so we can yield out that
            # declaration *first*
            yielded_anything = False

            try:
                try:
                    # TODO: add support for versioning
                    put_resp = req.get_response(self.app,
                                                'PUT',
                                                body=json.dumps(manifest),
                                                query={
                                                    'multipart-manifest':
                                                    'put',
                                                    'heartbeat': 'on'
                                                },
                                                headers=headers)
                    if put_resp.status_int == 202:
                        body = []
                        put_resp.fix_conditional_response()
                        for chunk in put_resp.response_iter:
                            if not chunk.strip():
                                if time.time() - start_time < 10:
                                    # Include some grace period to keep
                                    # ceph-s3tests happy
                                    continue
                                if not yielded_anything:
                                    yield (b'<?xml version="1.0" '
                                           b'encoding="UTF-8"?>\n')
                                yielded_anything = True
                                yield chunk
                                continue
                            body.append(chunk)
                        body = json.loads(b''.join(body))
                        if body['Response Status'] != '201 Created':
                            for seg, err in body['Errors']:
                                if err == too_small_message:
                                    raise EntityTooSmall()
                                elif err in ('Etag Mismatch', '404 Not Found'):
                                    raise InvalidPart(upload_id=upload_id)
                            raise InvalidRequest(
                                status=body['Response Status'],
                                msg='\n'.join(': '.join(err)
                                              for err in body['Errors']))
                except BadSwiftRequest as e:
                    msg = str(e)
                    if too_small_message in msg:
                        raise EntityTooSmall(msg)
                    elif ', Etag Mismatch' in msg:
                        raise InvalidPart(upload_id=upload_id)
                    elif ', 404 Not Found' in msg:
                        raise InvalidPart(upload_id=upload_id)
                    else:
                        raise

                # clean up the multipart-upload record
                obj = '%s/%s' % (req.object_name, upload_id)
                try:
                    req.get_response(self.app, 'DELETE', container, obj)
                except NoSuchKey:
                    # We know that this existed long enough for us to HEAD
                    pass

                result_elem = Element('CompleteMultipartUploadResult')

                # NOTE: boto with sig v4 appends port to HTTP_HOST value at
                # the request header when the port is non default value and it
                # makes req.host_url like as http://localhost:8080:8080/path
                # that obviously invalid. Probably it should be resolved at
                # swift.common.swob though, tentatively we are parsing and
                # reconstructing the correct host_url info here.
                # in detail, https://github.com/boto/boto/pull/3513
                parsed_url = urlparse(req.host_url)
                host_url = '%s://%s' % (parsed_url.scheme, parsed_url.hostname)
                # Why are we doing our own port parsing? Because py3 decided
                # to start raising ValueErrors on access after parsing such
                # an invalid port
                netloc = parsed_url.netloc.split('@')[-1].split(']')[-1]
                if ':' in netloc:
                    port = netloc.split(':', 2)[1]
                    host_url += ':%s' % port

                SubElement(result_elem, 'Location').text = host_url + req.path
                SubElement(result_elem, 'Bucket').text = req.container_name
                SubElement(result_elem, 'Key').text = req.object_name
                SubElement(result_elem, 'ETag').text = '"%s"' % s3_etag
                resp.headers.pop('ETag', None)
                if yielded_anything:
                    yield b'\n'
                yield tostring(result_elem,
                               xml_declaration=not yielded_anything)
            except ErrorResponse as err_resp:
                if yielded_anything:
                    err_resp.xml_declaration = False
                    yield b'\n'
                else:
                    # Oh good, we can still change HTTP status code, too!
                    resp.status = err_resp.status
                for chunk in err_resp({}, lambda *a: None):
                    yield chunk

        resp = HTTPOk()  # assume we're good for now... but see above!
        resp.app_iter = reiterate(response_iter())
        resp.content_type = "application/xml"

        return resp
コード例 #13
0
 def _assert_object_metadata_matches_listing(self, listing, metadata):
     self.assertEqual(listing['bytes'], int(metadata['content-length']))
     self.assertEqual(listing['hash'], normalize_etag(metadata['etag']))
     self.assertEqual(listing['content_type'], metadata['content-type'])
     modified = Timestamp(metadata['x-timestamp']).isoformat
     self.assertEqual(listing['last_modified'], modified)
コード例 #14
0
    def GET(self, req):
        """
        Handle GET Bucket (List Objects) request
        """

        max_keys = req.get_validated_param('max-keys',
                                           self.conf.max_bucket_listing)
        # TODO: Separate max_bucket_listing and default_bucket_listing
        tag_max_keys = max_keys
        max_keys = min(max_keys, self.conf.max_bucket_listing)

        encoding_type = req.params.get('encoding-type')
        if encoding_type is not None and encoding_type != 'url':
            err_msg = 'Invalid Encoding Method specified in Request'
            raise InvalidArgument('encoding-type', encoding_type, err_msg)

        query = {
            'format': 'json',
            'limit': max_keys + 1,
        }
        if 'prefix' in req.params:
            query.update({'prefix': req.params['prefix']})
        if 'delimiter' in req.params:
            query.update({'delimiter': req.params['delimiter']})
        fetch_owner = False
        if 'versions' in req.params:
            listing_type = 'object-versions'
            if 'key-marker' in req.params:
                query.update({'marker': req.params['key-marker']})
            elif 'version-id-marker' in req.params:
                err_msg = ('A version-id marker cannot be specified without '
                           'a key marker.')
                raise InvalidArgument('version-id-marker',
                                      req.params['version-id-marker'], err_msg)
        elif int(req.params.get('list-type', '1')) == 2:
            listing_type = 'version-2'
            if 'start-after' in req.params:
                query.update({'marker': req.params['start-after']})
            # continuation-token overrides start-after
            if 'continuation-token' in req.params:
                decoded = b64decode(req.params['continuation-token'])
                if not six.PY2:
                    decoded = decoded.decode('utf8')
                query.update({'marker': decoded})
            if 'fetch-owner' in req.params:
                fetch_owner = config_true_value(req.params['fetch-owner'])
        else:
            listing_type = 'version-1'
            if 'marker' in req.params:
                query.update({'marker': req.params['marker']})

        resp = req.get_response(self.app, query=query)

        objects = json.loads(resp.body)

        # in order to judge that truncated is valid, check whether
        # max_keys + 1 th element exists in swift.
        is_truncated = max_keys > 0 and len(objects) > max_keys
        objects = objects[:max_keys]

        if listing_type == 'object-versions':
            elem = Element('ListVersionsResult')
            SubElement(elem, 'Name').text = req.container_name
            SubElement(elem, 'Prefix').text = req.params.get('prefix')
            SubElement(elem, 'KeyMarker').text = req.params.get('key-marker')
            SubElement(
                elem,
                'VersionIdMarker').text = req.params.get('version-id-marker')
            if is_truncated:
                if 'name' in objects[-1]:
                    SubElement(elem, 'NextKeyMarker').text = \
                        objects[-1]['name']
                if 'subdir' in objects[-1]:
                    SubElement(elem, 'NextKeyMarker').text = \
                        objects[-1]['subdir']
                SubElement(elem, 'NextVersionIdMarker').text = 'null'
        else:
            elem = Element('ListBucketResult')
            SubElement(elem, 'Name').text = req.container_name
            SubElement(elem, 'Prefix').text = req.params.get('prefix')
            if listing_type == 'version-1':
                SubElement(elem, 'Marker').text = req.params.get('marker')
                if is_truncated and 'delimiter' in req.params:
                    if 'name' in objects[-1]:
                        name = objects[-1]['name']
                    else:
                        name = objects[-1]['subdir']
                    if encoding_type == 'url':
                        name = quote(name.encode('utf-8'))
                    SubElement(elem, 'NextMarker').text = name
            elif listing_type == 'version-2':
                if is_truncated:
                    if 'name' in objects[-1]:
                        SubElement(elem, 'NextContinuationToken').text = \
                            b64encode(objects[-1]['name'].encode('utf-8'))
                    if 'subdir' in objects[-1]:
                        SubElement(elem, 'NextContinuationToken').text = \
                            b64encode(objects[-1]['subdir'].encode('utf-8'))
                if 'continuation-token' in req.params:
                    SubElement(elem, 'ContinuationToken').text = \
                        req.params['continuation-token']
                if 'start-after' in req.params:
                    SubElement(elem, 'StartAfter').text = \
                        req.params['start-after']
                SubElement(elem, 'KeyCount').text = str(len(objects))

        SubElement(elem, 'MaxKeys').text = str(tag_max_keys)

        if 'delimiter' in req.params:
            SubElement(elem, 'Delimiter').text = req.params['delimiter']

        if encoding_type == 'url':
            SubElement(elem, 'EncodingType').text = encoding_type

        SubElement(elem, 'IsTruncated').text = \
            'true' if is_truncated else 'false'

        for o in objects:
            if 'subdir' not in o:
                name = o['name']
                if encoding_type == 'url':
                    name = quote(name.encode('utf-8'))

                if listing_type == 'object-versions':
                    contents = SubElement(elem, 'Version')
                    SubElement(contents, 'Key').text = name
                    SubElement(contents, 'VersionId').text = 'null'
                    SubElement(contents, 'IsLatest').text = 'true'
                else:
                    contents = SubElement(elem, 'Contents')
                    SubElement(contents, 'Key').text = name
                SubElement(contents, 'LastModified').text = \
                    o['last_modified'][:-3] + 'Z'
                if 's3_etag' in o:
                    # New-enough MUs are already in the right format
                    etag = o['s3_etag']
                elif 'slo_etag' in o:
                    # SLOs may be in something *close* to the MU format
                    etag = '"%s-N"' % swob.normalize_etag(o['slo_etag'])
                else:
                    etag = o['hash']
                    if len(etag) < 2 or etag[::len(etag) - 1] != '""':
                        # Normal objects just use the MD5
                        etag = '"%s"' % o['hash']
                        # This also catches sufficiently-old SLOs, but we have
                        # no way to identify those from container listings
                    # Otherwise, somebody somewhere (proxyfs, maybe?) made this
                    # look like an RFC-compliant ETag; we don't need to
                    # quote-wrap.
                SubElement(contents, 'ETag').text = etag
                SubElement(contents, 'Size').text = str(o['bytes'])
                if fetch_owner or listing_type != 'version-2':
                    owner = SubElement(contents, 'Owner')
                    SubElement(owner, 'ID').text = req.user_id
                    SubElement(owner, 'DisplayName').text = req.user_id
                SubElement(contents, 'StorageClass').text = 'STANDARD'

        for o in objects:
            if 'subdir' in o:
                common_prefixes = SubElement(elem, 'CommonPrefixes')
                name = o['subdir']
                if encoding_type == 'url':
                    name = quote(name.encode('utf-8'))
                SubElement(common_prefixes, 'Prefix').text = name

        body = tostring(elem)

        return HTTPOk(body=body, content_type='application/xml')