Example #1
    def get_controller(self, req):
        """
        Get the controller to handle a request.

        :param req: the request
        :returns: tuple of (controller class, path dictionary)

        :raises: ValueError (thrown by split_path) if given invalid path
        """
        if req.path == '/info':
            d = dict(version=None,
                     expose_info=self.expose_info,
                     disallowed_sections=self.disallowed_sections,
                     admin_key=self.admin_key)
            return InfoController, d

        version, account, container, obj = split_path(req.path, 1, 4, True)
        d = dict(version=version,
                 account_name=account,
                 container_name=container,
                 object_name=obj)
        if account and not valid_api_version(version):
            raise APIVersionError('Invalid path')
        if obj and container and account:
            info = get_container_info(req.environ, self)
            policy_index = req.headers.get('X-Backend-Storage-Policy-Index',
                                           info['storage_policy'])
            policy = POLICIES.get_by_index(policy_index)
            if not policy:
                # This indicates that a new policy has been created,
                # with rings, deployed, released (i.e. deprecated =
                # False), used by a client to create a container via
                # another proxy that was restarted after the policy
                # was released, and is now cached - all before this
                # worker was HUPed to stop accepting new
                # connections.  There should never be an "unknown"
                # index - but when there is - it's probably operator
                # error and hopefully temporary.
                raise HTTPServiceUnavailable('Unknown Storage Policy')
            return self.obj_controller_router[policy], d
        elif container and account:
            return ContainerController, d
        elif account and not container and not obj:
            return AccountController, d
        return None, d
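
A minimal sketch of the path parsing that drives the routing above: split_path (from swift.common.utils, already used in the method) pads missing segments with None, and the presence of account/container/object decides which controller handles the request. The paths and names below are made up.

    from swift.common.utils import split_path

    version, account, container, obj = split_path(
        '/v1/AUTH_test/images/cat.jpg', 1, 4, True)
    # account='AUTH_test', container='images', obj='cat.jpg' -> object controller

    version, account, container, obj = split_path('/v1/AUTH_test/images', 1, 4, True)
    # obj is None -> ContainerController

    version, account, container, obj = split_path('/v1/AUTH_test', 1, 4, True)
    # container and obj are None -> AccountController
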
    def handle_obj_versions_delete(self, req, object_versions, account_name,
                                   container_name, object_name):
        lcontainer = object_versions.split('/')[0]
        prefix_len = '%03x' % len(object_name)
        lprefix = prefix_len + object_name + '/'

        item_iter = self._listing_iter(account_name, lcontainer, lprefix, req)

        authed = False
        for previous_version in item_iter:
            if not authed:
                # we're about to start making COPY requests - need to
                # validate the write access to the versioned container
                if 'swift.authorize' in req.environ:
                    container_info = get_container_info(req.environ, self.app)
                    req.acl = container_info.get('write_acl')
                    aresp = req.environ['swift.authorize'](req)
                    if aresp:
                        return aresp
                    authed = True

            # there are older versions so copy the previous version to the
            # current object and delete the previous version
            prev_obj_name = previous_version['name'].encode('utf-8')

            copy_path = '/v1/' + account_name + '/' + \
                        lcontainer + '/' + prev_obj_name

            copy_headers = {
                'X-Newest': 'True',
                'Destination': container_name + '/' + object_name,
                'x-auth-token': req.headers.get('x-auth-token')
            }

            copy_req = make_pre_authed_request(req.environ,
                                               path=copy_path,
                                               headers=copy_headers,
                                               method='COPY',
                                               swift_source='VW')
            copy_resp = copy_req.get_response(self.app)

            # if the version isn't there, keep trying with previous version
            if copy_resp.status_int == HTTP_NOT_FOUND:
                continue

            if not is_success(copy_resp.status_int):
                if is_client_error(copy_resp.status_int):
                    # some user error, maybe permissions
                    return HTTPPreconditionFailed(request=req)
                else:
                    # could not copy the data, bail
                    return HTTPServiceUnavailable(request=req)

            # reset these because the COPY changed them
            new_del_req = make_pre_authed_request(req.environ,
                                                  path=copy_path,
                                                  method='DELETE',
                                                  swift_source='VW')
            req = new_del_req

            # remove 'X-If-Delete-At', since it is not for the older copy
            if 'X-If-Delete-At' in req.headers:
                del req.headers['X-If-Delete-At']
            break

        # handle DELETE request here in case it was modified
        return req.get_response(self.app)
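
The listing prefix used above encodes the object name's length as three hex digits, so archived copies of one object never collide with those of another object whose name merely starts the same way. A small worked example (the object name is arbitrary):

    object_name = 'photo.jpg'
    prefix_len = '%03x' % len(object_name)    # '009'
    lprefix = prefix_len + object_name + '/'  # '009photo.jpg/'
    # every archived version of photo.jpg is listed under '009photo.jpg/<timestamp>'
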
    def handle_obj_versions_put(self, req, object_versions, object_name,
                                policy_index):
        ret = None

        # do a HEAD request to check object versions
        _headers = {
            'X-Newest': 'True',
            'X-Backend-Storage-Policy-Index': policy_index,
            'x-auth-token': req.headers.get('x-auth-token')
        }

        # make a pre-authed request in case the user has write access
        # to the container, but not read access. This was allowed before
        # this middleware existed, so keep the same behavior here
        head_req = make_pre_authed_request(req.environ,
                                           path=req.path_info,
                                           headers=_headers,
                                           method='HEAD',
                                           swift_source='VW')
        hresp = head_req.get_response(self.app)

        is_dlo_manifest = 'X-Object-Manifest' in req.headers or \
                          'X-Object-Manifest' in hresp.headers

        # if there's an existing object, then copy it to
        # X-Versions-Location
        if is_success(hresp.status_int) and not is_dlo_manifest:
            lcontainer = object_versions.split('/')[0]
            prefix_len = '%03x' % len(object_name)
            lprefix = prefix_len + object_name + '/'
            ts_source = hresp.environ.get('swift_x_timestamp')
            if ts_source is None:
                ts_source = time.mktime(
                    time.strptime(hresp.headers['last-modified'],
                                  '%a, %d %b %Y %H:%M:%S GMT'))
            new_ts = Timestamp(ts_source).internal
            vers_obj_name = lprefix + new_ts
            copy_headers = {
                'Destination': '%s/%s' % (lcontainer, vers_obj_name),
                'x-auth-token': req.headers.get('x-auth-token')
            }

            # The COPY implementation sets X-Newest to True when it internally
            # does a GET on the source object, so we don't have to explicitly
            # set it in the request headers here.
            copy_req = make_pre_authed_request(req.environ,
                                               path=req.path_info,
                                               headers=copy_headers,
                                               method='COPY',
                                               swift_source='VW')
            copy_resp = copy_req.get_response(self.app)

            if is_success(copy_resp.status_int):
                # success versioning previous existing object
                # return None and handle original request
                ret = None
            else:
                if is_client_error(copy_resp.status_int):
                    # missing container or bad permissions
                    ret = HTTPPreconditionFailed(request=req)
                else:
                    # could not copy the data, bail
                    ret = HTTPServiceUnavailable(request=req)

        else:
            if hresp.status_int == HTTP_NOT_FOUND or is_dlo_manifest:
                # nothing to version
                # return None and handle original request
                ret = None
            else:
                # if not HTTP_NOT_FOUND, return error immediately
                ret = hresp

        return ret
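
The archived copy created above is named lprefix plus the source object's internal timestamp. Assuming Timestamp.internal renders as a zero-padded '%016.05f' string (which is how swift.common.utils.Timestamp formats a timestamp with no offset), the name works out roughly like this; the values are illustrative:

    ts_source = 1400000000.123456
    new_ts = '%016.05f' % ts_source           # '1400000000.12346'
    vers_obj_name = '009photo.jpg/' + new_ts  # '009photo.jpg/1400000000.12346'
    # lexicographic order of these names matches chronological order
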
Example #4
    def DELETE(self, req):
        """HTTP DELETE request handler."""
        container_info = self.container_info(
            self.account_name, self.container_name, req)
        # pass the policy index to storage nodes via req header
        policy_index = req.headers.get('X-Backend-Storage-Policy-Index',
                                       container_info['storage_policy'])
        obj_ring = self.app.get_object_ring(policy_index)
        # pass the policy index to storage nodes via req header
        req.headers['X-Backend-Storage-Policy-Index'] = policy_index
        container_partition = container_info['partition']
        containers = container_info['nodes']
        req.acl = container_info['write_acl']
        req.environ['swift_sync_key'] = container_info['sync_key']
        object_versions = container_info['versions']
        if object_versions:
            # this is a version manifest and needs to be handled differently
            object_versions = unquote(object_versions)
            lcontainer = object_versions.split('/')[0]
            prefix_len = '%03x' % len(self.object_name)
            lprefix = prefix_len + self.object_name + '/'
            last_item = None
            try:
                for last_item in self._listing_iter(lcontainer, lprefix,
                                                    req.environ):
                    pass
            except ListingIterNotFound:
                # no worries, last_item is None
                pass
            except ListingIterNotAuthorized as err:
                return err.aresp
            except ListingIterError:
                return HTTPServerError(request=req)
            if last_item:
                # there are older versions so copy the previous version to the
                # current object and delete the previous version
                orig_container = self.container_name
                orig_obj = self.object_name
                self.container_name = lcontainer
                self.object_name = last_item['name'].encode('utf-8')
                copy_path = '/v1/' + self.account_name + '/' + \
                            self.container_name + '/' + self.object_name
                copy_headers = {'X-Newest': 'True',
                                'Destination': orig_container + '/' + orig_obj
                                }
                copy_environ = {'REQUEST_METHOD': 'COPY',
                                'swift_versioned_copy': True
                                }
                creq = Request.blank(copy_path, headers=copy_headers,
                                     environ=copy_environ)
                copy_resp = self.COPY(creq)
                if is_client_error(copy_resp.status_int):
                    # some user error, maybe permissions
                    return HTTPPreconditionFailed(request=req)
                elif not is_success(copy_resp.status_int):
                    # could not copy the data, bail
                    return HTTPServiceUnavailable(request=req)
                # reset these because the COPY changed them
                self.container_name = lcontainer
                self.object_name = last_item['name'].encode('utf-8')
                new_del_req = Request.blank(copy_path, environ=req.environ)
                container_info = self.container_info(
                    self.account_name, self.container_name, req)
                policy_idx = container_info['storage_policy']
                obj_ring = self.app.get_object_ring(policy_idx)
                # pass the policy index to storage nodes via req header
                new_del_req.headers['X-Backend-Storage-Policy-Index'] = \
                    policy_idx
                container_partition = container_info['partition']
                containers = container_info['nodes']
                new_del_req.acl = container_info['write_acl']
                new_del_req.path_info = copy_path
                req = new_del_req
                # remove 'X-If-Delete-At', since it is not for the older copy
                if 'X-If-Delete-At' in req.headers:
                    del req.headers['X-If-Delete-At']
        if 'swift.authorize' in req.environ:
            aresp = req.environ['swift.authorize'](req)
            if aresp:
                return aresp
        if not containers:
            return HTTPNotFound(request=req)
        partition, nodes = obj_ring.get_nodes(
            self.account_name, self.container_name, self.object_name)
        # Used by container sync feature
        if 'x-timestamp' in req.headers:
            try:
                req_timestamp = Timestamp(req.headers['X-Timestamp'])
            except ValueError:
                return HTTPBadRequest(
                    request=req, content_type='text/plain',
                    body='X-Timestamp should be a UNIX timestamp float value; '
                         'was %r' % req.headers['x-timestamp'])
            req.headers['X-Timestamp'] = req_timestamp.internal
        else:
            req.headers['X-Timestamp'] = Timestamp(time.time()).internal

        headers = self._backend_requests(
            req, len(nodes), container_partition, containers)
        resp = self.make_requests(req, obj_ring,
                                  partition, 'DELETE', req.swift_entity_path,
                                  headers)
        return resp
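
The loop above that drains _listing_iter relies on the archive naming scheme: because each name ends in a zero-padded timestamp, an ascending listing yields versions oldest-first, so whatever is left in last_item after the loop is the newest archived copy. A sketch with made-up entries:

    listing = iter([
        {'name': u'009photo.jpg/1400000000.00000'},
        {'name': u'009photo.jpg/1400000100.00000'},   # newest version
    ])
    last_item = None
    for last_item in listing:
        pass
    # last_item['name'] == u'009photo.jpg/1400000100.00000'
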
Example #5
    def PUT(self, req):
        """HTTP PUT request handler."""
        if req.if_none_match is not None and '*' not in req.if_none_match:
            # Sending an etag with if-none-match isn't currently supported
            return HTTPBadRequest(request=req, content_type='text/plain',
                                  body='If-None-Match only supports *')
        container_info = self.container_info(
            self.account_name, self.container_name, req)
        policy_index = req.headers.get('X-Backend-Storage-Policy-Index',
                                       container_info['storage_policy'])
        obj_ring = self.app.get_object_ring(policy_index)
        # pass the policy index to storage nodes via req header
        req.headers['X-Backend-Storage-Policy-Index'] = policy_index
        container_partition = container_info['partition']
        containers = container_info['nodes']
        req.acl = container_info['write_acl']
        req.environ['swift_sync_key'] = container_info['sync_key']
        object_versions = container_info['versions']
        if 'swift.authorize' in req.environ:
            aresp = req.environ['swift.authorize'](req)
            if aresp:
                return aresp
        if not containers:
            return HTTPNotFound(request=req)
        try:
            ml = req.message_length()
        except ValueError as e:
            return HTTPBadRequest(request=req, content_type='text/plain',
                                  body=str(e))
        except AttributeError as e:
            return HTTPNotImplemented(request=req, content_type='text/plain',
                                      body=str(e))
        if ml is not None and ml > constraints.MAX_FILE_SIZE:
            return HTTPRequestEntityTooLarge(request=req)
        if 'x-delete-after' in req.headers:
            try:
                x_delete_after = int(req.headers['x-delete-after'])
            except ValueError:
                return HTTPBadRequest(request=req,
                                      content_type='text/plain',
                                      body='Non-integer X-Delete-After')
            req.headers['x-delete-at'] = normalize_delete_at_timestamp(
                time.time() + x_delete_after)
        partition, nodes = obj_ring.get_nodes(
            self.account_name, self.container_name, self.object_name)
        # do a HEAD request for container sync and checking object versions
        if 'x-timestamp' in req.headers or \
                (object_versions and not
                 req.environ.get('swift_versioned_copy')):
            # make sure proxy-server uses the right policy index
            _headers = {'X-Backend-Storage-Policy-Index': policy_index,
                        'X-Newest': 'True'}
            hreq = Request.blank(req.path_info, headers=_headers,
                                 environ={'REQUEST_METHOD': 'HEAD'})
            hresp = self.GETorHEAD_base(
                hreq, _('Object'), obj_ring, partition,
                hreq.swift_entity_path)
        # Used by container sync feature
        if 'x-timestamp' in req.headers:
            try:
                req_timestamp = Timestamp(req.headers['X-Timestamp'])
                if hresp.environ and 'swift_x_timestamp' in hresp.environ and \
                        hresp.environ['swift_x_timestamp'] >= req_timestamp:
                    return HTTPAccepted(request=req)
            except ValueError:
                return HTTPBadRequest(
                    request=req, content_type='text/plain',
                    body='X-Timestamp should be a UNIX timestamp float value; '
                         'was %r' % req.headers['x-timestamp'])
            req.headers['X-Timestamp'] = req_timestamp.internal
        else:
            req.headers['X-Timestamp'] = Timestamp(time.time()).internal
        # Sometimes the 'content-type' header exists, but is set to None.
        content_type_manually_set = True
        detect_content_type = \
            config_true_value(req.headers.get('x-detect-content-type'))
        if detect_content_type or not req.headers.get('content-type'):
            guessed_type, _junk = mimetypes.guess_type(req.path_info)
            req.headers['Content-Type'] = guessed_type or \
                'application/octet-stream'
            if detect_content_type:
                req.headers.pop('x-detect-content-type')
            else:
                content_type_manually_set = False

        error_response = check_object_creation(req, self.object_name) or \
            check_content_type(req)
        if error_response:
            return error_response
        if object_versions and not req.environ.get('swift_versioned_copy'):
            if hresp.status_int != HTTP_NOT_FOUND:
                # This is a version manifest and needs to be handled
                # differently. First copy the existing data to a new object,
                # then write the data from this request to the version manifest
                # object.
                lcontainer = object_versions.split('/')[0]
                prefix_len = '%03x' % len(self.object_name)
                lprefix = prefix_len + self.object_name + '/'
                ts_source = hresp.environ.get('swift_x_timestamp')
                if ts_source is None:
                    ts_source = time.mktime(time.strptime(
                                            hresp.headers['last-modified'],
                                            '%a, %d %b %Y %H:%M:%S GMT'))
                new_ts = Timestamp(ts_source).internal
                vers_obj_name = lprefix + new_ts
                copy_headers = {
                    'Destination': '%s/%s' % (lcontainer, vers_obj_name)}
                copy_environ = {'REQUEST_METHOD': 'COPY',
                                'swift_versioned_copy': True
                                }
                copy_req = Request.blank(req.path_info, headers=copy_headers,
                                         environ=copy_environ)
                copy_resp = self.COPY(copy_req)
                if is_client_error(copy_resp.status_int):
                    # missing container or bad permissions
                    return HTTPPreconditionFailed(request=req)
                elif not is_success(copy_resp.status_int):
                    # could not copy the data, bail
                    return HTTPServiceUnavailable(request=req)

        reader = req.environ['wsgi.input'].read
        data_source = iter(lambda: reader(self.app.client_chunk_size), '')
        source_header = req.headers.get('X-Copy-From')
        source_resp = None
        if source_header:
            if req.environ.get('swift.orig_req_method', req.method) != 'POST':
                req.environ.setdefault('swift.log_info', []).append(
                    'x-copy-from:%s' % source_header)
            src_container_name, src_obj_name = check_copy_from_header(req)
            ver, acct, _rest = req.split_path(2, 3, True)
            if isinstance(acct, unicode):
                acct = acct.encode('utf-8')
            source_header = '/%s/%s/%s/%s' % (ver, acct,
                                              src_container_name, src_obj_name)
            source_req = req.copy_get()
            # make sure the source request uses its container_info
            source_req.headers.pop('X-Backend-Storage-Policy-Index', None)
            source_req.path_info = source_header
            source_req.headers['X-Newest'] = 'true'
            orig_obj_name = self.object_name
            orig_container_name = self.container_name
            self.object_name = src_obj_name
            self.container_name = src_container_name
            sink_req = Request.blank(req.path_info,
                                     environ=req.environ, headers=req.headers)
            source_resp = self.GET(source_req)
            # This gives middlewares a way to change the source; for example,
            # this lets you COPY a SLO manifest and have the new object be the
            # concatenation of the segments (like what a GET request gives
            # the client), not a copy of the manifest file.
            hook = req.environ.get(
                'swift.copy_hook',
                (lambda source_req, source_resp, sink_req: source_resp))
            source_resp = hook(source_req, source_resp, sink_req)

            if source_resp.status_int >= HTTP_MULTIPLE_CHOICES:
                return source_resp
            self.object_name = orig_obj_name
            self.container_name = orig_container_name
            data_source = iter(source_resp.app_iter)
            sink_req.content_length = source_resp.content_length
            if sink_req.content_length is None:
                # This indicates a transfer-encoding: chunked source object,
                # which currently only happens because there are more than
                # CONTAINER_LISTING_LIMIT segments in a segmented object. In
                # this case, we're going to refuse to do the server-side copy.
                return HTTPRequestEntityTooLarge(request=req)
            if sink_req.content_length > constraints.MAX_FILE_SIZE:
                return HTTPRequestEntityTooLarge(request=req)
            sink_req.etag = source_resp.etag
            # we no longer need the X-Copy-From header
            del sink_req.headers['X-Copy-From']
            if not content_type_manually_set:
                sink_req.headers['Content-Type'] = \
                    source_resp.headers['Content-Type']
            if not config_true_value(
                    sink_req.headers.get('x-fresh-metadata', 'false')):
                copy_headers_into(source_resp, sink_req)
                copy_headers_into(req, sink_req)
            # copy over x-static-large-object for POSTs and manifest copies
            if 'X-Static-Large-Object' in source_resp.headers and \
                    req.params.get('multipart-manifest') == 'get':
                sink_req.headers['X-Static-Large-Object'] = \
                    source_resp.headers['X-Static-Large-Object']

            req = sink_req

        if 'x-delete-at' in req.headers:
            try:
                x_delete_at = normalize_delete_at_timestamp(
                    int(req.headers['x-delete-at']))
                if int(x_delete_at) < time.time():
                    return HTTPBadRequest(
                        body='X-Delete-At in past', request=req,
                        content_type='text/plain')
            except ValueError:
                return HTTPBadRequest(request=req, content_type='text/plain',
                                      body='Non-integer X-Delete-At')
            req.environ.setdefault('swift.log_info', []).append(
                'x-delete-at:%s' % x_delete_at)
            delete_at_container = normalize_delete_at_timestamp(
                int(x_delete_at) /
                self.app.expiring_objects_container_divisor *
                self.app.expiring_objects_container_divisor)
            delete_at_part, delete_at_nodes = \
                self.app.container_ring.get_nodes(
                    self.app.expiring_objects_account, delete_at_container)
        else:
            delete_at_container = delete_at_part = delete_at_nodes = None

        node_iter = GreenthreadSafeIterator(
            self.iter_nodes_local_first(obj_ring, partition))
        pile = GreenPile(len(nodes))
        te = req.headers.get('transfer-encoding', '')
        chunked = ('chunked' in te)

        outgoing_headers = self._backend_requests(
            req, len(nodes), container_partition, containers,
            delete_at_container, delete_at_part, delete_at_nodes)

        for nheaders in outgoing_headers:
            # RFC2616:8.2.3 disallows 100-continue without a body
            if (req.content_length > 0) or chunked:
                nheaders['Expect'] = '100-continue'
            pile.spawn(self._connect_put_node, node_iter, partition,
                       req.swift_entity_path, nheaders,
                       self.app.logger.thread_locals)

        conns = [conn for conn in pile if conn]
        min_conns = quorum_size(len(nodes))

        if req.if_none_match is not None and '*' in req.if_none_match:
            statuses = [conn.resp.status for conn in conns if conn.resp]
            if HTTP_PRECONDITION_FAILED in statuses:
                # If we find any copy of the file, it shouldn't be uploaded
                self.app.logger.debug(
                    _('Object PUT returning 412, %(statuses)r'),
                    {'statuses': statuses})
                return HTTPPreconditionFailed(request=req)

        if len(conns) < min_conns:
            self.app.logger.error(
                _('Object PUT returning 503, %(conns)s/%(nodes)s '
                  'required connections'),
                {'conns': len(conns), 'nodes': min_conns})
            return HTTPServiceUnavailable(request=req)
        bytes_transferred = 0
        try:
            with ContextPool(len(nodes)) as pool:
                for conn in conns:
                    conn.failed = False
                    conn.queue = Queue(self.app.put_queue_depth)
                    pool.spawn(self._send_file, conn, req.path)
                while True:
                    with ChunkReadTimeout(self.app.client_timeout):
                        try:
                            chunk = next(data_source)
                        except StopIteration:
                            if chunked:
                                for conn in conns:
                                    conn.queue.put('0\r\n\r\n')
                            break
                    bytes_transferred += len(chunk)
                    if bytes_transferred > constraints.MAX_FILE_SIZE:
                        return HTTPRequestEntityTooLarge(request=req)
                    for conn in list(conns):
                        if not conn.failed:
                            conn.queue.put(
                                '%x\r\n%s\r\n' % (len(chunk), chunk)
                                if chunked else chunk)
                        else:
                            conns.remove(conn)
                    if len(conns) < min_conns:
                        self.app.logger.error(_(
                            'Object PUT exceptions during'
                            ' send, %(conns)s/%(nodes)s required connections'),
                            {'conns': len(conns), 'nodes': min_conns})
                        return HTTPServiceUnavailable(request=req)
                for conn in conns:
                    if conn.queue.unfinished_tasks:
                        conn.queue.join()
            conns = [conn for conn in conns if not conn.failed]
        except ChunkReadTimeout as err:
            self.app.logger.warn(
                _('ERROR Client read timeout (%ss)'), err.seconds)
            self.app.logger.increment('client_timeouts')
            return HTTPRequestTimeout(request=req)
        except (Exception, Timeout):
            self.app.logger.exception(
                _('ERROR Exception causing client disconnect'))
            return HTTPClientDisconnect(request=req)
        if req.content_length and bytes_transferred < req.content_length:
            req.client_disconnect = True
            self.app.logger.warn(
                _('Client disconnected without sending enough data'))
            self.app.logger.increment('client_disconnects')
            return HTTPClientDisconnect(request=req)

        statuses, reasons, bodies, etags = self._get_put_responses(req, conns,
                                                                   nodes)

        if len(etags) > 1:
            self.app.logger.error(
                _('Object servers returned %s mismatched etags'), len(etags))
            return HTTPServerError(request=req)
        etag = etags.pop() if len(etags) else None
        resp = self.best_response(req, statuses, reasons, bodies,
                                  _('Object PUT'), etag=etag)
        if source_header:
            resp.headers['X-Copied-From'] = quote(
                source_header.split('/', 3)[3])
            if 'last-modified' in source_resp.headers:
                resp.headers['X-Copied-From-Last-Modified'] = \
                    source_resp.headers['last-modified']
            copy_headers_into(req, resp)
        resp.last_modified = math.ceil(
            float(Timestamp(req.headers['X-Timestamp'])))
        return resp
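
The X-Delete-At handling in the PUT above buckets expiring objects into containers by rounding the expiry time down to a multiple of expiring_objects_container_divisor (commonly 86400, i.e. one container per day, though the value is configurable). A worked example with an arbitrary expiry time:

    divisor = 86400
    x_delete_at = 1400000123
    delete_at_container = x_delete_at // divisor * divisor
    # 1400000123 // 86400 = 16203; 16203 * 86400 = 1399939200
    # so the expirer entry for this object lands in container '1399939200'
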
Example #6
    def PUT(self, req):
        """HTTP PUT request handler."""
        if req.if_none_match is not None and '*' not in req.if_none_match:
            # Sending an etag with if-none-match isn't currently supported
            return HTTPBadRequest(request=req,
                                  content_type='text/plain',
                                  body='If-None-Match only supports *')
        container_info = self.container_info(self.account_name,
                                             self.container_name, req)
        policy_index = req.headers.get('X-Backend-Storage-Policy-Index',
                                       container_info['storage_policy'])
        obj_ring = self.app.get_object_ring(policy_index)

        # pass the policy index to storage nodes via req header
        req.headers['X-Backend-Storage-Policy-Index'] = policy_index
        container_partition = container_info['partition']
        containers = container_info['nodes']
        req.acl = container_info['write_acl']
        req.environ['swift_sync_key'] = container_info['sync_key']
        object_versions = container_info['versions']
        if 'swift.authorize' in req.environ:
            aresp = req.environ['swift.authorize'](req)
            if aresp:
                return aresp

        if not containers:
            return HTTPNotFound(request=req)

        # Sometimes the 'content-type' header exists, but is set to None.
        content_type_manually_set = True
        detect_content_type = \
            config_true_value(req.headers.get('x-detect-content-type'))
        if detect_content_type or not req.headers.get('content-type'):
            guessed_type, _junk = mimetypes.guess_type(req.path_info)
            req.headers['Content-Type'] = guessed_type or \
                'application/octet-stream'
            if detect_content_type:
                req.headers.pop('x-detect-content-type')
            else:
                content_type_manually_set = False

        error_response = check_object_creation(req, self.object_name) or \
            check_content_type(req)
        if error_response:
            return error_response

        partition, nodes = obj_ring.get_nodes(self.account_name,
                                              self.container_name,
                                              self.object_name)
        ####################################  CHANGED_CODE  ############################################################
        # Filter the nodes list returned by the ring so that only devices that are not spun down are used.
        d = dict()
        # d[partition] = nodes[1:]
        # f.write(str(d)+"\n")
        # f.close()
        print("===Original Nodes===")
        print(nodes)
        temp_nodes = []
        flag = 0
        f = open("/home/hduser/swift/swift/proxy/controllers/spindowndevices",
                 "r")
        sdlist = f.read().split("\n")
        print("===Spun down devices===:", sdlist)
        f.close()

        upnodes = [item for item in nodes if item['device'] not in sdlist]
        downnodes = [item for item in nodes if item['device'] in sdlist]
        temp_nodes = upnodes
        if (len(downnodes) > 0):
            d = ast.literal_eval(
                open("/home/hduser/swift/swift/proxy/controllers/nodes.txt",
                     "r").read())
            # d_temp=pickle.load("/home/hduser/swift/proxy/controllers/nodes.p","rb")
            # print("===Current dict===:",d)
            for item in downnodes:
                if (partition in d):
                    d[partition].append(item)
                    # print("===Modified dict===:",d)
                else:
                    d[partition] = [item]
                    # print("===Modified dict===:",d)
        # pickle.dump(d,open("/home/hduser/nodes.p","wb"))
        # print("Before writing:",d)
        fo = open("/home/hduser/swift/swift/proxy/controllers/nodes.txt", "w")
        fo.write(str(d) + "\n")
        fo.close()
        # pickle.dump(d,open("/home/hduser/swift/swift/proxy/controllers/nodes.p","wb"))
        ## Old method, IGNORE
        # for item in nodes:
        #     device = item['device']
        #     if(device not in sdlist):
        #     # if(os.path.ismount("path"))
        #         temp_nodes.append(item)
        #         flag = 1
        #         break
        #     else:
        #         pickle.dump(d,open("/home/hduser/nodes.p","wb"))
        #         # d = pickle.load(open("/home/hduser/nodes.p","rb"))
        #         import ast
        #         d = ast.literal_eval(open("/home/hduser/nodes.txt","r").read())
        #         print("===Current dict===:",d)
        #         if(partition in d):
        #             print("In IF")
        #             d[partition].append(item)
        #             print("===Modified dict===:",d)
        #         else:
        #             print("In ELSE")
        #             d[partition] = [item]
        #             print("===Modified dict===:",d)
        #         pickle.dump(d,open("/home/hduser/nodes.p","wb"))
        #         fo = open("/home/hduser/nodes.txt","w")
        #         fo.write(str(d)+"\n")

        # Code to spin up a device if none are running already.
        if (len(upnodes) == 0):
            dev = nodes[0]['device']
            print("===ALL NODES DOWN===")
            print("===Mounting device===", dev)
            os.system("mount /dev/" + str(dev))

        print('===In controller PUT===:')
        print("===Partition===", partition)
        nodes = temp_nodes
        print("===Nodes===:", nodes)

        check_ssd()
        ############################################  CHANGED_CODE  ########################################################

        # do a HEAD request for checking object versions
        if object_versions and not req.environ.get('swift_versioned_copy'):
            # make sure proxy-server uses the right policy index
            _headers = {
                'X-Backend-Storage-Policy-Index': policy_index,
                'X-Newest': 'True'
            }
            hreq = Request.blank(req.path_info,
                                 headers=_headers,
                                 environ={'REQUEST_METHOD': 'HEAD'})
            hresp = self.GETorHEAD_base(hreq, _('Object'), obj_ring, partition,
                                        hreq.swift_entity_path)

        # Used by container sync feature
        if 'x-timestamp' in req.headers:
            try:
                req_timestamp = Timestamp(req.headers['X-Timestamp'])
            except ValueError:
                return HTTPBadRequest(
                    request=req,
                    content_type='text/plain',
                    body='X-Timestamp should be a UNIX timestamp float value; '
                    'was %r' % req.headers['x-timestamp'])
            req.headers['X-Timestamp'] = req_timestamp.internal
        else:
            req.headers['X-Timestamp'] = Timestamp(time.time()).internal

        if object_versions and not req.environ.get('swift_versioned_copy'):
            is_manifest = 'X-Object-Manifest' in req.headers or \
                          'X-Object-Manifest' in hresp.headers
            if hresp.status_int != HTTP_NOT_FOUND and not is_manifest:
                # This is a version manifest and needs to be handled
                # differently. First copy the existing data to a new object,
                # then write the data from this request to the version manifest
                # object.
                lcontainer = object_versions.split('/')[0]
                prefix_len = '%03x' % len(self.object_name)
                lprefix = prefix_len + self.object_name + '/'
                ts_source = hresp.environ.get('swift_x_timestamp')
                if ts_source is None:
                    ts_source = time.mktime(
                        time.strptime(hresp.headers['last-modified'],
                                      '%a, %d %b %Y %H:%M:%S GMT'))
                new_ts = Timestamp(ts_source).internal
                vers_obj_name = lprefix + new_ts
                copy_headers = {
                    'Destination': '%s/%s' % (lcontainer, vers_obj_name)
                }
                copy_environ = {
                    'REQUEST_METHOD': 'COPY',
                    'swift_versioned_copy': True
                }
                copy_req = Request.blank(req.path_info,
                                         headers=copy_headers,
                                         environ=copy_environ)
                copy_resp = self.COPY(copy_req)
                if is_client_error(copy_resp.status_int):
                    # missing container or bad permissions
                    return HTTPPreconditionFailed(request=req)
                elif not is_success(copy_resp.status_int):
                    # could not copy the data, bail
                    return HTTPServiceUnavailable(request=req)

        reader = req.environ['wsgi.input'].read
        data_source = iter(lambda: reader(self.app.client_chunk_size), '')
        source_header = req.headers.get('X-Copy-From')
        source_resp = None
        if source_header:
            if req.environ.get('swift.orig_req_method', req.method) != 'POST':
                req.environ.setdefault('swift.log_info', []).append(
                    'x-copy-from:%s' % source_header)
            ver, acct, _rest = req.split_path(2, 3, True)
            src_account_name = req.headers.get('X-Copy-From-Account', None)
            if src_account_name:
                src_account_name = check_account_format(req, src_account_name)
            else:
                src_account_name = acct
            src_container_name, src_obj_name = check_copy_from_header(req)
            source_header = '/%s/%s/%s/%s' % (ver, src_account_name,
                                              src_container_name, src_obj_name)
            source_req = req.copy_get()

            # make sure the source request uses its container_info
            source_req.headers.pop('X-Backend-Storage-Policy-Index', None)
            source_req.path_info = source_header
            source_req.headers['X-Newest'] = 'true'
            orig_obj_name = self.object_name
            orig_container_name = self.container_name
            orig_account_name = self.account_name
            self.object_name = src_obj_name
            self.container_name = src_container_name
            self.account_name = src_account_name
            sink_req = Request.blank(req.path_info,
                                     environ=req.environ,
                                     headers=req.headers)
            source_resp = self.GET(source_req)

            # This gives middlewares a way to change the source; for example,
            # this lets you COPY a SLO manifest and have the new object be the
            # concatenation of the segments (like what a GET request gives
            # the client), not a copy of the manifest file.
            hook = req.environ.get(
                'swift.copy_hook',
                (lambda source_req, source_resp, sink_req: source_resp))
            source_resp = hook(source_req, source_resp, sink_req)

            if source_resp.status_int >= HTTP_MULTIPLE_CHOICES:
                return source_resp
            self.object_name = orig_obj_name
            self.container_name = orig_container_name
            self.account_name = orig_account_name
            data_source = iter(source_resp.app_iter)
            sink_req.content_length = source_resp.content_length
            if sink_req.content_length is None:
                # This indicates a transfer-encoding: chunked source object,
                # which currently only happens because there are more than
                # CONTAINER_LISTING_LIMIT segments in a segmented object. In
                # this case, we're going to refuse to do the server-side copy.
                return HTTPRequestEntityTooLarge(request=req)
            if sink_req.content_length > constraints.MAX_FILE_SIZE:
                return HTTPRequestEntityTooLarge(request=req)
            sink_req.etag = source_resp.etag

            # we no longer need the X-Copy-From header
            del sink_req.headers['X-Copy-From']
            if 'X-Copy-From-Account' in sink_req.headers:
                del sink_req.headers['X-Copy-From-Account']
            if not content_type_manually_set:
                sink_req.headers['Content-Type'] = \
                    source_resp.headers['Content-Type']
            if config_true_value(
                    sink_req.headers.get('x-fresh-metadata', 'false')):
                # post-as-copy: ignore new sysmeta, copy existing sysmeta
                condition = lambda k: is_sys_meta('object', k)
                remove_items(sink_req.headers, condition)
                copy_header_subset(source_resp, sink_req, condition)
            else:
                # copy/update existing sysmeta and user meta
                copy_headers_into(source_resp, sink_req)
                copy_headers_into(req, sink_req)

            # copy over x-static-large-object for POSTs and manifest copies
            if 'X-Static-Large-Object' in source_resp.headers and \
                    req.params.get('multipart-manifest') == 'get':
                sink_req.headers['X-Static-Large-Object'] = \
                    source_resp.headers['X-Static-Large-Object']

            req = sink_req

        req, delete_at_container, delete_at_part, \
            delete_at_nodes = self._config_obj_expiration(req)

        node_iter = GreenthreadSafeIterator(
            self.iter_nodes_local_first(obj_ring, partition))
        pile = GreenPile(len(nodes))
        te = req.headers.get('transfer-encoding', '')
        chunked = ('chunked' in te)

        outgoing_headers = self._backend_requests(
            req, len(nodes), container_partition, containers,
            delete_at_container, delete_at_part, delete_at_nodes)

        for nheaders in outgoing_headers:
            # RFC2616:8.2.3 disallows 100-continue without a body
            if (req.content_length > 0) or chunked:
                nheaders['Expect'] = '100-continue'

#################################  CHANGED_CODE  ###################################################################
# Replaced node_iter with nodes in the following call so that a fresh node list in a different order isn't used.
# Using nodes instead of node_iter makes sure the PUT writes to the same devices selected above.
# Otherwise a new list of nodes would be fetched from the ring in a different order and the first one connected to.

            pile.spawn(self._connect_put_node, nodes, partition,
                       req.swift_entity_path, nheaders,
                       self.app.logger.thread_locals)

#################################  CHANGED_CODE ###################################################################

        conns = [conn for conn in pile if conn]
        min_conns = quorum_size(len(nodes))

        if req.if_none_match is not None and '*' in req.if_none_match:
            statuses = [conn.resp.status for conn in conns if conn.resp]
            if HTTP_PRECONDITION_FAILED in statuses:
                # If we find any copy of the file, it shouldn't be uploaded
                self.app.logger.debug(
                    _('Object PUT returning 412, %(statuses)r'),
                    {'statuses': statuses})
                return HTTPPreconditionFailed(request=req)

        if any(conn for conn in conns
               if conn.resp and conn.resp.status == HTTP_CONFLICT):
            timestamps = [
                HeaderKeyDict(
                    conn.resp.getheaders()).get('X-Backend-Timestamp')
                for conn in conns if conn.resp
            ]
            self.app.logger.debug(
                _('Object PUT returning 202 for 409: '
                  '%(req_timestamp)s <= %(timestamps)r'), {
                      'req_timestamp': req.timestamp.internal,
                      'timestamps': ', '.join(timestamps)
                  })
            return HTTPAccepted(request=req)

        if len(conns) < min_conns:
            self.app.logger.error(
                _('Object PUT returning 503, %(conns)s/%(nodes)s '
                  'required connections'), {
                      'conns': len(conns),
                      'nodes': min_conns
                  })
            return HTTPServiceUnavailable(request=req)
        bytes_transferred = 0
        try:
            with ContextPool(len(nodes)) as pool:
                for conn in conns:
                    conn.failed = False
                    conn.queue = Queue(self.app.put_queue_depth)
                    pool.spawn(self._send_file, conn, req.path)
                while True:
                    with ChunkReadTimeout(self.app.client_timeout):
                        try:
                            chunk = next(data_source)
                        except StopIteration:
                            if chunked:
                                for conn in conns:
                                    conn.queue.put('0\r\n\r\n')
                            break
                    bytes_transferred += len(chunk)
                    if bytes_transferred > constraints.MAX_FILE_SIZE:
                        return HTTPRequestEntityTooLarge(request=req)
                    for conn in list(conns):
                        if not conn.failed:
                            conn.queue.put('%x\r\n%s\r\n' %
                                           (len(chunk),
                                            chunk) if chunked else chunk)
                        else:
                            conns.remove(conn)
                    if len(conns) < min_conns:
                        self.app.logger.error(
                            _('Object PUT exceptions during'
                              ' send, %(conns)s/%(nodes)s required connections'
                              ), {
                                  'conns': len(conns),
                                  'nodes': min_conns
                              })
                        return HTTPServiceUnavailable(request=req)
                for conn in conns:
                    if conn.queue.unfinished_tasks:
                        conn.queue.join()
            conns = [conn for conn in conns if not conn.failed]
        except ChunkReadTimeout as err:
            self.app.logger.warn(_('ERROR Client read timeout (%ss)'),
                                 err.seconds)
            self.app.logger.increment('client_timeouts')
            return HTTPRequestTimeout(request=req)
        except (Exception, Timeout):
            self.app.logger.exception(
                _('ERROR Exception causing client disconnect'))
            return HTTPClientDisconnect(request=req)
        if req.content_length and bytes_transferred < req.content_length:
            req.client_disconnect = True
            self.app.logger.warn(
                _('Client disconnected without sending enough data'))
            self.app.logger.increment('client_disconnects')
            return HTTPClientDisconnect(request=req)

        statuses, reasons, bodies, etags = self._get_put_responses(
            req, conns, nodes)

        if len(etags) > 1:
            self.app.logger.error(
                _('Object servers returned %s mismatched etags'), len(etags))
            return HTTPServerError(request=req)
        etag = etags.pop() if len(etags) else None
        resp = self.best_response(req,
                                  statuses,
                                  reasons,
                                  bodies,
                                  _('Object PUT'),
                                  etag=etag)
        if source_header:
            acct, path = source_header.split('/', 3)[2:4]
            resp.headers['X-Copied-From-Account'] = quote(acct)
            resp.headers['X-Copied-From'] = quote(path)
            if 'last-modified' in source_resp.headers:
                resp.headers['X-Copied-From-Last-Modified'] = \
                    source_resp.headers['last-modified']
            copy_headers_into(req, resp)
        resp.last_modified = math.ceil(
            float(Timestamp(req.headers['X-Timestamp'])))
        return resp
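
A cleaner sketch of the node filtering done in the CHANGED_CODE block above: the ring's nodes are partitioned by whether their device name appears in a spun-down device list kept on disk. The file path is the one hard-coded in the example and is environment-specific.

    SPINDOWN_FILE = '/home/hduser/swift/swift/proxy/controllers/spindowndevices'

    def split_nodes_by_spindown(nodes, spindown_file=SPINDOWN_FILE):
        """Return (up_nodes, down_nodes) based on the spun-down device list."""
        with open(spindown_file) as f:
            spun_down = set(f.read().split('\n'))
        up_nodes = [n for n in nodes if n['device'] not in spun_down]
        down_nodes = [n for n in nodes if n['device'] in spun_down]
        return up_nodes, down_nodes
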
Example #7
    def _get_from_shards(self, req, resp):
        # Construct listing using shards described by the response body.
        # The history of containers that have returned shard ranges is
        # maintained in the request environ so that loops can be avoided by
        # forcing an object listing if the same container is visited again.
        # This can happen in at least two scenarios:
        #   1. a container has filled a gap in its shard ranges with a
        #      shard range pointing to itself
        #   2. a root container returns a (stale) shard range pointing to a
        #      shard that has shrunk into the root, in which case the shrunken
        #      shard may return the root's shard range.
        shard_listing_history = req.environ.setdefault(
            'swift.shard_listing_history', [])
        shard_listing_history.append((self.account_name, self.container_name))
        shard_ranges = [ShardRange.from_dict(data)
                        for data in json.loads(resp.body)]
        self.app.logger.debug('GET listing from %s shards for: %s',
                              len(shard_ranges), req.path_qs)
        if not shard_ranges:
            # can't find ranges or there was a problem getting the ranges. So
            # return what we have.
            return resp

        objects = []
        req_limit = constrain_req_limit(req, CONTAINER_LISTING_LIMIT)
        params = req.params.copy()
        params.pop('states', None)
        req.headers.pop('X-Backend-Record-Type', None)
        reverse = config_true_value(params.get('reverse'))
        marker = wsgi_to_str(params.get('marker'))
        end_marker = wsgi_to_str(params.get('end_marker'))
        prefix = wsgi_to_str(params.get('prefix'))

        limit = req_limit
        all_resp_status = []
        for i, shard_range in enumerate(shard_ranges):
            params['limit'] = limit
            # Always set marker to ensure that object names less than or equal
            # to those already in the listing are not fetched; if the listing
            # is empty then the original request marker, if any, is used. This
            # allows misplaced objects below the expected shard range to be
            # included in the listing.
            if objects:
                last_name = objects[-1].get('name',
                                            objects[-1].get('subdir', u''))
                params['marker'] = bytes_to_wsgi(last_name.encode('utf-8'))
            elif marker:
                params['marker'] = str_to_wsgi(marker)
            else:
                params['marker'] = ''
            # Always set end_marker to ensure that misplaced objects beyond the
            # expected shard range are not fetched. This prevents a misplaced
            # object obscuring correctly placed objects in the next shard
            # range.
            if end_marker and end_marker in shard_range:
                params['end_marker'] = str_to_wsgi(end_marker)
            elif reverse:
                params['end_marker'] = str_to_wsgi(shard_range.lower_str)
            else:
                params['end_marker'] = str_to_wsgi(shard_range.end_marker)

            headers = {}
            if ((shard_range.account, shard_range.container) in
                    shard_listing_history):
                # directed back to same container - force GET of objects
                headers['X-Backend-Record-Type'] = 'object'
            if config_true_value(req.headers.get('x-newest', False)):
                headers['X-Newest'] = 'true'

            if prefix:
                if prefix > shard_range:
                    continue
                try:
                    just_past = prefix[:-1] + chr(ord(prefix[-1]) + 1)
                except ValueError:
                    pass
                else:
                    if just_past < shard_range:
                        continue

            self.app.logger.debug(
                'Getting listing part %d from shard %s %s with %s',
                i, shard_range, shard_range.name, headers)
            objs, shard_resp = self._get_container_listing(
                req, shard_range.account, shard_range.container,
                headers=headers, params=params)
            all_resp_status.append(shard_resp.status_int)

            sharding_state = shard_resp.headers.get('x-backend-sharding-state',
                                                    'unknown')

            if objs is None:
                # give up if any non-success response from shard containers
                self.app.logger.error(
                    'Aborting listing from shards due to bad response: %r'
                    % all_resp_status)
                return HTTPServiceUnavailable(request=req)

            self.app.logger.debug(
                'Found %d objects in shard (state=%s), total = %d',
                len(objs), sharding_state, len(objs) + len(objects))

            if not objs:
                # tolerate empty shard containers
                continue

            objects.extend(objs)
            limit -= len(objs)

            if limit <= 0:
                break
            last_name = objects[-1].get('name',
                                        objects[-1].get('subdir', u''))
            if six.PY2:
                last_name = last_name.encode('utf8')
            if end_marker and reverse and end_marker >= last_name:
                break
            if end_marker and not reverse and end_marker <= last_name:
                break

        resp.body = json.dumps(objects).encode('ascii')
        constrained = any(req.params.get(constraint) for constraint in (
            'marker', 'end_marker', 'path', 'prefix', 'delimiter'))
        if not constrained and len(objects) < req_limit:
            self.app.logger.debug('Setting object count to %s' % len(objects))
            # prefer the actual listing stats over the potentially outdated
            # root stats. This condition is only likely when a sharded
            # container is shrinking or in tests; typically a sharded container
            # will have more than CONTAINER_LISTING_LIMIT objects so any
            # unconstrained listing will be capped by the limit and total
            # object stats cannot therefore be inferred from the listing.
            resp.headers['X-Container-Object-Count'] = len(objects)
            resp.headers['X-Container-Bytes-Used'] = sum(
                [o['bytes'] for o in objects])
        return resp
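
A quick illustration of the marker/end_marker handling above: each shard is only asked for names inside its own namespace, so a misplaced object beyond a shard's upper bound can never obscure correctly placed objects in the next range. The sketch below is a minimal standalone model of that stitching (hypothetical helpers, shard ranges reduced to (lower, upper] string pairs, reverse listings ignored and end markers treated as inclusive for brevity); it is not Swift's API.

# Sketch only: each "shard" is ((lower, upper), sorted_names); assumes the
# shard list has already been filtered to ranges overlapping the request,
# as the root container does before the loop above.
def _clip(value, lower, upper):
    # Keep a client marker only if it falls inside (lower, upper].
    if value and value > lower and (not upper or value <= upper):
        return value
    return None

def merge_shard_listings(shards, marker='', end_marker='', limit=10000):
    merged = []
    for (lower, upper), names in shards:
        shard_marker = _clip(marker, lower, upper) or lower
        shard_end = _clip(end_marker, lower, upper) or upper
        part = [n for n in names
                if n > shard_marker and (not shard_end or n <= shard_end)]
        merged.extend(part[:limit - len(merged)])
        if len(merged) >= limit:
            break
        if end_marker and merged and merged[-1] >= end_marker:
            break
    return merged

shards = [(('', 'm'), ['apple', 'kiwi']), (('m', ''), ['pear', 'zebra'])]
print(merge_shard_listings(shards, marker='apple'))
# -> ['kiwi', 'pear', 'zebra']
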
    def _requests_to_bytes_iter(self):
        # Take the requests out of self._coalesce_requests, actually make
        # the requests, and generate the bytes from the responses.
        #
        # Yields 2-tuples (segment-name, byte-chunk). The segment name is
        # used for logging.
        for data_or_req, seg_etag, seg_size in self._coalesce_requests():
            if isinstance(data_or_req, bytes):  # ugly, awful overloading
                yield ('data segment', data_or_req)
                continue
            seg_req = data_or_req
            seg_resp = seg_req.get_response(self.app)
            if not is_success(seg_resp.status_int):
                # Error body should be short
                body = seg_resp.body
                if not six.PY2:
                    body = body.decode('utf8')
                msg = 'While processing manifest %s, got %d (%s) ' \
                    'while retrieving %s' % (
                        self.name, seg_resp.status_int,
                        body if len(body) <= 60 else body[:57] + '...',
                        seg_req.path)
                if is_server_error(seg_resp.status_int):
                    self.logger.error(msg)
                    raise HTTPServiceUnavailable(request=seg_req,
                                                 content_type='text/plain')
                raise SegmentError(msg)
            elif (
                (seg_etag and (seg_resp.etag != seg_etag)) or
                (seg_size and
                 (seg_resp.content_length != seg_size) and not seg_req.range)):
                # The content-length check is for security reasons. Seems
                # possible that an attacker could upload a >1mb object and
                # then replace it with a much smaller object with same
                # etag. Then create a big nested SLO that calls that
                # object many times which would hammer our obj servers. If
                # this is a range request, don't check content-length
                # because it won't match.
                close_if_possible(seg_resp.app_iter)
                raise SegmentError(
                    'Object segment no longer valid: '
                    '%(path)s etag: %(r_etag)s != %(s_etag)s or '
                    '%(r_size)s != %(s_size)s.' % {
                        'path': seg_req.path,
                        'r_etag': seg_resp.etag,
                        'r_size': seg_resp.content_length,
                        's_etag': seg_etag,
                        's_size': seg_size
                    })
            else:
                self.current_resp = seg_resp

            resp_len = 0
            seg_hash = None
            if seg_resp.etag and not seg_req.headers.get('Range'):
                # Only calculate the MD5 if we can use it to validate
                seg_hash = md5(usedforsecurity=False)

            document_iters = maybe_multipart_byteranges_to_document_iters(
                seg_resp.app_iter, seg_resp.headers['Content-Type'])

            for chunk in itertools.chain.from_iterable(document_iters):
                if seg_hash:
                    seg_hash.update(chunk)
                    resp_len += len(chunk)
                yield (seg_req.path, chunk)
            close_if_possible(seg_resp.app_iter)

            if seg_hash:
                if resp_len != seg_resp.content_length:
                    raise SegmentError(
                        "Bad response length for %(seg)s as part of %(name)s: "
                        "headers had %(from_headers)s, but response length "
                        "was actually %(actual)s" % {
                            'seg': seg_req.path,
                            'from_headers': seg_resp.content_length,
                            'name': self.name,
                            'actual': resp_len
                        })
                if seg_hash.hexdigest() != seg_resp.etag:
                    raise SegmentError(
                        "Bad MD5 checksum for %(seg)s as part of %(name)s: "
                        "headers had %(etag)s, but object MD5 was actually "
                        "%(actual)s" % {
                            'seg': seg_req.path,
                            'etag': seg_resp.etag,
                            'name': self.name,
                            'actual': seg_hash.hexdigest()
                        })
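
The etag and content-length comparisons above protect against a segment being replaced after its manifest was written, and the byte/MD5 recount catches truncated or corrupted backend responses. Below is a minimal standalone sketch of the same validate-while-streaming pattern using plain hashlib; the helper name is hypothetical and this is not Swift's API.

import hashlib


class SegmentError(Exception):
    pass


def iter_validated_segment(chunks, expected_etag=None, expected_size=None):
    # Yield chunks unchanged, but verify the byte count and MD5 against the
    # manifest's expectations once the stream is exhausted.
    hasher = hashlib.md5() if expected_etag else None
    seen = 0
    for chunk in chunks:
        if hasher:
            hasher.update(chunk)
        seen += len(chunk)
        yield chunk
    if expected_size is not None and seen != expected_size:
        raise SegmentError('bad length: expected %d, got %d'
                           % (expected_size, seen))
    if hasher and hasher.hexdigest() != expected_etag:
        raise SegmentError('bad etag: expected %s, got %s'
                           % (expected_etag, hasher.hexdigest()))


etag = hashlib.md5(b'hello world').hexdigest()
body = b''.join(iter_validated_segment([b'hello ', b'world'], etag, 11))
assert body == b'hello world'
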
Example #9
0
    def PUT(self, req):
        """HTTP PUT request handler."""
        container_info = self.container_info(
            self.account_name, self.container_name)
        container_partition = container_info['partition']
        containers = container_info['nodes']
        req.acl = container_info['write_acl']
        req.environ['swift_sync_key'] = container_info['sync_key']
        object_versions = container_info['versions']
        if 'swift.authorize' in req.environ:
            aresp = req.environ['swift.authorize'](req)
            if aresp:
                return aresp
        if not containers:
            return HTTPNotFound(request=req)
        if 'x-delete-after' in req.headers:
            try:
                x_delete_after = int(req.headers['x-delete-after'])
            except ValueError:
                return HTTPBadRequest(request=req,
                                      content_type='text/plain',
                                      body='Non-integer X-Delete-After')
            req.headers['x-delete-at'] = '%d' % (time.time() + x_delete_after)
        partition, nodes = self.app.object_ring.get_nodes(
            self.account_name, self.container_name, self.object_name)
        # do a HEAD request for container sync and checking object versions
        if 'x-timestamp' in req.headers or \
                (object_versions and not
                 req.environ.get('swift_versioned_copy')):
            hreq = Request.blank(req.path_info, headers={'X-Newest': 'True'},
                                 environ={'REQUEST_METHOD': 'HEAD'})
            hresp = self.GETorHEAD_base(
                hreq, _('Object'), self.app.object_ring, partition,
                hreq.path_info)
        # Used by container sync feature
        if 'x-timestamp' in req.headers:
            try:
                req.headers['X-Timestamp'] = \
                    normalize_timestamp(float(req.headers['x-timestamp']))
                if hresp.environ and 'swift_x_timestamp' in hresp.environ and \
                    float(hresp.environ['swift_x_timestamp']) >= \
                        float(req.headers['x-timestamp']):
                    return HTTPAccepted(request=req)
            except ValueError:
                return HTTPBadRequest(
                    request=req, content_type='text/plain',
                    body='X-Timestamp should be a UNIX timestamp float value; '
                         'was %r' % req.headers['x-timestamp'])
        else:
            req.headers['X-Timestamp'] = normalize_timestamp(time.time())
        # Sometimes the 'content-type' header exists, but is set to None.
        content_type_manually_set = True
        if not req.headers.get('content-type'):
            guessed_type, _junk = mimetypes.guess_type(req.path_info)
            req.headers['Content-Type'] = guessed_type or \
                'application/octet-stream'
            content_type_manually_set = False
        error_response = check_object_creation(req, self.object_name) or \
            check_content_type(req)
        if error_response:
            return error_response
        if object_versions and not req.environ.get('swift_versioned_copy'):
            is_manifest = 'x-object-manifest' in req.headers or \
                          'x-object-manifest' in hresp.headers
            if hresp.status_int != HTTP_NOT_FOUND and not is_manifest:
                # This is a version manifest and needs to be handled
                # differently. First copy the existing data to a new object,
                # then write the data from this request to the version manifest
                # object.
                lcontainer = object_versions.split('/')[0]
                prefix_len = '%03x' % len(self.object_name)
                lprefix = prefix_len + self.object_name + '/'
                ts_source = hresp.environ.get('swift_x_timestamp')
                if ts_source is None:
                    ts_source = time.mktime(time.strptime(
                                            hresp.headers['last-modified'],
                                            '%a, %d %b %Y %H:%M:%S GMT'))
                new_ts = normalize_timestamp(ts_source)
                vers_obj_name = lprefix + new_ts
                copy_headers = {
                    'Destination': '%s/%s' % (lcontainer, vers_obj_name)}
                copy_environ = {'REQUEST_METHOD': 'COPY',
                                'swift_versioned_copy': True
                                }
                copy_req = Request.blank(req.path_info, headers=copy_headers,
                                         environ=copy_environ)
                copy_resp = self.COPY(copy_req)
                if is_client_error(copy_resp.status_int):
                    # missing container or bad permissions
                    return HTTPPreconditionFailed(request=req)
                elif not is_success(copy_resp.status_int):
                    # could not copy the data, bail
                    return HTTPServiceUnavailable(request=req)

        reader = req.environ['wsgi.input'].read
        data_source = iter(lambda: reader(self.app.client_chunk_size), '')
        source_header = req.headers.get('X-Copy-From')
        source_resp = None
        if source_header:
            source_header = unquote(source_header)
            acct = req.path_info.split('/', 2)[1]
            if isinstance(acct, unicode):
                acct = acct.encode('utf-8')
            if not source_header.startswith('/'):
                source_header = '/' + source_header
            source_header = '/' + acct + source_header
            try:
                src_container_name, src_obj_name = \
                    source_header.split('/', 3)[2:]
            except ValueError:
                return HTTPPreconditionFailed(
                    request=req,
                    body='X-Copy-From header must be of the form '
                         '<container name>/<object name>')
            source_req = req.copy_get()
            source_req.path_info = source_header
            source_req.headers['X-Newest'] = 'true'
            orig_obj_name = self.object_name
            orig_container_name = self.container_name
            self.object_name = src_obj_name
            self.container_name = src_container_name
            source_resp = self.GET(source_req)
            if source_resp.status_int >= HTTP_MULTIPLE_CHOICES:
                return source_resp
            self.object_name = orig_obj_name
            self.container_name = orig_container_name
            new_req = Request.blank(req.path_info,
                                    environ=req.environ, headers=req.headers)
            data_source = source_resp.app_iter
            new_req.content_length = source_resp.content_length
            if new_req.content_length is None:
                # This indicates a transfer-encoding: chunked source object,
                # which currently only happens because there are more than
                # CONTAINER_LISTING_LIMIT segments in a segmented object. In
                # this case, we're going to refuse to do the server-side copy.
                return HTTPRequestEntityTooLarge(request=req)
            if new_req.content_length > MAX_FILE_SIZE:
                return HTTPRequestEntityTooLarge(request=req)
            new_req.etag = source_resp.etag
            # we no longer need the X-Copy-From header
            del new_req.headers['X-Copy-From']
            if not content_type_manually_set:
                new_req.headers['Content-Type'] = \
                    source_resp.headers['Content-Type']
            if not config_true_value(
                    new_req.headers.get('x-fresh-metadata', 'false')):
                copy_headers_into(source_resp, new_req)
                copy_headers_into(req, new_req)
            # copy over x-static-large-object for POSTs and manifest copies
            if 'X-Static-Large-Object' in source_resp.headers and \
                    req.params.get('multipart-manifest') == 'get':
                new_req.headers['X-Static-Large-Object'] = \
                    source_resp.headers['X-Static-Large-Object']

            req = new_req

        if 'x-delete-at' in req.headers:
            try:
                x_delete_at = int(req.headers['x-delete-at'])
                if x_delete_at < time.time():
                    return HTTPBadRequest(
                        body='X-Delete-At in past', request=req,
                        content_type='text/plain')
            except ValueError:
                return HTTPBadRequest(request=req, content_type='text/plain',
                                      body='Non-integer X-Delete-At')
            delete_at_container = str(
                x_delete_at /
                self.app.expiring_objects_container_divisor *
                self.app.expiring_objects_container_divisor)
            delete_at_part, delete_at_nodes = \
                self.app.container_ring.get_nodes(
                    self.app.expiring_objects_account, delete_at_container)
        else:
            delete_at_part = delete_at_nodes = None

        node_iter = GreenthreadSafeIterator(
            self.iter_nodes(self.app.object_ring, partition))
        pile = GreenPile(len(nodes))
        chunked = req.headers.get('transfer-encoding')

        outgoing_headers = self._backend_requests(
            req, len(nodes), container_partition, containers,
            delete_at_part, delete_at_nodes)

        for nheaders in outgoing_headers:
            # RFC2616:8.2.3 disallows 100-continue without a body
            if (req.content_length > 0) or chunked:
                nheaders['Expect'] = '100-continue'
            pile.spawn(self._connect_put_node, node_iter, partition,
                       req.path_info, nheaders, self.app.logger.thread_locals)

        conns = [conn for conn in pile if conn]
        if len(conns) <= len(nodes) / 2:
            self.app.logger.error(
                _('Object PUT returning 503, %(conns)s/%(nodes)s '
                  'required connections'),
                {'conns': len(conns), 'nodes': len(nodes) // 2 + 1})
            return HTTPServiceUnavailable(request=req)
        bytes_transferred = 0
        try:
            with ContextPool(len(nodes)) as pool:
                for conn in conns:
                    conn.failed = False
                    conn.queue = Queue(self.app.put_queue_depth)
                    pool.spawn(self._send_file, conn, req.path)
                while True:
                    with ChunkReadTimeout(self.app.client_timeout):
                        try:
                            chunk = next(data_source)
                        except StopIteration:
                            if chunked:
                                [conn.queue.put('0\r\n\r\n') for conn in conns]
                            break
                    bytes_transferred += len(chunk)
                    if bytes_transferred > MAX_FILE_SIZE:
                        return HTTPRequestEntityTooLarge(request=req)
                    for conn in list(conns):
                        if not conn.failed:
                            conn.queue.put(
                                '%x\r\n%s\r\n' % (len(chunk), chunk)
                                if chunked else chunk)
                        else:
                            conns.remove(conn)
                    if len(conns) <= len(nodes) / 2:
                        self.app.logger.error(_(
                            'Object PUT exceptions during'
                            ' send, %(conns)s/%(nodes)s required connections'),
                            {'conns': len(conns), 'nodes': len(nodes) / 2 + 1})
                        return HTTPServiceUnavailable(request=req)
                for conn in conns:
                    if conn.queue.unfinished_tasks:
                        conn.queue.join()
            conns = [conn for conn in conns if not conn.failed]
        except ChunkReadTimeout, err:
            self.app.logger.warn(
                _('ERROR Client read timeout (%ss)'), err.seconds)
            self.app.logger.increment('client_timeouts')
            return HTTPRequestTimeout(request=req)
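
One detail from the PUT handler above worth isolating is how expiring objects are grouped: the absolute X-Delete-At value is rounded down with integer division so that every object expiring in the same window is tracked in one expiring-objects container. A small standalone sketch follows; the 86400 divisor is an assumed example value for expiring_objects_container_divisor.

import time


def delete_at_container(x_delete_at, divisor=86400):
    # Round the absolute expiry down to a bucket boundary; every object
    # expiring within the same window shares one tracking container.
    return str(int(x_delete_at) // divisor * divisor)


# An X-Delete-After of 3600 is first converted to an absolute X-Delete-At.
x_delete_at = int(time.time()) + 3600
print(delete_at_container(x_delete_at))   # e.g. '1757462400'
print(delete_at_container(1757462399))    # '1757376000'
print(delete_at_container(1757462401))    # '1757462400'
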
Example #10
0
    def GETorHEAD(self, req):
        """Handle HTTP GET or HEAD requests."""
        container_info = self.container_info(
            self.account_name, self.container_name, req)
        req.acl = container_info['read_acl']
        if 'swift.authorize' in req.environ:
            aresp = req.environ['swift.authorize'](req)
            if aresp:
                return aresp

        partition = self.app.object_ring.get_part(
            self.account_name, self.container_name, self.object_name)
        resp = self.GETorHEAD_base(
            req, _('Object'), self.app.object_ring, partition, req.path_info)

        if ';' in resp.headers.get('content-type', ''):
            # strip off swift_bytes from content-type
            content_type, check_extra_meta = \
                resp.headers['content-type'].rsplit(';', 1)
            if check_extra_meta.lstrip().startswith('swift_bytes='):
                resp.content_type = content_type

        large_object = None
        if config_true_value(resp.headers.get('x-static-large-object')) and \
                req.params.get('multipart-manifest') == 'get' and \
                'X-Copy-From' not in req.headers and \
                self.app.allow_static_large_object:
            resp.content_type = 'application/json'

        if config_true_value(resp.headers.get('x-static-large-object')) and \
                req.params.get('multipart-manifest') != 'get' and \
                self.app.allow_static_large_object:
            large_object = 'SLO'
            listing_page1 = ()
            listing = []
            lcontainer = None  # container name is included in listing
            if resp.status_int == HTTP_OK and \
                    req.method == 'GET' and not req.range:
                try:
                    listing = json.loads(resp.body)
                except ValueError:
                    listing = []
            else:
                # need to make a second request to get whole manifest
                new_req = req.copy_get()
                new_req.method = 'GET'
                new_req.range = None
                new_resp = self.GETorHEAD_base(
                    new_req, _('Object'), self.app.object_ring, partition,
                    req.path_info)
                if new_resp.status_int // 100 == 2:
                    try:
                        listing = json.loads(new_resp.body)
                    except ValueError:
                        listing = []
                else:
                    return HTTPServiceUnavailable(
                        "Unable to load SLO manifest", request=req)

        if 'x-object-manifest' in resp.headers and \
                req.params.get('multipart-manifest') != 'get':
            large_object = 'DLO'
            lcontainer, lprefix = \
                resp.headers['x-object-manifest'].split('/', 1)
            lcontainer = unquote(lcontainer)
            lprefix = unquote(lprefix)
            try:
                pages_iter = iter(self._listing_pages_iter(lcontainer, lprefix,
                                                           req.environ))
                listing_page1 = pages_iter.next()
                listing = itertools.chain(listing_page1,
                                          self._remaining_items(pages_iter))
            except ListingIterNotFound:
                return HTTPNotFound(request=req)
            except ListingIterNotAuthorized, err:
                return err.aresp
            except ListingIterError:
                return HTTPServerError(request=req)
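
The content-type handling above strips the internal '; swift_bytes=N' size annotation from large-object manifests before the response reaches the client. A minimal standalone sketch of that check (hypothetical function name):

def strip_swift_bytes(content_type):
    # Drop a trailing '; swift_bytes=N' parameter but leave every other
    # content-type parameter (e.g. charset) untouched.
    if ';' in content_type:
        base, _, param = content_type.rpartition(';')
        if param.lstrip().startswith('swift_bytes='):
            return base
    return content_type


print(strip_swift_bytes('text/plain; swift_bytes=1048576'))  # text/plain
print(strip_swift_bytes('text/plain; charset=utf-8'))        # unchanged
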
Example #11
0
                            self.container_name + '/' + self.object_name
                copy_headers = {'X-Newest': 'True',
                                'Destination': orig_container + '/' + orig_obj
                                }
                copy_environ = {'REQUEST_METHOD': 'COPY',
                                'swift_versioned_copy': True
                                }
                creq = Request.blank(copy_path, headers=copy_headers,
                                     environ=copy_environ)
                copy_resp = self.COPY(creq)
                if is_client_error(copy_resp.status_int):
                    # some user error, maybe permissions
                    return HTTPPreconditionFailed(request=req)
                elif not is_success(copy_resp.status_int):
                    # could not copy the data, bail
                    return HTTPServiceUnavailable(request=req)
                # reset these because the COPY changed them
                self.container_name = lcontainer
                self.object_name = last_item['name']
                new_del_req = Request.blank(copy_path, environ=req.environ)
                container_info = self.container_info(
                    self.account_name, self.container_name, req)
                container_partition = container_info['partition']
                containers = container_info['nodes']
                new_del_req.acl = container_info['write_acl']
                new_del_req.path_info = copy_path
                req = new_del_req
                # remove 'X-If-Delete-At', since it is not for the older copy
                if 'X-If-Delete-At' in req.headers:
                    del req.headers['X-If-Delete-At']
        if 'swift.authorize' in req.environ:
Example #12
0
    def _handle_object_versions(self, req):
        """
        This method handles versioning of objects in containers that
        have the feature enabled.

        When a new PUT request is sent, the proxy checks for previous versions
        of that same object name. If found, it is copied to a different
        container and the new version is stored in its place.

        This method was added as part of the PUT method refactoring and the
        functionality is expected to be moved to middleware.
        """
        container_info = self.container_info(self.account_name,
                                             self.container_name, req)
        policy_index = req.headers.get('X-Backend-Storage-Policy-Index',
                                       container_info['storage_policy'])
        obj_ring = self.app.get_object_ring(policy_index)
        partition, nodes = obj_ring.get_nodes(self.account_name,
                                              self.container_name,
                                              self.object_name)
        object_versions = container_info['versions']

        # do a HEAD request for checking object versions
        if object_versions and not req.environ.get('swift_versioned_copy'):
            # make sure proxy-server uses the right policy index
            _headers = {
                'X-Backend-Storage-Policy-Index': policy_index,
                'X-Newest': 'True'
            }
            hreq = Request.blank(req.path_info,
                                 headers=_headers,
                                 environ={'REQUEST_METHOD': 'HEAD'})
            hresp = self.GETorHEAD_base(hreq, _('Object'), obj_ring, partition,
                                        hreq.swift_entity_path)

            is_manifest = 'X-Object-Manifest' in req.headers or \
                          'X-Object-Manifest' in hresp.headers
            if hresp.status_int != HTTP_NOT_FOUND and not is_manifest:
                # This is a version manifest and needs to be handled
                # differently. First copy the existing data to a new object,
                # then write the data from this request to the version manifest
                # object.
                lcontainer = object_versions.split('/')[0]
                prefix_len = '%03x' % len(self.object_name)
                lprefix = prefix_len + self.object_name + '/'
                ts_source = hresp.environ.get('swift_x_timestamp')
                if ts_source is None:
                    ts_source = time.mktime(
                        time.strptime(hresp.headers['last-modified'],
                                      '%a, %d %b %Y %H:%M:%S GMT'))
                new_ts = Timestamp(ts_source).internal
                vers_obj_name = lprefix + new_ts
                copy_headers = {
                    'Destination': '%s/%s' % (lcontainer, vers_obj_name)
                }
                copy_environ = {
                    'REQUEST_METHOD': 'COPY',
                    'swift_versioned_copy': True
                }
                copy_req = Request.blank(req.path_info,
                                         headers=copy_headers,
                                         environ=copy_environ)
                copy_resp = self.COPY(copy_req)
                if is_client_error(copy_resp.status_int):
                    # missing container or bad permissions
                    raise HTTPPreconditionFailed(request=req)
                elif not is_success(copy_resp.status_int):
                    # could not copy the data, bail
                    raise HTTPServiceUnavailable(request=req)
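
Both versioning code paths in this document build the archived object's name the same way: a three-hex-digit length prefix, the object name, a '/', then the source timestamp. A standalone sketch of that naming (hypothetical function name):

def version_archive_name(object_name, timestamp):
    # '%03x' gives a 3-hex-digit length prefix, so a listing with prefix
    # '003abc/' only matches versions of 'abc' and never versions of an
    # object whose name merely starts with 'abc/' (e.g. 'abc/def').
    return '%03x%s/%s' % (len(object_name), object_name, timestamp)


print(version_archive_name('photo.jpg', '1700000000.00000'))
# -> '009photo.jpg/1700000000.00000'
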
Example #13
0
    def __call__(self, env, start_response):
        def my_start_response(status, headers, exc_info=None):
            self.status = status
            self.headers = list(headers)
            self.exc_info = exc_info
        self.env = env
        self.start_response = start_response

        # If the request was already processed by autosync
        # (here or at the original cluster where it was first received)
        if 'HTTP_X_ORIG_CLUSTER' in env:
            print >> sys.stderr, 'HTTP_X_ORIG_CLUSTER found!'
            if self.override_auth:
                env['swift_owner'] = True
            return self.app(env, start_response)

        # If it is a local call or a tempurl object call
        if 'swift.authorize_override' in env:
            return self.app(env, start_response)

        # Get Placement parameters
        if 'swift.my_cluster' in env:
            self.my_cluster = env['swift.my_cluster']
        else:
            self.my_cluster = self.default_my_cluster

        if 'swift.placement' in env:
            placement = env['swift.placement']
        else:
            placement = self.default_placement or self.my_cluster
        
        if not self.my_cluster or not placement:
            return self.app(env, start_response)

        self.req = Request(env)
        # For now we support only placement here and in one other place
        if self.my_cluster not in placement:
            return HTTPInternalServerError(request=self.req)
        #   return self.redirect()

        peers = [p for p in placement if p != self.my_cluster]
        if len(peers) != 1:
            return HTTPInternalServerError(request=self.req) 

        # This request needs to be handled locally
        try:
            (version, account, container, obj) = \
                self.req.split_path(2, 4, True)
        except ValueError:
            return self.app(env, start_response)
        if obj or self.req.method in ('OPTIONS', 'GET', 'HEAD'):
            # business as usual - I will serve the request locally and be done
            # TBD, in case of 404 returned from GET object, try a remote copy?
            return self.app(env, start_response)

        # Let's see, it's either PUT, POST or DELETE on an account/container.
        # In other words, 'we need to change the account/container'
        # both here and with peers...

        # As part of any container creation/modification (PUT/POST):
        # Create a new key to protect the container communication from now
        # until the next time the container is updated.
        # Note that a race may occur with container-sync daemons, resulting
        # in container-sync failing due to misaligned keys.
        # Changing the keys per update helps support changes in the placement
        # and can serve as a simple mechanism for replacing container sync
        # keys. If this turns out to be an issue, we may extract and reuse
        # the same key for the duration of the container's existence.
        if container and self.req.method in ['POST', 'PUT']:
            key = ''.join(choice(self.keychars) for x in range(64))
            # Add the key to the env when calling the local cluster 
            env['HTTP_X_CONTAINER_SYNC_KEY'] = key
            # Set the container replica of the local cluster to sync to the
            # last cluster in the list of peers
            sync_to_peer = peers[-1]  # Sync to the prev peer
            sync_to = sync_to_peer + self.env['PATH_INFO']
            env['HTTP_X_CONTAINER_SYNC_TO'] = sync_to
        else:
            key = None  # Signals that there are no Container-Sync headers

        # Try locally; if we fail and the method is not DELETE, respond
        # with the failure.
        resp_data = self.app(self.env, my_start_response)
        data = ''.join(iter(resp_data))
        if hasattr(resp_data, 'close'):
            resp_data.close()
        resp_status_int = int(self.status[:3])

        # Failure at local cluster during anything but DELETE... abandon ship
        if not is_success(resp_status_int) and self.req.method != 'DELETE':
            # Don't even try the peers
            start_response(self.status, self.headers, self.exc_info)
            return data

        # Call peers and accumulate responses
        try:
            # Note that key is None if not during container PUT/POST
            resps = self.send_to_peers(peers, key)
            # Append the local cluster response
            resps.append((resp_status_int, self.headers, data))
        except:
            return HTTPServiceUnavailable(request=self.req)

        resp = None
        if self.req.method == 'DELETE':
            # Special treatment for DELETE - respond with the best we have
            resp = self.highest_response(resps, swap={'404': '1'})
        else:  # PUT/POST - respond only if all success
            if self.all_success(resps):
                resp = self.highest_response(resps)
            else:
                # PUT/POST with local success and remote failure
                resp = HTTPServiceUnavailable(request=self.req)
        return resp(env, start_response)
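
The final branch above turns the local and peer results into a single client response: DELETE is best-effort, while PUT/POST must succeed everywhere. The real highest_response and all_success helpers are not shown in this example, so the sketch below is only an assumed simplification over raw status codes.

def aggregate_status(method, statuses):
    # statuses: local + peer HTTP status codes for the same request.
    if method == 'DELETE':
        # Best effort: any success wins, and 404 still counts as "gone".
        ok = [s for s in statuses if 200 <= s < 300 or s == 404]
        return min(ok) if ok else 503
    # PUT/POST must succeed on every cluster, otherwise report 503
    # (local success with a remote failure is still a failure to the client).
    if statuses and all(200 <= s < 300 for s in statuses):
        return max(statuses)
    return 503


print(aggregate_status('DELETE', [204, 404]))  # 204
print(aggregate_status('PUT', [201, 503]))     # 503
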