Ejemplo n.º 1
0
def analyze_shard_ranges(args):
    """Look for a repair solution for a set of loaded shard ranges.

    Shard range data is loaded with ``_load_and_validate_shard_data``
    (an index is not required), converted to ``ShardRange`` objects and
    passed to ``find_repair_solution`` together with a ``ShardRange``
    named 'whole/namespace'.

    :param args: parsed arguments; passed through to the loader and to
        ``find_repair_solution``.
    :returns: 1 if ``ManageShardRangesException`` is raised while looking
        for a repair solution, otherwise 0.
    """
    records = _load_and_validate_shard_data(args, require_index=False)
    for record in records:
        # the data may be incomplete, e.g. scraped from
        # swift-container-info output, so tolerate a missing epoch
        record.setdefault('epoch', None)
    shard_ranges = list(map(ShardRange.from_dict, records))
    namespace = ShardRange('whole/namespace', 0)
    exit_code = 0
    try:
        find_repair_solution(shard_ranges, namespace, args)
    except ManageShardRangesException:
        exit_code = 1
    return exit_code
Ejemplo n.º 2
0
 def _filter_resp_shard_ranges(self, req, cached_ranges):
     """Filter cached shard ranges by the request's listing constraints.

     :param req: the request; its 'marker', 'end_marker', 'includes' and
         'reverse' parameters select and order the result.
     :param cached_ranges: an iterable of dicts, each of which is passed
         to ``ShardRange.from_dict``.
     :returns: the filtered shard ranges as ASCII-encoded JSON bytes.
     """
     marker = get_param(req, 'marker', '')
     end_marker = get_param(req, 'end_marker')
     includes = get_param(req, 'includes')
     is_reversed = config_true_value(get_param(req, 'reverse'))
     # for a reverse listing the two markers swap roles before filtering
     if is_reversed:
         lower, upper = end_marker, marker
     else:
         lower, upper = marker, end_marker
     candidates = [ShardRange.from_dict(item) for item in cached_ranges]
     selected = filter_shard_ranges(candidates, includes, lower, upper)
     if is_reversed:
         selected.reverse()
     body = json.dumps([dict(sr) for sr in selected])
     return body.encode('ascii')
Ejemplo n.º 3
0
    def PUT(self, req):
        """Handle HTTP PUT request.

        Depending on the request this either records an object row in the
        container (an object name is present in the path), merges shard
        ranges sent in the request body (the x-backend-record-type header
        matches ``RECORD_TYPE_SHARD``), or creates/updates the container.

        :param req: the incoming request.
        :returns: an HTTP response: ``HTTPBadRequest`` for an invalid
            x-container-sync-to value or an invalid shard range body,
            ``HTTPInsufficientStorage`` if the drive check or free space
            check fails, otherwise ``HTTPCreated`` or ``HTTPAccepted``
            (or whatever response the shard redirect or the account
            update returns).
        """
        drive, part, account, container, obj = get_obj_name_and_placement(req)
        req_timestamp = valid_timestamp(req)
        if 'x-container-sync-to' in req.headers:
            # only the validation error is of interest here
            err, _sync_to, _realm, _realm_key = validate_sync_to(
                req.headers['x-container-sync-to'],
                self.allowed_sync_hosts,
                self.realms_conf)
            if err:
                return HTTPBadRequest(err)
        try:
            check_drive(self.root, drive, self.mount_check)
        except ValueError:
            return HTTPInsufficientStorage(drive=drive, request=req)
        if not self.check_free_space(drive):
            return HTTPInsufficientStorage(drive=drive, request=req)
        requested_policy_index = self.get_and_validate_policy_index(req)
        broker = self._get_container_broker(drive, part, account, container)

        if obj:
            # put container object: obj put expects the policy_index
            # header, the default is for legacy support during upgrade.
            obj_policy_index = requested_policy_index or 0
            self._maybe_autocreate(broker, req_timestamp, account,
                                   obj_policy_index)
            # redirect if a shard exists for this object name
            redirect = self._redirect_to_shard(req, broker, obj)
            if redirect:
                return redirect

            hdrs = req.headers
            broker.put_object(
                obj,
                req_timestamp.internal,
                int(hdrs['x-size']),
                wsgi_to_str(hdrs['x-content-type']),
                wsgi_to_str(hdrs['x-etag']),
                0,
                obj_policy_index,
                wsgi_to_str(hdrs.get('x-content-type-timestamp')),
                wsgi_to_str(hdrs.get('x-meta-timestamp')))
            return HTTPCreated(request=req)

        record_type = req.headers.get('x-backend-record-type', '').lower()
        if record_type != RECORD_TYPE_SHARD:
            # put container
            new_container_policy = requested_policy_index
            if new_container_policy is None:
                # use the default index sent by the proxy if available
                new_container_policy = req.headers.get(
                    'X-Backend-Storage-Policy-Default', int(POLICIES.default))
            created = self._update_or_create(
                req, broker, req_timestamp.internal, new_container_policy,
                requested_policy_index)
            self._update_metadata(req, broker, req_timestamp, 'PUT')
            account_resp = self.account_update(req, account, container, broker)
            if account_resp:
                return account_resp
        else:
            # put shard ranges
            try:
                # validate incoming data...
                raw_ranges = json.loads(req.body)
                shard_ranges = [ShardRange.from_dict(sr) for sr in raw_ranges]
            except (ValueError, KeyError, TypeError) as err:
                return HTTPBadRequest('Invalid body: %r' % err)
            created = self._maybe_autocreate(broker, req_timestamp, account,
                                             requested_policy_index)
            self._update_metadata(req, broker, req_timestamp, 'PUT')
            if shard_ranges:
                # TODO: consider writing the shard ranges into the pending
                # file, but if so ensure an all-or-none semantic for the write
                broker.merge_shard_ranges(shard_ranges)

        # both outcomes report the broker's storage policy index
        response_class = HTTPCreated if created else HTTPAccepted
        return response_class(
            request=req,
            headers={
                'x-backend-storage-policy-index': broker.storage_policy_index
            })
Ejemplo n.º 4
0
    def PUT(self, req):
        """Handle HTTP PUT request.

        The request path is split into drive, partition, account,
        container and an optional object name. With an object name the
        object row is recorded in the container; with an
        x-backend-record-type header matching ``RECORD_TYPE_SHARD`` the
        shard ranges in the request body are merged; otherwise the
        container itself is created or updated.

        :param req: the incoming request.
        :returns: an HTTP response: ``HTTPBadRequest`` for an invalid
            x-container-sync-to value or an invalid shard range body,
            ``HTTPInsufficientStorage`` if the drive check or free space
            check fails, otherwise ``HTTPCreated`` or ``HTTPAccepted``
            (or whatever response the shard redirect or the account
            update returns).
        """
        drive, part, account, container, obj = split_and_validate_path(
            req, 4, 5, True)
        req_timestamp = valid_timestamp(req)
        if 'x-container-sync-to' in req.headers:
            # only the validation error is of interest here
            err, _sync_to, _realm, _realm_key = validate_sync_to(
                req.headers['x-container-sync-to'],
                self.allowed_sync_hosts,
                self.realms_conf)
            if err:
                return HTTPBadRequest(err)
        try:
            check_drive(self.root, drive, self.mount_check)
        except ValueError:
            return HTTPInsufficientStorage(drive=drive, request=req)
        if not self.check_free_space(drive):
            return HTTPInsufficientStorage(drive=drive, request=req)
        requested_policy_index = self.get_and_validate_policy_index(req)
        broker = self._get_container_broker(drive, part, account, container)

        if obj:
            # put container object: obj put expects the policy_index
            # header, the default is for legacy support during upgrade.
            obj_policy_index = requested_policy_index or 0
            self._maybe_autocreate(broker, req_timestamp, account,
                                   obj_policy_index)
            # redirect if a shard exists for this object name
            shard_redirect = self._redirect_to_shard(req, broker, obj)
            if shard_redirect:
                return shard_redirect

            req_headers = req.headers
            broker.put_object(
                obj,
                req_timestamp.internal,
                int(req_headers['x-size']),
                wsgi_to_str(req_headers['x-content-type']),
                wsgi_to_str(req_headers['x-etag']),
                0,
                obj_policy_index,
                wsgi_to_str(req_headers.get('x-content-type-timestamp')),
                wsgi_to_str(req_headers.get('x-meta-timestamp')))
            return HTTPCreated(request=req)

        record_type = req.headers.get('x-backend-record-type', '').lower()
        if record_type != RECORD_TYPE_SHARD:
            # put container
            if requested_policy_index is not None:
                new_container_policy = requested_policy_index
            else:
                # use the default index sent by the proxy if available
                new_container_policy = req.headers.get(
                    'X-Backend-Storage-Policy-Default', int(POLICIES.default))
            created = self._update_or_create(
                req, broker, req_timestamp.internal, new_container_policy,
                requested_policy_index)
            self._update_metadata(req, broker, req_timestamp, 'PUT')
            update_resp = self.account_update(req, account, container, broker)
            if update_resp:
                return update_resp
        else:
            # put shard ranges
            try:
                # validate incoming data...
                shard_ranges = list(
                    map(ShardRange.from_dict, json.loads(req.body)))
            except (ValueError, KeyError, TypeError) as err:
                return HTTPBadRequest('Invalid body: %r' % err)
            created = self._maybe_autocreate(broker, req_timestamp, account,
                                             requested_policy_index)
            self._update_metadata(req, broker, req_timestamp, 'PUT')
            if shard_ranges:
                # TODO: consider writing the shard ranges into the pending
                # file, but if so ensure an all-or-none semantic for the write
                broker.merge_shard_ranges(shard_ranges)

        # both outcomes report the broker's storage policy index
        policy_header = {
            'x-backend-storage-policy-index': broker.storage_policy_index}
        if created:
            return HTTPCreated(request=req, headers=policy_header)
        return HTTPAccepted(request=req, headers=policy_header)
Ejemplo n.º 5
0
    def _get_from_shards(self, req, resp):
        """Construct a listing using the shards described by ``resp``.

        The body of ``resp`` is parsed as a JSON list of shard range dicts.
        Each shard range's container is listed in turn, with the marker and
        end_marker adjusted per shard, until the request limit is used up or
        the request's end_marker has been reached. The combined listing
        replaces the body of ``resp``.

        :param req: the listing request; its ``X-Backend-Record-Type``
            header is removed as a side effect.
        :param resp: the response whose body describes the shard ranges; it
            is modified in place.
        :returns: ``resp``; it is returned untouched if its body described
            no shard ranges.
        """
        shard_ranges = [
            ShardRange.from_dict(data) for data in json.loads(resp.body)
        ]
        self.app.logger.debug('GET listing from %s shards for: %s',
                              len(shard_ranges), req.path_qs)
        if not shard_ranges:
            # can't find ranges or there was a problem getting the ranges. So
            # return what we have.
            return resp

        objects = []
        req_limit = int(req.params.get('limit', CONTAINER_LISTING_LIMIT))
        params = req.params.copy()
        params.pop('states', None)
        req.headers.pop('X-Backend-Record-Type', None)
        reverse = config_true_value(params.get('reverse'))
        marker = params.get('marker')
        end_marker = params.get('end_marker')

        limit = req_limit
        for shard_range in shard_ranges:
            params['limit'] = limit
            # Always set marker to ensure that object names less than or equal
            # to those already in the listing are not fetched; if the listing
            # is empty then the original request marker, if any, is used. This
            # allows misplaced objects below the expected shard range to be
            # included in the listing.
            if objects:
                last_name = objects[-1].get('name',
                                            objects[-1].get('subdir', u''))
                params['marker'] = last_name.encode('utf-8')
            elif marker:
                params['marker'] = marker
            else:
                params['marker'] = ''
            # Always set end_marker to ensure that misplaced objects beyond the
            # expected shard range are not fetched. This prevents a misplaced
            # object obscuring correctly placed objects in the next shard
            # range.
            if end_marker and end_marker in shard_range:
                params['end_marker'] = end_marker
            elif reverse:
                params['end_marker'] = str_to_wsgi(shard_range.lower_str)
            else:
                params['end_marker'] = str_to_wsgi(shard_range.end_marker)

            if (shard_range.account == self.account_name
                    and shard_range.container == self.container_name):
                # directed back to same container - force GET of objects
                headers = {'X-Backend-Record-Type': 'object'}
            else:
                headers = None
            self.app.logger.debug('Getting from %s %s with %s', shard_range,
                                  shard_range.name, headers)
            objs, shard_resp = self._get_container_listing(
                req,
                shard_range.account,
                shard_range.container,
                headers=headers,
                params=params)

            if not objs:
                # tolerate errors or empty shard containers
                continue

            objects.extend(objs)
            limit -= len(objs)

            if limit <= 0:
                break
            if end_marker:
                # The last entry may be a subdir entry, which has no 'name'
                # key, so fall back to 'subdir' exactly as is done when
                # choosing the next marker; indexing ['name'] directly would
                # raise KeyError for such an entry.
                last_name = objects[-1].get(
                    'name', objects[-1].get('subdir', u'')).encode('utf-8')
                end_marker_bytes = wsgi_to_bytes(end_marker)
                if reverse and end_marker_bytes >= last_name:
                    break
                if not reverse and end_marker_bytes <= last_name:
                    break

        resp.body = json.dumps(objects).encode('ascii')
        constrained = any(
            req.params.get(constraint)
            for constraint in ('marker', 'end_marker', 'path', 'prefix',
                               'delimiter'))
        if not constrained and len(objects) < req_limit:
            self.app.logger.debug('Setting object count to %s' % len(objects))
            # prefer the actual listing stats over the potentially outdated
            # root stats. This condition is only likely when a sharded
            # container is shrinking or in tests; typically a sharded container
            # will have more than CONTAINER_LISTING_LIMIT objects so any
            # unconstrained listing will be capped by the limit and total
            # object stats cannot therefore be inferred from the listing.
            resp.headers['X-Container-Object-Count'] = len(objects)
            resp.headers['X-Container-Bytes-Used'] = sum(
                [o['bytes'] for o in objects])
        return resp
Ejemplo n.º 6
0
    def _get_from_shards(self, req, resp):
        """Construct a listing using the shards described by ``resp``.

        The history of containers that have returned shard ranges is kept
        in the request environ so that loops can be avoided by forcing an
        object listing if the same container is visited again. This can
        happen in at least two scenarios:

          1. a container has filled a gap in its shard ranges with a shard
             range pointing to itself
          2. a root container returns a (stale) shard range pointing to a
             shard that has shrunk into the root, in which case the
             shrunken shard may return the root's shard range.

        :param req: the listing request; its ``X-Backend-Record-Type``
            header is removed and its environ's
            'swift.shard_listing_history' list is extended as side effects.
        :param resp: the response whose body is a JSON list of shard range
            dicts; it is modified in place.
        :returns: ``resp``; its body is left as it was if it described no
            shard ranges.
        """
        history = req.environ.setdefault('swift.shard_listing_history', [])
        history.append((self.account_name, self.container_name))
        shard_ranges = [
            ShardRange.from_dict(item) for item in json.loads(resp.body)]
        self.app.logger.debug('GET listing from %s shards for: %s',
                              len(shard_ranges), req.path_qs)
        if not shard_ranges:
            # can't find ranges or there was a problem getting the ranges. So
            # return what we have.
            return resp

        listing = []
        req_limit = constrain_req_limit(req, CONTAINER_LISTING_LIMIT)
        params = req.params.copy()
        params.pop('states', None)
        req.headers.pop('X-Backend-Record-Type', None)
        reverse = config_true_value(params.get('reverse'))
        marker = wsgi_to_str(params.get('marker'))
        end_marker = wsgi_to_str(params.get('end_marker'))
        prefix = wsgi_to_str(params.get('prefix'))

        remaining = req_limit
        for index, shard_range in enumerate(shard_ranges):
            params['limit'] = remaining
            # Always set marker to ensure that object names less than or equal
            # to those already in the listing are not fetched; if the listing
            # is empty then the original request marker, if any, is used. This
            # allows misplaced objects below the expected shard range to be
            # included in the listing.
            if listing:
                tail = listing[-1]
                tail_name = tail.get('name', tail.get('subdir', u''))
                params['marker'] = bytes_to_wsgi(tail_name.encode('utf-8'))
            else:
                params['marker'] = str_to_wsgi(marker) if marker else ''
            # Always set end_marker to ensure that misplaced objects beyond the
            # expected shard range are not fetched. This prevents a misplaced
            # object obscuring correctly placed objects in the next shard
            # range.
            if end_marker and end_marker in shard_range:
                shard_end = end_marker
            elif reverse:
                shard_end = shard_range.lower_str
            else:
                shard_end = shard_range.end_marker
            params['end_marker'] = str_to_wsgi(shard_end)

            headers = {}
            already_visited = (
                (shard_range.account, shard_range.container) in history)
            if already_visited:
                # directed back to same container - force GET of objects
                headers['X-Backend-Record-Type'] = 'object'
            if config_true_value(req.headers.get('x-newest', False)):
                headers['X-Newest'] = 'true'

            if prefix:
                if prefix > shard_range:
                    continue
                # the prefix with its final character advanced by one; if
                # that character cannot be advanced the check is skipped
                try:
                    just_past = prefix[:-1] + chr(ord(prefix[-1]) + 1)
                except ValueError:
                    just_past = None
                if just_past is not None and just_past < shard_range:
                    continue

            self.app.logger.debug(
                'Getting listing part %d from shard %s %s with %s', index,
                shard_range, shard_range.name, headers)
            objs, shard_resp = self._get_container_listing(
                req, shard_range.account, shard_range.container,
                headers=headers, params=params)

            sharding_state = shard_resp.headers.get(
                'x-backend-sharding-state', 'unknown')

            if objs is None:
                # tolerate errors
                self.app.logger.debug(
                    'Failed to get objects from shard (state=%s), total = %d',
                    sharding_state, len(listing))
                continue

            self.app.logger.debug(
                'Found %d objects in shard (state=%s), total = %d',
                len(objs), sharding_state, len(objs) + len(listing))

            if not objs:
                # tolerate empty shard containers
                continue

            listing.extend(objs)
            remaining -= len(objs)
            if remaining <= 0:
                break

            tail = listing[-1]
            last_name = tail.get('name', tail.get('subdir', u''))
            if six.PY2:
                last_name = last_name.encode('utf8')
            if end_marker:
                # stop once the listing has reached the request's end_marker
                if reverse:
                    reached_end = end_marker >= last_name
                else:
                    reached_end = end_marker <= last_name
                if reached_end:
                    break

        resp.body = json.dumps(listing).encode('ascii')
        constraint_names = ('marker', 'end_marker', 'path', 'prefix',
                            'delimiter')
        constrained = any(req.params.get(name) for name in constraint_names)
        if not constrained and len(listing) < req_limit:
            self.app.logger.debug('Setting object count to %s' % len(listing))
            # prefer the actual listing stats over the potentially outdated
            # root stats. This condition is only likely when a sharded
            # container is shrinking or in tests; typically a sharded container
            # will have more than CONTAINER_LISTING_LIMIT objects so any
            # unconstrained listing will be capped by the limit and total
            # object stats cannot therefore be inferred from the listing.
            resp.headers['X-Container-Object-Count'] = len(listing)
            resp.headers['X-Container-Bytes-Used'] = sum(
                entry['bytes'] for entry in listing)
        return resp
Ejemplo n.º 7
0
    def _get_from_shards(self, req, resp):
        """Construct a listing using the shards described by ``resp``.

        The body of ``resp`` is parsed as a JSON list of shard range dicts.
        Each shard range's container is listed in turn, with the marker and
        end_marker adjusted per shard, until the request limit is used up or
        the request's end_marker has been reached. The combined listing
        replaces the body of ``resp``.

        :param req: the listing request; its ``X-Backend-Record-Type``
            header is removed as a side effect.
        :param resp: the response whose body describes the shard ranges; it
            is modified in place.
        :returns: ``resp``; it is returned untouched if its body described
            no shard ranges.
        """
        shard_ranges = [ShardRange.from_dict(data)
                        for data in json.loads(resp.body)]
        self.app.logger.debug('GET listing from %s shards for: %s',
                              len(shard_ranges), req.path_qs)
        if not shard_ranges:
            # can't find ranges or there was a problem getting the ranges. So
            # return what we have.
            return resp

        objects = []
        req_limit = int(req.params.get('limit', CONTAINER_LISTING_LIMIT))
        params = req.params.copy()
        params.pop('states', None)
        req.headers.pop('X-Backend-Record-Type', None)
        reverse = config_true_value(params.get('reverse'))
        marker = params.get('marker')
        end_marker = params.get('end_marker')

        limit = req_limit
        for shard_range in shard_ranges:
            params['limit'] = limit
            # Always set marker to ensure that object names less than or equal
            # to those already in the listing are not fetched; if the listing
            # is empty then the original request marker, if any, is used. This
            # allows misplaced objects below the expected shard range to be
            # included in the listing.
            if objects:
                last_name = objects[-1].get('name',
                                            objects[-1].get('subdir', u''))
                params['marker'] = last_name.encode('utf-8')
            elif marker:
                params['marker'] = marker
            else:
                params['marker'] = ''
            # Always set end_marker to ensure that misplaced objects beyond the
            # expected shard range are not fetched. This prevents a misplaced
            # object obscuring correctly placed objects in the next shard
            # range.
            if end_marker and end_marker in shard_range:
                params['end_marker'] = end_marker
            elif reverse:
                params['end_marker'] = str_to_wsgi(shard_range.lower_str)
            else:
                params['end_marker'] = str_to_wsgi(shard_range.end_marker)

            if (shard_range.account == self.account_name and
                    shard_range.container == self.container_name):
                # directed back to same container - force GET of objects
                headers = {'X-Backend-Record-Type': 'object'}
            else:
                headers = None
            self.app.logger.debug('Getting from %s %s with %s',
                                  shard_range, shard_range.name, headers)
            objs, shard_resp = self._get_container_listing(
                req, shard_range.account, shard_range.container,
                headers=headers, params=params)

            if not objs:
                # tolerate errors or empty shard containers
                continue

            objects.extend(objs)
            limit -= len(objs)

            if limit <= 0:
                break
            if end_marker:
                # The last entry may be a subdir entry, which has no 'name'
                # key, so fall back to 'subdir' exactly as is done when
                # choosing the next marker; indexing ['name'] directly would
                # raise KeyError for such an entry.
                last_name = objects[-1].get(
                    'name', objects[-1].get('subdir', u'')).encode('utf-8')
                end_marker_bytes = wsgi_to_bytes(end_marker)
                if reverse and end_marker_bytes >= last_name:
                    break
                if not reverse and end_marker_bytes <= last_name:
                    break

        resp.body = json.dumps(objects).encode('ascii')
        constrained = any(req.params.get(constraint) for constraint in (
            'marker', 'end_marker', 'path', 'prefix', 'delimiter'))
        if not constrained and len(objects) < req_limit:
            self.app.logger.debug('Setting object count to %s' % len(objects))
            # prefer the actual listing stats over the potentially outdated
            # root stats. This condition is only likely when a sharded
            # container is shrinking or in tests; typically a sharded container
            # will have more than CONTAINER_LISTING_LIMIT objects so any
            # unconstrained listing will be capped by the limit and total
            # object stats cannot therefore be inferred from the listing.
            resp.headers['X-Container-Object-Count'] = len(objects)
            resp.headers['X-Container-Bytes-Used'] = sum(
                [o['bytes'] for o in objects])
        return resp