class Meta2IndexingWorker(object):
    """
    Indexing worker responsible for a single volume.
    """

    def __init__(self, volume_path, conf, pool_manager=None):
        """
        Initializes an indexing worker for indexing meta2 databases.

        Possible values of conf relating to this worker are:
        - interval: (int) in sec, time between two full scans.
          Default: half an hour.
        - report_interval: (int) in sec, time between two reports.
          Default: 300.
        - scanned_per_second: (int) maximum number of indexed databases /s.
        - try_removing_faulty_indexes: in the event where we encounter a
          database that's not supposed to be handled by this volume, attempt
          to remove it from this volume's rdir index if it exists.
          WARNING: The decision is based on a proxy response that could be
          affected by cache inconsistencies, for example; use at your own
          risk. Default: False.

        :param volume_path: The volume path to be indexed
        :param conf: The configuration to be passed to the needed services
        :param pool_manager: A connection pool manager. If none is given, a
                new one with a default size of 10 will be created.
        """
        self.logger = get_logger(conf)
        self._stop = False
        self.volume = volume_path
        self.success_nb = 0
        self.failed_nb = 0
        self.full_scan_nb = 0
        self.last_report_time = 0
        self.last_scan_time = 0
        self.last_index_time = 0
        self.start_time = 0
        self.indexed_since_last_report = 0
        self.scans_interval = int_value(
            conf.get('interval'), 1800)
        self.report_interval = int_value(
            conf.get('report_interval'), 300)
        self.max_indexed_per_second = int_value(
            conf.get('scanned_per_second'), 3000)
        self.namespace, self.volume_id = check_volume_for_service_type(
            self.volume, "meta2")
        self.attempt_bad_index_removal = boolean_value(
            conf.get('try_removing_faulty_indexes'), False)

        if not pool_manager:
            pool_manager = get_pool_manager(pool_connections=10)
        self.index_client = RdirClient(conf, logger=self.logger,
                                       pool_manager=pool_manager)
        self.dir_client = DirectoryClient(conf, logger=self.logger,
                                          pool_manager=pool_manager)

    def report(self, tag):
        """
        Log the status of the indexer.

        :param tag: One of three: starting, running, ended.
        """
        total = self.success_nb + self.failed_nb
        now = time.time()
        elapsed = (now - self.start_time) or 0.00001
        since_last_rprt = (now - self.last_report_time) or 0.00001
        self.logger.info(
            'volume_id=%(volume_id)s %(tag)s=%(current_time)s '
            'elapsed=%(elapsed).02f '
            'pass=%(pass)d '
            'errors=%(errors)d '
            'containers_indexed=%(total_indexed)d %(index_rate).2f/s',
            {
                'volume_id': self.volume_id,
                'tag': tag,
                'current_time': datetime.fromtimestamp(
                    int(now)).isoformat(),
                'pass': self.full_scan_nb,
                'errors': self.failed_nb,
                'total_indexed': total,
                'index_rate': self.indexed_since_last_report /
                since_last_rprt,
                'elapsed': elapsed
            }
        )
        self.last_report_time = now
        self.indexed_since_last_report = 0

    def warn(self, msg, container_id):
        self.logger.warn(
            'volume_id=%(volume_id)s container_id=%(container_id)s %(error)s',
            {
                'volume_id': self.volume_id,
                'container_id': container_id,
                'error': msg
            }
        )

    def _attempt_index_removal(self, db_path, cid):
        """
        Fail-safe removal attempt.
        """
        try:
            self.index_client.meta2_index_delete(self.volume_id, db_path, cid)
        except exc.OioException as exception:
            self.warn(
                container_id=cid,
                msg="Unable to remove database from the volume "
                    "index: {0}".format(str(exception))
            )

    def index_meta2_database(self, db_id):
        """
        Add a meta2 database to the rdir index. Fails if the database isn't
        handled by the current volume.

        :param db_id: The ContentID representing the reference to the
            database.
        """
        if len(db_id) < STRLEN_REFERENCEID:
            self.warn('Not a valid container ID', db_id)
            return
        try:
            srvcs = self.dir_client.list(cid=db_id)
            account, container = srvcs['account'], srvcs['name']
            is_peer = self.volume_id in [x['host'] for x in srvcs['srv']
                                         if x['type'] == 'meta2']

            container_id = db_id.rsplit(".")[0]

            if six.PY2:
                if isinstance(account, six.text_type):
                    account = account.encode('utf-8')
                if isinstance(container, six.text_type):
                    container = container.encode('utf-8')
            cont_url = "{0}/{1}/{2}".format(self.namespace, account,
                                            container)

            if not is_peer:
                self.warn("Trying to index a container that isn't handled "
                          "by this volume", db_id)
                if self.attempt_bad_index_removal:
                    self._attempt_index_removal(cont_url, container_id)
                return

            self.index_client.meta2_index_push(volume_id=self.volume_id,
                                               container_url=cont_url,
                                               mtime=time.time(),
                                               container_id=container_id)

            self.success_nb += 1
        except exc.OioException as exception:
            self.failed_nb += 1
            self.warn("Unable to index container: %s" % str(exception),
                      db_id)

        self.indexed_since_last_report += 1

    def crawl_volume(self):
        """
        Crawl the volume assigned to this worker, and index every database.
        """
        paths = paths_gen(self.volume)
        self.full_scan_nb += 1
        self.success_nb = 0
        self.failed_nb = 0
        now = time.time()
        self.last_report_time = now

        self.report("starting")

        for db_path in paths:

            # Graceful exit, hopefully
            if self._stop:
                break

            db_id = db_path.rsplit("/")[-1].rsplit(".")

            if len(db_id) != 3:
                self.warn("Malformed db file name!", db_path)
                continue

            db_id = ".".join(db_id[:2])
            self.index_meta2_database(db_id)

            self.last_index_time = ratelimit(
                self.last_index_time,
                self.max_indexed_per_second
            )

            now = time.time()
            if now - self.last_report_time >= self.report_interval:
                self.report("running")

        self.report("ended")

    def run(self):
        """
        Main worker loop
        """
        self.start_time = time.time()
        while not self._stop:
            try:
                self.crawl_volume()
                self.last_scan_time = time.time()
                time.sleep(self.scans_interval)
            except exc.OioException as exception:
                self.logger.exception("ERROR during indexing meta2: %s",
                                      exception)

    def stop(self):
        """
        Could be needed for eventually gracefully stopping.
        """
        self._stop = True
class RdirClient(HttpApi):
    """
    Client class for rdir services.
    """

    base_url = {
        'rawx': 'rdir',
        'meta2': 'rdir/meta2',
    }

    def __init__(self, conf, **kwargs):
        super(RdirClient, self).__init__(service_type='rdir', **kwargs)
        self.directory = DirectoryClient(conf, **kwargs)
        self.ns = conf['namespace']
        self._addr_cache = dict()

    def _clear_cache(self, volume_id):
        self._addr_cache.pop(volume_id, None)

    def _get_rdir_addr(self, volume_id, reqid=None):
        # Initial lookup in the cache
        if volume_id in self._addr_cache:
            return self._addr_cache[volume_id]
        # Not cached, try a direct lookup
        try:
            headers = {REQID_HEADER: reqid or request_id()}
            resp = self.directory.list(RDIR_ACCT, volume_id,
                                       service_type='rdir',
                                       headers=headers)
            host = _filter_rdir_host(resp)
            # Add the new service to the cache
            self._addr_cache[volume_id] = host
            return host
        except NotFound:
            raise VolumeException('No rdir assigned to volume %s' % volume_id)

    def _make_uri(self, action, volume_id, reqid=None, service_type='rawx'):
        rdir_host = self._get_rdir_addr(volume_id, reqid)
        return 'http://%s/v1/%s/%s' % (
            rdir_host, self.__class__.base_url[service_type], action)

    @ensure_headers
    @ensure_request_id
    def _rdir_request(self, volume, method, action, create=False,
                      params=None, service_type='rawx', **kwargs):
        if params is None:
            params = dict()
        params['vol'] = volume
        if create:
            params['create'] = '1'
        uri = self._make_uri(action, volume,
                             reqid=kwargs['headers'][REQID_HEADER],
                             service_type=service_type)
        try:
            resp, body = self._direct_request(method, uri, params=params,
                                              **kwargs)
        except OioNetworkException:
            self._clear_cache(volume)
            raise

        return resp, body

    def create(self, volume_id, service_type='rawx', **kwargs):
        """Create the database for `volume_id` on the appropriate rdir"""
        self._rdir_request(volume_id, 'POST', 'create',
                           service_type=service_type, **kwargs)

    def chunk_push(self, volume_id, container_id, content_id, chunk_id,
                   headers=None, **data):
        """Reference a chunk in the reverse directory"""
        body = {'container_id': container_id,
                'content_id': content_id,
                'chunk_id': chunk_id}

        for key, value in data.items():
            body[key] = value

        self._rdir_request(volume_id, 'POST', 'push', create=True,
                           json=body, headers=headers)

    def chunk_delete(self, volume_id, container_id, content_id, chunk_id,
                     **kwargs):
        """Unreference a chunk from the reverse directory"""
        body = {'container_id': container_id,
                'content_id': content_id,
                'chunk_id': chunk_id}

        self._rdir_request(volume_id, 'DELETE', 'delete', json=body,
                           **kwargs)

    def chunk_fetch(self, volume, limit=1000, rebuild=False,
                    container_id=None, max_attempts=3,
                    start_after=None, shuffle=False, **kwargs):
        """
        Fetch the list of chunks belonging to the specified volume.

        :param volume: the volume to get chunks from
        :type volume: `str`
        :param limit: maximum number of results to return per request
            to the rdir server.
        :type limit: `int`
        :param rebuild: fetch only the chunks that were there
            before the last incident.
        :type rebuild: `bool`
        :keyword container_id: get only chunks belonging to
            the specified container
        :type container_id: `str`
        :keyword start_after: fetch only chunks that appear after
            this container ID
        :type start_after: `str`
        """
        req_body = {'limit': limit}
        if rebuild:
            req_body['rebuild'] = True
        if container_id:
            req_body['container_id'] = container_id
        if start_after:
            req_body['start_after'] = start_after

        while True:
            for i in range(max_attempts):
                try:
                    _resp, resp_body = self._rdir_request(
                        volume, 'POST', 'fetch', json=req_body, **kwargs)
                    break
                except OioNetworkException:
                    # Monotonic backoff
                    if i < max_attempts - 1:
                        sleep(i * 1.0)
                        continue
                    # Too many attempts
                    raise

            truncated = _resp.headers.get(
                HEADER_PREFIX + 'list-truncated')
            if truncated is None:
                # TODO(adu): Delete when it will no longer be used
                if not resp_body:
                    break
                truncated = True
                req_body['start_after'] = resp_body[-1][0]
            else:
                truncated = true_value(truncated)
                if truncated:
                    req_body['start_after'] = _resp.headers[
                        HEADER_PREFIX + 'list-marker']

            if shuffle:
                random.shuffle(resp_body)
            for (key, value) in resp_body:
                container, content, chunk = key.split('|')
                yield container, content, chunk, value

            if not truncated:
                break

    def admin_incident_set(self, volume, date, **kwargs):
        body = {'date': int(float(date))}
        self._rdir_request(volume, 'POST', 'admin/incident',
                           json=body, **kwargs)

    def admin_incident_get(self, volume, **kwargs):
        _resp, body = self._rdir_request(volume, 'GET',
                                         'admin/incident', **kwargs)
        return body.get('date')

    def admin_lock(self, volume, who, **kwargs):
        body = {'who': who}
        self._rdir_request(volume, 'POST', 'admin/lock', json=body, **kwargs)

    def admin_unlock(self, volume, **kwargs):
        self._rdir_request(volume, 'POST', 'admin/unlock', **kwargs)

    def admin_show(self, volume, **kwargs):
        _resp, body = self._rdir_request(volume, 'GET', 'admin/show',
                                         **kwargs)
        return body

    def admin_clear(self, volume, clear_all=False, before_incident=False,
                    repair=False, **kwargs):
        params = {'all': clear_all, 'before_incident': before_incident,
                  'repair': repair}
        _resp, resp_body = self._rdir_request(
            volume, 'POST', 'admin/clear', params=params, **kwargs)
        return resp_body

    def status(self, volume, max=1000, prefix=None, marker=None,
               max_attempts=3, **kwargs):
        """
        Get the status of chunks belonging to the specified volume.

        :param volume: the volume to get chunks from
        :type volume: `str`
        :param max: maximum number of results to return per request
            to the rdir server.
        :type max: `int`
        :keyword prefix: get only chunks belonging to the specified prefix
        :type prefix: `str`
        :keyword marker: fetch only chunks that appear after this marker
        :type marker: `str`
        """
        req_params = {'max': max}
        if prefix:
            req_params['prefix'] = prefix
        if marker:
            req_params['marker'] = marker
        chunks = dict()
        containers = dict()

        while True:
            for i in range(max_attempts):
                try:
                    _resp, resp_body = self._rdir_request(
                        volume, 'GET', 'status', params=req_params, **kwargs)
                    break
                except OioNetworkException:
                    # Monotonic backoff
                    if i < max_attempts - 1:
                        sleep(i * 1.0)
                        continue
                    # Too many attempts
                    raise

            for (key, value) in resp_body.get('chunk', dict()).items():
                chunks[key] = chunks.get(key, 0) + value
            for (cid, info) in resp_body.get('container', dict()).items():
                for (key, value) in info.items():
                    containers[cid][key] = containers.setdefault(
                        cid, dict()).get(key, 0) + value

            if not true_value(_resp.headers.get(
                    HEADER_PREFIX + 'list-truncated')):
                break
            req_params['marker'] = _resp.headers[HEADER_PREFIX +
                                                 'list-marker']

        return {'chunk': chunks, 'container': containers}

    def meta2_index_create(self, volume_id, **kwargs):
        """
        Create a new meta2 rdir index.

        :param volume_id: The meta2 volume.
        """
        return self.create(volume_id, service_type='meta2', **kwargs)

    def meta2_index_push(self, volume_id, container_url, container_id,
                         mtime, **kwargs):
        """
        Add a newly created container to the list of containers handled
        by the meta2 server in question.

        :param volume_id: The meta2 volume.
        :param container_url: The container path (NS/account/container)
        :param container_id: The container ID.
        :param mtime: The last time it was spotted on this volume.
        :param headers: Optional headers to pass along to the request.
        """
        body = {'container_url': container_url,
                'container_id': container_id,
                'mtime': int(mtime)}

        for key, value in kwargs.items():
            body[key] = value

        return self._rdir_request(volume=volume_id, method='POST',
                                  action='push', create=True,
                                  json=body, service_type='meta2',
                                  **kwargs)

    def _resolve_cid_to_path(self, cid):
        """
        Resolves a container ID into a container path.

        :param cid: The container ID.
        :return: NS/account/container path.
        """
        resp = self.directory.list(cid=cid)
        return '{0}/{1}/{2}'.format(
            self.ns,
            resp['account'],
            resp['name']
        )

    def meta2_index_delete(self, volume_id, container_path=None,
                           container_id=None, **kwargs):
        """
        Remove a meta2 record from the volume's index. Either the container
        ID or the container path has to be given.

        :param volume_id: The meta2 volume.
        :param container_id: The container ID.
        :param container_path: The container path
        """
        if not container_path and container_id:
            container_path = self._resolve_cid_to_path(container_id)
        elif container_path and not container_id:
            _tmp = container_path.rsplit("/")
            container_id = cid_from_name(_tmp[1], _tmp[2])
        elif not container_path and not container_id:
            raise ValueError("At least the container ID or the container "
                             "path should be given.")

        body = {'container_url': container_path,
                'container_id': container_id}

        for key, value in kwargs.items():
            body[key] = value

        return self._rdir_request(volume=volume_id, method='POST',
                                  action='delete', create=False,
                                  json=body, service_type='meta2',
                                  **kwargs)

    def meta2_index_fetch(self, volume_id, prefix=None, marker=None,
                          limit=4096, **kwargs):
        """
        Fetch specific meta2 records, or a range of records.

        :param volume_id: The meta2 volume.
        :param prefix: The prefix all meta2 records should have.
        :param marker: The container path from which the API will start the
            listing. The marker will not be included in the result.
        :param limit: The number of records to be returned. Capped at 4096.
        :return: A dictionary containing the following entries:
            - records: A list containing the actual records.
            - truncated: A boolean value representing whether there are
              still records left that fulfill this query.
        """
        params = {}
        if prefix:
            params['prefix'] = prefix
        if marker:
            # FIXME(ABO): Validate this one.
            params['marker'] = marker
        if limit:
            params['limit'] = limit

        _resp, body = self._rdir_request(volume=volume_id, method='POST',
                                         action='fetch', json=params,
                                         service_type='meta2', **kwargs)
        return body

    def meta2_index_fetch_all(self, volume_id, **kwargs):
        """
        A wrapper around meta2_index_fetch that loops until no more records
        are available, returning all the records in a certain volume's index.

        WARNING: For testing purposes only
        """
        return depaginate(
            self.meta2_index_fetch,
            volume_id=volume_id,
            listing_key=lambda x: x['records'],
            truncated_key=lambda x: x['truncated'],
            # The following is only called when the list is truncated
            # So we can assume there are records in the list
            marker_key=lambda x: x['records'][-1]['container_url'],
            **kwargs
        )
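
# Usage sketch (editor's illustration, not part of the original module):
# a hypothetical round-trip through the meta2 index API above. The namespace,
# volume ID, account and container names are placeholders; note that
# meta2_index_push() truncates `mtime` to an integer before sending it.
#
#   client = RdirClient({'namespace': 'OPENIO'})
#   client.meta2_index_create('meta2-1')  # ensure the index database exists
#   client.meta2_index_push(
#       volume_id='meta2-1',
#       container_url='OPENIO/myaccount/mycontainer',
#       container_id=cid_from_name('myaccount', 'mycontainer'),
#       mtime=time.time())
#   for rec in client.meta2_index_fetch_all(volume_id='meta2-1'):
#       print(rec['container_url'])
#   client.meta2_index_delete(
#       'meta2-1', container_path='OPENIO/myaccount/mycontainer')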
class RdirDispatcher(object):
    def __init__(self, conf, rdir_client=None, **kwargs):
        self.conf = conf
        self.ns = conf['namespace']
        self.logger = get_logger(conf)
        self.directory = DirectoryClient(conf, logger=self.logger, **kwargs)
        if rdir_client:
            self.rdir = rdir_client
        else:
            self.rdir = RdirClient(conf, logger=self.logger, **kwargs)
        self._cs = None
        self._pool_options = None

    @property
    def cs(self):
        if not self._cs:
            self._cs = ConscienceClient(self.conf, logger=self.logger,
                                        pool_manager=self.rdir.pool_manager)
        return self._cs

    def get_assignments(self, service_type, **kwargs):
        """
        Get rdir assignments for all services of the specified type.

        :returns: a tuple with a list of all services of the specified type,
            and a list of all rdir services.
        :rtype: `tuple<list<dict>,list<dict>>`
        """
        all_services = self.cs.all_services(service_type, **kwargs)
        all_rdir = self.cs.all_services('rdir', True, **kwargs)
        by_id = {_make_id(self.ns, 'rdir', x['addr']): x
                 for x in all_rdir}
        for service in all_services:
            try:
                ref = service.get('tags', {}).get('tag.service_id')
                resp = self.directory.list(RDIR_ACCT,
                                           ref or service['addr'],
                                           service_type='rdir', **kwargs)
                rdir_host = _filter_rdir_host(resp)
                try:
                    service['rdir'] = by_id[
                        _make_id(self.ns, 'rdir', rdir_host)]
                except KeyError:
                    self.logger.warn("rdir %s linked to %s %s seems down",
                                     rdir_host, service_type,
                                     service['addr'])
                    service['rdir'] = {"addr": rdir_host, "tags": dict()}
                    loc_rdir = service['rdir']
                    by_id[_make_id(self.ns, 'rdir', rdir_host)] = loc_rdir
            except NotFound:
                self.logger.info("No rdir linked to %s", service['addr'])
            except OioException as exc:
                self.logger.warn('Failed to get rdir linked to %s: %s',
                                 service['addr'], exc)
        return all_services, all_rdir

    def assign_services(self, service_type, max_per_rdir=None,
                        min_dist=None, **kwargs):
        """
        Assign an rdir service to all `service_type` servers that aren't
        already assigned one.

        :param max_per_rdir: Maximum number of services an rdir can handle.
        :type max_per_rdir: `int`
        :param min_dist: minimum required distance between any service and
            its assigned rdir service.
        :type min_dist: `int`
        :returns: The list of `service_type` services that were assigned
            rdir services.
        """
        all_services = self.cs.all_services(service_type, **kwargs)
        all_rdir = self.cs.all_services('rdir', True, **kwargs)
        if len(all_rdir) <= 0:
            raise ServiceUnavailable("No rdir service found in %s" % self.ns)

        by_id = {_make_id(self.ns, 'rdir', x['addr']): x
                 for x in all_rdir}

        errors = list()
        for provider in all_services:
            provider_id = provider['tags'].get('tag.service_id',
                                               provider['addr'])

            try:
                resp = self.directory.list(RDIR_ACCT, provider_id,
                                           service_type='rdir', **kwargs)
                rdir_host = _filter_rdir_host(resp)
                try:
                    provider['rdir'] = by_id[_make_id(self.ns, 'rdir',
                                                      rdir_host)]
                except KeyError:
                    self.logger.warn("rdir %s linked to %s %s seems down",
                                     rdir_host, service_type, provider_id)
            except NotFound:
                try:
                    rdir = self._smart_link_rdir(provider_id, all_rdir,
                                                 service_type=service_type,
                                                 max_per_rdir=max_per_rdir,
                                                 min_dist=min_dist,
                                                 **kwargs)
                except OioException as exc:
                    self.logger.warn("Failed to link an rdir to %s %s: %s",
                                     service_type, provider_id, exc)
                    errors.append((provider_id, exc))
                    continue
                n_bases = by_id[rdir]['tags'].get("stat.opened_db_count", 0)
                by_id[rdir]['tags']["stat.opened_db_count"] = n_bases + 1
                provider['rdir'] = by_id[rdir]
            except OioException as exc:
                self.logger.warn("Failed to check rdir linked to %s %s "
                                 "(thus won't try to make the link): %s",
                                 service_type, provider_id, exc)
                errors.append((provider_id, exc))
        if errors:
            # group_chunk_errors is flexible enough to accept
            # service addresses
            errors = group_chunk_errors(errors)
            if len(errors) == 1:
                err, addrs = errors.popitem()
                oio_reraise(type(err), err, str(addrs))
            else:
                raise OioException('Several errors encountered: %s' %
                                   errors)
        return all_services

    def assign_all_meta2(self, max_per_rdir=None, **kwargs):
        """
        Assign an rdir service to all meta2 servers that aren't already
        assigned one.

        :param max_per_rdir: Maximum number of services an rdir can handle.
        :type max_per_rdir: `int`
        :returns: The list of meta2 that were assigned rdir services.
        """
        return self.assign_services("meta2", max_per_rdir, **kwargs)

    def assign_all_rawx(self, max_per_rdir=None, **kwargs):
        """
        Find an rdir service for all rawx that don't have one already.

        :param max_per_rdir: maximum number of rawx services that an rdir
            can be linked to
        :type max_per_rdir: `int`
        """
        return self.assign_services("rawx", max_per_rdir, **kwargs)

    def _smart_link_rdir(self, volume_id, all_rdir, max_per_rdir=None,
                         max_attempts=7, service_type='rawx',
                         min_dist=None, **kwargs):
        """
        Force the load balancer to avoid services that already host more
        bases than the average (or more than `max_per_rdir`)
        while selecting rdir services.
        """
        opened_db = [x['tags'].get('stat.opened_db_count', 0)
                     for x in all_rdir if x['score'] > 0]
        if len(opened_db) <= 0:
            raise ServiceUnavailable(
                "No valid rdir service found in %s" % self.ns)
        if not max_per_rdir:
            upper_limit = sum(opened_db) / float(len(opened_db))
        else:
            upper_limit = max_per_rdir - 1
        avoids = [_make_id(self.ns, "rdir", x['addr'])
                  for x in all_rdir
                  if x['score'] > 0 and
                  x['tags'].get('stat.opened_db_count', 0) > upper_limit]
        known = [_make_id(self.ns, service_type, volume_id)]
        try:
            polled = self._poll_rdir(avoid=avoids, known=known,
                                     min_dist=min_dist, **kwargs)
        except ClientException as exc:
            if exc.status != 481 or max_per_rdir:
                raise
            # Retry without `avoids`, hoping the next iteration
            # will rebalance
            polled = self._poll_rdir(known=known, min_dist=min_dist,
                                     **kwargs)

        # Associate the rdir to the rawx
        forced = {'host': polled['addr'], 'type': 'rdir',
                  'seq': 1, 'args': "", 'id': polled['id']}
        for i in range(max_attempts):
            try:
                self.directory.force(RDIR_ACCT, volume_id, 'rdir',
                                     forced, autocreate=True, **kwargs)
                break
            except ClientException as ex:
                # Already done
                done = (455,)
                if ex.status in done:
                    break
                if ex.message.startswith(
                        'META1 error: (SQLITE_CONSTRAINT) '
                        'UNIQUE constraint failed'):
                    self.logger.info(
                        "Ignored exception (already0): %s", ex)
                    break
                if ex.message.startswith(
                        'META1 error: (SQLITE_CONSTRAINT) '
                        'columns cid, srvtype, seq are not unique'):
                    self.logger.info(
                        "Ignored exception (already1): %s", ex)
                    break
                # Manage several unretriable errors
                retry = (406, 450, 503, 504)
                if ex.status >= 400 and ex.status not in retry:
                    raise
                # Monotonic backoff (retriable and network errors)
                if i < max_attempts - 1:
                    sleep(i * 1.0)
                    continue
                # Too many attempts
                raise

        # Do the creation in the rdir itself
        try:
            self.rdir.create(volume_id, service_type=service_type,
                             **kwargs)
        except Exception as exc:
            self.logger.warn("Failed to create database for %s on %s: %s",
                             volume_id, polled['addr'], exc)
        return polled['id']

    def _create_special_pool(self, options=None, force=False, **kwargs):
        """
        Create the special pool for rdir services.

        :param options: dictionary of custom options for the pool.
        :param force: overwrite the pool if it exists already.
        """
        self.cs.lb.create_pool(
            '__rawx_rdir', ((1, JOKER_SVC_TARGET), (1, 'rdir')),
            options=options, force=force, **kwargs)

    def _poll_rdir(self, avoid=None, known=None, min_dist=None, **kwargs):
        """
        Call the special rdir service pool (created if missing).

        :param min_dist: minimum distance to ensure between the known
            service and the selected rdir service.
        """
        if not known or len(known) > 1:
            raise ValueError('There should be exactly one "known" service')

        options = dict()
        if min_dist is not None:
            options['min_dist'] = min_dist
        if options != self._pool_options:
            # Options have changed, overwrite the pool.
            self._pool_options = options
            self._create_special_pool(self._pool_options, force=True,
                                      **kwargs)

        try:
            svcs = self.cs.poll('__rawx_rdir', avoid=avoid, known=known,
                                **kwargs)
        except ClientException as exc:
            if exc.status != 400:
                raise
            self._create_special_pool(self._pool_options, **kwargs)
            svcs = self.cs.poll('__rawx_rdir', avoid=avoid, known=known,
                                **kwargs)
        for svc in svcs:
            # FIXME: we should include the service type in a dedicated field
            if 'rdir' in svc['id']:
                return svc
        raise ServerException("LB returned incoherent result: %s" % svcs)
class RdirClient(HttpApi):
    """
    Client class for rdir services.
    """

    def __init__(self, conf, **kwargs):
        super(RdirClient, self).__init__(conf, **kwargs)
        self.directory = DirectoryClient(conf, **kwargs)
        self._addr_cache = dict()

    def _clear_cache(self, volume_id):
        del self._addr_cache[volume_id]

    def _get_rdir_addr(self, volume_id):
        # Initial lookup in the cache
        if volume_id in self._addr_cache:
            return self._addr_cache[volume_id]
        # Not cached, try a direct lookup
        try:
            resp = self.directory.list(RDIR_ACCT, volume_id,
                                       service_type='rdir')
            host = _filter_rdir_host(resp)
            # Add the new service to the cache
            self._addr_cache[volume_id] = host
            return host
        except NotFound:
            raise VolumeException('No rdir assigned to volume %s' % volume_id)

    def _make_uri(self, action, volume_id):
        rdir_host = self._get_rdir_addr(volume_id)
        return 'http://%s/v1/rdir/%s' % (rdir_host, action)

    def _rdir_request(self, volume, method, action, create=False, **kwargs):
        params = {'vol': volume}
        if create:
            params['create'] = '1'
        uri = self._make_uri(action, volume)
        try:
            resp, body = self._direct_request(method, uri, params=params,
                                              **kwargs)
        except OioNetworkException:
            self._clear_cache(volume)
            raise

        return resp, body

    def create(self, volume_id):
        """Create the database for `volume_id` on the appropriate rdir"""
        self._rdir_request(volume_id, 'POST', 'create')

    def chunk_push(self, volume_id, container_id, content_id, chunk_id,
                   **data):
        """Reference a chunk in the reverse directory"""
        body = {
            'container_id': container_id,
            'content_id': content_id,
            'chunk_id': chunk_id
        }

        for key, value in data.iteritems():
            body[key] = value

        self._rdir_request(volume_id, 'POST', 'push', create=True,
                           json=body)

    def chunk_delete(self, volume_id, container_id, content_id, chunk_id):
        """Unreference a chunk from the reverse directory"""
        body = {
            'container_id': container_id,
            'content_id': content_id,
            'chunk_id': chunk_id
        }

        self._rdir_request(volume_id, 'DELETE', 'delete', json=body)

    def chunk_fetch(self, volume, limit=100, rebuild=False,
                    container_id=None):
        """
        Fetch the list of chunks belonging to the specified volume.

        :param volume: the volume to get chunks from
        :type volume: `str`
        :param limit: maximum number of results to return
        :type limit: `int`
        :param rebuild:
        :type rebuild: `bool`
        :keyword container_id: get only chunks belonging to
            the specified container
        :type container_id: `str`
        """
        req_body = {'limit': limit}
        if rebuild:
            req_body['rebuild'] = True
        if container_id:
            req_body['container_id'] = container_id

        while True:
            resp, resp_body = self._rdir_request(volume, 'POST', 'fetch',
                                                 json=req_body)
            if len(resp_body) == 0:
                break
            for (key, value) in resp_body:
                container, content, chunk = key.split('|')
                yield container, content, chunk, value
            req_body['start_after'] = key

    def admin_incident_set(self, volume, date):
        body = {'date': int(float(date))}
        self._rdir_request(volume, 'POST', 'admin/incident', json=body)

    def admin_incident_get(self, volume):
        resp, resp_body = self._rdir_request(volume, 'GET',
                                             'admin/incident')
        return resp_body.get('date')

    def admin_lock(self, volume, who):
        body = {'who': who}
        self._rdir_request(volume, 'POST', 'admin/lock', json=body)

    def admin_unlock(self, volume):
        self._rdir_request(volume, 'POST', 'admin/unlock')

    def admin_show(self, volume):
        resp, resp_body = self._rdir_request(volume, 'GET', 'admin/show')
        return resp_body

    def admin_clear(self, volume, clear_all=False):
        body = {'all': clear_all}
        resp, resp_body = self._rdir_request(volume, 'POST',
                                             'admin/clear', json=body)
        return resp_body

    def status(self, volume):
        resp, resp_body = self._rdir_request(volume, 'GET', 'status')
        return resp_body
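
# Usage sketch (editor's illustration, not part of the original module):
# referencing a chunk with this legacy client, then walking the volume's
# listing. chunk_fetch() is a generator that paginates via 'start_after',
# so it can be consumed lazily. All service and content IDs below are
# placeholders.
#
#   client = RdirClient({'namespace': 'OPENIO'})
#   client.chunk_push('rawx-1', 'CID', 'CONTENT', 'CHUNK', mtime=1234567890)
#   for container, content, chunk, value in client.chunk_fetch('rawx-1'):
#       print(container, content, chunk, value)
#   client.chunk_delete('rawx-1', 'CID', 'CONTENT', 'CHUNK')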
class RdirDispatcher(object):
    def __init__(self, conf, **kwargs):
        self.conf = conf
        self.ns = conf['namespace']
        self.logger = get_logger(conf)
        self.directory = DirectoryClient(conf, logger=self.logger, **kwargs)
        self.rdir = RdirClient(conf, logger=self.logger, **kwargs)
        self._cs = None

    @property
    def cs(self):
        if not self._cs:
            self._cs = ConscienceClient(self.conf, logger=self.logger)
        return self._cs

    def get_assignation(self):
        all_rawx = self.cs.all_services('rawx')
        all_rdir = self.cs.all_services('rdir', True)
        by_id = {_make_id(self.ns, 'rdir', x['addr']): x
                 for x in all_rdir}

        for rawx in all_rawx:
            try:
                # Verify that there is no rdir linked
                resp = self.directory.list(RDIR_ACCT, rawx['addr'],
                                           service_type='rdir')
                rdir_host = _filter_rdir_host(resp)
                try:
                    rawx['rdir'] = by_id[_make_id(self.ns, 'rdir',
                                                  rdir_host)]
                except KeyError:
                    self.logger.warn("rdir %s linked to rawx %s seems down",
                                     rdir_host, rawx['addr'])
                    rawx['rdir'] = {"addr": rdir_host, "tags": dict()}
                    by_id[_make_id(self.ns, 'rdir', rdir_host)] = \
                        rawx['rdir']
            except NotFound:
                self.logger.info("No rdir linked to %s", rawx['addr'])
        return all_rawx, all_rdir

    def assign_all_rawx(self, max_per_rdir=None):
        """
        Find an rdir service for all rawx that don't have one already.

        :param max_per_rdir: maximum number of rawx services that an rdir
            can be linked to
        :type max_per_rdir: `int`
        """
        all_rawx = self.cs.all_services('rawx')
        all_rdir = self.cs.all_services('rdir', True)
        if len(all_rdir) <= 0:
            raise ServiceUnavailable("No rdir service found in %s" % self.ns)
        by_id = {_make_id(self.ns, 'rdir', x['addr']): x
                 for x in all_rdir}

        for rawx in all_rawx:
            try:
                # Verify that there is no rdir linked
                resp = self.directory.list(RDIR_ACCT, rawx['addr'],
                                           service_type='rdir')
                rdir_host = _filter_rdir_host(resp)
                try:
                    rawx['rdir'] = by_id[_make_id(self.ns, 'rdir',
                                                  rdir_host)]
                except KeyError:
                    self.logger.warn("rdir %s linked to rawx %s seems down",
                                     rdir_host, rawx['addr'])
            except (NotFound, ClientException):
                if rawx['score'] <= 0:
                    self.logger.warn("rawx %s has score %s, and thus cannot"
                                     " be assigned an rdir (load balancer"
                                     " limitation)",
                                     rawx['addr'], rawx['score'])
                    continue
                rdir = self._smart_link_rdir(rawx['addr'], all_rdir,
                                             max_per_rdir)
                n_bases = by_id[rdir]['tags'].get("stat.opened_db_count", 0)
                by_id[rdir]['tags']["stat.opened_db_count"] = n_bases + 1
                rawx['rdir'] = by_id[rdir]
        return all_rawx

    def _smart_link_rdir(self, volume_id, all_rdir, max_per_rdir=None):
        """
        Force the load balancer to avoid services that already host more
        bases than the average (or more than `max_per_rdir`)
        while selecting rdir services.
        """
        opened_db = [
            x['tags']['stat.opened_db_count'] for x in all_rdir
            if x['score'] > 0
        ]
        if len(opened_db) <= 0:
            raise ServiceUnavailable("No valid rdir service found in %s" %
                                     self.ns)
        if not max_per_rdir:
            upper_limit = sum(opened_db) / float(len(opened_db))
        else:
            upper_limit = max_per_rdir - 1
        avoids = [
            _make_id(self.ns, "rdir", x['addr'])
            for x in all_rdir
            if x['score'] > 0 and
            x['tags']['stat.opened_db_count'] > upper_limit
        ]
        known = [_make_id(self.ns, "rawx", volume_id)]
        try:
            polled = self._poll_rdir(avoid=avoids, known=known)
        except ClientException as exc:
            if exc.status != 481 or max_per_rdir:
                raise
            # Retry without `avoids`, hoping the next iteration
            # will rebalance
            polled = self._poll_rdir(known=known)

        forced = {
            'host': polled['addr'],
            'type': 'rdir',
            'seq': 1,
            'args': "",
            'id': polled['id']
        }
        self.directory.force(RDIR_ACCT, volume_id, 'rdir', forced,
                             autocreate=True)
        try:
            self.rdir.create(volume_id)
        except Exception as exc:
            self.logger.warn("Failed to create database for %s on %s: %s",
                             volume_id, polled['addr'], exc)
        return polled['id']

    def _poll_rdir(self, avoid=None, known=None):
        """Call the special rdir service pool (created if missing)"""
        try:
            svcs = self.cs.poll('__rawx_rdir', avoid=avoid, known=known)
        except ClientException as exc:
            if exc.status != 400:
                raise
            self.cs.lb.create_pool('__rawx_rdir', ((1, 'rawx'), (1, 'rdir')))
            svcs = self.cs.poll('__rawx_rdir', avoid=avoid, known=known)
        for svc in svcs:
            # FIXME: we should include the service type in a dedicated field
            if 'rdir' in svc['id']:
                return svc
        raise ServerException("LB returned incoherent result: %s" % svcs)
class TestMeta2Indexing(BaseTestCase):
    def setUp(self):
        super(TestMeta2Indexing, self).setUp()
        self.rdir_client = RdirClient(self.conf)
        self.directory_client = DirectoryClient(self.conf)
        self.container_client = ContainerClient(self.conf)
        self.containers = [random_str(14) for _ in range(0, randint(1, 10))]
        self.containers_svcs = {}
        self.event_agent_name = 'event-agent-1'

    def tearDown(self):
        super(TestMeta2Indexing, self).tearDown()
        self._containers_cleanup()
        self._service(self.event_agent_name, 'start', wait=3)

    def _containers_cleanup(self):
        for container in self.containers:
            self.container_client.container_delete(self.account, container)
            for svc in self.containers_svcs[container]:
                self.rdir_client.meta2_index_delete(
                    volume_id=svc['host'],
                    container_path="{0}/{1}/{2}".format(
                        self.ns, self.account, container),
                    container_id=cid_from_name(self.account, container))

    def _filter_by_managing_svc(self, all_containers, svc_of_interest):
        """
        Filters through the containers, returning only those that have
        svc_of_interest in their list of managing services.
        """
        containers_list = []
        for key in all_containers.keys():
            if svc_of_interest in [x['host'] for x in all_containers[key]]:
                containers_list.append(key)

        return sorted(containers_list)

    def test_volume_indexing_worker(self):
        """
        Test steps:
        - Generate a list of container names and create them
        - Collect their respective meta2 servers
        - For each meta2 server:
            - Run a meta2 indexing worker
            - List all rdir index records and match them with the services
              we're expecting.
        :return:
        """
        self._service(self.event_agent_name, "stop", wait=3)

        for container in self.containers:
            self.container_client.container_create(account=self.account,
                                                   reference=container)

        for container in self.containers:
            self.containers_svcs[container] = [
                x for x in self.directory_client.list(
                    account=self.account,
                    reference=container)['srv']
                if x['type'] == 'meta2'
            ]

        meta2_data_paths = {}
        for svc in self.conf['services']['meta2']:
            svc_host = svc.get('service_id', svc['addr'])
            meta2_data_paths[svc_host] = svc['path']

        distinct_meta2_servers = set()
        for svc_list in self.containers_svcs.values():
            for svc in svc_list:
                distinct_meta2_servers.add(svc['host'])

        for svc in distinct_meta2_servers:
            expected_containers = self._filter_by_managing_svc(
                self.containers_svcs, svc)
            worker = Meta2IndexingWorker(meta2_data_paths[svc], self.conf)
            worker.crawl_volume()
            indexed_containers = sorted([
                x['container_url'].split('/')[-1]
                for x in self.rdir_client.meta2_index_fetch_all(
                    volume_id=svc)
            ])

            for cont in expected_containers:
                self.assertIn(cont, indexed_containers)
class TestDirectoryAPI(BaseTestCase):
    def setUp(self):
        super(TestDirectoryAPI, self).setUp()
        self.api = DirectoryClient({'namespace': self.ns}, endpoint=self.uri)

    def _create(self, name, metadata=None):
        return self.api.create(self.account, name, properties=metadata)

    def _delete(self, name):
        self.api.delete(self.account, name)

    def _clean(self, name, clear=False):
        if clear:
            # must clean properties before
            self.api.del_properties(self.account, name, [])
        self._delete(name)

    def _get_properties(self, name, properties=None):
        return self.api.get_properties(
            self.account, name, properties=properties)

    def _set_properties(self, name, properties=None):
        return self.api.set_properties(
            self.account, name, properties=properties)

    def test_list(self):
        # get on unknown reference
        name = random_str(32)
        self.assertRaises(exc.NotFound, self.api.list, self.account, name)
        self._create(name)
        # get on existing reference
        res = self.api.list(self.account, name)
        self.assertIsNot(res['dir'], None)
        self.assertIsNot(res['srv'], None)
        self._delete(name)
        # get on deleted reference
        self.assertRaises(exc.NotFound, self.api.list, self.account, name)

    def test_create(self):
        name = random_str(32)
        res = self._create(name)
        self.assertEqual(res, True)
        # second create
        res = self._create(name)
        self.assertEqual(res, False)
        # clean
        self._delete(name)

    def test_create_properties(self):
        name = random_str(32)
        metadata = {
            random_str(32): random_str(32),
            random_str(32): random_str(32),
        }
        res = self._create(name, metadata)
        self.assertEqual(res, True)
        data = self._get_properties(name)
        self.assertEqual(data['properties'], metadata)
        # clean
        self._clean(name, True)

    def test_create_without_account(self):
        account = random_str(32)
        name = random_str(32)
        account_client = AccountClient(self.conf)
        self.assertRaises(exc.NotFound, account_client.account_show, account)
        self.api.create(account, name)
        time.sleep(0.5)  # ensure account events have been processed
        self.assertEqual(account_client.account_show(account)['id'],
                         account)
        # clean
        self.api.delete(account, name)
        account_client.account_delete(account)

    def test_delete(self):
        name = random_str(32)
        # delete on unknown reference
        self.assertRaises(exc.NotFound, self.api.delete, self.account, name)
        res = self._create(name)
        self.assertEqual(res, True)
        # delete on existing reference
        self._delete(name)
        # verify deleted
        self.assertRaises(exc.NotFound, self.api.list, self.account, name)
        # second delete
        self.assertRaises(exc.NotFound, self.api.delete, self.account, name)
        # verify deleted
        self.assertRaises(exc.NotFound, self.api.list, self.account, name)

    def test_get_properties(self):
        name = random_str(32)
        # get_properties on unknown reference
        self.assertRaises(
            exc.NotFound, self.api.get_properties, self.account, name)
        res = self._create(name)
        self.assertEqual(res, True)
        # get_properties on existing reference
        data = self.api.get_properties(self.account, name)
        self.assertEqual(data['properties'], {})
        # get_properties
        metadata = {
            random_str(32): random_str(32),
            random_str(32): random_str(32),
        }
        self._set_properties(name, metadata)
        data = self.api.get_properties(self.account, name)
        self.assertEqual(data['properties'], metadata)
        # get_properties specify key
        key = metadata.keys().pop(0)
        data = self.api.get_properties(self.account, name, [key])
        self.assertEqual(data['properties'], {key: metadata[key]})
        # clean
        self._clean(name, True)
        # get_properties on deleted reference
        self.assertRaises(
            exc.NotFound, self.api.get_properties, self.account, name)

    def test_set_properties(self):
        name = random_str(32)
        metadata = {
            random_str(32): random_str(32),
            random_str(32): random_str(32),
        }
        # set_properties on unknown reference
        self.assertRaises(
            exc.NotFound, self.api.set_properties, self.account, name,
            metadata)
        res = self._create(name)
        self.assertEqual(res, True)
        # set_properties on existing reference
        self.api.set_properties(self.account, name, metadata)
        data = self._get_properties(name)
        self.assertEqual(data['properties'], metadata)
        # set_properties
        key = random_str(32)
        value = random_str(32)
        metadata2 = {key: value}
        self._set_properties(name, metadata2)
        metadata.update(metadata2)
        data = self._get_properties(name)
        self.assertEqual(data['properties'], metadata)
        # set_properties overwrite key
        key = metadata.keys().pop(0)
        value = random_str(32)
        metadata3 = {key: value}
        metadata.update(metadata3)
        self.api.set_properties(self.account, name, metadata3)
        data = self._get_properties(name)
        self.assertEqual(data['properties'], metadata)
        # clean
        self._clean(name, True)
        # set_properties on deleted reference
        self.assertRaises(
            exc.NotFound, self.api.set_properties, self.account, name,
            metadata)

    def test_del_properties(self):
        name = random_str(32)
        metadata = {
            random_str(32): random_str(32),
            random_str(32): random_str(32),
        }
        # del_properties on unknown reference
        self.assertRaises(
            exc.NotFound, self.api.del_properties, self.account, name, [])
        res = self._create(name, metadata)
        self.assertEqual(res, True)
        key = metadata.keys().pop()
        del metadata[key]
        # del_properties on existing reference
        self.api.del_properties(self.account, name, [key])
        data = self._get_properties(name)
        self.assertEqual(data['properties'], metadata)
        # del_properties on unknown key
        key = random_str(32)
        # We do not check if a property exists before deleting it
        # self.assertRaises(
        #     exc.NotFound, self.api.del_properties, self.account, name,
        #     [key])
        self.api.del_properties(self.account, name, [key])
        data = self._get_properties(name)
        self.assertEqual(data['properties'], metadata)
        # clean
        self._clean(name, True)
        # del_properties on deleted reference
        self.assertRaises(
            exc.NotFound, self.api.set_properties, self.account, name,
            metadata)

    def test_list_services(self):
        # list_services on unknown reference
        name = random_str(32)
        echo = 'echo'
        self.assertRaises(
            exc.NotFound, self.api.list, self.account, name,
            service_type=echo)
        self._create(name)
        # list_services on existing reference
        res = self.api.list(self.account, name, service_type=echo)
        self.assertIsNot(res['dir'], None)
        self.assertIsNot(res['srv'], None)
        self._delete(name)
        # get on deleted reference
        self.assertRaises(exc.NotFound, self.api.list, self.account, name)

    def test_rdir_linking_old(self):
        """
        Tests that rdir services linked to rawx services
        are not on the same locations
        """
        self.skipTest('Deprecated way of linking rdir services')
        self._reload_proxy()
        cs = ConscienceClient({'namespace': self.ns})
        rawx_list = cs.all_services('rawx')
        rdir_dict = {x['addr']: x for x in cs.all_services('rdir')}

        # Link the services
        for rawx in rawx_list:
            self.api.link('_RDIR_TEST', rawx['addr'], 'rdir',
                          autocreate=True)

        # Do the checks
        for rawx in rawx_list:
            linked_rdir = self.api.list(
                '_RDIR_TEST', rawx['addr'], service_type='rdir')['srv']
            rdir = rdir_dict[linked_rdir[0]['host']]
            rawx_loc = rawx['tags'].get('tag.loc')
            rdir_loc = rdir['tags'].get('tag.loc')
            self.assertNotEqual(rawx_loc, rdir_loc)

        # Unlink the services
        for rawx in rawx_list:
            self.api.unlink('_RDIR_TEST', rawx['addr'], 'rdir')
            self.api.delete('_RDIR_TEST', rawx['addr'])

    def test_link_rdir_to_zero_scored_rawx(self):
        client = RdirClient({'namespace': self.ns})
        disp = RdirDispatcher({'namespace': self.ns})

        # Register a service, with score locked to zero
        new_rawx = self._srv('rawx', {'tag.loc': 'whatever'})
        new_rawx['score'] = 0
        self._register_srv(new_rawx)
        self._reload_proxy()

        all_rawx = disp.assign_all_rawx()
        all_rawx_keys = [x['addr'] for x in all_rawx]
        self.assertIn(new_rawx['addr'], all_rawx_keys)
        rdir_addr = client._get_rdir_addr(new_rawx['addr'])
        self.assertIsNotNone(rdir_addr)

        try:
            self.api.unlink('_RDIR', new_rawx['addr'], 'rdir')
            self.api.delete('_RDIR', new_rawx['addr'])
            # self._flush_cs('rawx')
        except Exception:
            pass

    def test_rdir_repartition(self):
        client = RdirDispatcher({'namespace': self.ns})
        self._reload_proxy()
        all_rawx = client.assign_all_rawx()
        self.assertGreater(len(all_rawx), 0)
        by_rdir = dict()
        total = 0
        for rawx in all_rawx:
            count = by_rdir.get(rawx['rdir']['addr'], 0)
            total += 1
            by_rdir[rawx['rdir']['addr']] = count + 1
        avg = total / float(len(by_rdir))
        print("Ideal number of bases per rdir: ", avg)
        print("Current repartition: ", by_rdir)
        for count in by_rdir.itervalues():
            self.assertLessEqual(count, avg + 1)
class TestDirectoryAPI(BaseTestCase):
    def setUp(self):
        super(TestDirectoryAPI, self).setUp()
        self.api = DirectoryClient({'namespace': self.ns}, endpoint=self.uri)

    def _create(self, name, metadata=None):
        return self.api.create(self.account, name, properties=metadata)

    def _delete(self, name):
        self.api.delete(self.account, name)

    def _clean(self, name, clear=False):
        if clear:
            # must clean properties before
            self.api.del_properties(self.account, name, [])
        self._delete(name)

    def _get_properties(self, name, properties=None):
        return self.api.get_properties(self.account, name,
                                       properties=properties)

    def _set_properties(self, name, properties=None):
        return self.api.set_properties(self.account, name,
                                       properties=properties)

    def test_list(self):
        # get on unknown reference
        name = random_str(32)
        self.assertRaises(exc.NotFound, self.api.list, self.account, name)
        self._create(name)
        # get on existing reference
        res = self.api.list(self.account, name)
        self.assertIsNot(res['dir'], None)
        self.assertIsNot(res['srv'], None)
        self.assertEqual(res['name'], name)
        self.assertEqual(res['account'], self.account)
        self._delete(name)
        # get on deleted reference
        self.assertRaises(exc.NotFound, self.api.list, self.account, name)

    def test_show_by_cid(self):
        name = random_str(32)
        self._create(name)
        res = self.api.list(cid=cid_from_name(self.account, name))
        self.assertIsNotNone(res['dir'])
        self.assertIsNotNone(res['srv'])
        self.assertEqual(res['name'], name)
        self.assertEqual(res['account'], self.account)
        self._delete(name)

    def test_create(self):
        name = random_str(32)
        res = self._create(name)
        self.assertEqual(res, True)
        # second create
        res = self._create(name)
        self.assertEqual(res, False)
        # clean
        self._delete(name)

    def test_create_properties(self):
        name = random_str(32)
        metadata = {
            random_str(32): random_str(32),
            random_str(32): random_str(32),
        }
        res = self._create(name, metadata)
        self.assertEqual(res, True)
        data = self._get_properties(name)
        self.assertEqual(data['properties'], metadata)
        # clean
        self._clean(name, True)

    def test_create_without_account(self):
        account = random_str(32)
        name = random_str(32)
        account_client = AccountClient(self.conf)
        self.assertRaises(exc.NotFound, account_client.account_show, account)
        self.api.create(account, name)
        time.sleep(0.5)  # ensure account events have been processed
        self.assertEqual(account_client.account_show(account)['id'],
                         account)
        # clean
        self.api.delete(account, name)
        account_client.account_delete(account)

    def test_delete(self):
        name = random_str(32)
        # delete on unknown reference
        self.assertRaises(exc.NotFound, self.api.delete, self.account, name)
        res = self._create(name)
        self.assertEqual(res, True)
        # delete on existing reference
        self._delete(name)
        # verify deleted
        self.assertRaises(exc.NotFound, self.api.list, self.account, name)
        # second delete
        self.assertRaises(exc.NotFound, self.api.delete, self.account, name)
        # verify deleted
        self.assertRaises(exc.NotFound, self.api.list, self.account, name)

    def test_get_properties(self):
        name = random_str(32)
        # get_properties on unknown reference
        self.assertRaises(exc.NotFound, self.api.get_properties,
                          self.account, name)
        res = self._create(name)
        self.assertEqual(res, True)
        # get_properties on existing reference
        data = self.api.get_properties(self.account, name)
        self.assertEqual(data['properties'], {})
        # get_properties
        metadata = {
            random_str(32): random_str(32),
            random_str(32): random_str(32),
        }
        self._set_properties(name, metadata)
        data = self.api.get_properties(self.account, name)
        self.assertEqual(data['properties'], metadata)
        # get_properties specify key
        key, old_val = metadata.popitem()
        data = self.api.get_properties(self.account, name, [key])
        self.assertEqual(data['properties'], {key: old_val})
        # clean
        self._clean(name, True)
        # get_properties on deleted reference
        self.assertRaises(exc.NotFound, self.api.get_properties,
                          self.account, name)

    def test_set_properties(self):
        name = random_str(32)
        metadata = {
            random_str(32): random_str(32),
            random_str(32): random_str(32),
        }
        # set_properties on unknown reference
        self.assertRaises(exc.NotFound, self.api.set_properties,
                          self.account, name, metadata)
        res = self._create(name)
        self.assertEqual(res, True)
        # set_properties on existing reference
        self.api.set_properties(self.account, name, metadata)
        data = self._get_properties(name)
        self.assertEqual(data['properties'], metadata)
        # set_properties
        key = random_str(32)
        value = random_str(32)
        metadata2 = {key: value}
        self._set_properties(name, metadata2)
        metadata.update(metadata2)
        data = self._get_properties(name)
        self.assertEqual(data['properties'], metadata)
        # set_properties overwrite key
        key, _ = metadata.popitem()
        value = random_str(32)
        metadata3 = {key: value}
        metadata.update(metadata3)
        self.api.set_properties(self.account, name, metadata3)
        data = self._get_properties(name)
        self.assertEqual(data['properties'], metadata)
        # set_properties overwrite key with empty value
        key = list(metadata.keys())[0]
        metadata4 = {key: ''}
        del metadata[key]
        self.api.set_properties(self.account, name, metadata4)
        data = self._get_properties(name)
        self.assertEqual(data['properties'], metadata)
        # clean
        self._clean(name, True)
        # set_properties on deleted reference
        self.assertRaises(exc.NotFound, self.api.set_properties,
                          self.account, name, metadata)

    def test_del_properties(self):
        name = random_str(32)
        metadata = {
            random_str(32): random_str(32),
            random_str(32): random_str(32),
        }
        # del_properties on unknown reference
        self.assertRaises(exc.NotFound, self.api.del_properties,
                          self.account, name, [])
        res = self._create(name, metadata)
        self.assertEqual(res, True)
        key, _ = metadata.popitem()
        # del_properties on existing reference
        self.api.del_properties(self.account, name, [key])
        data = self._get_properties(name)
        self.assertEqual(data['properties'], metadata)
        # del_properties on unknown key
        key = random_str(32)
        # We do not check if a property exists before deleting it
        # self.assertRaises(
        #     exc.NotFound, self.api.del_properties, self.account, name,
        #     [key])
        self.api.del_properties(self.account, name, [key])
        data = self._get_properties(name)
        self.assertEqual(data['properties'], metadata)
        # clean
        self._clean(name, True)
        # del_properties on deleted reference
        self.assertRaises(exc.NotFound, self.api.set_properties,
                          self.account, name, metadata)

    def test_list_services(self):
        # list_services on unknown reference
        name = random_str(32)
        echo = 'echo'
        self.assertRaises(exc.NotFound, self.api.list, self.account, name,
                          service_type=echo)
        self._create(name)
        # list_services on existing reference
        res = self.api.list(self.account, name, service_type=echo)
        self.assertIsNot(res['dir'], None)
        self.assertIsNot(res['srv'], None)
        self.assertEqual(res['name'], name)
        self.assertEqual(res['account'], self.account)
        self._delete(name)
        # get on deleted reference
        self.assertRaises(exc.NotFound, self.api.list, self.account, name)

    def test_link_rdir_to_zero_scored_rawx(self):
        disp = RdirDispatcher({'namespace': self.ns},
                              pool_manager=self.http_pool)

        # Register a service, with score locked to zero
        new_rawx = self._srv('rawx', {'tag.loc': _fake_location})
        new_rawx['score'] = 0
        self._register_srv(new_rawx)
        self._reload_proxy()

        all_rawx = disp.assign_all_rawx()
        all_rawx_keys = [x['addr'] for x in all_rawx]
        self.assertIn(new_rawx['addr'], all_rawx_keys)
        rdir_addr = disp.rdir._get_rdir_addr(new_rawx['addr'])
        self.assertIsNotNone(rdir_addr)

        try:
            self.api.unlink(RDIR_ACCT, new_rawx['addr'], 'rdir')
            self.api.delete(RDIR_ACCT, new_rawx['addr'])
            # self._flush_cs('rawx')
        except Exception:
            pass

    def test_link_rdir_unachievable_min_dist(self):
        disp = RdirDispatcher({'namespace': self.ns},
                              pool_manager=self.http_pool)

        # Register a service, with score locked to zero
        new_rawx = self._srv('rawx', {'tag.loc': _fake_location})
        new_rawx['score'] = 90
        self._register_srv(new_rawx)
        self._reload_proxy()

        self.assertRaises(exc.OioException,
                          disp.assign_all_rawx, min_dist=4)
        all_rawx, _ = disp.get_assignments('rawx')
        all_rawx_keys = [x['addr'] for x in all_rawx]
        self.assertIn(new_rawx['addr'], all_rawx_keys)
        self.assertRaises(exc.VolumeException,
                          disp.rdir._get_rdir_addr, new_rawx['addr'])

    def _generate_services(self, types, score=50):
        all_srvs = dict()
        for type_, count in types.items():
            srvs = [
                self._srv(type_, {'tag.loc': 'whatever%d' % i})
                for i in range(count)
            ]
            for srv in srvs:
                srv['score'] = score
                srv['id'] = _make_id(self.ns, type_, srv['addr'])
            all_srvs[type_] = srvs
        return all_srvs

    def _test_link_rdir_fail_to_force(self, side_effects, expected_exc):
        disp = RdirDispatcher({'namespace': self.ns})

        # Mock rdir and rawx services so we do not pollute following tests
        all_srvs = self._generate_services({'rdir': 3, 'rawx': 3})

        def _all_services(type_, *args, **kwargs):
            """Return all mocked services of specified type"""
            return all_srvs[type_]

        def _poll(*args, **kwargs):
            """Pick one mocked random service"""
            return [random.choice(all_srvs['rdir'])]

        disp.cs.all_services = Mock(side_effect=_all_services)
        disp.cs.poll = Mock(side_effect=_poll)
        # Mock the check method to avoid calling the proxy
        disp.directory.list = Mock(side_effect=exc.NotFound)
        # Mock the assignation methods so we can check the calls
        disp._smart_link_rdir = \
            Mock(wraps=disp._smart_link_rdir)
        disp.directory.force = \
            Mock(wraps=disp.directory.force, side_effect=side_effects)

        # Expect an exception since some assignations will fail
        self.assertRaises(expected_exc, disp.assign_all_rawx,
                          max_attempts=1)

        # But ensure all calls have been made
        link_calls = [
            call(rawx['addr'], ANY, max_per_rdir=ANY, max_attempts=1,
                 min_dist=ANY, service_type='rawx', reassign=False)
            for rawx in all_srvs['rawx']
        ]
        disp._smart_link_rdir.assert_has_calls(link_calls)
        force_calls = \
            [call(RDIR_ACCT, rawx['addr'], 'rdir', ANY, autocreate=True,
                  replace=ANY)
             for rawx in all_srvs['rawx']]
        disp.directory.force.assert_has_calls(force_calls)

    def test_link_rdir_fail_to_force_one(self):
        """
        Verify that the failure of one 'force' operation does not break
        the whole operation.
        """
        self._test_link_rdir_fail_to_force(
            [exc.ServiceBusy(message='Failed :(', status=503),
             None,
             None],
            exc.ServiceBusy)

    def test_link_rdir_fail_to_force_several(self):
        """
        Verify that the failure of two 'force' operations does not break
        the whole operation.
        """
        self._test_link_rdir_fail_to_force([
            exc.ServiceBusy(message='Failed :(', status=503),
            exc.OioTimeout('Timeout :('),
            None
        ], exc.OioException)

    def test_rdir_repartition(self):
        # FIXME(FVE): this test will fail if run after self._flush_cs('rawx')
        client = RdirDispatcher({'namespace': self.ns})
        self._reload_proxy()
        all_rawx = client.assign_all_rawx()
        self.assertGreater(len(all_rawx), 0)
        by_rdir = dict()
        total = 0
        for rawx in all_rawx:
            count = by_rdir.get(rawx['rdir']['addr'], 0)
            total += 1
            by_rdir[rawx['rdir']['addr']] = count + 1
        avg = total / float(len(by_rdir))
        print("Ideal number of bases per rdir: ", avg)
        print("Current repartition: ", by_rdir)
        for count in by_rdir.values():
            self.assertLessEqual(count, avg + 1)
class RdirDispatcher(object):
    def __init__(self, conf, **kwargs):
        self.conf = conf
        self.ns = conf['namespace']
        self.logger = get_logger(conf)
        self.directory = DirectoryClient(conf, logger=self.logger, **kwargs)
        self.rdir = RdirClient(conf, logger=self.logger, **kwargs)
        self._cs = None

    @property
    def cs(self):
        if not self._cs:
            self._cs = ConscienceClient(self.conf, logger=self.logger)
        return self._cs

    def get_assignation(self, **kwargs):
        all_rawx = self.cs.all_services('rawx', **kwargs)
        all_rdir = self.cs.all_services('rdir', True, **kwargs)
        by_id = {_make_id(self.ns, 'rdir', x['addr']): x
                 for x in all_rdir}

        for rawx in all_rawx:
            try:
                # Verify that there is no rdir linked
                resp = self.directory.list(RDIR_ACCT, rawx['addr'],
                                           service_type='rdir', **kwargs)
                rdir_host = _filter_rdir_host(resp)
                try:
                    rawx['rdir'] = by_id[_make_id(self.ns, 'rdir',
                                                  rdir_host)]
                except KeyError:
                    self.logger.warn("rdir %s linked to rawx %s seems down",
                                     rdir_host, rawx['addr'])
                    rawx['rdir'] = {"addr": rdir_host, "tags": dict()}
                    by_id[_make_id(self.ns, 'rdir', rdir_host)] = \
                        rawx['rdir']
            except NotFound:
                self.logger.info("No rdir linked to %s", rawx['addr'])
        return all_rawx, all_rdir

    def assign_all_rawx(self, max_per_rdir=None, **kwargs):
        """
        Find an rdir service for all rawx that don't have one already.

        :param max_per_rdir: maximum number of rawx services that an rdir
            can be linked to
        :type max_per_rdir: `int`
        """
        all_rawx = self.cs.all_services('rawx', **kwargs)
        all_rdir = self.cs.all_services('rdir', True, **kwargs)
        if len(all_rdir) <= 0:
            raise ServiceUnavailable("No rdir service found in %s" % self.ns)
        by_id = {_make_id(self.ns, 'rdir', x['addr']): x
                 for x in all_rdir}

        for rawx in all_rawx:
            try:
                # Verify that there is no rdir linked
                resp = self.directory.list(RDIR_ACCT, rawx['addr'],
                                           service_type='rdir', **kwargs)
                rdir_host = _filter_rdir_host(resp)
                try:
                    rawx['rdir'] = by_id[_make_id(self.ns, 'rdir',
                                                  rdir_host)]
                except KeyError:
                    self.logger.warn("rdir %s linked to rawx %s seems down",
                                     rdir_host, rawx['addr'])
            except (NotFound, ClientException):
                rdir = self._smart_link_rdir(rawx['addr'], all_rdir,
                                             max_per_rdir, **kwargs)
                n_bases = by_id[rdir]['tags'].get("stat.opened_db_count", 0)
                by_id[rdir]['tags']["stat.opened_db_count"] = n_bases + 1
                rawx['rdir'] = by_id[rdir]
        return all_rawx

    def _smart_link_rdir(self, volume_id, all_rdir, max_per_rdir=None,
                         **kwargs):
        """
        Force the load balancer to avoid services that already host more
        bases than the average (or more than `max_per_rdir`)
        while selecting rdir services.
        """
        opened_db = [
            x['tags']['stat.opened_db_count'] for x in all_rdir
            if x['score'] > 0
        ]
        if len(opened_db) <= 0:
            raise ServiceUnavailable("No valid rdir service found in %s" %
                                     self.ns)
        if not max_per_rdir:
            upper_limit = sum(opened_db) / float(len(opened_db))
        else:
            upper_limit = max_per_rdir - 1
        avoids = [
            _make_id(self.ns, "rdir", x['addr'])
            for x in all_rdir
            if x['score'] > 0 and
            x['tags']['stat.opened_db_count'] > upper_limit
        ]
        known = [_make_id(self.ns, "rawx", volume_id)]
        try:
            polled = self._poll_rdir(avoid=avoids, known=known, **kwargs)
        except ClientException as exc:
            if exc.status != 481 or max_per_rdir:
                raise
            # Retry without `avoids`, hoping the next iteration
            # will rebalance
            polled = self._poll_rdir(known=known, **kwargs)

        # Associate the rdir to the rawx
        forced = {
            'host': polled['addr'],
            'type': 'rdir',
            'seq': 1,
            'args': "",
            'id': polled['id']
        }
        max_attempts = 7
        for i in range(max_attempts):
            try:
                self.directory.force(RDIR_ACCT, volume_id, 'rdir',
                                     forced, autocreate=True, **kwargs)
                break
            except ClientException as ex:
                # Already done
                done = (455,)
                if ex.status in done:
                    break
                if ex.message.startswith(
                        'META1 error: (SQLITE_CONSTRAINT) '
                        'UNIQUE constraint failed'):
                    self.logger.info("Ignored exception (already0): %s", ex)
                    break
                if ex.message.startswith(
                        'META1 error: (SQLITE_CONSTRAINT) '
                        'columns cid, srvtype, seq are not unique'):
                    self.logger.info("Ignored exception (already1): %s", ex)
                    break
                # Manage several unretriable errors
                retry = (406, 450, 503, 504)
                if ex.status >= 400 and ex.status not in retry:
                    raise
                # Monotonic backoff (retriable and network errors)
                if i < max_attempts - 1:
                    from time import sleep
                    sleep(i * 1.0)
                    continue
                # Too many attempts
                raise

        # Do the creation in the rdir itself
        try:
            self.rdir.create(volume_id, **kwargs)
        except Exception as exc:
            self.logger.warn("Failed to create database for %s on %s: %s",
                             volume_id, polled['addr'], exc)
        return polled['id']

    def _poll_rdir(self, avoid=None, known=None, **kwargs):
        """Call the special rdir service pool (created if missing)"""
        try:
            svcs = self.cs.poll('__rawx_rdir', avoid=avoid, known=known,
                                **kwargs)
        except ClientException as exc:
            if exc.status != 400:
                raise
            self.cs.lb.create_pool(
                '__rawx_rdir', ((1, JOKER_SVC_TARGET), (1, 'rdir')),
                **kwargs)
            svcs = self.cs.poll('__rawx_rdir', avoid=avoid, known=known,
                                **kwargs)
        for svc in svcs:
            # FIXME: we should include the service type in a dedicated field
            if 'rdir' in svc['id']:
                return svc
        raise ServerException("LB returned incoherent result: %s" % svcs)
class TestDirectoryAPI(BaseTestCase):

    def setUp(self):
        super(TestDirectoryAPI, self).setUp()
        self.api = DirectoryClient({'namespace': self.ns},
                                   endpoint=self.uri)

    def _create(self, name, metadata=None):
        return self.api.create(self.account, name, properties=metadata)

    def _delete(self, name):
        self.api.delete(self.account, name)

    def _clean(self, name, clear=False):
        if clear:
            # properties must be cleaned before deletion
            self.api.del_properties(self.account, name, [])
        self._delete(name)

    def _get_properties(self, name, properties=None):
        return self.api.get_properties(
            self.account, name, properties=properties)

    def _set_properties(self, name, properties=None):
        return self.api.set_properties(
            self.account, name, properties=properties)

    def test_list(self):
        # get on unknown reference
        name = random_str(32)
        self.assertRaises(exc.NotFound, self.api.list, self.account, name)
        self._create(name)
        # get on existing reference
        res = self.api.list(self.account, name)
        self.assertIsNot(res['dir'], None)
        self.assertIsNot(res['srv'], None)
        self._delete(name)
        # get on deleted reference
        self.assertRaises(exc.NotFound, self.api.list, self.account, name)

    def test_create(self):
        name = random_str(32)
        res = self._create(name)
        self.assertEqual(res, True)
        # second create
        res = self._create(name)
        self.assertEqual(res, False)
        # clean
        self._delete(name)

    def test_create_properties(self):
        name = random_str(32)
        metadata = {
            random_str(32): random_str(32),
            random_str(32): random_str(32),
        }
        res = self._create(name, metadata)
        self.assertEqual(res, True)
        data = self._get_properties(name)
        self.assertEqual(data['properties'], metadata)
        # clean
        self._clean(name, True)

    def test_delete(self):
        name = random_str(32)
        # delete on unknown reference
        self.assertRaises(exc.NotFound, self.api.delete, self.account, name)
        res = self._create(name)
        self.assertEqual(res, True)
        # delete on existing reference
        self._delete(name)
        # verify deleted
        self.assertRaises(exc.NotFound, self.api.list, self.account, name)
        # second delete
        self.assertRaises(exc.NotFound, self.api.delete, self.account, name)
        # verify deleted
        self.assertRaises(exc.NotFound, self.api.list, self.account, name)

    def test_get_properties(self):
        name = random_str(32)
        # get_properties on unknown reference
        self.assertRaises(exc.NotFound, self.api.get_properties,
                          self.account, name)
        res = self._create(name)
        self.assertEqual(res, True)
        # get_properties on existing reference
        data = self.api.get_properties(self.account, name)
        self.assertEqual(data['properties'], {})
        # get_properties
        metadata = {
            random_str(32): random_str(32),
            random_str(32): random_str(32),
        }
        self._set_properties(name, metadata)
        data = self.api.get_properties(self.account, name)
        self.assertEqual(data['properties'], metadata)
        # get_properties with a specific key
        # (list() keeps this compatible with both Python 2 and Python 3)
        key = list(metadata.keys())[0]
        data = self.api.get_properties(self.account, name, [key])
        self.assertEqual(data['properties'], {key: metadata[key]})
        # clean
        self._clean(name, True)
        # get_properties on deleted reference
        self.assertRaises(exc.NotFound, self.api.get_properties,
                          self.account, name)

    def test_set_properties(self):
        name = random_str(32)
        metadata = {
            random_str(32): random_str(32),
            random_str(32): random_str(32),
        }
        # set_properties on unknown reference
        self.assertRaises(exc.NotFound, self.api.set_properties,
                          self.account, name, metadata)
        res = self._create(name)
        self.assertEqual(res, True)
        # set_properties on existing reference
        self.api.set_properties(self.account, name, metadata)
        data = self._get_properties(name)
        self.assertEqual(data['properties'], metadata)
        # set_properties with a new key
        key = random_str(32)
        value = random_str(32)
        metadata2 = {key: value}
        self._set_properties(name, metadata2)
        metadata.update(metadata2)
        data = self._get_properties(name)
        self.assertEqual(data['properties'], metadata)
        # set_properties overwriting an existing key
        key = list(metadata.keys())[0]
        value = random_str(32)
        metadata3 = {key: value}
        metadata.update(metadata3)
        self.api.set_properties(self.account, name, metadata3)
        data = self._get_properties(name)
        self.assertEqual(data['properties'], metadata)
        # clean
        self._clean(name, True)
        # set_properties on deleted reference
        self.assertRaises(exc.NotFound, self.api.set_properties,
                          self.account, name, metadata)

    def test_del_properties(self):
        name = random_str(32)
        metadata = {
            random_str(32): random_str(32),
            random_str(32): random_str(32),
        }
        # del_properties on unknown reference
        self.assertRaises(exc.NotFound, self.api.del_properties,
                          self.account, name, [])
        res = self._create(name, metadata)
        self.assertEqual(res, True)
        # pick one key to remove (list() keeps this compatible with
        # both Python 2 and Python 3)
        key = list(metadata.keys()).pop()
        del metadata[key]
        # del_properties on existing reference
        self.api.del_properties(self.account, name, [key])
        data = self._get_properties(name)
        self.assertEqual(data['properties'], metadata)
        # del_properties on unknown key
        key = random_str(32)
        # We do not check if a property exists before deleting it
        # self.assertRaises(
        #     exc.NotFound, self.api.del_properties, self.account, name,
        #     [key])
        self.api.del_properties(self.account, name, [key])
        data = self._get_properties(name)
        self.assertEqual(data['properties'], metadata)
        # clean
        self._clean(name, True)
        # del_properties on deleted reference
        self.assertRaises(exc.NotFound, self.api.del_properties,
                          self.account, name, [])

    def test_list_services(self):
        # list_services on unknown reference
        name = random_str(32)
        echo = 'echo'
        self.assertRaises(exc.NotFound, self.api.list, self.account, name,
                          service_type=echo)
        self._create(name)
        # list_services on existing reference
        res = self.api.list(self.account, name, service_type=echo)
        self.assertIsNot(res['dir'], None)
        self.assertIsNot(res['srv'], None)
        self._delete(name)
        # get on deleted reference
        self.assertRaises(exc.NotFound, self.api.list, self.account, name)

    def test_rdir_linking(self):
        """
        Check that rdir services do not get linked to rawx services
        sharing the same location.
        """
        self._reload()
        cs = ConscienceClient({'namespace': self.ns})
        rawx_list = cs.all_services('rawx')
        rdir_dict = {x['addr']: x for x in cs.all_services('rdir')}
        # Link the services
        for rawx in rawx_list:
            self.api.link('_RDIR_TEST', rawx['addr'], 'rdir',
                          autocreate=True)
        # Do the checks
        for rawx in rawx_list:
            linked_rdir = self.api.list(
                '_RDIR_TEST', rawx['addr'], service_type='rdir')['srv']
            rdir = rdir_dict[linked_rdir[0]['host']]
            rawx_loc = rawx['tags'].get('tag.loc')
            rdir_loc = rdir['tags'].get('tag.loc')
            self.assertNotEqual(rawx_loc, rdir_loc)
        # Unlink the services
        for rawx in rawx_list:
            self.api.unlink('_RDIR_TEST', rawx['addr'], 'rdir')
            self.api.delete('_RDIR_TEST', rawx['addr'])

    def test_rdir_repartition(self):
        client = RdirDispatcher({'namespace': self.ns})
        all_rawx = client.assign_all_rawx()
        by_rdir = dict()
        total = 0
        for rawx in all_rawx:
            count = by_rdir.get(rawx['rdir']['addr'], 0)
            total += 1
            by_rdir[rawx['rdir']['addr']] = count + 1
        avg = total / float(len(by_rdir))
        # print() so this also runs under Python 3
        print("Ideal number of bases per rdir: %s" % avg)
        print("Current repartition: %s" % by_rdir)
        for count in by_rdir.values():
            self.assertLessEqual(count, avg + 1)
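
# Standard unittest entry point so this module can also be run directly;
# the project's own test runner may be the preferred way to launch it.
if __name__ == '__main__':
    import unittest
    unittest.main()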