class RdirDispatcher(object):
    """
    Link rdir services to the other services of a namespace
    (rawx, meta2...) and report the existing links.
    """

    def __init__(self, conf, rdir_client=None, **kwargs):
        """
        :param conf: configuration dictionary, must hold a 'namespace' key.
        :param rdir_client: optional rdir client to use instead of
            building a new `RdirClient`.
        :param kwargs: extra arguments forwarded to the constructors of
            the directory client and (when built here) the rdir client.
        """
        self.conf = conf
        self.ns = conf['namespace']
        self.logger = get_logger(conf)
        self.directory = DirectoryClient(conf, logger=self.logger, **kwargs)
        if rdir_client:
            self.rdir = rdir_client
        else:
            self.rdir = RdirClient(conf, logger=self.logger, **kwargs)
        self._cs = None
        # Options the special rdir pool was last created with
        # (None until `_poll_rdir` has been called once).
        self._pool_options = None

    @property
    def cs(self):
        """Conscience client, built on first access."""
        if not self._cs:
            self._cs = ConscienceClient(self.conf, logger=self.logger,
                                        pool_manager=self.rdir.pool_manager)
        return self._cs

    @staticmethod
    def _service_ref(service):
        """
        Get the name under which a service is looked up in the directory:
        its 'tag.service_id' tag when it is set and not empty,
        its address otherwise.

        :param service: service description (dictionary with at least
            an 'addr' key, and optionally a 'tags' dictionary).
        """
        tags = service.get('tags') or {}
        return tags.get('tag.service_id') or service['addr']

    def get_assignments(self, service_type, **kwargs):
        """
        Get rdir assignments for all services of the specified type.

        Each service which has a linked rdir gets an 'rdir' key holding
        the description of that rdir. When the linked rdir is not in the
        list returned by the conscience, a minimal description
        (address and empty tags) is used instead.

        :returns: a tuple with a list of all services of the specified
            type, and a list of all rdir services.
        :rtype: `tuple<list<dict>,list<dict>>`
        """
        all_services = self.cs.all_services(service_type, **kwargs)
        all_rdir = self.cs.all_services('rdir', True, **kwargs)
        by_id = {_make_id(self.ns, 'rdir', x['addr']): x for x in all_rdir}
        for service in all_services:
            try:
                resp = self.directory.list(RDIR_ACCT,
                                           self._service_ref(service),
                                           service_type='rdir', **kwargs)
                rdir_host = _filter_rdir_host(resp)
                rdir_id = _make_id(self.ns, 'rdir', rdir_host)
                try:
                    service['rdir'] = by_id[rdir_id]
                except KeyError:
                    self.logger.warn("rdir %s linked to %s %s seems down",
                                     rdir_host, service_type,
                                     service['addr'])
                    # Register a placeholder so other services linked to
                    # the same rdir share the same description.
                    loc_rdir = {"addr": rdir_host, "tags": dict()}
                    service['rdir'] = loc_rdir
                    by_id[rdir_id] = loc_rdir
            except NotFound:
                self.logger.info("No rdir linked to %s", service['addr'])
            except OioException as exc:
                self.logger.warn('Failed to get rdir linked to %s: %s',
                                 service['addr'], exc)
        return all_services, all_rdir

    def assign_services(self, service_type, max_per_rdir=None,
                        min_dist=None, **kwargs):
        """
        Assign an rdir service to all `service_type` servers that aren't
        already assigned one.

        :param max_per_rdir: Maximum number of services an rdir can handle.
        :type max_per_rdir: `int`
        :param min_dist: minimum required distance between any service and
            its assigned rdir service.
        :type min_dist: `int`
        :returns: The list of `service_type` services that were assigned
            rdir services.
        :raises ServiceUnavailable: when the conscience knows no rdir.
        :raises OioException: when at least one service could not be
            checked or linked (raised after all services were processed).
        """
        all_services = self.cs.all_services(service_type, **kwargs)
        all_rdir = self.cs.all_services('rdir', True, **kwargs)
        if not all_rdir:
            raise ServiceUnavailable("No rdir service found in %s" % self.ns)

        by_id = {_make_id(self.ns, 'rdir', x['addr']): x for x in all_rdir}
        errors = list()
        for provider in all_services:
            # Same lookup rule as get_assignments(): tolerate services
            # without tags or with an empty service ID.
            provider_id = self._service_ref(provider)
            try:
                resp = self.directory.list(RDIR_ACCT, provider_id,
                                           service_type='rdir', **kwargs)
                rdir_host = _filter_rdir_host(resp)
                try:
                    provider['rdir'] = by_id[
                        _make_id(self.ns, 'rdir', rdir_host)]
                except KeyError:
                    self.logger.warn("rdir %s linked to %s %s seems down",
                                     rdir_host, service_type, provider_id)
            except NotFound:
                # No rdir linked yet: select one and make the link.
                try:
                    rdir = self._smart_link_rdir(provider_id, all_rdir,
                                                 service_type=service_type,
                                                 max_per_rdir=max_per_rdir,
                                                 min_dist=min_dist,
                                                 **kwargs)
                except OioException as exc:
                    self.logger.warn("Failed to link an rdir to %s %s: %s",
                                     service_type, provider_id, exc)
                    errors.append((provider_id, exc))
                    continue
                # Account for the new base so the next selections see an
                # up-to-date load for this rdir.
                rdir_tags = by_id[rdir]['tags']
                n_bases = rdir_tags.get("stat.opened_db_count", 0)
                rdir_tags["stat.opened_db_count"] = n_bases + 1
                provider['rdir'] = by_id[rdir]
            except OioException as exc:
                self.logger.warn("Failed to check rdir linked to %s %s "
                                 "(thus won't try to make the link): %s",
                                 service_type, provider_id, exc)
                errors.append((provider_id, exc))
        if errors:
            # group_chunk_errors is flexible enough to accept service addresses
            errors = group_chunk_errors(errors)
            if len(errors) == 1:
                err, addrs = errors.popitem()
                oio_reraise(type(err), err, str(addrs))
            else:
                raise OioException('Several errors encountered: %s' %
                                   errors)
        return all_services

    def assign_all_meta2(self, max_per_rdir=None, **kwargs):
        """
        Assign an rdir service to all meta2 servers that aren't already
        assigned one.

        :param max_per_rdir: Maximum number of services an rdir can handle.
        :type max_per_rdir: `int`
        :returns: The list of meta2 that were assigned rdir services.
        """
        return self.assign_services("meta2", max_per_rdir, **kwargs)

    def assign_all_rawx(self, max_per_rdir=None, **kwargs):
        """
        Find an rdir service for all rawx that don't have one already.

        :param max_per_rdir: maximum number of rawx services that an rdir
                             can be linked to
        :type max_per_rdir: `int`
        """
        return self.assign_services("rawx", max_per_rdir, **kwargs)

    def _smart_link_rdir(self, volume_id, all_rdir, max_per_rdir=None,
                         max_attempts=7, service_type='rawx', min_dist=None,
                         **kwargs):
        """
        Force the load balancer to avoid services that already host more
        bases than the average (or more than `max_per_rdir`) while
        selecting rdir services.

        :param volume_id: directory reference of the service to link.
        :param all_rdir: descriptions of all known rdir services.
        :param max_attempts: maximum number of attempts at registering
            the link in the directory.
        :param service_type: type of the service to link.
        :param min_dist: minimum distance between the service and the
            selected rdir.
        :returns: the ID of the selected rdir service.
        :raises ServiceUnavailable: when no rdir has a positive score.
        """
        opened_db = [x['tags'].get('stat.opened_db_count', 0)
                     for x in all_rdir if x['score'] > 0]
        if not opened_db:
            raise ServiceUnavailable(
                "No valid rdir service found in %s" % self.ns)
        if not max_per_rdir:
            upper_limit = sum(opened_db) / float(len(opened_db))
        else:
            upper_limit = max_per_rdir - 1
        avoids = [_make_id(self.ns, "rdir", x['addr'])
                  for x in all_rdir
                  if x['score'] > 0 and
                  x['tags'].get('stat.opened_db_count', 0) > upper_limit]
        known = [_make_id(self.ns, service_type, volume_id)]
        try:
            polled = self._poll_rdir(avoid=avoids, known=known,
                                     min_dist=min_dist, **kwargs)
        except ClientException as exc:
            # NOTE(review): 481 is presumably "no service matches the
            # constraints" -- confirm against the proxy documentation.
            if exc.status != 481 or max_per_rdir:
                raise
            # Retry without `avoids`, hoping the next iteration will rebalance
            polled = self._poll_rdir(known=known, min_dist=min_dist,
                                     **kwargs)

        # Associate the rdir to the rawx
        forced = {'host': polled['addr'], 'type': 'rdir',
                  'seq': 1, 'args': "", 'id': polled['id']}
        for i in range(max_attempts):
            try:
                self.directory.force(RDIR_ACCT, volume_id, 'rdir',
                                     forced, autocreate=True, **kwargs)
                break
            except ClientException as ex:
                # Already done
                done = (455,)
                if ex.status in done:
                    break
                if ex.message.startswith(
                        'META1 error: (SQLITE_CONSTRAINT) '
                        'UNIQUE constraint failed'):
                    self.logger.info(
                        "Ignored exception (already0): %s", ex)
                    break
                if ex.message.startswith(
                        'META1 error: (SQLITE_CONSTRAINT) '
                        'columns cid, srvtype, seq are not unique'):
                    self.logger.info(
                        "Ignored exception (already1): %s", ex)
                    break
                # Manage several unretriable errors
                retry = (406, 450, 503, 504)
                if ex.status >= 400 and ex.status not in retry:
                    raise
                # Monotonic backoff (retriable and net errors)
                if i < max_attempts - 1:
                    sleep(i * 1.0)
                    continue
                # Too many attempts
                raise

        # Do the creation in the rdir itself. This is best-effort:
        # a failure is logged but does not cancel the link.
        try:
            self.rdir.create(volume_id, service_type=service_type, **kwargs)
        except Exception as exc:
            self.logger.warn("Failed to create database for %s on %s: %s",
                             volume_id, polled['addr'], exc)
        return polled['id']

    def _create_special_pool(self, options=None, force=False, **kwargs):
        """
        Create the special pool for rdir services.

        :param options: dictionary of custom options for the pool.
        :param force: overwrite the pool if it exists already.
        """
        self.cs.lb.create_pool(
            '__rawx_rdir', ((1, JOKER_SVC_TARGET), (1, 'rdir')),
            options=options, force=force, **kwargs)

    def _poll_rdir(self, avoid=None, known=None, min_dist=None, **kwargs):
        """
        Call the special rdir service pool (created if missing).

        :param avoid: IDs of the services the load balancer should avoid.
        :param known: list holding the ID of the single service the rdir
            is selected for.
        :param min_dist: minimum distance to ensure between the known
            service and the selected rdir service.
        :returns: the description of the selected rdir service.
        :raises ValueError: when `known` does not hold exactly one ID.
        :raises ServerException: when the load balancer returns no rdir.
        """
        if not known or len(known) > 1:
            raise ValueError('There should be exactly one "known" service')

        options = dict()
        if min_dist is not None:
            options['min_dist'] = min_dist
        if options != self._pool_options:
            # Options have changed, overwrite the pool.
            self._pool_options = options
            self._create_special_pool(self._pool_options, force=True,
                                      **kwargs)

        try:
            svcs = self.cs.poll('__rawx_rdir', avoid=avoid, known=known,
                                **kwargs)
        except ClientException as exc:
            # Status 400 is handled as "the pool does not exist":
            # create it and poll again.
            if exc.status != 400:
                raise
            self._create_special_pool(self._pool_options, **kwargs)
            svcs = self.cs.poll('__rawx_rdir', avoid=avoid, known=known,
                                **kwargs)
        for svc in svcs:
            # FIXME: we should include the service type in a dedicated field
            if 'rdir' in svc['id']:
                return svc
        raise ServerException("LB returned incoherent result: %s" % svcs)
class CheckMeta2(CheckService):
    """
    Probe for meta2 services: runs a content prepare / create / locate /
    delete cycle through the proxy, against a dedicated container.
    """

    # Account holding the container used by the probe
    account_name = "_meta2_probe"

    def __init__(self, namespace, **kwargs):
        """
        :param namespace: name of the namespace to check.
        :param kwargs: extra arguments forwarded to `CheckService`.
        """
        # Joined with "/", this gives "http://<proxy>/v3.0/<ns>/content"
        ep_parts = ["http:/", load_namespace_conf(namespace).get('proxy'),
                    "v3.0", namespace, "content"]

        super(CheckMeta2, self).__init__(namespace, "meta2",
                                         endpoint="/".join(ep_parts),
                                         **kwargs)

        self.account = AccountClient({"namespace": self.ns})
        self.container = ContainerClient({"namespace": self.ns})
        self.directory = DirectoryClient({"namespace": self.ns})
        # Random name of the container used by this probe instance
        self.reference = random_buffer('0123456789ABCDEF', 64)

    def _get_params(self):
        """
        Build request parameters targeting a new random content path
        in the probe container.
        """
        path = random_buffer('0123456789ABCDEF', 64)
        return {'acct': self.account_name, 'ref': self.reference,
                'path': path}

    def _compare_chunks(self, chunks1, chunks2):
        """
        Tell if two chunk lists describe the same chunks, regardless of
        their order. Only the "url" and "hash" fields are compared.

        :returns: True if both lists match, False if they differ or if
            one of them is not an iterable of comparable chunk
            descriptions.
        :raises KeyError: if a chunk has no "url" or no "hash" field.
        """
        def light_chunks(chunks):
            # Tuples can be ordered on both Python 2 and Python 3,
            # unlike dictionaries (Python 2 only).
            return sorted((chunk["url"], chunk["hash"]) for chunk in chunks)

        try:
            return light_chunks(chunks1) == light_chunks(chunks2)
        except TypeError:
            return False

    def _cycle(self, meta2_host):
        """
        Run a whole content life cycle on the specified meta2 service.

        :param meta2_host: address of the meta2 service to check.
        :returns: True if every step got the expected result.
        """
        # Make the probe container point to the meta2 service to check
        self.directory.unlink(
            account=self.account_name, reference=self.reference,
            service_type=self.service_type)
        service = {"host": meta2_host, "type": self.service_type,
                   "args": "", "seq": 1}
        self.directory.force(
            account=self.account_name, reference=self.reference,
            service_type=self.service_type, services=service)

        params = self._get_params()
        global_success = True

        # The content must not exist yet
        _, body, success = self._request(
            "GET", "/locate", params=params, expected_status=404)
        global_success &= success

        headers = {'X-oio-action-mode': 'autocreate'}
        _, body, success = self._request(
            "POST", "/prepare", params=params, headers=headers,
            json={"size": "1024"}, expected_status=200)
        global_success &= success
        chunks = body

        # Preparing a content must not create it
        _, body, success = self._request(
            "GET", "/locate", params=params, expected_status=404)
        global_success &= success

        headers = {"x-oio-content-meta-length": "1024"}
        _, _, success = self._request(
            "POST", "/create", params=params, headers=headers, json=chunks,
            expected_status=204)
        global_success &= success

        # The content must now exist, with the chunks it was created with
        _, body, success = self._request(
            "GET", "/locate", params=params, expected_status=200)
        global_success &= success
        success = self._compare_chunks(chunks, body)
        global_success &= success

        _, _, success = self._request(
            "POST", "/delete", params=params, expected_status=204)
        global_success &= success

        # The content must be gone
        _, body, success = self._request(
            "GET", "/locate", params=params, expected_status=404)
        global_success &= success

        return global_success

    def run(self):
        """
        Create the probe container, run the checks, then delete the
        container and the probe account. Any exception is printed
        instead of being raised.
        """
        try:
            self.container.container_create(account=self.account_name,
                                            reference=self.reference)
            try:
                super(CheckMeta2, self).run()
            finally:
                # Clean up even when the checks fail, so failed runs do
                # not leave probe containers behind.
                self.container.container_delete(account=self.account_name,
                                                reference=self.reference)
                sleep(1)
                self.account.account_delete(self.account_name)
        except Exception as exc:
            print("Exception - " + str(exc))
class RdirDispatcher(object):
    """
    Link rdir services to the rawx services of a namespace and report
    the existing links.
    """

    def __init__(self, conf, **kwargs):
        """
        :param conf: configuration dictionary, must hold a 'namespace' key.
        :param kwargs: extra arguments forwarded to the constructors of
            the directory and rdir clients.
        """
        self.conf = conf
        self.ns = conf['namespace']
        self.logger = get_logger(conf)
        self.directory = DirectoryClient(conf, logger=self.logger, **kwargs)
        self.rdir = RdirClient(conf, logger=self.logger, **kwargs)
        self._cs = None

    @property
    def cs(self):
        """Conscience client, built on first access."""
        if not self._cs:
            self._cs = ConscienceClient(self.conf, logger=self.logger)
        return self._cs

    @staticmethod
    def _opened_db_count(rdir):
        """
        Get the number of bases an rdir service declares to host,
        0 when the service has not published this statistic.

        :param rdir: rdir service description (dictionary).
        """
        tags = rdir.get('tags') or {}
        return tags.get('stat.opened_db_count', 0)

    def get_assignation(self):
        """
        Get the rdir assigned to each rawx service.

        Each rawx which has a linked rdir gets an 'rdir' key holding the
        description of that rdir. When the linked rdir is not in the
        list returned by the conscience, a minimal description
        (address and empty tags) is used instead.

        :returns: a tuple with the list of all rawx services and the
            list of all rdir services.
        """
        all_rawx = self.cs.all_services('rawx')
        all_rdir = self.cs.all_services('rdir', True)
        by_id = {_make_id(self.ns, 'rdir', x['addr']): x for x in all_rdir}

        for rawx in all_rawx:
            try:
                # Look for the rdir linked to this rawx
                resp = self.directory.list(RDIR_ACCT, rawx['addr'],
                                           service_type='rdir')
                rdir_host = _filter_rdir_host(resp)
                rdir_id = _make_id(self.ns, 'rdir', rdir_host)
                try:
                    rawx['rdir'] = by_id[rdir_id]
                except KeyError:
                    self.logger.warn("rdir %s linked to rawx %s seems down",
                                     rdir_host, rawx['addr'])
                    # Register a placeholder so other rawx linked to the
                    # same rdir share the same description.
                    rawx['rdir'] = {"addr": rdir_host, "tags": dict()}
                    by_id[rdir_id] = rawx['rdir']
            except NotFound:
                self.logger.info("No rdir linked to %s", rawx['addr'])
        return all_rawx, all_rdir

    def assign_all_rawx(self, max_per_rdir=None):
        """
        Find a rdir service for all rawx that don't have one already.

        :param max_per_rdir: maximum number of rawx services that an rdir
                             can be linked to
        :type max_per_rdir: `int`
        :returns: the list of all rawx services.
        :raises ServiceUnavailable: when the conscience knows no rdir.
        """
        all_rawx = self.cs.all_services('rawx')
        all_rdir = self.cs.all_services('rdir', True)
        if not all_rdir:
            raise ServiceUnavailable("No rdir service found in %s" % self.ns)

        by_id = {_make_id(self.ns, 'rdir', x['addr']): x for x in all_rdir}

        for rawx in all_rawx:
            try:
                # Verify that there is no rdir linked
                resp = self.directory.list(RDIR_ACCT, rawx['addr'],
                                           service_type='rdir')
                rdir_host = _filter_rdir_host(resp)
                try:
                    rawx['rdir'] = by_id[_make_id(self.ns, 'rdir',
                                                  rdir_host)]
                except KeyError:
                    self.logger.warn("rdir %s linked to rawx %s seems down",
                                     rdir_host, rawx['addr'])
            except (NotFound, ClientException):
                if rawx['score'] <= 0:
                    self.logger.warn(
                        "rawx %s has score %s, and thus cannot be"
                        " affected a rdir (load balancer "
                        "limitation)", rawx['addr'], rawx['score'])
                    continue
                rdir = self._smart_link_rdir(rawx['addr'], all_rdir,
                                             max_per_rdir)
                # Account for the new base so the next selections see an
                # up-to-date load for this rdir.
                rdir_tags = by_id[rdir]['tags']
                n_bases = rdir_tags.get("stat.opened_db_count", 0)
                rdir_tags["stat.opened_db_count"] = n_bases + 1
                rawx['rdir'] = by_id[rdir]
        return all_rawx

    def _smart_link_rdir(self, volume_id, all_rdir, max_per_rdir=None):
        """
        Force the load balancer to avoid services that already host more
        bases than the average (or more than `max_per_rdir`) while
        selecting rdir services.

        :param volume_id: address of the rawx service to link.
        :param all_rdir: descriptions of all known rdir services.
        :returns: the ID of the selected rdir service.
        :raises ServiceUnavailable: when no rdir has a positive score.
        """
        # An rdir which has not published its base count yet is
        # considered empty instead of making the selection fail.
        opened_db = [self._opened_db_count(x)
                     for x in all_rdir if x['score'] > 0]
        if not opened_db:
            raise ServiceUnavailable("No valid rdir service found in %s" %
                                     self.ns)
        if not max_per_rdir:
            upper_limit = sum(opened_db) / float(len(opened_db))
        else:
            upper_limit = max_per_rdir - 1
        avoids = [_make_id(self.ns, "rdir", x['addr'])
                  for x in all_rdir
                  if x['score'] > 0 and
                  self._opened_db_count(x) > upper_limit]
        known = [_make_id(self.ns, "rawx", volume_id)]
        try:
            polled = self._poll_rdir(avoid=avoids, known=known)
        except ClientException as exc:
            # NOTE(review): 481 is presumably "no service matches the
            # constraints" -- confirm against the proxy documentation.
            if exc.status != 481 or max_per_rdir:
                raise
            # Retry without `avoids`, hoping the next iteration will rebalance
            polled = self._poll_rdir(known=known)

        # Associate the rdir to the rawx
        forced = {'host': polled['addr'], 'type': 'rdir',
                  'seq': 1, 'args': "", 'id': polled['id']}
        self.directory.force(RDIR_ACCT, volume_id, 'rdir',
                             forced, autocreate=True)
        # Creating the base in the rdir itself is best-effort:
        # a failure is logged but does not cancel the link.
        try:
            self.rdir.create(volume_id)
        except Exception as exc:
            self.logger.warn("Failed to create database for %s on %s: %s",
                             volume_id, polled['addr'], exc)
        return polled['id']

    def _poll_rdir(self, avoid=None, known=None):
        """
        Call the special rdir service pool (created if missing).

        :param avoid: IDs of the services the load balancer should avoid.
        :param known: IDs of the services already selected.
        :returns: the description of the selected rdir service.
        :raises ServerException: when the load balancer returns no rdir.
        """
        try:
            svcs = self.cs.poll('__rawx_rdir', avoid=avoid, known=known)
        except ClientException as exc:
            # Status 400 is handled as "the pool does not exist":
            # create it and poll again.
            if exc.status != 400:
                raise
            self.cs.lb.create_pool('__rawx_rdir', ((1, 'rawx'), (1, 'rdir')))
            svcs = self.cs.poll('__rawx_rdir', avoid=avoid, known=known)
        for svc in svcs:
            # FIXME: we should include the service type in a dedicated field
            if 'rdir' in svc['id']:
                return svc
        raise ServerException("LB returned incoherent result: %s" % svcs)
class RdirDispatcher(object):
    """
    Link rdir services to the rawx services of a namespace and report
    the existing links.
    """

    def __init__(self, conf, **kwargs):
        """
        :param conf: configuration dictionary, must hold a 'namespace' key.
        :param kwargs: extra arguments forwarded to the constructors of
            the directory and rdir clients.
        """
        self.conf = conf
        self.ns = conf['namespace']
        self.logger = get_logger(conf)
        self.directory = DirectoryClient(conf, logger=self.logger, **kwargs)
        self.rdir = RdirClient(conf, logger=self.logger, **kwargs)
        self._cs = None

    @property
    def cs(self):
        """Conscience client, built on first access."""
        if not self._cs:
            self._cs = ConscienceClient(self.conf, logger=self.logger)
        return self._cs

    @staticmethod
    def _opened_db_count(rdir):
        """
        Get the number of bases an rdir service declares to host,
        0 when the service has not published this statistic.

        :param rdir: rdir service description (dictionary).
        """
        tags = rdir.get('tags') or {}
        return tags.get('stat.opened_db_count', 0)

    def get_assignation(self, **kwargs):
        """
        Get the rdir assigned to each rawx service.

        Each rawx which has a linked rdir gets an 'rdir' key holding the
        description of that rdir. When the linked rdir is not in the
        list returned by the conscience, a minimal description
        (address and empty tags) is used instead.

        :param kwargs: extra arguments forwarded to the conscience and
            directory requests.
        :returns: a tuple with the list of all rawx services and the
            list of all rdir services.
        """
        all_rawx = self.cs.all_services('rawx', **kwargs)
        all_rdir = self.cs.all_services('rdir', True, **kwargs)
        by_id = {_make_id(self.ns, 'rdir', x['addr']): x for x in all_rdir}

        for rawx in all_rawx:
            try:
                # Look for the rdir linked to this rawx
                resp = self.directory.list(RDIR_ACCT, rawx['addr'],
                                           service_type='rdir', **kwargs)
                rdir_host = _filter_rdir_host(resp)
                rdir_id = _make_id(self.ns, 'rdir', rdir_host)
                try:
                    rawx['rdir'] = by_id[rdir_id]
                except KeyError:
                    self.logger.warn("rdir %s linked to rawx %s seems down",
                                     rdir_host, rawx['addr'])
                    # Register a placeholder so other rawx linked to the
                    # same rdir share the same description.
                    rawx['rdir'] = {"addr": rdir_host, "tags": dict()}
                    by_id[rdir_id] = rawx['rdir']
            except NotFound:
                self.logger.info("No rdir linked to %s", rawx['addr'])
        return all_rawx, all_rdir

    def assign_all_rawx(self, max_per_rdir=None, **kwargs):
        """
        Find a rdir service for all rawx that don't have one already.

        :param max_per_rdir: maximum number of rawx services that an rdir
                             can be linked to
        :type max_per_rdir: `int`
        :param kwargs: extra arguments forwarded to the conscience,
            directory and rdir requests.
        :returns: the list of all rawx services.
        :raises ServiceUnavailable: when the conscience knows no rdir.
        """
        all_rawx = self.cs.all_services('rawx', **kwargs)
        all_rdir = self.cs.all_services('rdir', True, **kwargs)
        if not all_rdir:
            raise ServiceUnavailable("No rdir service found in %s" % self.ns)

        by_id = {_make_id(self.ns, 'rdir', x['addr']): x for x in all_rdir}

        for rawx in all_rawx:
            try:
                # Verify that there is no rdir linked
                resp = self.directory.list(RDIR_ACCT, rawx['addr'],
                                           service_type='rdir', **kwargs)
                rdir_host = _filter_rdir_host(resp)
                try:
                    rawx['rdir'] = by_id[_make_id(self.ns, 'rdir',
                                                  rdir_host)]
                except KeyError:
                    self.logger.warn("rdir %s linked to rawx %s seems down",
                                     rdir_host, rawx['addr'])
            except (NotFound, ClientException):
                rdir = self._smart_link_rdir(rawx['addr'], all_rdir,
                                             max_per_rdir, **kwargs)
                # Account for the new base so the next selections see an
                # up-to-date load for this rdir.
                rdir_tags = by_id[rdir]['tags']
                n_bases = rdir_tags.get("stat.opened_db_count", 0)
                rdir_tags["stat.opened_db_count"] = n_bases + 1
                rawx['rdir'] = by_id[rdir]
        return all_rawx

    def _smart_link_rdir(self, volume_id, all_rdir, max_per_rdir=None,
                         **kwargs):
        """
        Force the load balancer to avoid services that already host more
        bases than the average (or more than `max_per_rdir`) while
        selecting rdir services.

        :param volume_id: address of the rawx service to link.
        :param all_rdir: descriptions of all known rdir services.
        :returns: the ID of the selected rdir service.
        :raises ServiceUnavailable: when no rdir has a positive score.
        """
        from time import sleep

        # An rdir which has not published its base count yet is
        # considered empty instead of making the selection fail.
        opened_db = [self._opened_db_count(x)
                     for x in all_rdir if x['score'] > 0]
        if not opened_db:
            raise ServiceUnavailable("No valid rdir service found in %s" %
                                     self.ns)
        if not max_per_rdir:
            upper_limit = sum(opened_db) / float(len(opened_db))
        else:
            upper_limit = max_per_rdir - 1
        avoids = [_make_id(self.ns, "rdir", x['addr'])
                  for x in all_rdir
                  if x['score'] > 0 and
                  self._opened_db_count(x) > upper_limit]
        known = [_make_id(self.ns, "rawx", volume_id)]
        try:
            polled = self._poll_rdir(avoid=avoids, known=known, **kwargs)
        except ClientException as exc:
            # NOTE(review): 481 is presumably "no service matches the
            # constraints" -- confirm against the proxy documentation.
            if exc.status != 481 or max_per_rdir:
                raise
            # Retry without `avoids`, hoping the next iteration will rebalance
            polled = self._poll_rdir(known=known, **kwargs)

        # Associate the rdir to the rawx
        forced = {'host': polled['addr'], 'type': 'rdir',
                  'seq': 1, 'args': "", 'id': polled['id']}
        max_attempts = 7
        for i in range(max_attempts):
            try:
                self.directory.force(RDIR_ACCT, volume_id, 'rdir',
                                     forced, autocreate=True, **kwargs)
                break
            except ClientException as ex:
                # Already done
                done = (455, )
                if ex.status in done:
                    break
                if ex.message.startswith('META1 error: (SQLITE_CONSTRAINT) '
                                         'UNIQUE constraint failed'):
                    self.logger.info("Ignored exception (already0): %s", ex)
                    break
                if ex.message.startswith(
                        'META1 error: (SQLITE_CONSTRAINT) '
                        'columns cid, srvtype, seq are not unique'):
                    self.logger.info("Ignored exception (already1): %s", ex)
                    break
                # Manage several unretriable errors
                retry = (406, 450, 503, 504)
                if ex.status >= 400 and ex.status not in retry:
                    raise
                # Monotonic backoff (retriable and net errors)
                if i < max_attempts - 1:
                    sleep(i * 1.0)
                    continue
                # Too many attempts
                raise

        # Do the creation in the rdir itself. This is best-effort:
        # a failure is logged but does not cancel the link.
        try:
            self.rdir.create(volume_id, **kwargs)
        except Exception as exc:
            self.logger.warn("Failed to create database for %s on %s: %s",
                             volume_id, polled['addr'], exc)
        return polled['id']

    def _poll_rdir(self, avoid=None, known=None, **kwargs):
        """
        Call the special rdir service pool (created if missing).

        :param avoid: IDs of the services the load balancer should avoid.
        :param known: IDs of the services already selected.
        :returns: the description of the selected rdir service.
        :raises ServerException: when the load balancer returns no rdir.
        """
        try:
            svcs = self.cs.poll('__rawx_rdir', avoid=avoid, known=known,
                                **kwargs)
        except ClientException as exc:
            # Status 400 is handled as "the pool does not exist":
            # create it and poll again.
            if exc.status != 400:
                raise
            self.cs.lb.create_pool('__rawx_rdir',
                                   ((1, JOKER_SVC_TARGET), (1, 'rdir')),
                                   **kwargs)
            svcs = self.cs.poll('__rawx_rdir', avoid=avoid, known=known,
                                **kwargs)
        for svc in svcs:
            # FIXME: we should include the service type in a dedicated field
            if 'rdir' in svc['id']:
                return svc
        raise ServerException("LB returned incoherent result: %s" % svcs)