Ejemplo n.º 1
0
 def _get_rain_addr(self, on_the_fly=False):
     try:
         rainx_instance = self.cs_client.next_instance("rainx")
         rainx_addr = "http://%s" % rainx_instance.get('addr')
     except Exception as e:
         self.logger.error("No rainx service found (%s)" % e.message)
         raise ServiceUnavailable("No rainx service found (%s)" % e.message)
     if on_the_fly:
         rainx_addr += "/on-the-fly"
     return rainx_addr
Ejemplo n.º 2
0
    def assign_services(self, service_type, max_per_rdir=None, **kwargs):
        all_services = self.cs.all_services(service_type, **kwargs)
        all_rdir = self.cs.all_services('rdir', True, **kwargs)
        if len(all_rdir) <= 0:
            raise ServiceUnavailable("No rdir service found in %s" % self.ns)

        by_id = {_make_id(self.ns, 'rdir', x['addr']): x for x in all_rdir}

        errors = list()
        for provider in all_services:
            provider_id = provider['tags'].get('tag.service_id',
                                               provider['addr'])

            try:
                resp = self.directory.list(RDIR_ACCT,
                                           provider_id,
                                           service_type='rdir',
                                           **kwargs)
                rdir_host = _filter_rdir_host(resp)
                try:
                    provider['rdir'] = by_id[_make_id(self.ns, 'rdir',
                                                      rdir_host)]
                except KeyError:
                    self.logger.warn("rdir %s linked to %s %s seems down",
                                     rdir_host, service_type, provider_id)
            except NotFound:
                try:
                    rdir = self._smart_link_rdir(provider_id,
                                                 all_rdir,
                                                 service_type=service_type,
                                                 max_per_rdir=max_per_rdir,
                                                 **kwargs)
                except OioException as exc:
                    self.logger.warn("Failed to link an rdir to %s %s: %s",
                                     service_type, provider_id, exc)
                    errors.append((provider_id, exc))
                    continue
                n_bases = by_id[rdir]['tags'].get("stat.opened_db_count", 0)
                by_id[rdir]['tags']["stat.opened_db_count"] = n_bases + 1
                provider['rdir'] = by_id[rdir]
            except OioException as exc:
                self.logger.warn(
                    "Failed to check rdir linked to %s %s "
                    "(thus won't try to make the link): %s", service_type,
                    provider_id, exc)
                errors.append((provider_id, exc))
        if errors:
            # group_chunk_errors is flexible enough to accept service addresses
            errors = group_chunk_errors(errors)
            if len(errors) == 1:
                err, addrs = errors.popitem()
                oio_reraise(type(err), err, str(addrs))
            else:
                raise OioException('Several errors encountered: %s' % errors)
        return all_services
Ejemplo n.º 3
0
 def _smart_link_rdir(self, volume_id, all_rdir, max_per_rdir=None):
     """
     Force the load balancer to avoid services that already host more
     bases than the average (or more than `max_per_rdir`)
     while selecting rdir services.
     """
     opened_db = [
         x['tags']['stat.opened_db_count'] for x in all_rdir
         if x['score'] > 0
     ]
     if len(opened_db) <= 0:
         raise ServiceUnavailable("No valid rdir service found in %s" %
                                  self.ns)
     if not max_per_rdir:
         upper_limit = sum(opened_db) / float(len(opened_db))
     else:
         upper_limit = max_per_rdir - 1
     avoids = [
         _make_id(self.ns, "rdir", x['addr']) for x in all_rdir if
         x['score'] > 0 and x['tags']['stat.opened_db_count'] > upper_limit
     ]
     known = [_make_id(self.ns, "rawx", volume_id)]
     try:
         polled = self._poll_rdir(avoid=avoids, known=known)
     except ClientException as exc:
         if exc.status != 481 or max_per_rdir:
             raise
         # Retry without `avoids`, hoping the next iteration will rebalance
         polled = self._poll_rdir(known=known)
     forced = {
         'host': polled['addr'],
         'type': 'rdir',
         'seq': 1,
         'args': "",
         'id': polled['id']
     }
     self.directory.force(RDIR_ACCT,
                          volume_id,
                          'rdir',
                          forced,
                          autocreate=True)
     try:
         self.rdir.create(volume_id)
     except Exception as exc:
         self.logger.warn("Failed to create database for %s on %s: %s",
                          volume_id, polled['addr'], exc)
     return polled['id']
Ejemplo n.º 4
0
    def assign_all_rawx(self, max_per_rdir=None):
        """
        Find a rdir service for all rawx that don't have one already.

        :param max_per_rdir: maximum number or rawx services that an rdir
                             can be linked to
        :type max_per_rdir: `int`
        """
        cs = ConscienceClient(self.conf)
        all_rawx = cs.all_services('rawx')
        all_rdir = cs.all_services('rdir', True)
        if len(all_rdir) <= 0:
            raise ServiceUnavailable("No rdir service found in %s" % self.ns)

        by_id = {_make_id(self.ns, 'rdir', x['addr']): x
                 for x in all_rdir}

        for rawx in all_rawx:
            try:
                # Verify that there is no rdir linked
                resp = self.directory.list(RDIR_ACCT, rawx['addr'],
                                           service_type='rdir')
                rdir_host = _filter_rdir_host(resp)
                try:
                    rawx['rdir'] = by_id[_make_id(self.ns, 'rdir', rdir_host)]
                except KeyError:
                    self.logger.warn("rdir %s linked to rawx %s seems down",
                                     rdir_host, rawx['addr'])
            except (NotFound, ClientException):
                if rawx['score'] <= 0:
                    self.logger.warn("rawx %s has score %s, and thus cannot be"
                                     " affected a rdir (load balancer "
                                     "limitation)",
                                     rawx['addr'], rawx['score'])
                    continue
                rdir = self._smart_link_rdir(rawx['addr'], cs, all_rdir,
                                             max_per_rdir)
                n_bases = by_id[rdir]['tags'].get("stat.opened_db_count", 0)
                by_id[rdir]['tags']["stat.opened_db_count"] = n_bases + 1
                rawx['rdir'] = by_id[rdir]
        return all_rawx
Ejemplo n.º 5
0
    def _smart_link_rdir(self, volume_id, all_rdir, max_per_rdir=None,
                         max_attempts=7, service_type='rawx', min_dist=None,
                         **kwargs):
        """
        Force the load balancer to avoid services that already host more
        bases than the average (or more than `max_per_rdir`)
        while selecting rdir services.
        """
        opened_db = [x['tags'].get('stat.opened_db_count', 0) for x in all_rdir
                     if x['score'] > 0]
        if len(opened_db) <= 0:
            raise ServiceUnavailable(
                "No valid rdir service found in %s" % self.ns)
        if not max_per_rdir:
            upper_limit = sum(opened_db) / float(len(opened_db))
        else:
            upper_limit = max_per_rdir - 1
        avoids = [_make_id(self.ns, "rdir", x['addr'])
                  for x in all_rdir
                  if x['score'] > 0 and
                  x['tags'].get('stat.opened_db_count', 0) > upper_limit]
        known = [_make_id(self.ns, service_type, volume_id)]
        try:
            polled = self._poll_rdir(avoid=avoids, known=known,
                                     min_dist=min_dist, **kwargs)
        except ClientException as exc:
            if exc.status != 481 or max_per_rdir:
                raise
            # Retry without `avoids`, hoping the next iteration will rebalance
            polled = self._poll_rdir(known=known, min_dist=min_dist, **kwargs)

        # Associate the rdir to the rawx
        forced = {'host': polled['addr'], 'type': 'rdir',
                  'seq': 1, 'args': "", 'id': polled['id']}
        for i in range(max_attempts):
            try:
                self.directory.force(RDIR_ACCT, volume_id, 'rdir',
                                     forced, autocreate=True, **kwargs)
                break
            except ClientException as ex:
                # Already done
                done = (455,)
                if ex.status in done:
                    break
                if ex.message.startswith(
                        'META1 error: (SQLITE_CONSTRAINT) '
                        'UNIQUE constraint failed'):
                    self.logger.info(
                        "Ignored exception (already0): %s", ex)
                    break
                if ex.message.startswith(
                        'META1 error: (SQLITE_CONSTRAINT) '
                        'columns cid, srvtype, seq are not unique'):
                    self.logger.info(
                        "Ignored exception (already1): %s", ex)
                    break
                # Manage several unretriable errors
                retry = (406, 450, 503, 504)
                if ex.status >= 400 and ex.status not in retry:
                    raise
                # Monotonic backoff (retriable and net erorrs)
                if i < max_attempts - 1:
                    sleep(i * 1.0)
                    continue
                # Too many attempts
                raise

        # Do the creation in the rdir itself
        try:
            self.rdir.create(volume_id, service_type=service_type, **kwargs)
        except Exception as exc:
            self.logger.warn("Failed to create database for %s on %s: %s",
                             volume_id, polled['addr'], exc)
        return polled['id']
Ejemplo n.º 6
0
    def assign_services(self,
                        service_type,
                        max_per_rdir=None,
                        min_dist=None,
                        service_id=None,
                        reassign=False,
                        **kwargs):
        """
        Assign an rdir service to all `service_type` servers that aren't
        already assigned one.

        :param max_per_rdir: Maximum number of services an rdir can handle.
        :type max_per_rdir: `int`
        :param min_dist: Minimum required distance between any service and
            its assigned rdir service.
        :type min_dist: `int`
        :param service_id: Assign only this service ID.
        :type service_id: `str`
        :param reassign: Reassign an rdir service.
        :type reassign: `bool`
        :param dry_run: Display actions but do nothing.
        :type dry_run: `bool`
        :returns: The list of `service_type` services that were assigned
            rdir services.
        """
        all_services = self.cs.all_services(service_type, **kwargs)
        if service_id:
            for provider in all_services:
                provider_id = provider['tags'].get('tag.service_id',
                                                   provider['addr'])
                if service_id == provider_id:
                    break
            else:
                raise ValueError('%s isn\'t a %s' % (service_id, service_type))
            all_services = [provider]
        all_rdir = self.cs.all_services('rdir', True, **kwargs)
        if len(all_rdir) <= 0:
            raise ServiceUnavailable("No rdir service found in %s" % self.ns)

        by_id = _build_dict_by_id(self.ns, all_rdir)

        errors = list()
        for provider in all_services:
            provider_id = provider['tags'].get('tag.service_id',
                                               provider['addr'])

            try:
                resp = self.directory.list(RDIR_ACCT,
                                           provider_id,
                                           service_type='rdir',
                                           **kwargs)
                rdir_host = _filter_rdir_host(resp)
                try:
                    rdir = by_id[_make_id(self.ns, 'rdir', rdir_host)]
                    if reassign:
                        rdir['tags']['stat.opened_db_count'] = \
                            rdir['tags'].get('stat.opened_db_count', 0) - 1
                        # TODO(adu) Delete database
                        raise NotFound('Reassign an rdir services')
                    provider['rdir'] = rdir
                except KeyError:
                    self.logger.warn("rdir %s linked to %s %s seems down",
                                     rdir_host, service_type, provider_id)
                    if reassign:
                        raise NotFound('Reassign an rdir services')
            except NotFound:
                try:
                    rdir = self._smart_link_rdir(provider_id,
                                                 all_rdir,
                                                 service_type=service_type,
                                                 max_per_rdir=max_per_rdir,
                                                 min_dist=min_dist,
                                                 reassign=reassign,
                                                 **kwargs)
                except OioException as exc:
                    self.logger.warn("Failed to link an rdir to %s %s: %s",
                                     service_type, provider_id, exc)
                    errors.append((provider_id, exc))
                    continue
                n_bases = by_id[rdir]['tags'].get("stat.opened_db_count", 0)
                by_id[rdir]['tags']["stat.opened_db_count"] = n_bases + 1
                provider['rdir'] = by_id[rdir]
            except OioException as exc:
                self.logger.warn(
                    "Failed to check rdir linked to %s %s "
                    "(thus won't try to make the link): %s", service_type,
                    provider_id, exc)
                errors.append((provider_id, exc))
        if errors:
            # group_chunk_errors is flexible enough to accept service addresses
            errors = group_chunk_errors(errors)
            if len(errors) == 1:
                err, addrs = errors.popitem()
                oio_reraise(type(err), err, str(addrs))
            else:
                raise OioException('Several errors encountered: %s' % errors)
        return all_services