Example #1
 def _smart_link_rdir(self, volume_id, cs=None, all_rdir=None):
     """
     Force the load balancer to avoid services that already host more
     bases than the average while selecting rdir services.
     """
     if not cs:
         cs = ConscienceClient(self.conf)
     if not all_rdir:
         all_rdir = cs.all_services("rdir", True)
     avail_base_count = [x["tags"]["stat.opened_db_count"] for x in all_rdir if x["score"] > 0]
     mean = sum(avail_base_count) / float(len(avail_base_count))
     avoids = [
         _make_id(self.ns, "rdir", x["addr"])
         for x in all_rdir
         if x["score"] > 0 and x["tags"]["stat.opened_db_count"] > mean
     ]
     known = [_make_id(self.ns, "rawx", volume_id)]
     try:
         polled = cs.poll("rdir", avoid=avoids, known=known)[0]
     except ClientException as exc:
         if exc.status != 481:
             raise
         # Retry without `avoids`, hoping the next iteration will rebalance
         polled = cs.poll("rdir", known=known)[0]
     forced = {"host": polled["addr"], "type": "rdir", "seq": 1, "args": "", "id": polled["id"]}
     self.directory.force(RDIR_ACCT, volume_id, "rdir", forced, autocreate=True)
     return polled["id"]
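
A minimal sketch (not part of the original example) of computing the same
average base count on its own; it reuses the ConscienceClient calls shown
above and assumes a hypothetical namespace name and import path.

from oio.conscience.client import ConscienceClient  # assumed import path

conf = {'namespace': 'OPENIO'}  # hypothetical namespace name
cs = ConscienceClient(conf)
all_rdir = cs.all_services("rdir", True)  # same call as in the example above
counts = [x["tags"]["stat.opened_db_count"]
          for x in all_rdir if x["score"] > 0]
mean = sum(counts) / float(len(counts)) if counts else 0.0
print("average opened databases per rdir: %.1f" % mean)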
Example #2
    def __init__(self, conf, service, **kwargs):
        self.conf = conf
        self.running = False

        for k in ['host', 'port', 'type']:
            if k not in service:
                raise Exception(
                    'Missing field "%s" in service configuration' % k)
        self.name = '%s|%s' % \
            (service['host'], service['port'])

        self.check_interval = float_value(conf.get('check_interval'), 1)
        self.service = service

        self.rise = int_value(conf.get('rise'), 1)
        self.fall = int_value(conf.get('fall'), 1)

        self.logger = get_logger(self.conf)
        self.cs = ConscienceClient(self.conf)
        self.client = Client(self.conf)
        self.last_status = False
        self.failed = False
        self.service_definition = {
            'ns': self.conf['namespace'],
            'type': self.service['type'],
            'addr': '%s:%s' % (self.service['host'], self.service['port']),
            'score': 0,
            'tags': {}}
        if self.service.get('location', None):
            self.service_definition['tags']['tag.loc'] = \
                    self.service['location']
        self.service_checks = list()
        self.service_stats = list()
        self.init_checkers(service)
        self.init_stats(service)
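
For reference, a hypothetical service configuration matching the checks in
this constructor: 'host', 'port' and 'type' are required, 'location' is
optional and is stored as the 'tag.loc' tag.

service = {
    'host': '127.0.0.1',                # required
    'port': 6006,                       # required
    'type': 'rawx',                     # required
    'location': 'room1.rack2.server3',  # optional, becomes 'tag.loc'
}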
Example #3
class Harasser(object):
    def __init__(self, ns, max_containers=256, max_contents=256):
        conf = {'namespace': ns}
        self.cs = ConscienceClient(conf)
        self.rdir = RdirClient(conf)
        self.rawx_list = [x['addr'] for x in self.cs.all_services('rawx')]
        self.sent = set()
        self.max_containers = max_containers
        self.max_contents = max_contents

    def harass_put(self, loops=None):
        if loops is None:
            loops = random.randint(1000, 2000)
        print "Pushing %d fake chunks" % loops
        loop = loops
        count_start_container = random.randrange(2**20)
        count_start_content = random.randrange(2**20)
        start = time.time()
        nb_rawx = len(self.rawx_list)
        while loop > 0:
            args = {'mtime': int(start)}
            # vol_id = random.choice(self.rawx_list)
            # container_id = "%064X" % (random.randrange(self.max_containers))
            # content_id = "%032X" % (random.randrange(self.max_contents))
            vol_id = self.rawx_list[loop % nb_rawx]
            container_id = "%064X" % (loop + count_start_container)
            content_id = "%032X" % (loop + count_start_content)
            chunk_id = "http://%s/%064X" \
                % (vol_id, random.randrange(2**128))
            self.rdir.chunk_push(
                vol_id, container_id, content_id, chunk_id, **args)
            self.sent.add((vol_id, container_id, content_id, chunk_id))
            loop -= 1
        end = time.time()
        print "%d pushed in %.3fs, %d req/s" \
            % (loops, end-start, loops/(end-start))

    def harass_del(self, min_loops=0):
        min_loops = min(min_loops, len(self.sent))
        loops = random.randint(min_loops, len(self.sent))
        print "Removing %d fake chunks" % loops
        loop = loops
        start = time.time()
        while loop > 0:
            args = self.sent.pop()
            self.rdir.chunk_delete(*args)
            loop -= 1
        end = time.time()
        print "%d removed in %.3fs, %d req/s" \
            % (loops, end-start, loops/(end-start))

    def __call__(self):
        try:
            while True:
                self.harass_put()
                self.harass_del()
        except KeyboardInterrupt:
            print "Cleaning..."
            self.harass_del(len(self.sent))
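
A hypothetical driver for the class above (not in the source); the namespace
name is an assumption.

if __name__ == '__main__':
    harasser = Harasser('OPENIO')  # assumed namespace name
    harasser()  # push and delete fake chunks until KeyboardInterrupt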
Example #4
 def __init__(self, ns, max_containers=256, max_contents=256):
     conf = {'namespace': ns}
     self.cs = ConscienceClient(conf)
     self.rdir = RdirClient(conf)
     self.rawx_list = [x['addr'] for x in self.cs.all_services('rawx')]
     self.sent = set()
     self.max_containers = max_containers
     self.max_contents = max_contents
Example #5
 def assign_all_rawx(self):
     """
     Find a rdir service for all rawx that don't have one already.
     """
     cs = ConscienceClient(self.conf)
     all_rawx = cs.all_services("rawx")
     all_rdir = cs.all_services("rdir", True)
     by_id = {_make_id(self.ns, "rdir", x["addr"]): x for x in all_rdir}
     for rawx in all_rawx:
         try:
             # Check whether an rdir service is already linked to this rawx
             resp = self.directory.get(RDIR_ACCT, rawx["addr"], service_type="rdir")
             rawx["rdir"] = by_id[_make_id(self.ns, "rdir", self._lookup_rdir_host(resp))]
         except (NotFound, ClientException):
             rdir = self._smart_link_rdir(rawx["addr"], cs, all_rdir)
             n_bases = by_id[rdir]["tags"].get("stat.opened_db_count", 0)
             by_id[rdir]["tags"]["stat.opened_db_count"] = n_bases + 1
             rawx["rdir"] = by_id[rdir]
     return all_rawx
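
A sketch (not in the source) of consuming the return value of
assign_all_rawx(); `dispatcher` is a hypothetical instance of the class this
method belongs to.

for rawx in dispatcher.assign_all_rawx():
    # Each entry carries the linked rdir description under the 'rdir' key
    print("%s -> %s" % (rawx['addr'], rawx['rdir']['addr']))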
Example #6
 def __init__(self, conf, name, context, **kwargs):
     self.conf = conf
     self.name = name
     verbose = kwargs.pop("verbose", False)
     self.logger = get_logger(self.conf, verbose=verbose)
     self.init_zmq(context)
     self.cs = ConscienceClient(self.conf)
     self.rdir = RdirClient(self.conf)
     self._acct_addr = None
     self.acct_update = 0
     self.acct_refresh_interval = int_value(conf.get("acct_refresh_interval"), 60)
     self.acct_update = true_value(conf.get("acct_update", True))
     self.rdir_update = true_value(conf.get("rdir_update", True))
     self.session = requests.Session()
     self.failed = False
Example #7
 def init(self):
     eventlet.monkey_patch(os=False)
     self.session = requests.Session()
     self.cs = ConscienceClient(self.conf)
     self.rdir = RdirClient(self.conf)
     self._acct_addr = None
     self.acct_update = 0
     self.graceful_timeout = 1
     self.acct_refresh_interval = int_value(
         self.conf.get('acct_refresh_interval'), 60
     )
     self.concurrency = int_value(self.conf.get('concurrency'), 1000)
     self.acct_update = true_value(self.conf.get('acct_update', True))
     self.rdir_update = true_value(self.conf.get('rdir_update', True))
     super(EventWorker, self).init()
Example #8
 def test_rdir_linking(self):
     """
     Tests that rdir services linked to rawx services
     are not on the same locations
     """
     cs = ConscienceClient({'namespace': self.ns})
     rawx_list = cs.all_services('rawx')
     rdir_dict = {x['addr']: x for x in cs.all_services('rdir')}
     # Link the services
     for rawx in rawx_list:
         self.api.link('_RDIR_TEST', rawx['addr'], 'rdir',
                       autocreate=True)
     # Do the checks
     for rawx in rawx_list:
         linked_rdir = self.api.get(
             '_RDIR_TEST', rawx['addr'], service_type='rdir')['srv']
         rdir = rdir_dict[linked_rdir[0]['host']]
         rawx_loc = rawx['tags'].get('tag.loc')
         rdir_loc = rdir['tags'].get('tag.loc')
         self.assertNotEqual(rawx_loc, rdir_loc)
     # Unlink the services
     for rawx in rawx_list:
         self.api.unlink('_RDIR_TEST', rawx['addr'], 'rdir')
         self.api.delete('_RDIR_TEST', rawx['addr'])
Example #9
 def init(self):
     eventlet.monkey_patch(os=False)
     self.tube = self.conf.get("tube", DEFAULT_TUBE)
     self.session = requests.Session()
     self.cs = ConscienceClient(self.conf)
     self.rdir = RdirClient(self.conf)
     self._acct_addr = None
     self.acct_update = 0
     self.graceful_timeout = 1
     self.acct_refresh_interval = int_value(
         self.conf.get('acct_refresh_interval'), 60
     )
     self.acct_update = true_value(self.conf.get('acct_update', True))
     self.rdir_update = true_value(self.conf.get('rdir_update', True))
     if 'handlers_conf' not in self.conf:
         raise ValueError("'handlers_conf' path not defined in conf")
     self.handlers = loadhandlers(
         self.conf.get('handlers_conf'), evt_types, app=self)
     super(EventWorker, self).init()
Example #10
class AccountClient(Client):
    def __init__(self, conf, **kwargs):
        super(AccountClient, self).__init__(conf, **kwargs)
        self.cs = ConscienceClient(self.conf)

    # TODO keep account srv addr in local cache to avoid lookup requests
    def _get_account_addr(self):
        try:
            acct_instance = self.cs.next_instance('account')
            acct_addr = acct_instance.get('addr')
        except Exception:
            raise ClientException("No Account service found")
        return acct_addr

    def _make_uri(self, action):
        account_addr = self._get_account_addr()
        uri = 'http://%s/v1.0/account/%s' % (account_addr, action)
        return uri

    def _account_request(self, account, method, action, params=None):
        # Use a fresh dict by default: a mutable default argument would be
        # shared (and mutated) across calls.
        params = params if params is not None else {}
        uri = self._make_uri(action)
        params['id'] = account
        resp, body = self._direct_request(method, uri, params=params)
        return resp, body

    def account_create(self, account):
        self._account_request(account, 'PUT', 'create')

    def account_delete(self, account):
        self._account_request(account, 'POST', 'delete')

    def containers_list(self, account, marker=None, limit=None):
        params = {}
        if marker is not None:
            params['marker'] = marker
        if limit is not None:
            params['limit'] = limit

        resp, body = self._account_request(account,
                                           'GET', 'containers', params)
        return body
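
A minimal usage sketch for the client above, assuming a reachable account
service; the namespace and account names are illustrative.

client = AccountClient({'namespace': 'OPENIO'})  # assumed namespace
client.account_create('my_account')
listing = client.containers_list('my_account', limit=10)
client.account_delete('my_account')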
Example #11
class EventWorker(object):
    def __init__(self, conf, name, context, **kwargs):
        self.conf = conf
        self.name = name
        verbose = kwargs.pop('verbose', False)
        self.logger = get_logger(self.conf, verbose=verbose)
        self.init_zmq(context)
        self.cs = ConscienceClient(self.conf)
        self.rdir = RdirClient(self.conf)
        self._acct_addr = None
        self.acct_update = 0
        self.acct_refresh_interval = int_value(
            conf.get('acct_refresh_interval'), 60
        )
        self.acct_update = true_value(
            conf.get('acct_update', True))
        self.session = requests.Session()
        self.failed = False

    def start(self):
        self.logger.info('worker "%s" starting', self.name)
        self.running = True
        self.run()

    def stop(self):
        self.logger.info('worker "%s" stopping', self.name)
        self.running = False

    def init_zmq(self, context):
        socket = context.socket(zmq.REP)
        socket.connect('inproc://event-front')
        self.socket = socket

    def safe_ack(self, msg):
        try:
            self.socket.send_multipart(msg)
        except Exception:
            self.logger.warn('Unable to ack event')

    def run(self):
        try:
            while self.running:
                msg = self.socket.recv_multipart()
                self.logger.debug("msg received: %s" % msg)
                event = decode_msg(msg)
                success = self.process_event(event)
                f = "0" if success else ""
                self.safe_ack([msg[0], f])
        except Exception as e:
            self.logger.warn('ERROR in worker "%s"', e)
            self.failed = True
            raise
        finally:
            self.logger.info('worker "%s" stopped', self.name)

    def process_event(self, event):
        handler = self.get_handler(event)
        if not handler:
            self.logger.warn("No handler found")
            # mark as success
            return True
        success = True
        try:
            handler(event)
        except Exception:
            success = False
        finally:
            return success

    def get_handler(self, event):
        event_type = event.get('event')
        if not event_type:
            return None

        if event_type == EventType.CONTAINER_PUT:
            return self.handle_container_put
        elif event_type == EventType.CONTAINER_DESTROY:
            return self.handle_container_destroy
        elif event_type == EventType.CONTAINER_UPDATE:
            return self.handle_container_update
        elif event_type == EventType.OBJECT_PUT:
            return self.handle_object_put
        elif event_type == EventType.OBJECT_DELETE:
            return self.handle_object_delete
        elif event_type == EventType.REFERENCE_UPDATE:
            return self.handle_reference_update
        elif event_type == EventType.CHUNK_PUT:
            return self.handle_chunk_put
        elif event_type == EventType.CHUNK_DELETE:
            return self.handle_chunk_delete
        elif event_type == EventType.PING:
            return self.handle_ping
        else:
            return None

    @property
    def acct_addr(self):
        if not self._acct_addr or self.acct_refresh():
            try:
                acct_instance = self.cs.next_instance(ACCOUNT_SERVICE)
                self._acct_addr = acct_instance.get('addr')
                self.acct_update = time.time()
            except Exception:
                self.logger.warn('Unable to find account instance')
        return self._acct_addr

    def acct_refresh(self):
        return (time.time() - self.acct_update) > self.acct_refresh_interval

    def handle_container_put(self, event):
        """
        Handle container creation.
        :param event:
        """
        self.logger.debug('worker "%s" handle container put', self.name)
        if not self.acct_update:
            return
        uri = 'http://%s/v1.0/account/container/update' % self.acct_addr
        mtime = event.get('when')
        data = event.get('data')
        name = data.get('url').get('user')
        account = data.get('url').get('account')

        event = {'mtime': mtime, 'name': name}
        self.session.post(uri, params={'id': account}, data=json.dumps(event))

    def handle_container_update(self, event):
        """
        Handle container update.
        :param event:
        """
        self.logger.debug('worker "%s" handle container update', self.name)
        if not self.acct_update:
            return
        uri = 'http://%s/v1.0/account/container/update' % self.acct_addr
        mtime = event.get('when')
        data = event.get('data')
        name = event.get('url').get('user')
        account = event.get('url').get('account')
        bytes_count = data.get('bytes-count', 0)
        object_count = data.get('object-count', 0)

        event = {
            'mtime': mtime,
            'name': name,
            'bytes': bytes_count,
            'objects': object_count
        }
        self.session.post(uri, params={'id': account}, data=json.dumps(event))

    def handle_container_destroy(self, event):
        """
        Handle container destroy.
        :param event:
        """
        self.logger.debug('worker "%s" handle container destroy', self.name)
        if not self.acct_update:
            return
        uri = 'http://%s/v1.0/account/container/update' % self.acct_addr
        dtime = event.get('when')
        data = event.get('data')
        name = data.get('url').get('user')
        account = data.get('url').get('account')

        event = {'dtime': dtime, 'name': name}
        self.session.post(uri, params={'id': account}, data=json.dumps(event))

    def handle_object_delete(self, event):
        """
        Handle object deletion.
        Delete the chunks of the object.
        :param event:
        """
        self.logger.debug('worker "%s" handle object delete', self.name)
        pile = GreenPile(PARALLEL_CHUNKS_DELETE)

        chunks = []

        for item in event.get('data'):
            if item.get('type') == 'chunks':
                chunks.append(item)
        if not len(chunks):
            self.logger.warn('No chunks found in event data')
            return

        def delete_chunk(chunk):
            resp = None
            try:
                with Timeout(CHUNK_TIMEOUT):
                    resp = self.session.delete(chunk['id'])
            except (Exception, Timeout) as e:
                self.logger.exception(e)
            return resp

        for chunk in chunks:
            pile.spawn(delete_chunk, chunk)

        resps = [resp for resp in pile if resp]

        for resp in resps:
            if resp.status_code == 204:
                self.logger.info('deleted chunk %s' % resp.url)
            else:
                self.logger.warn('failed to delete chunk %s' % resp.url)

    def handle_object_put(self, event):
        """
        Handle object creation.
        TODO
        :param event:
        """
        self.logger.debug('worker "%s" handle object put', self.name)

    def handle_reference_update(self, event):
        """
        Handle reference update.
        TODO
        :param event:
        """
        self.logger.debug('worker "%s" handle reference update', self.name)

    def handle_chunk_put(self, event):
        """
        Handle chunk creation.
        :param event:
        """
        self.logger.debug('worker "%s" handle chunk creation', self.name)

        when = event.get('when')
        data = event.get('data')
        volume_id = data.get('volume_id')
        del data['volume_id']
        container_id = data.get('container_id')
        del data['container_id']
        content_id = data.get('content_id')
        del data['content_id']
        chunk_id = data.get('chunk_id')
        del data['chunk_id']
        data['mtime'] = when
        self.rdir.chunk_push(volume_id, container_id, content_id, chunk_id,
                             **data)

    def handle_chunk_delete(self, event):
        """
        Handle chunk deletion.
        :param event:
        """
        self.logger.debug('worker "%s" handle chunk deletion', self.name)

        data = event.get('data')
        volume_id = data.get('volume_id')
        container_id = data.get('container_id')
        content_id = data.get('content_id')
        chunk_id = data.get('chunk_id')
        self.rdir.chunk_delete(volume_id, container_id, content_id, chunk_id)

    def handle_ping(self, event):
        """
        Handle ping.
        :param event:
        """
        self.logger.debug('worker "%s" handle ping', self.name)
Example #12
class ContainerClient(ProxyClient):
    """
    Intermediate level class to manage containers.
    """
    def __init__(self, conf, refresh_rawx_scores_delay=30.0, **kwargs):
        super(ContainerClient, self).__init__(conf,
                                              request_prefix="/container",
                                              **kwargs)

        # to refresh the rawx scores from cache
        kwargs.pop('pool_manager', None)
        self.conscience_client = ConscienceClient(
            self.conf, pool_manager=self.pool_manager, **kwargs)
        self.rawx_scores = dict()
        self._refresh_rawx_scores_delay = refresh_rawx_scores_delay
        self._last_refresh_rawx_scores = 0.0

    def _make_uri(self, target):
        """
        Build URIs for requests that don't use the same prefix as the one
        set in this class' constructor.
        """
        uri = '%s://%s/v3.0/%s/%s' % (self.proxy_scheme, self.proxy_netloc,
                                      self.ns, target)
        return uri

    def _make_params(self,
                     account=None,
                     reference=None,
                     path=None,
                     cid=None,
                     content=None,
                     version=None,
                     **kwargs):
        if cid:
            params = {'cid': cid}
        else:
            params = {'acct': account, 'ref': reference}
        if path:
            params.update({'path': path})
        if content:
            params.update({'content': content})
        if version:
            params.update({'version': version})
        return params

    def _get_rawx_scores(self):
        rawx_services = self.conscience_client.all_services('rawx')
        rawx_scores = dict()
        for rawx_service in rawx_services:
            rawx_scores[rawx_service['id']] = \
                rawx_service['score']
        return rawx_scores

    def _refresh_rawx_scores(self, now=None, **kwargs):
        """Refresh rawx service scores."""
        self.rawx_scores = self._get_rawx_scores()
        if not now:
            now = time.time()
        self._last_refresh_rawx_scores = now

    def _maybe_refresh_rawx_scores(self, **kwargs):
        """Refresh rawx service scores if delay has been reached."""
        if self._refresh_rawx_scores_delay >= 0.0 or not self.rawx_scores:
            now = time.time()
            if now - self._last_refresh_rawx_scores \
                    > self._refresh_rawx_scores_delay:
                try:
                    self._refresh_rawx_scores(now, **kwargs)
                except OioNetworkException as exc:
                    self.logger.warn(
                        "Failed to refresh rawx service scores: %s", exc)
                except Exception:
                    self.logger.exception(
                        "Failed to refresh rawx service scores")

    def container_create(self,
                         account,
                         reference,
                         properties=None,
                         system=None,
                         **kwargs):
        """
        Create a container.

        :param account: account in which to create the container
        :type account: `str`
        :param reference: name of the container
        :type reference: `str`
        :param properties: properties to set on the container
        :type properties: `dict`
        :param system: system properties to set on the container
        :type system: `dict`
        :keyword headers: extra headers to send to the proxy
        :type headers: `dict`
        :returns: True if the container has been created,
                  False if it already exists
        """
        params = self._make_params(account, reference)
        data = json.dumps({
            'properties': properties or {},
            'system': system or {}
        })
        resp, body = self._request('POST',
                                   '/create',
                                   params=params,
                                   data=data,
                                   **kwargs)
        if resp.status not in (204, 201):
            raise exceptions.from_response(resp, body)
        return resp.status == 201

    def container_create_many(self,
                              account,
                              containers,
                              properties=None,
                              **kwargs):
        """
        Create several containers.

        :param account: account in which to create the containers
        :type account: `str`
        :param containers: names of the containers
        :type containers: iterable of `str`
        :param properties: properties to set on the containers
        :type properties: `dict`
        :keyword headers: extra headers to send to the proxy
        :type headers: `dict`
        :returns: a list of tuples with the name of the container and
            a boolean telling if the container has been created
        :rtype: `list` of `tuple`
        """
        results = list()
        try:
            params = self._make_params(account)
            unformatted_data = list()
            for container in containers:
                unformatted_data.append({
                    'name': container,
                    'properties': properties or {},
                    'system': kwargs.get('system', {})
                })
            data = json.dumps({"containers": unformatted_data})
            resp, body = self._request('POST',
                                       '/create_many',
                                       params=params,
                                       data=data,
                                       **kwargs)
            if resp.status not in (204, 200):
                raise exceptions.from_response(resp, body)
            for container in body["containers"]:
                results.append((container["name"], container["status"] == 201))
            return results
        except exceptions.TooLarge:
            # Batch too large for the proxy
            pivot = len(containers) // 2
            head = containers[:pivot]
            tail = containers[pivot:]
            if head:
                results += self.container_create_many(account,
                                                      head,
                                                      properties=properties,
                                                      **kwargs)
            if tail:
                results += self.container_create_many(account,
                                                      tail,
                                                      properties=properties,
                                                      **kwargs)
            return results
        except exceptions.NotFound:
            # Batches not supported by the proxy
            for container in containers:
                try:
                    rc = self.container_create(account,
                                               container,
                                               properties=properties,
                                               **kwargs)
                    results.append((container, rc))
                except Exception:
                    results.append((container, False))
            return results

    def container_delete(self,
                         account=None,
                         reference=None,
                         cid=None,
                         **kwargs):
        """
        Delete a container.

        :param account: account from which to delete the container
        :type account: `str`
        :param reference: name of the container
        :type reference: `str`
        :param cid: container id that can be used instead of account
            and reference
        :type cid: `str`
        :keyword headers: extra headers to send to the proxy
        :type headers: `dict`
        """
        params = self._make_params(account, reference, cid=cid)

        del_cached_container_metadata(account=account,
                                      reference=reference,
                                      cid=cid,
                                      **kwargs)

        try:
            self._request('POST', '/destroy', params=params, **kwargs)
        except exceptions.Conflict as exc:
            raise exceptions.ContainerNotEmpty(exc)

    def container_show(self, account=None, reference=None, cid=None, **kwargs):
        """
        Get information about a container (like user properties).

        :param account: account in which the container is
        :type account: `str`
        :param reference: name of the container
        :type reference: `str`
        :param cid: container id that can be used instead of account
            and reference
        :type cid: `str`
        :keyword headers: extra headers to send to the proxy
        :type headers: `dict`
        :returns: a `dict` with "properties" containing a `dict` of
            user properties.
        :deprecated: use `container_get_properties` instead
        """
        params = self._make_params(account, reference, cid=cid)
        _resp, body = self._request('GET', '/show', params=params, **kwargs)
        return body

    def container_snapshot(self,
                           account=None,
                           reference=None,
                           dst_account=None,
                           dst_reference=None,
                           cid=None,
                           **kwargs):
        """
        Create a snapshot of the container.

        This function duplicates only the database. It doesn't duplicate the
        chunks of the contents.

        :param account: account in which the container is
        :type account: `str`
        :param reference: name of the container
        :type reference: `str`
        :param cid: container id that can be used instead of account
            and reference
        :type cid: `str`
        :param dst_account: account in which the snapshot will be created
        :type dst_account: `str`
        :param dst_reference: name of the snapshot
        :type dst_reference: `str`
        """
        params = self._make_params(account, reference, cid=cid)
        data = json.dumps({"account": dst_account, "container": dst_reference})
        resp, _ = self._request('POST',
                                '/snapshot',
                                params=params,
                                data=data,
                                **kwargs)
        return resp

    def container_enable(self,
                         account=None,
                         reference=None,
                         cid=None,
                         **kwargs):
        """
        Change the status of a container database to enabled.

        :param account: account in which the container is
        :type account: `str`
        :param reference: name of the container
        :type reference: `str`
        :param cid: container id that can be used instead of account
            and reference
        """
        uri = self._make_uri('admin/enable')
        params = self._make_params(account, reference, cid=cid)
        params.update({"type": "meta2"})

        del_cached_container_metadata(account=account,
                                      reference=reference,
                                      cid=cid,
                                      **kwargs)

        resp, _ = self._direct_request('POST', uri, params=params, **kwargs)
        return resp

    def container_freeze(self,
                         account=None,
                         reference=None,
                         cid=None,
                         **kwargs):
        """
        Freeze the database of a container

        :param account: account in which the container is
        :type account: `str`
        :param reference: name of the container
        :type reference: `str`
        :param cid: container id that can be used instead of account
            and reference
        """
        uri = self._make_uri('admin/freeze')
        params = self._make_params(account, reference, cid=cid)
        params.update({"type": "meta2"})

        del_cached_container_metadata(account=account,
                                      reference=reference,
                                      cid=cid,
                                      **kwargs)

        resp, _ = self._direct_request('POST', uri, params=params, **kwargs)
        return resp

    @extract_reference_params
    def container_get_properties(self,
                                 account=None,
                                 reference=None,
                                 properties=None,
                                 cid=None,
                                 params=None,
                                 **kwargs):
        """
        Get information about a container (user and system properties).

        :param account: account in which the container is
        :type account: `str`
        :param reference: name of the container
        :type reference: `str`
        :param cid: container id that can be used instead of account
            and reference
        :type cid: `str`
        :keyword headers: extra headers to send to the proxy
        :type headers: `dict`
        :returns: a `dict` with "properties" and "system" entries,
            containing respectively a `dict` of user properties and
            a `dict` of system properties.
        """
        container_meta = get_cached_container_metadata(account=account,
                                                       reference=reference,
                                                       cid=cid,
                                                       **kwargs)
        if container_meta is not None:
            return container_meta

        if not properties:
            properties = list()
        data = json.dumps(properties)
        _resp, container_meta = self._request('POST',
                                              '/get_properties',
                                              data=data,
                                              params=params,
                                              **kwargs)

        set_cached_container_metadata(container_meta,
                                      account=account,
                                      reference=reference,
                                      cid=cid,
                                      **kwargs)

        return container_meta

    def container_set_properties(self,
                                 account=None,
                                 reference=None,
                                 properties=None,
                                 clear=False,
                                 cid=None,
                                 system=None,
                                 **kwargs):
        params = self._make_params(account, reference, cid=cid)
        if clear:
            params["flush"] = 1
        data = json.dumps({
            'properties': properties or {},
            'system': system or {}
        })

        del_cached_container_metadata(account=account,
                                      reference=reference,
                                      cid=cid,
                                      **kwargs)

        _resp, body = self._request('POST',
                                    '/set_properties',
                                    data=data,
                                    params=params,
                                    **kwargs)
        return body

    def container_del_properties(self,
                                 account=None,
                                 reference=None,
                                 properties=[],
                                 cid=None,
                                 **kwargs):
        params = self._make_params(account, reference, cid=cid)
        data = json.dumps(properties)

        del_cached_container_metadata(account=account,
                                      reference=reference,
                                      cid=cid,
                                      **kwargs)

        _resp, body = self._request('POST',
                                    '/del_properties',
                                    data=data,
                                    params=params,
                                    **kwargs)
        return body

    def container_touch(self,
                        account=None,
                        reference=None,
                        cid=None,
                        recompute=False,
                        **kwargs):
        params = self._make_params(account, reference, cid=cid)
        if recompute:
            params['recompute'] = True
        self._request('POST', '/touch', params=params, **kwargs)

    def container_dedup(self,
                        account=None,
                        reference=None,
                        cid=None,
                        **kwargs):
        params = self._make_params(account, reference, cid=cid)
        self._request('POST', '/dedup', params=params, **kwargs)

    def container_purge(self,
                        account=None,
                        reference=None,
                        cid=None,
                        maxvers=None,
                        **kwargs):
        params = self._make_params(account, reference, cid=cid)
        if maxvers is not None:
            params["maxvers"] = maxvers
        self._request('POST', '/purge', params=params, **kwargs)

    def container_raw_insert(self,
                             bean,
                             account=None,
                             reference=None,
                             cid=None,
                             **kwargs):
        params = self._make_params(account, reference, cid=cid)
        data = json.dumps((bean, ))
        if kwargs.pop("frozen", None):
            params["frozen"] = 1
        self._request('POST',
                      '/raw_insert',
                      data=data,
                      params=params,
                      **kwargs)

    def container_raw_update(self,
                             old,
                             new,
                             account=None,
                             reference=None,
                             cid=None,
                             **kwargs):
        params = self._make_params(account, reference, cid=cid)
        data = json.dumps({"old": old, "new": new})
        if kwargs.pop("frozen", None):
            params["frozen"] = 1
        self._request('POST',
                      '/raw_update',
                      data=data,
                      params=params,
                      **kwargs)

    def container_raw_delete(self,
                             account=None,
                             reference=None,
                             data=None,
                             cid=None,
                             **kwargs):
        """
        Delete raw 'beans' from a container.

        :param data: dictionaries representing the beans to delete. They must
            have a key for each column of the meta2 database, plus a 'type'
            telling which type of bean it is.
        :type data: `list` of `dict` items
        """
        params = self._make_params(account, reference, cid=cid)
        data = json.dumps(data)
        self._request('POST',
                      '/raw_delete',
                      data=data,
                      params=params,
                      **kwargs)

    def container_flush(self,
                        account=None,
                        reference=None,
                        cid=None,
                        **kwargs):
        params = self._make_params(account, reference, cid=cid)
        resp, _ = self._request('POST', '/flush', params=params, **kwargs)
        return {
            'truncated': boolean_value(resp.getheader('x-oio-truncated'),
                                       False)
        }

    @extract_reference_params
    def content_list(self,
                     account=None,
                     reference=None,
                     limit=None,
                     marker=None,
                     end_marker=None,
                     prefix=None,
                     delimiter=None,
                     properties=False,
                     cid=None,
                     versions=False,
                     deleted=False,
                     params=None,
                     **kwargs):
        """
        Get the list of contents of a container.

        :returns: a tuple with container metadata `dict` as first element
            and a `dict` with "object" and "prefixes" as second element
        """
        p_up = {
            'max': limit,
            'marker': marker,
            'end_marker': end_marker,
            'prefix': prefix,
            'delimiter': delimiter,
            'properties': properties
        }
        params.update(p_up)
        # As of 4.0.0.a3, to make it false, the 'all' parameter must be absent
        if versions:
            params['all'] = '1'
        if deleted:
            params['deleted'] = 1
        if kwargs.get('local'):
            params['local'] = 1
        resp, body = self._request('GET', '/list', params=params, **kwargs)
        return resp.headers, body

    @ensure_headers
    def content_create(self,
                       account=None,
                       reference=None,
                       path=None,
                       size=None,
                       checksum=None,
                       data=None,
                       cid=None,
                       content_id=None,
                       stgpol=None,
                       version=None,
                       mime_type=None,
                       chunk_method=None,
                       headers=None,
                       append=False,
                       change_policy=False,
                       force=False,
                       **kwargs):
        """
        Create a new object. This method does not upload any data, it just
        registers object metadata in the database.

        :param size: size of the object
        :type size: `int`
        :param checksum: checksum of the object (may be None when appending)
        :type checksum: hexadecimal `str`
        :param data: metadata of the object (list of chunks and
        dict of properties)
        :type data: `dict`
        :param cid: container id that can be used in place of `account`
            and `reference`
        :type cid: hexadecimal `str`
        :param content_id: the ID to set on the object, or the ID of the
        existing object when appending
        :param stgpol: name of the storage policy for the object
        :param version: version of the object
        :type version: `int`
        :param mime_type: MIME type to set on the object
        :param chunk_method:
        :param headers: extra headers to send to the proxy
        :param append: append to an existing object instead of creating it
        :type append: `bool`
        :param change_policy: change policy of an existing object
        :type change_policy: `bool`
        """
        uri = self._make_uri('content/create')
        params = self._make_params(account, reference, path, cid=cid)
        if append:
            params['append'] = '1'
        if change_policy:
            params['change_policy'] = '1'
        # TODO(FVE): implement 'force' parameter
        if not isinstance(data, dict):
            warnings.simplefilter('once')
            warnings.warn("'data' parameter should be a dict, not a list",
                          DeprecationWarning,
                          stacklevel=3)
        if kwargs.get('meta_pos') is not None:
            data = data['chunks']
            # TODO(FVE): change "id" into "content", and other occurrences
            params['id'] = content_id
            uri = self._make_uri('content/update')
        data = json.dumps(data)
        hdrs = {
            'x-oio-content-meta-length': str(size),
            'x-oio-content-meta-hash': checksum
        }
        hdrs.update(headers)
        if content_id is not None:
            hdrs['x-oio-content-meta-id'] = content_id
        if stgpol is not None:
            hdrs['x-oio-content-meta-policy'] = stgpol
        if version is not None:
            hdrs['x-oio-content-meta-version'] = str(version)
        if mime_type is not None:
            hdrs['x-oio-content-meta-mime-type'] = mime_type
        if chunk_method is not None:
            hdrs['x-oio-content-meta-chunk-method'] = chunk_method

        del_cached_object_metadata(account=account,
                                   reference=reference,
                                   path=path,
                                   cid=cid,
                                   version=version,
                                   **kwargs)

        resp, body = self._direct_request('POST',
                                          uri,
                                          data=data,
                                          params=params,
                                          headers=hdrs,
                                          **kwargs)
        return resp, body

    def content_drain(self,
                      account=None,
                      reference=None,
                      path=None,
                      cid=None,
                      version=None,
                      **kwargs):
        uri = self._make_uri('content/drain')
        params = self._make_params(account,
                                   reference,
                                   path,
                                   cid=cid,
                                   version=version)

        del_cached_object_metadata(account=account,
                                   reference=reference,
                                   path=path,
                                   cid=cid,
                                   version=version,
                                   **kwargs)

        resp, _ = self._direct_request('POST', uri, params=params, **kwargs)
        return resp.status == 204

    def content_delete(self,
                       account=None,
                       reference=None,
                       path=None,
                       cid=None,
                       version=None,
                       **kwargs):
        """
        Delete one object.

        :returns: True if the object has been deleted
        """
        uri = self._make_uri('content/delete')
        params = self._make_params(account,
                                   reference,
                                   path,
                                   cid=cid,
                                   version=version)

        del_cached_object_metadata(account=account,
                                   reference=reference,
                                   path=path,
                                   cid=cid,
                                   version=version,
                                   **kwargs)

        resp, _ = self._direct_request('POST', uri, params=params, **kwargs)
        return resp.status == 204

    def content_delete_many(self,
                            account=None,
                            reference=None,
                            paths=None,
                            cid=None,
                            **kwargs):
        """
        Delete several objects.

        :param paths: an iterable of object paths (should not be a generator)
        :returns: a list of tuples with the path of the content and
            a boolean telling if the content has been deleted
        :rtype: `list` of `tuple`
        """
        uri = self._make_uri('content/delete_many')
        params = self._make_params(account, reference, cid=cid)
        unformatted_data = list()
        for obj in paths:
            unformatted_data.append({'name': obj})
        data = json.dumps({"contents": unformatted_data})
        results = list()

        for path in paths:
            del_cached_object_metadata(account=account,
                                       reference=reference,
                                       path=path,
                                       cid=cid,
                                       **kwargs)

        try:
            _, resp_body = self._direct_request('POST',
                                                uri,
                                                data=data,
                                                params=params,
                                                **kwargs)
            for obj in resp_body["contents"]:
                results.append((obj["name"], obj["status"] == 204))
            return results
        except exceptions.NotFound:
            for obj in paths:
                rc = self.content_delete(account,
                                         reference,
                                         obj,
                                         cid=cid,
                                         **kwargs)
                results.append((obj, rc))
            return results
        except exceptions.TooLarge:
            pivot = len(paths) // 2
            head = paths[:pivot]
            tail = paths[pivot:]
            if head:
                results += self.content_delete_many(account,
                                                    reference,
                                                    head,
                                                    cid=cid,
                                                    **kwargs)
            if tail:
                results += self.content_delete_many(account,
                                                    reference,
                                                    tail,
                                                    cid=cid,
                                                    **kwargs)
            return results
        except Exception:
            raise

    @extract_reference_params
    def content_locate(self,
                       account=None,
                       reference=None,
                       path=None,
                       cid=None,
                       content=None,
                       version=None,
                       properties=True,
                       params=None,
                       **kwargs):
        """
        Get a description of the content along with the list of its chunks.

        :param cid: container id that can be used in place of `account`
            and `reference`
        :type cid: hexadecimal `str`
        :param content: content id that can be used in place of `path`
        :type content: hexadecimal `str`
        :param properties: should the request return object properties
            along with content description
        :type properties: `bool`
        :returns: a tuple with content metadata `dict` as first element
            and chunk `list` as second element
        """
        content_meta, chunks = get_cached_object_metadata(
            account=account,
            reference=reference,
            path=path,
            cid=cid,
            version=version,
            properties=properties,
            **kwargs)
        if content_meta is not None and chunks is not None:
            # Refresh asynchronously so as not to slow down the current request
            eventlet.spawn_n(self._maybe_refresh_rawx_scores, **kwargs)
            for chunk in chunks:
                chunk['score'] = self.rawx_scores.get(
                    chunk['url'].split('/')[2], 0)
            return content_meta, chunks

        uri = self._make_uri('content/locate')
        params['properties'] = properties
        try:
            resp, chunks = self._direct_request('GET',
                                                uri,
                                                params=params,
                                                **kwargs)
            content_meta = extract_content_headers_meta(resp.headers)
        except exceptions.OioNetworkException as exc:
            # TODO(FVE): this special behavior can be removed when
            # the 'content/locate' protocol is changed to include
            # object properties in the response body instead of headers.
            if properties and 'got more than ' in str(exc):
                params['properties'] = False
                _resp, chunks = self._direct_request('GET',
                                                     uri,
                                                     params=params,
                                                     **kwargs)
                content_meta = self.content_get_properties(account,
                                                           reference,
                                                           path,
                                                           cid=cid,
                                                           content=content,
                                                           version=version,
                                                           **kwargs)
            else:
                raise

        set_cached_object_metadata(content_meta,
                                   chunks,
                                   account=account,
                                   reference=reference,
                                   path=path,
                                   cid=cid,
                                   version=version,
                                   properties=properties,
                                   **kwargs)

        return content_meta, chunks

    @extract_reference_params
    def content_prepare(self,
                        account=None,
                        reference=None,
                        path=None,
                        position=0,
                        size=None,
                        cid=None,
                        stgpol=None,
                        content_id=None,
                        version=None,
                        params=None,
                        **kwargs):
        """
        Prepare an upload: get URLs of chunks on available rawx.

        :param position: position of the metachunk that must be prepared
        :param stgpol: name of the storage policy of the object being uploaded
        :param version: version of the object being uploaded. This is required
            only on the second and later calls to this method to get coherent
            results.
        :keyword autocreate: create container if it doesn't exist
        """
        uri = self._make_uri('content/prepare')
        data = {'size': size, 'position': position}
        if stgpol:
            data['policy'] = stgpol
        data = json.dumps(data)
        try:
            resp, body = self._direct_request('POST',
                                              uri + '2',
                                              data=data,
                                              params=params,
                                              **kwargs)
            chunks = body['chunks']
            obj_meta = extract_content_headers_meta(resp.headers)
            obj_meta['properties'] = dict()
            # pylint: disable=no-member
            obj_meta['properties'].update(body.get('properties', {}))
        except exceptions.NotFound:
            # Proxy does not support v2 request (oio < 4.3)
            resp, chunks = self._direct_request('POST',
                                                uri,
                                                data=data,
                                                params=params,
                                                **kwargs)
            obj_meta = extract_content_headers_meta(resp.headers)
        return obj_meta, chunks

    @extract_reference_params
    def content_get_properties(self,
                               account=None,
                               reference=None,
                               path=None,
                               properties=None,
                               cid=None,
                               content=None,
                               version=None,
                               params=None,
                               **kwargs):
        """
        Get a description of the content along with its user properties.
        """
        obj_meta, _ = get_cached_object_metadata(account=account,
                                                 reference=reference,
                                                 path=path,
                                                 cid=cid,
                                                 version=version,
                                                 properties=True,
                                                 **kwargs)
        if obj_meta is not None:
            return obj_meta

        uri = self._make_uri('content/get_properties')
        data = json.dumps(properties) if properties else None
        resp, body = self._direct_request('POST',
                                          uri,
                                          data=data,
                                          params=params,
                                          **kwargs)
        obj_meta = extract_content_headers_meta(resp.headers)
        obj_meta.update(body)

        set_cached_object_metadata(obj_meta,
                                   None,
                                   account=account,
                                   reference=reference,
                                   path=path,
                                   cid=cid,
                                   version=version,
                                   properties=True,
                                   **kwargs)

        return obj_meta
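
    # Cache discipline used by this class: the read path above tries
    # get_cached_object_metadata() first and re-populates the cache on a
    # miss, while every mutating call below (set/del properties, truncate,
    # purge) invalidates the entry with del_cached_object_metadata() before
    # contacting the proxy, so a failed request cannot leave stale metadata
    # in the cache.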

    def content_set_properties(self,
                               account=None,
                               reference=None,
                               path=None,
                               properties={},
                               cid=None,
                               version=None,
                               clear=False,
                               **kwargs):
        """
        Set properties on an object.

        :param properties: dictionary of properties
        """
        uri = self._make_uri('content/set_properties')
        params = self._make_params(account,
                                   reference,
                                   path,
                                   cid=cid,
                                   version=version)
        if clear:
            params['flush'] = 1
        data = json.dumps(properties)

        del_cached_object_metadata(account=account,
                                   reference=reference,
                                   path=path,
                                   cid=cid,
                                   version=version,
                                   **kwargs)

        _resp, _body = self._direct_request('POST',
                                            uri,
                                            data=data,
                                            params=params,
                                            **kwargs)

    def content_del_properties(self,
                               account=None,
                               reference=None,
                               path=None,
                               properties=[],
                               cid=None,
                               version=None,
                               **kwargs):
        """
        Delete some properties from an object.

        :param properties: list of property keys to delete
        :type properties: `list`
        :returns: `True` if the properties have been deleted
        """
        uri = self._make_uri('content/del_properties')
        params = self._make_params(account,
                                   reference,
                                   path,
                                   cid=cid,
                                   version=version)
        # Build a list in case the parameter is a view (not serializable).
        data = json.dumps([x for x in properties])

        del_cached_object_metadata(account=account,
                                   reference=reference,
                                   path=path,
                                   cid=cid,
                                   version=version,
                                   **kwargs)

        resp, _body = self._direct_request('POST',
                                           uri,
                                           data=data,
                                           params=params,
                                           **kwargs)
        return resp.status == 204

    def content_touch(self,
                      account=None,
                      reference=None,
                      path=None,
                      cid=None,
                      version=None,
                      **kwargs):
        uri = self._make_uri('content/touch')
        params = self._make_params(account,
                                   reference,
                                   path,
                                   cid=cid,
                                   version=version)
        self._direct_request('POST', uri, params=params, **kwargs)

    @extract_reference_params
    def content_spare(self,
                      account=None,
                      reference=None,
                      path=None,
                      version=None,
                      data=None,
                      cid=None,
                      stgpol=None,
                      position=None,
                      params=None,
                      **kwargs):
        uri = self._make_uri('content/spare')
        if None in (stgpol, position):
            raise ValueError('stgpol and position cannot be None')
        params['stgpol'] = stgpol
        params['position'] = position
        data = json.dumps(data)
        _resp, body = self._direct_request('POST',
                                           uri,
                                           data=data,
                                           params=params,
                                           **kwargs)
        return body

    def content_truncate(self,
                         account=None,
                         reference=None,
                         path=None,
                         cid=None,
                         version=None,
                         size=0,
                         **kwargs):
        uri = self._make_uri('content/truncate')
        params = self._make_params(account,
                                   reference,
                                   path,
                                   cid=cid,
                                   version=version)
        params['size'] = size

        del_cached_object_metadata(account=account,
                                   reference=reference,
                                   path=path,
                                   cid=cid,
                                   version=version,
                                   **kwargs)

        _resp, body = self._direct_request('POST',
                                           uri,
                                           params=params,
                                           **kwargs)
        return body

    def content_purge(self,
                      account=None,
                      reference=None,
                      path=None,
                      cid=None,
                      maxvers=None,
                      **kwargs):
        uri = self._make_uri('content/purge')
        params = self._make_params(account, reference, path, cid=cid)
        if maxvers is not None:
            params["maxvers"] = maxvers

        del_cached_object_metadata(account=account,
                                   reference=reference,
                                   path=path,
                                   cid=cid,
                                   **kwargs)

        self._direct_request('POST', uri, params=params, **kwargs)
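
A short end-to-end sketch of the property API above. The namespace, account
and object names are invented, error handling is omitted, the import path is
assumed to follow the oio-sds layout, and the property payload is passed
as-is (matching the json.dumps(properties) call above; the exact layout
expected by the proxy may vary between versions):

from oio.container.client import ContainerClient

client = ContainerClient({'namespace': 'OPENIO'})
client.content_set_properties(account='myaccount', reference='mycontainer',
                              path='myobject', properties={'color': 'blue'})
meta = client.content_get_properties(account='myaccount',
                                     reference='mycontainer', path='myobject')
print meta.get('properties')
client.content_del_properties(account='myaccount', reference='mycontainer',
                              path='myobject', properties=['color'])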
Example #13
 def conscience(self):
     if not self._conscience:
         self._conscience = ConscienceClient(self.conf)
     return self._conscience
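
One-method snippets like the one above are lazily-initialized accessors; in
their original classes they are presumably decorated with @property. A
self-contained sketch of the pattern (the surrounding class is invented):

from oio.conscience.client import ConscienceClient

class LazyClients(object):
    def __init__(self, conf):
        self.conf = conf
        self._conscience = None

    @property
    def conscience(self):
        # Create the ConscienceClient on first access, then reuse it.
        if not self._conscience:
            self._conscience = ConscienceClient(self.conf)
        return self._conscience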
Example #14
class ContentFactory(object):
    def __init__(self, conf):
        self.conf = conf
        self.logger = get_logger(conf)
        self.cs_client = ConscienceClient(conf)
        self.container_client = ContainerClient(conf)
        self.ns_info = self.cs_client.info()

    def _extract_datasec(self, stgpol_name):
        try:
            stgpol = self.ns_info["storage_policy"][stgpol_name]
        except KeyError:
            self.logger.error("Storage policy '%s' not found" % stgpol_name)
            raise InconsistentContent("Storage policy not found")

        stgclass_name, datasec_name, datatreat_name = stgpol.split(':')
        if datasec_name == 'NONE':
            return "DUP", {"nb_copy": "1", "distance": "0"}

        try:
            datasec = self.ns_info["data_security"][datasec_name]
        except KeyError:
            self.logger.error("Data security '%s' not found" % datasec_name)
            raise InconsistentContent("Data security not found")

        ds_type, ds_args = datasec.split(':')
        args = {}
        for arg in ds_args.split('|'):
            key, value = arg.split('=')
            args[key] = value

        return ds_type, args

    def get(self, container_id, content_id):
        try:
            meta, chunks = self.container_client.content_show(
                cid=container_id, content=content_id)
        except NotFound:
            raise ContentNotFound("Content %s/%s not found" % (container_id,
                                  content_id))

        pol_type, pol_args = self._extract_datasec(meta['policy'])

        if pol_type == "DUP":
            return DupContent(self.conf, container_id, meta, chunks, pol_args)
        elif pol_type == "RAIN":
            return RainContent(self.conf, container_id, meta, chunks, pol_args)

        raise InconsistentContent("Unknown storage policy")

    def new(self, container_id, path, size, policy):
        meta, chunks = self.container_client.content_prepare(
            cid=container_id, path=path, size=size, stgpol=policy)

        pol_type, pol_args = self._extract_datasec(meta['policy'])

        if pol_type == "DUP":
            return DupContent(self.conf, container_id, meta, chunks, pol_args)
        elif pol_type == "RAIN":
            return RainContent(self.conf, container_id, meta, chunks, pol_args)

        raise InconsistentContent("Unknown storage policy")

    def change_policy(self, container_id, content_id, new_policy):
        old_content = self.get(container_id, content_id)
        if old_content.stgpol_name == new_policy:
            return old_content

        new_content = self.new(container_id, old_content.path,
                               old_content.length, new_policy)

        stream = old_content.download()
        new_content.upload(GeneratorIO(stream))
        # the old content is automatically deleted because the new content has
        # the same name (but not the same id)
        return new_content
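
The strings parsed by _extract_datasec above follow a
"storage class:data security:data treatment" layout for policies, and a
"type:key=val|key=val" layout for data-security entries. A standalone
illustration with a made-up entry:

datasec = "DUP:nb_copy=3|distance=1"
ds_type, ds_args = datasec.split(':')
args = dict(arg.split('=') for arg in ds_args.split('|'))
print ds_type, args   # -> DUP {'nb_copy': '3', 'distance': '1'}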
Example #15
class AccountClient(HttpApi):
    """Simple client API for the account service."""
    def __init__(self,
                 conf,
                 endpoint=None,
                 proxy_endpoint=None,
                 refresh_delay=3600.0,
                 logger=None,
                 **kwargs):
        """
        Initialize a client for the account service.

        :param conf: dictionary with at least the namespace name
        :type conf: `dict`
        :param endpoint: URL of an account service
        :param proxy_endpoint: URL of the proxy
        :param refresh_delay: time between refreshes of the
            account service endpoint (if not provided at instantiation)
        :type refresh_delay: `float` seconds
        """
        super(AccountClient, self).__init__(endpoint=endpoint, **kwargs)
        self.logger = logger or get_logger(conf)
        self.cs = ConscienceClient(conf,
                                   endpoint=proxy_endpoint,
                                   logger=self.logger,
                                   **kwargs)
        self._refresh_delay = refresh_delay if not self.endpoint else -1.0
        self._last_refresh = 0.0
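        # Note: when an explicit endpoint is given, the refresh delay is
        # forced to -1.0, so _maybe_refresh_endpoint() keeps it forever.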

    def _get_account_addr(self):
        """Fetch IP and port of an account service from Conscience."""
        try:
            acct_instance = self.cs.next_instance('account')
            acct_addr = acct_instance.get('addr')
        except Exception:
            raise ClientException("No Account service found")
        return acct_addr

    def _refresh_endpoint(self, now=None):
        """Refresh account service endpoint."""
        addr = self._get_account_addr()
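        # The single slash in "http:/" is intentional: join() inserts the
        # missing one between the scheme and the address.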
        self.endpoint = '/'.join(("http:/", addr, "v1.0/account"))
        if not now:
            now = time.time()
        self._last_refresh = now

    def _maybe_refresh_endpoint(self):
        """Refresh account service endpoint if delay has been reached."""
        if self._refresh_delay >= 0.0 or not self.endpoint:
            now = time.time()
            if now - self._last_refresh > self._refresh_delay:
                try:
                    self._refresh_endpoint(now)
                except ClientException:
                    if not self.endpoint:
                        # Cannot use the previous one
                        raise
                    self.logger.exception("Failed to refresh account endpoint")

    def account_request(self, account, method, action, params=None, **kwargs):
        """Make a request to the account service."""
        self._maybe_refresh_endpoint()
        if not params:
            params = dict()
        if account:
            params['id'] = quote(account)
        try:
            resp, body = self._request(method, action, params=params, **kwargs)
        except OioNetworkException as exc:
            exc_info = sys.exc_info()
            if self._refresh_delay >= 0.0:
                self.logger.info("Refreshing account endpoint after error %s",
                                 exc)
                try:
                    self._refresh_endpoint()
                except Exception as exc:
                    self.logger.warn("%s", exc)
            raise exc_info[0], exc_info[1], exc_info[2]
        return resp, body

    def account_create(self, account, **kwargs):
        """
        Create an account.

        :param account: name of the account to create
        :type account: `str`
        :returns: `True` if the account has been created
        """
        resp, _body = self.account_request(account, 'PUT', 'create', **kwargs)
        return resp.status == 201

    def account_delete(self, account, **kwargs):
        """
        Delete an account.

        :param account: name of the account to delete
        :type account: `str`
        """
        self.account_request(account, 'POST', 'delete', **kwargs)

    def account_list(self, **kwargs):
        """
        List accounts.
        """
        _resp, body = self.account_request(None, 'GET', 'list', **kwargs)
        return body

    def account_show(self, account, **kwargs):
        """
        Get information about an account.
        """
        _resp, body = self.account_request(account, 'GET', 'show', **kwargs)
        return body

    # FIXME: document this
    def account_update(self, account, metadata, to_delete, **kwargs):
        data = json.dumps({"metadata": metadata, "to_delete": to_delete})
        self.account_request(account, 'POST', 'update', data=data, **kwargs)

    def container_list(self,
                       account,
                       limit=None,
                       marker=None,
                       end_marker=None,
                       prefix=None,
                       delimiter=None,
                       **kwargs):
        """
        Get the list of containers of an account.

        :param account: account from which to get the container list
        :type account: `str`
        :keyword limit: maximum number of results to return
        :type limit: `int`
        :keyword marker: name of the container from where to start the listing
        :type marker: `str`
        :keyword end_marker: name of the container where to stop the listing
        :keyword prefix: list only the containers whose name starts with
            the prefix
        :keyword delimiter: character used to roll up containers sharing
            a common prefix
        """
        params = {
            "id": account,
            "limit": limit,
            "marker": marker,
            "end_marker": end_marker,
            "prefix": prefix,
            "delimiter": delimiter
        }
        _resp, body = self.account_request(account,
                                           'GET',
                                           'containers',
                                           params=params,
                                           **kwargs)
        return body

    def container_update(self, account, container, metadata=None, **kwargs):
        """
        Update account with container-related metadata.

        :param account: name of the account to update
        :type account: `str`
        :param container: name of the container whose metadata has changed
        :type container: `str`
        :param metadata: container metadata ("bytes", "objects",
            "mtime", "dtime")
        :type metadata: `dict`
        """
        # Copy the dict to avoid mutating the caller's, and tolerate None.
        metadata = dict(metadata or {})
        metadata['name'] = container
        _resp, body = self.account_request(account,
                                           'POST',
                                           'container/update',
                                           data=json.dumps(metadata),
                                           **kwargs)
        return body

    def container_reset(self, account, container, mtime, **kwargs):
        """
        Reset a container of an account.

        :param account: name of the account
        :type account: `str`
        :param container: name of the container to reset
        :type container: `str`
        :param mtime: time of the modification
        """
        metadata = dict()
        metadata["name"] = container
        metadata["mtime"] = mtime
        self.account_request(account,
                             'POST',
                             'container/reset',
                             data=json.dumps(metadata),
                             **kwargs)

    def account_refresh(self, account, **kwargs):
        """
        Refresh the counters of an account.

        :param account: name of the account to refresh
        :type account: `str`
        """
        self.account_request(account, 'POST', 'refresh', **kwargs)

    def account_flush(self, account, **kwargs):
        """
        Flush all containers of an account.

        :param account: name of the account to flush
        :type account: `str`
        """
        self.account_request(account, 'POST', 'flush', **kwargs)
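
A hedged usage sketch of the account client above. The namespace and account
names are invented, and the response fields are assumptions based on the
account service:

from oio.account.client import AccountClient

client = AccountClient({'namespace': 'OPENIO'})
if client.account_create('myaccount'):
    print "account created"
info = client.account_show('myaccount')
print info.get('bytes'), info.get('containers')
listing = client.container_list('myaccount', limit=10)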
Example #16
class XcuteClient(HttpApi):
    """Simple client API for the xcute service."""
    def __init__(self,
                 conf,
                 endpoint=None,
                 proxy_endpoint=None,
                 refresh_delay=3600.0,
                 logger=None,
                 **kwargs):
        """
        Initialize a client for the xcute service.

        :param conf: dictionary with at least the namespace name
        :type conf: `dict`
        :param endpoint: URL of an xcute service
        :param proxy_endpoint: URL of the proxy
        :param refresh_delay: time between refreshes of the
            xcute service endpoint (if not provided at instantiation)
        :type refresh_delay: `float` seconds
        """
        super(XcuteClient, self).__init__(endpoint=endpoint,
                                          service_type='xcute-service',
                                          **kwargs)
        self.conf = conf
        self.logger = logger or get_logger(self.conf)

        self.conscience = ConscienceClient(conf,
                                           endpoint=proxy_endpoint,
                                           logger=self.logger,
                                           **kwargs)

        self._refresh_delay = refresh_delay if not self.endpoint else -1.0
        self._last_refresh = 0.0

    def _get_xcute_addr(self, **kwargs):
        """Fetch IP and port of an xcute service from Conscience."""
        xcute_instance = self.conscience.next_instance('xcute', **kwargs)
        xcute_addr = xcute_instance.get('addr')
        return xcute_addr

    def _refresh_endpoint(self, now=None, **kwargs):
        """Refresh xcute service endpoint."""
        addr = self._get_xcute_addr(**kwargs)
        self.endpoint = '/'.join(("http:/", addr, "v1.0/xcute"))
        if not now:
            now = time.time()
        self._last_refresh = now

    def _maybe_refresh_endpoint(self, **kwargs):
        """Refresh xcute service endpoint if delay has been reached."""
        if self._refresh_delay >= 0.0 or not self.endpoint:
            now = time.time()
            if now - self._last_refresh > self._refresh_delay:
                try:
                    self._refresh_endpoint(now, **kwargs)
                except OioNetworkException as exc:
                    if not self.endpoint:
                        # Cannot use the previous one
                        raise
                    self.logger.warn("Failed to refresh xcute endpoint: %s",
                                     exc)
                except OioException:
                    if not self.endpoint:
                        # Cannot use the previous one
                        raise
                    self.logger.exception("Failed to refresh xcute endpoint")

    def xcute_request(self, method, action, params=None, **kwargs):
        """Make a request to the xcute service."""
        self._maybe_refresh_endpoint(**kwargs)
        if not params:
            params = dict()
        try:
            resp, body = self._request(method, action, params=params, **kwargs)
        except OioNetworkException as exc:
            exc_info = sys.exc_info()
            if self._refresh_delay >= 0.0:
                self.logger.info("Refreshing xcute endpoint after error %s",
                                 exc)
                try:
                    self._refresh_endpoint(**kwargs)
                except Exception as exc:
                    self.logger.warn("%s", exc)
            reraise(exc_info[0], exc_info[1], exc_info[2])
        return resp, body

    def job_list(self,
                 limit=None,
                 prefix=None,
                 marker=None,
                 job_status=None,
                 job_type=None,
                 job_lock=None):
        _, data = self.xcute_request('GET',
                                     '/job/list',
                                     params={
                                         'limit': limit,
                                         'prefix': prefix,
                                         'marker': marker,
                                         'status': job_status,
                                         'type': job_type,
                                         'lock': job_lock
                                     })
        return data

    def job_create(self, job_type, job_config=None):
        _, data = self.xcute_request('POST',
                                     '/job/create',
                                     params={'type': job_type},
                                     json=job_config)
        return data

    def job_show(self, job_id):
        _, data = self.xcute_request('GET', '/job/show', params={'id': job_id})
        return data

    def job_pause(self, job_id):
        _, data = self.xcute_request('POST',
                                     '/job/pause',
                                     params={'id': job_id})
        return data

    def job_resume(self, job_id):
        _, data = self.xcute_request('POST',
                                     '/job/resume',
                                     params={'id': job_id})
        return data

    def job_update(self, job_id, job_config=None):
        _, data = self.xcute_request('POST',
                                     '/job/update',
                                     params={'id': job_id},
                                     json=job_config)
        return data

    def job_delete(self, job_id):
        self.xcute_request('DELETE', '/job/delete', params={'id': job_id})

    def lock_list(self):
        _, data = self.xcute_request('GET', '/lock/list')
        return data

    def lock_show(self, lock):
        _, data = self.xcute_request('GET',
                                     '/lock/show',
                                     params={'lock': lock})
        return data
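
A minimal sketch of driving the client above. The job type and its
parameters are assumptions, and the response layout is assumed to match the
job_info structure used by the orchestrator in a later example:

from oio.xcute.client import XcuteClient

client = XcuteClient({'namespace': 'OPENIO'})
job = client.job_create(
    'rawx-rebuild', job_config={'params': {'service_id': '127.0.0.1:6201'}})
print job['job']['id'], job['job']['status']
print client.job_list(limit=10)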
Example #17
 def cluster(self):
     if not self._cluster:
         from oio.conscience.client import ConscienceClient
         self._cluster = ConscienceClient(self.conf)
     return self._cluster
Example #18
class Harasser(object):
    def __init__(self, ns, max_containers=256, max_contents=256):
        conf = {'namespace': ns}
        self.cs = ConscienceClient(conf)
        self.rdir = RdirClient(conf)
        self.rawx_list = [x['addr'] for x in self.cs.all_services('rawx')]
        self.sent = set()
        self.max_containers = max_containers
        self.max_contents = max_contents
        self.pushed_count = 0
        self.pushed_time = 0
        self.removed_count = 0
        self.removed_time = 0

    def harass_put(self, loops=None):
        if loops is None:
            loops = random.randint(1000, 2000)
        print "Pushing %d fake chunks" % loops
        loop = loops
        count_start_container = random.randrange(2**20)
        count_start_content = random.randrange(2**20)
        start = time.time()
        nb_rawx = len(self.rawx_list)
        while loop > 0:
            args = {'mtime': int(start)}
            # vol_id = random.choice(self.rawx_list)
            # container_id = "%064X" % (random.randrange(self.max_containers))
            # content_id = "%032X" % (random.randrange(self.max_contents))
            vol_id = self.rawx_list[loop % nb_rawx]
            container_id = "%064X" % (loop + count_start_container)
            content_id = "%032X" % (loop + count_start_content)
            chunk_id = "http://%s/%064X" \
                % (vol_id, random.randrange(2**128))
            self.rdir.chunk_push(vol_id, container_id, content_id, chunk_id,
                                 **args)
            self.sent.add((vol_id, container_id, content_id, chunk_id))
            loop -= 1
        end = time.time()
        self.pushed_count += loops
        self.pushed_time += end - start
        print "%d pushed in %.3fs, %d req/s" \
            % (loops, end-start, loops/(end-start))

    def harass_del(self, min_loops=0):
        min_loops = min(min_loops, len(self.sent))
        loops = random.randint(min_loops, len(self.sent))
        print "Removing %d fake chunks" % loops
        loop = loops
        start = time.time()
        while loop > 0:
            args = self.sent.pop()
            self.rdir.chunk_delete(*args)
            loop -= 1
        end = time.time()
        self.removed_count += loops
        self.removed_time += end - start
        print "%d removed in %.3fs, %d req/s" \
            % (loops, end-start, loops/(end-start))

    def __call__(self):
        try:
            while True:
                self.harass_put()
                self.harass_del()
        except KeyboardInterrupt:
            print "Cleaning..."
            self.harass_del(len(self.sent))
            print "Stats:"
            print "Pushed %d in %.3fs, %d req/s" % (
                self.pushed_count, self.pushed_time,
                self.pushed_count / self.pushed_time)
            print "Removed %d in %.3fs, %d req/s" % (
                self.removed_count, self.removed_time,
                self.removed_count / self.removed_time)
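
Typical use of the stress tool above: push and delete fake chunk records
until interrupted (the namespace name is hypothetical):

if __name__ == '__main__':
    harasser = Harasser('OPENIO')
    harasser()   # loops until Ctrl+C, then cleans up and prints statistics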
Example #19
 def conscience(self):
     if self._conscience_client is None:
         from oio.conscience.client import ConscienceClient
         self._conscience_client = ConscienceClient(
             self.client_conf, pool_manager=self.pool_manager)
     return self._conscience_client
Example #20
class EventWorker(object):
    def __init__(self, conf, name, context, **kwargs):
        self.conf = conf
        self.name = name
        verbose = kwargs.pop('verbose', False)
        self.logger = get_logger(self.conf, verbose=verbose)
        self.init_zmq(context)
        self.cs = ConscienceClient(self.conf)
        self.rdir = RdirClient(self.conf)
        self._acct_addr = None
        self.acct_update = 0
        self.acct_refresh_interval = int_value(
            conf.get('acct_refresh_interval'), 60)
        self.acct_update = true_value(conf.get('acct_update', True))
        self.rdir_update = true_value(conf.get('rdir_update', True))
        self.session = requests.Session()
        self.failed = False

    def start(self):
        self.logger.info('worker "%s" starting', self.name)
        self.running = True
        self.run()

    def stop(self):
        self.logger.info('worker "%s" stopping', self.name)
        self.running = False

    def init_zmq(self, context):
        socket = context.socket(zmq.REP)
        socket.connect('inproc://event-front')
        self.socket = socket

    def safe_ack(self, msg):
        try:
            self.socket.send_multipart(msg)
        except Exception:
            self.logger.warn('Unable to ack event')

    def run(self):
        try:
            while self.running:
                msg = self.socket.recv_multipart()
                self.logger.debug("msg received: %s" % msg)
                event = decode_msg(msg)
                success = self.process_event(event)
                f = "0" if success else ""
                self.safe_ack([msg[0], f])
        except Exception as e:
            self.logger.warn('ERROR in worker "%s": %s', self.name, e)
            self.failed = True
            raise
        finally:
            self.logger.info('worker "%s" stopped', self.name)

    def process_event(self, event):
        handler = self.get_handler(event)
        if not handler:
            self.logger.warn("No handler found")
            # mark as success
            return True
        success = True
        try:
            handler(event)
        except Exception:
            success = False
        return success

    def get_handler(self, event):
        event_type = event.get('event')
        if not event_type:
            return None

        if event_type == EventType.CONTAINER_PUT:
            return self.handle_container_put
        elif event_type == EventType.CONTAINER_DESTROY:
            return self.handle_container_destroy
        elif event_type == EventType.CONTAINER_UPDATE:
            return self.handle_container_update
        elif event_type == EventType.OBJECT_PUT:
            return self.handle_object_put
        elif event_type == EventType.OBJECT_DELETE:
            return self.handle_object_delete
        elif event_type == EventType.REFERENCE_UPDATE:
            return self.handle_reference_update
        elif event_type == EventType.CHUNK_PUT:
            return self.handle_chunk_put
        elif event_type == EventType.CHUNK_DELETE:
            return self.handle_chunk_delete
        elif event_type == EventType.PING:
            return self.handle_ping
        else:
            return None

    @property
    def acct_addr(self):
        if not self._acct_addr or self.acct_refresh():
            try:
                acct_instance = self.cs.next_instance(ACCOUNT_SERVICE)
                self._acct_addr = acct_instance.get('addr')
                self.acct_update = time.time()
            except Exception:
                self.logger.warn('Unable to find account instance')
        return self._acct_addr

    def acct_refresh(self):
        return (time.time() - self.acct_update) > self.acct_refresh_interval

    def handle_container_put(self, event):
        """
        Handle container creation.
        :param event:
        """
        self.logger.debug('worker "%s" handle container put', self.name)
        if not self.acct_update:
            return
        uri = 'http://%s/v1.0/account/container/update' % self.acct_addr
        mtime = event.get('when')
        data = event.get('data')
        name = data.get('url').get('user')
        account = data.get('url').get('account')

        event = {'mtime': mtime, 'name': name}
        self.session.post(uri, params={'id': account}, data=json.dumps(event))

    def handle_container_update(self, event):
        """
        Handle container update.
        :param event:
        """
        self.logger.debug('worker "%s" handle container update', self.name)
        if not self.acct_update:
            return
        uri = 'http://%s/v1.0/account/container/update' % self.acct_addr
        mtime = event.get('when')
        data = event.get('data')
        name = event.get('url').get('user')
        account = event.get('url').get('account')
        bytes_count = data.get('bytes-count', 0)
        object_count = data.get('object-count', 0)

        event = {
            'mtime': mtime,
            'name': name,
            'bytes': bytes_count,
            'objects': object_count
        }
        self.session.post(uri, params={'id': account}, data=json.dumps(event))

    def handle_container_destroy(self, event):
        """
        Handle container destroy.
        :param event:
        """
        self.logger.debug('worker "%s" handle container destroy', self.name)
        if not self.acct_update:
            return
        uri = 'http://%s/v1.0/account/container/update' % self.acct_addr
        dtime = event.get('when')
        data = event.get('data')
        name = data.get('url').get('user')
        account = data.get('url').get('account')

        event = {'dtime': dtime, 'name': name}
        self.session.post(uri, params={'id': account}, data=json.dumps(event))

    def handle_object_delete(self, event):
        """
        Handle object deletion.
        Delete the chunks of the object.
        :param event:
        """
        self.logger.debug('worker "%s" handle object delete', self.name)
        pile = GreenPile(PARALLEL_CHUNKS_DELETE)

        chunks = []

        for item in event.get('data'):
            if item.get('type') == 'chunks':
                chunks.append(item)
        if not chunks:
            self.logger.warn('No chunks found in event data')
            return

        def delete_chunk(chunk):
            resp = None
            try:
                with Timeout(CHUNK_TIMEOUT):
                    resp = self.session.delete(chunk['id'])
            except (Exception, Timeout) as e:
                self.logger.exception(e)
            return resp

        for chunk in chunks:
            pile.spawn(delete_chunk, chunk)

        resps = [resp for resp in pile if resp]

        for resp in resps:
            if resp.status_code == 204:
                self.logger.info('deleted chunk %s' % resp.url)
            else:
                self.logger.warn('failed to delete chunk %s' % resp.url)

    def handle_object_put(self, event):
        """
        Handle object creation.
        TODO
        :param event:
        """
        self.logger.debug('worker "%s" handle object put', self.name)

    def handle_reference_update(self, event):
        """
        Handle reference update.
        TODO
        :param event
        """
        self.logger.debug('worker "%s" handle reference update', self.name)

    def handle_chunk_put(self, event):
        """
        Handle chunk creation.
        :param event
        """
        if not self.rdir_update:
            self.logger.debug('worker "%s" skip chunk creation', self.name)
            return

        self.logger.debug('worker "%s" handle chunk creation', self.name)

        when = event.get('when')
        data = event.get('data')
        volume_id = data.get('volume_id')
        del data['volume_id']
        container_id = data.get('container_id')
        del data['container_id']
        content_id = data.get('content_id')
        del data['content_id']
        chunk_id = data.get('chunk_id')
        del data['chunk_id']
        data['mtime'] = when
        self.rdir.chunk_push(volume_id, container_id, content_id, chunk_id,
                             **data)

    def handle_chunk_delete(self, event):
        """
        Handle chunk deletion.
        :param event
        """
        if not self.rdir_update:
            self.logger.debug('worker "%s" skip chunk deletion', self.name)
            return

        self.logger.debug('worker "%s" handle chunk deletion', self.name)

        data = event.get('data')
        volume_id = data.get('volume_id')
        container_id = data.get('container_id')
        content_id = data.get('content_id')
        chunk_id = data.get('chunk_id')
        self.rdir.chunk_delete(volume_id, container_id, content_id, chunk_id)

    def handle_ping(self, event):
        """
        Handle ping
        :param event
        """
        self.logger.debug('worker "%s" handle ping', self.name)
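
The chunk handlers above expect a flat `data` dict; a fabricated event that
handle_chunk_put would accept looks like this (all field values are
invented):

event = {
    'event': EventType.CHUNK_PUT,
    'when': 1500000000,
    'data': {
        'volume_id': '127.0.0.1:6201',
        'container_id': '0' * 64,
        'content_id': '0' * 32,
        'chunk_id': 'http://127.0.0.1:6201/chunk-0123',
    },
}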
Example #21
class EventWorker(Worker):
    def __init__(self, *args, **kwargs):
        super(EventWorker, self).__init__(*args, **kwargs)
        self.app_env = dict()

    def init(self):
        eventlet.monkey_patch(os=False)
        self.tube = self.conf.get("tube", DEFAULT_TUBE)
        self.cs = ConscienceClient(self.conf, logger=self.logger)
        self.rdir = RdirClient(self.conf, logger=self.logger)
        self._acct_addr = None
        self.acct_update = 0
        self.graceful_timeout = 1
        self.acct_refresh_interval = int_value(
            self.conf.get('acct_refresh_interval'), 60)
        self.acct_update = true_value(self.conf.get('acct_update', True))
        self.rdir_update = true_value(self.conf.get('rdir_update', True))
        self.app_env['acct_addr'] = self.acct_addr
        if 'handlers_conf' not in self.conf:
            raise ValueError("'handlers_conf' path not defined in conf")
        self.handlers = loadhandlers(self.conf.get('handlers_conf'),
                                     global_conf=self.conf,
                                     app=self)
        super(EventWorker, self).init()

    def notify(self):
        """TODO"""
        pass

    def safe_decode_job(self, job_id, data):
        try:
            env = json.loads(data)
            env['job_id'] = job_id
            return env
        except Exception as exc:
            self.logger.warn('Failed to decode job %s: "%s"', job_id,
                             str(exc.message))
            return None

    def run(self):
        coros = []
        queue_url = self.conf.get('queue_url', '127.0.0.1:11300')
        concurrency = int_value(self.conf.get('concurrency'), 10)

        server_gt = greenthread.getcurrent()

        for i in range(concurrency):
            beanstalk = Beanstalk.from_url(queue_url)
            gt = eventlet.spawn(self.handle, beanstalk)
            gt.link(_eventlet_stop, server_gt, beanstalk)
            coros.append(gt)
            beanstalk, gt = None, None

        while self.alive:
            self.notify()
            try:
                eventlet.sleep(1.0)
            except AssertionError:
                self.alive = False
                break

        self.notify()
        try:
            with Timeout(self.graceful_timeout) as t:
                [c.kill(StopServe()) for c in coros]
                [c.wait() for c in coros]
        except Timeout as te:
            if te != t:
                raise
            [c.kill() for c in coros]

    def handle(self, beanstalk):
        conn_error = False
        try:
            if self.tube:
                beanstalk.use(self.tube)
                beanstalk.watch(self.tube)
            while True:
                try:
                    job_id, data = beanstalk.reserve()
                    if conn_error:
                        self.logger.warn("beanstalk reconnected")
                        conn_error = False
                except ConnectionError:
                    if not conn_error:
                        self.logger.warn("beanstalk connection error")
                        conn_error = True
                    eventlet.sleep(BEANSTALK_RECONNECTION)
                    continue
                event = self.safe_decode_job(job_id, data)
                try:
                    self.process_event(job_id, event, beanstalk)
                except (ClientException, OioNetworkException) as exc:
                    self.logger.warn("Burying event %s (%s): %s", job_id,
                                     event.get('event'), exc)
                    beanstalk.bury(job_id)
                except ExplicitBury:
                    self.logger.info("Burying event %s (%s)", job_id,
                                     event.get('event'))
                    beanstalk.bury(job_id)
                except Exception:
                    self.logger.exception("Burying event %s: %s", job_id,
                                          event)
                    beanstalk.bury(job_id)
        except StopServe:
            pass

    def process_event(self, job_id, event, beanstalk):
        handler = self.get_handler(event)
        if not handler:
            self.logger.warn('no handler found for %r' % event)
            beanstalk.delete(job_id)
            return

        def cb(status, msg):
            if is_success(status):
                beanstalk.delete(job_id)
            elif is_error(status):
                self.logger.warn(
                    'event %s handling failure (release with delay): %s',
                    event['job_id'], msg)
                beanstalk.release(job_id, delay=RELEASE_DELAY)

        handler(event, cb)

    def get_handler(self, event):
        return self.handlers.get(event.get('event'), None)

    def acct_addr(self):
        if not self._acct_addr or self.acct_refresh():
            acct_instance = self.cs.next_instance(ACCOUNT_SERVICE)
            self._acct_addr = acct_instance.get('addr')
            self.acct_update = time.time()
        return self._acct_addr

    def acct_refresh(self):
        return (time.time() - self.acct_update) > self.acct_refresh_interval
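
A stripped-down version of the consumption loop in handle() above, using the
same Beanstalk wrapper; the queue URL and tube name are examples, and
process() is a hypothetical callable:

beanstalk = Beanstalk.from_url('127.0.0.1:11300')
beanstalk.watch('oio-event')
while True:
    job_id, data = beanstalk.reserve()
    try:
        process(json.loads(data))
        beanstalk.delete(job_id)    # done, drop the job
    except Exception:
        beanstalk.bury(job_id)      # keep the job around for inspection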
Example #22
 def cs(self):
     if not self._cs:
         self._cs = ConscienceClient(self.conf, logger=self.logger)
     return self._cs
Example #23
 def __init__(self, conf):
     self.conf = conf
     self.logger = get_logger(conf)
     self.cs_client = ConscienceClient(conf)
     self.container_client = ContainerClient(conf)
     self.ns_info = self.cs_client.info()
Example #24
class XcuteOrchestrator(object):

    DEFAULT_DISPATCHER_TIMEOUT = 2
    DEFAULT_REFRESH_TIME_BEANSTALKD_WORKERS = 30
    DEFAULT_MAX_JOBS_PER_BEANSTALKD = 1024

    def __init__(self, conf, logger=None):
        self.conf = conf
        self.logger = logger or get_logger(self.conf)
        self.backend = XcuteBackend(self.conf, logger=self.logger)
        self.conscience_client = ConscienceClient(self.conf)

        self.orchestrator_id = self.conf.get('orchestrator_id')
        if not self.orchestrator_id:
            raise ValueError('Missing orchestrator ID')
        self.logger.info('Using orchestrator ID: %s', self.orchestrator_id)

        self.beanstalkd_workers_tube = self.conf.get('beanstalkd_workers_tube')
        if not self.beanstalkd_workers_tube:
            raise ValueError('Missing beanstalkd workers tube')
        self.logger.info('Using beanstalkd workers tube: %s',
                         self.beanstalkd_workers_tube)

        self.beanstalkd_reply_addr = self.conf.get('beanstalkd_reply_addr')
        if not self.beanstalkd_reply_addr:
            raise ValueError('Missing beanstalkd reply address')
        self.beanstalkd_reply_tube = self.conf.get(
            'beanstalkd_reply_tube', self.beanstalkd_workers_tube + '.reply')
        self.logger.info('Using beanstalkd reply: %s %s',
                         self.beanstalkd_reply_addr,
                         self.beanstalkd_reply_tube)

        self.refresh_time_beanstalkd_workers = int_value(
            self.conf.get('refresh_time_beanstalkd_workers'),
            self.DEFAULT_REFRESH_TIME_BEANSTALKD_WORKERS)

        self.max_jobs_per_beanstalkd = int_value(
            self.conf.get('max_jobs_per_beanstalkd'),
            self.DEFAULT_MAX_JOBS_PER_BEANSTALKD)

        self.running = True
        self.beanstalkd_workers = dict()

        self.refresh_beanstalkd_workers_thread = None
        self.listen_beanstalkd_reply_thread = None
        self.dispatch_tasks_threads = dict()
        self.compute_total_tasks_threads = dict()

    def handle_backend_errors(self, func, *args, **kwargs):
        while True:
            try:
                return func(*args, **kwargs), None
            except (RedisConnectionError, RedisTimeoutError) as exc:
                self.logger.warn('Failed to communicate with Redis: %s', exc)
                if not self.running:
                    return None, exc
                sleep(1)

    def safe_run_forever(self):
        try:
            self.run_forever()
        except Exception as exc:
            self.logger.exception('Failed to run forever: %s', exc)
            self.exit_gracefully()

        if self.refresh_beanstalkd_workers_thread:
            self.refresh_beanstalkd_workers_thread.join()
        if self.listen_beanstalkd_reply_thread:
            self.listen_beanstalkd_reply_thread.join()
        for dispatch_tasks_thread in self.dispatch_tasks_threads.values():
            dispatch_tasks_thread.join()
        for compute_total_tasks_thread \
                in self.compute_total_tasks_threads.values():
            compute_total_tasks_thread.join()
        self.logger.info('Exited running thread')

    def run_forever(self):
        """
            Take jobs from the queue and spawn threads to dispatch them
        """

        # gather beanstalkd info
        self.refresh_beanstalkd_workers_thread = threading.Thread(
            target=self.refresh_beanstalkd_workers_forever)
        self.refresh_beanstalkd_workers_thread.start()

        # start processing replies
        self.listen_beanstalkd_reply_thread = threading.Thread(
            target=self.listen_beanstalkd_reply_forever)
        self.listen_beanstalkd_reply_thread.start()

        if not self.running:
            return

        # restart running jobs
        self.logger.debug('Look for unfinished jobs')
        orchestrator_jobs, exc = self.handle_backend_errors(
            self.backend.list_orchestrator_jobs, self.orchestrator_id)
        if exc is not None:
            self.logger.warn(
                'Unable to list running jobs for this orchestrator: %s', exc)
            return
        for job_info in orchestrator_jobs:
            if not self.running:
                return
            self.safe_handle_running_job(job_info)

        # run next jobs
        while self.running:
            sleep(1)
            job_info, exc = self.handle_backend_errors(self.backend.run_next,
                                                       self.orchestrator_id)
            if exc is not None:
                self.logger.warn('Unable to run next job: %s', exc)
                return
            if not job_info:
                continue
            self.safe_handle_running_job(job_info)

    def safe_handle_running_job(self, job_info):
        try:
            job_id = job_info['job']['id']
            job_type = job_info['job']['type']
            self.logger.info('Run job %s: %s', job_id, job_type)
            self.handle_running_job(job_id, job_type, job_info)
        except Exception as exc:
            self.logger.exception('Failed to run job %s: %s', job_id, exc)
            _, exc = self.handle_backend_errors(self.backend.fail, job_id)
            if exc is not None:
                self.logger.warn(
                    '[job_id=%s] Job has not been updated '
                    'with the failure: %s', job_id, exc)

    def handle_running_job(self, job_id, job_type, job_info):
        """
        First launch the computation of the total number of tasks,
        then launch the dispatching of all tasks across the platform.
        """
        if job_info['tasks']['all_sent']:
            self.logger.info('[job_id=%s] All tasks are already sent', job_id)
            return

        job_class = JOB_TYPES[job_type]
        job = job_class(self.conf, logger=self.logger)

        if job_info['tasks']['total'] == 0 \
                and job_info['tasks']['is_total_temp'] \
                and job_info['tasks']['sent'] == 0 \
                and not job_info['tasks']['all_sent']:
            job.prepare(job_info['config']['params'])

        if job_id in self.compute_total_tasks_threads:
            self.logger.info(
                '[job_id=%s] Already computing the total number of tasks',
                job_id)
        elif job_info['tasks']['is_total_temp']:
            compute_total_tasks_thread = threading.Thread(
                target=self.safe_compute_total_tasks,
                args=(job_id, job_type, job_info, job))
            compute_total_tasks_thread.start()
            self.compute_total_tasks_threads[job_id] = \
                compute_total_tasks_thread
        else:
            self.logger.info(
                '[job_id=%s] The total number of tasks is already computed',
                job_id)

        if job_id in self.dispatch_tasks_threads:
            self.logger.warning('[job_id=%s] Already dispatching the tasks',
                                job_id)
        else:
            dispatch_tasks_thread = threading.Thread(
                target=self.safe_dispatch_tasks,
                args=(job_id, job_type, job_info, job))
            dispatch_tasks_thread.start()
            self.dispatch_tasks_threads[job_id] = dispatch_tasks_thread

    def safe_dispatch_tasks(self, job_id, job_type, job_info, job):
        """
        Dispatch all tasks across the platform
        and update the backend.
        """
        try:
            self.logger.info('[job_id=%s] Starting to dispatch tasks', job_id)
            self.dispatch_tasks(job_id, job_type, job_info, job)
            self.logger.info('[job_id=%s] Finished dispatching tasks', job_id)
        except Exception as exc:
            self.logger.exception('[job_id=%s] Failed to dispatch tasks: %s',
                                  job_id, exc)
            _, exc = self.handle_backend_errors(self.backend.fail, job_id)
            if exc is not None:
                self.logger.warn(
                    '[job_id=%s] Job has not been updated '
                    'with the failure: %s', job_id, exc)
        finally:
            del self.dispatch_tasks_threads[job_id]

    def adapt_speed(self, job_id, job_config, last_check, period=300):
        """
        Pause and/or reduce the rate of creation of new tasks in case
        the number of pending tasks is too high.
        """
        if last_check is not None \
                and time.time() < last_check['last'] + period:
            return last_check

        waiting_time = 0
        while True:
            for _ in range(waiting_time):
                if not self.running:
                    break
                sleep(1)

            if not self.running:
                return last_check

            job_info, exc = self.handle_backend_errors(
                self.backend.get_job_info, job_id)
            if exc is not None:
                self.logger.warning(
                    '[job_id=%s] Unable to retrieve job info '
                    'and adapt the speed: %s', job_id, exc)
                return last_check
            if job_info['job']['status'] != XcuteJobStatus.RUNNING \
                    or job_info['job']['request_pause']:
                return last_check

            job_mtime = job_info['job']['mtime']
            max_tasks_per_second = job_info['config']['tasks_per_second']
            max_tasks_batch_size = job_info['config']['tasks_batch_size']
            tasks_processed = job_info['tasks']['processed']
            pending_tasks = job_info['tasks']['sent'] - tasks_processed

            if last_check is None:  # Initialize
                last_check = dict()
                last_check['last'] = job_mtime
                last_check['processed'] = tasks_processed
                if pending_tasks / max_tasks_per_second >= period:
                    waiting_time = period
                    self.logger.error(
                        '[job_id=%s] Too many pending tasks '
                        'for the next %d seconds: %d (%d tasks/second); '
                        'wait %d seconds and check again', job_id, period,
                        pending_tasks, max_tasks_per_second, waiting_time)
                    continue
                return last_check

            tasks_processed_in_period = tasks_processed \
                - last_check['processed']
            if tasks_processed_in_period == 0:
                last_check['last'] = job_mtime
                last_check['processed'] = tasks_processed
                waiting_time = period
                self.logger.error(
                    '[job_id=%s] No task processed for the last %d seconds; '
                    'wait %d seconds and check again', job_id, period,
                    waiting_time)
                continue

            elapsed = job_mtime - last_check['last']
            actual_tasks_per_second = tasks_processed_in_period \
                / float(elapsed)
            if pending_tasks / actual_tasks_per_second >= period:
                last_check['last'] = job_mtime
                last_check['processed'] = tasks_processed
                waiting_time = period
                self.logger.error(
                    '[job_id=%s] Too many pending tasks '
                    'for the next %d seconds: %d (%f tasks/second); '
                    'wait %d seconds and check again', job_id, period,
                    pending_tasks, actual_tasks_per_second, waiting_time)
                continue

            current_tasks_per_second = job_config['tasks_per_second']
            current_tasks_batch_size = job_config['tasks_batch_size']
            diff_tasks_per_second = \
                current_tasks_per_second - actual_tasks_per_second
            new_tasks_per_second = None
            if diff_tasks_per_second < -0.5:  # Too fast to process tasks
                # The queues need to have a few tasks in advance.
                # Continue at this speed to allow the queues to empty.
                if actual_tasks_per_second > max_tasks_per_second:
                    self.logger.warning(
                        '[job_id=%s] Speeding: %f tasks/second (max: %d)',
                        job_id, actual_tasks_per_second, max_tasks_per_second)
                else:
                    self.logger.info(
                        '[job_id=%s] Speeding: %f tasks/second '
                        '(adapted max: %d)', job_id, actual_tasks_per_second,
                        current_tasks_per_second)
            elif diff_tasks_per_second <= 0.5:  # Good speed to process tasks
                if current_tasks_per_second < max_tasks_per_second:
                    new_tasks_per_second = current_tasks_per_second + 1
                    self.logger.info(
                        '[job_id=%s] Slowly climb up to maximum speed', job_id)
                # else: everything is running smoothly, nothing to adjust.
            else:  # Too slow to process tasks
                new_tasks_per_second = int(math.floor(actual_tasks_per_second))
                self.logger.warning(
                    '[job_id=%s] The task processing speed is too slow: '
                    '%f tasks/second', job_id, actual_tasks_per_second)

            last_check['last'] = job_mtime
            last_check['processed'] = tasks_processed
            if new_tasks_per_second is not None:
                new_tasks_per_second = max(new_tasks_per_second, 1)
                new_tasks_batch_size = min(max_tasks_batch_size,
                                           new_tasks_per_second)
                job_config['tasks_per_second'] = new_tasks_per_second
                job_config['tasks_batch_size'] = new_tasks_batch_size
                self.logger.info(
                    '[job_id=%s] Adapt the speed: %d -> %d tasks/second '
                    '(%d -> %d tasks/batch)', job_id, current_tasks_per_second,
                    new_tasks_per_second, current_tasks_batch_size,
                    new_tasks_batch_size)
            return last_check
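
        # Worked example of the pending-tasks check above (numbers are made
        # up): with sent=12000, processed=9000 and a maximum of 10
        # tasks/second, pending = 3000 and 3000 / 10 = 300 >= period
        # (300 s), so the orchestrator waits `period` seconds and checks
        # again instead of piling up more tasks in the queues.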

    def dispatch_tasks(self, job_id, job_type, job_info, job):
        job_config = job_info['config']
        job_params = job_config['params']
        last_task_id = job_info['tasks']['last_sent']

        job_tasks = job.get_tasks(job_params, marker=last_task_id)
        beanstalkd_workers = self.get_beanstalkd_workers()

        last_check = self.adapt_speed(job_id, job_config, None)
        tasks_per_second = job_config['tasks_per_second']
        tasks_batch_size = job_config['tasks_batch_size']
        batch_per_second = tasks_per_second / float(tasks_batch_size)

        tasks_run_time = 0
        # The backend must have the tasks in order
        # to know the last task sent
        tasks = OrderedDict()
        for task_id, task_payload in job_tasks:
            if not self.running:
                break

            tasks[task_id] = task_payload
            if len(tasks) < tasks_batch_size:
                continue

            tasks_run_time = ratelimit(tasks_run_time, batch_per_second)

            # Make sure that the sent tasks will be saved
            # before being processed
            exc = None
            sent = False
            while not sent:
                (job_status, old_last_sent), exc = self.handle_backend_errors(
                    self.backend.update_tasks_sent, job_id, tasks.keys())
                if exc is not None:
                    self.logger.warn(
                        '[job_id=%s] Job could not update '
                        'the sent tasks: %s', job_id, exc)
                    break
                sent = self.dispatch_tasks_batch(beanstalkd_workers, job_id,
                                                 job_type, job_config, tasks)
                if not sent:
                    self.logger.warn(
                        '[job_id=%s] Job aborting the last sent tasks', job_id)
                    job_status, exc = self.handle_backend_errors(
                        self.backend.abort_tasks_sent, job_id, tasks.keys(),
                        old_last_sent)
                    if exc is not None:
                        self.logger.warn(
                            '[job_id=%s] Job could not abort '
                            'the last sent tasks: %s', job_id, exc)
                        break
                if job_status == XcuteJobStatus.PAUSED:
                    self.logger.info('Job %s is paused', job_id)
                    return

                if not self.running:
                    break
                sleep(1)

            if exc is not None and not self.running:
                break
            tasks.clear()

            # After each tasks batch sent, adapt the sending speed
            # according to the processing speed.
            last_check = self.adapt_speed(job_id, job_config, last_check)
            tasks_per_second = job_config['tasks_per_second']
            tasks_batch_size = job_config['tasks_batch_size']
            batch_per_second = tasks_per_second / float(tasks_batch_size)
        else:
            # Make sure that the sent tasks will be saved
            # before being processed
            sent = False
            while not sent:
                (job_status, old_last_sent), exc = self.handle_backend_errors(
                    self.backend.update_tasks_sent,
                    job_id,
                    tasks.keys(),
                    all_tasks_sent=True)
                if exc is not None:
                    self.logger.warn(
                        '[job_id=%s] Job could not update '
                        'the sent tasks: %s', job_id, exc)
                    break
                if tasks:
                    sent = self.dispatch_tasks_batch(beanstalkd_workers,
                                                     job_id, job_type,
                                                     job_config, tasks)
                else:
                    sent = True
                if not sent:
                    self.logger.warn(
                        '[job_id=%s] Job aborting the last sent tasks', job_id)
                    job_status, exc = self.handle_backend_errors(
                        self.backend.abort_tasks_sent, job_id, tasks.keys(),
                        old_last_sent)
                    if exc is not None:
                        self.logger.warn(
                            '[job_id=%s] Job could not abort '
                            'the last sent tasks: %s', job_id, exc)
                        break
                else:
                    if job_status == XcuteJobStatus.FINISHED:
                        self.logger.info('Job %s is finished', job_id)

                    self.logger.info('Finished dispatching job (job_id=%s)',
                                     job_id)
                    return
                if job_status == XcuteJobStatus.PAUSED:
                    self.logger.info('Job %s is paused', job_id)
                    return

                if not self.running:
                    break
                sleep(1)

        self.logger.warn('[job_id=%s] Job was stopped before it was finished',
                         job_id)

        _, exc = self.handle_backend_errors(self.backend.free, job_id)
        if exc is not None:
            self.logger.warn('[job_id=%s] Job has not been freed: %s', job_id,
                             exc)

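Note that dispatch_tasks() above rate-limits whole batches, not individual
tasks; a worked example with hypothetical numbers:

tasks_per_second = 100    # hypothetical configuration
tasks_batch_size = 20     # hypothetical configuration
batch_per_second = tasks_per_second / float(tasks_batch_size)  # 5.0
# ratelimit() is called once per batch and sleeps just enough to keep
# the loop under 5 batches, i.e. 100 tasks, per second.
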
    def dispatch_tasks_batch(self, beanstalkd_workers, job_id, job_type,
                             job_config, tasks):
        """
            Try sending a task until it's ok
        """

        beanstalkd_payload = self.make_beanstalkd_payload(
            job_id, job_type, job_config, tasks)

        if len(beanstalkd_payload) > 2**16:
            raise ValueError('Task payload is too big (length=%s)' %
                             len(beanstalkd_payload))

        # max 2 minutes per task
        ttr = len(tasks) * DEFAULT_TTR

        i = 0
        for beanstalkd_worker in beanstalkd_workers:
            if not self.running:
                return False
            i += 1
            if beanstalkd_worker is None:
                # Try for at least 30 seconds
                if i > 30:
                    break
                continue

            try:
                beanstalkd_worker.put(beanstalkd_payload, ttr=ttr)
                self.logger.debug('[job_id=%s] Tasks sent to %s: %s', job_id,
                                  beanstalkd_worker.addr, str(tasks))
                return True
            except Exception as exc:
                self.logger.warn('[job_id=%s] Failed to send beanstalkd job: '
                                 '%s',
                                 job_id, exc)
                # TODO(adu): We could be more lenient
                # and wait for a few errors in a row
                # to happen before marking it as broken.
                beanstalkd_worker.is_broken = True
            sleep(1)
        return False

    def make_beanstalkd_payload(self, job_id, job_type, job_config, tasks):
        return json.dumps({
            'event': EventTypes.XCUTE_TASKS,
            'data': {
                'job_id': job_id,
                'job_type': job_type,
                'job_config': job_config,
                'tasks': tasks,
                'beanstalkd_reply': {
                    'addr': self.beanstalkd_reply_addr,
                    'tube': self.beanstalkd_reply_tube,
                },
            }
        })

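For reference, the payload built above is what a worker consumes, and
process_reply() below shows the fields the orchestrator expects back. A
hypothetical worker-side handler (the function name and the processing loop
are illustrative, not part of the source):

import json

def handle_xcute_event(encoded_payload):
    event = json.loads(encoded_payload)
    data = event['data']
    task_results, task_errors = dict(), dict()
    for task_id, task_payload in data['tasks'].items():
        # ... run the task described by task_payload here ...
        task_results[task_id] = 'ok'
    # The reply goes to the tube named in data['beanstalkd_reply'],
    # with the fields process_reply() reads.
    return json.dumps({
        'job_id': data['job_id'],
        'task_ids': list(data['tasks'].keys()),
        'task_results': task_results,
        'task_errors': task_errors,
    })
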
    def safe_compute_total_tasks(self, job_id, job_type, job_info, job):
        """
        Compute the total number of tasks
        and update the backend.
        """
        try:
            self.logger.info(
                '[job_id=%s] Starting to compute the total number of tasks',
                job_id)
            self.compute_total_tasks(job_id, job_type, job_info, job)
            self.logger.info(
                '[job_id=%s] Finished computing the total number of tasks',
                job_id)
        except Exception as exc:
            self.logger.exception(
                '[job_id=%s] Failed to compute the total number of tasks: %s',
                job_id, exc)
        finally:
            del self.compute_total_tasks_threads[job_id]

    def compute_total_tasks(self, job_id, job_type, job_info, job):
        job_params = job_info['config']['params']
        total_marker = job_info['tasks']['total_marker']

        tasks_counter = job.get_total_tasks(job_params, marker=total_marker)
        for total_marker, tasks_incr in tasks_counter:
            stop, exc = self.handle_backend_errors(
                self.backend.incr_total_tasks, job_id, total_marker,
                tasks_incr)
            if exc is not None:
                self.logger.warn(
                    '[job_id=%s] Job has not been updated '
                    'with total tasks: %s', job_id, exc)
                return
            if stop or not self.running:
                return

        total_tasks, exc = self.handle_backend_errors(
            self.backend.total_tasks_done, job_id)
        if exc is not None:
            self.logger.warn(
                '[job_id=%s] Job has not been updated '
                'with last total tasks: %s', job_id, exc)
            return
        self.logger.info('[job_id=%s] %s estimated tasks', job_id, total_tasks)

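The loop above only consumes: each job class is expected to provide a
get_total_tasks() generator yielding (marker, increment) pairs so the count
can resume from total_marker after a restart. A minimal sketch of such a
method (_list_items is a hypothetical listing helper):

def get_total_tasks(self, job_params, marker=None):
    count = 0
    item_id = marker
    for item_id in self._list_items(job_params, marker=marker):
        count += 1
        if count % 1000 == 0:
            # Yield a resume marker and the number of tasks counted
            # since the previous yield.
            yield item_id, 1000
            count = 0
    if count:
        yield item_id, count
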
    def listen_beanstalkd_reply_forever(self):
        """
            Process this orchestrator's job replies
        """

        self.logger.info('Connecting to the reply beanstalkd')

        while self.running:
            try:
                listener = BeanstalkdListener(addr=self.beanstalkd_reply_addr,
                                              tube=self.beanstalkd_reply_tube,
                                              logger=self.logger)

                break
            except ConnectionError:
                self.logger.error('Failed to connect to the reply beanstalkd')

            sleep(5)

        self.logger.info('Listening to replies on %s (tube=%s)',
                         self.beanstalkd_reply_addr,
                         self.beanstalkd_reply_tube)

        # Process the replies until the orchestrator is stopped
        while self.running:
            connection_error = self.listen_loop(listener)

            # in case of a beanstalkd connection error
            # sleep to avoid spamming
            if connection_error:
                sleep(2)

        self.logger.info('Exited listening thread')

    def listen_loop(self, listener):
        """
            One iteration of the listening loop
        """

        connection_error = False
        try:
            replies = listener.fetch_job(
                self.process_reply, timeout=self.DEFAULT_DISPATCHER_TIMEOUT)

            # to force the execution of process_reply
            # if there were no replies, consider it as a connection error
            connection_error = len(list(replies)) == 0

        except OioTimeout:
            pass

        return connection_error

    def process_reply(self, beanstalkd_job_id, encoded_reply):
        reply = json.loads(encoded_reply)

        job_id = reply['job_id']
        task_ids = reply['task_ids']
        task_results = reply['task_results']
        task_errors = reply['task_errors']

        self.logger.debug('Tasks processed (job_id=%s): %s', job_id, task_ids)

        try:
            finished, exc = self.handle_backend_errors(
                self.backend.update_tasks_processed, job_id, task_ids,
                task_errors, task_results)
            if exc is None:
                if finished:
                    self.logger.info('Job %s is finished', job_id)
            else:
                self.logger.warn(
                    '[job_id=%s] Job has not been updated '
                    'with the processed tasks: %s', job_id, exc)
        except Exception:
            self.logger.exception('Error processing reply')

        yield None

    def refresh_beanstalkd_workers_forever(self):
        """
        Refresh beanstalkd workers by looking at the score,
        existing tubes and tube statistics.
        """
        while self.running:
            try:
                beanstalkd_workers = self._find_beanstalkd_workers()
            except Exception as exc:
                self.logger.error('Failed to find beanstalkd workers: %s', exc)
                # TODO(adu): We could keep trying to send jobs
                # to the beanstalkd we already found.
                # But we need the score to know how to dispatch the tasks...
                beanstalkd_workers = dict()

            old_beanstalkd_workers_addr = set(self.beanstalkd_workers.keys())
            new_beanstalkd_workers_addr = set(beanstalkd_workers.keys())

            added_beanstalkds = new_beanstalkd_workers_addr \
                - old_beanstalkd_workers_addr
            for beanstalkd_addr in added_beanstalkds:
                self.logger.info('Add beanstalkd %s', beanstalkd_addr)
                beanstalkd = beanstalkd_workers[beanstalkd_addr]
                beanstalkd.use(self.beanstalkd_workers_tube)

            removed_beanstalkds = old_beanstalkd_workers_addr \
                - new_beanstalkd_workers_addr
            for beanstalkd_addr in removed_beanstalkds:
                self.logger.info('Remove beanstalkd %s', beanstalkd_addr)

            self.logger.info('Refresh beanstalkd workers')
            self.beanstalkd_workers = beanstalkd_workers

            for _ in range(self.refresh_time_beanstalkd_workers):
                if not self.running:
                    break
                sleep(1)

        self.logger.info('Exited beanstalkd workers thread')

    def _find_beanstalkd_workers(self):
        """
        Find beanstalkd workers by looking at the score,
        existing tubes and tube statistics.
        """
        all_beanstalkd = self.conscience_client.all_services('beanstalkd')

        beanstalkd_workers = dict()
        for beanstalkd_info in all_beanstalkd:
            try:
                beanstalkd = self._check_beanstalkd_worker(beanstalkd_info)
                if not beanstalkd:
                    continue
                beanstalkd_workers[beanstalkd.addr] = beanstalkd
            except Exception as exc:
                self.logger.error('Failed to check beanstalkd: %s', exc)
        return beanstalkd_workers

    def _check_beanstalkd_worker(self, beanstalkd_info):
        """
        Check beanstalkd worker by looking at the score,
        existing tubes and tube statistics.
        """
        beanstalkd_addr = 'beanstalk://' + beanstalkd_info['addr']
        beanstalkd_score = beanstalkd_info['score']
        if beanstalkd_score == 0:
            self.logger.debug('Ignore beanstalkd %s: score=0', beanstalkd_addr)
            return None

        beanstalkd = self.beanstalkd_workers.get(beanstalkd_addr)
        if not beanstalkd:
            beanstalkd = Beanstalk.from_url(beanstalkd_addr)
            beanstalkd.addr = beanstalkd_addr

        beanstalkd_tubes = beanstalkd.tubes()
        if self.beanstalkd_workers_tube not in beanstalkd_tubes:
            self.logger.debug(
                'Ignore beanstalkd %s: '
                'No worker has ever listened to the tube %s', beanstalkd_addr,
                self.beanstalkd_workers_tube)
            return None

        current_stats = beanstalkd.stats_tube(self.beanstalkd_workers_tube)
        beanstalkd_jobs_ready = current_stats['current-jobs-ready']
        if beanstalkd_jobs_ready > 0:
            beanstalkd_jobs_reserved = current_stats['current-jobs-reserved']
            if beanstalkd_jobs_reserved <= 0:
                self.logger.warn(
                    'Ignore beanstalkd %s: The workers are not processing tasks '
                    '(current-jobs-ready=%d, current-jobs-reserved=%d)',
                    beanstalkd_addr, beanstalkd_jobs_ready,
                    beanstalkd_jobs_reserved)
                return None

            if beanstalkd_jobs_ready >= self.max_jobs_per_beanstalkd:
                self.logger.warn(
                    'Ignore beanstalkd %s: The queue is full '
                    '(current-jobs-ready=%d, current-jobs-reserved=%d)',
                    beanstalkd_addr, beanstalkd_jobs_ready,
                    beanstalkd_jobs_reserved)
                return None

        if hasattr(beanstalkd, 'is_broken') and beanstalkd.is_broken:
            self.logger.info(
                'Beanstalkd %s was broken, and now it\'s coming back',
                beanstalkd_addr)
        beanstalkd.is_broken = False

        # Favor the workers with a good score
        # 50% -> beanstalkd score
        worker_score = beanstalkd_score * 50. / 100.
        # 50% -> beanstalkd tube size
        worker_score += 50 - (beanstalkd_jobs_ready * 50. /
                              self.max_jobs_per_beanstalkd)
        beanstalkd.occurrence = int(math.ceil(worker_score / 10.))

        self.logger.debug(
            'Give the green light to beanstalkd %s (worker_score=%d)',
            beanstalkd_addr, worker_score)
        return beanstalkd

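A worked example of the scoring above, with hypothetical numbers (assuming
max_jobs_per_beanstalkd is 1024):

beanstalkd_score = 80          # conscience score
beanstalkd_jobs_ready = 256    # jobs waiting in the tube
max_jobs_per_beanstalkd = 1024

worker_score = beanstalkd_score * 50. / 100.     # 40.0
worker_score += 50 - (beanstalkd_jobs_ready * 50. /
                      max_jobs_per_beanstalkd)   # 40.0 + 37.5 = 77.5
occurrence = int(math.ceil(worker_score / 10.))  # ceil(7.75) = 8
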
    def get_beanstalkd_workers(self):
        """
            Yield beanstalkd workers following a loadbalancing strategy
        """

        beanstalkd_workers_id = None
        beanstalkd_workers = list()
        while True:
            if not self.beanstalkd_workers:
                self.logger.info('No beanstalkd worker available')
                yield None
                sleep(1)
                continue

            if id(self.beanstalkd_workers) != beanstalkd_workers_id:
                beanstalkd_workers_id = id(self.beanstalkd_workers)
                beanstalkd_workers = list()
                for beanstalkd in self.beanstalkd_workers.values():
                    for _ in range(beanstalkd.occurrence):
                        beanstalkd_workers.append(beanstalkd)

            # Shuffle so that all the jobs do not follow the same sequence
            random.shuffle(beanstalkd_workers)

            yielded = False
            for beanstalkd_worker in beanstalkd_workers:
                if id(self.beanstalkd_workers) != beanstalkd_workers_id:
                    break
                if beanstalkd_worker.is_broken:
                    continue
                yield beanstalkd_worker
                yielded = True
            else:
                if not yielded:
                    self.logger.info(
                        'All beanstalkd workers available are broken')
                    yield None
                    sleep(1)

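A hypothetical caller of this generator (payload and ttr are illustrative):
it must skip the None placeholders yielded while no worker is available,
exactly as dispatch_tasks_batch() does above.

workers = orchestrator.get_beanstalkd_workers()
for worker in workers:
    if worker is None:
        continue  # no worker ready yet; the generator already slept
    worker.put(payload, ttr=ttr)
    break
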
    def exit_gracefully(self, *args, **kwargs):
        if self.running:
            self.logger.info('Exiting gracefully')
            self.running = False
        else:
            self.logger.info('Already exiting gracefully')
Beispiel #25
0
    def __init__(self, conf, tool):
        super(_DistributedDispatcher, self).__init__(conf, tool)
        self.sending = None

        self.max_items_per_second = int_value(self.conf.get(
            'items_per_second'), self.tool.DEFAULT_ITEM_PER_SECOND)

        # All available beanstalkd
        conscience_client = ConscienceClient(self.conf)
        all_beanstalkd = conscience_client.all_services('beanstalkd')
        all_available_beanstalkd = dict()
        for beanstalkd in all_beanstalkd:
            if beanstalkd['score'] <= 0:
                continue
            all_available_beanstalkd[beanstalkd['addr']] = beanstalkd
        if not all_available_beanstalkd:
            raise OioException('No beanstalkd available')

        # Beanstalkd workers
        workers_tube = self.conf.get('distributed_beanstalkd_worker_tube') \
            or self.tool.DEFAULT_DISTRIBUTED_BEANSTALKD_WORKER_TUBE
        self.beanstalkd_workers = dict()
        for beanstalkd in locate_tube(all_available_beanstalkd.values(),
                                      workers_tube):
            beanstalkd_worker = BeanstalkdSender(
                beanstalkd['addr'], workers_tube, self.logger)
            self.beanstalkd_workers[beanstalkd['addr']] = beanstalkd_worker
            self.logger.info(
                'Beanstalkd %s using tube %s is selected as a worker',
                beanstalkd_worker.addr, beanstalkd_worker.tube)
        if not self.beanstalkd_workers:
            raise OioException('No beanstalkd worker available')
        nb_workers = len(self.beanstalkd_workers)
        if self.max_items_per_second > 0:
            # Max 5 seconds in advance
            queue_size_per_worker = self.max_items_per_second * 5 / nb_workers
        else:
            queue_size_per_worker = 1024
        for _, beanstalkd_worker in self.beanstalkd_workers.items():
            beanstalkd_worker.high_limit = queue_size_per_worker

        # Beanstalkd reply
        beanstalkd_reply = dict()
        try:
            local_services = conscience_client.local_services()
            for local_service in local_services:
                if local_service['type'] != 'beanstalkd':
                    continue
                beanstalkd = all_available_beanstalkd.get(
                    local_service['addr'])
                if beanstalkd is None:
                    continue
                if beanstalkd_reply \
                        and beanstalkd_reply['score'] >= beanstalkd['score']:
                    continue
                beanstalkd_reply = beanstalkd
        except Exception as exc:  # pylint: disable=broad-except
            self.logger.warning(
                'ERROR when searching for beanstalkd locally: %s', exc)
        if not beanstalkd_reply:
            self.logger.warn('No beanstalkd available locally')

            try:
                beanstalkd = conscience_client.next_instance('beanstalkd')
                beanstalkd_reply = all_available_beanstalkd[beanstalkd['addr']]
            except Exception as exc:  # pylint: disable=broad-except
                self.logger.warning(
                    'ERROR when searching for beanstalkd: %s', exc)
        beanstalkd_reply_addr = beanstalkd_reply['addr']

        # If the tube already exists, another service is already using it
        tube_reply = workers_tube + '.reply.' + str(time.time())
        tubes = Beanstalk.from_url(
            'beanstalk://' + beanstalkd_reply_addr).tubes()
        if tube_reply in tubes:
            raise OioException(
                'Beanstalkd %s using tube %s is already used'
                % (beanstalkd_reply_addr, tube_reply))

        self.beanstalkd_reply = BeanstalkdListener(
            beanstalkd_reply_addr, tube_reply, self.logger)
        self.logger.info(
            'Beanstalkd %s using tube %s is selected for the replies',
            self.beanstalkd_reply.addr, self.beanstalkd_reply.tube)
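
A worked example of the queue sizing above, with hypothetical numbers:

max_items_per_second = 30
nb_workers = 3
queue_size_per_worker = max_items_per_second * 5 / nb_workers  # 50
# Each worker buffers at most about 5 seconds of work in advance.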
Beispiel #26
0
class ContentFactory(object):
    def __init__(self, conf):
        self.conf = conf
        self.logger = get_logger(conf)
        self.cs_client = ConscienceClient(conf)
        self.container_client = ContainerClient(conf)
        self.ns_info = self.cs_client.info()

    def _extract_datasec(self, stgpol_name):
        try:
            stgpol = self.ns_info["storage_policy"][stgpol_name]
        except KeyError:
            self.logger.error("Storage policy '%s' not found" % stgpol_name)
            raise InconsistentContent("Storage policy not found")

        stgclass_name, datasec_name, datatreat_name = stgpol.split(':')
        if datasec_name == 'NONE':
            return "DUP", {"nb_copy": "1", "distance": "0"}

        try:
            datasec = self.ns_info["data_security"][datasec_name]
        except KeyError:
            self.logger.error("Data security '%s' not found" % datasec_name)
            raise InconsistentContent("Data security not found")

        ds_type, ds_args = datasec.split(':')
        args = {}
        for arg in ds_args.split('|'):
            key, value = arg.split('=')
            args[key] = value

        return ds_type, args

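To make the parsing above concrete, here is a hypothetical namespace
configuration and the values _extract_datasec() would derive from it:

ns_info = {
    'storage_policy': {'THREECOPIES': 'NONE:DUPONETHREE:NONE'},
    'data_security': {'DUPONETHREE': 'DUP:nb_copy=3|distance=1'},
}
# _extract_datasec('THREECOPIES') splits the policy into
# (stgclass, datasec, datatreat) = ('NONE', 'DUPONETHREE', 'NONE'),
# then splits the data security 'DUP:nb_copy=3|distance=1' into:
#   ds_type = 'DUP'
#   args = {'nb_copy': '3', 'distance': '1'}
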
    def get(self, container_id, content_id):
        try:
            meta, chunks = self.container_client.content_show(
                cid=container_id, content=content_id)
        except NotFound:
            raise ContentNotFound("Content %s/%s not found" %
                                  (container_id, content_id))

        pol_type, pol_args = self._extract_datasec(meta['policy'])

        if pol_type == "DUP":
            return DupContent(self.conf, container_id, meta, chunks, pol_args)
        elif pol_type == "RAIN":
            return RainContent(self.conf, container_id, meta, chunks, pol_args)

        raise InconsistentContent("Unknown storage policy")

    def new(self, container_id, path, size, policy):
        meta, chunks = self.container_client.content_prepare(cid=container_id,
                                                             path=path,
                                                             size=size,
                                                             stgpol=policy)

        pol_type, pol_args = self._extract_datasec(meta['policy'])

        if pol_type == "DUP":
            return DupContent(self.conf, container_id, meta, chunks, pol_args)
        elif pol_type == "RAIN":
            return RainContent(self.conf, container_id, meta, chunks, pol_args)

        raise InconsistentContent("Unknown storage policy")

    def change_policy(self, container_id, content_id, new_policy):
        old_content = self.get(container_id, content_id)
        if old_content.stgpol_name == new_policy:
            return old_content

        new_content = self.new(container_id, old_content.path,
                               old_content.length, new_policy)

        stream = old_content.download()
        new_content.upload(GeneratorIO(stream))
        # the old content is automatically deleted because the new content has
        # the same name (but not the same id)
        return new_content
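
A hypothetical usage of the factory (identifiers and policy name are
illustrative):

factory = ContentFactory(conf)
content = factory.change_policy(container_id, content_id, 'THREECOPIES')
# `content` is the re-uploaded object under the new policy; the old
# version disappears because both share the same path.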
Beispiel #27
0
class RawxDecommissionJob(XcuteRdirJob):

    JOB_TYPE = 'rawx-decommission'
    TASK_CLASS = RawxDecommissionTask

    DEFAULT_RAWX_TIMEOUT = 60.0
    DEFAULT_MIN_CHUNK_SIZE = 0
    DEFAULT_MAX_CHUNK_SIZE = 0
    DEFAULT_USAGE_TARGET = 0
    DEFAULT_USAGE_CHECK_INTERVAL = 60.0

    @classmethod
    def sanitize_params(cls, job_params):
        sanitized_job_params, _ = super(RawxDecommissionJob,
                                        cls).sanitize_params(job_params)

        # specific configuration
        service_id = job_params.get('service_id')
        if not service_id:
            raise ValueError('Missing service ID')
        sanitized_job_params['service_id'] = service_id

        sanitized_job_params['rawx_timeout'] = float_value(
            job_params.get('rawx_timeout'), cls.DEFAULT_RAWX_TIMEOUT)

        sanitized_job_params['min_chunk_size'] = int_value(
            job_params.get('min_chunk_size'), cls.DEFAULT_MIN_CHUNK_SIZE)

        sanitized_job_params['max_chunk_size'] = int_value(
            job_params.get('max_chunk_size'), cls.DEFAULT_MAX_CHUNK_SIZE)

        excluded_rawx = job_params.get('excluded_rawx')
        if excluded_rawx:
            excluded_rawx = excluded_rawx.split(',')
        else:
            excluded_rawx = list()
        sanitized_job_params['excluded_rawx'] = excluded_rawx

        sanitized_job_params['usage_target'] = int_value(
            job_params.get('usage_target'), cls.DEFAULT_USAGE_TARGET)

        sanitized_job_params['usage_check_interval'] = float_value(
            job_params.get('usage_check_interval'),
            cls.DEFAULT_USAGE_CHECK_INTERVAL)

        return sanitized_job_params, 'rawx/%s' % service_id

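A hypothetical call to sanitize_params() (input values are illustrative;
rdir_fetch_limit and rdir_timeout defaults come from the parent class):

job_params = {'service_id': 'rawx-1', 'usage_target': '20'}
sanitized, job_lock = RawxDecommissionJob.sanitize_params(job_params)
# sanitized has every default filled in: rawx_timeout=60.0,
# min_chunk_size=0, max_chunk_size=0, excluded_rawx=[], usage_target=20,
# usage_check_interval=60.0, plus the parent-class defaults.
# job_lock == 'rawx/rawx-1'
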
    def __init__(self, conf, logger=None):
        super(RawxDecommissionJob, self).__init__(conf, logger=logger)
        self.rdir_client = RdirClient(self.conf, logger=self.logger)
        self.conscience_client = ConscienceClient(self.conf,
                                                  logger=self.logger)

    def get_usage(self, service_id):
        services = self.conscience_client.all_services('rawx', full=True)
        for service in services:
            if service_id == service['tags'].get('tag.service_id',
                                                 service['addr']):
                return 100 - service['tags']['stat.space']
        raise ValueError('No rawx service with this ID (%s)' % service_id)

    def get_tasks(self, job_params, marker=None):
        service_id = job_params['service_id']
        usage_target = job_params['usage_target']
        usage_check_interval = job_params['usage_check_interval']

        if usage_target > 0:
            now = time.time()
            current_usage = self.get_usage(service_id)
            if current_usage <= usage_target:
                self.logger.info(
                    'current usage %.2f%%: target already reached (%.2f%%)',
                    current_usage, usage_target)
                return
            last_usage_check = now

        chunk_infos = self.get_chunk_infos(job_params, marker=marker)
        for container_id, content_id, chunk_id, _ in chunk_infos:
            task_id = '|'.join((container_id, content_id, chunk_id))
            yield task_id, {
                'container_id': container_id,
                'content_id': content_id,
                'chunk_id': chunk_id
            }

            if usage_target <= 0:
                continue
            now = time.time()
            if now - last_usage_check < usage_check_interval:
                continue
            current_usage = self.get_usage(service_id)
            if current_usage > usage_target:
                last_usage_check = now
                continue
            self.logger.info('current usage %.2f%%: target reached (%.2f%%)',
                             current_usage, usage_target)
            return

    def get_total_tasks(self, job_params, marker=None):
        service_id = job_params['service_id']
        usage_target = job_params['usage_target']

        current_usage = self.get_usage(service_id)
        if current_usage <= usage_target:
            return

        kept_chunks_ratio = 1 - (usage_target / float(current_usage))
        chunk_infos = self.get_chunk_infos(job_params, marker=marker)
        i = 0
        for i, (container_id, content_id, chunk_id, _) \
                in enumerate(chunk_infos, 1):
            if i % 1000 == 0:
                yield ('|'.join((container_id, content_id, chunk_id)),
                       int(math.ceil(1000 * kept_chunks_ratio)))

        remaining = int(math.ceil(i % 1000 * kept_chunks_ratio))
        if remaining > 0:
            yield '|'.join((container_id, content_id, chunk_id)), remaining

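A worked example of the estimate above, with hypothetical numbers:

current_usage = 90.0   # percent of disk used
usage_target = 60      # percent to reach
kept_chunks_ratio = 1 - (usage_target / float(current_usage))  # ~0.3333
# For every 1000 chunks listed, the job counts about
# int(math.ceil(1000 * kept_chunks_ratio)) == 334 tasks.
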
    def get_chunk_infos(self, job_params, marker=None):
        service_id = job_params['service_id']
        rdir_fetch_limit = job_params['rdir_fetch_limit']
        rdir_timeout = job_params['rdir_timeout']

        chunk_infos = self.rdir_client.chunk_fetch(service_id,
                                                   timeout=rdir_timeout,
                                                   limit=rdir_fetch_limit,
                                                   start_after=marker)

        return chunk_infos
Beispiel #28
0
 def conscience(self):
     if not self._conscience:
         from oio.conscience.client import ConscienceClient
         self._conscience = ConscienceClient(self.conf,
                                             pool_manager=self.http_pool)
     return self._conscience
Beispiel #29
0
 def __init__(self, conf, logger=None):
     super(RawxDecommissionJob, self).__init__(conf, logger=logger)
     self.rdir_client = RdirClient(self.conf, logger=self.logger)
     self.conscience_client = ConscienceClient(self.conf,
                                               logger=self.logger)
Beispiel #30
0
 def __init__(self, conf, **kwargs):
     super(AccountClient, self).__init__(conf, **kwargs)
     self.cs = ConscienceClient(self.conf)
Beispiel #31
0
 def __init__(self, conf, **kwargs):
     super(AccountClient, self).__init__(conf, **kwargs)
     self.cs = ConscienceClient(self.conf)
Beispiel #32
0
 def cs(self):
     if not self._cs:
         self._cs = ConscienceClient(self.conf,
                                     logger=self.logger,
                                     pool_manager=self.rdir.pool_manager)
     return self._cs
Beispiel #33
0
class BlobClient(object):
    """A low-level client to rawx services."""

    def __init__(self, conf=None, perfdata=None,
                 logger=None, connection_pool=None, **kwargs):
        self.conf = conf
        self.perfdata = perfdata

        self.logger = logger or get_logger(self.conf)
        # FIXME(FVE): we do not target the same set of services,
        # we should use a separate connection pool for rawx services.
        self.http_pool = connection_pool or get_pool_manager(**kwargs)
        self.conscience_client = ConscienceClient(conf, logger=self.logger,
                                                  pool_manager=self.http_pool)

    def resolve_url(self, url):
        return self.conscience_client.resolve_url('rawx', url)

    @update_rawx_perfdata
    @ensure_request_id
    def chunk_put(self, url, meta, data, **kwargs):
        if not hasattr(data, 'read'):
            data = utils.GeneratorIO(data)
        chunk = {'url': self.resolve_url(url), 'pos': meta['chunk_pos']}
        # FIXME: ugly
        chunk_method = meta.get('chunk_method',
                                meta.get('content_chunkmethod'))
        storage_method = STORAGE_METHODS.load(chunk_method)
        checksum = meta['metachunk_hash' if storage_method.ec
                        else 'chunk_hash']
        writer = ReplicatedMetachunkWriter(
            meta, [chunk], FakeChecksum(checksum),
            storage_method, quorum=1, perfdata=self.perfdata)
        writer.stream(data, None)

    @update_rawx_perfdata
    @ensure_request_id
    def chunk_delete(self, url, **kwargs):
        resp = self.http_pool.request('DELETE', self.resolve_url(url),
                                      **kwargs)
        if resp.status != 204:
            raise exc.from_response(resp)
        return resp

    @ensure_request_id
    def chunk_delete_many(self, chunks, cid=None,
                          concurrency=PARALLEL_CHUNKS_DELETE,
                          **kwargs):
        """
        :rtype: `list` of either `urllib3.response.HTTPResponse`
            or `urllib3.exceptions.HTTPError`, with an extra "chunk"
            attribute.
        """
        headers = kwargs['headers'].copy()
        if cid is not None:
            # This is only to get a nice access log
            headers['X-oio-chunk-meta-container-id'] = cid
        timeout = kwargs.get('timeout')
        if not timeout:
            timeout = urllib3.Timeout(CHUNK_TIMEOUT)

        def __delete_chunk(chunk_):
            try:
                resp = self.http_pool.request(
                    "DELETE", self.resolve_url(chunk_['url']),
                    headers=headers, timeout=timeout)
                resp.chunk = chunk_
                return resp
            except urllib3.exceptions.HTTPError as ex:
                ex.chunk = chunk_
                return ex

        pile = GreenPile(concurrency)
        for chunk in chunks:
            pile.spawn(__delete_chunk, chunk)
        resps = [resp for resp in pile if resp]
        return resps

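A hypothetical usage of chunk_delete_many() (names are illustrative); as
documented above, each item is either a response or an exception carrying
a `chunk` attribute:

resps = blob_client.chunk_delete_many(chunks, cid=container_id,
                                      headers={REQID_HEADER: reqid})
for resp in resps:
    if isinstance(resp, Exception):
        print('Deletion failed for %s: %s' % (resp.chunk['url'], resp))
    elif resp.status != 204:
        print('Unexpected status %d for %s' % (resp.status,
                                               resp.chunk['url']))
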
    @update_rawx_perfdata
    @ensure_headers
    @ensure_request_id
    def chunk_get(self, url, check_headers=True, **kwargs):
        """
        :keyword check_headers: when True (the default), raise FaultyChunk
            if a mandatory response header is missing.
        :returns: a tuple with a dictionary of chunk metadata and a stream
            to the chunk's data.
        """
        url = self.resolve_url(url)
        reader = ChunkReader([{'url': url}], READ_BUFFER_SIZE,
                             **kwargs)
        # This must be done now if we want to access headers
        stream = reader.stream()
        headers = extract_headers_meta(reader.headers, check=check_headers)
        return headers, stream

    @update_rawx_perfdata
    @ensure_request_id
    def chunk_head(self, url, **kwargs):
        """
        Perform a HEAD request on a chunk.

        :param url: URL of the chunk to request.
        :keyword xattr: when False, ask the rawx not to read
            extended attributes of the chunk.
        :keyword check_hash: when True, ask the rawx to validate
            checksum of the chunk.
        :returns: a `dict` with chunk metadata (empty when xattr is False).
        """
        _xattr = bool(kwargs.get('xattr', True))
        url = self.resolve_url(url)
        headers = kwargs['headers'].copy()
        headers[FETCHXATTR_HEADER] = _xattr
        if bool(kwargs.get('check_hash', False)):
            headers[CHECKHASH_HEADER] = True

        try:
            resp = self.http_pool.request(
                'HEAD', url, headers=headers)
        except urllib3.exceptions.HTTPError as ex:
            oio_exception_from_httperror(ex, reqid=headers[REQID_HEADER],
                                         url=url)
        if resp.status == 200:
            if not _xattr:
                return dict()
            return extract_headers_meta(resp.headers)
        else:
            raise exc.from_response(resp)

    @update_rawx_perfdata
    @ensure_request_id
    def chunk_copy(self, from_url, to_url, chunk_id=None, fullpath=None,
                   cid=None, path=None, version=None, content_id=None,
                   **kwargs):
        stream = None
        # Check source headers only when new fullpath is not provided
        kwargs['check_headers'] = not bool(fullpath)
        try:
            meta, stream = self.chunk_get(from_url, **kwargs)
            meta['oio_version'] = OIO_VERSION
            meta['chunk_id'] = chunk_id or to_url.split('/')[-1]
            meta['full_path'] = fullpath or meta['full_path']
            meta['container_id'] = cid or meta.get('container_id')
            meta['content_path'] = path or meta.get('content_path')
            # FIXME: the original keys are the good ones.
            # ReplicatedMetachunkWriter should be modified to accept them.
            meta['version'] = version or meta.get('content_version')
            meta['id'] = content_id or meta.get('content_id')
            meta['chunk_method'] = meta['content_chunkmethod']
            meta['policy'] = meta['content_policy']
            copy_meta = self.chunk_put(to_url, meta, stream, **kwargs)
            return copy_meta
        finally:
            if stream:
                stream.close()

    def _generate_fullchunk_copy(self, chunk, random_hex=60, **kwargs):
        """
        Generate a new chunk URL by replacing the last `random_hex`
        characters of the original URL with random hexadecimal digits.
        """
        rnd = ''.join(random.choice('0123456789ABCDEF')
                      for _ in range(random_hex))
        return chunk[:-random_hex] + rnd

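For illustration, with a hypothetical URL and random_hex=4, only the last
four characters are regenerated:

chunk = 'http://127.0.0.1:6201/chunk-0000ABCD'
# chunk[:-4] + rnd  ->  e.g. 'http://127.0.0.1:6201/chunk-00003F2B'
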
    @update_rawx_perfdata
    @ensure_headers
    @ensure_request_id
    def chunk_link(self, target, link, fullpath, headers=None, **kwargs):
        hdrs = headers.copy()
        if link is None:
            link = self._generate_fullchunk_copy(target, **kwargs)
        hdrs['Destination'] = link
        hdrs[CHUNK_HEADERS['full_path']] = fullpath
        resp = self.http_pool.request('COPY', self.resolve_url(target),
                                      headers=hdrs)
        if resp.status != 201:
            raise exc.ChunkException(resp.status)
        return resp, link
Beispiel #34
0
 def __init__(self, conf):
     self.conf = conf
     self.logger = get_logger(conf)
     self.cs_client = ConscienceClient(conf)
     self.container_client = ContainerClient(conf)
     self.ns_info = self.cs_client.info()
Beispiel #35
0
class ServiceWatcher(object):
    def __init__(self, conf, service, **kwargs):
        self.conf = conf
        self.running = False

        for k in ['host', 'port', 'type']:
            if k not in service:
                raise Exception('Missing field "%s" in service configuration' %
                                k)
        self.name = '%s|%s|%s' % \
            (service['type'], service['host'], service['port'])

        self.service = service

        self.rise = int_value(self._load_item_config('rise'), 1)
        self.fall = int_value(self._load_item_config('fall'), 1)
        self.check_interval = float_value(
            self._load_item_config('check_interval'), 1)
        self.deregister_on_exit = true_value(
            self._load_item_config('deregister_on_exit', False))

        self.logger = get_logger(self.conf)
        self.pool_manager = get_pool_manager()
        self.cs = ConscienceClient(self.conf,
                                   pool_manager=self.pool_manager,
                                   logger=self.logger)
        # FIXME: explain that
        self.client = ProxyClient(self.conf,
                                  pool_manager=self.pool_manager,
                                  no_ns_in_url=True,
                                  logger=self.logger)
        self.last_status = False
        self.status = False
        self.failed = False
        self.service_definition = {
            'ns': self.conf['namespace'],
            'type': self.service['type'],
            'addr': get_addr(self.service['host'], self.service['port']),
            'score': 0,
            'tags': {}
        }
        if self.service.get('location', None):
            self.service_definition['tags']['tag.loc'] = \
                    self.service['location']
        if self.service.get('slots', None):
            self.service_definition['tags']['tag.slots'] = \
                    ','.join(self.service['slots'])
        self.service_checks = list()
        self.service_stats = list()
        self.init_checkers(service)
        self.init_stats(service)

    def _load_item_config(self, item, default=None):
        return self.service.get(item, self.conf.get(item)) or default

    def start(self):
        self.logger.info('watcher "%s" starting', self.name)
        self.running = True
        self.watch()
        self.running = False

    def stop(self):
        self.logger.info('watcher "%s" stopping', self.name)
        if self.deregister_on_exit:
            self.logger.info('watcher "%s" deregister service', self.name)
            try:
                self.status = False
                self.last_status = False
                self.register()
            except Exception as e:
                self.logger.warn('Failed to register service: %s', e)
        self.running = False

    def check(self):
        """Perform the registered checks on the service until any of
        them fails of the end of the list is reached."""
        self.status = True
        for service_check in (x for x in self.service_checks if self.running):
            if not service_check.service_status():
                self.status = False
                return

    def get_stats(self):
        """Update service definition with all configured stats"""
        if not self.status:
            return
        try:
            for stat in (x for x in self.service_stats if self.running):
                stats = stat.get_stats()
                self.service_definition['tags'].update(stats)
        except Exception as ex:
            self.logger.debug("get_stats error: %s", ex)
            self.status = False

    def register(self):
        # only accept a final zero/down-registration when exiting
        if not self.running and self.status:
            return

        # Alert when the status changes
        if self.status != self.last_status:
            if self.status:
                self.logger.info('service "%s" is now up', self.name)
            else:
                self.logger.warn('service "%s" is now down', self.name)
            self.last_status = self.status

        # Use a boolean so we can easily convert it to a number in conscience
        self.service_definition['tags']['tag.up'] = self.status
        try:
            self.cs.register(self.service['type'],
                             self.service_definition,
                             retries=False)
        except OioException as rqe:
            self.logger.warn("Failed to register service %s: %s",
                             self.service_definition["addr"], rqe)

    def watch(self):
        try:
            while self.running:
                self.check()
                self.get_stats()
                self.register()
                sleep(self.check_interval)
        except Exception as e:
            self.logger.warn('ERROR in watcher "%s": %s', self.name, e)
            self.failed = True
            raise e
        finally:
            self.logger.info('watcher "%s" stopped', self.name)

    def init_checkers(self, service):
        for check in service['checks']:
            check['host'] = check.get('host') or service['host']
            check['port'] = check.get('port') or service['port']
            check['name'] = check.get('name') or "%s|%s|%s" % \
                (check['type'], check['host'], check['port'])
            check['rise'] = check.get('rise') or self.rise
            check['fall'] = check.get('fall') or self.fall

            check['type'] = check.get('type') or 'unknown'
            service_check_class = CHECKERS_MODULES.get(check['type'])
            if not service_check_class:
                raise Exception(
                    'Invalid check type "%s", valid types: %s' %
                    (check['type'], ', '.join(CHECKERS_MODULES.keys())))
            service_check = service_check_class(self, check, self.logger)
            self.service_checks.append(service_check)

    def init_stats(self, service):
        """Initialize service stat fetchers"""
        self.service_stats[:] = []
        for stat in service['stats']:
            stat.setdefault('host', service['host'])
            stat.setdefault('port', service['port'])
            stat.setdefault('path', "")
            service_stat_class = STATS_MODULES.get(stat['type'], None)
            if not service_stat_class:
                raise Exception(
                    'Invalid stat type "%s", valid types: %s' %
                    (stat['type'], ', '.join(STATS_MODULES.keys())))
            service_stat = service_stat_class(self, stat, self.logger)
            self.service_stats.append(service_stat)
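
A hypothetical configuration for this watcher (field values are
illustrative; the check and stat types must exist in CHECKERS_MODULES and
STATS_MODULES):

conf = {'namespace': 'OPENIO', 'check_interval': 1}
service = {
    'type': 'rawx',
    'host': '127.0.0.1',
    'port': 6201,
    'location': 'rack1.server2',
    'slots': ['rawx', 'fast'],
    'checks': [{'type': 'tcp'}],
    'stats': [{'type': 'http', 'path': '/stat'}],
}
watcher = ServiceWatcher(conf, service)
watcher.start()  # blocks until stop() flips self.running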
Beispiel #36
0
class ServiceWatcher(object):
    def __init__(self, conf, service, **kwargs):
        self.conf = conf

        for k in ['host', 'port', 'type']:
            if k not in service:
                raise Exception(
                    'Missing field "%s" in service configuration' % k)
        self.name = '%s|%s' % \
            (service['host'], service['port'])

        self.check_interval = float_value(conf.get('check_interval'), 1)
        self.service = service

        self.rise = int_value(conf.get('rise'), 1)
        self.fall = int_value(conf.get('fall'), 1)

        self.logger = get_logger(self.conf)
        self.cs = ConscienceClient(self.conf)
        self.init_checkers(service)
        self.last_status = False
        self.failed = False
        self.service_definition = {
            'ns': self.conf['namespace'],
            'type': self.service['type'],
            'addr': '%s:%s' % (self.service['host'], self.service['port']),
            'score': 0,
            'tags': {}}

    def start(self):
        self.logger.info('watcher "%s" starting', self.name)
        self.running = True
        self.watch()

    def stop(self):
        self.logger.info('watcher "%s" stopping', self.name)
        self.running = False

    def check(self):
        status = True
        for service_check in self.service_checks:
            if not service_check.service_status():
                status = False

        if status != self.last_status:
            if status:
                self.logger.info('service "%s" is now up', self.name)
            else:
                self.logger.warn('service "%s" is now down', self.name)
            self.last_status = status

    def register(self):
        tag_up = 'true' if self.last_status else 'false'
        self.service_definition['tags']['tag.up'] = tag_up
        self.cs.register(
            self.service['type'], self.service_definition)

    def watch(self):
        try:
            while self.running:
                self.check()
                self.register()
                sleep(self.check_interval)
        except Exception as e:
            self.logger.warn('ERROR in watcher "%s": %s', self.name, e)
            self.failed = True
            raise e
        finally:
            self.logger.info('watcher "%s" stopped', self.name)

    def init_checkers(self, service):
        self.service_checks = []
        for check in service['checks']:
            check['host'] = check.get('host') or service['host']
            check['port'] = check.get('port') or service['port']
            check['name'] = check.get('name') or "%s|%s|%s" % \
                (check['type'], check['host'], check['port'])
            check['rise'] = check.get('rise') or self.rise
            check['fall'] = check.get('fall') or self.fall

            check['type'] = check.get('type') or 'unknown'
            service_check_class = CHECKERS_MODULES.get(check['type'])
            if not service_check_class:
                raise Exception(
                    'Invalid check type "%s", valid types: %s' %
                    (check['type'], ', '.join(CHECKERS_MODULES.keys())))
            service_check = service_check_class(check, self.logger)

            self.service_checks.append(service_check)
Beispiel #37
0
class EventWorker(Worker):
    def init(self):
        eventlet.monkey_patch(os=False)
        self.session = requests.Session()
        self.cs = ConscienceClient(self.conf)
        self.rdir = RdirClient(self.conf)
        self._acct_addr = None
        # Timestamp of the last account-service lookup; distinct from the
        # `acct_update` boolean flag set below.
        self.acct_last_update = 0
        self.graceful_timeout = 1
        self.acct_refresh_interval = int_value(
            self.conf.get('acct_refresh_interval'), 60
        )
        self.concurrency = int_value(self.conf.get('concurrency'), 1000)
        self.acct_update = true_value(self.conf.get('acct_update', True))
        self.rdir_update = true_value(self.conf.get('rdir_update', True))
        super(EventWorker, self).init()

    def notify(self):
        """TODO"""
        pass

    def safe_decode_job(self, job):
        try:
            return json.loads(job)
        except Exception as e:
            self.logger.warn('ERROR decoding job "%s"', str(e.message))
            return None

    def run(self):
        queue_url = self.conf.get('queue_url', 'tcp://127.0.0.1:11300')
        self.beanstalk = Beanstalk.from_url(queue_url)

        gt = eventlet.spawn(
            self.handle)

        while self.alive:
            self.notify()
            try:
                eventlet.sleep(1.0)
            except AssertionError:
                self.alive = False
                break

        self.notify()
        try:
            with Timeout(self.graceful_timeout) as t:
                gt.kill(StopServe())
                gt.wait()
        except Timeout as te:
            if te != t:
                raise
            gt.kill()

    def handle(self):
        try:
            while True:
                job_id, data = self.beanstalk.reserve()
                try:
                    event = self.safe_decode_job(data)
                    if event:
                        self.process_event(event)
                    self.beanstalk.delete(job_id)
                except Exception:
                    self.logger.exception("ERROR handling event %s", job_id)
        except StopServe:
            self.logger.info('Stopping event handler')

    def process_event(self, event):
        handler = self.get_handler(event)
        if not handler:
            self.logger.warn("ERROR no handler found for event")
            # mark as success
            return True
        success = True
        try:
            handler(event)
        except Exception:
            success = False
        return success

    def get_handler(self, event):
        event_type = event.get('event')
        if not event_type:
            return None

        if event_type == EventType.CONTAINER_PUT:
            return self.handle_container_put
        elif event_type == EventType.CONTAINER_DESTROY:
            return self.handle_container_destroy
        elif event_type == EventType.CONTAINER_UPDATE:
            return self.handle_container_update
        elif event_type == EventType.OBJECT_PUT:
            return self.handle_object_put
        elif event_type == EventType.OBJECT_DELETE:
            return self.handle_object_delete
        elif event_type == EventType.REFERENCE_UPDATE:
            return self.handle_reference_update
        elif event_type == EventType.CHUNK_PUT:
            return self.handle_chunk_put
        elif event_type == EventType.CHUNK_DELETE:
            return self.handle_chunk_delete
        elif event_type == EventType.PING:
            return self.handle_ping
        else:
            return None

    @property
    def acct_addr(self):
        if not self._acct_addr or self.acct_refresh():
            try:
                acct_instance = self.cs.next_instance(ACCOUNT_SERVICE)
                self._acct_addr = acct_instance.get('addr')
                self.acct_last_update = time.time()
            except Exception:
                self.logger.warn('Unable to find account instance')
        return self._acct_addr

    def acct_refresh(self):
        return (time.time() -
                self.acct_last_update) > self.acct_refresh_interval

    def handle_container_put(self, event):
        """
        Handle container creation.
        :param event:
        """
        self.logger.debug('worker handle container put')
        if not self.acct_update:
            return
        uri = 'http://%s/v1.0/account/container/update' % self.acct_addr
        mtime = event.get('when')
        data = event.get('data')
        name = data.get('url').get('user')
        account = data.get('url').get('account')

        event = {'mtime': mtime, 'name': name}
        self.session.post(uri, params={'id': account}, json=event)

    def handle_container_update(self, event):
        """
        Handle container update.
        :param event:
        """
        self.logger.debug('worker handle container update')
        if not self.acct_update:
            return
        uri = 'http://%s/v1.0/account/container/update' % self.acct_addr
        mtime = event.get('when')
        data = event.get('data')
        name = event.get('url').get('user')
        account = event.get('url').get('account')
        bytes_count = data.get('bytes-count', 0)
        object_count = data.get('object-count', 0)

        event = {
            'mtime': mtime,
            'name': name,
            'bytes': bytes_count,
            'objects': object_count
        }
        self.session.post(uri, params={'id': account}, json=event)

    def handle_container_destroy(self, event):
        """
        Handle container destroy.
        :param event:
        """
        self.logger.debug('worker handle container destroy')
        if not self.acct_update:
            return
        uri = 'http://%s/v1.0/account/container/update' % self.acct_addr
        dtime = event.get('when')
        data = event.get('data')
        name = data.get('url').get('user')
        account = data.get('url').get('account')

        event = {'dtime': dtime, 'name': name}
        self.session.post(uri, params={'id': account}, data=json.dumps(event))

    def handle_object_delete(self, event):
        """
        Handle object deletion.
        Delete the chunks of the object.
        :param event:
        """
        self.logger.debug('worker handle object delete')
        pile = GreenPile(PARALLEL_CHUNKS_DELETE)

        chunks = []

        for item in event.get('data'):
            if item.get('type') == 'chunks':
                chunks.append(item)
        if not len(chunks):
            self.logger.warn('No chunks found in event data')
            return

        def delete_chunk(chunk):
            resp = None
            try:
                with Timeout(CHUNK_TIMEOUT):
                    resp = self.session.delete(chunk['id'])
            except (Exception, Timeout) as e:
                self.logger.warn('error while deleting chunk %s "%s"',
                                 chunk['id'], str(e.message))
            return resp

        for chunk in chunks:
            pile.spawn(delete_chunk, chunk)

        resps = [resp for resp in pile if resp]

        for resp in resps:
            if resp.status_code == 204:
                self.logger.debug('deleted chunk %s' % resp.url)
            else:
                self.logger.warn('failed to delete chunk %s' % resp.url)

    def handle_object_put(self, event):
        """
        Handle object creation.
        TODO
        :param event:
        """
        self.logger.debug('worker handle object put')

    def handle_reference_update(self, event):
        """
        Handle reference update.
        TODO
        :param event
        """
        self.logger.debug('worker handle reference update')

    def handle_chunk_put(self, event):
        """
        Handle chunk creation.
        :param event
        """
        if not self.rdir_update:
            self.logger.debug('worker skip chunk creation')
            return

        self.logger.debug('worker handle chunk creation')

        when = event.get('when')
        data = event.get('data')
        volume_id = data.get('volume_id')
        del data['volume_id']
        container_id = data.get('container_id')
        del data['container_id']
        content_id = data.get('content_id')
        del data['content_id']
        chunk_id = data.get('chunk_id')
        del data['chunk_id']
        data['mtime'] = when
        self.rdir.chunk_push(volume_id, container_id, content_id, chunk_id,
                             **data)

    def handle_chunk_delete(self, event):
        """
        Handle chunk deletion.
        :param event:
        """
        if not self.rdir_update:
            self.logger.debug('worker skip chunk deletion')
            return

        self.logger.debug('worker handle chunk deletion')

        data = event.get('data')
        volume_id = data.get('volume_id')
        container_id = data.get('container_id')
        content_id = data.get('content_id')
        chunk_id = data.get('chunk_id')
        self.rdir.chunk_delete(volume_id, container_id, content_id, chunk_id)

    def handle_ping(self, event):
        """
        Handle ping
        :param event:
        """
        self.logger.debug('worker handle ping')
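
Taken together, these handlers consume small JSON events and fan them out to the account, rdir and rawx services. As a hedged illustration (the field names are copied from the accessors above, all values are invented), a container-update event and the payload it yields might look like:

# Hypothetical container-update event; the field names mirror what
# handle_container_update() reads, the values are made up.
sample_event = {
    'when': 1469702400,
    'url': {'user': 'my-container', 'account': 'my-account'},
    'data': {'bytes-count': 1024, 'object-count': 3},
}

# Payload the handler would POST to
# http://<acct_addr>/v1.0/account/container/update?id=my-account
payload = {
    'mtime': sample_event['when'],
    'name': sample_event['url']['user'],
    'bytes': sample_event['data']['bytes-count'],
    'objects': sample_event['data']['object-count'],
}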
Example #38
0
class EventWorker(Worker):
    def init(self):
        eventlet.monkey_patch(os=False)
        self.tube = self.conf.get("tube", DEFAULT_TUBE)
        self.session = requests.Session()
        self.cs = ConscienceClient(self.conf)
        self.rdir = RdirClient(self.conf)
        self._acct_addr = None
        self._last_acct_refresh = 0  # timestamp of the last account lookup
        self.graceful_timeout = 1
        self.acct_refresh_interval = int_value(
            self.conf.get('acct_refresh_interval'), 60
        )
        self.acct_update = true_value(self.conf.get('acct_update', True))
        self.rdir_update = true_value(self.conf.get('rdir_update', True))
        if 'handlers_conf' not in self.conf:
            raise ValueError("'handlers_conf' path not defined in conf")
        self.handlers = loadhandlers(
            self.conf.get('handlers_conf'), evt_types, app=self)
        super(EventWorker, self).init()

    def notify(self):
        """TODO"""
        pass

    def safe_decode_job(self, job_id, data):
        try:
            env = json.loads(data)
            env['job_id'] = job_id
            return env
        except Exception as e:
            self.logger.warn('failed to decode job %s: %s', job_id, e)
            return None

    def run(self):
        coros = []
        queue_url = self.conf.get('queue_url', '127.0.0.1:11300')
        concurrency = int_value(self.conf.get('concurrency'), 10)

        server_gt = greenthread.getcurrent()

        for i in range(concurrency):
            beanstalk = Beanstalk.from_url(queue_url)
            gt = eventlet.spawn(self.handle, beanstalk)
            gt.link(_eventlet_stop, server_gt, beanstalk)
            coros.append(gt)
            beanstalk, gt = None, None

        while self.alive:
            self.notify()
            try:
                eventlet.sleep(1.0)
            except AssertionError:
                self.alive = False
                break

        self.notify()
        try:
            with Timeout(self.graceful_timeout) as t:
                [c.kill(StopServe()) for c in coros]
                [c.wait() for c in coros]
        except Timeout as te:
            if te != t:
                raise
            [c.kill() for c in coros]

    def handle(self, beanstalk):
        conn_error = False
        try:
            if self.tube:
                beanstalk.use(self.tube)
                beanstalk.watch(self.tube)
            while True:
                try:
                    job_id, data = beanstalk.reserve()
                    if conn_error:
                        self.logger.warn("beanstalk reconnected")
                        conn_error = False
                except ConnectionError:
                    if not conn_error:
                        self.logger.warn("beanstalk connection error")
                        conn_error = True
                    eventlet.sleep(BEANSTALK_RECONNECTION)
                    continue
                try:
                    event = self.safe_decode_job(job_id, data)
                    self.process_event(job_id, event, beanstalk)
                except ConnectionError:
                    self.logger.warn(
                        "beanstalk connection error during processing")
                except Exception:
                    beanstalk.bury(job_id)
                    self.logger.exception("handling event %s (bury)", job_id)
        except StopServe:
            pass

    def process_event(self, job_id, event, beanstalk):
        handler = self.get_handler(event)
        if not handler:
            self.logger.warn('no handler found for %r' % event)
            beanstalk.delete(job_id)
            return

        def cb(status, msg):
            if is_success(status):
                beanstalk.delete(job_id)
            elif is_error(status):
                self.logger.warn('bury event %r' % event)
                beanstalk.bury(job_id)
            else:
                self.logger.warn('release event %r' % event)
                beanstalk.release(job_id)

        handler(event, cb)

    def get_handler(self, event):
        return self.handlers.get(event.get('event'), None)

    @property
    def acct_addr(self):
        if not self._acct_addr or self.acct_refresh():
            try:
                acct_instance = self.cs.next_instance(ACCOUNT_SERVICE)
                self._acct_addr = acct_instance.get('addr')
                self._last_acct_refresh = time.time()
            except Exception:
                self.logger.warn('Unable to find account instance')
        return self._acct_addr

    def acct_refresh(self):
        return (time.time() - self._last_acct_refresh) > \
            self.acct_refresh_interval
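
Every tunable read by EventWorker.init() and run() comes from self.conf. The sketch below lists them with the defaults the code shows; the tube name and the handlers path are placeholders, since DEFAULT_TUBE and the file layout are not visible here.

# Configuration sketch for EventWorker; the keys are those read in
# init() and run(). 'oio' and the handlers path are placeholder values.
conf = {
    'namespace': 'OPENIO',
    'tube': 'oio',                     # beanstalkd tube to use/watch
    'queue_url': '127.0.0.1:11300',    # beanstalkd endpoint (default)
    'concurrency': 10,                 # consumer green threads (default)
    'acct_refresh_interval': 60,       # seconds between account lookups
    'acct_update': True,               # forward container stats
    'rdir_update': True,               # forward chunk events to rdir
    'handlers_conf': '/etc/oio/event-handlers.conf',  # required
}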
Example #39
0
class ServiceWatcher(object):
    def __init__(self, conf, service, **kwargs):
        self.conf = conf
        self.running = False

        for k in ['host', 'port', 'type']:
            if k not in service:
                raise Exception('Missing field "%s" in service configuration' %
                                k)
        self.name = '%s|%s|%s' % \
            (service['type'], service['host'], service['port'])

        self.service = service

        self.rise = int_value(self._load_item_config('rise'), 1)
        self.fall = int_value(self._load_item_config('fall'), 1)
        self.check_interval = float_value(
            self._load_item_config('check_interval'), 1)
        self.deregister_on_exit = true_value(
            self._load_item_config('deregister_on_exit', False))

        self.logger = get_logger(self.conf)
        self.session = requests.Session()
        self.cs = ConscienceClient(self.conf, session=self.session)
        self.client = Client(self.conf, session=self.session)
        self.last_status = False
        self.failed = False
        self.service_definition = {
            'ns': self.conf['namespace'],
            'type': self.service['type'],
            'addr': '%s:%s' % (self.service['host'], self.service['port']),
            'score': 0,
            'tags': {}
        }
        if self.service.get('location', None):
            self.service_definition['tags']['tag.loc'] = \
                    self.service['location']
        self.service_checks = list()
        self.service_stats = list()
        self.init_checkers(service)
        self.init_stats(service)

    def _load_item_config(self, item, default=None):
        return self.service.get(item, self.conf.get(item)) or default

    def start(self):
        self.logger.info('watcher "%s" starting', self.name)
        self.running = True
        self.watch()

    def stop(self):
        self.logger.info('watcher "%s" stopping', self.name)
        if self.deregister_on_exit:
            self.logger.info('watcher "%s" deregister service', self.name)
            try:
                self.last_status = False
                self.register()
            except Exception as e:
                self.logger.warn('Failed to register service: %s', e)
        self.running = False

    def check(self):
        status = True
        for service_check in self.service_checks:
            if not service_check.service_status():
                status = False

        if status != self.last_status:
            if status:
                self.logger.info('service "%s" is now up', self.name)
            else:
                self.logger.warn('service "%s" is now down', self.name)
            self.last_status = status

    def get_stats(self):
        """Update service definition with all configured stats"""
        if not self.last_status:
            return
        for stat in self.service_stats:
            stats = stat.get_stats()
            self.service_definition['tags'].update(stats)

    def register(self):
        # Use a boolean so we can easily convert it to a number in conscience
        self.service_definition['tags']['tag.up'] = self.last_status
        self.cs.register(self.service['type'], self.service_definition)

    def watch(self):
        try:
            while self.running:
                self.check()
                self.get_stats()
                self.register()
                sleep(self.check_interval)
        except Exception as e:
            self.logger.warn('error in watcher "%s": %s', self.name, e)
            self.failed = True
            raise
        finally:
            self.logger.info('watcher "%s" stopped', self.name)

    def init_checkers(self, service):
        for check in service['checks']:
            check['type'] = check.get('type') or 'unknown'
            check['host'] = check.get('host') or service['host']
            check['port'] = check.get('port') or service['port']
            check['name'] = check.get('name') or "%s|%s|%s" % \
                (check['type'], check['host'], check['port'])
            check['rise'] = check.get('rise') or self.rise
            check['fall'] = check.get('fall') or self.fall

            service_check_class = CHECKERS_MODULES.get(check['type'])
            if not service_check_class:
                raise Exception(
                    'Invalid check type "%s", valid types: %s' %
                    (check['type'], ', '.join(CHECKERS_MODULES.keys())))
            service_check = service_check_class(self, check, self.logger)
            self.service_checks.append(service_check)

    def init_stats(self, service):
        """Initialize service stat fetchers"""
        self.service_stats[:] = []
        for stat in service['stats']:
            stat.setdefault('host', service['host'])
            stat.setdefault('port', service['port'])
            stat.setdefault('path', "")
            service_stat_class = STATS_MODULES.get(stat['type'], None)
            if not service_stat_class:
                raise Exception(
                    'Invalid stat type "%s", valid types: %s' %
                    (stat['type'], ', '.join(STATS_MODULES.keys())))
            service_stat = service_stat_class(self, stat, self.logger)
            self.service_stats.append(service_stat)
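
For reference, here is a sketch of a service entry that satisfies this constructor: 'host', 'port' and 'type' are mandatory, 'location' becomes the tag.loc tag, and per-service 'rise'/'fall'/'check_interval' override the global configuration through _load_item_config(). The check and stat type names are assumptions; the valid values are whatever CHECKERS_MODULES and STATS_MODULES actually contain.

# Hypothetical watcher input; 'tcp' and 'http' are assumed type names.
service = {
    'type': 'rawx',
    'host': '127.0.0.1',
    'port': 6200,
    'location': 'rack1.server2',        # optional, stored as tag.loc
    'rise': 1,                          # optional per-service override
    'fall': 2,
    'checks': [{'type': 'tcp'}],        # host/port default to the service's
    'stats': [{'type': 'http', 'path': '/stat'}],
}
watcher = ServiceWatcher({'namespace': 'OPENIO'}, service)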
Example #40
0
class ServiceWatcher(object):
    def __init__(self, conf, service, **kwargs):
        self.conf = conf
        self.running = False

        for k in ['host', 'port', 'type']:
            if k not in service:
                raise Exception(
                    'Missing field "%s" in service configuration' % k)
        self.name = '%s|%s' % \
            (service['host'], service['port'])

        self.check_interval = float_value(conf.get('check_interval'), 1)
        self.service = service

        self.rise = int_value(conf.get('rise'), 1)
        self.fall = int_value(conf.get('fall'), 1)

        self.logger = get_logger(self.conf)
        self.cs = ConscienceClient(self.conf)
        self.client = Client(self.conf)
        self.last_status = False
        self.failed = False
        self.service_definition = {
            'ns': self.conf['namespace'],
            'type': self.service['type'],
            'addr': '%s:%s' % (self.service['host'], self.service['port']),
            'score': 0,
            'tags': {}}
        if self.service.get('location', None):
            self.service_definition['tags']['tag.loc'] = \
                    self.service['location']
        self.service_checks = list()
        self.service_stats = list()
        self.init_checkers(service)
        self.init_stats(service)

    def start(self):
        self.logger.info('watcher "%s" starting', self.name)
        self.running = True
        self.watch()

    def stop(self):
        self.logger.info('watcher "%s" stopping', self.name)
        self.running = False

    def check(self):
        status = True
        for service_check in self.service_checks:
            if not service_check.service_status():
                status = False

        if status != self.last_status:
            if status:
                self.logger.info('service "%s" is now up', self.name)
            else:
                self.logger.warn('service "%s" is now down', self.name)
            self.last_status = status

    def get_stats(self):
        """Update service definition with all configured stats"""
        for stat in self.service_stats:
            stats = stat.get_stats()
            self.logger.debug("Stat fetcher '%s' returned %s",
                              str(stat), str(stats))
            self.service_definition['tags'].update(stats)

    def register(self):
        # Use a boolean so we can easily convert it to a number in conscience
        self.service_definition['tags']['tag.up'] = self.last_status
        self.cs.register(
            self.service['type'], self.service_definition)

    def watch(self):
        try:
            while self.running:
                self.check()
                self.get_stats()
                self.register()
                sleep(self.check_interval)
        except Exception as e:
            self.logger.warn('error in watcher "%s": %s', self.name, e)
            self.failed = True
            raise
        finally:
            self.logger.info('watcher "%s" stopped', self.name)

    def init_checkers(self, service):
        for check in service['checks']:
            check['type'] = check.get('type') or 'unknown'
            check['host'] = check.get('host') or service['host']
            check['port'] = check.get('port') or service['port']
            check['name'] = check.get('name') or "%s|%s|%s" % \
                (check['type'], check['host'], check['port'])
            check['rise'] = check.get('rise') or self.rise
            check['fall'] = check.get('fall') or self.fall

            service_check_class = CHECKERS_MODULES.get(check['type'])
            if not service_check_class:
                raise Exception(
                    'Invalid check type "%s", valid types: %s' %
                    (check['type'], ', '.join(CHECKERS_MODULES.keys())))
            service_check = service_check_class(self, check, self.logger)
            self.service_checks.append(service_check)

    def init_stats(self, service):
        """Initialize service stat fetchers"""
        self.service_stats[:] = []
        for stat in service['stats']:
            stat.setdefault('host', service['host'])
            stat.setdefault('port', service['port'])
            stat.setdefault('path', "")
            service_stat_class = STATS_MODULES.get(stat['type'], None)
            if not service_stat_class:
                raise Exception(
                    'Invalid stat type "%s", valid types: %s' %
                    (stat['type'], ', '.join(STATS_MODULES.keys())))
            service_stat = service_stat_class(self, stat, self.logger)
            self.service_stats.append(service_stat)
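
The watcher only requires its checkers to accept (watcher, check_conf, logger) and to expose service_status(); that contract is visible in init_checkers() and check() above. A minimal TCP checker under those assumptions could look like this (the real classes behind CHECKERS_MODULES may differ):

import socket

# Minimal checker sketch; the constructor signature and service_status()
# are inferred from init_checkers() and check() above.
class TcpChecker(object):
    def __init__(self, watcher, check_conf, logger):
        self.addr = (check_conf['host'], int(check_conf['port']))
        self.logger = logger

    def service_status(self):
        try:
            sock = socket.create_connection(self.addr, timeout=1.0)
            sock.close()
            return True
        except (socket.error, socket.timeout):
            return False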
Example #41
0
class EventWorker(Worker):
    def init(self):
        eventlet.monkey_patch(os=False)
        self.tube = self.conf.get("tube", DEFAULT_TUBE)
        self.session = requests.Session()
        self.cs = ConscienceClient(self.conf)
        self.rdir = RdirClient(self.conf)
        self._acct_addr = None
        self._last_acct_refresh = 0  # timestamp of the last account lookup
        self.graceful_timeout = 1
        self.acct_refresh_interval = int_value(
            self.conf.get('acct_refresh_interval'), 60)
        self.acct_update = true_value(self.conf.get('acct_update', True))
        self.rdir_update = true_value(self.conf.get('rdir_update', True))
        if 'handlers_conf' not in self.conf:
            raise ValueError("'handlers_conf' path not defined in conf")
        self.handlers = loadhandlers(self.conf.get('handlers_conf'),
                                     evt_types,
                                     app=self)
        super(EventWorker, self).init()

    def notify(self):
        """TODO"""
        pass

    def safe_decode_job(self, job_id, data):
        try:
            env = json.loads(data)
            env['job_id'] = job_id
            return env
        except Exception as e:
            self.logger.warn('failed to decode job %s: %s', job_id, e)
            return None

    def run(self):
        coros = []
        queue_url = self.conf.get('queue_url', '127.0.0.1:11300')
        concurrency = int_value(self.conf.get('concurrency'), 10)

        server_gt = greenthread.getcurrent()

        for i in range(concurrency):
            beanstalk = Beanstalk.from_url(queue_url)
            gt = eventlet.spawn(self.handle, beanstalk)
            gt.link(_eventlet_stop, server_gt, beanstalk)
            coros.append(gt)
            beanstalk, gt = None, None

        while self.alive:
            self.notify()
            try:
                eventlet.sleep(1.0)
            except AssertionError:
                self.alive = False
                break

        self.notify()
        try:
            with Timeout(self.graceful_timeout) as t:
                [c.kill(StopServe()) for c in coros]
                [c.wait() for c in coros]
        except Timeout as te:
            if te != t:
                raise
            [c.kill() for c in coros]

    def handle(self, beanstalk):
        conn_error = False
        try:
            beanstalk.use(self.tube)
            beanstalk.watch(self.tube)
            while True:
                try:
                    job_id, data = beanstalk.reserve()
                    if conn_error:
                        self.logger.warn("beanstalk reconnected")
                        conn_error = False
                except ConnectionError:
                    if not conn_error:
                        self.logger.warn("beanstalk connection error")
                        conn_error = True
                    eventlet.sleep(BEANSTALK_RECONNECTION)
                    continue
                try:
                    event = self.safe_decode_job(job_id, data)
                    self.process_event(job_id, event, beanstalk)
                except ConnectionError:
                    self.logger.warn(
                        "beanstalk connection error during processing")
                except Exception:
                    beanstalk.bury(job_id)
                    self.logger.exception("handling event %s (bury)", job_id)
        except StopServe:
            pass

    def process_event(self, job_id, event, beanstalk):
        handler = self.get_handler(event)
        if not handler:
            self.logger.warn('no handler found for %r' % event)
            beanstalk.delete(job_id)
            return

        def cb(status, msg):
            if is_success(status):
                beanstalk.delete(job_id)
            elif is_error(status):
                self.logger.warn('bury event %r' % event)
                beanstalk.bury(job_id)
            else:
                self.logger.warn('release event %r' % event)
                beanstalk.release(job_id)

        handler(event, cb)

    def get_handler(self, event):
        return self.handlers.get(event.get('event'), None)

    @property
    def acct_addr(self):
        if not self._acct_addr or self.acct_refresh():
            try:
                acct_instance = self.cs.next_instance(ACCOUNT_SERVICE)
                self._acct_addr = acct_instance.get('addr')
                self._last_acct_refresh = time.time()
            except Exception:
                self.logger.warn('Unable to find account instance')
        return self._acct_addr

    def acct_refresh(self):
        return (time.time() - self._last_acct_refresh) > \
            self.acct_refresh_interval
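
Handlers resolved by get_handler() are callables taking (event, cb), and they report their outcome through the callback; judging by is_success()/is_error() in process_event(), the status argument follows HTTP-style conventions. A hedged skeleton (the 200/500 codes and the body of the work are assumptions):

# Hypothetical event handler honoring the (event, cb) contract of
# process_event(); the status codes assume HTTP-style semantics.
def handle_custom(event, cb):
    try:
        # ... do the actual work with `event` here ...
        cb(200, 'OK')           # success: the job gets deleted
    except Exception as exc:
        cb(500, str(exc))       # error: the job gets buried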
Example #42
0
class RawxDecommissionTask(XcuteTask):
    def __init__(self, conf, job_params, logger=None):
        super(RawxDecommissionTask, self).__init__(conf,
                                                   job_params,
                                                   logger=logger)

        self.service_id = job_params['service_id']
        self.rawx_timeout = job_params['rawx_timeout']
        self.min_chunk_size = job_params['min_chunk_size']
        self.max_chunk_size = job_params['max_chunk_size']
        self.excluded_rawx = job_params['excluded_rawx']

        self.blob_client = BlobClient(self.conf, logger=self.logger)
        self.content_factory = ContentFactory(self.conf)
        self.conscience_client = ConscienceClient(self.conf,
                                                  logger=self.logger)

        self.fake_excluded_chunks = self._generate_fake_excluded_chunks(
            self.excluded_rawx)

    def _generate_fake_excluded_chunks(self, excluded_rawx):
        fake_excluded_chunks = list()
        fake_chunk_id = '0' * 64
        for service_id in excluded_rawx:
            service_addr = self.conscience_client.resolve_service_id(
                'rawx', service_id)
            chunk = dict()
            chunk['hash'] = '0' * 32  # placeholder MD5 checksum
            chunk['pos'] = '0'
            chunk['size'] = 1
            chunk['score'] = 1
            chunk['url'] = 'http://{}/{}'.format(service_id, fake_chunk_id)
            chunk['real_url'] = 'http://{}/{}'.format(service_addr,
                                                      fake_chunk_id)
            fake_excluded_chunks.append(chunk)
        return fake_excluded_chunks

    def process(self, task_id, task_payload, reqid=None):
        container_id = task_payload['container_id']
        content_id = task_payload['content_id']
        chunk_id = task_payload['chunk_id']

        chunk_url = 'http://{}/{}'.format(self.service_id, chunk_id)
        try:
            meta = self.blob_client.chunk_head(chunk_url,
                                               timeout=self.rawx_timeout,
                                               reqid=reqid)
        except NotFound:
            # The chunk is still referenced in the rdir but no longer
            # exists on the rawx: there is nothing to move, so skip it.
            return {'skipped_chunks_no_longer_exist': 1}
        if container_id != meta['container_id']:
            raise ValueError('Mismatch container ID: %s != %s' %
                             (container_id, meta['container_id']))
        if content_id != meta['content_id']:
            raise ValueError('Mismatch content ID: %s != %s' %
                             (content_id, meta['content_id']))
        chunk_size = int(meta['chunk_size'])

        # Maybe skip the chunk because it doesn't match the size constraint
        if chunk_size < self.min_chunk_size:
            self.logger.debug('[reqid=%s] SKIP %s too small', reqid, chunk_url)
            return {'skipped_chunks_too_small': 1}
        if self.max_chunk_size > 0 and chunk_size > self.max_chunk_size:
            self.logger.debug('[reqid=%s] SKIP %s too big', reqid, chunk_url)
            return {'skipped_chunks_too_big': 1}

        # Start moving the chunk
        try:
            content = self.content_factory.get(container_id,
                                               content_id,
                                               reqid=reqid)
            content.move_chunk(chunk_id,
                               fake_excluded_chunks=self.fake_excluded_chunks,
                               reqid=reqid)
        except (ContentNotFound, OrphanChunk):
            return {'orphan_chunks': 1}

        return {'moved_chunks': 1, 'moved_bytes': chunk_size}
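
A task payload therefore needs exactly three identifiers, and process() always returns a single counter that the xcute framework can aggregate. Illustrative values (the hex strings are fabricated):

# Fabricated payload for RawxDecommissionTask.process()
task_payload = {
    'container_id': 'B5E0'.ljust(64, '0'),   # 64 hex digits
    'content_id': 'F00D'.ljust(32, '0'),     # 32 hex digits
    'chunk_id': 'CAFE'.ljust(64, '0'),       # 64 hex digits
}
# Possible return values, as seen above:
#   {'moved_chunks': 1, 'moved_bytes': <chunk size>}
#   {'skipped_chunks_no_longer_exist': 1}
#   {'skipped_chunks_too_small': 1} / {'skipped_chunks_too_big': 1}
#   {'orphan_chunks': 1}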
Example #43
0
def make_client(instance):
    client = ConscienceClient(
        instance.get_process_configuration()
    )
    return client
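
Anything exposing get_process_configuration() and returning a configuration mapping works here; a throwaway stub is enough to exercise the factory:

# Hedged usage sketch; FakeInstance stands in for the real `instance` type.
class FakeInstance(object):
    def get_process_configuration(self):
        return {'namespace': 'OPENIO'}

client = make_client(FakeInstance())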