def _smart_link_rdir(self, volume_id, cs=None, all_rdir=None):
    """
    Force the load balancer to avoid services that already host more
    bases than the average while selecting rdir services.

    :param volume_id: identifier of the volume to link. It is passed to
        the load balancer as a "known" rawx service, and used as the
        reference name when the link is forced in the directory.
    :param cs: conscience client to use. A `ConscienceClient` is built
        from `self.conf` when omitted.
    :param all_rdir: list of rdir service descriptions (each one with
        "addr", "score" and "tags" entries). Fetched through `cs` when
        omitted or empty.
    :returns: the "id" of the rdir service that has been selected.
    :raises ClientException: if polling fails with a status other
        than 481, or if the retry without `avoids` fails too.
    """
    if not cs:
        cs = ConscienceClient(self.conf)
    if not all_rdir:
        all_rdir = cs.all_services("rdir", True)

    # Only services with a positive score take part in the average.
    # The counter tag is optional (assign_all_rawx also defaults it to 0).
    available = [(srv["addr"], srv["tags"].get("stat.opened_db_count", 0))
                 for srv in all_rdir
                 if srv["score"] > 0]
    if available:
        mean = sum(count for _, count in available) / float(len(available))
    else:
        # No scored service: nothing to avoid, let the poll decide
        # (and report the failure, if any).
        mean = 0.0
    avoids = [_make_id(self.ns, "rdir", addr)
              for addr, count in available
              if count > mean]
    known = [_make_id(self.ns, "rawx", volume_id)]

    try:
        polled = cs.poll("rdir", avoid=avoids, known=known)[0]
    except ClientException as exc:
        if exc.status != 481:
            raise
        # Retry without `avoids`, hoping the next iteration will rebalance
        polled = cs.poll("rdir", known=known)[0]

    forced = {
        "host": polled["addr"],
        "type": "rdir",
        "seq": 1,
        "args": "",
        "id": polled["id"],
    }
    self.directory.force(RDIR_ACCT, volume_id, "rdir", forced,
                         autocreate=True)
    return polled["id"]
def __init__(self, conf, service, **kwargs):
    """
    Set up the watcher of one service.

    :param conf: configuration mapping; "check_interval", "rise", "fall"
        and "namespace" are read from it.
    :param service: description of the watched service. The "host",
        "port" and "type" fields are mandatory, "location" is optional.
    :raises Exception: if a mandatory field is missing from `service`.
    """
    self.conf = conf
    self.running = False

    # Report the first mandatory field which is missing, if any.
    absent = [field for field in ('host', 'port', 'type')
              if field not in service]
    if absent:
        raise Exception(
            'Missing field "%s" in service configuration' % absent[0])

    host = service['host']
    port = service['port']
    self.name = '%s|%s' % (host, port)

    self.check_interval = float_value(conf.get('check_interval'), 1)
    self.service = service
    self.rise = int_value(conf.get('rise'), 1)
    self.fall = int_value(conf.get('fall'), 1)
    self.logger = get_logger(self.conf)
    self.cs = ConscienceClient(self.conf)
    self.client = Client(self.conf)
    self.last_status = False
    self.failed = False

    # The location tag is set only when a non-empty location is configured.
    tags = {}
    location = service.get('location', None)
    if location:
        tags['tag.loc'] = location
    self.service_definition = {
        'ns': self.conf['namespace'],
        'type': service['type'],
        'addr': '%s:%s' % (host, port),
        'score': 0,
        'tags': tags,
    }

    self.service_checks = []
    self.service_stats = []
    self.init_checkers(service)
    self.init_stats(service)
class Harasser(object):
    """
    Load generator for rdir services: push fake chunk records, then
    remove them.

    Every record pushed is remembered in `self.sent` so that it can be
    removed afterwards.
    """

    def __init__(self, ns, max_containers=256, max_contents=256):
        """
        :param ns: name of the namespace, used as the "namespace" entry of
            the configuration given to the clients.
        :param max_containers: stored on the instance; the current
            identifier generation does not use it.
        :param max_contents: stored on the instance; the current
            identifier generation does not use it.
        """
        conf = {'namespace': ns}
        self.cs = ConscienceClient(conf)
        self.rdir = RdirClient(conf)
        self.rawx_list = [x['addr'] for x in self.cs.all_services('rawx')]
        self.sent = set()
        self.max_containers = max_containers
        self.max_contents = max_contents

    @staticmethod
    def _rate(count, elapsed):
        """Return `count / elapsed`, or 0 if no measurable time elapsed."""
        return count / elapsed if elapsed > 0 else 0

    def harass_put(self, loops=None):
        """
        Push fake chunk records to the rdir services.

        :param loops: number of records to push. A random number between
            1000 and 2000 is chosen when None.
        """
        if loops is None:
            loops = random.randint(1000, 2000)
        print("Pushing %d fake chunks" % loops)
        count_start_container = random.randrange(2**20)
        count_start_content = random.randrange(2**20)
        start = time.time()
        args = {'mtime': int(start)}
        nb_rawx = len(self.rawx_list)
        # Container and content identifiers are derived from the loop
        # counter plus a random offset, volumes are used in turn.
        for loop in range(loops, 0, -1):
            vol_id = self.rawx_list[loop % nb_rawx]
            container_id = "%064X" % (loop + count_start_container)
            content_id = "%032X" % (loop + count_start_content)
            chunk_id = "http://%s/%064X" \
                % (vol_id, random.randrange(2**128))
            self.rdir.chunk_push(
                vol_id, container_id, content_id, chunk_id, **args)
            self.sent.add((vol_id, container_id, content_id, chunk_id))
        elapsed = time.time() - start
        print("%d pushed in %.3fs, %d req/s"
              % (loops, elapsed, self._rate(loops, elapsed)))

    def harass_del(self, min_loops=0):
        """
        Remove a random number of the records previously pushed.

        :param min_loops: minimum number of records to remove, capped to
            the number of records currently known.
        """
        min_loops = min(min_loops, len(self.sent))
        loops = random.randint(min_loops, len(self.sent))
        print("Removing %d fake chunks" % loops)
        start = time.time()
        for _ in range(loops):
            self.rdir.chunk_delete(*self.sent.pop())
        elapsed = time.time() - start
        print("%d removed in %.3fs, %d req/s"
              % (loops, elapsed, self._rate(loops, elapsed)))

    def __call__(self):
        """
        Alternate pushes and removals until interrupted from the keyboard,
        then remove every record still known.
        """
        try:
            while True:
                self.harass_put()
                self.harass_del()
        except KeyboardInterrupt:
            print("Cleaning...")
            self.harass_del(len(self.sent))
def __init__(self, ns, max_containers=256, max_contents=256):
    """
    Build the clients and fetch the addresses of the rawx services.

    :param ns: name of the namespace, used as the "namespace" entry of
        the configuration given to the clients.
    :param max_containers: value stored as `self.max_containers`.
    :param max_contents: value stored as `self.max_contents`.
    """
    self.max_containers = max_containers
    self.max_contents = max_contents
    # Records pushed so far (none yet).
    self.sent = set()

    client_conf = {'namespace': ns}
    self.cs = ConscienceClient(client_conf)
    self.rdir = RdirClient(client_conf)

    rawx_services = self.cs.all_services('rawx')
    self.rawx_list = [srv['addr'] for srv in rawx_services]
def assign_all_rawx(self):
    """
    Find a rdir service for all rawx that don't have one already.

    :returns: the list of rawx service descriptions, each one completed
        with a "rdir" entry holding the description of its rdir service.
    """
    count_key = "stat.opened_db_count"
    cs = ConscienceClient(self.conf)
    all_rawx = cs.all_services("rawx")
    all_rdir = cs.all_services("rdir", True)
    by_id = dict((_make_id(self.ns, "rdir", srv["addr"]), srv)
                 for srv in all_rdir)

    for rawx in all_rawx:
        volume = rawx["addr"]
        try:
            # Verify that there is no rdir linked
            resp = self.directory.get(RDIR_ACCT, volume,
                                      service_type="rdir")
            linked_host = self._lookup_rdir_host(resp)
            rawx["rdir"] = by_id[_make_id(self.ns, "rdir", linked_host)]
        except (NotFound, ClientException):
            rdir_id = self._smart_link_rdir(volume, cs, all_rdir)
            rdir_srv = by_id[rdir_id]
            # Account for the new base, so the next selections see it.
            tags = rdir_srv["tags"]
            tags[count_key] = tags.get(count_key, 0) + 1
            rawx["rdir"] = rdir_srv
    return all_rawx
def __init__(self, conf, name, context, **kwargs):
    """
    Set up the worker: logger, socket, service clients and settings.

    :param conf: configuration mapping; "acct_refresh_interval",
        "acct_update" and "rdir_update" are read from it.
    :param name: name of the worker.
    :param context: object handed over to `init_zmq`.
    :keyword verbose: forwarded to `get_logger` (False by default).
    """
    self.conf = conf
    self.name = name
    self.logger = get_logger(self.conf,
                             verbose=kwargs.pop("verbose", False))
    self.init_zmq(context)

    self.cs = ConscienceClient(self.conf)
    self.rdir = RdirClient(self.conf)

    self._acct_addr = None
    self.acct_update = 0
    refresh_interval = conf.get("acct_refresh_interval")
    self.acct_refresh_interval = int_value(refresh_interval, 60)

    # Both switches are enabled unless the configuration says otherwise.
    acct_enabled = conf.get("acct_update", True)
    self.acct_update = true_value(acct_enabled)
    rdir_enabled = conf.get("rdir_update", True)
    self.rdir_update = true_value(rdir_enabled)

    self.session = requests.Session()
    self.failed = False
def init(self):
    """
    Prepare the worker before it runs: patch the standard library with
    eventlet (except the `os` module), create the HTTP session and the
    service clients, read the settings from `self.conf`, then call the
    parent's `init`.
    """
    eventlet.monkey_patch(os=False)
    conf = self.conf

    self.session = requests.Session()
    self.cs = ConscienceClient(conf)
    self.rdir = RdirClient(conf)

    self._acct_addr = None
    self.acct_update = 0
    self.graceful_timeout = 1

    refresh_interval = conf.get('acct_refresh_interval')
    self.acct_refresh_interval = int_value(refresh_interval, 60)
    self.concurrency = int_value(conf.get('concurrency'), 1000)

    # Both switches are enabled unless the configuration says otherwise.
    acct_enabled = conf.get('acct_update', True)
    self.acct_update = true_value(acct_enabled)
    rdir_enabled = conf.get('rdir_update', True)
    self.rdir_update = true_value(rdir_enabled)

    super(EventWorker, self).init()
def test_rdir_linking(self):
    """
    Tests that rdir services linked to rawx services
    are not on the same locations
    """
    account = '_RDIR_TEST'
    cs = ConscienceClient({'namespace': self.ns})
    rawx_list = cs.all_services('rawx')
    rdir_by_addr = dict((srv['addr'], srv)
                        for srv in cs.all_services('rdir'))

    # Link the services
    for rawx in rawx_list:
        self.api.link(account, rawx['addr'], 'rdir', autocreate=True)

    # Do the checks
    for rawx in rawx_list:
        reference = self.api.get(account, rawx['addr'],
                                 service_type='rdir')
        linked_rdir = reference['srv']
        rdir = rdir_by_addr[linked_rdir[0]['host']]
        self.assertNotEqual(rawx['tags'].get('tag.loc'),
                            rdir['tags'].get('tag.loc'))

    # Unlink the services
    for rawx in rawx_list:
        volume = rawx['addr']
        self.api.unlink(account, volume, 'rdir')
        self.api.delete(account, volume)
def init(self):
    """
    Prepare the worker before it runs: patch the standard library with
    eventlet (except the `os` module), read the settings from
    `self.conf`, create the HTTP session and the service clients, load
    the event handlers, then call the parent's `init`.

    :raises ValueError: if "handlers_conf" is missing from the
        configuration.
    """
    eventlet.monkey_patch(os=False)
    conf = self.conf

    self.tube = conf.get("tube", DEFAULT_TUBE)
    self.session = requests.Session()
    self.cs = ConscienceClient(conf)
    self.rdir = RdirClient(conf)

    self._acct_addr = None
    self.acct_update = 0
    self.graceful_timeout = 1

    refresh_interval = conf.get('acct_refresh_interval')
    self.acct_refresh_interval = int_value(refresh_interval, 60)

    # Both switches are enabled unless the configuration says otherwise.
    acct_enabled = conf.get('acct_update', True)
    self.acct_update = true_value(acct_enabled)
    rdir_enabled = conf.get('rdir_update', True)
    self.rdir_update = true_value(rdir_enabled)

    if 'handlers_conf' not in conf:
        raise ValueError("'handlers_conf' path not defined in conf")
    handlers_path = conf.get('handlers_conf')
    self.handlers = loadhandlers(handlers_path, evt_types, app=self)

    super(EventWorker, self).init()
class AccountClient(Client):
    """
    Client for the account service. The address of the service is looked
    up through the conscience before each request.
    """

    def __init__(self, conf, **kwargs):
        """
        :param conf: configuration, forwarded to the parent constructor.
        """
        super(AccountClient, self).__init__(conf, **kwargs)
        # NOTE(review): relies on the parent constructor setting
        # `self.conf` -- confirm against `Client`.
        self.cs = ConscienceClient(self.conf)

    # TODO keep account srv addr in local cache to avoid lookup requests
    def _get_account_addr(self):
        """
        Ask the conscience for an instance of the account service.

        :returns: the "addr" entry of the instance.
        :raises ClientException: if the lookup fails for any reason.
        """
        try:
            acct_instance = self.cs.next_instance('account')
            acct_addr = acct_instance.get('addr')
        except Exception:
            raise ClientException("No Account service found")
        return acct_addr

    def _make_uri(self, action):
        """Build the URL of `action` on an account service instance."""
        account_addr = self._get_account_addr()
        uri = 'http://%s/v1.0/account/%s' % (account_addr, action)
        return uri

    def _account_request(self, account, method, action, params=None):
        """
        Send a request about `account` to the account service.

        :param account: name of the account, sent as the "id" parameter.
        :param method: HTTP method of the request.
        :param action: last part of the request path.
        :param params: optional extra query parameters. The mapping is
            copied, the caller's object is never modified.
        :returns: a (response, body) tuple.
        """
        uri = self._make_uri(action)
        # Work on a copy: a shared default (or the caller's dict) must
        # not keep the 'id' of a previous request.
        query = dict(params) if params else {}
        query['id'] = account
        resp, body = self._direct_request(method, uri, params=query)
        return resp, body

    def account_create(self, account):
        """Create the account named `account`."""
        self._account_request(account, 'PUT', 'create')

    def account_delete(self, account):
        """Delete the account named `account`."""
        self._account_request(account, 'POST', 'delete')

    def containers_list(self, account, marker=None, limit=None):
        """
        List the containers of an account.

        :param marker: sent as the "marker" parameter when not None.
        :param limit: sent as the "limit" parameter when not None.
        :returns: the body of the response.
        """
        params = {}
        if marker is not None:
            params['marker'] = marker
        if limit is not None:
            params['limit'] = limit
        _resp, body = self._account_request(account, 'GET', 'containers',
                                            params)
        return body
class EventWorker(object):
    """
    Worker receiving events on a ZeroMQ REP socket and dispatching each
    of them to the `handle_*` method matching its type.
    """

    def __init__(self, conf, name, context, **kwargs):
        """
        :param conf: configuration mapping; "acct_refresh_interval" and
            "acct_update" are read from it.
        :param name: name of the worker, used in log messages.
        :param context: ZeroMQ context used to create the socket.
        :keyword verbose: forwarded to `get_logger` (False by default).
        """
        self.conf = conf
        self.name = name
        verbose = kwargs.pop('verbose', False)
        self.logger = get_logger(self.conf, verbose=verbose)
        self.init_zmq(context)
        self.cs = ConscienceClient(self.conf)
        self.rdir = RdirClient(self.conf)
        self._acct_addr = None
        # Time of the last successful account service lookup. Kept apart
        # from the `acct_update` flag so that a lookup never alters it.
        self._acct_last_refresh = 0
        self.acct_refresh_interval = int_value(
            conf.get('acct_refresh_interval'), 60)
        # Whether container events are forwarded to the account service.
        self.acct_update = true_value(conf.get('acct_update', True))
        self.session = requests.Session()
        self.running = False
        self.failed = False

    def start(self):
        """Mark the worker as running and enter the event loop."""
        self.logger.info('worker "%s" starting', self.name)
        self.running = True
        self.run()

    def stop(self):
        """Ask the event loop to exit after the current iteration."""
        self.logger.info('worker "%s" stopping', self.name)
        self.running = False

    def init_zmq(self, context):
        """Create the REP socket and connect it to the event front."""
        socket = context.socket(zmq.REP)
        socket.connect('inproc://event-front')
        self.socket = socket

    def safe_ack(self, msg):
        """Send `msg` on the socket; a failure is logged, never raised."""
        try:
            self.socket.send_multipart(msg)
        except Exception:
            self.logger.warning('Unable to ack event')

    def run(self):
        """
        Receive, process and acknowledge events while `self.running`.

        The acknowledgement is made of the first frame of the message
        followed by "0" on success or an empty string on failure.
        Any exception marks the worker as failed and is re-raised.
        """
        try:
            while self.running:
                msg = self.socket.recv_multipart()
                self.logger.debug("msg received: %s", msg)
                event = decode_msg(msg)
                success = self.process_event(event)
                f = "0" if success else ""
                self.safe_ack([msg[0], f])
        except Exception as e:
            self.logger.warning('ERROR in worker "%s": %s', self.name, e)
            self.failed = True
            # Bare raise keeps the original traceback.
            raise
        finally:
            self.logger.info('worker "%s" stopped', self.name)

    def process_event(self, event):
        """
        Run the handler matching the event.

        :returns: True if the event has been handled, or if there is no
            handler for it; False if the handler raised an exception.
        """
        handler = self.get_handler(event)
        if not handler:
            self.logger.warning("No handler found")
            # mark as success
            return True
        try:
            handler(event)
        except Exception:
            # Log the failure instead of hiding it. Exceptions which are
            # not `Exception` subclasses (KeyboardInterrupt, ...) are
            # no longer swallowed.
            self.logger.exception(
                'worker "%s" failed to process event', self.name)
            return False
        return True

    def get_handler(self, event):
        """
        Find the method in charge of the event.

        :returns: the bound handler, or None if the event has no type or
            an unknown type.
        """
        event_type = event.get('event')
        if not event_type:
            return None

        if event_type == EventType.CONTAINER_PUT:
            return self.handle_container_put
        elif event_type == EventType.CONTAINER_DESTROY:
            return self.handle_container_destroy
        elif event_type == EventType.CONTAINER_UPDATE:
            return self.handle_container_update
        elif event_type == EventType.OBJECT_PUT:
            return self.handle_object_put
        elif event_type == EventType.OBJECT_DELETE:
            return self.handle_object_delete
        elif event_type == EventType.REFERENCE_UPDATE:
            return self.handle_reference_update
        elif event_type == EventType.CHUNK_PUT:
            return self.handle_chunk_put
        elif event_type == EventType.CHUNK_DELETE:
            return self.handle_chunk_delete
        elif event_type == EventType.PING:
            return self.handle_ping
        else:
            return None

    @property
    def acct_addr(self):
        """
        Address of an account service instance.

        The address is looked up through the conscience when unknown or
        when the last lookup is older than `acct_refresh_interval`
        seconds. If the lookup fails, the last known address is returned
        (None if there has never been one).
        """
        if not self._acct_addr or self.acct_refresh():
            try:
                acct_instance = self.cs.next_instance(ACCOUNT_SERVICE)
                self._acct_addr = acct_instance.get('addr')
                self._acct_last_refresh = time.time()
            except Exception:
                self.logger.warning('Unable to find account instance')
        return self._acct_addr

    def acct_refresh(self):
        """Tell if the account service address is due for a new lookup."""
        elapsed = time.time() - self._acct_last_refresh
        return elapsed > self.acct_refresh_interval

    def handle_container_put(self, event):
        """
        Handle container creation: notify the account service, unless
        account updates are disabled.

        :param event: the event, with "when" and "data" entries
        """
        self.logger.debug('worker "%s" handle container put', self.name)
        if not self.acct_update:
            return
        uri = 'http://%s/v1.0/account/container/update' % self.acct_addr
        mtime = event.get('when')
        data = event.get('data')
        name = data.get('url').get('user')
        account = data.get('url').get('account')

        event = {'mtime': mtime, 'name': name}
        self.session.post(uri, params={'id': account}, data=json.dumps(event))

    def handle_container_update(self, event):
        """
        Handle container update: send the byte and object counts to the
        account service, unless account updates are disabled.

        :param event: the event, with "when", "data" and "url" entries
        """
        self.logger.debug('worker "%s" handle container update', self.name)
        if not self.acct_update:
            return
        uri = 'http://%s/v1.0/account/container/update' % self.acct_addr
        mtime = event.get('when')
        data = event.get('data')
        # NOTE(review): "url" is read from the event itself here, but from
        # the "data" entry in the put/destroy handlers -- confirm which
        # layout this event type really has.
        name = event.get('url').get('user')
        account = event.get('url').get('account')
        bytes_count = data.get('bytes-count', 0)
        object_count = data.get('object-count', 0)

        event = {
            'mtime': mtime,
            'name': name,
            'bytes': bytes_count,
            'objects': object_count
        }
        self.session.post(uri, params={'id': account}, data=json.dumps(event))

    def handle_container_destroy(self, event):
        """
        Handle container destroy: notify the account service, unless
        account updates are disabled.

        :param event: the event, with "when" and "data" entries
        """
        self.logger.debug('worker "%s" handle container destroy', self.name)
        if not self.acct_update:
            return
        uri = 'http://%s/v1.0/account/container/update' % self.acct_addr
        dtime = event.get('when')
        data = event.get('data')
        name = data.get('url').get('user')
        account = data.get('url').get('account')

        event = {'dtime': dtime, 'name': name}
        self.session.post(uri, params={'id': account}, data=json.dumps(event))

    def handle_object_delete(self, event):
        """
        Handle object deletion.
        Delete the chunks of the object.

        :param event: the event, whose "data" entry is a list of items;
            only the items of type "chunks" are considered
        """
        self.logger.debug('worker "%s" handle object delete', self.name)
        pile = GreenPile(PARALLEL_CHUNKS_DELETE)

        chunks = [item for item in event.get('data')
                  if item.get('type') == 'chunks']
        if not chunks:
            self.logger.warning('No chunks found in event data')
            return

        def delete_chunk(chunk):
            # Returns None when the request fails or times out.
            resp = None
            try:
                with Timeout(CHUNK_TIMEOUT):
                    resp = self.session.delete(chunk['id'])
            except (Exception, Timeout) as e:
                self.logger.exception(e)
            return resp

        for chunk in chunks:
            pile.spawn(delete_chunk, chunk)

        resps = [resp for resp in pile if resp]

        for resp in resps:
            if resp.status_code == 204:
                self.logger.info('deleted chunk %s', resp.url)
            else:
                self.logger.warning('failed to delete chunk %s', resp.url)

    def handle_object_put(self, event):
        """
        Handle object creation.
        TODO
        :param event:
        """
        self.logger.debug('worker "%s" handle object put', self.name)

    def handle_reference_update(self, event):
        """
        Handle reference update.
        TODO
        :param event
        """
        self.logger.debug('worker "%s" handle reference update', self.name)

    def handle_chunk_put(self, event):
        """
        Handle chunk creation: push the chunk record to the rdir.

        The identifiers are removed from the "data" entry of the event
        (which is modified in place); what remains, plus the "mtime",
        is sent as extra fields.

        :param event: the event, with "when" and "data" entries
        :raises KeyError: if one of the identifiers is missing
        """
        self.logger.debug('worker "%s" handle chunk creation', self.name)

        when = event.get('when')
        data = event.get('data')
        volume_id = data.pop('volume_id')
        container_id = data.pop('container_id')
        content_id = data.pop('content_id')
        chunk_id = data.pop('chunk_id')
        data['mtime'] = when
        self.rdir.chunk_push(volume_id, container_id, content_id, chunk_id,
                             **data)

    def handle_chunk_delete(self, event):
        """
        Handle chunk deletion: remove the chunk record from the rdir.

        :param event: the event, whose "data" entry holds the identifiers
        """
        self.logger.debug('worker "%s" handle chunk deletion', self.name)

        data = event.get('data')
        volume_id = data.get('volume_id')
        container_id = data.get('container_id')
        content_id = data.get('content_id')
        chunk_id = data.get('chunk_id')
        self.rdir.chunk_delete(volume_id, container_id, content_id, chunk_id)

    def handle_ping(self, event):
        """
        Handle ping

        :param event
        """
        self.logger.debug('worker "%s" handle ping', self.name)
class ContainerClient(ProxyClient): """ Intermediate level class to manage containers. """ def __init__(self, conf, refresh_rawx_scores_delay=30.0, **kwargs): super(ContainerClient, self).__init__(conf, request_prefix="/container", **kwargs) # to refresh the rawx scores from cache kwargs.pop('pool_manager', None) self.conscience_client = ConscienceClient( self.conf, pool_manager=self.pool_manager, **kwargs) self.rawx_scores = dict() self._refresh_rawx_scores_delay = refresh_rawx_scores_delay self._last_refresh_rawx_scores = 0.0 def _make_uri(self, target): """ Build URIs for request that don't use the same prefix as the one set in this class' constructor. """ uri = '%s://%s/v3.0/%s/%s' % (self.proxy_scheme, self.proxy_netloc, self.ns, target) return uri def _make_params(self, account=None, reference=None, path=None, cid=None, content=None, version=None, **kwargs): if cid: params = {'cid': cid} else: params = {'acct': account, 'ref': reference} if path: params.update({'path': path}) if content: params.update({'content': content}) if version: params.update({'version': version}) return params def _get_rawx_scores(self): rawx_services = self.conscience_client.all_services('rawx') rawx_scores = dict() for rawx_service in rawx_services: rawx_scores[rawx_service['id']] = \ rawx_service['score'] return rawx_scores def _refresh_rawx_scores(self, now=None, **kwargs): """Refresh rawx service scores.""" self.rawx_scores = self._get_rawx_scores() if not now: now = time.time() self._last_refresh_rawx_scores = now def _maybe_refresh_rawx_scores(self, **kwargs): """Refresh rawx service scores if delay has been reached.""" if self._refresh_rawx_scores_delay >= 0.0 or not self.rawx_scores: now = time.time() if now - self._last_refresh_rawx_scores \ > self._refresh_rawx_scores_delay: try: self._refresh_rawx_scores(now, **kwargs) except OioNetworkException as exc: self.logger.warn( "Failed to refresh rawx service scores: %s", exc) except Exception: self.logger.exception( "Failed to 
refresh rawx service scores") def container_create(self, account, reference, properties=None, system=None, **kwargs): """ Create a container. :param account: account in which to create the container :type account: `str` :param reference: name of the container :type reference: `str` :param properties: properties to set on the container :type properties: `dict` :param system: system properties to set on the container :type system: `dict` :keyword headers: extra headers to send to the proxy :type headers: `dict` :returns: True if the container has been created, False if it already exists """ params = self._make_params(account, reference) data = json.dumps({ 'properties': properties or {}, 'system': system or {} }) resp, body = self._request('POST', '/create', params=params, data=data, **kwargs) if resp.status not in (204, 201): raise exceptions.from_response(resp, body) return resp.status == 201 def container_create_many(self, account, containers, properties=None, **kwargs): """ Create several containers. 
:param account: account in which to create the containers :type account: `str` :param containers: names of the containers :type containers: iterable of `str` :param properties: properties to set on the containers :type properties: `dict` :keyword headers: extra headers to send to the proxy :type headers: `dict` :returns: a list of tuples with the name of the container and a boolean telling if the container has been created :rtype: `list` of `tuple` """ results = list() try: params = self._make_params(account) unformatted_data = list() for container in containers: unformatted_data.append({ 'name': container, 'properties': properties or {}, 'system': kwargs.get('system', {}) }) data = json.dumps({"containers": unformatted_data}) resp, body = self._request('POST', '/create_many', params=params, data=data, **kwargs) if resp.status not in (204, 200): raise exceptions.from_response(resp, body) for container in body["containers"]: results.append((container["name"], container["status"] == 201)) return results except exceptions.TooLarge: # Batch too large for the proxy pivot = len(containers) // 2 head = containers[:pivot] tail = containers[pivot:] if head: results += self.container_create_many(account, head, properties=properties, **kwargs) if tail: results += self.container_create_many(account, tail, properties=properties, **kwargs) return results except exceptions.NotFound: # Batches not supported by the proxy for container in containers: try: rc = self.container_create(account, container, properties=properties, **kwargs) results.append((container, rc)) except Exception: results.append((container, False)) return results def container_delete(self, account=None, reference=None, cid=None, **kwargs): """ Delete a container. 
:param account: account from which to delete the container :type account: `str` :param reference: name of the container :type reference: `str` :param cid: container id that can be used instead of account and reference :type cid: `str` :keyword headers: extra headers to send to the proxy :type headers: `dict` """ params = self._make_params(account, reference, cid=cid) del_cached_container_metadata(account=account, reference=reference, cid=cid, **kwargs) try: self._request('POST', '/destroy', params=params, **kwargs) except exceptions.Conflict as exc: raise exceptions.ContainerNotEmpty(exc) def container_show(self, account=None, reference=None, cid=None, **kwargs): """ Get information about a container (like user properties). :param account: account in which the container is :type account: `str` :param reference: name of the container :type reference: `str` :param cid: container id that can be used instead of account and reference :type cid: `str` :keyword headers: extra headers to send to the proxy :type headers: `dict` :returns: a `dict` with "properties" containing a `dict` of user properties. :deprecated: use `container_get_properties` instead """ params = self._make_params(account, reference, cid=cid) _resp, body = self._request('GET', '/show', params=params, **kwargs) return body def container_snapshot(self, account=None, reference=None, dst_account=None, dst_reference=None, cid=None, **kwargs): """ Create a snapshot of a the container. This function duplicates only the database. It doesn't duplicate the chunks of the contents. 
:param account: account in which the container is :type account: `str` :param reference: name of the container :type reference: `str` :param cid: container id that can be used instead of account and reference :type cid: `str` :param dst_account: account in which the snapshot will be created :type dst_account: `str` :param dst_reference: name of the snapshot :type dst_reference: `str` """ params = self._make_params(account, reference, cid=cid) data = json.dumps({"account": dst_account, "container": dst_reference}) resp, _ = self._request('POST', '/snapshot', params=params, data=data, **kwargs) return resp def container_enable(self, account=None, reference=None, cid=None, **kwargs): """ Change the status of a container database to enable :param account: account in which the container is :type account: `str` :param reference: name of the container :type reference: `str` :param cid: container id that can be used instead of account and reference """ uri = self._make_uri('admin/enable') params = self._make_params(account, reference, cid=cid) params.update({"type": "meta2"}) del_cached_container_metadata(account=account, reference=reference, cid=cid, **kwargs) resp, _ = self._direct_request('POST', uri, params=params, **kwargs) return resp def container_freeze(self, account=None, reference=None, cid=None, **kwargs): """ Freeze the database of a container :param account: account in which the container is :type account: `str` :param reference: name of the container :type reference: name of the container :param cid: container id that can be used instead of account and reference """ uri = self._make_uri('admin/freeze') params = self._make_params(account, reference, cid=cid) params.update({"type": "meta2"}) del_cached_container_metadata(account=account, reference=reference, cid=cid, **kwargs) resp, _ = self._direct_request('POST', uri, params=params, **kwargs) return resp @extract_reference_params def container_get_properties(self, account=None, reference=None, 
properties=None, cid=None, params=None, **kwargs): """ Get information about a container (user and system properties). :param account: account in which the container is :type account: `str` :param reference: name of the container :type reference: `str` :param cid: container id that can be used instead of account and reference :type cid: `str` :keyword headers: extra headers to send to the proxy :type headers: `dict` :returns: a `dict` with "properties" and "system" entries, containing respectively a `dict` of user properties and a `dict` of system properties. """ container_meta = get_cached_container_metadata(account=account, reference=reference, cid=cid, **kwargs) if container_meta is not None: return container_meta if not properties: properties = list() data = json.dumps(properties) _resp, container_meta = self._request('POST', '/get_properties', data=data, params=params, **kwargs) set_cached_container_metadata(container_meta, account=account, reference=reference, cid=cid, **kwargs) return container_meta def container_set_properties(self, account=None, reference=None, properties=None, clear=False, cid=None, system=None, **kwargs): params = self._make_params(account, reference, cid=cid) if clear: params["flush"] = 1 data = json.dumps({ 'properties': properties or {}, 'system': system or {} }) del_cached_container_metadata(account=account, reference=reference, cid=cid, **kwargs) _resp, body = self._request('POST', '/set_properties', data=data, params=params, **kwargs) return body def container_del_properties(self, account=None, reference=None, properties=[], cid=None, **kwargs): params = self._make_params(account, reference, cid=cid) data = json.dumps(properties) del_cached_container_metadata(account=account, reference=reference, cid=cid, **kwargs) _resp, body = self._request('POST', '/del_properties', data=data, params=params, **kwargs) return body def container_touch(self, account=None, reference=None, cid=None, recompute=False, **kwargs): params = 
self._make_params(account, reference, cid=cid) if recompute: params['recompute'] = True self._request('POST', '/touch', params=params, **kwargs) def container_dedup(self, account=None, reference=None, cid=None, **kwargs): params = self._make_params(account, reference, cid=cid) self._request('POST', '/dedup', params=params, **kwargs) def container_purge(self, account=None, reference=None, cid=None, maxvers=None, **kwargs): params = self._make_params(account, reference, cid=cid) if maxvers is not None: params["maxvers"] = maxvers self._request('POST', '/purge', params=params, **kwargs) def container_raw_insert(self, bean, account=None, reference=None, cid=None, **kwargs): params = self._make_params(account, reference, cid=cid) data = json.dumps((bean, )) if kwargs.pop("frozen", None): params["frozen"] = 1 self._request('POST', '/raw_insert', data=data, params=params, **kwargs) def container_raw_update(self, old, new, account=None, reference=None, cid=None, **kwargs): params = self._make_params(account, reference, cid=cid) data = json.dumps({"old": old, "new": new}) if kwargs.pop("frozen", None): params["frozen"] = 1 self._request('POST', '/raw_update', data=data, params=params, **kwargs) def container_raw_delete(self, account=None, reference=None, data=None, cid=None, **kwargs): """ Delete raw 'beans' from a container. :param data: dictionaries representing the beans to delete. They must have a key for each column of the meta2 database, plus a 'type' telling which type of bean it is. 
:type data: `list` of `dict` items """ params = self._make_params(account, reference, cid=cid) data = json.dumps(data) self._request('POST', '/raw_delete', data=data, params=params, **kwargs) def container_flush(self, account=None, reference=None, cid=None, **kwargs): params = self._make_params(account, reference, cid=cid) resp, _ = self._request('POST', '/flush', params=params, **kwargs) return { 'truncated': boolean_value(resp.getheader('x-oio-truncated'), False) } @extract_reference_params def content_list(self, account=None, reference=None, limit=None, marker=None, end_marker=None, prefix=None, delimiter=None, properties=False, cid=None, versions=False, deleted=False, params=None, **kwargs): """ Get the list of contents of a container. :returns: a tuple with container metadata `dict` as first element and a `dict` with "object" and "prefixes" as second element """ p_up = { 'max': limit, 'marker': marker, 'end_marker': end_marker, 'prefix': prefix, 'delimiter': delimiter, 'properties': properties } params.update(p_up) # As of 4.0.0.a3, to make it false, the 'all' parameter must be absent if versions: params['all'] = '1' if deleted: params['deleted'] = 1 if kwargs.get('local'): params['local'] = 1 resp, body = self._request('GET', '/list', params=params, **kwargs) return resp.headers, body @ensure_headers def content_create(self, account=None, reference=None, path=None, size=None, checksum=None, data=None, cid=None, content_id=None, stgpol=None, version=None, mime_type=None, chunk_method=None, headers=None, append=False, change_policy=False, force=False, **kwargs): """ Create a new object. This method does not upload any data, it just registers object metadata in the database. 
:param size: size of the object :type size: `int` :param checksum: checksum of the object (may be None when appending) :type checksum: hexadecimal `str` :param data: metadata of the object (list of chunks and dict of properties) :type data: `dict` :param cid: container id that can be used in place of `account` and `reference` :type cid: hexadecimal `str` :param content_id: the ID to set on the object, or the ID of the existing object when appending :param stgpol: name of the storage policy for the object :param version: version of the object :type version: `int` :param mime_type: MIME type to set on the object :param chunk_method: :param headers: extra headers to send to the proxy :param append: append to an existing object instead of creating it :type append: `bool` :param change_policy: change policy of an existing object :type change_policy: `bool` """ uri = self._make_uri('content/create') params = self._make_params(account, reference, path, cid=cid) if append: params['append'] = '1' if change_policy: params['change_policy'] = '1' # TODO(FVE): implement 'force' parameter if not isinstance(data, dict): warnings.simplefilter('once') warnings.warn("'data' parameter should be a dict, not a list", DeprecationWarning, stacklevel=3) if kwargs.get('meta_pos') is not None: data = data['chunks'] # TODO(FVE): change "id" into "content", and other occurrences params['id'] = content_id uri = self._make_uri('content/update') data = json.dumps(data) hdrs = { 'x-oio-content-meta-length': str(size), 'x-oio-content-meta-hash': checksum } hdrs.update(headers) if content_id is not None: hdrs['x-oio-content-meta-id'] = content_id if stgpol is not None: hdrs['x-oio-content-meta-policy'] = stgpol if version is not None: hdrs['x-oio-content-meta-version'] = str(version) if mime_type is not None: hdrs['x-oio-content-meta-mime-type'] = mime_type if chunk_method is not None: hdrs['x-oio-content-meta-chunk-method'] = chunk_method del_cached_object_metadata(account=account, 
reference=reference, path=path, cid=cid, version=version, **kwargs) resp, body = self._direct_request('POST', uri, data=data, params=params, headers=hdrs, **kwargs) return resp, body def content_drain(self, account=None, reference=None, path=None, cid=None, version=None, **kwargs): uri = self._make_uri('content/drain') params = self._make_params(account, reference, path, cid=cid, version=version) del_cached_object_metadata(account=account, reference=reference, path=path, cid=cid, version=version, **kwargs) resp, _ = self._direct_request('POST', uri, params=params, **kwargs) return resp.status == 204 def content_delete(self, account=None, reference=None, path=None, cid=None, version=None, **kwargs): """ Delete one object. :returns: True if the object has been deleted """ uri = self._make_uri('content/delete') params = self._make_params(account, reference, path, cid=cid, version=version) del_cached_object_metadata(account=account, reference=reference, path=path, cid=cid, version=version, **kwargs) resp, _ = self._direct_request('POST', uri, params=params, **kwargs) return resp.status == 204 def content_delete_many(self, account=None, reference=None, paths=None, cid=None, **kwargs): """ Delete several objects. 
:param paths: an iterable of object paths (should not be a generator) :returns: a list of tuples with the path of the content and a boolean telling if the content has been deleted :rtype: `list` of `tuple` """ uri = self._make_uri('content/delete_many') params = self._make_params(account, reference, cid=cid) unformatted_data = list() for obj in paths: unformatted_data.append({'name': obj}) data = json.dumps({"contents": unformatted_data}) results = list() for path in paths: del_cached_object_metadata(account=account, reference=reference, path=path, cid=cid, **kwargs) try: _, resp_body = self._direct_request('POST', uri, data=data, params=params, **kwargs) for obj in resp_body["contents"]: results.append((obj["name"], obj["status"] == 204)) return results except exceptions.NotFound: for obj in paths: rc = self.content_delete(account, reference, obj, cid=cid, **kwargs) results.append((obj, rc)) return results except exceptions.TooLarge: pivot = len(paths) // 2 head = paths[:pivot] tail = paths[pivot:] if head: results += self.content_delete_many(account, reference, head, cid=cid, **kwargs) if tail: results += self.content_delete_many(account, reference, tail, cid=cid, **kwargs) return results except Exception: raise @extract_reference_params def content_locate(self, account=None, reference=None, path=None, cid=None, content=None, version=None, properties=True, params=None, **kwargs): """ Get a description of the content along with the list of its chunks. 
:param cid: container id that can be used in place of `account` and `reference` :type cid: hexadecimal `str` :param content: content id that can be used in place of `path` :type content: hexadecimal `str` :param properties: should the request return object properties along with content description :type properties: `bool` :returns: a tuple with content metadata `dict` as first element and chunk `list` as second element """ content_meta, chunks = get_cached_object_metadata( account=account, reference=reference, path=path, cid=cid, version=version, properties=properties, **kwargs) if content_meta is not None and chunks is not None: # Refresh asynchronously so as not to slow down the current request eventlet.spawn_n(self._maybe_refresh_rawx_scores, **kwargs) for chunk in chunks: chunk['score'] = self.rawx_scores.get( chunk['url'].split('/')[2], 0) return content_meta, chunks uri = self._make_uri('content/locate') params['properties'] = properties try: resp, chunks = self._direct_request('GET', uri, params=params, **kwargs) content_meta = extract_content_headers_meta(resp.headers) except exceptions.OioNetworkException as exc: # TODO(FVE): this special behavior can be removed when # the 'content/locate' protocol is changed to include # object properties in the response body instead of headers. 
if properties and 'got more than ' in str(exc): params['properties'] = False _resp, chunks = self._direct_request('GET', uri, params=params, **kwargs) content_meta = self.content_get_properties(account, reference, path, cid=cid, content=content, version=version, **kwargs) else: raise set_cached_object_metadata(content_meta, chunks, account=account, reference=reference, path=path, cid=cid, version=version, properties=properties, **kwargs) return content_meta, chunks @extract_reference_params def content_prepare(self, account=None, reference=None, path=None, position=0, size=None, cid=None, stgpol=None, content_id=None, version=None, params=None, **kwargs): """ Prepare an upload: get URLs of chunks on available rawx. :param position: position a the metachunk that must be prepared :param stgpol: name of the storage policy of the object being uploaded :param version: version of the object being uploaded. This is required only on the second and later calls to this method to get coherent results. :keyword autocreate: create container if it doesn't exist """ uri = self._make_uri('content/prepare') data = {'size': size, 'position': position} if stgpol: data['policy'] = stgpol data = json.dumps(data) try: resp, body = self._direct_request('POST', uri + '2', data=data, params=params, **kwargs) chunks = body['chunks'] obj_meta = extract_content_headers_meta(resp.headers) obj_meta['properties'] = dict() # pylint: disable=no-member obj_meta['properties'].update(body.get('properties', {})) except exceptions.NotFound: # Proxy does not support v2 request (oio < 4.3) resp, chunks = self._direct_request('POST', uri, data=data, params=params, **kwargs) obj_meta = extract_content_headers_meta(resp.headers) return obj_meta, chunks @extract_reference_params def content_get_properties(self, account=None, reference=None, path=None, properties=None, cid=None, content=None, version=None, params=None, **kwargs): """ Get a description of the content along with its user properties. 
""" obj_meta, _ = get_cached_object_metadata(account=account, reference=reference, path=path, cid=cid, version=version, properties=True, **kwargs) if obj_meta is not None: return obj_meta uri = self._make_uri('content/get_properties') data = json.dumps(properties) if properties else None resp, body = self._direct_request('POST', uri, data=data, params=params, **kwargs) obj_meta = extract_content_headers_meta(resp.headers) obj_meta.update(body) set_cached_object_metadata(obj_meta, None, account=account, reference=reference, path=path, cid=cid, version=version, properties=True, **kwargs) return obj_meta def content_set_properties(self, account=None, reference=None, path=None, properties={}, cid=None, version=None, clear=False, **kwargs): """ Set properties on an object. :param properties: dictionary of properties """ uri = self._make_uri('content/set_properties') params = self._make_params(account, reference, path, cid=cid, version=version) if clear: params['flush'] = 1 data = json.dumps(properties) del_cached_object_metadata(account=account, reference=reference, path=path, cid=cid, version=version, **kwargs) _resp, _body = self._direct_request('POST', uri, data=data, params=params, **kwargs) def content_del_properties(self, account=None, reference=None, path=None, properties=[], cid=None, version=None, **kwargs): """ Delete some properties from an object. :param properties: list of property keys to delete :type properties: `list` :returns: True is the property has been deleted """ uri = self._make_uri('content/del_properties') params = self._make_params(account, reference, path, cid=cid, version=version) # Build a list in case the parameter is a view (not serializable). 
data = json.dumps([x for x in properties]) del_cached_object_metadata(account=account, reference=reference, path=path, cid=cid, version=version, **kwargs) resp, _body = self._direct_request('POST', uri, data=data, params=params, **kwargs) return resp.status == 204 def content_touch(self, account=None, reference=None, path=None, cid=None, version=None, **kwargs): uri = self._make_uri('content/touch') params = self._make_params(account, reference, path, cid=cid, version=version) self._direct_request('POST', uri, params=params, **kwargs) @extract_reference_params def content_spare(self, account=None, reference=None, path=None, version=None, data=None, cid=None, stgpol=None, position=None, params=None, **kwargs): uri = self._make_uri('content/spare') if None in (stgpol, position): raise ValueError('stgpol and position cannot be None') params['stgpol'] = stgpol params['position'] = position data = json.dumps(data) _resp, body = self._direct_request('POST', uri, data=data, params=params, **kwargs) return body def content_truncate(self, account=None, reference=None, path=None, cid=None, version=None, size=0, **kwargs): uri = self._make_uri('content/truncate') params = self._make_params(account, reference, path, cid=cid, version=version) params['size'] = size del_cached_object_metadata(account=account, reference=reference, path=path, cid=cid, version=version, **kwargs) _resp, body = self._direct_request('POST', uri, params=params, **kwargs) return body def content_purge(self, account=None, reference=None, path=None, cid=None, maxvers=None, **kwargs): uri = self._make_uri('content/purge') params = self._make_params(account, reference, path, cid=cid) if maxvers is not None: params["maxvers"] = maxvers del_cached_object_metadata(account=account, reference=reference, path=path, cid=cid, **kwargs) self._direct_request('POST', uri, params=params, **kwargs)
def conscience(self):
    """
    Get the conscience client of this object.

    The client is built from `self.conf` the first time it is needed,
    then kept in `self._conscience` and reused.
    """
    if self._conscience:
        return self._conscience
    self._conscience = ConscienceClient(self.conf)
    return self._conscience
class ContentFactory(object):
    """
    Build content objects (`DupContent` or `RainContent`) according
    to the data security of their storage policy.
    """

    def __init__(self, conf):
        self.conf = conf
        self.logger = get_logger(conf)
        self.cs_client = ConscienceClient(conf)
        self.container_client = ContainerClient(conf)
        # Namespace information, fetched once at instantiation
        self.ns_info = self.cs_client.info()

    def _extract_datasec(self, stgpol_name):
        """
        Find the data security type and arguments of a storage policy.

        :param stgpol_name: name of a storage policy
        :returns: a tuple with the data security type and a `dict`
            of its arguments (keys and values are `str`)
        :raises InconsistentContent: if the storage policy or its data
            security is not declared in the namespace information
        """
        try:
            stgpol = self.ns_info["storage_policy"][stgpol_name]
        except KeyError:
            self.logger.error("Storage policy '%s' not found", stgpol_name)
            raise InconsistentContent("Storage policy not found")

        # A storage policy has exactly three ':'-separated fields,
        # only the second one (data security name) is useful here.
        _, datasec_name, _ = stgpol.split(':')
        if datasec_name == 'NONE':
            return "DUP", {"nb_copy": "1", "distance": "0"}

        try:
            datasec = self.ns_info["data_security"][datasec_name]
        except KeyError:
            self.logger.error("Data security '%s' not found", datasec_name)
            raise InconsistentContent("Data security not found")

        # Format is "TYPE:key1=value1|key2=value2"
        ds_type, ds_args = datasec.split(':')
        args = {}
        for arg in ds_args.split('|'):
            key, value = arg.split('=')
            args[key] = value
        return ds_type, args

    def _build_content(self, container_id, meta, chunks):
        """Instantiate the content class matching the policy of `meta`."""
        pol_type, pol_args = self._extract_datasec(meta['policy'])
        if pol_type == "DUP":
            return DupContent(self.conf, container_id, meta, chunks,
                              pol_args)
        elif pol_type == "RAIN":
            return RainContent(self.conf, container_id, meta, chunks,
                               pol_args)
        raise InconsistentContent("Unknown storage policy")

    def get(self, container_id, content_id):
        """
        Load an existing content.

        :raises ContentNotFound: if the container client raises `NotFound`
        :raises InconsistentContent: if the storage policy is unknown
        """
        try:
            meta, chunks = self.container_client.content_show(
                cid=container_id, content=content_id)
        except NotFound:
            raise ContentNotFound("Content %s/%s not found"
                                  % (container_id, content_id))
        return self._build_content(container_id, meta, chunks)

    def new(self, container_id, path, size, policy):
        """
        Prepare a new content with the specified storage policy.

        :raises InconsistentContent: if the storage policy is unknown
        """
        meta, chunks = self.container_client.content_prepare(
            cid=container_id, path=path, size=size, stgpol=policy)
        return self._build_content(container_id, meta, chunks)

    def change_policy(self, container_id, content_id, new_policy):
        """
        Re-upload a content with another storage policy.

        :returns: the content itself if it already has the requested
            policy, the newly uploaded content otherwise
        """
        old_content = self.get(container_id, content_id)
        if old_content.stgpol_name == new_policy:
            return old_content

        new_content = self.new(container_id, old_content.path,
                               old_content.length, new_policy)
        stream = old_content.download()
        new_content.upload(GeneratorIO(stream))
        # the old content is automatically deleted because the new content
        # has the same name (but not the same id)
        return new_content
class AccountClient(HttpApi):
    """Simple client API for the account service."""

    def __init__(self, conf, endpoint=None, proxy_endpoint=None,
                 refresh_delay=3600.0, logger=None, **kwargs):
        """
        Initialize a client for the account service.

        :param conf: dictionary with at least the namespace name
        :type conf: `dict`
        :param endpoint: URL of an account service
        :param proxy_endpoint: URL of the proxy
        :param refresh_delay: time between refreshes of the
            account service endpoint (if not provided at instantiation)
        :type refresh_delay: `float` seconds
        """
        super(AccountClient, self).__init__(endpoint=endpoint, **kwargs)
        self.logger = logger or get_logger(conf)
        self.cs = ConscienceClient(conf, endpoint=proxy_endpoint,
                                   logger=self.logger, **kwargs)
        # A negative delay disables the periodic refresh: this is the case
        # when the endpoint has been set at instantiation.
        self._refresh_delay = refresh_delay if not self.endpoint else -1.0
        self._last_refresh = 0.0

    def _get_account_addr(self):
        """
        Fetch IP and port of an account service from Conscience.

        :raises ClientException: if anything goes wrong while asking
            for an account service
        """
        try:
            acct_instance = self.cs.next_instance('account')
            acct_addr = acct_instance.get('addr')
        except Exception:
            raise ClientException("No Account service found")
        return acct_addr

    def _refresh_endpoint(self, now=None):
        """
        Refresh account service endpoint.

        :param now: timestamp to record as last refresh time
            (current time if not set)
        """
        addr = self._get_account_addr()
        self.endpoint = '/'.join(("http:/", addr, "v1.0/account"))
        if not now:
            now = time.time()
        self._last_refresh = now

    def _maybe_refresh_endpoint(self):
        """
        Refresh account service endpoint if delay has been reached.

        :raises ClientException: if the refresh fails and there is
            no previous endpoint to fall back on
        """
        if self._refresh_delay >= 0.0 or not self.endpoint:
            now = time.time()
            if now - self._last_refresh > self._refresh_delay:
                try:
                    self._refresh_endpoint(now)
                except ClientException:
                    if not self.endpoint:
                        # Cannot use the previous one
                        raise
                    self.logger.exception(
                        "Failed to refresh account endpoint")

    def _refresh_endpoint_after_error(self, error):
        """
        Try to refresh the endpoint after a network error, never raise.

        This is done in a dedicated method so the exception possibly
        caught here does not replace the one being handled by the caller
        (which can thus be re-raised with a bare `raise`).
        """
        self.logger.info("Refreshing account endpoint after error %s",
                         error)
        try:
            self._refresh_endpoint()
        except Exception as refresh_exc:
            self.logger.warning("%s", refresh_exc)

    def account_request(self, account, method, action, params=None,
                        **kwargs):
        """
        Make a request to the account service.

        :param account: name of the account, sent (quoted) as `id`
            parameter when set
        :param method: HTTP method
        :param action: action appended to the endpoint
        :param params: request parameters (not modified)
        :returns: a tuple with the response and its body
        :raises OioNetworkException: in case of network error. If periodic
            refresh is enabled, a refresh of the endpoint is attempted
            before the exception is re-raised.
        """
        self._maybe_refresh_endpoint()
        # Work on a copy so the caller's dictionary is left untouched
        params = dict(params) if params else dict()
        if account:
            params['id'] = quote(account)
        try:
            resp, body = self._request(method, action, params=params,
                                       **kwargs)
        except OioNetworkException as exc:
            if self._refresh_delay >= 0.0:
                self._refresh_endpoint_after_error(exc)
            raise
        return resp, body

    def account_create(self, account, **kwargs):
        """
        Create an account.

        :param account: name of the account to create
        :type account: `str`
        :returns: `True` if the account has been created
        """
        resp, _body = self.account_request(account, 'PUT', 'create',
                                           **kwargs)
        return resp.status == 201

    def account_delete(self, account, **kwargs):
        """
        Delete an account.

        :param account: name of the account to delete
        :type account: `str`
        """
        self.account_request(account, 'POST', 'delete', **kwargs)

    def account_list(self, **kwargs):
        """
        List accounts.

        :returns: the body of the response
        """
        _resp, body = self.account_request(None, 'GET', 'list', **kwargs)
        return body

    def account_show(self, account, **kwargs):
        """
        Get information about an account.

        :returns: the body of the response
        """
        _resp, body = self.account_request(account, 'GET', 'show', **kwargs)
        return body

    # FIXME: document this
    def account_update(self, account, metadata, to_delete, **kwargs):
        """
        Send an 'update' request with `metadata` and `to_delete`
        serialized as a JSON object.
        """
        data = json.dumps({"metadata": metadata, "to_delete": to_delete})
        self.account_request(account, 'POST', 'update', data=data, **kwargs)

    def container_list(self, account, limit=None, marker=None,
                       end_marker=None, prefix=None, delimiter=None,
                       **kwargs):
        """
        Get the list of containers of an account.

        :param account: account from which to get the container list
        :type account: `str`
        :keyword limit: maximum number of results to return
        :type limit: `int`
        :keyword marker: name of the container from where to start the
            listing
        :type marker: `str`
        :keyword end_marker:
        :keyword prefix:
        :keyword delimiter:
        :returns: the body of the response
        """
        params = {
            "id": account,
            "limit": limit,
            "marker": marker,
            "end_marker": end_marker,
            "prefix": prefix,
            "delimiter": delimiter,
        }
        _resp, body = self.account_request(account, 'GET', 'containers',
                                           params=params, **kwargs)
        return body

    def container_update(self, account, container, metadata=None, **kwargs):
        """
        Update account with container-related metadata.

        :param account: name of the account to update
        :type account: `str`
        :param container: name of the container whose metadata has changed
        :type container: `str`
        :param metadata: container metadata ("bytes", "objects",
            "mtime", "dtime"). The dictionary is not modified.
        :type metadata: `dict`
        :returns: the body of the response
        """
        # Copy: the default is None, and the caller's dict must not
        # gain a 'name' key as a side effect.
        metadata = dict(metadata) if metadata else dict()
        metadata['name'] = container
        _resp, body = self.account_request(account, 'POST',
                                           'container/update',
                                           data=json.dumps(metadata),
                                           **kwargs)
        return body

    def container_reset(self, account, container, mtime, **kwargs):
        """
        Reset container of an account

        :param account: name of the account
        :type account: `str`
        :param container: name of the container to reset
        :type container: `str`
        :param mtime: time of the modification
        """
        metadata = {"name": container, "mtime": mtime}
        self.account_request(account, 'POST', 'container/reset',
                             data=json.dumps(metadata), **kwargs)

    def account_refresh(self, account, **kwargs):
        """
        Refresh counters of an account

        :param account: name of the account to refresh
        :type account: `str`
        """
        self.account_request(account, 'POST', 'refresh', **kwargs)

    def account_flush(self, account, **kwargs):
        """
        Flush all containers of an account

        :param account: name of the account to flush
        :type account: `str`
        """
        self.account_request(account, 'POST', 'flush', **kwargs)
class XcuteClient(HttpApi):
    """Simple client API for the xcute service."""

    def __init__(self, conf, endpoint=None, proxy_endpoint=None,
                 refresh_delay=3600.0, logger=None, **kwargs):
        """
        Initialize a client for the xcute service.

        :param conf: dictionary with at least the namespace name
        :type conf: `dict`
        :param endpoint: URL of an xcute service
        :param proxy_endpoint: URL of the proxy
        :param refresh_delay: time between refreshes of the
            xcute service endpoint (if not provided at instantiation)
        :type refresh_delay: `float` seconds
        """
        super(XcuteClient, self).__init__(
            endpoint=endpoint, service_type='xcute-service', **kwargs)
        self.conf = conf
        self.logger = logger or get_logger(self.conf)
        self.conscience = ConscienceClient(
            conf, endpoint=proxy_endpoint, logger=self.logger, **kwargs)
        # Periodic refresh is disabled (negative delay) when an endpoint
        # has been provided.
        if self.endpoint:
            self._refresh_delay = -1.0
        else:
            self._refresh_delay = refresh_delay
        self._last_refresh = 0.0

    def _get_xcute_addr(self, **kwargs):
        """Fetch IP and port of an xcute service from Conscience."""
        instance = self.conscience.next_instance('xcute', **kwargs)
        return instance.get('addr')

    def _refresh_endpoint(self, now=None, **kwargs):
        """
        Refresh xcute service endpoint.

        :param now: timestamp to record as last refresh time
            (current time if not set)
        """
        xcute_addr = self._get_xcute_addr(**kwargs)
        self.endpoint = '/'.join(("http:/", xcute_addr, "v1.0/xcute"))
        self._last_refresh = now if now else time.time()

    def _maybe_refresh_endpoint(self, **kwargs):
        """Refresh xcute service endpoint if delay has been reached."""
        if self._refresh_delay < 0.0 and self.endpoint:
            # Periodic refresh disabled, and there is an endpoint to use
            return
        now = time.time()
        if now - self._last_refresh <= self._refresh_delay:
            return
        try:
            self._refresh_endpoint(now, **kwargs)
        except OioNetworkException as exc:
            if not self.endpoint:
                # Cannot use the previous one
                raise
            self.logger.warn("Failed to refresh xcute endpoint: %s", exc)
        except OioException:
            if not self.endpoint:
                # Cannot use the previous one
                raise
            self.logger.exception("Failed to refresh xcute endpoint")

    def xcute_request(self, method, action, params=None, **kwargs):
        """
        Make a request to the xcute service.

        In case of network error, and if periodic refresh is enabled,
        a refresh of the endpoint is attempted before the original
        exception is raised again.

        :returns: a tuple with the response and its body
        """
        self._maybe_refresh_endpoint(**kwargs)
        params = params or dict()
        try:
            resp, body = self._request(
                method, action, params=params, **kwargs)
        except OioNetworkException as exc:
            # Keep the original error, the refresh below may raise too
            exc_type, exc_value, exc_tb = sys.exc_info()
            if self._refresh_delay >= 0.0:
                self.logger.info(
                    "Refreshing xcute endpoint after error %s", exc)
                try:
                    self._refresh_endpoint(**kwargs)
                except Exception as refresh_exc:
                    self.logger.warn("%s", refresh_exc)
            reraise(exc_type, exc_value, exc_tb)
        else:
            return resp, body

    def job_list(self, limit=None, prefix=None, marker=None,
                 job_status=None, job_type=None, job_lock=None):
        """Request '/job/list' with the filters, return the response body."""
        filters = {
            'limit': limit,
            'prefix': prefix,
            'marker': marker,
            'status': job_status,
            'type': job_type,
            'lock': job_lock
        }
        return self.xcute_request('GET', '/job/list', params=filters)[1]

    def job_create(self, job_type, job_config=None):
        """Request '/job/create' for a job type, return the response body."""
        return self.xcute_request(
            'POST', '/job/create',
            params={'type': job_type}, json=job_config)[1]

    def job_show(self, job_id):
        """Request '/job/show' for a job, return the response body."""
        return self.xcute_request(
            'GET', '/job/show', params={'id': job_id})[1]

    def job_pause(self, job_id):
        """Request '/job/pause' for a job, return the response body."""
        return self.xcute_request(
            'POST', '/job/pause', params={'id': job_id})[1]

    def job_resume(self, job_id):
        """Request '/job/resume' for a job, return the response body."""
        return self.xcute_request(
            'POST', '/job/resume', params={'id': job_id})[1]

    def job_update(self, job_id, job_config=None):
        """Request '/job/update' for a job, return the response body."""
        return self.xcute_request(
            'POST', '/job/update',
            params={'id': job_id}, json=job_config)[1]

    def job_delete(self, job_id):
        """Request '/job/delete' for a job."""
        self.xcute_request('DELETE', '/job/delete', params={'id': job_id})

    def lock_list(self):
        """Request '/lock/list', return the response body."""
        return self.xcute_request('GET', '/lock/list')[1]

    def lock_show(self, lock):
        """Request '/lock/show' for a lock, return the response body."""
        return self.xcute_request(
            'GET', '/lock/show', params={'lock': lock})[1]
def cluster(self):
    """
    Get the conscience client of this object.

    The client is built from `self.conf` the first time it is needed
    (the class is imported at that moment only), then kept in
    `self._cluster` and reused.
    """
    if self._cluster:
        return self._cluster
    from oio.conscience.client import ConscienceClient
    self._cluster = ConscienceClient(self.conf)
    return self._cluster
class Harasser(object):
    """
    Stress an rdir service: push batches of fake chunk references,
    remove a random number of them, and keep throughput statistics.
    """

    def __init__(self, ns, max_containers=256, max_contents=256):
        conf = {'namespace': ns}
        self.cs = ConscienceClient(conf)
        self.rdir = RdirClient(conf)
        self.rawx_list = [x['addr'] for x in self.cs.all_services('rawx')]
        # (vol_id, container_id, content_id, chunk_id) of pushed chunks
        self.sent = set()
        self.max_containers = max_containers
        self.max_contents = max_contents
        self.pushed_count = 0
        self.pushed_time = 0
        self.removed_count = 0
        self.removed_time = 0

    @staticmethod
    def _rate(count, elapsed):
        """Compute a number of operations per second (0 if no time)."""
        return count / elapsed if elapsed > 0 else 0

    def harass_put(self, loops=None):
        """
        Push fake chunks to rdir, and remember them in `self.sent`.

        :param loops: number of chunks to push
            (random between 1000 and 2000 if not set)
        """
        if loops is None:
            loops = random.randint(1000, 2000)
        print("Pushing %d fake chunks" % loops)
        count_start_container = random.randrange(2**20)
        count_start_content = random.randrange(2**20)
        start = time.time()
        nb_rawx = len(self.rawx_list)
        args = {'mtime': int(start)}
        # Count down from `loops` to 1: volumes are selected in turn,
        # container and content IDs are sequential from a random offset.
        for loop in range(loops, 0, -1):
            vol_id = self.rawx_list[loop % nb_rawx]
            container_id = "%064X" % (loop + count_start_container)
            content_id = "%032X" % (loop + count_start_content)
            chunk_id = "http://%s/%064X" \
                % (vol_id, random.randrange(2**128))
            self.rdir.chunk_push(
                vol_id, container_id, content_id, chunk_id, **args)
            self.sent.add((vol_id, container_id, content_id, chunk_id))
        elapsed = time.time() - start
        self.pushed_count += loops
        self.pushed_time += elapsed
        print("%d pushed in %.3fs, %d req/s"
              % (loops, elapsed, self._rate(loops, elapsed)))

    def harass_del(self, min_loops=0):
        """
        Remove from rdir some of the chunks previously pushed.

        :param min_loops: minimum number of chunks to remove. The actual
            number is random, between this value and the number of
            chunks in `self.sent`.
        """
        min_loops = min(min_loops, len(self.sent))
        loops = random.randint(min_loops, len(self.sent))
        print("Removing %d fake chunks" % loops)
        start = time.time()
        for _ in range(loops):
            self.rdir.chunk_delete(*self.sent.pop())
        elapsed = time.time() - start
        self.removed_count += loops
        self.removed_time += elapsed
        print("%d removed in %.3fs, %d req/s"
              % (loops, elapsed, self._rate(loops, elapsed)))

    def __call__(self):
        """
        Push and remove chunks until interrupted from the keyboard,
        then remove all remaining chunks and print statistics.
        """
        try:
            while True:
                self.harass_put()
                self.harass_del()
        except KeyboardInterrupt:
            print("Cleaning...")
            self.harass_del(len(self.sent))
            print("Stats:")
            # The interruption may occur before anything has been timed
            print("Pushed %d in %.3fs, %d req/s" % (
                self.pushed_count, self.pushed_time,
                self._rate(self.pushed_count, self.pushed_time)))
            print("Removed %d in %.3fs, %d req/s" % (
                self.removed_count, self.removed_time,
                self._rate(self.removed_count, self.removed_time)))
def conscience(self):
    """
    Get the conscience client of this object.

    The client is built from `self.client_conf`, sharing
    `self.pool_manager`, the first time it is needed (the class is
    imported at that moment only). It is then kept in
    `self._conscience_client` and reused.
    """
    if self._conscience_client is not None:
        return self._conscience_client
    from oio.conscience.client import ConscienceClient
    self._conscience_client = ConscienceClient(
        self.client_conf, pool_manager=self.pool_manager)
    return self._conscience_client
class EventWorker(object):
    """
    Worker receiving events from a ZeroMQ socket and dispatching them
    to the handler matching their type.
    """

    def __init__(self, conf, name, context, **kwargs):
        """
        :param conf: configuration of the worker
        :param name: name of the worker (used in log messages)
        :param context: ZeroMQ context used to create the socket
        :keyword verbose: passed to the logger factory
        """
        self.conf = conf
        self.name = name
        self.running = False
        verbose = kwargs.pop('verbose', False)
        self.logger = get_logger(self.conf, verbose=verbose)
        self.init_zmq(context)
        self.cs = ConscienceClient(self.conf)
        self.rdir = RdirClient(self.conf)
        self._acct_addr = None
        # Time of the last successful lookup of an account service.
        # Kept apart from `acct_update`, which is an on/off flag.
        self._acct_last_refresh = 0
        self.acct_refresh_interval = int_value(
            conf.get('acct_refresh_interval'), 60)
        self.acct_update = true_value(conf.get('acct_update', True))
        self.rdir_update = true_value(conf.get('rdir_update', True))
        self.session = requests.Session()
        self.failed = False

    def start(self):
        """Run the event loop (returns when the loop ends)."""
        self.logger.info('worker "%s" starting', self.name)
        self.running = True
        self.run()

    def stop(self):
        """Ask the event loop to stop before its next iteration."""
        self.logger.info('worker "%s" stopping', self.name)
        self.running = False

    def init_zmq(self, context):
        """Create the reply socket and connect it to the event front."""
        socket = context.socket(zmq.REP)
        socket.connect('inproc://event-front')
        self.socket = socket

    def safe_ack(self, msg):
        """Send an acknowledgement, only log a warning on failure."""
        try:
            self.socket.send_multipart(msg)
        except Exception:
            self.logger.warning('Unable to ack event')

    def run(self):
        """
        Receive, process and acknowledge events while `self.running`.

        Any exception marks the worker as failed and is raised again.
        """
        try:
            while self.running:
                msg = self.socket.recv_multipart()
                self.logger.debug("msg received: %s", msg)
                event = decode_msg(msg)
                success = self.process_event(event)
                # The second frame of the ack is "0" in case of success,
                # empty otherwise
                f = "0" if success else ""
                self.safe_ack([msg[0], f])
        except Exception as e:
            self.logger.warning('ERROR in worker "%s": %s', self.name, e)
            self.failed = True
            # Bare raise, to keep the original traceback
            raise
        finally:
            self.logger.info('worker "%s" stopped', self.name)

    def process_event(self, event):
        """
        Call the handler matching the event.

        :returns: False if the handler raised an `Exception`,
            True otherwise (even when there is no handler)
        """
        handler = self.get_handler(event)
        if not handler:
            self.logger.warning("No handler found")
            # mark as success
            return True
        try:
            handler(event)
        except Exception:
            # Exceptions not deriving from Exception (interruption,
            # exit request) are deliberately left to propagate.
            self.logger.exception('worker "%s" failed to handle event',
                                  self.name)
            return False
        return True

    def get_handler(self, event):
        """
        Find the handler for the type of the event.

        :returns: a bound method, or None if the event has no type
            or an unknown type
        """
        event_type = event.get('event')
        if not event_type:
            return None
        handlers = (
            (EventType.CONTAINER_PUT, self.handle_container_put),
            (EventType.CONTAINER_DESTROY, self.handle_container_destroy),
            (EventType.CONTAINER_UPDATE, self.handle_container_update),
            (EventType.OBJECT_PUT, self.handle_object_put),
            (EventType.OBJECT_DELETE, self.handle_object_delete),
            (EventType.REFERENCE_UPDATE, self.handle_reference_update),
            (EventType.CHUNK_PUT, self.handle_chunk_put),
            (EventType.CHUNK_DELETE, self.handle_chunk_delete),
            (EventType.PING, self.handle_ping),
        )
        for known_type, handler in handlers:
            if event_type == known_type:
                return handler
        return None

    @property
    def acct_addr(self):
        """
        Address of an account service.

        It is asked to Conscience when unknown or when the refresh
        interval has been reached. If the lookup fails, a warning is
        logged and the previous address (possibly None) is returned.
        """
        if not self._acct_addr or self.acct_refresh():
            try:
                acct_instance = self.cs.next_instance(ACCOUNT_SERVICE)
                self._acct_addr = acct_instance.get('addr')
                self._acct_last_refresh = time.time()
            except Exception:
                self.logger.warning('Unable to find account instance')
        return self._acct_addr

    def acct_refresh(self):
        """Tell if the address of the account service must be refreshed."""
        elapsed = time.time() - self._acct_last_refresh
        return elapsed > self.acct_refresh_interval

    def handle_container_put(self, event):
        """
        Handle container creation.

        Send the name and modification time of the container to the
        account service (if `acct_update` is enabled).

        :param event:
        """
        self.logger.debug('worker "%s" handle container put', self.name)
        if not self.acct_update:
            return
        uri = 'http://%s/v1.0/account/container/update' % self.acct_addr
        mtime = event.get('when')
        data = event.get('data')
        name = data.get('url').get('user')
        account = data.get('url').get('account')

        payload = {'mtime': mtime, 'name': name}
        self.session.post(uri, params={'id': account},
                          data=json.dumps(payload))

    def handle_container_update(self, event):
        """
        Handle container update.

        Send the name, modification time and counters of the container
        to the account service (if `acct_update` is enabled).

        :param event:
        """
        self.logger.debug('worker "%s" handle container update', self.name)
        if not self.acct_update:
            return
        uri = 'http://%s/v1.0/account/container/update' % self.acct_addr
        mtime = event.get('when')
        data = event.get('data')
        # NOTE(review): 'url' is read from the event itself here, but
        # from event['data'] in the put and destroy handlers -- confirm
        # against the event producer.
        name = event.get('url').get('user')
        account = event.get('url').get('account')
        bytes_count = data.get('bytes-count', 0)
        object_count = data.get('object-count', 0)

        payload = {
            'mtime': mtime,
            'name': name,
            'bytes': bytes_count,
            'objects': object_count
        }
        self.session.post(uri, params={'id': account},
                          data=json.dumps(payload))

    def handle_container_destroy(self, event):
        """
        Handle container destroy.

        Send the name and deletion time of the container to the
        account service (if `acct_update` is enabled).

        :param event:
        """
        self.logger.debug('worker "%s" handle container destroy', self.name)
        if not self.acct_update:
            return
        uri = 'http://%s/v1.0/account/container/update' % self.acct_addr
        dtime = event.get('when')
        data = event.get('data')
        name = data.get('url').get('user')
        account = data.get('url').get('account')

        payload = {'dtime': dtime, 'name': name}
        self.session.post(uri, params={'id': account},
                          data=json.dumps(payload))

    def handle_object_delete(self, event):
        """
        Handle object deletion.
        Delete the chunks of the object.

        :param event:
        """
        self.logger.debug('worker "%s" handle object delete', self.name)
        pile = GreenPile(PARALLEL_CHUNKS_DELETE)

        chunks = [item for item in event.get('data')
                  if item.get('type') == 'chunks']
        if not chunks:
            self.logger.warning('No chunks found in event data')
            return

        def delete_chunk(chunk):
            resp = None
            try:
                with Timeout(CHUNK_TIMEOUT):
                    resp = self.session.delete(chunk['id'])
            except (Exception, Timeout) as e:
                self.logger.exception(e)
            return resp

        for chunk in chunks:
            pile.spawn(delete_chunk, chunk)

        # Compare to None: a response is falsy when its status is an
        # error, and such responses must be reported below.
        resps = [resp for resp in pile if resp is not None]

        for resp in resps:
            if resp.status_code == 204:
                self.logger.info('deleted chunk %s', resp.url)
            else:
                self.logger.warning('failed to delete chunk %s', resp.url)

    def handle_object_put(self, event):
        """
        Handle object creation.
        TODO

        :param event:
        """
        self.logger.debug('worker "%s" handle object put', self.name)

    def handle_reference_update(self, event):
        """
        Handle reference update.
        TODO

        :param event
        """
        self.logger.debug('worker "%s" handle reference update', self.name)

    def handle_chunk_put(self, event):
        """
        Handle chunk creation.

        Push the chunk to rdir (if `rdir_update` is enabled). The
        identifiers are removed from the event data, what remains is
        passed as keyword arguments, along with the event time as `mtime`.

        :param event
        """
        if not self.rdir_update:
            self.logger.debug('worker "%s" skip chunk creation', self.name)
            return

        self.logger.debug('worker "%s" handle chunk creation', self.name)

        when = event.get('when')
        data = event.get('data')
        volume_id = data.pop('volume_id')
        container_id = data.pop('container_id')
        content_id = data.pop('content_id')
        chunk_id = data.pop('chunk_id')
        data['mtime'] = when
        self.rdir.chunk_push(volume_id, container_id, content_id, chunk_id,
                             **data)

    def handle_chunk_delete(self, event):
        """
        Handle chunk deletion.

        Remove the chunk from rdir (if `rdir_update` is enabled).

        :param event
        """
        if not self.rdir_update:
            self.logger.debug('worker "%s" skip chunk deletion', self.name)
            return

        self.logger.debug('worker "%s" handle chunk deletion', self.name)

        data = event.get('data')
        volume_id = data.get('volume_id')
        container_id = data.get('container_id')
        content_id = data.get('content_id')
        chunk_id = data.get('chunk_id')
        self.rdir.chunk_delete(volume_id, container_id, content_id, chunk_id)

    def handle_ping(self, event):
        """
        Handle ping

        :param event
        """
        self.logger.debug('worker "%s" handle ping', self.name)
class EventWorker(Worker):
    """
    Worker reserving jobs (events) from a beanstalkd tube, and passing
    them to the handler configured for their type.
    """

    def __init__(self, *args, **kwargs):
        super(EventWorker, self).__init__(*args, **kwargs)
        self.app_env = dict()

    def init(self):
        """
        Prepare the worker: clients, settings and event handlers.

        :raises ValueError: if 'handlers_conf' is missing from
            the configuration
        """
        eventlet.monkey_patch(os=False)
        self.tube = self.conf.get("tube", DEFAULT_TUBE)
        self.cs = ConscienceClient(self.conf, logger=self.logger)
        self.rdir = RdirClient(self.conf, logger=self.logger)
        self._acct_addr = None
        # Time of the last lookup of an account service. Kept apart
        # from `acct_update`, which is an on/off flag.
        self._acct_last_refresh = 0
        self.graceful_timeout = 1
        self.acct_refresh_interval = int_value(
            self.conf.get('acct_refresh_interval'), 60)
        self.acct_update = true_value(self.conf.get('acct_update', True))
        self.rdir_update = true_value(self.conf.get('rdir_update', True))
        # The bound method itself is stored, not an address
        self.app_env['acct_addr'] = self.acct_addr
        if 'handlers_conf' not in self.conf:
            raise ValueError("'handlers_conf' path not defined in conf")
        self.handlers = loadhandlers(
            self.conf.get('handlers_conf'), global_conf=self.conf, app=self)
        super(EventWorker, self).init()

    def notify(self):
        """TODO"""
        pass

    def safe_decode_job(self, job_id, data):
        """
        Decode the JSON body of a job, and add the job ID to it.

        :returns: the decoded event, or None if decoding failed
            (a warning is logged)
        """
        try:
            env = json.loads(data)
            env['job_id'] = job_id
            return env
        except Exception as exc:
            # Do not use `exc.message`: exceptions have no such
            # attribute in Python 3.
            self.logger.warning('Failed to decode job %s: "%s"',
                                job_id, exc)
            return None

    def run(self):
        """
        Spawn the configured number of coroutines running `handle`,
        wait until the worker is not alive anymore, then stop them.
        """
        coros = []
        queue_url = self.conf.get('queue_url', '127.0.0.1:11300')
        concurrency = int_value(self.conf.get('concurrency'), 10)

        server_gt = greenthread.getcurrent()

        for _ in range(concurrency):
            beanstalk = Beanstalk.from_url(queue_url)
            gt = eventlet.spawn(self.handle, beanstalk)
            gt.link(_eventlet_stop, server_gt, beanstalk)
            coros.append(gt)
            beanstalk, gt = None, None

        while self.alive:
            self.notify()
            try:
                eventlet.sleep(1.0)
            except AssertionError:
                self.alive = False
                break

        self.notify()
        try:
            with Timeout(self.graceful_timeout) as t:
                for coro in coros:
                    coro.kill(StopServe())
                for coro in coros:
                    coro.wait()
        except Timeout as te:
            if te != t:
                raise
            # Graceful stop timed out
            for coro in coros:
                coro.kill()

    def handle(self, beanstalk):
        """
        Reserve jobs from beanstalkd and process them, until
        `StopServe` is raised in this coroutine.

        Jobs which cannot be decoded, or whose processing raises
        an exception, are buried.
        """
        conn_error = False
        try:
            if self.tube:
                beanstalk.use(self.tube)
                beanstalk.watch(self.tube)
            while True:
                try:
                    job_id, data = beanstalk.reserve()
                    if conn_error:
                        self.logger.warning("beanstalk reconnected")
                        conn_error = False
                except ConnectionError:
                    if not conn_error:
                        # Log only the first of a series of errors
                        self.logger.warning("beanstalk connection error")
                        conn_error = True
                    eventlet.sleep(BEANSTALK_RECONNECTION)
                    continue
                event = self.safe_decode_job(job_id, data)
                if event is None:
                    # The decoding failure has already been logged
                    beanstalk.bury(job_id)
                    continue
                try:
                    self.process_event(job_id, event, beanstalk)
                except (ClientException, OioNetworkException) as exc:
                    self.logger.warning("Burying event %s (%s): %s",
                                        job_id, event.get('event'), exc)
                    beanstalk.bury(job_id)
                except ExplicitBury:
                    self.logger.info("Burying event %s (%s)",
                                     job_id, event.get('event'))
                    beanstalk.bury(job_id)
                except Exception:
                    self.logger.exception("Burying event %s: %s",
                                          job_id, event)
                    beanstalk.bury(job_id)
        except StopServe:
            pass

    def process_event(self, job_id, event, beanstalk):
        """
        Pass the event to its handler, with a callback deleting the
        job on success, or releasing it (with a delay) on error.

        The job is deleted when there is no handler for the event.
        """
        handler = self.get_handler(event)
        if not handler:
            self.logger.warning('no handler found for %r', event)
            beanstalk.delete(job_id)
            return

        def cb(status, msg):
            if is_success(status):
                beanstalk.delete(job_id)
            elif is_error(status):
                self.logger.warning(
                    'event %s handling failure (release with delay): %s',
                    event['job_id'], msg)
                beanstalk.release(job_id, delay=RELEASE_DELAY)

        handler(event, cb)

    def get_handler(self, event):
        """Get the handler for the type of the event, None if unknown."""
        return self.handlers.get(event.get('event'), None)

    def acct_addr(self):
        """
        Get the address of an account service.

        It is asked to Conscience when unknown or when the refresh
        interval has been reached.
        """
        if not self._acct_addr or self.acct_refresh():
            acct_instance = self.cs.next_instance(ACCOUNT_SERVICE)
            self._acct_addr = acct_instance.get('addr')
            self._acct_last_refresh = time.time()
        return self._acct_addr

    def acct_refresh(self):
        """Tell if the address of the account service must be refreshed."""
        elapsed = time.time() - self._acct_last_refresh
        return elapsed > self.acct_refresh_interval
def cs(self):
    """
    Get the conscience client of this object.

    The client is built from `self.conf`, with `self.logger`, the first
    time it is needed, then kept in `self._cs` and reused.
    """
    if self._cs:
        return self._cs
    self._cs = ConscienceClient(self.conf, logger=self.logger)
    return self._cs
def __init__(self, conf):
    """
    Keep the configuration, and create a logger, a conscience client
    and a container client from it. The namespace information is
    fetched from the conscience client once, at instantiation.

    :param conf: configuration passed as is to the logger factory
        and to both clients
    """
    self.conf = conf
    self.logger = get_logger(conf)
    conscience = ConscienceClient(conf)
    self.cs_client = conscience
    self.container_client = ContainerClient(conf)
    self.ns_info = conscience.info()
class XcuteOrchestrator(object):
    """
    Take the jobs assigned to this orchestrator from the backend,
    dispatch their tasks to beanstalkd workers and process the replies.
    """

    DEFAULT_DISPATCHER_TIMEOUT = 2
    DEFAULT_REFRESH_TIME_BEANSTALKD_WORKERS = 30
    DEFAULT_MAX_JOBS_PER_BEANSTALKD = 1024

    def __init__(self, conf, logger=None):
        """
        :raises ValueError: if 'orchestrator_id', 'beanstalkd_workers_tube'
            or 'beanstalkd_reply_addr' is missing from the configuration.
        """
        self.conf = conf
        self.logger = logger or get_logger(self.conf)
        self.backend = XcuteBackend(self.conf, logger=self.logger)
        self.conscience_client = ConscienceClient(self.conf)

        self.orchestrator_id = self.conf.get('orchestrator_id')
        if not self.orchestrator_id:
            raise ValueError('Missing orchestrator ID')
        self.logger.info('Using orchestrator ID: %s', self.orchestrator_id)

        self.beanstalkd_workers_tube = self.conf.get('beanstalkd_workers_tube')
        if not self.beanstalkd_workers_tube:
            raise ValueError('Missing beanstalkd workers tube')
        self.logger.info('Using beanstalkd workers tube: %s',
                         self.beanstalkd_workers_tube)

        self.beanstalkd_reply_addr = self.conf.get('beanstalkd_reply_addr')
        if not self.beanstalkd_reply_addr:
            raise ValueError('Missing beanstalkd reply address')
        self.beanstalkd_reply_tube = self.conf.get(
            'beanstalkd_reply_tube', self.beanstalkd_workers_tube + '.reply')
        self.logger.info('Using beanstalkd reply : %s %s',
                         self.beanstalkd_reply_addr,
                         self.beanstalkd_reply_tube)

        self.refresh_time_beanstalkd_workers = int_value(
            self.conf.get('refresh_time_beanstalkd_workers'),
            self.DEFAULT_REFRESH_TIME_BEANSTALKD_WORKERS)

        self.max_jobs_per_beanstalkd = int_value(
            self.conf.get('max_jobs_per_beanstalkd'),
            self.DEFAULT_MAX_JOBS_PER_BEANSTALKD)

        self.running = True
        self.beanstalkd_workers = dict()

        self.refresh_beanstalkd_workers_thread = None
        self.listen_beanstalkd_reply_thread = None
        # Running threads, indexed by job ID
        self.dispatch_tasks_threads = dict()
        self.compute_total_tasks_threads = dict()

    def handle_backend_errors(self, func, *args, **kwargs):
        """
        Call `func`, retrying every second on redis connection errors
        and timeouts for as long as the orchestrator is running.

        :returns: `(result, None)` on success, `(None, exception)`
            if the call kept failing and the orchestrator is stopping.
        """
        while True:
            try:
                return func(*args, **kwargs), None
            except (RedisConnectionError, RedisTimeoutError) as exc:
                self.logger.warn(
                    'Fail to communicate with redis: %s', exc)
                if not self.running:
                    return None, exc
                sleep(1)

    def safe_run_forever(self):
        """
        Run the main loop, then wait for all the threads to exit.
        """
        try:
            self.run_forever()
        except Exception as exc:
            self.logger.exception('Fail to run forever: %s', exc)
            self.exit_gracefully()

        if self.refresh_beanstalkd_workers_thread:
            self.refresh_beanstalkd_workers_thread.join()
        if self.listen_beanstalkd_reply_thread:
            self.listen_beanstalkd_reply_thread.join()
        # Iterate over snapshots: the threads remove themselves from
        # these dictionaries when they end.
        for dispatch_tasks_thread \
                in list(self.dispatch_tasks_threads.values()):
            dispatch_tasks_thread.join()
        for compute_total_tasks_thread \
                in list(self.compute_total_tasks_threads.values()):
            compute_total_tasks_thread.join()
        self.logger.info('Exited running thread')

    def run_forever(self):
        """
            Take jobs from the queue and spawn threads to dispatch them
        """

        # gather beanstalkd info
        self.refresh_beanstalkd_workers_thread = threading.Thread(
            target=self.refresh_beanstalkd_workers_forever)
        self.refresh_beanstalkd_workers_thread.start()

        # start processing replies
        self.listen_beanstalkd_reply_thread = threading.Thread(
            target=self.listen_beanstalkd_reply_forever)
        self.listen_beanstalkd_reply_thread.start()

        if not self.running:
            return

        # restart running jobs
        self.logger.debug('Look for unfinished jobs')
        orchestrator_jobs, exc = self.handle_backend_errors(
            self.backend.list_orchestrator_jobs, self.orchestrator_id)
        if exc is not None:
            self.logger.warn(
                'Unable to list running jobs for this orchestrator: %s', exc)
            return
        for job_info in orchestrator_jobs:
            if not self.running:
                return
            self.safe_handle_running_job(job_info)

        # run next jobs
        while self.running:
            sleep(1)
            job_info, exc = self.handle_backend_errors(
                self.backend.run_next, self.orchestrator_id)
            if exc is not None:
                self.logger.warn('Unable to run next job: %s', exc)
                return
            if not job_info:
                continue
            self.safe_handle_running_job(job_info)

    def safe_handle_running_job(self, job_info):
        """
        Handle a running job; on any error, log it and mark the job
        as failed in the backend.
        """
        # Bound before the `try` so that the error handling below works
        # even when the job information is malformed.
        job_id = None
        try:
            job_id = job_info['job']['id']
            job_type = job_info['job']['type']
            self.logger.info('Run job %s: %s', job_id, job_type)
            self.handle_running_job(job_id, job_type, job_info)
        except Exception as exc:
            self.logger.exception('Failed to run job %s: %s', job_id, exc)
            if job_id is None:
                # No job ID: there is nothing to mark as failed
                return
            _, exc = self.handle_backend_errors(self.backend.fail, job_id)
            if exc is not None:
                self.logger.warn(
                    '[job_id=%s] Job has not been updated '
                    'with the failure: %s', job_id, exc)

    def _start_job_thread(self, registry, job_id, target, args):
        """Register a per-job thread in `registry`, then start it."""
        thread = threading.Thread(target=target, args=args)
        # Register before starting: the thread removes its own entry
        # when it ends, which can happen before `start()` returns.
        registry[job_id] = thread
        try:
            thread.start()
        except Exception:
            registry.pop(job_id, None)
            raise

    def handle_running_job(self, job_id, job_type, job_info):
        """
        First launch the computation of total number of tasks,
        then launch the dispatching of all tasks across the platform.
        """
        if job_info['tasks']['all_sent']:
            self.logger.info('[job_id=%s] All tasks are already sent', job_id)
            return

        job_class = JOB_TYPES[job_type]
        job = job_class(self.conf, logger=self.logger)

        # Nothing has been counted nor sent yet: prepare the job
        if job_info['tasks']['total'] == 0 \
                and job_info['tasks']['is_total_temp'] \
                and job_info['tasks']['sent'] == 0 \
                and not job_info['tasks']['all_sent']:
            job.prepare(job_info['config']['params'])

        if job_id in self.compute_total_tasks_threads:
            self.logger.info(
                '[job_id=%s] Already computing the total number of tasks',
                job_id)
        elif job_info['tasks']['is_total_temp']:
            self._start_job_thread(
                self.compute_total_tasks_threads, job_id,
                self.safe_compute_total_tasks,
                (job_id, job_type, job_info, job))
        else:
            self.logger.info(
                '[job_id=%s] The total number of tasks is already computed',
                job_id)

        if job_id in self.dispatch_tasks_threads:
            self.logger.warning('[job_id=%s] Already dispatching the tasks',
                                job_id)
        else:
            self._start_job_thread(
                self.dispatch_tasks_threads, job_id,
                self.safe_dispatch_tasks,
                (job_id, job_type, job_info, job))

    def safe_dispatch_tasks(self, job_id, job_type, job_info, job):
        """
        Dispatch all tasks across the platform and update the backend.
        """
        try:
            self.logger.info('[job_id=%s] Start to dispatch tasks', job_id)
            self.dispatch_tasks(job_id, job_type, job_info, job)
            self.logger.info('[job_id=%s] Finish to dispatch tasks', job_id)
        except Exception as exc:
            self.logger.exception('[job_id=%s] Fail to dispatch tasks: %s',
                                  job_id, exc)
            _, exc = self.handle_backend_errors(self.backend.fail, job_id)
            if exc is not None:
                self.logger.warn(
                    '[job_id=%s] Job has not been updated '
                    'with the failure: %s', job_id, exc)
        finally:
            # `pop` instead of `del`: do not raise if the entry is missing
            self.dispatch_tasks_threads.pop(job_id, None)

    def adapt_speed(self, job_id, job_config, last_check, period=300):
        """
        Pause and/or reduce the rate of creation of new tasks in case
        the number of pending tasks is too high.

        :param job_config: job configuration; 'tasks_per_second' and
            'tasks_batch_size' are updated in place when the speed changes.
        :param last_check: dictionary with the keys 'last' and 'processed'
            returned by the previous call, or None for the first call.
        :param period: minimum number of seconds between two checks.
        :returns: the `last_check` to pass to the next call.
        """
        if last_check is not None \
                and time.time() < last_check['last'] + period:
            return last_check

        waiting_time = 0
        while True:
            # Wait second by second to stop quickly on exit
            for _ in range(waiting_time):
                if not self.running:
                    break
                sleep(1)

            if not self.running:
                return last_check

            job_info, exc = self.handle_backend_errors(
                self.backend.get_job_info, job_id)
            if exc is not None:
                self.logger.warning(
                    '[job_id=%s] Unable to retrieve job info '
                    'and adapt the speed: %s', job_id, exc)
                return last_check
            if job_info['job']['status'] != XcuteJobStatus.RUNNING \
                    or job_info['job']['request_pause']:
                return last_check

            job_mtime = job_info['job']['mtime']
            max_tasks_per_second = job_info['config']['tasks_per_second']
            max_tasks_batch_size = job_info['config']['tasks_batch_size']
            tasks_processed = job_info['tasks']['processed']
            pending_tasks = job_info['tasks']['sent'] - tasks_processed

            if last_check is None:  # Initialize
                last_check = dict()
                last_check['last'] = job_mtime
                last_check['processed'] = tasks_processed
                if pending_tasks / max_tasks_per_second >= period:
                    waiting_time = period
                    self.logger.error(
                        '[job_id=%s] Too many pending tasks '
                        'for the next %d seconds: %d (%d tasks/second); '
                        'wait %d seconds and check again',
                        job_id, period, pending_tasks, max_tasks_per_second,
                        waiting_time)
                    continue
                return last_check

            tasks_processed_in_period = tasks_processed \
                - last_check['processed']
            if tasks_processed_in_period == 0:
                last_check['last'] = job_mtime
                last_check['processed'] = tasks_processed
                waiting_time = period
                self.logger.error(
                    '[job_id=%s] No task processed for the last %d seconds; '
                    'wait %d seconds and check again',
                    job_id, period, waiting_time)
                continue

            elapsed = job_mtime - last_check['last']
            actual_tasks_per_second = tasks_processed_in_period \
                / float(elapsed)
            if pending_tasks / actual_tasks_per_second >= period:
                last_check['last'] = job_mtime
                last_check['processed'] = tasks_processed
                waiting_time = period
                self.logger.error(
                    '[job_id=%s] Too many pending tasks '
                    'for the next %d seconds: %d (%f tasks/second) ; '
                    'wait %d seconds and check again',
                    job_id, period, pending_tasks, actual_tasks_per_second,
                    waiting_time)
                continue

            current_tasks_per_second = job_config['tasks_per_second']
            current_tasks_batch_size = job_config['tasks_batch_size']
            diff_tasks_per_second = \
                current_tasks_per_second - actual_tasks_per_second
            new_tasks_per_second = None
            if diff_tasks_per_second < -0.5:  # Too fast to process tasks
                # The queues need to have a few tasks in advance.
                # Continue at this speed to allow the queues to empty.
                if actual_tasks_per_second > max_tasks_per_second:
                    self.logger.warning(
                        '[job_id=%s] Speeding: %f tasks/second (max: %d)',
                        job_id, actual_tasks_per_second,
                        max_tasks_per_second)
                else:
                    self.logger.info(
                        '[job_id=%s] Speeding: %f tasks/second '
                        '(adapted max: %d)',
                        job_id, actual_tasks_per_second,
                        current_tasks_per_second)
            elif diff_tasks_per_second <= 0.5:  # Good speed to process tasks
                if current_tasks_per_second < max_tasks_per_second:
                    new_tasks_per_second = current_tasks_per_second + 1
                    self.logger.info(
                        '[job_id=%s] Slowly climb up to maximum speed',
                        job_id)
                # else:
                #    Everything is running smoothly!
            else:  # Too slow to process tasks
                new_tasks_per_second = int(math.floor(actual_tasks_per_second))
                self.logger.warning(
                    '[job_id=%s] The task processing speed is too slow: '
                    '%f tasks/second',
                    job_id, actual_tasks_per_second)

            last_check['last'] = job_mtime
            last_check['processed'] = tasks_processed
            if new_tasks_per_second is not None:
                new_tasks_per_second = max(new_tasks_per_second, 1)
                new_tasks_batch_size = min(max_tasks_batch_size,
                                           new_tasks_per_second)
                job_config['tasks_per_second'] = new_tasks_per_second
                job_config['tasks_batch_size'] = new_tasks_batch_size
                self.logger.info(
                    '[job_id=%s] Adapt the speed: %d -> %d tasks/second '
                    '(%d -> %d tasks/batch)',
                    job_id, current_tasks_per_second, new_tasks_per_second,
                    current_tasks_batch_size, new_tasks_batch_size)
            return last_check

    def dispatch_tasks(self, job_id, job_type, job_info, job):
        """
        Send the tasks of the job batch by batch, saving each batch in
        the backend before sending it. When the dispatching stops before
        the end, the job is freed in the backend.
        """
        job_config = job_info['config']
        job_params = job_config['params']
        last_task_id = job_info['tasks']['last_sent']

        job_tasks = job.get_tasks(job_params, marker=last_task_id)
        beanstalkd_workers = self.get_beanstalkd_workers()

        last_check = self.adapt_speed(job_id, job_config, None)
        tasks_per_second = job_config['tasks_per_second']
        tasks_batch_size = job_config['tasks_batch_size']
        batch_per_second = tasks_per_second / float(tasks_batch_size)

        tasks_run_time = 0
        # The backend must have the tasks in order
        # to know the last task sent
        tasks = OrderedDict()
        for task_id, task_payload in job_tasks:
            if not self.running:
                break

            tasks[task_id] = task_payload
            if len(tasks) < tasks_batch_size:
                continue

            tasks_run_time = ratelimit(tasks_run_time, batch_per_second)

            # Make sure that the sent tasks will be saved
            # before being processed
            exc = None
            sent = False
            while not sent:
                # Unpack only after checking the error: on failure,
                # the result is None and cannot be unpacked.
                result, exc = self.handle_backend_errors(
                    self.backend.update_tasks_sent, job_id, tasks.keys())
                if exc is not None:
                    self.logger.warn(
                        '[job_id=%s] Job could not update '
                        'the sent tasks: %s', job_id, exc)
                    break
                job_status, old_last_sent = result
                sent = self.dispatch_tasks_batch(
                    beanstalkd_workers, job_id, job_type, job_config, tasks)
                if not sent:
                    self.logger.warn(
                        '[job_id=%s] Job aborting the last sent tasks',
                        job_id)
                    job_status, exc = self.handle_backend_errors(
                        self.backend.abort_tasks_sent, job_id, tasks.keys(),
                        old_last_sent)
                    if exc is not None:
                        self.logger.warn(
                            '[job_id=%s] Job could not abort '
                            'the last sent tasks: %s', job_id, exc)
                        break
                if job_status == XcuteJobStatus.PAUSED:
                    self.logger.info('Job %s is paused', job_id)
                    return

                if not self.running:
                    break
                sleep(1)

            if exc is not None and not self.running:
                break
            tasks.clear()

            # After each tasks batch sent, adapt the sending speed
            # according to the processing speed.
            last_check = self.adapt_speed(job_id, job_config, last_check)
            tasks_per_second = job_config['tasks_per_second']
            tasks_batch_size = job_config['tasks_batch_size']
            batch_per_second = tasks_per_second / float(tasks_batch_size)
        else:
            # All the tasks have been listed: send the last batch.
            # Make sure that the sent tasks will be saved
            # before being processed
            sent = False
            while not sent:
                result, exc = self.handle_backend_errors(
                    self.backend.update_tasks_sent, job_id, tasks.keys(),
                    all_tasks_sent=True)
                if exc is not None:
                    self.logger.warn(
                        '[job_id=%s] Job could not update '
                        'the sent tasks: %s', job_id, exc)
                    break
                job_status, old_last_sent = result
                if tasks:
                    sent = self.dispatch_tasks_batch(
                        beanstalkd_workers, job_id, job_type, job_config,
                        tasks)
                else:
                    sent = True
                if not sent:
                    self.logger.warn(
                        '[job_id=%s] Job aborting the last sent tasks',
                        job_id)
                    job_status, exc = self.handle_backend_errors(
                        self.backend.abort_tasks_sent, job_id, tasks.keys(),
                        old_last_sent)
                    if exc is not None:
                        self.logger.warn(
                            '[job_id=%s] Job could not abort '
                            'the last sent tasks: %s', job_id, exc)
                        break
                else:
                    if job_status == XcuteJobStatus.FINISHED:
                        self.logger.info('Job %s is finished', job_id)

                    self.logger.info(
                        'Finished dispatching job (job_id=%s)', job_id)
                    return
                if job_status == XcuteJobStatus.PAUSED:
                    self.logger.info('Job %s is paused', job_id)
                    return

                if not self.running:
                    break
                sleep(1)

        self.logger.warn('[job_id=%s] Job was stopped before it was finished',
                         job_id)

        _, exc = self.handle_backend_errors(self.backend.free, job_id)
        if exc is not None:
            self.logger.warn('[job_id=%s] Job has not been freed: %s',
                             job_id, exc)

    def dispatch_tasks_batch(self, beanstalkd_workers,
                             job_id, job_type, job_config, tasks):
        """
            Try sending a task until it's ok

        :param beanstalkd_workers: iterator as returned by
            `get_beanstalkd_workers`.
        :returns: True if the batch has been sent, False otherwise.
        :raises ValueError: if the encoded payload is longer
            than 2**16 characters.
        """

        beanstalkd_payload = self.make_beanstalkd_payload(
            job_id, job_type, job_config, tasks)

        if len(beanstalkd_payload) > 2**16:
            raise ValueError('Task payload is too big (length=%s)' %
                             len(beanstalkd_payload))

        # max 2 minutes per task
        ttr = len(tasks) * DEFAULT_TTR

        i = 0
        for beanstalkd_worker in beanstalkd_workers:
            if not self.running:
                return False
            i += 1
            if beanstalkd_worker is None:
                # Try for at least 30 seconds
                if i > 30:
                    break
                continue

            try:
                beanstalkd_worker.put(beanstalkd_payload, ttr=ttr)
                self.logger.debug(
                    '[job_id=%s] Tasks sent to %s: %s', job_id,
                    beanstalkd_worker.addr, str(tasks))
                return True
            except Exception as exc:
                self.logger.warn(
                    '[job_id=%s] Fail to send beanstalkd job: %s',
                    job_id, exc)
                # TODO(adu): We could be more lenient
                # and wait for a few errors in a row
                # to happen before marking it as broken.
                beanstalkd_worker.is_broken = True
                sleep(1)
        return False

    def make_beanstalkd_payload(self, job_id, job_type, job_config,
                                tasks):
        """Encode a batch of tasks as a JSON event for the workers."""
        return json.dumps({
            'event': EventTypes.XCUTE_TASKS,
            'data': {
                'job_id': job_id,
                'job_type': job_type,
                'job_config': job_config,
                'tasks': tasks,
                'beanstalkd_reply': {
                    'addr': self.beanstalkd_reply_addr,
                    'tube': self.beanstalkd_reply_tube,
                },
            }
        })

    def safe_compute_total_tasks(self, job_id, job_type, job_info, job):
        """
        Compute the total number of tasks and update the backend.
        """
        try:
            self.logger.info(
                '[job_id=%s] Start to compute the total number of tasks',
                job_id)
            self.compute_total_tasks(job_id, job_type, job_info, job)
            self.logger.info(
                '[job_id=%s] Finish to compute the total number of tasks',
                job_id)
        except Exception as exc:
            self.logger.exception(
                '[job_id=%s] Fail to compute the total number of tasks: %s',
                job_id, exc)
        finally:
            # `pop` instead of `del`: do not raise if the entry is missing
            self.compute_total_tasks_threads.pop(job_id, None)

    def compute_total_tasks(self, job_id, job_type, job_info, job):
        """
        Increment the total number of tasks in the backend with each
        counter yielded by the job, then declare the total as done.
        """
        job_params = job_info['config']['params']
        total_marker = job_info['tasks']['total_marker']

        tasks_counter = job.get_total_tasks(
            job_params, marker=total_marker)
        for total_marker, tasks_incr in tasks_counter:
            stop, exc = self.handle_backend_errors(
                self.backend.incr_total_tasks, job_id,
                total_marker, tasks_incr)
            if exc is not None:
                self.logger.warn(
                    '[job_id=%s] Job has not been updated '
                    'with total tasks: %s', job_id, exc)
                return
            if stop or not self.running:
                return

        total_tasks, exc = self.handle_backend_errors(
            self.backend.total_tasks_done, job_id)
        if exc is not None:
            self.logger.warn(
                '[job_id=%s] Job has not been updated '
                'with last total tasks: %s', job_id, exc)
            return
        self.logger.info('[job_id=%s] %s estimated tasks', job_id, total_tasks)

    def listen_beanstalkd_reply_forever(self):
        """
            Process this orchestrator's job replies
        """

        self.logger.info('Connecting to the reply beanstalkd')

        # Retry every 5 seconds until connected (or stopped)
        while self.running:
            try:
                listener = BeanstalkdListener(
                    addr=self.beanstalkd_reply_addr,
                    tube=self.beanstalkd_reply_tube,
                    logger=self.logger)

                break
            except ConnectionError:
                self.logger.error('Failed to connect to the reply beanstalkd')

            sleep(5)

        self.logger.info('Listening to replies on %s (tube=%s)',
                         self.beanstalkd_reply_addr,
                         self.beanstalkd_reply_tube)

        # keep the job results in memory
        while self.running:
            connection_error = self.listen_loop(listener)

            # in case of a beanstalkd connection error
            # sleep to avoid spamming
            if connection_error:
                sleep(2)

        self.logger.info('Exited listening thread')

    def listen_loop(self, listener):
        """
            One iteration of the listening loop

        :returns: True if no reply was fetched (considered as
            a connection error), False otherwise or on timeout.
        """

        connection_error = False
        try:
            replies = listener.fetch_job(
                self.process_reply, timeout=self.DEFAULT_DISPATCHER_TIMEOUT)

            # to force the execution of process_reply
            # if there were no replies, consider it as a connection error
            connection_error = len(list(replies)) == 0

        except OioTimeout:
            pass

        return connection_error

    def process_reply(self, beanstalkd_job_id, encoded_reply):
        """
        Decode a reply and update the processed tasks in the backend.

        This is a generator: it yields None once the reply is processed.
        """
        reply = json.loads(encoded_reply)

        job_id = reply['job_id']
        task_ids = reply['task_ids']
        task_results = reply['task_results']
        task_errors = reply['task_errors']

        self.logger.debug('Tasks processed (job_id=%s): %s', job_id, task_ids)

        try:
            finished, exc = self.handle_backend_errors(
                self.backend.update_tasks_processed,
                job_id, task_ids, task_errors, task_results)
            if exc is None:
                if finished:
                    self.logger.info('Job %s is finished', job_id)
            else:
                self.logger.warn(
                    '[job_id=%s] Job has not been updated '
                    'with the processed tasks: %s', job_id, exc)
        except Exception:
            self.logger.exception('Error processing reply')

        yield None

    def refresh_beanstalkd_workers_forever(self):
        """
        Refresh beanstalkd workers by looking at the score,
        existing tubes and tube statistics.
        """
        while self.running:
            try:
                beanstalkd_workers = self._find_beanstalkd_workers()
            except Exception as exc:
                self.logger.error(
                    'Fail to find beanstalkd workers: %s', exc)
                # TODO(adu): We could keep trying to send jobs
                # to the beanstalkd we already found.
                # But we need the score to know how to dispatch the tasks...
                beanstalkd_workers = dict()

            old_beanstalkd_workers_addr = set(self.beanstalkd_workers.keys())
            new_beanstalkd_workers_addr = set(beanstalkd_workers.keys())

            added_beanstalkds = new_beanstalkd_workers_addr \
                - old_beanstalkd_workers_addr
            for beanstalkd_addr in added_beanstalkds:
                self.logger.info('Add beanstalkd %s' % beanstalkd_addr)
                beanstalkd = beanstalkd_workers[beanstalkd_addr]
                beanstalkd.use(self.beanstalkd_workers_tube)

            removed_beanstalkds = old_beanstalkd_workers_addr \
                - new_beanstalkd_workers_addr
            for beanstalkd_addr in removed_beanstalkds:
                self.logger.info('Remove beanstalkd %s' % beanstalkd_addr)

            self.logger.info('Refresh beanstalkd workers')
            # A new dictionary is assigned at each refresh:
            # `get_beanstalkd_workers` compares its identity
            # to detect the refresh.
            self.beanstalkd_workers = beanstalkd_workers

            # Wait second by second to stop quickly on exit
            for _ in range(self.refresh_time_beanstalkd_workers):
                if not self.running:
                    break
                sleep(1)

        self.logger.info('Exited beanstalkd workers thread')

    def _find_beanstalkd_workers(self):
        """
        Find beanstalkd workers by looking at the score,
        existing tubes and tube statistics.

        :returns: dictionary of the usable beanstalkd, indexed by address.
        """
        all_beanstalkd = self.conscience_client.all_services(
            'beanstalkd')

        beanstalkd_workers = dict()
        for beanstalkd_info in all_beanstalkd:
            try:
                beanstalkd = self._check_beanstalkd_worker(beanstalkd_info)
                if not beanstalkd:
                    continue
                beanstalkd_workers[beanstalkd.addr] = beanstalkd
            except Exception as exc:
                self.logger.error('Fail to check beanstalkd: %s', exc)
        return beanstalkd_workers

    def _check_beanstalkd_worker(self, beanstalkd_info):
        """
        Check beanstalkd worker by looking at the score,
        existing tubes and tube statistics.

        :returns: the beanstalkd client with its `addr`, `is_broken`
            and `occurrence` attributes set, or None if the beanstalkd
            must be ignored.
        """
        beanstalkd_addr = 'beanstalk://' + beanstalkd_info['addr']
        beanstalkd_score = beanstalkd_info['score']
        if beanstalkd_score == 0:
            self.logger.debug(
                'Ignore beanstalkd %s: score=0', beanstalkd_addr)
            return None

        # Reuse the known client when there is one
        beanstalkd = self.beanstalkd_workers.get(beanstalkd_addr)
        if not beanstalkd:
            beanstalkd = Beanstalk.from_url(beanstalkd_addr)
            beanstalkd.addr = beanstalkd_addr

        beanstalkd_tubes = beanstalkd.tubes()
        if self.beanstalkd_workers_tube not in beanstalkd_tubes:
            self.logger.debug(
                'Ignore beanstalkd %s: '
                'No worker has ever listened to the tube %s',
                beanstalkd_addr, self.beanstalkd_workers_tube)
            return None

        current_stats = beanstalkd.stats_tube(
            self.beanstalkd_workers_tube)
        beanstalkd_jobs_ready = current_stats['current-jobs-ready']
        if beanstalkd_jobs_ready > 0:
            beanstalkd_jobs_reserved = current_stats['current-jobs-reserved']
            if beanstalkd_jobs_reserved <= 0:
                self.logger.warn(
                    'Ignore beanstalkd %s: The worker doesn\'t process task '
                    '(current-jobs-ready=%d, current-jobs-reserved=%d)',
                    beanstalkd_addr, beanstalkd_jobs_ready,
                    beanstalkd_jobs_reserved)
                return None

            if beanstalkd_jobs_ready >= self.max_jobs_per_beanstalkd:
                self.logger.warn(
                    'Ignore beanstalkd %s: The queue is full '
                    '(current-jobs-ready=%d, current-jobs-reserved=%d)',
                    beanstalkd_addr, beanstalkd_jobs_ready,
                    beanstalkd_jobs_reserved)
                return None

        if hasattr(beanstalkd, 'is_broken') and beanstalkd.is_broken:
            self.logger.info(
                'Beanstalkd %s was broken, and now it\'s coming back',
                beanstalkd_addr)
        # Always set: `get_beanstalkd_workers` reads this attribute
        beanstalkd.is_broken = False

        # Favor the workers with a good score
        # 50% -> beanstalkd score
        worker_score = beanstalkd_score * 50. / 100.
        # 50% -> beanstalkd tube size
        worker_score += 50 - (beanstalkd_jobs_ready * 50.
                              / self.max_jobs_per_beanstalkd)
        beanstalkd.occurrence = int(math.ceil(worker_score / 10.))

        self.logger.debug(
            'Give the green light to beanstalkd %s (worker_score=%d)',
            beanstalkd_addr, worker_score)
        return beanstalkd

    def get_beanstalkd_workers(self):
        """
            Yield beanstalkd workers following a loadbalancing strategy

        Each worker appears `occurrence` times in the shuffled sequence.
        None is yielded when no worker is available or when all of them
        are broken.
        """

        beanstalkd_workers_id = None
        beanstalkd_workers = list()
        while True:
            if not self.beanstalkd_workers:
                self.logger.info('No beanstalkd worker available')
                yield None
                sleep(1)
                continue

            # Rebuild the sequence when the workers have been refreshed
            if id(self.beanstalkd_workers) != beanstalkd_workers_id:
                beanstalkd_workers_id = id(self.beanstalkd_workers)
                beanstalkd_workers = list()
                for beanstalkd in self.beanstalkd_workers.values():
                    for _ in range(beanstalkd.occurrence):
                        beanstalkd_workers.append(beanstalkd)

            # Shuffle to not have the same suite for all jobs
            random.shuffle(beanstalkd_workers)

            yielded = False
            for beanstalkd_worker in beanstalkd_workers:
                if id(self.beanstalkd_workers) != beanstalkd_workers_id:
                    break
                if beanstalkd_worker.is_broken:
                    continue
                yield beanstalkd_worker
                yielded = True
            else:
                if not yielded:
                    self.logger.info(
                        'All beanstalkd workers available are broken')
                    yield None
                    sleep(1)

    def exit_gracefully(self, *args, **kwargs):
        """Ask all the loops to stop (accepts any arguments)."""
        if self.running:
            self.logger.info('Exiting gracefully')
            self.running = False
        else:
            self.logger.info('Already exiting gracefully')
def __init__(self, conf, tool):
    """
    Select the beanstalkd used to send the items to the workers
    and the beanstalkd used to receive their replies.

    :raises OioException: if no beanstalkd is available, if none of them
        can be used for the workers or for the replies, or if the reply
        tube already exists.
    """
    super(_DistributedDispatcher, self).__init__(conf, tool)
    self.sending = None

    self.max_items_per_second = int_value(
        self.conf.get('items_per_second'),
        self.tool.DEFAULT_ITEM_PER_SECOND)

    # All available beanstalkd
    conscience_client = ConscienceClient(self.conf)
    all_beanstalkd = conscience_client.all_services('beanstalkd')
    all_available_beanstalkd = dict()
    for beanstalkd in all_beanstalkd:
        if beanstalkd['score'] <= 0:
            continue
        all_available_beanstalkd[beanstalkd['addr']] = beanstalkd
    if not all_available_beanstalkd:
        raise OioException('No beanstalkd available')

    # Beanstalkd workers
    workers_tube = self.conf.get('distributed_beanstalkd_worker_tube') \
        or self.tool.DEFAULT_DISTRIBUTED_BEANSTALKD_WORKER_TUBE
    self.beanstalkd_workers = dict()
    for beanstalkd in locate_tube(all_available_beanstalkd.values(),
                                  workers_tube):
        beanstalkd_worker = BeanstalkdSender(
            beanstalkd['addr'], workers_tube, self.logger)
        self.beanstalkd_workers[beanstalkd['addr']] = beanstalkd_worker
        self.logger.info(
            'Beanstalkd %s using tube %s is selected as a worker',
            beanstalkd_worker.addr, beanstalkd_worker.tube)
    if not self.beanstalkd_workers:
        raise OioException('No beanstalkd worker available')
    nb_workers = len(self.beanstalkd_workers)
    if self.max_items_per_second > 0:
        # Max 5 seconds in advance
        queue_size_per_worker = self.max_items_per_second * 5 / nb_workers
    else:
        queue_size_per_worker = 1024
    for beanstalkd_worker in self.beanstalkd_workers.values():
        beanstalkd_worker.high_limit = queue_size_per_worker

    # Beanstalkd reply: prefer the local beanstalkd with the best score
    beanstalkd_reply = dict()
    try:
        local_services = conscience_client.local_services()
        for local_service in local_services:
            if local_service['type'] != 'beanstalkd':
                continue
            beanstalkd = all_available_beanstalkd.get(
                local_service['addr'])
            if beanstalkd is None:
                continue
            if beanstalkd_reply \
                    and beanstalkd_reply['score'] >= beanstalkd['score']:
                continue
            beanstalkd_reply = beanstalkd
    except Exception as exc:  # pylint: disable=broad-except
        self.logger.warning(
            'ERROR when searching for beanstalkd locally: %s', exc)
    if not beanstalkd_reply:
        self.logger.warn('No beanstalkd available locally')

        try:
            beanstalkd = conscience_client.next_instance('beanstalkd')
            beanstalkd_reply = all_available_beanstalkd[beanstalkd['addr']]
        except Exception as exc:  # pylint: disable=broad-except
            self.logger.warning(
                'ERROR when searching for beanstalkd: %s', exc)
    if not beanstalkd_reply:
        # Both lookups failed: raise a meaningful error
        # instead of a KeyError on the empty dictionary.
        raise OioException('No beanstalkd available for the replies')
    beanstalkd_reply_addr = beanstalkd_reply['addr']

    # If the tube exists, another service must have already used this tube
    tube_reply = workers_tube + '.reply.' + str(time.time())
    tubes = Beanstalk.from_url(
        'beanstalk://' + beanstalkd_reply_addr).tubes()
    if tube_reply in tubes:
        raise OioException(
            'Beanstalkd %s using tube %s is already used'
            % (beanstalkd_reply_addr, tube_reply))

    self.beanstalkd_reply = BeanstalkdListener(
        beanstalkd_reply_addr, tube_reply, self.logger)
    self.logger.info(
        'Beanstalkd %s using tube %s is selected for the replies',
        self.beanstalkd_reply.addr, self.beanstalkd_reply.tube)
class ContentFactory(object):
    """
    Build content objects (DupContent or RainContent) according to
    the data security of their storage policy.
    """

    def __init__(self, conf):
        self.conf = conf
        self.logger = get_logger(conf)
        self.cs_client = ConscienceClient(conf)
        self.container_client = ContainerClient(conf)
        # Namespace information as returned by the conscience
        self.ns_info = self.cs_client.info()

    def _extract_datasec(self, stgpol_name):
        """
        Find the data security type and arguments of a storage policy.

        :returns: a tuple `(type, args)`, `args` being a dictionary
            of strings.
        :raises InconsistentContent: if the storage policy or its data
            security is not declared in the namespace information.
        """
        try:
            policy_desc = self.ns_info["storage_policy"][stgpol_name]
        except KeyError:
            self.logger.error("Storage policy '%s' not found" % stgpol_name)
            raise InconsistentContent("Storage policy not found")

        # The description has exactly three fields separated by ':',
        # only the second one (data security name) is used here.
        _stgclass, datasec_name, _datatreat = policy_desc.split(':')
        if datasec_name != 'NONE':
            try:
                datasec_desc = self.ns_info["data_security"][datasec_name]
            except KeyError:
                self.logger.error(
                    "Data security '%s' not found" % datasec_name)
                raise InconsistentContent("Data security not found")

            ds_type, raw_args = datasec_desc.split(':')
            # Arguments look like "key1=value1|key2=value2"
            pairs = (item.split('=') for item in raw_args.split('|'))
            return ds_type, {key: value for key, value in pairs}

        # No data security: a single copy
        return "DUP", {"nb_copy": "1", "distance": "0"}

    def _make_content(self, container_id, meta, chunks):
        """Instantiate the content class matching the policy of `meta`."""
        pol_type, pol_args = self._extract_datasec(meta['policy'])
        if pol_type == "DUP":
            return DupContent(self.conf, container_id, meta, chunks, pol_args)
        if pol_type == "RAIN":
            return RainContent(self.conf, container_id, meta, chunks, pol_args)
        raise InconsistentContent("Unknown storage policy")

    def get(self, container_id, content_id):
        """
        Load an existing content.

        :raises ContentNotFound: if the content does not exist.
        """
        try:
            meta, chunks = self.container_client.content_show(
                cid=container_id, content=content_id)
        except NotFound:
            raise ContentNotFound("Content %s/%s not found" % (container_id,
                                  content_id))
        return self._make_content(container_id, meta, chunks)

    def new(self, container_id, path, size, policy):
        """Prepare a new content with the specified storage policy."""
        meta, chunks = self.container_client.content_prepare(
            cid=container_id, path=path, size=size, stgpol=policy)
        return self._make_content(container_id, meta, chunks)

    def change_policy(self, container_id, content_id, new_policy):
        """
        Copy a content to a new content using another storage policy.

        :returns: the new content, or the old one if it already
            uses `new_policy`.
        """
        source = self.get(container_id, content_id)
        if source.stgpol_name == new_policy:
            return source

        target = self.new(container_id, source.path, source.length,
                          new_policy)
        target.upload(GeneratorIO(source.download()))
        # the old content is automatically deleted because the new content has
        # the same name (but not the same id)
        return target
class RawxDecommissionJob(XcuteRdirJob):
    """
    Job creating one task per chunk listed by rdir for a rawx service,
    optionally stopping when the usage of the service reaches a target.
    """

    JOB_TYPE = 'rawx-decommission'
    TASK_CLASS = RawxDecommissionTask

    DEFAULT_RAWX_TIMEOUT = 60.0
    DEFAULT_MIN_CHUNK_SIZE = 0
    DEFAULT_MAX_CHUNK_SIZE = 0
    DEFAULT_USAGE_TARGET = 0
    DEFAULT_USAGE_CHECK_INTERVAL = 60.0

    @classmethod
    def sanitize_params(cls, job_params):
        """
        Validate and convert the job parameters.

        :returns: a tuple with the sanitized parameters
            and the string 'rawx/<service ID>'.
        :raises ValueError: if 'service_id' is missing.
        """
        params, _ = super(
            RawxDecommissionJob, cls).sanitize_params(job_params)

        # specific configuration
        service_id = job_params.get('service_id')
        if not service_id:
            raise ValueError('Missing service ID')
        params['service_id'] = service_id

        params['rawx_timeout'] = float_value(
            job_params.get('rawx_timeout'), cls.DEFAULT_RAWX_TIMEOUT)

        params['min_chunk_size'] = int_value(
            job_params.get('min_chunk_size'), cls.DEFAULT_MIN_CHUNK_SIZE)

        params['max_chunk_size'] = int_value(
            job_params.get('max_chunk_size'), cls.DEFAULT_MAX_CHUNK_SIZE)

        # Comma-separated list
        raw_excluded = job_params.get('excluded_rawx')
        params['excluded_rawx'] = \
            raw_excluded.split(',') if raw_excluded else list()

        params['usage_target'] = int_value(
            job_params.get('usage_target'), cls.DEFAULT_USAGE_TARGET)

        params['usage_check_interval'] = float_value(
            job_params.get('usage_check_interval'),
            cls.DEFAULT_USAGE_CHECK_INTERVAL)

        return params, 'rawx/%s' % service_id

    def __init__(self, conf, logger=None):
        super(RawxDecommissionJob, self).__init__(conf, logger=logger)
        self.rdir_client = RdirClient(self.conf, logger=self.logger)
        self.conscience_client = ConscienceClient(
            self.conf, logger=self.logger)

    def get_usage(self, service_id):
        """
        Compute the usage of a rawx service as `100 - stat.space`.

        :raises ValueError: if no rawx service has this ID.
        """
        for rawx in self.conscience_client.all_services('rawx', full=True):
            tags = rawx['tags']
            # Services without 'tag.service_id' are matched by address
            if tags.get('tag.service_id', rawx['addr']) != service_id:
                continue
            return 100 - tags['stat.space']
        raise ValueError('No rawx service this ID (%s)' % service_id)

    def get_tasks(self, job_params, marker=None):
        """
        Yield a `(task ID, task payload)` tuple for each chunk, until all
        chunks are listed or the usage target (if positive) is reached.
        """
        service_id = job_params['service_id']
        usage_target = job_params['usage_target']
        usage_check_interval = job_params['usage_check_interval']
        check_usage = usage_target > 0

        if check_usage:
            now = time.time()
            current_usage = self.get_usage(service_id)
            if current_usage <= usage_target:
                self.logger.info(
                    'current usage %.2f%%: target already reached (%.2f%%)',
                    current_usage, usage_target)
                return
            last_usage_check = now

        for container_id, content_id, chunk_id, _ in self.get_chunk_infos(
                job_params, marker=marker):
            payload = {
                'container_id': container_id,
                'content_id': content_id,
                'chunk_id': chunk_id
            }
            yield '|'.join((container_id, content_id, chunk_id)), payload

            if not check_usage:
                continue
            now = time.time()
            # Do not ask for the usage more often than the interval
            if now - last_usage_check < usage_check_interval:
                continue
            current_usage = self.get_usage(service_id)
            if current_usage <= usage_target:
                self.logger.info(
                    'current usage %.2f%%: target reached (%.2f%%)',
                    current_usage, usage_target)
                return
            last_usage_check = now

    def get_total_tasks(self, job_params, marker=None):
        """
        Yield `(marker, increment)` tuples estimating the number of tasks:
        one every 1000 chunks, then one for the remaining chunks.
        The chunk counts are scaled by `1 - usage_target / current_usage`.
        """
        service_id = job_params['service_id']
        usage_target = job_params['usage_target']

        current_usage = self.get_usage(service_id)
        if current_usage <= usage_target:
            return

        kept_chunks_ratio = 1 - (usage_target / float(current_usage))
        full_batch = int(math.ceil(1000 * kept_chunks_ratio))
        count = 0
        for count, (container_id, content_id, chunk_id, _) in enumerate(
                self.get_chunk_infos(job_params, marker=marker), 1):
            if count % 1000:
                continue
            yield '|'.join((container_id, content_id, chunk_id)), full_batch

        remaining = int(math.ceil(count % 1000 * kept_chunks_ratio))
        if remaining > 0:
            yield '|'.join((container_id, content_id, chunk_id)), remaining

    def get_chunk_infos(self, job_params, marker=None):
        """Fetch from rdir the chunks of the service, after `marker`."""
        return self.rdir_client.chunk_fetch(
            job_params['service_id'],
            timeout=job_params['rdir_timeout'],
            limit=job_params['rdir_fetch_limit'],
            start_after=marker)
def conscience(self):
    """Return the conscience client, creating it on first access.

    The client is built with `self.conf` and shares `self.http_pool`.
    """
    if self._conscience:
        return self._conscience
    # Imported only when a client actually has to be built
    from oio.conscience.client import ConscienceClient
    self._conscience = ConscienceClient(
        self.conf, pool_manager=self.http_pool)
    return self._conscience
def __init__(self, conf, logger=None):
    """Initialize the job, then create its rdir and conscience clients.

    Both clients use the logger set up by the parent constructor.
    """
    super(RawxDecommissionJob, self).__init__(conf, logger=logger)
    job_logger = self.logger
    self.rdir_client = RdirClient(self.conf, logger=job_logger)
    self.conscience_client = ConscienceClient(
        self.conf, logger=job_logger)
def __init__(self, conf, **kwargs):
    """Initialize the parent client, then attach a conscience client
    (`self.cs`) built from `self.conf`."""
    super(AccountClient, self).__init__(conf, **kwargs)
    conscience = ConscienceClient(self.conf)
    self.cs = conscience
def __init__(self, conf, **kwargs):
    """Initialize the parent client, then attach a conscience client
    (`self.cs`) built from `self.conf`."""
    super(AccountClient, self).__init__(conf, **kwargs)
    conscience = ConscienceClient(self.conf)
    self.cs = conscience
def cs(self):
    """Return the conscience client, creating it on first access.

    The client reuses this object's logger and the pool manager of
    `self.rdir`.
    """
    if self._cs:
        return self._cs
    self._cs = ConscienceClient(
        self.conf,
        logger=self.logger,
        pool_manager=self.rdir.pool_manager)
    return self._cs
class BlobClient(object):
    """A low-level client to rawx services."""

    def __init__(self, conf=None, perfdata=None,
                 logger=None, connection_pool=None, **kwargs):
        """
        :param conf: configuration passed to the logger factory and to
            the conscience client.
        :param perfdata: object handed to the metachunk writer.
        :param logger: logger to use (one is created when not provided).
        :param connection_pool: pool manager to use (one is created from
            `kwargs` when not provided).
        """
        self.conf = conf
        self.perfdata = perfdata
        if logger:
            self.logger = logger
        else:
            self.logger = get_logger(self.conf)
        # FIXME(FVE): we do not target the same set of services,
        # we should use a separate connection pool for rawx services.
        if connection_pool:
            self.http_pool = connection_pool
        else:
            self.http_pool = get_pool_manager(**kwargs)
        self.conscience_client = ConscienceClient(
            conf, logger=self.logger, pool_manager=self.http_pool)

    def resolve_url(self, url):
        """Resolve a rawx URL through the conscience client."""
        return self.conscience_client.resolve_url('rawx', url)

    @update_rawx_perfdata
    @ensure_request_id
    def chunk_put(self, url, meta, data, **kwargs):
        """
        Upload `data` as a single chunk at `url`.

        :param meta: chunk metadata; must hold 'chunk_pos' and the hash
            matching the storage method.
        :param data: file-like object, or an iterable which will be
            wrapped to provide `read`.
        """
        source = data if hasattr(data, 'read') else utils.GeneratorIO(data)
        target = {'url': self.resolve_url(url), 'pos': meta['chunk_pos']}
        # FIXME: ugly
        method_desc = meta.get('chunk_method',
                               meta.get('content_chunkmethod'))
        storage_method = STORAGE_METHODS.load(method_desc)
        if storage_method.ec:
            hash_key = 'metachunk_hash'
        else:
            hash_key = 'chunk_hash'
        writer = ReplicatedMetachunkWriter(
            meta, [target], FakeChecksum(meta[hash_key]),
            storage_method, quorum=1, perfdata=self.perfdata)
        writer.stream(source, None)

    @update_rawx_perfdata
    @ensure_request_id
    def chunk_delete(self, url, **kwargs):
        """
        Delete one chunk.

        :returns: the response, when its status is 204.
        :raises: the exception built from the response for any other
            status.
        """
        resp = self.http_pool.request(
            'DELETE', self.resolve_url(url), **kwargs)
        if resp.status == 204:
            return resp
        raise exc.from_response(resp)

    @ensure_request_id
    def chunk_delete_many(self, chunks, cid=None,
                          concurrency=PARALLEL_CHUNKS_DELETE,
                          **kwargs):
        """
        :rtype: `list` of either `urllib3.response.HTTPResponse`
            or `urllib3.exceptions.HTTPError`, with an extra "chunk"
            attribute.
        """
        headers = kwargs['headers'].copy()
        if cid is not None:
            # This is only to get a nice access log
            headers['X-oio-chunk-meta-container-id'] = cid
        timeout = kwargs.get('timeout') or urllib3.Timeout(CHUNK_TIMEOUT)

        def _delete_one(chunk_):
            # Errors are returned (not raised) so that one failure
            # does not prevent collecting the other results.
            try:
                resp = self.http_pool.request(
                    "DELETE", self.resolve_url(chunk_['url']),
                    headers=headers, timeout=timeout)
            except urllib3.exceptions.HTTPError as ex:
                ex.chunk = chunk_
                return ex
            resp.chunk = chunk_
            return resp

        pile = GreenPile(concurrency)
        for chunk in chunks:
            pile.spawn(_delete_one, chunk)
        return [result for result in pile if result]

    @update_rawx_perfdata
    @ensure_headers
    @ensure_request_id
    def chunk_get(self, url, check_headers=True, **kwargs):
        """
        :keyword check_headers: when True (the default), raise FaultyChunk
            if a mandatory response header is missing.
        :returns: a tuple with a dictionary of chunk metadata and a stream
            to the chunk's data.
        """
        reader = ChunkReader(
            [{'url': self.resolve_url(url)}], READ_BUFFER_SIZE, **kwargs)
        # This must be done now if we want to access headers
        stream = reader.stream()
        meta = extract_headers_meta(reader.headers, check=check_headers)
        return meta, stream

    @update_rawx_perfdata
    @ensure_request_id
    def chunk_head(self, url, **kwargs):
        """
        Perform a HEAD request on a chunk.

        :param url: URL of the chunk to request.
        :keyword xattr: when False, ask the rawx not to read
            extended attributes of the chunk.
        :keyword check_hash: when True, ask the rawx to validate
            checksum of the chunk.
        :returns: a `dict` with chunk metadata (empty when xattr is False).
        """
        _xattr = bool(kwargs.get('xattr', True))
        url = self.resolve_url(url)
        headers = kwargs['headers'].copy()
        headers[FETCHXATTR_HEADER] = _xattr
        if kwargs.get('check_hash', False):
            headers[CHECKHASH_HEADER] = True
        try:
            resp = self.http_pool.request('HEAD', url, headers=headers)
        except urllib3.exceptions.HTTPError as ex:
            oio_exception_from_httperror(
                ex, reqid=headers[REQID_HEADER], url=url)
        if resp.status != 200:
            raise exc.from_response(resp)
        if not _xattr:
            return dict()
        return extract_headers_meta(resp.headers)

    @update_rawx_perfdata
    @ensure_request_id
    def chunk_copy(self, from_url, to_url, chunk_id=None, fullpath=None,
                   cid=None, path=None, version=None, content_id=None,
                   **kwargs):
        """
        Copy a chunk: read it from `from_url` and upload it to `to_url`.

        Each optional parameter overrides the matching metadata of the
        source chunk. The source stream is always closed.

        :returns: the result of `chunk_put`.
        """
        stream = None
        # Check source headers only when new fullpath is not provided
        kwargs['check_headers'] = not bool(fullpath)
        try:
            meta, stream = self.chunk_get(from_url, **kwargs)
            meta['oio_version'] = OIO_VERSION
            meta['chunk_id'] = chunk_id or to_url.split('/')[-1]
            meta['full_path'] = fullpath or meta['full_path']
            meta['container_id'] = cid or meta.get('container_id')
            meta['content_path'] = path or meta.get('content_path')
            # FIXME: the original keys are the good ones.
            # ReplicatedMetachunkWriter should be modified to accept them.
            meta['version'] = version or meta.get('content_version')
            meta['id'] = content_id or meta.get('content_id')
            meta['chunk_method'] = meta['content_chunkmethod']
            meta['policy'] = meta['content_policy']
            return self.chunk_put(to_url, meta, stream, **kwargs)
        finally:
            if stream:
                stream.close()

    def _generate_fullchunk_copy(self, chunk, random_hex=60, **kwargs):
        """
        Generate new chunk URLs, by replacing the last `random_hex`
        characters of the original URLs by random hexadecimal digits.
        """
        digits = [random.choice('0123456789ABCDEF')
                  for _ in range(random_hex)]
        return chunk[:-random_hex] + ''.join(digits)

    @update_rawx_perfdata
    @ensure_headers
    @ensure_request_id
    def chunk_link(self, target, link, fullpath, headers=None, **kwargs):
        """
        Send a COPY request on `target`, with `link` as destination.

        :param link: destination URL; generated from `target` when None.
        :returns: a tuple with the response and the destination URL.
        :raises exc.ChunkException: when the response status is not 201.
        """
        hdrs = headers.copy()
        if link is None:
            link = self._generate_fullchunk_copy(target, **kwargs)
        hdrs['Destination'] = link
        hdrs[CHUNK_HEADERS['full_path']] = fullpath
        resp = self.http_pool.request(
            'COPY', self.resolve_url(target), headers=hdrs)
        if resp.status == 201:
            return resp, link
        raise exc.ChunkException(resp.status)
def __init__(self, conf):
    """Keep the configuration, create the logger and the conscience and
    container clients, then load the namespace information."""
    self.conf = conf
    self.logger = get_logger(conf)
    conscience = ConscienceClient(conf)
    self.cs_client = conscience
    self.container_client = ContainerClient(conf)
    self.ns_info = conscience.info()
class ServiceWatcher(object):
    """Watch one service: run its checks, collect its stats and register
    the resulting definition in conscience, once per `check_interval`."""

    def __init__(self, conf, service, **kwargs):
        """
        :param conf: watcher configuration (must hold 'namespace').
        :param service: service description; 'host', 'port' and 'type'
            are mandatory, and 'checks' and 'stats' are read by
            `init_checkers` and `init_stats`.
        :raises Exception: when a mandatory field is missing.
        """
        self.conf = conf
        self.running = False

        for k in ['host', 'port', 'type']:
            if k not in service:
                raise Exception(
                    'Missing field "%s" in service configuration' % k)
        self.name = '%s|%s|%s' % \
            (service['type'], service['host'], service['port'])

        self.service = service

        self.rise = int_value(self._load_item_config('rise'), 1)
        self.fall = int_value(self._load_item_config('fall'), 1)
        self.check_interval = float_value(
            self._load_item_config('check_interval'), 1)
        self.deregister_on_exit = true_value(
            self._load_item_config('deregister_on_exit', False))

        self.logger = get_logger(self.conf)
        self.pool_manager = get_pool_manager()
        self.cs = ConscienceClient(self.conf, pool_manager=self.pool_manager,
                                   logger=self.logger)
        # FIXME: explain that
        self.client = ProxyClient(self.conf, pool_manager=self.pool_manager,
                                  no_ns_in_url=True, logger=self.logger)
        self.last_status = False
        self.status = False
        self.failed = False
        self.service_definition = {
            'ns': self.conf['namespace'],
            'type': self.service['type'],
            'addr': get_addr(self.service['host'], self.service['port']),
            'score': 0,
            'tags': {}
        }
        if self.service.get('location', None):
            self.service_definition['tags']['tag.loc'] = \
                self.service['location']
        if self.service.get('slots', None):
            self.service_definition['tags']['tag.slots'] = \
                ','.join(self.service['slots'])
        self.service_checks = list()
        self.service_stats = list()
        self.init_checkers(service)
        self.init_stats(service)

    def _load_item_config(self, item, default=None):
        """Read `item` from the service description, falling back on the
        global configuration, then on `default` (also used when the
        value found is falsy)."""
        return self.service.get(item, self.conf.get(item)) or default

    def start(self):
        """Run the watch loop until the watcher is stopped."""
        self.logger.info('watcher "%s" starting', self.name)
        self.running = True
        self.watch()
        self.running = False

    def stop(self):
        """Stop the watch loop. When `deregister_on_exit` is set,
        register the service as down first."""
        self.logger.info('watcher "%s" stopping', self.name)
        if self.deregister_on_exit:
            self.logger.info('watcher "%s" deregister service', self.name)
            try:
                self.status = False
                self.last_status = False
                self.register()
            except Exception as e:
                self.logger.warn('Failed to register service: %s', e)
        self.running = False

    def check(self):
        """Perform the registered checks on the service until any of
        them fails or the end of the list is reached."""
        self.status = True
        for service_check in (x for x in self.service_checks
                              if self.running):
            if not service_check.service_status():
                self.status = False
                return

    def get_stats(self):
        """Update service definition with all configured stats"""
        if not self.status:
            return
        try:
            for stat in (x for x in self.service_stats if self.running):
                stats = stat.get_stats()
                self.service_definition['tags'].update(stats)
        except Exception as ex:
            self.logger.debug("get_stats error: %s", ex)
            self.status = False

    def register(self):
        """Send the service definition (with its 'tag.up' tag) to
        conscience, and log status changes."""
        # only accept a final zero/down-registration when exiting
        if not self.running and self.status:
            return

        # Alert when the status changes
        if self.status != self.last_status:
            if self.status:
                self.logger.info('service "%s" is now up', self.name)
            else:
                self.logger.warn('service "%s" is now down', self.name)
            self.last_status = self.status

        # Use a boolean so we can easily convert it to a number in conscience
        self.service_definition['tags']['tag.up'] = self.status
        try:
            self.cs.register(self.service['type'], self.service_definition,
                             retries=False)
        except OioException as rqe:
            self.logger.warn("Failed to register service %s: %s",
                             self.service_definition["addr"], rqe)

    def watch(self):
        """Check, collect stats and register, in loop, while running.

        Any exception marks the watcher as failed and is re-raised.
        """
        try:
            while self.running:
                self.check()
                self.get_stats()
                self.register()
                sleep(self.check_interval)
        except Exception as e:
            self.logger.warn('ERROR in watcher "%s"', e)
            self.failed = True
            # Bare raise, to keep the original traceback
            raise
        finally:
            self.logger.info('watcher "%s" stopped', self.name)

    def init_checkers(self, service):
        """Instantiate a checker for each entry of service['checks'].

        Missing check fields are filled in place with defaults.

        :raises Exception: when the type of a check is not known.
        """
        for check in service['checks']:
            # The type must be resolved first, the default name needs it
            check['type'] = check.get('type') or 'unknown'
            check['host'] = check.get('host') or service['host']
            check['port'] = check.get('port') or service['port']
            check['name'] = check.get('name') or "%s|%s|%s" % \
                (check['type'], check['host'], check['port'])
            check['rise'] = check.get('rise') or self.rise
            check['fall'] = check.get('fall') or self.fall

            service_check_class = CHECKERS_MODULES.get(check['type'])
            if not service_check_class:
                raise Exception(
                    'Invalid check type "%s", valid types: %s' %
                    (check['type'], ', '.join(CHECKERS_MODULES.keys())))
            service_check = service_check_class(self, check, self.logger)
            self.service_checks.append(service_check)

    def init_stats(self, service):
        """Initialize service stat fetchers"""
        self.service_stats[:] = []
        for stat in service['stats']:
            stat.setdefault('host', service['host'])
            stat.setdefault('port', service['port'])
            stat.setdefault('path', "")
            service_stat_class = STATS_MODULES.get(stat['type'], None)
            if not service_stat_class:
                raise Exception(
                    'Invalid stat type "%s", valid types: %s' %
                    (stat['type'], ', '.join(STATS_MODULES.keys())))
            service_stat = service_stat_class(self, stat, self.logger)
            self.service_stats.append(service_stat)
class ServiceWatcher(object):
    """Watch one service: run its checks and register the resulting
    status in conscience, once per `check_interval`."""

    def __init__(self, conf, service, **kwargs):
        """
        :param conf: watcher configuration (must hold 'namespace').
        :param service: service description; 'host', 'port' and 'type'
            are mandatory, and 'checks' is read by `init_checkers`.
        :raises Exception: when a mandatory field is missing.
        """
        self.conf = conf
        # Defined from the start, so stop() or status readers do not
        # depend on start() having been called
        self.running = False

        for k in ['host', 'port', 'type']:
            if k not in service:
                raise Exception(
                    'Missing field "%s" in service configuration' % k)
        self.name = '%s|%s' % \
            (service['host'], service['port'])

        self.check_interval = float_value(conf.get('check_interval'), 1)
        self.service = service

        self.rise = int_value(conf.get('rise'), 1)
        self.fall = int_value(conf.get('fall'), 1)

        self.logger = get_logger(self.conf)
        self.cs = ConscienceClient(self.conf)
        self.init_checkers(service)
        self.last_status = False
        self.failed = False
        self.service_definition = {
            'ns': self.conf['namespace'],
            'type': self.service['type'],
            'addr': '%s:%s' % (self.service['host'], self.service['port']),
            'score': 0,
            'tags': {}}

    def start(self):
        """Run the watch loop until the watcher is stopped."""
        self.logger.info('watcher "%s" starting', self.name)
        self.running = True
        self.watch()

    def stop(self):
        """Ask the watch loop to stop."""
        self.logger.info('watcher "%s" stopping', self.name)
        self.running = False

    def check(self):
        """Run every registered check; the service is up only if all of
        them pass. Status changes are logged."""
        status = True
        for service_check in self.service_checks:
            if not service_check.service_status():
                status = False

        if status != self.last_status:
            if status:
                self.logger.info('service "%s" is now up', self.name)
            else:
                self.logger.warn('service "%s" is now down', self.name)
            self.last_status = status

    def register(self):
        """Send the service definition to conscience, with 'tag.up' set
        to 'true' or 'false' according to the last status."""
        tag_up = 'true' if self.last_status else 'false'
        self.service_definition['tags']['tag.up'] = tag_up
        self.cs.register(
            self.service['type'], self.service_definition)

    def watch(self):
        """Check and register, in loop, while running.

        Any exception marks the watcher as failed and is re-raised.
        """
        try:
            while self.running:
                self.check()
                self.register()
                sleep(self.check_interval)
        except Exception as e:
            self.logger.warn('ERROR in watcher "%s"', e)
            self.failed = True
            # Bare raise, to keep the original traceback
            raise
        finally:
            self.logger.info('watcher "%s" stopped', self.name)

    def init_checkers(self, service):
        """(Re)build the checker list from service['checks'].

        Missing check fields are filled in place with defaults.

        :raises Exception: when the type of a check is not known.
        """
        self.service_checks = []
        for check in service['checks']:
            # The type must be resolved first, the default name needs it
            check['type'] = check.get('type') or 'unknown'
            check['host'] = check.get('host') or service['host']
            check['port'] = check.get('port') or service['port']
            check['name'] = check.get('name') or "%s|%s|%s" % \
                (check['type'], check['host'], check['port'])
            check['rise'] = check.get('rise') or self.rise
            check['fall'] = check.get('fall') or self.fall

            service_check_class = CHECKERS_MODULES.get(check['type'])
            if not service_check_class:
                # List the known types (there is no `self.checkers`)
                raise Exception(
                    'Invalid check type "%s", valid types: %s' %
                    (check['type'], ', '.join(CHECKERS_MODULES.keys())))
            service_check = service_check_class(check, self.logger)
            self.service_checks.append(service_check)
class EventWorker(Worker):
    """Worker consuming events from a beanstalk queue and dispatching
    them to the `handle_*` methods."""

    def init(self):
        """Set up the clients and read the worker options."""
        eventlet.monkey_patch(os=False)
        self.session = requests.Session()
        self.cs = ConscienceClient(self.conf)
        self.rdir = RdirClient(self.conf)
        self._acct_addr = None
        # Time of the last account address refresh. Kept apart from
        # `acct_update`, which is the boolean enabling account updates.
        self._acct_addr_time = 0
        self.graceful_timeout = 1
        self.acct_refresh_interval = int_value(
            self.conf.get('acct_refresh_interval'), 60
        )
        self.concurrency = int_value(self.conf.get('concurrency'), 1000)
        self.acct_update = true_value(self.conf.get('acct_update', True))
        self.rdir_update = true_value(self.conf.get('rdir_update', True))
        super(EventWorker, self).init()

    def notify(self):
        """TODO"""
        pass

    def safe_decode_job(self, job):
        """Decode a JSON job body.

        :returns: the decoded object, or None if decoding failed.
        """
        try:
            return json.loads(job)
        except Exception as e:
            # str(e) rather than e.message, which only exists in Python 2
            self.logger.warn('ERROR decoding job "%s"', str(e))
            return None

    def run(self):
        """Spawn the event handling greenthread, and wait until the
        worker is not alive anymore, then stop the greenthread."""
        queue_url = self.conf.get('queue_url', 'tcp://127.0.0.1:11300')
        self.beanstalk = Beanstalk.from_url(queue_url)

        gt = eventlet.spawn(
            self.handle)

        while self.alive:
            self.notify()
            try:
                eventlet.sleep(1.0)
            except AssertionError:
                self.alive = False
                break

        self.notify()
        try:
            with Timeout(self.graceful_timeout) as t:
                gt.kill(StopServe())
                gt.wait()
        except Timeout as te:
            if te != t:
                raise
            gt.kill()

    def handle(self):
        """Reserve jobs in loop, process them and delete them from the
        queue, until StopServe is raised."""
        try:
            while True:
                job_id, data = self.beanstalk.reserve()
                try:
                    event = self.safe_decode_job(data)
                    if event:
                        self.process_event(event)
                    self.beanstalk.delete(job_id)
                except Exception:
                    self.logger.exception("ERROR handling event %s", job_id)
        except StopServe:
            self.logger.info('Stopping event handler')

    def process_event(self, event):
        """Run the handler matching the event.

        :returns: False if the handler raised an exception, True
            otherwise (including when there is no handler).
        """
        handler = self.get_handler(event)
        if not handler:
            self.logger.warn("ERROR no handler found for event")
            # mark as success
            return True
        try:
            handler(event)
        except Exception:
            # No `return` in a `finally` clause here: it would also
            # swallow StopServe or GreenletExit raised by gt.kill().
            self.logger.exception("ERROR in event handler")
            return False
        return True

    def get_handler(self, event):
        """Find the method handling this type of event.

        :returns: a bound method, or None for unknown types.
        """
        event_type = event.get('event')
        if not event_type:
            return None

        if event_type == EventType.CONTAINER_PUT:
            return self.handle_container_put
        elif event_type == EventType.CONTAINER_DESTROY:
            return self.handle_container_destroy
        elif event_type == EventType.CONTAINER_UPDATE:
            return self.handle_container_update
        elif event_type == EventType.OBJECT_PUT:
            return self.handle_object_put
        elif event_type == EventType.OBJECT_DELETE:
            return self.handle_object_delete
        elif event_type == EventType.REFERENCE_UPDATE:
            return self.handle_reference_update
        elif event_type == EventType.CHUNK_PUT:
            return self.handle_chunk_put
        elif event_type == EventType.CHUNK_DELETE:
            return self.handle_chunk_delete
        elif event_type == EventType.PING:
            return self.handle_ping
        else:
            return None

    @property
    def acct_addr(self):
        """Address of an account service, refreshed from conscience
        when unknown or older than `acct_refresh_interval`."""
        if not self._acct_addr or self.acct_refresh():
            try:
                acct_instance = self.cs.next_instance(ACCOUNT_SERVICE)
                self._acct_addr = acct_instance.get('addr')
                self._acct_addr_time = time.time()
            except Exception:
                self.logger.warn('Unable to find account instance')
        return self._acct_addr

    def acct_refresh(self):
        """Tell if the account address is due for a refresh."""
        return (time.time() - self._acct_addr_time) > \
            self.acct_refresh_interval

    def handle_container_put(self, event):
        """
        Handle container creation.
        :param event:
        """
        self.logger.debug('worker handle container put')
        if not self.acct_update:
            return
        uri = 'http://%s/v1.0/account/container/update' % self.acct_addr
        mtime = event.get('when')
        data = event.get('data')
        name = data.get('url').get('user')
        account = data.get('url').get('account')

        event = {'mtime': mtime, 'name': name}
        self.session.post(uri, params={'id': account}, json=event)

    def handle_container_update(self, event):
        """
        Handle container update.
        :param event:
        """
        self.logger.debug('worker handle container update')
        if not self.acct_update:
            return
        uri = 'http://%s/v1.0/account/container/update' % self.acct_addr
        mtime = event.get('when')
        data = event.get('data')
        # NOTE(review): 'url' is read from the event itself here, but
        # from event['data'] in the put and destroy handlers -- confirm
        # against the event producer.
        name = event.get('url').get('user')
        account = event.get('url').get('account')
        bytes_count = data.get('bytes-count', 0)
        object_count = data.get('object-count', 0)

        event = {
            'mtime': mtime,
            'name': name,
            'bytes': bytes_count,
            'objects': object_count
        }
        self.session.post(uri, params={'id': account}, json=event)

    def handle_container_destroy(self, event):
        """
        Handle container destroy.
        :param event:
        """
        self.logger.debug('worker handle container destroy')
        if not self.acct_update:
            return
        uri = 'http://%s/v1.0/account/container/update' % self.acct_addr
        dtime = event.get('when')
        data = event.get('data')
        name = data.get('url').get('user')
        account = data.get('url').get('account')

        event = {'dtime': dtime, 'name': name}
        self.session.post(uri, params={'id': account}, data=json.dumps(event))

    def handle_object_delete(self, event):
        """
        Handle object deletion.
        Delete the chunks of the object.
        :param event:
        """
        self.logger.debug('worker handle object delete')
        pile = GreenPile(PARALLEL_CHUNKS_DELETE)

        chunks = [item for item in event.get('data')
                  if item.get('type') == 'chunks']
        if not chunks:
            self.logger.warn('No chunks found in event data')
            return

        def delete_chunk(chunk):
            resp = None
            try:
                with Timeout(CHUNK_TIMEOUT):
                    resp = self.session.delete(chunk['id'])
            except (Exception, Timeout) as e:
                # str(e) rather than e.message (Python 2 only)
                self.logger.warn('error while deleting chunk %s "%s"',
                                 chunk['id'], str(e))
            return resp

        for chunk in chunks:
            pile.spawn(delete_chunk, chunk)

        resps = [resp for resp in pile if resp]

        for resp in resps:
            if resp.status_code == 204:
                self.logger.debug('deleted chunk %s' % resp.url)
            else:
                self.logger.warn('failed to delete chunk %s' % resp.url)

    def handle_object_put(self, event):
        """
        Handle object creation.
        TODO
        :param event:
        """
        self.logger.debug('worker handle object put')

    def handle_reference_update(self, event):
        """
        Handle reference update.
        TODO
        :param event
        """
        self.logger.debug('worker handle reference update')

    def handle_chunk_put(self, event):
        """
        Handle chunk creation.
        :param event
        """
        if not self.rdir_update:
            self.logger.debug('worker skip chunk creation')
            return

        self.logger.debug('worker handle chunk creation')

        when = event.get('when')
        data = event.get('data')
        # The identifiers are positional arguments of chunk_push,
        # what remains in `data` is passed as keyword arguments.
        volume_id = data.get('volume_id')
        del data['volume_id']
        container_id = data.get('container_id')
        del data['container_id']
        content_id = data.get('content_id')
        del data['content_id']
        chunk_id = data.get('chunk_id')
        del data['chunk_id']
        data['mtime'] = when
        self.rdir.chunk_push(volume_id, container_id, content_id, chunk_id,
                             **data)

    def handle_chunk_delete(self, event):
        """
        Handle chunk deletion.
        :param event
        """
        if not self.rdir_update:
            self.logger.debug('worker skip chunk deletion')
            return

        self.logger.debug('worker handle chunk deletion')

        data = event.get('data')
        volume_id = data.get('volume_id')
        container_id = data.get('container_id')
        content_id = data.get('content_id')
        chunk_id = data.get('chunk_id')
        self.rdir.chunk_delete(volume_id, container_id, content_id, chunk_id)

    def handle_ping(self, event):
        """
        Handle ping
        :param event
        """
        self.logger.debug('worker handle ping')
class EventWorker(Worker):
    """Worker consuming events from a beanstalk tube and dispatching
    them to the handlers loaded from 'handlers_conf'."""

    def init(self):
        """Set up the clients, read the worker options and load the
        event handlers.

        :raises ValueError: when 'handlers_conf' is not configured.
        """
        eventlet.monkey_patch(os=False)
        self.tube = self.conf.get("tube", DEFAULT_TUBE)
        self.session = requests.Session()
        self.cs = ConscienceClient(self.conf)
        self.rdir = RdirClient(self.conf)
        self._acct_addr = None
        # Time of the last account address refresh. Kept apart from
        # `acct_update`, which is the boolean enabling account updates.
        self._acct_addr_time = 0
        self.graceful_timeout = 1
        self.acct_refresh_interval = int_value(
            self.conf.get('acct_refresh_interval'), 60
        )
        self.acct_update = true_value(self.conf.get('acct_update', True))
        self.rdir_update = true_value(self.conf.get('rdir_update', True))
        if 'handlers_conf' not in self.conf:
            raise ValueError("'handlers_conf' path not defined in conf")
        self.handlers = loadhandlers(
            self.conf.get('handlers_conf'), evt_types, app=self)
        super(EventWorker, self).init()

    def notify(self):
        """TODO"""
        pass

    def safe_decode_job(self, job_id, data):
        """Decode a JSON job body and add the job ID to it.

        :returns: the decoded envelope, or None if decoding failed.
        """
        try:
            env = json.loads(data)
            env['job_id'] = job_id
            return env
        except Exception as e:
            # str(e) rather than e.message, which only exists in Python 2
            self.logger.warn('decoding job "%s"', str(e))
            return None

    def run(self):
        """Spawn `concurrency` handling greenthreads (one beanstalk
        connection each), and wait until the worker is not alive
        anymore, then stop the greenthreads."""
        coros = []
        queue_url = self.conf.get('queue_url', '127.0.0.1:11300')
        concurrency = int_value(self.conf.get('concurrency'), 10)

        server_gt = greenthread.getcurrent()

        for _ in range(concurrency):
            beanstalk = Beanstalk.from_url(queue_url)
            gt = eventlet.spawn(self.handle, beanstalk)
            gt.link(_eventlet_stop, server_gt, beanstalk)
            coros.append(gt)
            beanstalk, gt = None, None

        while self.alive:
            self.notify()
            try:
                eventlet.sleep(1.0)
            except AssertionError:
                self.alive = False
                break

        self.notify()
        try:
            with Timeout(self.graceful_timeout) as t:
                for coro in coros:
                    coro.kill(StopServe())
                for coro in coros:
                    coro.wait()
        except Timeout as te:
            if te != t:
                raise
            for coro in coros:
                coro.kill()

    def handle(self, beanstalk):
        """Reserve jobs in loop and process them, until StopServe is
        raised. Jobs whose processing fails are buried."""
        conn_error = False
        try:
            if self.tube:
                beanstalk.use(self.tube)
                beanstalk.watch(self.tube)
            while True:
                try:
                    job_id, data = beanstalk.reserve()
                    if conn_error:
                        self.logger.warn("beanstalk reconnected")
                        conn_error = False
                except ConnectionError:
                    # Log only once per disconnection
                    if not conn_error:
                        self.logger.warn("beanstalk connection error")
                        conn_error = True
                    eventlet.sleep(BEANSTALK_RECONNECTION)
                    continue
                try:
                    event = self.safe_decode_job(job_id, data)
                    self.process_event(job_id, event, beanstalk)
                except ConnectionError:
                    self.logger.warn(
                        "beanstalk connection error during processing")
                except Exception:
                    beanstalk.bury(job_id)
                    self.logger.exception("handling event %s (bury)", job_id)
        except StopServe:
            pass

    def process_event(self, job_id, event, beanstalk):
        """Pass the event to its handler, with a callback which deletes,
        buries or releases the job according to the reported status.
        Events without handler are deleted."""
        handler = self.get_handler(event)
        if not handler:
            self.logger.warn('no handler found for %r' % event)
            beanstalk.delete(job_id)
            return

        def cb(status, msg):
            if is_success(status):
                beanstalk.delete(job_id)
            elif is_error(status):
                self.logger.warn('bury event %r' % event)
                beanstalk.bury(job_id)
            else:
                self.logger.warn('release event %r' % event)
                beanstalk.release(job_id)

        handler(event, cb)

    def get_handler(self, event):
        """Find the handler registered for the type of the event
        (None if there is none)."""
        return self.handlers.get(event.get('event'), None)

    @property
    def acct_addr(self):
        """Address of an account service, refreshed from conscience
        when unknown or older than `acct_refresh_interval`."""
        if not self._acct_addr or self.acct_refresh():
            try:
                acct_instance = self.cs.next_instance(ACCOUNT_SERVICE)
                self._acct_addr = acct_instance.get('addr')
                self._acct_addr_time = time.time()
            except Exception:
                self.logger.warn('Unable to find account instance')
        return self._acct_addr

    def acct_refresh(self):
        """Tell if the account address is due for a refresh."""
        return (time.time() - self._acct_addr_time) > \
            self.acct_refresh_interval
class ServiceWatcher(object):
    """Watch one service: run its checks, collect its stats and register
    the resulting definition in conscience, once per `check_interval`."""

    def __init__(self, conf, service, **kwargs):
        """
        :param conf: watcher configuration (must hold 'namespace').
        :param service: service description; 'host', 'port' and 'type'
            are mandatory, and 'checks' and 'stats' are read by
            `init_checkers` and `init_stats`.
        :raises Exception: when a mandatory field is missing.
        """
        self.conf = conf
        self.running = False

        for k in ['host', 'port', 'type']:
            if k not in service:
                raise Exception(
                    'Missing field "%s" in service configuration' % k)
        self.name = '%s|%s|%s' % \
            (service['type'], service['host'], service['port'])

        self.service = service

        self.rise = int_value(self._load_item_config('rise'), 1)
        self.fall = int_value(self._load_item_config('fall'), 1)
        self.check_interval = float_value(
            self._load_item_config('check_interval'), 1)
        self.deregister_on_exit = true_value(
            self._load_item_config('deregister_on_exit', False))

        self.logger = get_logger(self.conf)
        # One HTTP session, shared by both clients
        self.session = requests.Session()
        self.cs = ConscienceClient(self.conf, session=self.session)
        self.client = Client(self.conf, session=self.session)
        self.last_status = False
        self.failed = False
        self.service_definition = {
            'ns': self.conf['namespace'],
            'type': self.service['type'],
            'addr': '%s:%s' % (self.service['host'], self.service['port']),
            'score': 0,
            'tags': {}
        }
        if self.service.get('location', None):
            self.service_definition['tags']['tag.loc'] = \
                self.service['location']
        self.service_checks = list()
        self.service_stats = list()
        self.init_checkers(service)
        self.init_stats(service)

    def _load_item_config(self, item, default=None):
        """Read `item` from the service description, falling back on the
        global configuration, then on `default` (also used when the
        value found is falsy)."""
        return self.service.get(item, self.conf.get(item)) or default

    def start(self):
        """Run the watch loop until the watcher is stopped."""
        self.logger.info('watcher "%s" starting', self.name)
        self.running = True
        self.watch()

    def stop(self):
        """Stop the watch loop. When `deregister_on_exit` is set,
        register the service as down first."""
        self.logger.info('watcher "%s" stopping', self.name)
        if self.deregister_on_exit:
            self.logger.info('watcher "%s" deregister service', self.name)
            try:
                self.last_status = False
                self.register()
            except Exception as e:
                self.logger.warn('Failed to register service: %s', e)
        self.running = False

    def check(self):
        """Run every registered check; the service is up only if all of
        them pass. Status changes are logged."""
        status = True
        for service_check in self.service_checks:
            if not service_check.service_status():
                status = False

        if status != self.last_status:
            if status:
                self.logger.info('service "%s" is now up', self.name)
            else:
                self.logger.warn('service "%s" is now down', self.name)
            self.last_status = status

    def get_stats(self):
        """Update service definition with all configured stats"""
        if not self.last_status:
            return
        for stat in self.service_stats:
            stats = stat.get_stats()
            self.service_definition['tags'].update(stats)

    def register(self):
        """Send the service definition (with its 'tag.up' tag) to
        conscience."""
        # Use a boolean so we can easily convert it to a number in conscience
        self.service_definition['tags']['tag.up'] = self.last_status
        self.cs.register(self.service['type'], self.service_definition)

    def watch(self):
        """Check, collect stats and register, in loop, while running.

        Any exception marks the watcher as failed and is re-raised.
        """
        try:
            while self.running:
                self.check()
                self.get_stats()
                self.register()
                sleep(self.check_interval)
        except Exception as e:
            self.logger.warn('ERROR in watcher "%s"', e)
            self.failed = True
            # Bare raise, to keep the original traceback
            raise
        finally:
            self.logger.info('watcher "%s" stopped', self.name)

    def init_checkers(self, service):
        """Instantiate a checker for each entry of service['checks'].

        Missing check fields are filled in place with defaults.

        :raises Exception: when the type of a check is not known.
        """
        for check in service['checks']:
            # The type must be resolved first, the default name needs it
            check['type'] = check.get('type') or 'unknown'
            check['host'] = check.get('host') or service['host']
            check['port'] = check.get('port') or service['port']
            check['name'] = check.get('name') or "%s|%s|%s" % \
                (check['type'], check['host'], check['port'])
            check['rise'] = check.get('rise') or self.rise
            check['fall'] = check.get('fall') or self.fall

            service_check_class = CHECKERS_MODULES.get(check['type'])
            if not service_check_class:
                raise Exception(
                    'Invalid check type "%s", valid types: %s' %
                    (check['type'], ', '.join(CHECKERS_MODULES.keys())))
            service_check = service_check_class(self, check, self.logger)
            self.service_checks.append(service_check)

    def init_stats(self, service):
        """Initialize service stat fetchers"""
        self.service_stats[:] = []
        for stat in service['stats']:
            stat.setdefault('host', service['host'])
            stat.setdefault('port', service['port'])
            stat.setdefault('path', "")
            service_stat_class = STATS_MODULES.get(stat['type'], None)
            if not service_stat_class:
                raise Exception(
                    'Invalid stat type "%s", valid types: %s' %
                    (stat['type'], ', '.join(STATS_MODULES.keys())))
            service_stat = service_stat_class(self, stat, self.logger)
            self.service_stats.append(service_stat)
class ServiceWatcher(object):
    """Watch one service: run its checks, collect its stats and register
    the resulting definition in conscience, once per `check_interval`."""

    def __init__(self, conf, service, **kwargs):
        """
        :param conf: watcher configuration (must hold 'namespace').
        :param service: service description; 'host', 'port' and 'type'
            are mandatory, and 'checks' and 'stats' are read by
            `init_checkers` and `init_stats`.
        :raises Exception: when a mandatory field is missing.
        """
        self.conf = conf
        self.running = False

        for k in ['host', 'port', 'type']:
            if k not in service:
                raise Exception(
                    'Missing field "%s" in service configuration' % k)
        self.name = '%s|%s' % \
            (service['host'], service['port'])

        self.check_interval = float_value(conf.get('check_interval'), 1)
        self.service = service

        self.rise = int_value(conf.get('rise'), 1)
        self.fall = int_value(conf.get('fall'), 1)

        self.logger = get_logger(self.conf)
        self.cs = ConscienceClient(self.conf)
        self.client = Client(self.conf)
        self.last_status = False
        self.failed = False
        self.service_definition = {
            'ns': self.conf['namespace'],
            'type': self.service['type'],
            'addr': '%s:%s' % (self.service['host'], self.service['port']),
            'score': 0,
            'tags': {}}
        if self.service.get('location', None):
            self.service_definition['tags']['tag.loc'] = \
                self.service['location']
        self.service_checks = list()
        self.service_stats = list()
        self.init_checkers(service)
        self.init_stats(service)

    def start(self):
        """Run the watch loop until the watcher is stopped."""
        self.logger.info('watcher "%s" starting', self.name)
        self.running = True
        self.watch()

    def stop(self):
        """Ask the watch loop to stop."""
        self.logger.info('watcher "%s" stopping', self.name)
        self.running = False

    def check(self):
        """Run every registered check; the service is up only if all of
        them pass. Status changes are logged."""
        status = True
        for service_check in self.service_checks:
            if not service_check.service_status():
                status = False

        if status != self.last_status:
            if status:
                self.logger.info('service "%s" is now up', self.name)
            else:
                self.logger.warn('service "%s" is now down', self.name)
            self.last_status = status

    def get_stats(self):
        """Update service definition with all configured stats"""
        for stat in self.service_stats:
            stats = stat.get_stats()
            self.logger.debug("Stat fetcher '%s' returned %s",
                              str(stat), str(stats))
            self.service_definition['tags'].update(stats)

    def register(self):
        """Send the service definition (with its 'tag.up' tag) to
        conscience."""
        # Use a boolean so we can easily convert it to a number in conscience
        self.service_definition['tags']['tag.up'] = self.last_status
        self.cs.register(
            self.service['type'], self.service_definition)

    def watch(self):
        """Check, collect stats and register, in loop, while running.

        Any exception marks the watcher as failed and is re-raised.
        """
        try:
            while self.running:
                self.check()
                self.get_stats()
                self.register()
                sleep(self.check_interval)
        except Exception as e:
            self.logger.warn('ERROR in watcher "%s"', e)
            self.failed = True
            # Bare raise, to keep the original traceback
            raise
        finally:
            self.logger.info('watcher "%s" stopped', self.name)

    def init_checkers(self, service):
        """Instantiate a checker for each entry of service['checks'].

        Missing check fields are filled in place with defaults.

        :raises Exception: when the type of a check is not known.
        """
        for check in service['checks']:
            # The type must be resolved first, the default name needs it
            check['type'] = check.get('type') or 'unknown'
            check['host'] = check.get('host') or service['host']
            check['port'] = check.get('port') or service['port']
            check['name'] = check.get('name') or "%s|%s|%s" % \
                (check['type'], check['host'], check['port'])
            check['rise'] = check.get('rise') or self.rise
            check['fall'] = check.get('fall') or self.fall

            service_check_class = CHECKERS_MODULES.get(check['type'])
            if not service_check_class:
                raise Exception(
                    'Invalid check type "%s", valid types: %s' %
                    (check['type'], ', '.join(CHECKERS_MODULES.keys())))
            service_check = service_check_class(self, check, self.logger)
            self.service_checks.append(service_check)

    def init_stats(self, service):
        """Initialize service stat fetchers"""
        self.service_stats[:] = []
        for stat in service['stats']:
            stat.setdefault('host', service['host'])
            stat.setdefault('port', service['port'])
            stat.setdefault('path', "")
            service_stat_class = STATS_MODULES.get(stat['type'], None)
            if not service_stat_class:
                raise Exception(
                    'Invalid stat type "%s", valid types: %s' %
                    (stat['type'], ', '.join(STATS_MODULES.keys())))
            service_stat = service_stat_class(self, stat, self.logger)
            self.service_stats.append(service_stat)
class EventWorker(Worker):
    """Worker reserving jobs from a beanstalk tube and dispatching them
    to the event handlers loaded from 'handlers_conf'.
    """

    def init(self):
        """Set up clients, options and handlers from the configuration.

        :raises ValueError: if 'handlers_conf' is missing from the conf.
        """
        eventlet.monkey_patch(os=False)
        self.tube = self.conf.get("tube", DEFAULT_TUBE)
        self.session = requests.Session()
        self.cs = ConscienceClient(self.conf)
        self.rdir = RdirClient(self.conf)
        self._acct_addr = None
        # Time of the last successful account address lookup. Kept apart
        # from the `acct_update` boolean option: both used to share one
        # attribute, so a lookup overwrote the configured flag.
        self._acct_last_refresh = 0
        self.graceful_timeout = 1
        self.acct_refresh_interval = int_value(
            self.conf.get('acct_refresh_interval'), 60)
        self.acct_update = true_value(self.conf.get('acct_update', True))
        self.rdir_update = true_value(self.conf.get('rdir_update', True))
        if 'handlers_conf' not in self.conf:
            raise ValueError("'handlers_conf' path not defined in conf")
        self.handlers = loadhandlers(self.conf.get('handlers_conf'),
                                     evt_types, app=self)
        super(EventWorker, self).init()

    def notify(self):
        """Hook called from the run() loop; currently does nothing (TODO)."""
        pass

    def safe_decode_job(self, job_id, data):
        """Decode a JSON job payload and tag it with its job ID.

        :returns: the decoded event with an extra 'job_id' key, or None
            if decoding failed (the failure is logged).
        """
        try:
            env = json.loads(data)
            env['job_id'] = job_id
            return env
        except Exception as e:
            # str(e) instead of e.message: Python 3 exceptions have no
            # `message` attribute, which made this handler raise.
            self.logger.warn('decoding job "%s"', str(e))
            return None

    def run(self):
        """Spawn the handling coroutines and wait until the worker stops.

        One beanstalk connection and one green thread are created per
        unit of 'concurrency'. On exit, coroutines are asked to stop and
        are killed if they do not finish within `graceful_timeout`.
        """
        coros = []
        queue_url = self.conf.get('queue_url', '127.0.0.1:11300')
        concurrency = int_value(self.conf.get('concurrency'), 10)

        server_gt = greenthread.getcurrent()

        for _ in range(concurrency):
            beanstalk = Beanstalk.from_url(queue_url)
            gt = eventlet.spawn(self.handle, beanstalk)
            gt.link(_eventlet_stop, server_gt, beanstalk)
            coros.append(gt)
            beanstalk, gt = None, None

        while self.alive:
            self.notify()
            try:
                eventlet.sleep(1.0)
            except AssertionError:
                self.alive = False
                break

        self.notify()
        try:
            with Timeout(self.graceful_timeout) as t:
                # Signal every coroutine first, then wait for all of them.
                for coro in coros:
                    coro.kill(StopServe())
                for coro in coros:
                    coro.wait()
        except Timeout as te:
            # Only swallow our own timeout, not one raised by other code.
            if te != t:
                raise
            for coro in coros:
                coro.kill()

    def handle(self, beanstalk):
        """Reserve and process jobs forever, until StopServe is raised.

        A connection error while reserving is logged once, then the
        reservation is retried after BEANSTALK_RECONNECTION. Any other
        processing error buries the job.
        """
        conn_error = False
        try:
            beanstalk.use(self.tube)
            beanstalk.watch(self.tube)
            while True:
                try:
                    job_id, data = beanstalk.reserve()
                    if conn_error:
                        self.logger.warn("beanstalk reconnected")
                        conn_error = False
                except ConnectionError:
                    if not conn_error:
                        self.logger.warn("beanstalk connection error")
                        conn_error = True
                    eventlet.sleep(BEANSTALK_RECONNECTION)
                    continue
                try:
                    # An undecodable job yields None here; get_handler()
                    # then fails on it and the job is buried below.
                    event = self.safe_decode_job(job_id, data)
                    self.process_event(job_id, event, beanstalk)
                except ConnectionError:
                    self.logger.warn(
                        "beanstalk connection error during processing")
                except Exception:
                    beanstalk.bury(job_id)
                    self.logger.exception("handling event %s (bury)", job_id)
        except StopServe:
            pass

    def process_event(self, job_id, event, beanstalk):
        """Run the handler matching the event, then settle the job.

        The job is deleted when no handler matches or on a success
        status, buried on an error status, and released otherwise.
        """
        handler = self.get_handler(event)
        if not handler:
            self.logger.warn('no handler found for %r', event)
            beanstalk.delete(job_id)
            return

        def cb(status, msg):
            if is_success(status):
                beanstalk.delete(job_id)
            elif is_error(status):
                self.logger.warn('bury event %r', event)
                beanstalk.bury(job_id)
            else:
                self.logger.warn('release event %r', event)
                beanstalk.release(job_id)

        handler(event, cb)

    def get_handler(self, event):
        """Return the handler registered for event['event'], or None."""
        return self.handlers.get(event.get('event'), None)

    @property
    def acct_addr(self):
        """Address of an account service instance, cached between calls.

        The address is looked up again when none is known yet or when
        acct_refresh() says the cached one is too old. If the lookup
        fails, the previous value (possibly None) is returned.
        """
        if not self._acct_addr or self.acct_refresh():
            try:
                acct_instance = self.cs.next_instance(ACCOUNT_SERVICE)
                self._acct_addr = acct_instance.get('addr')
                self._acct_last_refresh = time.time()
            except Exception:
                self.logger.warn('Unable to find account instance')
        return self._acct_addr

    def acct_refresh(self):
        """Tell if the cached account address is older than
        `acct_refresh_interval`.
        """
        elapsed = time.time() - self._acct_last_refresh
        return elapsed > self.acct_refresh_interval
class RawxDecommissionTask(XcuteTask):
    """Task moving one chunk away from the rawx service `service_id`."""

    def __init__(self, conf, job_params, logger=None):
        """Read the job parameters and create the clients used by process().

        :param job_params: dict holding 'service_id', 'rawx_timeout',
            'min_chunk_size', 'max_chunk_size' and 'excluded_rawx'.
        """
        super(RawxDecommissionTask, self).__init__(
            conf, job_params, logger=logger)

        self.service_id = job_params['service_id']
        self.rawx_timeout = job_params['rawx_timeout']
        self.min_chunk_size = job_params['min_chunk_size']
        self.max_chunk_size = job_params['max_chunk_size']
        self.excluded_rawx = job_params['excluded_rawx']

        self.blob_client = BlobClient(self.conf, logger=self.logger)
        self.content_factory = ContentFactory(self.conf)
        self.conscience_client = ConscienceClient(
            self.conf, logger=self.logger)

        self.fake_excluded_chunks = self._generate_fake_excluded_chunks(
            self.excluded_rawx)

    def _generate_fake_excluded_chunks(self, excluded_rawx):
        """Build one placeholder chunk description per excluded rawx.

        Each entry points at an all-zero chunk ID on the excluded service;
        the list is handed to move_chunk() as `fake_excluded_chunks`.
        """
        fake_excluded_chunks = list()
        fake_chunk_id = '0' * 64
        for service_id in excluded_rawx:
            service_addr = self.conscience_client.resolve_service_id(
                'rawx', service_id)
            chunk = dict()
            chunk['hash'] = '0000000000000000000000000000000000'
            chunk['pos'] = '0'
            chunk['size'] = 1
            chunk['score'] = 1
            chunk['url'] = 'http://{}/{}'.format(service_id, fake_chunk_id)
            chunk['real_url'] = 'http://{}/{}'.format(service_addr,
                                                      fake_chunk_id)
            fake_excluded_chunks.append(chunk)
        return fake_excluded_chunks

    def process(self, task_id, task_payload, reqid=None):
        """Move the chunk described by the payload, unless it is skipped.

        :param task_payload: dict holding 'container_id', 'content_id'
            and 'chunk_id'.
        :returns: a dict of counters describing what happened.
        :raises ValueError: if the container or content ID stored with the
            chunk differs from the one in the payload.
        """
        container_id = task_payload['container_id']
        content_id = task_payload['content_id']
        chunk_id = task_payload['chunk_id']

        chunk_url = 'http://{}/{}'.format(self.service_id, chunk_id)
        try:
            meta = self.blob_client.chunk_head(
                chunk_url, timeout=self.rawx_timeout, reqid=reqid)
        except NotFound:
            # The chunk is still present in the rdir,
            # but the chunk no longer exists in the rawx.
            # We ignore it because there is nothing to move.
            return {'skipped_chunks_no_longer_exist': 1}
        # Format the messages here: exceptions do not interpolate extra
        # positional arguments the way logger calls do.
        if container_id != meta['container_id']:
            raise ValueError('Mismatch container ID: %s != %s'
                             % (container_id, meta['container_id']))
        if content_id != meta['content_id']:
            raise ValueError('Mismatch content ID: %s != %s'
                             % (content_id, meta['content_id']))
        chunk_size = int(meta['chunk_size'])

        # Maybe skip the chunk because it doesn't match the size constraint
        if chunk_size < self.min_chunk_size:
            self.logger.debug('[reqid=%s] SKIP %s too small',
                              reqid, chunk_url)
            return {'skipped_chunks_too_small': 1}
        # A max_chunk_size of 0 or less disables the upper bound.
        if self.max_chunk_size > 0 and chunk_size > self.max_chunk_size:
            self.logger.debug('[reqid=%s] SKIP %s too big',
                              reqid, chunk_url)
            return {'skipped_chunks_too_big': 1}

        # Start moving the chunk
        try:
            content = self.content_factory.get(container_id, content_id,
                                               reqid=reqid)
            content.move_chunk(
                chunk_id, fake_excluded_chunks=self.fake_excluded_chunks,
                reqid=reqid)
        except (ContentNotFound, OrphanChunk):
            return {'orphan_chunks': 1}

        return {'moved_chunks': 1, 'moved_bytes': chunk_size}
def make_client(instance):
    """Return a ConscienceClient built from the instance's process
    configuration.
    """
    process_conf = instance.get_process_configuration()
    return ConscienceClient(process_conf)