def _wrap(self, job_uuid, job_path,
          fail_msg_tpl="Failure: %s", ensure_known=True):
    """Guard a job operation and translate backend errors.

    This is a generator that yields exactly once; callers in this file
    use it as ``with self._wrap(...):`` (the context-manager decoration
    is presumably applied outside of this definition -- TODO confirm).

    :param job_uuid: value interpolated into ``fail_msg_tpl``.
    :param job_path: path of the job; when truthy it is appended to the
                     failure message, and when ``ensure_known`` is set
                     it must be present in ``self._known_jobs``.
    :param fail_msg_tpl: ``%``-style template with a single ``%s``
                         placeholder that receives ``job_uuid``.
    :param ensure_known: when true, fail early if ``job_path`` is empty
                         or not in ``self._known_jobs``.
    :raises ValueError: if ``ensure_known`` is set and ``job_path`` is
                        falsy.
    :raises excp.NotFound: if the path is not known, or the guarded code
                           raises ``NoNodeError``.
    :raises excp.JobFailure: if the guarded code times out, the session
                             expires, or any other kazoo error occurs.
    """
    if job_path:
        # The template is %-formatted again below (with the job uuid), so
        # any literal '%' that came from the path has to be escaped or the
        # later formatting would fail or mangle the message.
        path_suffix = " (%s)" % (job_path,)
        fail_msg_tpl += path_suffix.replace("%", "%%")
    if ensure_known:
        if not job_path:
            # Explicit 1-tuple so that a falsy tuple path is reported
            # instead of being consumed as the format arguments.
            raise ValueError("Unable to check if %r is a known path"
                             % (job_path,))
        if job_path not in self._known_jobs:
            fail_msg_tpl += ", unknown job"
            raise excp.NotFound(fail_msg_tpl % (job_uuid,))
    try:
        yield
    except self._client.handler.timeout_exception as e:
        fail_msg_tpl += ", operation timed out"
        raise excp.JobFailure(fail_msg_tpl % (job_uuid,), e)
    except k_exceptions.SessionExpiredError as e:
        fail_msg_tpl += ", session expired"
        raise excp.JobFailure(fail_msg_tpl % (job_uuid,), e)
    except k_exceptions.NoNodeError:
        fail_msg_tpl += ", unknown job"
        raise excp.NotFound(fail_msg_tpl % (job_uuid,))
    except k_exceptions.KazooException as e:
        fail_msg_tpl += ", internal error"
        raise excp.JobFailure(fail_msg_tpl % (job_uuid,), e)
def state(self):
    """Work out the current state of this job.

    Returns ``states.COMPLETE`` when the job node is missing or holds no
    data, ``states.UNCLAIMED`` when there is data but no owner, and
    ``states.CLAIMED`` otherwise.

    :raises excp.JobFailure: if the session expired, the operation timed
                             out, or another kazoo error occurred while
                             reading the job node.
    """
    # The owner is looked up before the job data is read; once the job
    # node is gone the owner can not be fetched again (the job node is a
    # parent node of the owner/lock node).
    current_owner = self.board.find_owner(self)
    try:
        raw_data, _data_stat = self._client.get(self.path)
        payload = misc.decode_json(raw_data)
    except k_exceptions.NoNodeError:
        payload = {}
    except k_exceptions.SessionExpiredError as e:
        raise excp.JobFailure(
            "Can not fetch the state of %s,"
            " session expired" % (self.uuid), e)
    except self._client.handler.timeout_exception as e:
        raise excp.JobFailure(
            "Can not fetch the state of %s,"
            " operation timed out" % (self.uuid), e)
    except k_exceptions.KazooException as e:
        raise excp.JobFailure(
            "Can not fetch the state of %s, internal"
            " error" % (self.uuid), e)
    if not payload:
        # No data means this job has been completed.
        return states.COMPLETE
    # Data without an owner means there is still work to be done.
    return states.CLAIMED if current_owner else states.UNCLAIMED
def trash(self, job, who):
    """Run the 'trash' script for ``job`` on behalf of ``who``.

    Returns ``None`` when the script reports an ok status.

    :raises exc.NotFound: if the script reports an unknown job or an
                          unknown owner.
    :raises exc.JobFailure: if the job is owned by someone else, or the
                            script reports any other reason.
    """
    trash_script = self._get_script('trash')
    with _translate_failures():
        raw_who = self._encode_owner(who)
        script_keys = [job.owner_key, self.listings_key,
                       job.last_modified_key, self.trash_key]
        script_args = [raw_who, job.key, self._dumps(timeutils.utcnow())]
        result = self._loads(trash_script(keys=script_keys,
                                          args=script_args))
    if result['status'] == self.SCRIPT_STATUS_OK:
        return
    # Anything else is an error, map the reported reason to an exception.
    reason = result.get('reason')
    if reason == self.SCRIPT_UNKNOWN_JOB:
        raise exc.NotFound("Job %s not found to be trashed" % (job.uuid))
    if reason == self.SCRIPT_UNKNOWN_OWNER:
        raise exc.NotFound("Can not trash job %s which we can not"
                           " determine the owner of" % (job.uuid))
    if reason == self.SCRIPT_NOT_EXPECTED_OWNER:
        raw_owner = result.get('owner')
        if not raw_owner:
            raise exc.JobFailure("Can not trash job %s which is not"
                                 " owned by %s" % (job.uuid, who))
        owner = self._decode_owner(raw_owner)
        raise exc.JobFailure("Can not trash job %s which is not owned"
                             " by %s (it is actively owned by %s)"
                             % (job.uuid, who, owner))
    raise exc.JobFailure("Failure to trash job %s, unknown internal"
                         " error (reason=%s)" % (job.uuid, reason))
def _get_node_attr(self, path, attr_name, trans_func=None):
    """Fetch one attribute of the stat returned for the node at ``path``.

    :param path: node path handed to ``self._client.get``.
    :param attr_name: name of the attribute read off the returned stat.
    :param trans_func: optional callable applied to the attribute value
                       before it is returned.
    :raises excp.NotFound: if the node at ``path`` does not exist.
    :raises excp.JobFailure: on timeout, session expiry, a missing
                             attribute or any other kazoo error.
    """

    def describe(detail):
        # Only evaluated on a failure path, so the job attributes are not
        # touched unless a message is actually needed.
        return ("Can not fetch the %r attribute of job %s (%s), %s"
                % (attr_name, self.uuid, self.path, detail))

    try:
        _data, node_stat = self._client.get(path)
        value = getattr(node_stat, attr_name)
        return value if trans_func is None else trans_func(value)
    except k_exceptions.NoNodeError as e:
        raise excp.NotFound(describe("path %s not found" % (path,)), e)
    except self._client.handler.timeout_exception as e:
        raise excp.JobFailure(describe("operation timed out"), e)
    except k_exceptions.SessionExpiredError as e:
        raise excp.JobFailure(describe("session expired"), e)
    except (AttributeError, k_exceptions.KazooException) as e:
        raise excp.JobFailure(describe("internal error"), e)
def _force_refresh(self):
    """Re-read the children of ``self.path`` and process them right away.

    The children are passed to ``self._on_job_posting`` with
    ``delayed=False``; nothing happens when the path does not exist.

    :raises excp.JobFailure: on timeout, session expiry or any other
                             kazoo error while listing the children.
    """
    try:
        child_names = self._client.get_children(self.path)
    except self._client.handler.timeout_exception as e:
        raise excp.JobFailure("Refreshing failure, operation timed out", e)
    except k_exceptions.SessionExpiredError as e:
        raise excp.JobFailure("Refreshing failure, session expired", e)
    except k_exceptions.NoNodeError:
        # A missing path is not treated as an error, there is just
        # nothing to refresh.
        return
    except k_exceptions.KazooException as e:
        raise excp.JobFailure("Refreshing failure, internal error", e)
    self._on_job_posting(child_names, delayed=False)
def abandon(self, job, who):
    """Give up ``who``'s ownership of ``job`` by deleting its lock node.

    The owner recorded in the lock data must equal ``who``; the lock is
    then deleted in a transaction, passing the lock's stat version.

    :raises excp.JobFailure: if the owner can not be determined (the
                             originating ``NoNodeError`` is attached as
                             the cause) or the job is not owned by
                             ``who``.
    """
    with self._wrap(job.uuid, job.path, "Abandonment failure: %s"):
        try:
            # Only the lock half of the result is needed here.
            owner_data = self._get_owner_and_data(job)
            lock_data, lock_stat, _data, _data_stat = owner_data
        except k_exceptions.NoNodeError as e:
            # Forward the underlying error as the cause, like the other
            # failure translations in this file do.
            raise excp.JobFailure("Can not abandon a job %s"
                                  " which we can not determine"
                                  " the owner of" % (job.uuid), e)
        if lock_data.get("owner") != who:
            raise excp.JobFailure("Can not abandon a job %s"
                                  " which is not owned by %s"
                                  % (job.uuid, who))
        txn = self._client.transaction()
        txn.delete(job.lock_path, version=lock_stat.version)
        kazoo_utils.checked_commit(txn)
def post(self, name, book=None, details=None):
    """Post a new job named ``name`` and return it as a ``RedisJob``.

    A uuid and posting are generated, a sequence number is obtained by
    incrementing ``self.sequence_key`` and the encoded posting is stored
    under the job uuid in ``self.listings_key`` (only if that field does
    not already exist).

    :raises exc.JobFailure: if the posting could not be stored.
    """
    job_uuid = uuidutils.generate_uuid()
    posting = base.format_posting(job_uuid, name,
                                  created_on=timeutils.utcnow(),
                                  book=book, details=details)
    with _translate_failures():
        sequence = self._client.incr(self.sequence_key)
        posting['sequence'] = sequence
    with _translate_failures():
        raw_posting = self._dumps(posting)
        raw_job_uuid = six.b(job_uuid)
        newly_stored = self._client.hsetnx(self.listings_key,
                                           raw_job_uuid, raw_posting)
        if not newly_stored:
            raise exc.JobFailure("New job located at '%s[%s]' could not"
                                 " be posted" % (self.listings_key,
                                                 raw_job_uuid))
        return RedisJob(self, name, sequence, raw_job_uuid,
                        uuid=job_uuid, details=details,
                        created_on=posting['created_on'],
                        book=book, book_data=posting.get('book'),
                        backend=self._persistence)
def connect(self):
    """Close any prior state, then verify the server and register scripts.

    When this object owns its client a new one is made from
    ``self._conf``. The server is pinged, its version is checked against
    ``self.MIN_REDIS_VERSION`` and every template in
    ``self.SCRIPT_TEMPLATES`` is rendered and registered.

    :raises exc.JobFailure: if the server version is not new enough.
    """
    self.close()
    if self._owns_client:
        self._client = self._make_client(self._conf)
    with _translate_failures():
        # A successful ping is taken as proof that the connection works;
        # the client maintains a connection pool, so if the server dies
        # later on that will be noticed when the next operation occurs.
        self._client.ping()
        is_new_enough, redis_version = ru.is_server_new_enough(
            self._client, self.MIN_REDIS_VERSION)
        if not is_new_enough:
            wanted_version = ".".join(
                str(piece) for piece in self.MIN_REDIS_VERSION)
            if redis_version:
                raise exc.JobFailure("Redis version %s or greater is"
                                     " required (version %s is to"
                                     " old)" % (wanted_version,
                                                redis_version))
            raise exc.JobFailure("Redis version %s or greater is"
                                 " required" % (wanted_version))
        self._redis_version = redis_version
        script_params = {
            # Status field values.
            'ok': self.SCRIPT_STATUS_OK,
            'error': self.SCRIPT_STATUS_ERROR,
            # Known error reasons (when status field is error).
            'not_expected_owner': self.SCRIPT_NOT_EXPECTED_OWNER,
            'unknown_owner': self.SCRIPT_UNKNOWN_OWNER,
            'unknown_job': self.SCRIPT_UNKNOWN_JOB,
            'already_claimed': self.SCRIPT_ALREADY_CLAIMED,
        }
        # Render each template with the values above and register it.
        prepared_scripts = {
            script_name: self._client.register_script(
                string.Template(raw_tpl).substitute(**script_params))
            for script_name, raw_tpl
            in six.iteritems(self.SCRIPT_TEMPLATES)
        }
        self._scripts.update(prepared_scripts)
        self._closed = False
def connect(self, timeout=10.0):
    """Start the client and perform the post-connection setup.

    After the client is started this optionally checks version
    compatibility, creates the notification worker if needed, ensures
    that ``self.path`` and ``self.trash_path`` exist and installs the
    children watch on ``self.path``.

    :param timeout: passed to the client's ``start`` (converted to a
                    float unless it is ``None``).
    :raises excp.JobFailure: if starting the client or the
                             post-connection setup fails.
    :raises excp.IncompatibleVersion: re-raised (after cleanup) if it is
                                      raised during the setup.
    """

    def cleanup_after_failure():
        # Attempt the needed cleanup when post-connection setup did not
        # succeed (maybe the connection was lost right after it was
        # obtained); a failure to clean up is only logged.
        try:
            self.close()
        except k_exceptions.KazooException:
            LOG.exception("Failed cleaning-up after post-connection"
                          " initialization failed")

    try:
        start_timeout = None if timeout is None else float(timeout)
        self._client.start(timeout=start_timeout)
    except (self._client.handler.timeout_exception,
            k_exceptions.KazooException) as e:
        raise excp.JobFailure("Failed to connect to zookeeper", e)
    try:
        if self._conf.get('check_compatible', True):
            kazoo_utils.check_compatible(self._client, MIN_ZK_VERSION)
        if self._worker is None and self._emit_notifications:
            self._worker = futures.ThreadPoolExecutor(max_workers=1)
        for needed_path in (self.path, self.trash_path):
            self._client.ensure_path(needed_path)
        if self._job_watcher is None:
            self._job_watcher = watchers.ChildrenWatch(
                self._client, self.path,
                func=self._on_job_posting,
                allow_session_lost=True)
        self._connected = True
    except excp.IncompatibleVersion:
        with excutils.save_and_reraise_exception():
            cleanup_after_failure()
    except (self._client.handler.timeout_exception,
            k_exceptions.KazooException) as e:
        cleanup_after_failure()
        raise excp.JobFailure("Failed to do post-connection"
                              " initialization", e)
def trash(self, job, who):
    """Move ``job`` (which must be owned by ``who``) to the trash path.

    In a single transaction the job data is created under the trash
    path and the job's lock and data nodes are deleted, passing the
    stat versions that were read.

    :raises excp.JobFailure: if the owner can not be determined (the
                             originating ``NoNodeError`` is attached as
                             the cause) or the job is not owned by
                             ``who``.
    """
    with self._wrap(job.uuid, job.path, "Trash failure: %s"):
        try:
            owner_data = self._get_owner_and_data(job)
            lock_data, lock_stat, data, data_stat = owner_data
        except k_exceptions.NoNodeError as e:
            # Forward the underlying error as the cause, like the other
            # failure translations in this file do.
            raise excp.JobFailure("Can not trash a job %s"
                                  " which we can not determine"
                                  " the owner of" % (job.uuid), e)
        if lock_data.get("owner") != who:
            raise excp.JobFailure("Can not trash a job %s"
                                  " which is not owned by %s"
                                  % (job.uuid, who))
        # Only swap the first occurrence of the board path (presumably
        # the leading prefix of the job path -- verify against how job
        # paths are built); an unbounded replace would also rewrite a
        # job name that happens to contain the board path again.
        trash_path = job.path.replace(self.path, self.trash_path, 1)
        value = misc.binary_encode(jsonutils.dumps(data))
        txn = self._client.transaction()
        txn.create(trash_path, value=value)
        txn.delete(job.lock_path, version=lock_stat.version)
        txn.delete(job.path, version=data_stat.version)
        kazoo_utils.checked_commit(txn)
def claim(self, job, who, expiry=None):
    """Run the 'claim' script so that ``who`` becomes the owner of ``job``.

    :param expiry: optional expiry; it is multiplied by 1000 and
                   truncated to an integer number of milliseconds, which
                   must be greater than zero.
    :raises ValueError: if the converted expiry is not positive.
    :raises exc.NotFound: if the script reports an unknown job.
    :raises exc.UnclaimableJob: if the script reports the job as already
                                claimed.
    :raises exc.JobFailure: if the script reports any other reason.
    """
    if expiry is not None:
        ms_expiry = int(expiry * 1000.0)
        if ms_expiry <= 0:
            raise ValueError("Provided expiry (when converted to"
                             " milliseconds) must be greater"
                             " than zero instead of %s" % (expiry))
    else:
        # On the lua side none does not translate to nil, so a string
        # marker is sent to make the difference detectable.
        ms_expiry = "none"
    claim_script = self._get_script('claim')
    with _translate_failures():
        raw_who = self._encode_owner(who)
        script_keys = [job.owner_key, self.listings_key,
                       job.last_modified_key]
        script_args = [
            raw_who,
            job.key,
            # NOTE(harlowja): this has to be sent in as a blob (even if
            # it is not set/used), since the format can not currently be
            # created in lua...
            self._dumps(timeutils.utcnow()),
            ms_expiry,
        ]
        result = self._loads(claim_script(keys=script_keys,
                                          args=script_args))
    if result['status'] == self.SCRIPT_STATUS_OK:
        return
    # Anything else is an error, map the reported reason to an exception.
    reason = result.get('reason')
    if reason == self.SCRIPT_UNKNOWN_JOB:
        raise exc.NotFound("Job %s not found to be claimed" % (job.uuid))
    if reason == self.SCRIPT_ALREADY_CLAIMED:
        raw_owner = result.get('owner')
        if not raw_owner:
            raise exc.UnclaimableJob("Job %s already claimed"
                                     % (job.uuid))
        owner = self._decode_owner(raw_owner)
        raise exc.UnclaimableJob("Job %s already claimed by %s"
                                 % (job.uuid, owner))
    raise exc.JobFailure("Failure to claim job %s, unknown internal"
                         " error (reason=%s)" % (job.uuid, reason))
def consume(self, job, who):
    """Consume ``job`` (which must be owned by ``who``).

    The job's lock and data nodes are deleted in one transaction,
    passing the stat versions that were read, and the job is then
    removed via ``self._remove_job``.

    :raises excp.NotFound: if the owner of the job can not be
                           determined.
    :raises excp.JobFailure: if the job is not owned by ``who``.
    """
    with self._wrap(job.uuid, job.path,
                    fail_msg_tpl="Consumption failure: %s"):
        try:
            lock_data, lock_stat, _data, data_stat = (
                self._get_owner_and_data(job))
        except k_exceptions.NoNodeError:
            excp.raise_with_cause(excp.NotFound,
                                  "Can not consume a job %s"
                                  " which we can not determine"
                                  " the owner of" % (job.uuid))
        current_owner = lock_data.get("owner")
        if current_owner != who:
            raise excp.JobFailure("Can not consume a job %s"
                                  " which is not owned by %s"
                                  % (job.uuid, who))
        txn = self._client.transaction()
        # Delete the lock first and then the job node itself.
        for node_path, node_stat in ((job.lock_path, lock_stat),
                                     (job.path, data_stat)):
            txn.delete(node_path, version=node_stat.version)
        kazoo_utils.checked_commit(txn)
        self._remove_job(job.path)