def execute(self):
    """
    Kill and remove every ``meetup.example.job.SwarmJob`` currently known
    to the queue, looping until :func:`find_job` returns no more matches.
    """
    # hoisted out of the loop: the queue handle and the name pattern are
    # loop-invariant (original re-created both on every iteration)
    q = CoreQueue()
    pattern = re.compile("^meetup.example.job.SwarmJob")
    while True:
        listing = find_job(name=pattern)
        if not listing:
            break
        for doc in listing:
            q.kill_job(doc["_id"])
            q.remove_job(doc["_id"])
def initialize(self):
    # Per-request setup: a queue facade and an (initially empty) cache,
    # presumably of MongoDB collection handles — TODO confirm against the
    # enclosing handler class, which is not visible here.
    self.queue = CoreQueue()
    self._collection = {}
class JobHandler(CoreRequestHandler, core4.queue.query.QueryMixin):
    """
    **DEPRECATED!** Get job listing, job details, kill, delete and restart
    jobs. Use :class:`core4.api.v1.request.job.JobRequest` instead.
    """
    author = "mra"
    title = "job manager"
    tag = "api jobs"

    # idea is to have a FE app; remove api by then

    def initialize(self):
        # per-request setup: queue facade and collection-handle cache used
        # by .collection()
        self.queue = CoreQueue()
        self._collection = {}

    def collection(self, name):
        """
        Singleton connect and return async MongoDB connection.

        :param name: collection name below ``sys``
        :return: :class:`core4.base.collection.CoreCollection`
        """
        if name not in self._collection:
            self._collection[name] = self.config.sys[name]
        return self._collection[name]

    async def get(self, _id=None):
        """
        **DEPRECATED!** Use :class:`core4.api.v1.request.job.JobRequest`.

        Paginated job listing with ``/jobs``, and single job details with
        ``/jobs/<_id>``. Only jobs with read/execute access permissions
        granted to the current user are returned.

        Methods:
            GET /core4/api/v1/jobs - jobs listing

        Parameters:
            - per_page (int): number of jobs per page
            - page (int): requested page (starts counting with ``0``)
            - sort (str): sort field
            - order (int): sort direction (``1`` for ascending, ``-1`` for
              descending)

        Returns:
            data element with list of job attributes as dictionaries. For
            pagination the following top level attributes are returned:

            - **total_count**: the total number of records
            - **count**: the number of records in current page
            - **page**: current page (starts counting with ``0``)
            - **page_count**: the total number of pages
            - **per_page**: the number of elements per page

        Raises:
            401: Unauthorized

        Methods:
            GET /core4/api/v1/jobs/<_id> - job details

        Parameters:
            _id (str): job _id to get details

        Returns:
            data element with job attributes, see
            :class:`core4.queue.job.CoreJob`.

        Raises:
            400: failed to parse job _id
            401: Unauthorized
            404: job not found
        """
        if _id:
            oid = self.parse_id(_id)
            ret = await self.get_detail(oid)
            if not ret:
                raise HTTPError(404, "job _id [{}] not found".format(oid))
        else:
            ret = await self.get_listing()
        self.reply(ret)

    async def post(self, _id=None):
        """
        Same as ``GET``. Paginated job listing with ``/jobs`` and single
        job details with ``/jobs/<_id>``. Additionally this method parses
        a ``filter`` attribute to filter jobs.

        Methods:
            POST /core4/api/v1/jobs - jobs listing

        Parameters:
            - per_page (int): number of jobs per page
            - page (int): requested page (starts counting with ``0``)
            - sort (str): sort field
            - order (int): sort direction (``1`` for ascending, ``-1`` for
              descending)
            - filter (dict): MongoDB query

        **Returns:** see ``GET``

        **Raises:** see ``GET``
        """
        await self.get(_id)

    def parse_id(self, _id):
        """
        Parses str into :class:`bson.objectid.ObjectId` and raises
        400 - Bad Request error in case of failure.

        :param _id: _id (str)
        :return: _id as :class:`bson.objectid.ObjectId`
        """
        try:
            return ObjectId(_id)
        # narrowed from a bare ``except:`` which would also swallow
        # KeyboardInterrupt/SystemExit
        except Exception:
            raise HTTPError(400,
                            "failed to parse job _id: [{}]".format(_id))

    async def get_listing(self):
        """
        Retrieve job listing from ``sys.queue``. Only jobs with
        read/execute access permissions granted to the current user are
        returned.

        :return: :class:`.PageResult`
        """
        per_page = int(self.get_argument("per_page", default=10))
        current_page = int(self.get_argument("page", default=0))
        query_filter = self.get_argument("filter", default={})
        sort_by = self.get_argument("sort", default="_id")
        sort_order = self.get_argument("order", default=1)
        # materialise the filtered listing first; pagination then works on
        # the in-memory, access-checked list
        data = []
        async for doc in self.collection("queue").find(query_filter).sort(
                [(sort_by, int(sort_order))]):
            if await self.user.has_job_access(doc["name"]):
                data.append(doc)

        async def _length(*args, **kwargs):
            # total number of access-granted jobs
            return len(data)

        async def _query(skip, limit, *args, **kwargs):
            # slice the materialised listing for the requested page
            return data[skip:(skip + limit)]

        pager = CorePager(per_page=int(per_page),
                          current_page=int(current_page),
                          length=_length,
                          query=_query)
        return await pager.page()

    async def get_detail(self, _id):
        """
        Retrieve job listing from ``sys.queue`` and ``sys.journal`` using
        :meth:`.project_job_listing` to select job attributes. Only jobs
        with read/execute access permissions granted to the current user
        are returned.

        :param _id: job _id
        :return: dict of job attributes
        :raise: 404 if the job is unknown, 403 if access is denied
        """
        doc = await self.collection("queue").find_one(
            filter={"_id": _id},
            projection=self.project_job_listing())
        if not doc:
            # fallback to journal
            doc = await self.collection("journal").find_one(
                filter={"_id": _id},
                projection=self.project_job_listing())
            if doc:
                doc["journal"] = True
        else:
            doc["journal"] = False
        if not doc:
            raise HTTPError(404, "job_id [%s] not found", _id)
        if await self.user.has_job_access(doc["name"]):
            return doc
        raise HTTPError(403)

    async def delete(self, _id=None):
        """
        Only jobs with execute access permissions granted to the current
        user can be deleted.

        Methods:
            DELETE /core4/api/v1/jobs/<_id> - delete job from ``sys.queue``

        Parameters:
            - _id (str): job _id to delete

        Returns:
            data element with ``True`` for success, else ``False``

        Raises:
            400: failed to parse job _id
            400: requires job _id
            401: Unauthorized
            403: Forbidden
            404: job _id not found
        """
        if _id:
            oid = self.parse_id(_id)
            if not await self.remove_job(oid):
                raise HTTPError(404, "job _id [%s] not found", oid)
        else:
            raise HTTPError(400, "requires job _id")
        self.reply(True)

    async def put(self, request=None):
        """
        Only jobs with execute access permissions granted to the current
        user can be updated.

        Methods:
            PUT /core4/api/v1/jobs/<action>/<_id> - manage job in
            ``sys.queue``

        Parameters:
            - action(str): ``delete``, ``kill`` or ``restart``
            - _id (str): job _id

        Returns:
            data element with

            - **OK** (str) for actions delete and kill
            - **_id** (str) with new job ``_id`` for action restart

        Raises:
            400: failed to parse job _id
            400: requires action and job _id
            400: failed to restart job
            401: Unauthorized
            403: Forbidden
            404: job _id not found
        """
        if request:
            parts = request.split("/")
            oid = self.parse_id(parts[-1])
            if len(parts) == 2:
                # action travels in the URL path: <action>/<_id>
                action = parts[0].lower()
            else:
                action = self.get_argument("action")
            action_method = {
                "delete": self.remove_job,
                "restart": self.restart_job,
                "kill": self.kill_job
            }
            if action not in action_method:
                raise HTTPError(
                    400, "requires action in (delete, restart, kill)")
            await self._access_by_id(oid)
            self.reply(await action_method[action](oid))
            # bug fix: without this return the method fell through to the
            # 400 below after the response had already been finished
            return
        raise HTTPError(400, "requires action and job_id")

    async def _access_by_id(self, oid):
        # raise 404 for an unknown job and 403 when the current user has no
        # execute access to the job's name
        doc = await self.collection("queue").find_one(
            filter={"_id": oid}, projection=["name"])
        if not doc:
            raise HTTPError(404, "job_id [%s] not found", oid)
        if not await self.user.has_job_exec_access(doc["name"]):
            raise HTTPError(403)

    async def update(self, oid, attr, message, event):
        """
        Update the passed job attribute, used with ``removed_at`` and
        ``killed_at``. Only jobs with execute access permissions granted
        to the current user can be updated.

        :param oid: :class:`bson.objectid.ObjectId` of the job
        :param attr: job attribute to update
        :param message: logging helper string
        :param event: event name forwarded to :meth:`.make_stat`
        :return: ``True`` for success
        :raise: 404 if the job was not found or the flag was already set
        """
        await self._access_by_id(oid)
        at = core4.util.node.mongo_now()
        # only match documents where the flag is still unset, so repeated
        # requests do not overwrite the original timestamp
        ret = await self.collection("queue").update_one(
            {
                "_id": oid,
                attr: None
            },
            update={"$set": {
                attr: at
            }})
        if ret.raw_result["n"] == 1:
            self.logger.warning("flagged job [%s] to %s at [%s]",
                                oid, message, at)
            await self.make_stat(event, str(oid))
            return True
        raise HTTPError(404, "failed to flag job [%s] to %s", oid, message)

    async def remove_job(self, oid):
        """
        Flag the passed job ``_id`` in ``removed_at``. Active workers
        process this flag and remove the job from ``sys.queue``.

        :param oid: :class:`bson.objectid.ObjectId` of the job
        :return: ``True`` for success, else ``False``
        """
        return await self.update(oid, "removed_at", "remove",
                                 "request_remove_job")

    async def kill_job(self, oid):
        """
        Flag the passed job ``_id`` in ``killed_at``. Active workers
        process this flag and kill the job.

        :param oid: :class:`bson.objectid.ObjectId` of the job
        :return: ``True`` for success, else ``False``
        """
        return await self.update(oid, "killed_at", "kill",
                                 "request_kill_job")

    async def restart_job(self, oid):
        """
        Restart jobs in state *waiting* (``pending``, ``failed``,
        ``deferred``) or journal and re-enqueue jobs in state *stopped*
        (``error``, ``killed``, ``inactive``).

        :param oid: :class:`bson.objectid.ObjectId` of the job
        :return: dict with ``old_id`` and ``new_id``
        :raise: 404 if neither restart variant succeeded
        """
        if await self.restart_waiting(oid):
            self.logger.warning('successfully restarted [%s]', oid)
            return {"old_id": oid, "new_id": oid}
        new_id = await self.restart_stopped(oid)
        if new_id:
            self.logger.warning('successfully restarted [%s] '
                                'with [%s]', oid, new_id)
            return {"old_id": oid, "new_id": new_id}
        raise HTTPError(404, "failed to restart job [%s]", oid)

    async def restart_waiting(self, _id):
        """
        Restart jobs in state *waiting* (``pending``, ``failed``,
        ``deferred``) by resetting ``query_at``.

        :param _id: :class:`bson.objectid.ObjectId` of the job
        :return: ``True`` for success, else ``False``
        """
        ret = await self.collection("queue").update_one(
            {
                "_id": _id,
                "state": {
                    "$in": STATE_WAITING
                }
            },
            update={"$set": {
                "query_at": None
            }})
        await self.make_stat("restart_waiting", str(_id))
        return ret.modified_count == 1

    async def restart_stopped(self, _id):
        """
        Restart job in state *stopped* (``error``, ``killed``,
        ``inactive``): the old job document is moved to ``sys.journal``
        and a fresh job with the original enqueue arguments is inserted.

        :param _id: :class:`bson.objectid.ObjectId` of the job
        :return: new job _id, or ``None`` if the job does not exist
        :raise: 400 if the job is not in a stopped state
        """
        queue = self.collection("queue")
        job = await queue.find_one(filter={"_id": _id})
        if job:
            if job["state"] in STATE_STOPPED:
                # reserve the job via sys.lock so no concurrent request or
                # worker processes it while we re-enqueue
                if await self.lock_job(
                        self.application.container.identifier, _id):
                    ret = await queue.delete_one({"_id": _id})
                    if ret.raw_result["n"] == 1:
                        # carry over only the declared enqueue arguments
                        doc = dict([(k, v) for k, v in job.items()
                                    if k in core4.queue.job.ENQUEUE_ARGS])
                        new_job = self.queue.job_factory(job["name"], **doc)
                        new_job.__dict__["attempts_left"] = new_job.__dict__[
                            "attempts"]
                        new_job.__dict__[
                            "state"] = core4.queue.main.STATE_PENDING
                        new_job.__dict__["enqueued"] = self.who()
                        new_job.__dict__["enqueued"]["parent_id"] = job["_id"]
                        new_doc = new_job.serialise()
                        ret = await queue.insert_one(new_doc)
                        new_doc["_id"] = ret.inserted_id
                        self.logger.info(
                            'successfully enqueued [%s] with [%s]',
                            new_job.qual_name(), new_doc["_id"])
                        # journal the old document, link parent/child and
                        # release the reservation
                        job["enqueued"]["child_id"] = new_doc["_id"]
                        await self.collection("journal").insert_one(job)
                        await self.collection("lock").delete_one(
                            {"_id": _id})
                        await self.make_stat("restart_stopped", str(_id))
                        return new_doc["_id"]
            raise HTTPError(400, "cannot restart job [%s] in state [%s]",
                            _id, job["state"])
        return None

    async def lock_job(self, identifier, _id):
        """
        Reserve the job for exclusive processing utilising collection
        ``sys.lock``.

        :param identifier: to assign to the reservation
        :param _id: job ``_id``
        :return: ``True`` if reservation succeeded, else ``False``
        """
        try:
            await self.collection("lock").insert_one({
                "_id": _id,
                "owner": identifier
            })
            return True
        except pymongo.errors.DuplicateKeyError:
            # another party already holds the reservation
            return False
        # note: the original trailing ``except: raise`` was a no-op and
        # has been removed

    def who(self):
        """
        Creates ``enqueued`` dict attribute with timestamp (``at``),
        ``hostname``, and ``username``.

        :return: dict
        """
        # prefer the proxy-supplied client address when present
        x_real_ip = self.request.headers.get("X-Real-IP")
        return {
            "at": core4.util.node.mongo_now(),
            "hostname": x_real_ip or self.request.remote_ip,
            "username": self.current_user
        }

    async def get_queue_count(self):
        """
        Retrieves aggregated information about ``sys.queue`` state. This is

        * ``n`` - the number of jobs in the given state
        * ``state`` - job state
        * ``flags`` - job flags ``zombie``, ``wall``, ``removed`` and
          ``killed``

        :return: dict mapping job state to number of jobs in that state
        """
        cur = self.collection("queue").aggregate(self.pipeline_queue_count())
        ret = {}
        async for doc in cur:
            ret[doc["state"]] = doc["n"]
        return ret

    async def make_stat(self, event, _id):
        """
        Collects current job state counts from ``sys.queue`` and inserts a
        record into ``sys.event``. See also :meth:`.CoreQueue.make_stat`.

        :param event: to log
        :param _id: job _id
        """
        self.trigger(name=event,
                     channel=core4.const.QUEUE_CHANNEL,
                     data={
                         "_id": _id,
                         "queue": await self.get_queue_count()
                     })
def execute(self, child=None):
    """
    Kill a random subset (roughly one in five) of the SwarmJob instances
    currently found in the queue.

    :param child: unused, kept for interface compatibility
    """
    queue = CoreQueue()
    pattern = re.compile("^meetup.example.job.SwarmJob")
    for job_doc in find_job(name=pattern):
        # each matching job is killed with 20% probability
        if random.random() < 0.2:
            queue.kill_job(job_doc["_id"])