def run(self, params):
    """Requeue jobs stuck in the redis 'started' zset for too long.

    params["timeout"] (seconds, default 60) is the age after which a
    started job is considered lost and pushed back on its origin queue.
    Returns {"fetched": N, "requeued": N}.
    """
    redis_key_started = Queue.redis_key_started()

    stats = {"fetched": 0, "requeued": 0}

    # Fetch all the jobs started more than a minute ago - they should not
    # be in redis:started anymore
    job_ids = connections.redis.zrangebyscore(
        redis_key_started, "-inf", time.time() - params.get("timeout", 60))

    # TODO this should be wrapped inside Queue or Worker
    # we shouldn't access these internals here
    # NOTE(review): unserialize is done with a throwaway "default" Queue;
    # presumably serialization is queue-independent — confirm.
    queue_obj = Queue("default")
    unserialized_job_ids = queue_obj.unserialize_job_ids(job_ids)

    for i, job_id in enumerate(job_ids):

        # Look up each job's original queue from MongoDB.
        queue = Job(unserialized_job_ids[i], start=False, fetch=False).fetch(
            full_data=True).data["queue"]

        queue_obj = Queue(queue)

        stats["fetched"] += 1

        log.info("Requeueing %s on %s" % (unserialized_job_ids[i], queue))

        # TODO LUA script & don't rpush if not in zset anymore.
        with connections.redis.pipeline(transaction=True) as pipeline:
            pipeline.zrem(redis_key_started, job_id)
            pipeline.rpush(queue_obj.redis_key, job_id)
            pipeline.execute()

        stats["requeued"] += 1

    return stats
def test_known_queues_lifecycle(worker):
    """Check that CleanKnownQueues only forgets queues that are both
    old (last used > max_age) and empty."""
    worker.start(queues="default_reverse xtest test_timed_set",
                 flags="--config tests/fixtures/config-raw1.py")

    time.sleep(1)

    # Test known queues
    from mrq.queue import Queue, send_task

    assert set(Queue.redis_known_queues().keys()) == set(
        ["default", "xtest", "test_timed_set"])

    # Try queueing a task
    send_task("tests.tasks.general.Add",
              {"a": 41, "b": 1, "sleep": 1}, queue="x")

    time.sleep(1)

    assert set(Queue.redis_known_queues().keys()) == set(
        ["x", "default", "xtest", "test_timed_set"])

    # Backdate the queue's last-used timestamp past the 7-day threshold.
    Queue("x").add_to_known_queues(timestamp=time.time() - (8 * 86400))

    worker.send_task("mrq.basetasks.cleaning.CleanKnownQueues", {}, block=True)

    # Not removed - not empty yet.
    assert set(Queue.redis_known_queues().keys()) == set(
        ["x", "default", "xtest", "test_timed_set"])

    Queue("x").empty()

    # Still not removed.
    assert set(Queue.redis_known_queues().keys()) == set(
        ["x", "default", "xtest", "test_timed_set"])

    worker.send_task("mrq.basetasks.cleaning.CleanKnownQueues", {}, block=True)

    # Now we're good
    assert set(Queue.redis_known_queues().keys()) == set(
        ["default", "xtest", "test_timed_set"])
def test_interrupt_redis_started_jobs(worker):
    """Kill a worker mid-job and verify RequeueRedisStartedJobs puts the
    jobs left in redis:started back on their original queue."""
    worker.start(
        queues="xxx", flags=" --config tests/fixtures/config-lostjobs.py")

    worker.send_task("tests.tasks.general.Add", {
        "a": 41, "b": 1, "sleep": 10}, block=False, queue="xxx")
    worker.send_task("tests.tasks.general.Add", {
        "a": 41, "b": 1, "sleep": 10}, block=False, queue="xxx")

    time.sleep(3)

    worker.stop(deps=False)

    # Both jobs were dequeued but never finished: they stay in 'started'.
    assert Queue("xxx").size() == 0
    assert connections.redis.zcard(Queue.redis_key_started()) == 2

    worker.start(queues="default", start_deps=False, flush=False)

    assert connections.redis.zcard(Queue.redis_key_started()) == 2

    # timeout=0 makes every started job eligible for requeueing.
    res = worker.send_task("mrq.basetasks.cleaning.RequeueRedisStartedJobs", {
        "timeout": 0
    }, block=True, queue="default")

    assert res["fetched"] == 2
    assert res["requeued"] == 2

    assert Queue("xxx").size() == 2
    assert Queue("default").size() == 0
    assert connections.redis.zcard(Queue.redis_key_started()) == 0
def run(self, params):
    """Requeue MongoDB jobs marked 'queued' that are missing from their
    redis queue (e.g. after a redis flush).

    params["max_queue_items"] (default 1000): skip queues bigger than
    this to avoid expensive membership checks.
    Returns {"fetched": N, "requeued": N}.
    """
    # If there are more than this much items on the queue, we don't try to check if our mongodb
    # jobs are still queued.
    max_queue_items = params.get("max_queue_items", 1000)

    stats = {"fetched": 0, "requeued": 0}

    # This was only checking in Redis and wasn't resistant to a redis-wide flush.
    # Doing Queue.all() is slower but covers more edge cases.
    # all_queues = Queue.all_known()
    all_queues = Queue.all()

    log.info("Checking %s queues" % len(all_queues))

    for queue_name in all_queues:

        queue = Queue(queue_name)
        queue_size = queue.size()

        # If the queue is raw, the jobs were only stored in redis so they are lost for good.
        if queue.is_raw:
            continue

        log.info("Checking queue %s" % queue_name)

        if queue_size > max_queue_items:
            log.info("Stopping because queue %s has %s items" %
                     (queue_name, queue_size))
            continue

        queue_jobs_ids = set(queue.list_job_ids(limit=max_queue_items + 1))
        if len(queue_jobs_ids) >= max_queue_items:
            log.info(
                "Stopping because queue %s actually had more than %s items" %
                (queue_name, len(queue_jobs_ids)))
            continue

        # Walk queued jobs oldest-first; stop at the first one that is
        # still present in redis (everything after it should be too).
        for job_data in connections.mongodb_jobs.mrq_jobs.find(
                {"queue": queue_name, "status": "queued"},
                projection={"_id": 1}).sort([["_id", 1]]):

            stats["fetched"] += 1

            if str(job_data["_id"]) in queue_jobs_ids:
                log.info("Found job %s on queue %s. Stopping" %
                         (job_data["_id"], queue.id))
                break

            # At this point, this job is not on the queue and we're sure
            # the queue is less than max_queue_items
            # We can safely requeue the job.
            log.info("Requeueing %s on %s" % (job_data["_id"], queue.id))

            stats["requeued"] += 1
            job = Job(job_data["_id"])
            job.requeue(queue=queue_name)

    return stats
def send_raw_tasks(self, queue, params_list, start=True, block=True):
    """Enqueue raw tasks on `queue`, starting the worker first if
    needed, and optionally block until everything was processed."""
    if start and not self.started:
        self.start()

    send_raw_tasks(queue, params_list)

    if not block:
        return

    # Wait for the queue to be empty. Might be error-prone when tasks are in-memory between the 2
    queue_obj = Queue(queue)
    while True:
        pending = queue_obj.size()
        started = self.mongodb_jobs.mrq_jobs.find(
            {"status": "started"}).count()
        if pending == 0 and started == 0:
            break
        # print "S", q.size(), self.mongodb_jobs.mrq_jobs.find({"status": "started"}).count()
        time.sleep(0.1)
def run(self, params):
    """Forget known queues that are both empty and unused for more than
    params["max_age"] seconds (default: 7 days)."""
    age_limit = int(params.get("max_age") or (7 * 86400))

    # Only clean queues older than N days
    cutoff = time.time() - age_limit

    for queue_name, last_used in Queue.redis_known_queues().iteritems():
        if last_used >= cutoff:
            continue
        queue_obj = Queue(queue_name, add_to_known_queues=False)
        if queue_obj.size() == 0:
            queue_obj.remove_from_known_queues()
def run(self, params):
    """Requeue jobs left in redis:started for longer than
    params["timeout"] seconds (default 60).

    Returns {"fetched": N, "requeued": N}.
    """
    self.collection = connections.mongodb_jobs.mrq_jobs

    started_key = Queue.redis_key_started()

    stats = {"fetched": 0, "requeued": 0}

    # Fetch all the jobs started more than a minute ago - they should not be in redis:started anymore
    cutoff = time.time() - params.get("timeout", 60)
    job_ids = connections.redis.zrangebyscore(started_key, "-inf", cutoff)

    for job_id in job_ids:

        # The job's original queue is stored in MongoDB.
        job_data = Job(job_id, start=False, fetch=False).fetch(
            full_data=True).data
        queue = job_data["queue"]

        stats["fetched"] += 1

        log.info("Requeueing %s on %s" % (job_id, queue))

        # TODO LUA script & don't rpush if not in zset anymore.
        with connections.redis.pipeline(transaction=True) as pipeline:
            pipeline.zrem(started_key, job_id)
            pipeline.rpush(Queue(queue).redis_key, job_id)
            pipeline.execute()

        stats["requeued"] += 1

    return stats
def run(self, params): self.collection = connections.mongodb_jobs.mrq_jobs # If there are more than this much items on the queue, we don't try to check if our mongodb # jobs are still queued. max_queue_items = params.get("max_queue_items", 1000) stats = { "fetched": 0, "requeued": 0 } for job_data in self.collection.find({ "status": "queued" }, fields={"_id": 1, "queue": 1}).sort([("_id", 1)]): stats["fetched"] += 1 queue = Queue(job_data["queue"]) queue_size = queue.size() if queue_size > max_queue_items: log.info("Stopping because queue %s has %s items" % (queue, queue_size)) break queue_jobs_ids = set(queue.list_job_ids(limit=max_queue_items + 1)) if len(queue_jobs_ids) >= max_queue_items: log.info("Stopping because queue %s actually had more than %s items" % (queue, len(queue_jobs_ids))) break if str(job_data["_id"]) in queue_jobs_ids: log.info("Stopping because we found job %s in redis" % job_data["_id"]) break # At this point, this job is not on the queue and we're sure the queue is less than max_queue_items # We can safely requeue the job. log.info("Requeueing %s on %s" % (job_data["_id"], queue.id)) stats["requeued"] += 1 job = Job(job_data["_id"]) job.requeue(queue=job_data["queue"]) return stats
def run(self, params):
    """Remove old, empty, non-configured queues from the known-queues set.

    params:
      max_age (seconds, default 7 days): minimum idle time.
      pretend (bool): report but don't remove.
      check_mongo (bool): also require zero matching jobs in MongoDB.
    Returns the list of removed queue names.
    """
    max_age = int(params.get("max_age") or (7 * 86400))
    pretend = bool(params.get("pretend"))
    check_mongo = bool(params.get("check_mongo"))

    known_queues = Queue.redis_known_queues()
    removed_queues = []

    queues_from_config = Queue.all_known_from_config()

    print "Found %s known queues & %s from config" % (len(known_queues), len(queues_from_config))

    # Only clean queues older than N days
    time_threshold = time.time() - max_age

    for queue, time_last_used in known_queues.iteritems():

        # Queues declared in the config are never cleaned.
        if queue in queues_from_config:
            continue

        if time_last_used < time_threshold:
            q = Queue(queue, add_to_known_queues=False)
            size = q.size()
            if check_mongo:
                size += connections.mongodb_jobs.mrq_jobs.count({"queue": queue})
            if size == 0:
                removed_queues.append(queue)
                print "Removing empty queue '%s' from known queues ..." % queue
                if not pretend:
                    q.remove_from_known_queues()

    print "Cleaned %s queues" % len(removed_queues)

    return removed_queues
def test_general_simple_task_reverse(worker):
    """Run three Add tasks on a reversed queue and check execution order
    (LIFO) plus known-queue registration on first enqueue."""
    worker.start(queues="default_reverse xtest test_timed_set",
                 flags="--config tests/fixtures/config-raw1.py")

    result = worker.send_tasks("tests.tasks.general.Add", [
        {"a": 41, "b": 1, "sleep": 1},
        {"a": 41, "b": 1, "sleep": 1},
        {"a": 40, "b": 1, "sleep": 1}
    ])

    assert result == [42, 42, 41]

    # Reversed queue: the last-queued task (40+1) finishes first.
    assert [x["result"] for x in worker.mongodb_jobs.mrq_jobs.find().sort(
        [["dateupdated", 1]])] == [41, 42, 42]

    # Test known queues
    from mrq.queue import Queue, send_task

    assert Queue.redis_known_queues() == set(
        ["default", "xtest", "test_timed_set"])

    # Try queueing a task
    send_task("tests.tasks.general.Add",
              {"a": 41, "b": 1, "sleep": 1}, queue="x")

    time.sleep(1)

    assert Queue.redis_known_queues() == set(
        ["x", "default", "xtest", "test_timed_set"])
def run(self, params):
    """Return the names of all known queues as a list."""
    known = Queue.all_known()
    return list(known)
def api_datatables(unit):
    """Dashboard datatables endpoint: paginated listings for queues,
    workers, scheduled jobs and jobs, returned as JSON."""
    # import time
    # time.sleep(5)

    collection = None
    sort = None

    # Datatables-style pagination parameters.
    skip = int(request.args.get("iDisplayStart", 0))
    limit = int(request.args.get("iDisplayLength", 20))

    if unit == "queues":

        queues = []
        for name, jobs in Queue.all_known().items():
            queue = Queue(name)
            q = {
                "name": name,
                "jobs": jobs,  # MongoDB size
                "size": queue.size(),  # Redis size
                "is_sorted": queue.is_sorted,
                "is_timed": queue.is_timed,
                "is_raw": queue.is_raw,
                "is_set": queue.is_set
            }

            if queue.is_sorted:
                raw_config = cfg.get("raw_queues", {}).get(name, {})
                # Default graph window: +/- 7 days for timed queues,
                # fixed 0-100 otherwise; overridable per-queue in config.
                q["graph_config"] = raw_config.get("dashboard_graph", lambda: {
                    "start": time.time() - (7 * 24 * 3600),
                    "stop": time.time() + (7 * 24 * 3600),
                    "slices": 30
                } if queue.is_timed else {
                    "start": 0,
                    "stop": 100,
                    "slices": 30
                })()

                if q["graph_config"]:
                    q["graph"] = queue.get_sorted_graph(**q["graph_config"])

            if queue.is_timed:
                q["jobs_to_dequeue"] = queue.count_jobs_to_dequeue()

            queues.append(q)

        # Sort by total (mongo + redis) size, descending.
        queues.sort(key=lambda x: -(x["jobs"] + x["size"]))

        data = {
            "aaData": queues,
            "iTotalDisplayRecords": len(queues)
        }

    elif unit == "workers":
        fields = None
        query = {"status": {"$nin": ["stop"]}}
        collection = connections.mongodb_jobs.mrq_workers
        sort = [("datestarted", -1)]

        if request.args.get("showstopped"):
            query = {}

    elif unit == "scheduled_jobs":
        collection = connections.mongodb_jobs.mrq_scheduled_jobs
        fields = None
        query = {}

    elif unit == "jobs":

        fields = None
        query = build_api_datatables_query(request)
        sort = [("_id", 1)]

        # We can't search easily params because we store it as decoded JSON in mongo :(
        # Add a string index?
        # if request.args.get("sSearch"):
        #   query.update(json.loads(request.args.get("sSearch")))

        collection = connections.mongodb_jobs.mrq_jobs

    if collection is not None:

        cursor = collection.find(query, projection=fields)

        if sort:
            cursor.sort(sort)

        if skip is not None:
            cursor.skip(skip)

        if limit is not None:
            cursor.limit(limit)

        data = {
            "aaData": list(cursor),
            "iTotalDisplayRecords": collection.find(query).count()
        }

    data["sEcho"] = request.args["sEcho"]

    return jsonify(data)
def test_interrupt_worker_double_sigint(worker, p_flags):
    """ Test what happens when we interrupt a running worker with 2 SIGINTs. """

    start_time = time.time()

    worker.start(flags=p_flags)

    job_id = worker.send_task("tests.tasks.general.Add", {
        "a": 41, "b": 1, "sleep": 20
    }, block=False)

    while Job(job_id).fetch().data["status"] == "queued":
        time.sleep(0.1)

    job = Job(job_id).fetch().data
    assert job["status"] == "started"

    # Stop the worker gracefully. first job should still finish!
    os.kill(worker.process.pid, 2)

    time.sleep(1)

    # Should not be accepting new jobs!
    job_id2 = worker.send_task("tests.tasks.general.Add", {
        "a": 42, "b": 1, "sleep": 20
    }, block=False, start=False)

    time.sleep(1)

    job2 = Job(job_id2).fetch().data
    assert job2.get("status") == "queued"

    job = Job(job_id).fetch().data
    assert job["status"] == "started"

    # Sending a second kill -2 should make it stop
    os.kill(worker.process.pid, 2)

    while Job(job_id).fetch().data["status"] == "started":
        time.sleep(0.1)

    job = Job(job_id).fetch().data
    assert job["status"] == "interrupt"

    assert time.time() - start_time < 15

    # Then try the cleaning task that requeues interrupted jobs
    assert Queue("default").size() == 1

    worker.start(queues="cleaning", deps=False, flush=False)

    res = worker.send_task("mrq.basetasks.cleaning.RequeueInterruptedJobs",
                           {}, block=True, queue="cleaning")
    assert res["requeued"] == 1

    assert Queue("default").size() == 2
    # BUG FIX: this comparison was missing its `assert` and was silently
    # discarded, so the expected queue ordering was never checked.
    assert Queue("default").list_job_ids() == [str(job_id2), str(job_id)]

    job = Job(job_id).fetch().data
    assert job["status"] == "queued"
    assert job["queue"] == "default"
def test_interrupt_worker_sigkill(worker, p_flags):
    """ Test what happens when we interrupt a running worker with 1 SIGKILL.

    SIGKILLs can't be intercepted by the process so the job should still be
    in 'started' state.
    """

    start_time = time.time()

    worker.start(
        flags=p_flags + " --config tests/fixtures/config-shorttimeout.py")

    cfg = json.loads(
        worker.send_task("tests.tasks.general.GetConfig", {}, block=True))
    assert cfg["tasks"]["tests.tasks.general.Add"]["timeout"] == 200

    job_id = worker.send_task("tests.tasks.general.Add", {
        "a": 41, "b": 1, "sleep": 20
    }, block=False)

    time.sleep(3)

    worker.stop(block=True, sig=9, deps=False)

    time.sleep(1)

    # This is a bit tricky, but when getting the job from the current python environment, its timeout should
    # be the default 3600 and not 200 because we didn't configure ourselves
    # with config-shorttimeout.py
    job = Job(job_id).fetch().data
    assert Job(job_id).fetch().timeout == 3600
    assert job["status"] == "started"

    assert time.time() - start_time < 10

    # Then try the cleaning task that requeues started jobs
    # We need to fake the datestarted
    worker.mongodb_jobs.mrq_jobs.update({"_id": ObjectId(job_id)}, {"$set": {
        "datestarted": datetime.datetime.utcnow() - datetime.timedelta(seconds=300)
    }})

    assert Queue("default").size() == 0

    worker.start(queues="cleaning", deps=False, flush=False,
                 flags=" --config tests/fixtures/config-shorttimeout.py")

    res = worker.send_task("mrq.basetasks.cleaning.RequeueStartedJobs",
                           {"timeout": 110}, block=True, queue="cleaning")

    assert res["requeued"] == 0
    assert res["started"] == 2  # current job should count too

    assert Queue("default").size() == 0
    job = Job(job_id).fetch().data
    assert job["status"] == "started"
    assert job["queue"] == "default"

    # Now do it again with a small enough timeout
    res = worker.send_task("mrq.basetasks.cleaning.RequeueStartedJobs",
                           {"timeout": 90}, block=True, queue="cleaning")

    assert res["requeued"] == 1
    assert res["started"] == 2  # current job should count too
    assert Queue("default").size() == 1
    # BUG FIX: this comparison was missing its `assert` and was silently
    # discarded, so the requeued job id was never actually verified.
    assert Queue("default").list_job_ids() == [str(job_id)]

    job = Job(job_id).fetch().data
    assert job["status"] == "queued"
    assert job["queue"] == "default"
def api_datatables(unit):
    """Dashboard datatables endpoint (workers/agents variant): paginated
    listings for queues, workers, agents, scheduled jobs and jobs."""
    # import time
    # time.sleep(5)

    collection = None
    sort = None

    # Datatables-style pagination parameters.
    skip = int(request.args.get("iDisplayStart", 0))
    limit = int(request.args.get("iDisplayLength", 20))

    if unit == "queues":

        queues = []
        for name in Queue.all_known():
            queue = Queue(name)
            q = {
                "name": name,
                "size": queue.size(),  # Redis size
                "is_sorted": queue.is_sorted,
                "is_timed": queue.is_timed,
                "is_raw": queue.is_raw,
                "is_set": queue.is_set
            }

            if queue.is_sorted:
                raw_config = queue.get_config()
                # Default graph window: +/- 7 days for timed queues,
                # fixed 0-100 otherwise; overridable per-queue in config.
                q["graph_config"] = raw_config.get("dashboard_graph", lambda: {
                    "start": time.time() - (7 * 24 * 3600),
                    "stop": time.time() + (7 * 24 * 3600),
                    "slices": 30
                } if queue.is_timed else {
                    "start": 0,
                    "stop": 100,
                    "slices": 30
                })()

                if q["graph_config"]:
                    q["graph"] = queue.get_sorted_graph(**q["graph_config"])

            if queue.is_timed:
                q["jobs_to_dequeue"] = queue.count_jobs_to_dequeue()

            queues.append(q)

        # Sort by redis size, descending.
        queues.sort(key=lambda x: -x["size"])

        data = {
            "aaData": queues,
            "iTotalDisplayRecords": len(queues)
        }

    elif unit == "workers":
        fields = None
        collection = connections.mongodb_jobs.mrq_workers
        sort = [("datestarted", -1)]

        query = {}
        if request.args.get("id"):
            query["_id"] = ObjectId(request.args["id"])
        else:
            # Optional filters: status (dash-separated), ip, queue.
            if request.args.get("status"):
                statuses = request.args["status"].split("-")
                query["status"] = {"$in": statuses}
            if request.args.get("ip"):
                query["$or"] = [{"config.local_ip": request.args["ip"]},
                                {"config.external_ip": request.args["ip"]}]
            if request.args.get("queue"):
                query["config.queues"] = request.args["queue"]

    elif unit == "agents":
        fields = None
        query = {"status": {"$nin": ["stop"]}}
        collection = connections.mongodb_jobs.mrq_agents
        sort = [("datestarted", -1)]

        if request.args.get("showstopped"):
            query = {}

    elif unit == "scheduled_jobs":
        collection = connections.mongodb_jobs.mrq_scheduled_jobs
        fields = None
        query = {}

    elif unit == "jobs":

        fields = None
        query = build_api_datatables_query(request)
        sort = None  # TODO [("_id", 1)]

        # We can't search easily params because we store it as decoded JSON in mongo :(
        # Add a string index?
        # if request.args.get("sSearch"):
        #   query.update(json.loads(request.args.get("sSearch")))

        collection = connections.mongodb_jobs.mrq_jobs

    if collection is not None:

        cursor = collection.find(query, projection=fields)

        if sort:
            cursor.sort(sort)

        if skip is not None:
            cursor.skip(skip)

        if limit is not None:
            cursor.limit(limit)

        data = {
            "aaData": list(cursor),
            "iTotalDisplayRecords": collection.find(query).count()
        }

    data["sEcho"] = request.args["sEcho"]

    return jsonify(data)
def perform_action(self, action, query, destination_queue):
    """Apply a bulk action ('cancel', 'requeue', 'requeue_retry') to the
    jobs matching `query`, optionally moving them to `destination_queue`.

    Returns {"requeued": N, "cancelled": N}.
    """
    stats = {"requeued": 0, "cancelled": 0}

    if action == "cancel":

        default_job_timeout = get_current_config()["default_job_timeout"]

        # Finding the ttl here to expire is a bit hard because we may have mixed paths
        # and hence mixed ttls.
        # If we are cancelling by path, get this ttl
        if query.get("path"):
            result_ttl = get_task_cfg(query["path"]).get(
                "result_ttl", default_job_timeout)

        # If not, get the maxmimum ttl of all tasks.
        else:
            tasks_defs = get_current_config().get("tasks", {})
            tasks_ttls = [
                cfg.get("result_ttl", 0)
                for cfg in tasks_defs.values()
            ]
            result_ttl = max([default_job_timeout] + tasks_ttls)

        now = datetime.datetime.utcnow()
        ret = self.collection.update(query, {"$set": {
            "status": "cancel",
            "dateexpires": now + datetime.timedelta(seconds=result_ttl),
            "dateupdated": now
        }}, multi=True)
        stats["cancelled"] = ret["n"]

        # Special case when emptying just by queue name: empty it directly!
        # In this case we could also loose some jobs that were queued after
        # the MongoDB update. They will be "lost" and requeued later like the other case
        # after the Redis BLPOP
        if query.keys() == ["queue"]:
            Queue(query["queue"]).empty()

    elif action in ("requeue", "requeue_retry"):

        # Requeue task by groups of maximum 1k items (if all in the same
        # queue)
        cursor = self.collection.find(query, projection=["_id", "queue"])

        # We must freeze the list because queries below would change it.
        # This could not fit in memory, research adding {"stats": {"$ne":
        # "queued"}} in the query
        fetched_jobs = list(cursor)

        for jobs in group_iter(fetched_jobs, n=1000):

            jobs_by_queue = defaultdict(list)
            for job in jobs:
                jobs_by_queue[job["queue"]].append(job["_id"])
                stats["requeued"] += 1

            for queue in jobs_by_queue:

                updates = {
                    "status": "queued",
                    "dateupdated": datetime.datetime.utcnow()
                }
                if destination_queue is not None:
                    updates["queue"] = destination_queue
                # 'requeue' resets the retry counter; 'requeue_retry' keeps it.
                if action == "requeue":
                    updates["retry_count"] = 0

                self.collection.update({"_id": {
                    "$in": jobs_by_queue[queue]
                }}, {"$set": updates}, multi=True)

                # Between these two lines, jobs can become "lost" too.

                Queue(destination_queue or queue).enqueue_job_ids(
                    [str(x) for x in jobs_by_queue[queue]])

    print stats

    return stats
def run(self, params):
    """Requeue MongoDB jobs marked 'queued' that are missing from their
    redis queue (e.g. after a redis flush).

    params["max_queue_items"] (default 1000): skip queues bigger than
    this to avoid expensive membership checks.
    Returns {"fetched": N, "requeued": N}.
    """
    # If there are more than this much items on the queue, we don't try to check if our mongodb
    # jobs are still queued.
    max_queue_items = params.get("max_queue_items", 1000)

    stats = {"fetched": 0, "requeued": 0}

    # This was only checking in Redis and wasn't resistant to a redis-wide flush.
    # Doing Queue.all() is slower but covers more edge cases.
    # all_queues = Queue.all_known()
    all_queues = Queue.all()

    log.info("Checking %s queues" % len(all_queues))

    for queue_name in all_queues:

        queue = Queue(queue_name)
        queue_size = queue.size()

        # If the queue is raw, the jobs were only stored in redis so they are lost for good.
        if queue.is_raw:
            continue

        log.info("Checking queue %s" % queue_name)

        if queue_size > max_queue_items:
            log.info("Stopping because queue %s has %s items" %
                     (queue_name, queue_size))
            continue

        queue_jobs_ids = set(queue.list_job_ids(limit=max_queue_items + 1))
        if len(queue_jobs_ids) >= max_queue_items:
            log.info(
                "Stopping because queue %s actually had more than %s items" %
                (queue_name, len(queue_jobs_ids)))
            continue

        # Walk queued jobs oldest-first; stop at the first one still
        # present in redis.
        for job_data in connections.mongodb_jobs.mrq_jobs.find(
                {
                    "queue": queue_name,
                    "status": "queued"
                }, projection={
                    "_id": 1
                }).sort([["_id", 1]]):

            stats["fetched"] += 1

            if str(job_data["_id"]) in queue_jobs_ids:
                log.info("Found job %s on queue %s. Stopping" %
                         (job_data["_id"], queue.id))
                break

            # At this point, this job is not on the queue and we're sure
            # the queue is less than max_queue_items
            # We can safely requeue the job.
            log.info("Requeueing %s on %s" % (job_data["_id"], queue.id))

            stats["requeued"] += 1
            job = Job(job_data["_id"])
            job.requeue(queue=queue_name)

    return stats
def run(self, params):
    """Re-register every queue from the legacy redis known-queues set
    through the current Queue API."""
    prefix = get_current_config()["redis_prefix"]
    legacy_key = "%s:known_queues" % prefix

    members = connections.redis.smembers(legacy_key)
    for queue_name in members:
        Queue(queue_name).add_to_known_queues()
def test_performance_queue_cancel_requeue(worker):
    """Benchmark bulk enqueue / cancel-by-queue / requeue of 10k jobs and
    assert rough time budgets plus state consistency."""
    worker.start(trace=False)

    n_tasks = 10000

    start_time = time.time()

    worker.send_tasks("tests.tasks.general.Add",
                      [{"a": i, "b": 0, "sleep": 0} for i in range(n_tasks)],
                      queue="noexec", block=False)

    queue_time = time.time() - start_time

    print "Queued %s tasks in %s seconds (%s/s)" % (
        n_tasks, queue_time, float(n_tasks) / queue_time)

    assert queue_time < 2

    assert Queue("noexec").size() == n_tasks
    assert worker.mongodb_jobs.mrq_jobs.count() == n_tasks
    assert worker.mongodb_jobs.mrq_jobs.find({
        "status": "queued"
    }).count() == n_tasks

    # Then cancel them all
    start_time = time.time()

    res = worker.send_task("mrq.basetasks.utils.JobAction", {
        "queue": "noexec",
        "action": "cancel"
    }, block=True)

    assert res["cancelled"] == n_tasks

    queue_time = time.time() - start_time

    print "Cancelled %s tasks in %s seconds (%s/s)" % (
        n_tasks, queue_time, float(n_tasks) / queue_time)

    assert queue_time < 5

    assert worker.mongodb_jobs.mrq_jobs.find({
        "status": "cancel"
    }).count() == n_tasks

    # Special case because we cancelled by queue: they should have been
    # removed from redis.
    assert Queue("noexec").size() == 0

    # Then requeue them all
    start_time = time.time()

    res = worker.send_task("mrq.basetasks.utils.JobAction", {
        "queue": "noexec",
        "action": "requeue"
    }, block=True)

    queue_time = time.time() - start_time

    print "Requeued %s tasks in %s seconds (%s/s)" % (
        n_tasks, queue_time, float(n_tasks) / queue_time)

    assert queue_time < 2

    assert worker.mongodb_jobs.mrq_jobs.find({
        "status": "queued"
    }).count() == n_tasks

    # They should be back in the queue
    assert Queue("noexec").size() == n_tasks

    assert res["requeued"] == n_tasks
def test_interrupt_redis_flush(worker):
    """ Test what happens when we flush redis after queueing jobs.

    The RequeueLostJobs task should put them back in redis.
    """
    worker.start(queues="cleaning", deps=True, flush=True)

    job_id1 = worker.send_task("tests.tasks.general.Add", {
        "a": 41, "b": 1, "sleep": 10
    }, block=False, queue="default")
    job_id2 = worker.send_task("tests.tasks.general.Add", {
        "a": 41, "b": 1, "sleep": 10
    }, block=False, queue="default")
    job_id3 = worker.send_task("tests.tasks.general.Add", {
        "a": 41, "b": 1, "sleep": 10
    }, block=False, queue="otherq")

    assert Queue("default").size() == 2
    assert Queue("otherq").size() == 1

    res = worker.send_task("mrq.basetasks.cleaning.RequeueLostJobs",
                           {}, block=True, queue="cleaning")

    # We should try the first job on each queue only, and when seeing it's there we should
    # stop.
    assert res["fetched"] == 2
    assert res["requeued"] == 0

    assert Queue("default").size() == 2
    assert Queue("otherq").size() == 1

    # Then flush redis!
    worker.fixture_redis.flush()

    # Assert the queues are empty.
    assert Queue("default").size() == 0
    assert Queue("otherq").size() == 0

    res = worker.send_task("mrq.basetasks.cleaning.RequeueLostJobs",
                           {}, block=True, queue="cleaning")

    assert res["fetched"] == 3
    assert res["requeued"] == 3

    # Jobs are restored in their original order.
    assert Queue("default").size() == 2
    assert Queue("otherq").size() == 1

    assert Queue("default").list_job_ids() == [str(job_id1), str(job_id2)]
    assert Queue("otherq").list_job_ids() == [str(job_id3)]
def perform_action(self, action, query, destination_queue):
    """Apply a bulk action ('cancel', 'requeue', 'requeue_retry') to the
    jobs matching `query`, keeping per-queue size counters in sync via
    set_queues_size. Returns {"requeued": N, "cancelled": N}.
    """
    stats = {"requeued": 0, "cancelled": 0}

    if action == "cancel":

        default_job_timeout = get_current_config()["default_job_timeout"]

        # Finding the ttl here to expire is a bit hard because we may have mixed paths
        # and hence mixed ttls.
        # If we are cancelling by path, get this ttl
        if query.get("path"):
            result_ttl = get_task_cfg(query["path"]).get(
                "result_ttl", default_job_timeout)

        # If not, get the maxmimum ttl of all tasks.
        else:
            tasks_defs = get_current_config().get("tasks", {})
            tasks_ttls = [
                cfg.get("result_ttl", 0)
                for cfg in itervalues(tasks_defs)
            ]
            result_ttl = max([default_job_timeout] + tasks_ttls)

        now = datetime.datetime.utcnow()

        # Count affected jobs per queue up-front so the size counters can
        # be decremented after the update.
        size_by_queues = defaultdict(int)
        if "queue" not in query:
            for job in self.collection.find(query, projection={"queue": 1}):
                size_by_queues[job["queue"]] += 1

        ret = self.collection.update(query, {"$set": {
            "status": "cancel",
            "dateexpires": now + datetime.timedelta(seconds=result_ttl),
            "dateupdated": now
        }}, multi=True)
        stats["cancelled"] = ret["n"]

        if "queue" in query:
            if isinstance(query["queue"], str):
                size_by_queues[query["queue"]] = ret["n"]

        set_queues_size(size_by_queues, action="decr")

        # Special case when emptying just by queue name: empty it directly!
        # In this case we could also loose some jobs that were queued after
        # the MongoDB update. They will be "lost" and requeued later like the other case
        # after the Redis BLPOP
        if list(query.keys()) == ["queue"] and isinstance(
                query["queue"], basestring):
            Queue(query["queue"]).empty()

    elif action in ("requeue", "requeue_retry"):

        # Requeue task by groups of maximum 1k items (if all in the same
        # queue)
        status_query = query.get("status")
        if not status_query:
            query["status"] = {"$ne": "queued"}

        cursor = self.collection.find(query, projection=["_id", "queue"])

        for jobs in group_iter(cursor, n=1000):

            jobs_by_queue = defaultdict(list)
            for job in jobs:
                jobs_by_queue[job["queue"]].append(job["_id"])
                stats["requeued"] += 1

            for queue in jobs_by_queue:

                updates = {
                    "status": "queued",
                    "datequeued": datetime.datetime.utcnow(),
                    "dateupdated": datetime.datetime.utcnow()
                }
                if destination_queue is not None:
                    updates["queue"] = destination_queue
                # 'requeue' resets the retry counter; 'requeue_retry' keeps it.
                if action == "requeue":
                    updates["retry_count"] = 0

                self.collection.update({"_id": {
                    "$in": jobs_by_queue[queue]
                }}, {"$set": updates}, multi=True)

            set_queues_size({
                queue: len(jobs)
                for queue, jobs in jobs_by_queue.items()
            })

    return stats
def test_cancel_by_path(worker, p_query):
    """Cancel queued jobs matching a parametrized query (p_query[0]) and
    verify exactly p_query[1] jobs are cancelled, then requeued."""
    expected_action_jobs = p_query[1]

    # Start the worker with only one greenlet so that tasks execute
    # sequentially
    worker.start(flags="--greenlets 1", queues="default q1 q2")

    job_ids = []
    # First task sleeps long enough for the JobAction below to run while
    # the later MongoInsert tasks are still queued.
    job_ids.append(
        worker.send_task("tests.tasks.general.Add", {
            "a": 41, "b": 1, "sleep": 2
        }, queue="default", block=False))

    params = {"action": "cancel", "status": "queued"}
    params.update(p_query[0])
    requeue_job = worker.send_task("mrq.basetasks.utils.JobAction",
                                   params, block=False)

    job_ids.append(
        worker.send_task("tests.tasks.general.MongoInsert", {"a": 42},
                         queue="q1", block=False))
    job_ids.append(
        worker.send_task("tests.tasks.general.MongoInsert", {"a": 42},
                         queue="q2", block=False))
    job_ids.append(
        worker.send_task("tests.tasks.general.MongoInsert", {"a": 43},
                         queue="q2", block=False))
    job_ids.append(
        worker.send_task("tests.tasks.general.MongoInsert2", {"a": 44},
                         queue="q1", block=False))

    Job(job_ids[-1]).wait(poll_interval=0.01)

    # Leave some time to unqueue job_id4 without executing.
    time.sleep(1)

    worker.stop(deps=False)

    jobs = [Job(job_id).fetch().data for job_id in job_ids]

    assert jobs[0]["status"] == "success"
    assert jobs[0]["result"] == 42

    assert Job(requeue_job).fetch(
    ).data["result"]["cancelled"] == expected_action_jobs

    # Check that the right number of jobs ran.
    assert worker.mongodb_logs.tests_inserts.count(
    ) == len(job_ids) - 1 - expected_action_jobs

    action_jobs = list(worker.mongodb_jobs.mrq_jobs.find({"status": "cancel"}))
    assert len(action_jobs) == expected_action_jobs
    assert set([x.get("result") for x in action_jobs]) == set([None])

    assert Queue("default").size() == 0
    assert Queue("q1").size() == 0
    assert Queue("q2").size() == 0

    worker.mongodb_logs.tests_inserts.remove({})

    # Then requeue the same jobs
    params = {"action": "requeue"}
    params.update(p_query[0])
    worker.start(flags="--gevent 1", queues="default", flush=False)
    ret = worker.send_task("mrq.basetasks.utils.JobAction", params, block=True)

    assert ret["requeued"] == expected_action_jobs

    worker.stop(deps=False)

    assert worker.mongodb_jobs.mrq_jobs.find({
        "status": "queued"
    }).count() == expected_action_jobs
    assert Queue("default").size() + Queue("q1").size() + \
        Queue("q2").size() == expected_action_jobs
def api_datatables(unit):
    """Dashboard datatables endpoint (sortable variant): paginated
    listings for queues, workers, scheduled jobs and jobs, with
    client-driven sort via _get_sort_args and optional date filters."""
    collection = None
    sort = None

    # Datatables-style pagination parameters.
    skip = int(request.args.get("iDisplayStart", 0))
    limit = int(request.args.get("iDisplayLength", 20))
    with_mongodb_size = bool(request.args.get("with_mongodb_size"))

    if unit == "queues":

        queues = []
        for name in Queue.all_known():
            queue = Queue(name)

            jobs = None
            # Counting in MongoDB is optional because it can be slow.
            if with_mongodb_size:
                jobs = connections.mongodb_jobs.mrq_jobs.count({
                    "queue": name,
                    "status": request.args.get("status") or "queued"
                })

            q = {
                "name": name,
                "jobs": jobs,  # MongoDB size
                "size": queue.size(),  # Redis size
                "is_sorted": queue.is_sorted,
                "is_timed": queue.is_timed,
                "is_raw": queue.is_raw,
                "is_set": queue.is_set
            }

            if queue.is_sorted:
                raw_config = cfg.get("raw_queues", {}).get(name, {})
                # Default graph window: +/- 7 days for timed queues,
                # fixed 0-100 otherwise; overridable per-queue in config.
                q["graph_config"] = raw_config.get(
                    "dashboard_graph", lambda: {
                        "start": time.time() - (7 * 24 * 3600),
                        "stop": time.time() + (7 * 24 * 3600),
                        "slices": 30
                    } if queue.is_timed else {
                        "start": 0,
                        "stop": 100,
                        "slices": 30
                    })()

                if q["graph_config"]:
                    q["graph"] = queue.get_sorted_graph(**q["graph_config"])

            if queue.is_timed:
                q["jobs_to_dequeue"] = queue.count_jobs_to_dequeue()

            queues.append(q)

        sSortField, sSortDirection = _get_sort_args(request, 'size', 'desc')
        queues.sort(key=lambda x: x.get(sSortField, 0),
                    reverse=sSortDirection == 'desc')

        data = {"aaData": queues, "iTotalDisplayRecords": len(queues)}

    elif unit == "workers":
        fields = None
        query = {"status": {"$nin": ["stop"]}}
        collection = connections.mongodb_jobs.mrq_workers
        sSortField, sSortDirection = _get_sort_args(
            request, 'datestarted', 'desc')
        sort = [(sSortField, -1 if sSortDirection == 'desc' else 1)]

        if request.args.get("showstopped"):
            query = {}

    elif unit == "scheduled_jobs":
        collection = connections.mongodb_jobs.mrq_scheduled_jobs
        fields = None
        query = {}
        sSortField, sSortDirection = _get_sort_args(
            request, 'interval', 'desc')
        sort = [(sSortField, -1 if sSortDirection == 'desc' else 1)]

    elif unit == "jobs":

        fields = None
        query = build_api_datatables_query(request)
        sSortField, sSortDirection = _get_sort_args(request)
        sort = [(sSortField, -1 if sSortDirection == 'desc' else 1)]

        # Optional datestarted range filters.
        time_s = request.args.get("time_s", '')
        time_e = request.args.get("time_e", '')
        if time_s and not time_e:
            print 'datestarted', time_s
            query.update({'datestarted': {'$gte': str2datetime(time_s)}})
        elif time_e and not time_s:
            print 'datestarted', time_e
            query.update({'datestarted': {'$lte': str2datetime(time_e)}})
        elif time_s and time_e:
            print 'datestarted', time_s, time_e
            query.update({
                'datestarted': {
                    '$gte': str2datetime(time_s),
                    '$lte': str2datetime(time_e)
                }
            })

        # We can't search easily params because we store it as decoded JSON in mongo :(
        # Add a string index?
        # if request.args.get("sSearch"):
        #   query.update(json.loads(request.args.get("sSearch")))

        collection = connections.mongodb_jobs.mrq_jobs

    if collection is not None:

        cursor = collection.find(query, projection=fields)

        if sort:
            cursor.sort(sort)

        if skip is not None:
            cursor.skip(skip)

        if limit is not None:
            cursor.limit(limit)

        data = {
            "aaData": list(cursor),
            "iTotalDisplayRecords": collection.find(query).count()
        }

    data["sEcho"] = request.args["sEcho"]

    return jsonify(data)
def test_raw_sorted(worker, p_queue, p_pushback, p_timed, p_flags):
    """Exercise raw sorted/timed queues: scheduling, re-scheduling and
    (optionally) pushback after dequeue."""
    worker.start(flags="%s --config tests/fixtures/config-raw1.py" % p_flags,
                 queues=p_queue)

    test_collection = worker.mongodb_logs.tests_inserts
    jobs_collection = worker.mongodb_jobs.mrq_jobs

    current_time = int(time.time())

    assert jobs_collection.count() == 0
    assert Queue(p_queue).size() == 0

    # Schedule one in the past, one in the future
    worker.send_raw_tasks(p_queue, {
        "aaa": current_time - 10,
        "bbb": current_time + 5,
        "ccc": current_time + 10
    }, block=False)

    # Re-schedule
    worker.send_raw_tasks(p_queue, {
        "ccc": current_time + 6
    }, block=False)

    time.sleep(3)

    if not p_timed:

        # Non-timed sorted queues dequeue everything immediately.
        assert Queue(p_queue).size() == 0
        assert test_collection.count() == 3
        assert list(test_collection.find(projection={"params": 1, "_id": 0}).limit(1)) == [
            {"params": {"sorted_set": "aaa"}}
        ]
        return

    if p_pushback:
        assert Queue(p_queue).size() == 3
        assert set(Queue(p_queue).list_raw_jobs()) == set(
            [b"bbb", b"ccc", b"aaa"])
    else:
        assert Queue(p_queue).size() == 2
        assert set(Queue(p_queue).list_raw_jobs()) == set([b"bbb", b"ccc"])

    # The second one should not yet even exist in mrq_jobs
    assert jobs_collection.count() == 1
    assert list(jobs_collection.find())[0]["status"] == "success"

    assert list(test_collection.find(projection={"params": 1, "_id": 0})) == [
        {"params": {"timed_set": "aaa"}}
    ]

    # Then wait for the second job to be done
    time.sleep(5)

    if p_pushback:
        assert Queue(p_queue).size() == 3
    else:
        assert Queue(p_queue).size() == 0

    assert jobs_collection.count() == 3
    assert list(jobs_collection.find())[1]["status"] == "success"
    assert list(jobs_collection.find())[2]["status"] == "success"
    assert list(jobs_collection.find())[2]["worker"]

    assert test_collection.count() == 3