Example #1
    def run(self, params):

        redis_key_started = Queue.redis_key_started()

        stats = {"fetched": 0, "requeued": 0}

        # Fetch all the jobs started more than a minute ago - they should not
        # be in redis:started anymore
        job_ids = connections.redis.zrangebyscore(redis_key_started, "-inf", time.time() - params.get("timeout", 60))

        # TODO this should be wrapped inside Queue or Worker
        # we shouldn't access these internals here
        queue_obj = Queue("default")
        unserialized_job_ids = queue_obj.unserialize_job_ids(job_ids)

        for i, job_id in enumerate(job_ids):

            queue = Job(unserialized_job_ids[i], start=False, fetch=False).fetch(full_data=True).data["queue"]

            queue_obj = Queue(queue)

            stats["fetched"] += 1

            log.info("Requeueing %s on %s" % (unserialized_job_ids[i], queue))

            # TODO LUA script & don't rpush if not in zset anymore.
            with connections.redis.pipeline(transaction=True) as pipeline:
                pipeline.zrem(redis_key_started, job_id)
                pipeline.rpush(queue_obj.redis_key, job_id)
                pipeline.execute()

            stats["requeued"] += 1

        return stats
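Assuming this run() method belongs to the RequeueRedisStartedJobs cleaning task exercised in Example #3 (the fetched/requeued stats match), here is a minimal sketch of queueing it from application code with the send_task helper imported in Examples #2 and #10; the 60-second timeout and the "default" queue are illustrative assumptions:

# Hedged sketch: enqueue the cleaning task above on a worker's queue.
from mrq.queue import send_task

send_task("mrq.basetasks.cleaning.RequeueRedisStartedJobs",
          {"timeout": 60},  # requeue jobs started more than 60 seconds ago
          queue="default")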
Example #2
def test_known_queues_lifecycle(worker):

    worker.start(queues="default_reverse xtest test_timed_set", flags="--config tests/fixtures/config-raw1.py")
    time.sleep(1)
    # Test known queues
    from mrq.queue import Queue, send_task
    assert set(Queue.redis_known_queues().keys()) == set(["default", "xtest", "test_timed_set"])

    # Try queueing a task
    send_task("tests.tasks.general.Add", {"a": 41, "b": 1, "sleep": 1}, queue="x")
    time.sleep(1)
    assert set(Queue.redis_known_queues().keys()) == set(["x", "default", "xtest", "test_timed_set"])

    Queue("x").add_to_known_queues(timestamp=time.time() - (8 * 86400))

    worker.send_task("mrq.basetasks.cleaning.CleanKnownQueues", {}, block=True)

    # Not removed - not empty yet.
    assert set(Queue.redis_known_queues().keys()) == set(["x", "default", "xtest", "test_timed_set"])

    Queue("x").empty()

    # Still not removed.
    assert set(Queue.redis_known_queues().keys()) == set(["x", "default", "xtest", "test_timed_set"])

    worker.send_task("mrq.basetasks.cleaning.CleanKnownQueues", {}, block=True)

    # Now we're good
    assert set(Queue.redis_known_queues().keys()) == set(["default", "xtest", "test_timed_set"])
Example #3
def test_interrupt_redis_started_jobs(worker):

    worker.start(
        queues="xxx", flags=" --config tests/fixtures/config-lostjobs.py")

    worker.send_task("tests.tasks.general.Add", {
                     "a": 41, "b": 1, "sleep": 10}, block=False, queue="xxx")
    worker.send_task("tests.tasks.general.Add", {
                     "a": 41, "b": 1, "sleep": 10}, block=False, queue="xxx")

    time.sleep(3)

    worker.stop(deps=False)

    assert Queue("xxx").size() == 0
    assert connections.redis.zcard(Queue.redis_key_started()) == 2

    worker.start(queues="default", start_deps=False, flush=False)

    assert connections.redis.zcard(Queue.redis_key_started()) == 2

    res = worker.send_task("mrq.basetasks.cleaning.RequeueRedisStartedJobs", {
        "timeout": 0
    }, block=True, queue="default")

    assert res["fetched"] == 2
    assert res["requeued"] == 2

    assert Queue("xxx").size() == 2
    assert Queue("default").size() == 0
    assert connections.redis.zcard(Queue.redis_key_started()) == 0
Example #4
    def run(self, params):

        # If there are more than this many items on the queue, we don't try to check if our mongodb
        # jobs are still queued.
        max_queue_items = params.get("max_queue_items", 1000)

        stats = {"fetched": 0, "requeued": 0}

        # This was only checking in Redis and wasn't resistant to a redis-wide flush.
        # Doing Queue.all() is slower but covers more edge cases.
        # all_queues = Queue.all_known()

        all_queues = Queue.all()

        log.info("Checking %s queues" % len(all_queues))

        for queue_name in all_queues:

            queue = Queue(queue_name)
            queue_size = queue.size()

            # If the queue is raw, the jobs were only stored in redis so they are lost for good.
            if queue.is_raw:
                continue

            log.info("Checking queue %s" % queue_name)

            if queue_size > max_queue_items:
                log.info("Stopping because queue %s has %s items" % (queue_name, queue_size))
                continue

            queue_jobs_ids = set(queue.list_job_ids(limit=max_queue_items + 1))
            if len(queue_jobs_ids) >= max_queue_items:
                log.info(
                    "Stopping because queue %s actually had more than %s items" % (queue_name, len(queue_jobs_ids))
                )
                continue

            for job_data in connections.mongodb_jobs.mrq_jobs.find(
                {"queue": queue_name, "status": "queued"}, projection={"_id": 1}
            ).sort([["_id", 1]]):

                stats["fetched"] += 1

                if str(job_data["_id"]) in queue_jobs_ids:
                    log.info("Found job %s on queue %s. Stopping" % (job_data["_id"], queue.id))
                    break

                # At this point, this job is not on the queue and we're sure
                # the queue is less than max_queue_items
                # We can safely requeue the job.
                log.info("Requeueing %s on %s" % (job_data["_id"], queue.id))

                stats["requeued"] += 1
                job = Job(job_data["_id"])
                job.requeue(queue=queue_name)

        return stats
Example #5
  def send_raw_tasks(self, queue, params_list, start=True, block=True):
    if not self.started and start:
      self.start()

    send_raw_tasks(queue, params_list)

    if block:
      # Wait for the queue to be empty. Might be error-prone when tasks are held in memory between the two checks
      q = Queue(queue)
      while q.size() > 0 or self.mongodb_jobs.mrq_jobs.find({"status": "started"}).count() > 0:
        # print "S", q.size(), self.mongodb_jobs.mrq_jobs.find({"status": "started"}).count()
        time.sleep(0.1)
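A sketch of how this test helper is called in the raw-queue tests (Example #24): params_list is a dict mapping raw job payloads to their scores in the sorted set. The queue name and timestamps below are illustrative assumptions, and worker is the test fixture:

# Hedged sketch: push two raw jobs to a timed raw queue through the helper above,
# mirroring the call made in Example #24.
import time

now = int(time.time())
worker.send_raw_tasks("test_timed_set", {
    "aaa": now - 10,  # already due
    "bbb": now + 5    # becomes due in 5 seconds
}, block=False)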
Example #6
    def run(self, params):

        max_age = int(params.get("max_age") or (7 * 86400))

        known_queues = Queue.redis_known_queues()

        # Only clean queues older than N days
        time_threshold = time.time() - max_age
        for queue, time_last_used in known_queues.iteritems():
            if time_last_used < time_threshold:
                q = Queue(queue, add_to_known_queues=False)
                if q.size() == 0:
                    q.remove_from_known_queues()
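This looks like the CleanKnownQueues task that Example #2 triggers through the test fixture; a minimal sketch of running it with an explicit max_age (the parameter read above, in seconds). The one-day value is an illustrative assumption:

# Hedged sketch: drop known queues that are empty and unused for more than a day
# (task path and fixture call taken from Example #2).
worker.send_task("mrq.basetasks.cleaning.CleanKnownQueues", {"max_age": 86400}, block=True)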
Example #7
  def run(self, params):

    self.collection = connections.mongodb_jobs.mrq_jobs

    redis_key_started = Queue.redis_key_started()

    stats = {
      "fetched": 0,
      "requeued": 0
    }

    # Fetch all the jobs started more than a minute ago - they should not be in redis:started anymore
    job_ids = connections.redis.zrangebyscore(redis_key_started, "-inf", time.time() - params.get("timeout", 60))

    for job_id in job_ids:

      queue = Job(job_id, start=False, fetch=False).fetch(full_data=True).data["queue"]

      stats["fetched"] += 1

      log.info("Requeueing %s on %s" % (job_id, queue))

      # TODO LUA script & don't rpush if not in zset anymore.
      with connections.redis.pipeline(transaction=True) as pipeline:
        pipeline.zrem(redis_key_started, job_id)
        pipeline.rpush(Queue(queue).redis_key, job_id)
        pipeline.execute()

      stats["requeued"] += 1

    return stats
Example #8
  def run(self, params):

    self.collection = connections.mongodb_jobs.mrq_jobs

    # If there are more than this many items on the queue, we don't try to check if our mongodb
    # jobs are still queued.
    max_queue_items = params.get("max_queue_items", 1000)

    stats = {
      "fetched": 0,
      "requeued": 0
    }

    for job_data in self.collection.find({
      "status": "queued"
    }, fields={"_id": 1, "queue": 1}).sort([("_id", 1)]):

      stats["fetched"] += 1

      queue = Queue(job_data["queue"])
      queue_size = queue.size()
      if queue_size > max_queue_items:
        log.info("Stopping because queue %s has %s items" % (queue, queue_size))
        break

      queue_jobs_ids = set(queue.list_job_ids(limit=max_queue_items + 1))
      if len(queue_jobs_ids) >= max_queue_items:
        log.info("Stopping because queue %s actually had more than %s items" % (queue, len(queue_jobs_ids)))
        break

      if str(job_data["_id"]) in queue_jobs_ids:
        log.info("Stopping because we found job %s in redis" % job_data["_id"])
        break

      # At this point, this job is not on the queue and we're sure the queue is less than max_queue_items
      # We can safely requeue the job.
      log.info("Requeueing %s on %s" % (job_data["_id"], queue.id))

      stats["requeued"] += 1
      job = Job(job_data["_id"])
      job.requeue(queue=job_data["queue"])

    return stats
Example #9
    def run(self, params):

        max_age = int(params.get("max_age") or (7 * 86400))
        pretend = bool(params.get("pretend"))
        check_mongo = bool(params.get("check_mongo"))

        known_queues = Queue.redis_known_queues()

        removed_queues = []

        queues_from_config = Queue.all_known_from_config()

        print "Found %s known queues & %s from config" % (len(known_queues), len(queues_from_config))

        # Only clean queues older than N days
        time_threshold = time.time() - max_age
        for queue, time_last_used in known_queues.iteritems():
            if queue in queues_from_config:
                continue
            if time_last_used < time_threshold:
                q = Queue(queue, add_to_known_queues=False)
                size = q.size()
                if check_mongo:
                    size += connections.mongodb_jobs.mrq_jobs.count({"queue": queue})
                if size == 0:
                    removed_queues.append(queue)
                    print "Removing empty queue '%s' from known queues ..." % queue
                    if not pretend:
                        q.remove_from_known_queues()

        print "Cleaned %s queues" % len(removed_queues)

        return removed_queues
Example #10
def test_general_simple_task_reverse(worker):

    worker.start(queues="default_reverse xtest test_timed_set", flags="--config tests/fixtures/config-raw1.py")

    result = worker.send_tasks("tests.tasks.general.Add", [
        {"a": 41, "b": 1, "sleep": 1},
        {"a": 41, "b": 1, "sleep": 1},
        {"a": 40, "b": 1, "sleep": 1}
    ])

    assert result == [42, 42, 41]

    assert [x["result"] for x in worker.mongodb_jobs.mrq_jobs.find().sort(
        [["dateupdated", 1]])] == [41, 42, 42]

    # Test known queues
    from mrq.queue import Queue, send_task
    assert Queue.redis_known_queues() == set(["default", "xtest", "test_timed_set"])

    # Try queueing a task
    send_task("tests.tasks.general.Add", {"a": 41, "b": 1, "sleep": 1}, queue="x")
    time.sleep(1)
    assert Queue.redis_known_queues() == set(["x", "default", "xtest", "test_timed_set"])
Example #11
 def run(self, params):
     return list(Queue.all_known())
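Elsewhere in these examples the known-queues bookkeeping behind this listing is done with add_to_known_queues, redis_known_queues and remove_from_known_queues (Examples #2 and #6). A minimal sketch using only those calls; the queue name "x" is an illustrative assumption:

# Hedged sketch: register a queue, inspect the registry, then drop it again.
import time
from mrq.queue import Queue

Queue("x").add_to_known_queues(timestamp=time.time())  # register or refresh "x"
print(Queue.redis_known_queues())  # dict of {queue_name: last_used_timestamp}
Queue("x").remove_from_known_queues()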
Example #12
def api_datatables(unit):

    # import time
    # time.sleep(5)

    collection = None
    sort = None
    skip = int(request.args.get("iDisplayStart", 0))
    limit = int(request.args.get("iDisplayLength", 20))

    if unit == "queues":

        queues = []
        for name, jobs in Queue.all_known().items():
            queue = Queue(name)
            q = {
                "name": name,
                "jobs": jobs,  # MongoDB size
                "size": queue.size(),  # Redis size
                "is_sorted": queue.is_sorted,
                "is_timed": queue.is_timed,
                "is_raw": queue.is_raw,
                "is_set": queue.is_set
            }

            if queue.is_sorted:
                raw_config = cfg.get("raw_queues", {}).get(name, {})
                q["graph_config"] = raw_config.get("dashboard_graph", lambda: {
                    "start": time.time() - (7 * 24 * 3600),
                    "stop": time.time() + (7 * 24 * 3600),
                    "slices": 30
                } if queue.is_timed else {
                    "start": 0,
                    "stop": 100,
                    "slices": 30
                })()
                if q["graph_config"]:
                    q["graph"] = queue.get_sorted_graph(**q["graph_config"])

            if queue.is_timed:
                q["jobs_to_dequeue"] = queue.count_jobs_to_dequeue()

            queues.append(q)

        queues.sort(key=lambda x: -(x["jobs"] + x["size"]))

        data = {
            "aaData": queues,
            "iTotalDisplayRecords": len(queues)
        }

    elif unit == "workers":
        fields = None
        query = {"status": {"$nin": ["stop"]}}
        collection = connections.mongodb_jobs.mrq_workers
        sort = [("datestarted", -1)]

        if request.args.get("showstopped"):
            query = {}

    elif unit == "scheduled_jobs":
        collection = connections.mongodb_jobs.mrq_scheduled_jobs
        fields = None
        query = {}

    elif unit == "jobs":

        fields = None
        query = build_api_datatables_query(request)
        sort = [("_id", 1)]

        # We can't easily search params because we store them as decoded JSON in mongo :(
        # Add a string index?
        # if request.args.get("sSearch"):
        #   query.update(json.loads(request.args.get("sSearch")))
        collection = connections.mongodb_jobs.mrq_jobs

    if collection is not None:

        cursor = collection.find(query, projection=fields)

        if sort:
            cursor.sort(sort)

        if skip is not None:
            cursor.skip(skip)

        if limit is not None:
            cursor.limit(limit)

        data = {
            "aaData": list(cursor),
            "iTotalDisplayRecords": collection.find(query).count()
        }

    data["sEcho"] = request.args["sEcho"]

    return jsonify(data)
Example #13
def test_interrupt_worker_double_sigint(worker, p_flags):
    """ Test what happens when we interrupt a running worker with 2 SIGINTs. """

    start_time = time.time()

    worker.start(flags=p_flags)

    job_id = worker.send_task("tests.tasks.general.Add", {
        "a": 41,
        "b": 1,
        "sleep": 20
    },
                              block=False)

    while Job(job_id).fetch().data["status"] == "queued":
        time.sleep(0.1)

    job = Job(job_id).fetch().data
    assert job["status"] == "started"

    # Stop the worker gracefully. The first job should still finish!
    os.kill(worker.process.pid, 2)

    time.sleep(1)

    # Should not be accepting new jobs!
    job_id2 = worker.send_task("tests.tasks.general.Add", {
        "a": 42,
        "b": 1,
        "sleep": 20
    },
                               block=False,
                               start=False)

    time.sleep(1)

    job2 = Job(job_id2).fetch().data
    assert job2.get("status") == "queued"

    job = Job(job_id).fetch().data
    assert job["status"] == "started"

    # Sending a second kill -2 should make it stop
    os.kill(worker.process.pid, 2)

    while Job(job_id).fetch().data["status"] == "started":
        time.sleep(0.1)

    job = Job(job_id).fetch().data
    assert job["status"] == "interrupt"

    assert time.time() - start_time < 15

    # Then try the cleaning task that requeues interrupted jobs

    assert Queue("default").size() == 1

    worker.start(queues="cleaning", deps=False, flush=False)

    res = worker.send_task("mrq.basetasks.cleaning.RequeueInterruptedJobs", {},
                           block=True,
                           queue="cleaning")

    assert res["requeued"] == 1

    assert Queue("default").size() == 2

    Queue("default").list_job_ids() == [str(job_id2), str(job_id)]

    job = Job(job_id).fetch().data
    assert job["status"] == "queued"
    assert job["queue"] == "default"
Example #14
def test_interrupt_worker_sigkill(worker, p_flags):
    """ Test what happens when we interrupt a running worker with 1 SIGKILL.

        SIGKILLs can't be intercepted by the process so the job should still be in 'started' state.
    """

    start_time = time.time()

    worker.start(flags=p_flags +
                 " --config tests/fixtures/config-shorttimeout.py")

    cfg = json.loads(
        worker.send_task("tests.tasks.general.GetConfig", {}, block=True))

    assert cfg["tasks"]["tests.tasks.general.Add"]["timeout"] == 200

    job_id = worker.send_task("tests.tasks.general.Add", {
        "a": 41,
        "b": 1,
        "sleep": 20
    },
                              block=False)

    time.sleep(3)

    worker.stop(block=True, sig=9, deps=False)

    time.sleep(1)

    # This is a bit tricky, but when getting the job from the current python environment, its timeout should
    # be the default 3600 and not 200 because we didn't configure ourselves
    # with config-shorttimeout.py
    job = Job(job_id).fetch().data
    assert Job(job_id).fetch().timeout == 3600

    assert job["status"] == "started"

    assert time.time() - start_time < 10

    # Then try the cleaning task that requeues started jobs

    # We need to fake the datestarted
    worker.mongodb_jobs.mrq_jobs.update({"_id": ObjectId(job_id)}, {
        "$set": {
            "datestarted":
            datetime.datetime.utcnow() - datetime.timedelta(seconds=300)
        }
    })

    assert Queue("default").size() == 0

    worker.start(queues="cleaning",
                 deps=False,
                 flush=False,
                 flags=" --config tests/fixtures/config-shorttimeout.py")

    res = worker.send_task("mrq.basetasks.cleaning.RequeueStartedJobs",
                           {"timeout": 110},
                           block=True,
                           queue="cleaning")

    assert res["requeued"] == 0
    assert res["started"] == 2  # current job should count too

    assert Queue("default").size() == 0

    job = Job(job_id).fetch().data
    assert job["status"] == "started"
    assert job["queue"] == "default"

    # Now do it again with a small enough timeout
    res = worker.send_task("mrq.basetasks.cleaning.RequeueStartedJobs",
                           {"timeout": 90},
                           block=True,
                           queue="cleaning")

    assert res["requeued"] == 1
    assert res["started"] == 2  # current job should count too
    assert Queue("default").size() == 1

    Queue("default").list_job_ids() == [str(job_id)]

    job = Job(job_id).fetch().data
    assert job["status"] == "queued"
    assert job["queue"] == "default"
Example #15
def api_datatables(unit):

    # import time
    # time.sleep(5)

    collection = None
    sort = None
    skip = int(request.args.get("iDisplayStart", 0))
    limit = int(request.args.get("iDisplayLength", 20))

    if unit == "queues":

        queues = []
        for name in Queue.all_known():
            queue = Queue(name)

            q = {
                "name": name,
                "size": queue.size(),  # Redis size
                "is_sorted": queue.is_sorted,
                "is_timed": queue.is_timed,
                "is_raw": queue.is_raw,
                "is_set": queue.is_set
            }

            if queue.is_sorted:
                raw_config = queue.get_config()
                q["graph_config"] = raw_config.get("dashboard_graph", lambda: {
                    "start": time.time() - (7 * 24 * 3600),
                    "stop": time.time() + (7 * 24 * 3600),
                    "slices": 30
                } if queue.is_timed else {
                    "start": 0,
                    "stop": 100,
                    "slices": 30
                })()
                if q["graph_config"]:
                    q["graph"] = queue.get_sorted_graph(**q["graph_config"])

            if queue.is_timed:
                q["jobs_to_dequeue"] = queue.count_jobs_to_dequeue()

            queues.append(q)

        queues.sort(key=lambda x: -x["size"])

        data = {
            "aaData": queues,
            "iTotalDisplayRecords": len(queues)
        }

    elif unit == "workers":
        fields = None
        collection = connections.mongodb_jobs.mrq_workers
        sort = [("datestarted", -1)]

        query = {}
        if request.args.get("id"):
            query["_id"] = ObjectId(request.args["id"])
        else:
            if request.args.get("status"):
                statuses = request.args["status"].split("-")
                query["status"] = {"$in": statuses}
            if request.args.get("ip"):
                query["$or"] = [{"config.local_ip": request.args["ip"]}, {"config.external_ip": request.args["ip"]}]
            if request.args.get("queue"):
                query["config.queues"] = request.args["queue"]

    elif unit == "agents":
        fields = None
        query = {"status": {"$nin": ["stop"]}}
        collection = connections.mongodb_jobs.mrq_agents
        sort = [("datestarted", -1)]

        if request.args.get("showstopped"):
            query = {}

    elif unit == "scheduled_jobs":
        collection = connections.mongodb_jobs.mrq_scheduled_jobs
        fields = None
        query = {}

    elif unit == "jobs":

        fields = None
        query = build_api_datatables_query(request)
        sort = None  # TODO [("_id", 1)]

        # We can't easily search params because we store them as decoded JSON in mongo :(
        # Add a string index?
        # if request.args.get("sSearch"):
        #   query.update(json.loads(request.args.get("sSearch")))
        collection = connections.mongodb_jobs.mrq_jobs

    if collection is not None:

        cursor = collection.find(query, projection=fields)

        if sort:
            cursor.sort(sort)

        if skip is not None:
            cursor.skip(skip)

        if limit is not None:
            cursor.limit(limit)

        data = {
            "aaData": list(cursor),
            "iTotalDisplayRecords": collection.find(query).count()
        }

    data["sEcho"] = request.args["sEcho"]

    return jsonify(data)
Example #16
    def perform_action(self, action, query, destination_queue):

        stats = {"requeued": 0, "cancelled": 0}

        if action == "cancel":

            default_job_timeout = get_current_config()["default_job_timeout"]

            # Finding the ttl here to expire is a bit hard because we may have mixed paths
            # and hence mixed ttls.
            # If we are cancelling by path, get this ttl
            if query.get("path"):
                result_ttl = get_task_cfg(query["path"]).get(
                    "result_ttl", default_job_timeout)

            # If not, get the maximum ttl of all tasks.
            else:

                tasks_defs = get_current_config().get("tasks", {})
                tasks_ttls = [
                    cfg.get("result_ttl", 0) for cfg in tasks_defs.values()
                ]

                result_ttl = max([default_job_timeout] + tasks_ttls)

            now = datetime.datetime.utcnow()
            ret = self.collection.update(query, {
                "$set": {
                    "status": "cancel",
                    "dateexpires":
                    now + datetime.timedelta(seconds=result_ttl),
                    "dateupdated": now
                }
            },
                                         multi=True)
            stats["cancelled"] = ret["n"]

            # Special case when emptying just by queue name: empty it directly!
            # In this case we could also lose some jobs that were queued after
            # the MongoDB update. They will be "lost" and requeued later like the other case
            # after the Redis BLPOP
            if query.keys() == ["queue"]:
                Queue(query["queue"]).empty()

        elif action in ("requeue", "requeue_retry"):

            # Requeue task by groups of maximum 1k items (if all in the same
            # queue)
            cursor = self.collection.find(query, projection=["_id", "queue"])

            # We must freeze the list because the updates below would change it.
            # This might not fit in memory; consider adding {"status": {"$ne": "queued"}}
            # to the query.
            fetched_jobs = list(cursor)

            for jobs in group_iter(fetched_jobs, n=1000):

                jobs_by_queue = defaultdict(list)
                for job in jobs:
                    jobs_by_queue[job["queue"]].append(job["_id"])
                    stats["requeued"] += 1

                for queue in jobs_by_queue:

                    updates = {
                        "status": "queued",
                        "dateupdated": datetime.datetime.utcnow()
                    }

                    if destination_queue is not None:
                        updates["queue"] = destination_queue

                    if action == "requeue":
                        updates["retry_count"] = 0

                    self.collection.update(
                        {"_id": {
                            "$in": jobs_by_queue[queue]
                        }}, {"$set": updates},
                        multi=True)

                    # Between these two lines, jobs can become "lost" too.

                    Queue(destination_queue or queue).enqueue_job_ids(
                        [str(x) for x in jobs_by_queue[queue]])

        print stats

        return stats
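Example #19 drives this JobAction task end to end; a minimal sketch of the two calls it makes, assuming the worker test fixture and a "noexec" queue that already holds the jobs:

# Hedged sketch: cancel every job matching the query, then requeue them
# (task path and parameters taken from Example #19).
res = worker.send_task("mrq.basetasks.utils.JobAction",
                       {"queue": "noexec", "action": "cancel"}, block=True)
print("cancelled %s jobs" % res["cancelled"])

res = worker.send_task("mrq.basetasks.utils.JobAction",
                       {"queue": "noexec", "action": "requeue"}, block=True)
print("requeued %s jobs" % res["requeued"])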
Example #17
    def run(self, params):

        # If there are more than this many items on the queue, we don't try to check if our mongodb
        # jobs are still queued.
        max_queue_items = params.get("max_queue_items", 1000)

        stats = {"fetched": 0, "requeued": 0}

        # This was only checking in Redis and wasn't resistant to a redis-wide flush.
        # Doing Queue.all() is slower but covers more edge cases.
        # all_queues = Queue.all_known()

        all_queues = Queue.all()

        log.info("Checking %s queues" % len(all_queues))

        for queue_name in all_queues:

            queue = Queue(queue_name)
            queue_size = queue.size()

            # If the queue is raw, the jobs were only stored in redis so they are lost for good.
            if queue.is_raw:
                continue

            log.info("Checking queue %s" % queue_name)

            if queue_size > max_queue_items:
                log.info("Stopping because queue %s has %s items" %
                         (queue_name, queue_size))
                continue

            queue_jobs_ids = set(queue.list_job_ids(limit=max_queue_items + 1))
            if len(queue_jobs_ids) >= max_queue_items:
                log.info(
                    "Stopping because queue %s actually had more than %s items"
                    % (queue_name, len(queue_jobs_ids)))
                continue

            for job_data in connections.mongodb_jobs.mrq_jobs.find(
                {
                    "queue": queue_name,
                    "status": "queued"
                },
                    projection={
                        "_id": 1
                    }).sort([["_id", 1]]):

                stats["fetched"] += 1

                if str(job_data["_id"]) in queue_jobs_ids:
                    log.info("Found job %s on queue %s. Stopping" %
                             (job_data["_id"], queue.id))
                    break

                # At this point, this job is not on the queue and we're sure
                # the queue is less than max_queue_items
                # We can safely requeue the job.
                log.info("Requeueing %s on %s" % (job_data["_id"], queue.id))

                stats["requeued"] += 1
                job = Job(job_data["_id"])
                job.requeue(queue=queue_name)

        return stats
Example #18
 def run(self, params):
     key = "%s:known_queues" % get_current_config()["redis_prefix"]
     for queue in connections.redis.smembers(key):
         Queue(queue).add_to_known_queues()
Example #19
def test_performance_queue_cancel_requeue(worker):

    worker.start(trace=False)

    n_tasks = 10000

    start_time = time.time()

    worker.send_tasks("tests.tasks.general.Add", [{
        "a": i,
        "b": 0,
        "sleep": 0
    } for i in range(n_tasks)],
                      queue="noexec",
                      block=False)

    queue_time = time.time() - start_time

    print "Queued %s tasks in %s seconds (%s/s)" % (
        n_tasks, queue_time, float(n_tasks) / queue_time)
    assert queue_time < 2

    assert Queue("noexec").size() == n_tasks
    assert worker.mongodb_jobs.mrq_jobs.count() == n_tasks
    assert worker.mongodb_jobs.mrq_jobs.find({
        "status": "queued"
    }).count() == n_tasks

    # Then cancel them all
    start_time = time.time()

    res = worker.send_task("mrq.basetasks.utils.JobAction", {
        "queue": "noexec",
        "action": "cancel"
    },
                           block=True)
    assert res["cancelled"] == n_tasks
    queue_time = time.time() - start_time
    print "Cancelled %s tasks in %s seconds (%s/s)" % (
        n_tasks, queue_time, float(n_tasks) / queue_time)
    assert queue_time < 5
    assert worker.mongodb_jobs.mrq_jobs.find({
        "status": "cancel"
    }).count() == n_tasks

    # Special case because we cancelled by queue: they should have been
    # removed from redis.
    assert Queue("noexec").size() == 0

    # Then requeue them all
    start_time = time.time()

    res = worker.send_task("mrq.basetasks.utils.JobAction", {
        "queue": "noexec",
        "action": "requeue"
    },
                           block=True)

    queue_time = time.time() - start_time
    print "Requeued %s tasks in %s seconds (%s/s)" % (
        n_tasks, queue_time, float(n_tasks) / queue_time)
    assert queue_time < 2
    assert worker.mongodb_jobs.mrq_jobs.find({
        "status": "queued"
    }).count() == n_tasks

    # They should be back in the queue
    assert Queue("noexec").size() == n_tasks

    assert res["requeued"] == n_tasks
Example #20
def test_interrupt_redis_flush(worker):
    """ Test what happens when we flush redis after queueing jobs.

        The RequeueLostJobs task should put them back in redis.
    """

    worker.start(queues="cleaning", deps=True, flush=True)

    job_id1 = worker.send_task("tests.tasks.general.Add", {
        "a": 41,
        "b": 1,
        "sleep": 10
    },
                               block=False,
                               queue="default")
    job_id2 = worker.send_task("tests.tasks.general.Add", {
        "a": 41,
        "b": 1,
        "sleep": 10
    },
                               block=False,
                               queue="default")
    job_id3 = worker.send_task("tests.tasks.general.Add", {
        "a": 41,
        "b": 1,
        "sleep": 10
    },
                               block=False,
                               queue="otherq")

    assert Queue("default").size() == 2
    assert Queue("otherq").size() == 1

    res = worker.send_task("mrq.basetasks.cleaning.RequeueLostJobs", {},
                           block=True,
                           queue="cleaning")

    # We should try the first job on each queue only, and when seeing it's there we should
    # stop.
    assert res["fetched"] == 2
    assert res["requeued"] == 0

    assert Queue("default").size() == 2
    assert Queue("otherq").size() == 1

    # Then flush redis!
    worker.fixture_redis.flush()

    # Assert the queues are empty.
    assert Queue("default").size() == 0
    assert Queue("otherq").size() == 0

    res = worker.send_task("mrq.basetasks.cleaning.RequeueLostJobs", {},
                           block=True,
                           queue="cleaning")

    assert res["fetched"] == 3
    assert res["requeued"] == 3

    assert Queue("default").size() == 2
    assert Queue("otherq").size() == 1

    assert Queue("default").list_job_ids() == [str(job_id1), str(job_id2)]
    assert Queue("otherq").list_job_ids() == [str(job_id3)]
Example #21
    def perform_action(self, action, query, destination_queue):

        stats = {"requeued": 0, "cancelled": 0}

        if action == "cancel":

            default_job_timeout = get_current_config()["default_job_timeout"]

            # Finding the ttl here to expire is a bit hard because we may have mixed paths
            # and hence mixed ttls.
            # If we are cancelling by path, get this ttl
            if query.get("path"):
                result_ttl = get_task_cfg(query["path"]).get(
                    "result_ttl", default_job_timeout)

            # If not, get the maximum ttl of all tasks.
            else:

                tasks_defs = get_current_config().get("tasks", {})
                tasks_ttls = [
                    cfg.get("result_ttl", 0) for cfg in itervalues(tasks_defs)
                ]

                result_ttl = max([default_job_timeout] + tasks_ttls)

            now = datetime.datetime.utcnow()

            size_by_queues = defaultdict(int)
            if "queue" not in query:
                for job in self.collection.find(query, projection={"queue":
                                                                   1}):
                    size_by_queues[job["queue"]] += 1

            ret = self.collection.update(query, {
                "$set": {
                    "status": "cancel",
                    "dateexpires":
                    now + datetime.timedelta(seconds=result_ttl),
                    "dateupdated": now
                }
            },
                                         multi=True)
            stats["cancelled"] = ret["n"]

            if "queue" in query:
                if isinstance(query["queue"], str):
                    size_by_queues[query["queue"]] = ret["n"]
            set_queues_size(size_by_queues, action="decr")

            # Special case when emptying just by queue name: empty it directly!
            # In this case we could also lose some jobs that were queued after
            # the MongoDB update. They will be "lost" and requeued later like the other case
            # after the Redis BLPOP
            if list(query.keys()) == ["queue"] and isinstance(
                    query["queue"], basestring):
                Queue(query["queue"]).empty()

        elif action in ("requeue", "requeue_retry"):

            # Requeue task by groups of maximum 1k items (if all in the same
            # queue)
            status_query = query.get("status")
            if not status_query:
                query["status"] = {"$ne": "queued"}

            cursor = self.collection.find(query, projection=["_id", "queue"])

            for jobs in group_iter(cursor, n=1000):

                jobs_by_queue = defaultdict(list)
                for job in jobs:
                    jobs_by_queue[job["queue"]].append(job["_id"])
                    stats["requeued"] += 1

                for queue in jobs_by_queue:
                    updates = {
                        "status": "queued",
                        "datequeued": datetime.datetime.utcnow(),
                        "dateupdated": datetime.datetime.utcnow()
                    }

                    if destination_queue is not None:
                        updates["queue"] = destination_queue

                    if action == "requeue":
                        updates["retry_count"] = 0

                    self.collection.update(
                        {"_id": {
                            "$in": jobs_by_queue[queue]
                        }}, {"$set": updates},
                        multi=True)

                set_queues_size({
                    queue: len(jobs)
                    for queue, jobs in jobs_by_queue.items()
                })

        return stats
Example #22
def test_cancel_by_path(worker, p_query):

    expected_action_jobs = p_query[1]

    # Start the worker with only one greenlet so that tasks execute
    # sequentially
    worker.start(flags="--greenlets 1", queues="default q1 q2")

    job_ids = []
    job_ids.append(
        worker.send_task("tests.tasks.general.Add", {
            "a": 41,
            "b": 1,
            "sleep": 2
        },
                         queue="default",
                         block=False))

    params = {"action": "cancel", "status": "queued"}
    params.update(p_query[0])

    requeue_job = worker.send_task("mrq.basetasks.utils.JobAction",
                                   params,
                                   block=False)

    job_ids.append(
        worker.send_task("tests.tasks.general.MongoInsert", {"a": 42},
                         queue="q1",
                         block=False))
    job_ids.append(
        worker.send_task("tests.tasks.general.MongoInsert", {"a": 42},
                         queue="q2",
                         block=False))
    job_ids.append(
        worker.send_task("tests.tasks.general.MongoInsert", {"a": 43},
                         queue="q2",
                         block=False))
    job_ids.append(
        worker.send_task("tests.tasks.general.MongoInsert2", {"a": 44},
                         queue="q1",
                         block=False))

    Job(job_ids[-1]).wait(poll_interval=0.01)

    # Leave some time to unqueue job_id4 without executing.
    time.sleep(1)
    worker.stop(deps=False)

    jobs = [Job(job_id).fetch().data for job_id in job_ids]

    assert jobs[0]["status"] == "success"
    assert jobs[0]["result"] == 42

    assert Job(requeue_job).fetch(
    ).data["result"]["cancelled"] == expected_action_jobs

    # Check that the right number of jobs ran.
    assert worker.mongodb_logs.tests_inserts.count(
    ) == len(job_ids) - 1 - expected_action_jobs

    action_jobs = list(worker.mongodb_jobs.mrq_jobs.find({"status": "cancel"}))
    assert len(action_jobs) == expected_action_jobs
    assert set([x.get("result") for x in action_jobs]) == set([None])

    assert Queue("default").size() == 0
    assert Queue("q1").size() == 0
    assert Queue("q2").size() == 0

    worker.mongodb_logs.tests_inserts.remove({})

    # Then requeue the same jobs
    params = {"action": "requeue"}
    params.update(p_query[0])

    worker.start(flags="--gevent 1", queues="default", flush=False)

    ret = worker.send_task("mrq.basetasks.utils.JobAction", params, block=True)

    assert ret["requeued"] == expected_action_jobs

    worker.stop(deps=False)

    assert worker.mongodb_jobs.mrq_jobs.find({
        "status": "queued"
    }).count() == expected_action_jobs

    assert Queue("default").size() + Queue("q1").size() + \
        Queue("q2").size() == expected_action_jobs
Example #23
def api_datatables(unit):
    collection = None
    sort = None
    skip = int(request.args.get("iDisplayStart", 0))
    limit = int(request.args.get("iDisplayLength", 20))
    with_mongodb_size = bool(request.args.get("with_mongodb_size"))

    if unit == "queues":

        queues = []
        for name in Queue.all_known():
            queue = Queue(name)

            jobs = None
            if with_mongodb_size:
                jobs = connections.mongodb_jobs.mrq_jobs.count({
                    "queue":
                    name,
                    "status":
                    request.args.get("status") or "queued"
                })

            q = {
                "name": name,
                "jobs": jobs,  # MongoDB size
                "size": queue.size(),  # Redis size
                "is_sorted": queue.is_sorted,
                "is_timed": queue.is_timed,
                "is_raw": queue.is_raw,
                "is_set": queue.is_set
            }

            if queue.is_sorted:
                raw_config = cfg.get("raw_queues", {}).get(name, {})
                q["graph_config"] = raw_config.get(
                    "dashboard_graph", lambda: {
                        "start": time.time() - (7 * 24 * 3600),
                        "stop": time.time() + (7 * 24 * 3600),
                        "slices": 30
                    } if queue.is_timed else {
                        "start": 0,
                        "stop": 100,
                        "slices": 30
                    })()
                if q["graph_config"]:
                    q["graph"] = queue.get_sorted_graph(**q["graph_config"])

            if queue.is_timed:
                q["jobs_to_dequeue"] = queue.count_jobs_to_dequeue()

            queues.append(q)

        sSortField, sSortDirection = _get_sort_args(request, 'size', 'desc')
        queues.sort(key=lambda x: x.get(sSortField, 0),
                    reverse=sSortDirection == 'desc')

        data = {"aaData": queues, "iTotalDisplayRecords": len(queues)}

    elif unit == "workers":
        fields = None
        query = {"status": {"$nin": ["stop"]}}
        collection = connections.mongodb_jobs.mrq_workers
        sSortField, sSortDirection = _get_sort_args(request, 'datestarted',
                                                    'desc')
        sort = [(sSortField, -1 if sSortDirection == 'desc' else 1)]

        if request.args.get("showstopped"):
            query = {}

    elif unit == "scheduled_jobs":
        collection = connections.mongodb_jobs.mrq_scheduled_jobs
        fields = None
        query = {}
        sSortField, sSortDirection = _get_sort_args(request, 'interval',
                                                    'desc')
        sort = [(sSortField, -1 if sSortDirection == 'desc' else 1)]

    elif unit == "jobs":

        fields = None
        query = build_api_datatables_query(request)
        sSortField, sSortDirection = _get_sort_args(request)
        sort = [(sSortField, -1 if sSortDirection == 'desc' else 1)]

        time_s = request.args.get("time_s", '')
        time_e = request.args.get("time_e", '')
        if time_s and not time_e:
            print 'datestarted', time_s
            query.update({'datestarted': {'$gte': str2datetime(time_s)}})
        elif time_e and not time_s:
            print 'datestarted', time_e
            query.update({'datestarted': {'$lte': str2datetime(time_e)}})
        elif time_s and time_e:
            print 'datestarted', time_s, time_e
            query.update({
                'datestarted': {
                    '$gte': str2datetime(time_s),
                    '$lte': str2datetime(time_e)
                }
            })

        # We can't easily search params because we store them as decoded JSON in mongo :(
        # Add a string index?
        # if request.args.get("sSearch"):
        #   query.update(json.loads(request.args.get("sSearch")))
        collection = connections.mongodb_jobs.mrq_jobs

    if collection is not None:

        cursor = collection.find(query, projection=fields)

        if sort:
            cursor.sort(sort)

        if skip is not None:
            cursor.skip(skip)

        if limit is not None:
            cursor.limit(limit)

        data = {
            "aaData": list(cursor),
            "iTotalDisplayRecords": collection.find(query).count()
        }

    data["sEcho"] = request.args["sEcho"]

    return jsonify(data)
Example #24
def test_raw_sorted(worker, p_queue, p_pushback, p_timed, p_flags):

    worker.start(flags="%s --config tests/fixtures/config-raw1.py" %
                 p_flags, queues=p_queue)

    test_collection = worker.mongodb_logs.tests_inserts
    jobs_collection = worker.mongodb_jobs.mrq_jobs

    current_time = int(time.time())

    assert jobs_collection.count() == 0

    assert Queue(p_queue).size() == 0

    # Schedule one in the past, two in the future
    worker.send_raw_tasks(p_queue, {
        "aaa": current_time - 10,
        "bbb": current_time + 5,
        "ccc": current_time + 10
    }, block=False)

    # Re-schedule
    worker.send_raw_tasks(p_queue, {
        "ccc": current_time + 6
    }, block=False)

    time.sleep(3)

    if not p_timed:

        assert Queue(p_queue).size() == 0
        assert test_collection.count() == 3
        assert list(test_collection.find(projection={"params": 1, "_id": 0}).limit(1)) == [
            {"params": {"sorted_set": "aaa"}}
        ]
        return

    if p_pushback:
        assert Queue(p_queue).size() == 3
        assert set(Queue(p_queue).list_raw_jobs()) == set([b"bbb", b"ccc", b"aaa"])
    else:
        assert Queue(p_queue).size() == 2
        assert set(Queue(p_queue).list_raw_jobs()) == set([b"bbb", b"ccc"])

    # The other two jobs should not yet even exist in mrq_jobs
    assert jobs_collection.count() == 1
    assert list(jobs_collection.find())[0]["status"] == "success"

    assert list(test_collection.find(projection={"params": 1, "_id": 0})) == [
        {"params": {"timed_set": "aaa"}}
    ]

    # Then wait for the second job to be done
    time.sleep(5)

    if p_pushback:
        assert Queue(p_queue).size() == 3
    else:
        assert Queue(p_queue).size() == 0

    assert jobs_collection.count() == 3
    assert list(jobs_collection.find())[1]["status"] == "success"
    assert list(jobs_collection.find())[2]["status"] == "success"

    assert list(jobs_collection.find())[2]["worker"]

    assert test_collection.count() == 3