Example #1
def cleanup_tasks():
    # In case of a cold restart of the workers, there might be jobs that
    # still have their "lock" object but aren't actually going to run.
    # This job removes them.
    lock_keys = redis_connection.keys(
        "query_hash_job:*")  # TODO: use set instead of keys command
    if not lock_keys:
        return

    query_tasks = [
        QueryTask(job_id=j) for j in redis_connection.mget(lock_keys)
    ]

    logger.info("Found %d locks", len(query_tasks))

    inspect = celery.control.inspect()
    active_tasks = inspect.active()
    if active_tasks is None:
        active_tasks = []
    else:
        active_tasks = active_tasks.values()

    all_tasks = set()
    for task_list in active_tasks:
        for task in task_list:
            all_tasks.add(task['id'])

    logger.info("Active jobs count: %d", len(all_tasks))

    for i, t in enumerate(query_tasks):
        if t.ready():
            # If the locked task is already done (failed, finished, or revoked),
            # we don't need the lock anymore.
            logger.warning("%s is ready (%s), removing lock.", lock_keys[i],
                           t.celery_status)
            redis_connection.delete(lock_keys[i])
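
The TODO above flags the KEYS command, which walks the entire keyspace and blocks Redis while it scans. Below is a minimal sketch of the suggested alternative, assuming redis-py: register every lock key in a Redis set when it is created, then enumerate the set members instead of scanning. The set name "query_hash_jobs" and the helper names are illustrative assumptions, not redash's API.

LOCK_SET = "query_hash_jobs"

def register_lock(redis_connection, lock_key, job_id):
    # Write the lock and record its key in the tracking set in one
    # round trip, so the set stays consistent with the lock keys.
    pipe = redis_connection.pipeline()
    pipe.set(lock_key, job_id)
    pipe.sadd(LOCK_SET, lock_key)
    pipe.execute()

def lock_keys(redis_connection):
    # SMEMBERS is proportional to the set size; KEYS is proportional
    # to the whole keyspace and blocks the server while it runs.
    return redis_connection.smembers(LOCK_SET)

A cleanup pass built on this would also SREM a key from the set when it deletes the lock, so the set does not accumulate stale members.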
Example #2
def remove_ghost_locks():
    """
    Removes query locks that reference a nonexistent RQ job.
    """
    keys = redis_connection.keys("query_hash_job:*")
    locks = {k: redis_connection.get(k) for k in keys}
    jobs = list(rq_job_ids())

    count = 0

    for lock, job_id in locks.items():
        if job_id not in jobs:
            redis_connection.delete(lock)
            count += 1

    logger.info("Locks found: {}, Locks removed: {}".format(len(locks), count))
# atfork patches the logging module's locks so they are safely
# re-initialized after fork(), avoiding deadlocks in forked workers.
import atfork.stdlib_fixer
atfork.stdlib_fixer.fix_logging_module()

import time
from redash.data import worker
from redash import models, data_manager, redis_connection

if __name__ == '__main__':
    models.create_db(True, False)

    print "Creating data source..."
    data_source = models.DataSource.create(name="Concurrency", type="pg", options="dbname=postgres")

    print "Clear jobs/hashes:"
    redis_connection.delete("jobs")
    query_hashes = redis_connection.keys("query_hash_*")
    if query_hashes:
        redis_connection.delete(*query_hashes)

    starting_query_results_count = models.QueryResult.select().count()
    jobs_count = 5000
    workers_count = 10

    print "Creating jobs..."
    for i in xrange(jobs_count):
        query = "SELECT {}".format(i)
        print "Inserting: {}".format(query)
        data_manager.add_job(query=query, priority=worker.Job.LOW_PRIORITY,
                             data_source=data_source)

    print "Starting workers..."