def cleanup_tasks():
    # In case of a cold restart of the workers, there might be jobs that still have
    # their "lock" object but aren't really going to run. This job removes them.
    lock_keys = redis_connection.keys("query_hash_job:*")  # TODO: use set instead of keys command
    if not lock_keys:
        return

    query_tasks = [QueryTask(job_id=j) for j in redis_connection.mget(lock_keys)]
    logger.info("Found %d locks", len(query_tasks))

    inspect = celery.control.inspect()
    active_tasks = inspect.active()
    if active_tasks is None:
        active_tasks = []
    else:
        active_tasks = active_tasks.values()

    all_tasks = set()
    for task_list in active_tasks:
        for task in task_list:
            all_tasks.add(task['id'])

    logger.info("Active jobs count: %d", len(all_tasks))

    for i, t in enumerate(query_tasks):
        if t.ready():
            # If the locked task is already ready (failed, finished, revoked), we don't need the lock anymore.
            logger.warning("%s is ready (%s), removing lock.", lock_keys[i], t.celery_status)
            redis_connection.delete(lock_keys[i])
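
# cleanup_tasks relies on a QueryTask wrapper that exposes ready() and
# celery_status. The class below is a minimal, hypothetical sketch of such a
# wrapper around Celery's AsyncResult, shown only to clarify how those
# attributes are assumed to behave; it is not the actual implementation.
from celery.result import AsyncResult

class QueryTask(object):
    def __init__(self, job_id):
        # job_id is the Celery task id stored under the query_hash_job:* lock key
        self._async_result = AsyncResult(job_id, app=celery)

    def ready(self):
        # True once the task reached a terminal state (SUCCESS, FAILURE, REVOKED)
        return self._async_result.ready()

    @property
    def celery_status(self):
        # Raw Celery state string, used in the lock-removal log message
        return self._async_result.status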
def remove_ghost_locks(): """ Removes query locks that reference a non existing RQ job. """ keys = redis_connection.keys("query_hash_job:*") locks = {k: redis_connection.get(k) for k in keys} jobs = list(rq_job_ids()) count = 0 for lock, job_id in locks.items(): if job_id not in jobs: redis_connection.delete(lock) count += 1 logger.info("Locks found: {}, Locks removed: {}".format(len(locks), count))
import atfork.stdlib_fixer
atfork.stdlib_fixer.fix_logging_module()

import time

from redash.data import worker
from redash import models, data_manager, redis_connection

if __name__ == '__main__':
    models.create_db(True, False)

    print "Creating data source..."
    data_source = models.DataSource.create(name="Concurrency", type="pg", options="dbname=postgres")

    print "Clear jobs/hashes:"
    redis_connection.delete("jobs")
    query_hashes = redis_connection.keys("query_hash_*")
    if query_hashes:
        redis_connection.delete(*query_hashes)

    starting_query_results_count = models.QueryResult.select().count()

    jobs_count = 5000
    workers_count = 10

    print "Creating jobs..."
    for i in xrange(jobs_count):
        query = "SELECT {}".format(i)
        print "Inserting: {}".format(query)
        data_manager.add_job(query=query, priority=worker.Job.LOW_PRIORITY, data_source=data_source)

    print "Starting workers..."
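    # The script is truncated at this point; what follows is only a sketch of
    # how its tail end might look. Launching the workers is specific to
    # redash.data.worker and is omitted here. Instead, this assumes "jobs" is a
    # Redis list that the workers drain, polls until it is empty, and then
    # reports how many query results were created during the run.
    while redis_connection.llen("jobs") > 0:
        time.sleep(1)

    finished_query_results_count = models.QueryResult.select().count()
    print "Query results created: {}".format(finished_query_results_count - starting_query_results_count)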