def upgrade() -> None:
    with op.batch_alter_table("replies", schema=None) as batch_op:
        batch_op.add_column(sa.Column("checksum", sa.String(length=255), nullable=True))

    with op.batch_alter_table("submissions", schema=None) as batch_op:
        batch_op.add_column(sa.Column("checksum", sa.String(length=255), nullable=True))

    op.create_table(
        "revoked_tokens",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("journalist_id", sa.Integer(), nullable=True),
        sa.Column("token", sa.Text(), nullable=False),
        sa.ForeignKeyConstraint(["journalist_id"], ["journalists.id"]),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("token"),
    )

    try:
        app = create_app(config)

        # we need an app context for the rq worker extension to work properly
        with app.app_context():
            conn = op.get_bind()

            # enqueue a checksum job for every existing submission
            query = sa.text(
                """SELECT submissions.id, sources.filesystem_id, submissions.filename
                   FROM submissions
                   INNER JOIN sources
                   ON submissions.source_id = sources.id
                """
            )
            for (sub_id, filesystem_id, filename) in conn.execute(query):
                full_path = Storage.get_default().path(filesystem_id, filename)
                create_queue().enqueue(
                    queued_add_checksum_for_file,
                    Submission,
                    int(sub_id),
                    full_path,
                    app.config["SQLALCHEMY_DATABASE_URI"],
                )

            # enqueue a checksum job for every existing reply
            query = sa.text(
                """SELECT replies.id, sources.filesystem_id, replies.filename
                   FROM replies
                   INNER JOIN sources
                   ON replies.source_id = sources.id
                """
            )
            for (rep_id, filesystem_id, filename) in conn.execute(query):
                full_path = Storage.get_default().path(filesystem_id, filename)
                create_queue().enqueue(
                    queued_add_checksum_for_file,
                    Reply,
                    int(rep_id),
                    full_path,
                    app.config["SQLALCHEMY_DATABASE_URI"],
                )
    except:  # noqa
        if raise_errors:
            raise

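
# For context, a minimal sketch of the job that the migration above enqueues. This is an
# illustrative assumption rather than the project's verbatim implementation: rq workers run
# outside the Flask app context, so the job opens its own SQLAlchemy session from the given
# database URI, hashes the file on disk, and stores the digest in the row's `checksum` column.
import hashlib

from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker


def queued_add_checksum_for_file(db_model, model_id, file_path, db_uri):
    session = sessionmaker(bind=create_engine(db_uri))()
    db_obj = session.query(db_model).filter_by(id=model_id).one()

    sha = hashlib.sha256()
    with open(file_path, "rb") as f:
        for chunk in iter(lambda: f.read(64 * 1024), b""):
            sha.update(chunk)
    db_obj.checksum = "sha256:" + sha.hexdigest()

    session.add(db_obj)
    session.commit()
    # return a non-None value so rq records a result for the job
    return "success"
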
def test_worker_for_job(config):
    """
    Tests that worker_for_job works when there are multiple workers.
    """
    queue_name = "test_worker_for_job"
    q = worker_process = second_process = None
    try:
        q = worker.create_queue(queue_name)
        assert len(worker.rq_workers(q)) == 0

        # launch worker processes
        worker_process = start_rq_worker(config, queue_name)
        second_process = start_rq_worker(config, queue_name)

        # wait up to two seconds for both workers to register themselves
        i = 0
        while i < 20:
            if len(worker.rq_workers(q)) == 2:
                break
            i += 1
            time.sleep(0.1)
        assert len(worker.rq_workers(q)) == 2

        # suspend one worker so the job can only be picked up by the other
        worker.rq_workers(q)[0].set_state(WorkerStatus.SUSPENDED)
        logging.debug(
            [
                "{}: state={}, job={}".format(w.pid, w.get_state(), w.get_current_job_id())
                for w in worker.rq_workers(q)
            ]
        )

        # submit a job that sleeps for an hour
        job = q.enqueue(layabout)

        # wait up to two seconds for a worker to pick up the job
        i = 0
        while i < 20:
            w = worker.worker_for_job(job.id)
            if w:
                break
            i += 1
            time.sleep(0.1)
        assert w is not None
    finally:
        q.delete()
        if worker_process:
            try:
                os.killpg(worker_process.pid, 0)
                os.killpg(worker_process.pid, signal.SIGKILL)
            except OSError:
                logging.debug("worker_process already gone.")
        if second_process:
            try:
                os.killpg(second_process.pid, 0)
                os.killpg(second_process.pid, signal.SIGKILL)
            except OSError:
                logging.debug("second_process already gone.")

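
# The tests use a small worker module whose helpers are not shown in this excerpt. A sketch of
# their assumed shape (not verbatim code): create_queue builds an rq Queue on a local Redis
# connection, rq_workers lists the workers attached to a queue, and worker_for_job returns the
# worker whose current job id matches, if any.
from redis import Redis
from rq import Queue, Worker


def create_queue(name=None, timeout=3600):
    # fall back to a default queue name when none is given
    return Queue(name=name or "default", connection=Redis(), default_timeout=timeout)


def rq_workers(queue=None):
    # all workers registered in Redis, optionally restricted to one queue
    return Worker.all(connection=Redis(), queue=queue)


def worker_for_job(job_id):
    for w in rq_workers():
        if w.get_current_job_id() == job_id:
            return w
    return None
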
def async_add_checksum_for_file(db_obj: 'Union[Submission, Reply]') -> str:
    return create_queue().enqueue(
        queued_add_checksum_for_file,
        type(db_obj),
        db_obj.id,
        current_app.storage.path(db_obj.source.filesystem_id, db_obj.filename),
        current_app.config['SQLALCHEMY_DATABASE_URI'],
    )

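
# Design note: the checksum is computed on the rq queue rather than inline so that the request
# which stored the file does not block on hashing a potentially large upload. A typical call
# site (illustrative and assumed, not taken from this codebase) would enqueue the job right
# after committing the new row:
#
#   db.session.add(submission)
#   db.session.commit()
#   async_add_checksum_for_file(submission)
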
def test_job_interruption(config, caplog):
    """
    Tests that a job is requeued unless it is already being run.
    """
    caplog.set_level(logging.DEBUG)

    queue_name = "test_job_interruption"
    q = worker_process = None
    try:
        q = worker.create_queue(queue_name)

        # submit a job that sleeps for an hour
        job = q.enqueue(layabout)
        assert len(q.get_job_ids()) == 1

        # launch a worker process
        worker_process = start_rq_worker(config, queue_name)

        # wait up to two seconds for the worker to register itself
        i = 0
        while i < 20:
            if len(worker.rq_workers(q)) == 1:
                break
            i += 1
            time.sleep(0.1)
        assert len(worker.rq_workers(q)) == 1

        # wait up to two seconds for the worker to pick up the job
        i = 0
        while i < 20:
            w = worker.worker_for_job(job.id)
            if w:
                break
            i += 1
            time.sleep(0.1)
        assert w is not None

        # the running job should not be requeued
        worker.requeue_interrupted_jobs(queue_name)
        skipped = "Skipping job {}, which is already being run by worker {}".format(job.id, w.key)
        assert skipped in caplog.text

        # kill the process group, to kill the worker and its workhorse
        os.killpg(worker_process.pid, signal.SIGKILL)
        worker_process.wait()
        caplog.clear()

        # after killing the worker, the interrupted job should be requeued
        worker.requeue_interrupted_jobs(queue_name)
        print(caplog.text)
        assert "Requeuing job {}".format(job) in caplog.text
        assert len(q.get_job_ids()) == 1
    finally:
        q.delete()
        if worker_process:
            try:
                os.killpg(worker_process.pid, 0)
                os.killpg(worker_process.pid, signal.SIGKILL)
            except OSError:
                logging.debug("worker_process already gone.")

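
# A sketch of the requeue logic this test exercises, under the following assumptions (this is
# not the project's verbatim code): jobs listed in the queue's StartedJobRegistry but no longer
# present in the queue itself are candidates; a candidate is skipped when a live worker still
# reports it as its current job, and is otherwise removed from the registry and enqueued again.
# The log messages mirror the assertions above; create_queue and worker_for_job are assumed to
# be the companion helpers sketched after test_worker_for_job.
import logging

from rq.job import Job
from rq.registry import StartedJobRegistry


def requeue_interrupted_jobs(queue_name=None):
    queue = create_queue(queue_name)
    started_registry = StartedJobRegistry(queue=queue)

    queued_ids = queue.get_job_ids()
    candidate_ids = [j for j in started_registry.get_job_ids() if j not in queued_ids]

    if not candidate_ids:
        logging.debug("No interrupted jobs found in started job registry.")

    for job_id in candidate_ids:
        job = Job.fetch(job_id, connection=queue.connection)
        w = worker_for_job(job_id)
        if w:
            logging.debug(
                "Skipping job {}, which is already being run by worker {}".format(job_id, w.key)
            )
            continue
        logging.debug("Requeuing job {}".format(job))
        started_registry.remove(job)
        queue.enqueue_job(job)
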
def test_no_interrupted_jobs(caplog):
    """
    Tests requeue_interrupted_jobs when there are no interrupted jobs.
    """
    caplog.set_level(logging.DEBUG)

    q = worker.create_queue()
    try:
        assert len(q.get_job_ids()) == 0
        worker.requeue_interrupted_jobs()
        assert "No interrupted jobs found in started job registry." in caplog.text
    finally:
        q.delete()
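
# The test helpers referenced by the tests above are not shown in this excerpt. A minimal sketch
# of what they are assumed to do (the exact command line is an assumption; the real helper may
# invoke a project-specific worker script): `layabout` is a job that simply sleeps for an hour,
# and `start_rq_worker` launches an `rq worker` subprocess as the leader of a new process group,
# so the tests can take down the worker and its work horse together with os.killpg.
import os
import subprocess
import time


def layabout():
    # occupy a worker for a long time; must live in an importable module so workers can load it
    time.sleep(3600)


def start_rq_worker(config, queue_name):
    return subprocess.Popen(
        ["rq", "worker", queue_name],
        preexec_fn=os.setsid,  # new session => new process group, so os.killpg targets it
    )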