def upgrade() -> None:
    with op.batch_alter_table("replies", schema=None) as batch_op:
        batch_op.add_column(
            sa.Column("checksum", sa.String(length=255), nullable=True))

    with op.batch_alter_table("submissions", schema=None) as batch_op:
        batch_op.add_column(
            sa.Column("checksum", sa.String(length=255), nullable=True))

    op.create_table(
        "revoked_tokens",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("journalist_id", sa.Integer(), nullable=True),
        sa.Column("token", sa.Text(), nullable=False),
        sa.ForeignKeyConstraint(["journalist_id"], ["journalists.id"]),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("token"),
    )

    try:
        app = create_app(config)

        # we need an app context for the rq worker extension to work properly
        with app.app_context():
            conn = op.get_bind()
            query = sa.text(
                """
                SELECT submissions.id, sources.filesystem_id, submissions.filename
                FROM submissions
                INNER JOIN sources ON submissions.source_id = sources.id
                """
            )
            for (sub_id, filesystem_id, filename) in conn.execute(query):
                full_path = Storage.get_default().path(filesystem_id, filename)
                create_queue().enqueue(
                    queued_add_checksum_for_file,
                    Submission,
                    int(sub_id),
                    full_path,
                    app.config["SQLALCHEMY_DATABASE_URI"],
                )

            query = sa.text(
                """
                SELECT replies.id, sources.filesystem_id, replies.filename
                FROM replies
                INNER JOIN sources ON replies.source_id = sources.id
                """
            )
            for (rep_id, filesystem_id, filename) in conn.execute(query):
                full_path = Storage.get_default().path(filesystem_id, filename)
                create_queue().enqueue(
                    queued_add_checksum_for_file,
                    Reply,
                    int(rep_id),
                    full_path,
                    app.config["SQLALCHEMY_DATABASE_URI"],
                )
    except:  # noqa
        # Errors in this data migration are swallowed unless raise_errors is
        # set (e.g. by the test suite), so a failure to enqueue checksum jobs
        # does not block the schema upgrade itself.
        if raise_errors:
            raise
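The migration above only enqueues work; the hashing itself happens in the job function run by the rq worker. Below is a minimal sketch of what that worker-side function could look like, assuming it receives the model class, row id, file path, and database URI in the order enqueued above and writes a "sha256:<hex>" digest into the new checksum column. It is an illustration, not the project's actual implementation.

import hashlib

from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker


def queued_add_checksum_for_file(db_model, model_id, file_path, db_uri):
    # rq jobs run in a separate worker process, so open a fresh session here.
    session = sessionmaker(bind=create_engine(db_uri))()
    try:
        db_obj = session.query(db_model).filter_by(id=model_id).one()
        digest = hashlib.sha256()
        with open(file_path, "rb") as f:
            for chunk in iter(lambda: f.read(1024 * 1024), b""):
                digest.update(chunk)
        db_obj.checksum = "sha256:" + digest.hexdigest()
        session.commit()
    finally:
        session.close()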
def test_worker_for_job(config):
    """
    Tests that worker_for_job works when there are multiple workers.
    """

    queue_name = "test_worker_for_job"
    q = worker_process = second_process = None
    try:
        q = worker.create_queue(queue_name)
        assert len(worker.rq_workers(q)) == 0

        # launch worker processes
        worker_process = start_rq_worker(config, queue_name)
        second_process = start_rq_worker(config, queue_name)

        i = 0
        while i < 20:
            if len(worker.rq_workers(q)) == 2:
                break
            i += 1
            time.sleep(0.1)

        assert len(worker.rq_workers(q)) == 2

        worker.rq_workers(q)[0].set_state(WorkerStatus.SUSPENDED)

        logging.debug(
            [
                "{}: state={}, job={}".format(w.pid, w.get_state(), w.get_current_job_id())
                for w in worker.rq_workers(q)
            ]
        )

        # submit a job that sleeps for an hour
        job = q.enqueue(layabout)

        i = 0
        while i < 20:
            w = worker.worker_for_job(job.id)
            if w:
                break
            i += 1
            time.sleep(0.1)
        assert w is not None

    finally:
        if q is not None:
            q.delete()
        if worker_process:
            try:
                # signal 0 checks that the process group still exists before
                # SIGKILL takes down the worker and its work horse
                os.killpg(worker_process.pid, 0)
                os.killpg(worker_process.pid, signal.SIGKILL)
            except OSError:
                logging.debug("worker_process already gone.")

        if second_process:
            try:
                os.killpg(second_process.pid, 0)
                os.killpg(second_process.pid, signal.SIGKILL)
            except OSError:
                logging.debug("second_process already gone.")
def async_add_checksum_for_file(db_obj: 'Union[Submission, Reply]') -> str:
    """Enqueue a background job to compute and store the checksum of db_obj's file."""
    return create_queue().enqueue(
        queued_add_checksum_for_file,
        type(db_obj),
        db_obj.id,
        current_app.storage.path(db_obj.source.filesystem_id, db_obj.filename),
        current_app.config['SQLALCHEMY_DATABASE_URI'],
    )
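As a usage sketch only: a request handler that has just written a submission's file to disk could hand the hashing off to the queue rather than computing it inline. The Submission construction and session handling below are illustrative assumptions, not the project's actual handler code.

def save_submission_and_hash(db_session, source, filename):
    # Hypothetical helper: persist the row first so it has an id to hash against.
    submission = Submission(source, filename)  # constructor assumed for illustration
    db_session.add(submission)
    db_session.commit()
    # Compute the file checksum in a background rq job instead of in-request.
    async_add_checksum_for_file(submission)
    return submission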
def test_job_interruption(config, caplog):
    """
    Tests that a job is requeued unless it is already being run.
    """
    caplog.set_level(logging.DEBUG)

    queue_name = "test_job_interruption"
    q = worker_process = None
    try:
        q = worker.create_queue(queue_name)

        # submit a job that sleeps for an hour
        job = q.enqueue(layabout)
        assert len(q.get_job_ids()) == 1

        # launch worker processes
        worker_process = start_rq_worker(config, queue_name)

        i = 0
        while i < 20:
            if len(worker.rq_workers(q)) == 1:
                break
            i += 1
            time.sleep(0.1)

        assert len(worker.rq_workers(q)) == 1

        i = 0
        while i < 20:
            w = worker.worker_for_job(job.id)
            if w:
                break
            i += 1
            time.sleep(0.1)
        assert w is not None

        # the running job should not be requeued
        worker.requeue_interrupted_jobs(queue_name)
        skipped = "Skipping job {}, which is already being run by worker {}".format(job.id, w.key)
        assert skipped in caplog.text

        # kill the process group, to kill the worker and its workhorse
        os.killpg(worker_process.pid, signal.SIGKILL)
        worker_process.wait()
        caplog.clear()

        # after killing the worker, the interrupted job should be requeued
        worker.requeue_interrupted_jobs(queue_name)
        print(caplog.text)
        assert "Requeuing job {}".format(job) in caplog.text
        assert len(q.get_job_ids()) == 1
    finally:
        if q is not None:
            q.delete()
        if worker_process:
            try:
                os.killpg(worker_process.pid, 0)
                os.killpg(worker_process.pid, signal.SIGKILL)
            except OSError:
                logging.debug("worker_process already gone.")
def test_no_interrupted_jobs(caplog):
    """
    Tests requeue_interrupted_jobs when there are no interrupted jobs.
    """
    caplog.set_level(logging.DEBUG)

    q = worker.create_queue()
    try:
        assert len(q.get_job_ids()) == 0
        worker.requeue_interrupted_jobs()
        assert "No interrupted jobs found in started job registry." in caplog.text
    finally:
        q.delete()
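For context, worker.requeue_interrupted_jobs is exercised above but not shown. The idea, reconstructed from the log messages the tests assert on, is roughly: look at rq's StartedJobRegistry, skip any job that a live worker is still running, and re-enqueue the rest. A hedged sketch follows (create_queue and worker_for_job are assumed to live in the same module; the real implementation may differ):

import logging

from rq.registry import StartedJobRegistry


def requeue_interrupted_jobs(queue_name=None):
    # Sketch only; the real function may handle connections and errors differently.
    queue = create_queue(queue_name)
    registry = StartedJobRegistry(queue=queue)

    started_job_ids = registry.get_job_ids()
    if not started_job_ids:
        logging.debug("No interrupted jobs found in started job registry.")
        return

    for job_id in started_job_ids:
        w = worker_for_job(job_id)
        if w is not None:
            # A live worker still owns this job; do not requeue it.
            logging.debug(
                "Skipping job {}, which is already being run by worker {}".format(job_id, w.key)
            )
            continue
        job = queue.fetch_job(job_id)
        logging.debug("Requeuing job {}".format(job))
        registry.remove(job)
        queue.enqueue_job(job)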