def test_reenqueue_object_success(
        reenqueue_object, session, redis, museum_object, museum_package):
    # Create fake DB entries
    museum_package.downloaded = True
    museum_package.packaged = True
    museum_package.uploaded = True
    museum_package.rejected = True
    session.commit()

    # Create a job that was completed prior to re-enqueuing
    queue = get_queue(QueueType.CONFIRM_SIP)
    queue.enqueue(successful_job, job_id="confirm_sip_123456")
    SimpleWorker([queue], connection=queue.connection).work(burst=True)

    finished_registry = FinishedJobRegistry(queue=queue)
    assert finished_registry.get_job_ids() == ["confirm_sip_123456"]

    result = reenqueue_object(["123456"])
    assert "Object 123456 re-enqueued" in result.stdout

    # New RQ task was enqueued
    queue = get_queue(QueueType.DOWNLOAD_OBJECT)
    assert "download_object_123456" in queue.job_ids

    # Database was updated
    db_museum_object = session.query(MuseumObject).filter_by(id=123456).one()
    assert len(db_museum_object.packages) == 1
    assert not db_museum_object.latest_package

    # Prior finished job was removed
    assert finished_registry.get_job_ids() == []
def navbar_stats():
    """
    Retrieve object counts used for the navbar
    """
    # Check cache first
    redis = get_redis_connection()
    result = redis.get("navbar_stats")
    if result:
        result = json.loads(result)
        return jsonify(result)

    queues = (
        get_queue(QueueType.DOWNLOAD_OBJECT),
        get_queue(QueueType.CREATE_SIP),
        get_queue(QueueType.SUBMIT_SIP),
        get_queue(QueueType.CONFIRM_SIP)
    )

    result = {"queues": {}}

    for queue in queues:
        result["queues"][queue.name] = {
            "pending": queue.count,
            "processing": StartedJobRegistry(queue=queue).count
        }

    # Add the failed job count across all queues
    result["failed"] = sum(
        [FailedJobRegistry(queue=queue).count for queue in queues])

    # Cache result for 2 seconds
    redis.set("navbar_stats", json.dumps(result), ex=2)

    return jsonify(result)
def test_list_sips_queues(
        self, client, session, museum_object_factory,
        museum_package_factory):
    """
    Test that the queue names for objects are provided correctly
    """
    museum_object_a = museum_object_factory(
        id=10, preserved=True, title="Object A")
    museum_object_b = museum_object_factory(id=20, title="Object B")

    museum_package_a = museum_package_factory(
        museum_object=museum_object_a, sip_filename="testA.tar")
    museum_object_a.latest_package = museum_package_a
    museum_package_factory(
        museum_object=museum_object_b, sip_filename="testB.tar")
    session.commit()

    # Enqueue one task for each object
    get_queue(QueueType.DOWNLOAD_OBJECT).enqueue(
        successful_job, job_id="download_object_10")
    get_queue(QueueType.SUBMIT_SIP).enqueue(
        successful_job, job_id="submit_sip_20")

    result = client.get("/api/list-sips").json

    # Only object A will report the queue names, since its package is the
    # latest package
    assert len(result["results"]) == 2
    assert result["results"][0]["filename"] == "testB.tar"
    assert result["results"][0]["queues"] == []

    assert result["results"][1]["filename"] == "testA.tar"
    assert result["results"][1]["queues"] == ["download_object"]
def test_freeze_objects_delete_jobs(
        session, redis, freeze_objects, museum_object_factory):
    """
    Freeze object with one pending and one failed job, and ensure they are
    both deleted
    """
    def successful_job():
        return ":)"

    def failing_job():
        raise RuntimeError(":(")

    museum_object_factory(id=123456)

    queue_a = get_queue(QueueType.DOWNLOAD_OBJECT)
    queue_b = get_queue(QueueType.SUBMIT_SIP)

    queue_a.enqueue(successful_job, job_id="download_object_123456")
    queue_b.enqueue(failing_job, job_id="submit_sip_123456")
    SimpleWorker([queue_b], connection=queue_b.connection).work(burst=True)

    freeze_objects(["--delete-jobs", "--reason", "Deleting job", "123456"])

    assert len(queue_a.job_ids) == 0
    assert len(queue_b.job_ids) == 0

    assert session.query(MuseumObject).filter_by(
        id=123456, freeze_reason="Deleting job").count() == 1
def test_get_object_id2queue_map(redis):
    """
    Test that 'get_object_id2queue_map' returns a correct dictionary
    """
    queue_a = get_queue(QueueType.DOWNLOAD_OBJECT)
    queue_b = get_queue(QueueType.SUBMIT_SIP)

    queue_a.enqueue(successful_job, job_id="download_object_123456")
    queue_b.enqueue(failing_job, job_id="submit_sip_654321")
    SimpleWorker([queue_b], connection=queue_b.connection).work(burst=True)

    queue_map = get_object_id2queue_map([123456, 654321, 111111])

    assert queue_map[123456] == ["download_object"]
    assert queue_map[654321] == ["submit_sip", "failed"]
    assert queue_map[111111] == []
def test_unfreeze_objects(self, client, session, museum_object_factory):
    """
    Test unfreezing two objects with a specific reason
    """
    museum_object_factory(id=1, frozen=True, freeze_reason="Test reason A")
    museum_object_factory(id=2, frozen=True, freeze_reason="Test reason B")
    museum_object_factory(id=3, frozen=True, freeze_reason="Test reason A")
    museum_object_factory(id=4, frozen=True, freeze_reason="Test reason B")

    # Auto-completion entries can be found
    result = client.get("/web-ui/unfreeze-objects/")
    assert b"Test reason A" in result.data
    assert b"Test reason B" in result.data

    # Objects 1 and 3 will be unfrozen
    result = client.post(
        "/web-ui/unfreeze-objects/",
        data={"reason": "Test reason A"},
        follow_redirects=True)
    assert b"2 object(s) were unfrozen." in result.data
    assert (
        session.query(MuseumObject)
        .filter(MuseumObject.id.in_([1, 3]))
        .filter_by(frozen=False)
        .count() == 2
    )

    queue = get_queue(QueueType.DOWNLOAD_OBJECT)

    # Objects are not enqueued by default
    assert len(queue.job_ids) == 0
def test_create_sip(session, create_sip, museum_package, create_sip_call):
    """
    Test running the 'create_sip' workflow job
    """
    museum_package.downloaded = True
    museum_package.created_date = datetime.datetime(
        2019, 1, 2, 10, 0, 0, 0, tzinfo=datetime.timezone.utc)
    session.commit()

    create_sip(123456, sip_id="testID")

    # Database should be updated
    db_museum_package = session.query(MuseumPackage).filter_by(
        sip_filename="fake_package-testID.tar").one()

    # 'create_sip' was called correctly
    assert not create_sip_call["update"]
    assert create_sip_call["create_date"] == datetime.datetime(
        2019, 1, 2, 10, 0, 0, 0, tzinfo=datetime.timezone.utc)
    assert not create_sip_call["modify_date"]

    assert db_museum_package.downloaded
    assert db_museum_package.packaged
    assert not db_museum_package.uploaded

    # New job should be enqueued
    queue = get_queue(QueueType.SUBMIT_SIP)
    assert queue.jobs[0].id == "submit_sip_123456"
    assert queue.jobs[0].kwargs == {"object_id": 123456, "sip_id": "testID"}
def test_enqueue_objects(
        redis, session, enqueue_objects, museum_object_factory):
    """
    Enqueue objects in multiple batches from a list of 20 objects
    """
    for i in range(0, 20):
        museum_object_factory(
            id=i, preserved=False, metadata_hash="",
            attachment_metadata_hash=""
        )

    result = enqueue_objects(["--object-count", "5"])
    assert "5 object(s) enqueued" in result.stdout

    queue = get_queue(QueueType.DOWNLOAD_OBJECT)

    # Five jobs are enqueued
    assert len(queue.job_ids) == 5

    result = enqueue_objects(["--object-count", "5"])
    assert "5 object(s) enqueued" in result.stdout

    # Five more jobs are enqueued on the second run
    assert len(queue.job_ids) == 10

    # The rest of the jobs are enqueued on the third run
    result = enqueue_objects(["--object-count", "100"])
    assert "10 object(s) enqueued" in result.stdout
    assert len(queue.job_ids) == 20
def test_unfreeze_objects(session, unfreeze_objects, museum_object_factory):
    museum_object_factory(id=10, frozen=True, freeze_reason="Test reason")
    museum_object_factory(id=20, frozen=True, freeze_reason="Test reason")
    museum_object_factory(id=30, frozen=True, freeze_reason="Test reason")

    # Unfreeze the first object
    result = unfreeze_objects([
        "--with-object-ids", "10", "--with-reason", "Test reason"
    ])
    assert "1 object(s) were updated" in result.stdout
    assert session.query(
        MuseumObject
    ).filter_by(frozen=False, id=10).count() == 1

    # Unfreeze the second and third object
    result = unfreeze_objects(["--with-reason", "Test reason"])
    assert "2 object(s) were updated" in result.stdout
    assert (
        session.query(MuseumObject)
        .filter_by(frozen=False)
        .filter(MuseumObject.id.in_([20, 30]))
        .count() == 2
    )

    queue = get_queue(QueueType.DOWNLOAD_OBJECT)

    # Museum object is not enqueued by default
    assert len(queue.job_ids) == 0
def test_freeze_objects_already_running(
        self, session, client, museum_object_factory):
    """
    Test freezing two objects that already have running jobs
    """
    def successful_job():
        return ":)"

    confirm_queue = get_queue(QueueType.CONFIRM_SIP)
    started_registry = StartedJobRegistry(queue=confirm_queue)

    for i in [5, 10]:
        museum_object_factory(id=i)
        job = confirm_queue.enqueue(
            successful_job, job_id=f"download_object_{i}")
        started_registry.add(job, -1)

    result = client.post("/web-ui/freeze-objects/", data={
        "reason": "Test reason",
        "object_ids": "10\n5"
    })
    assert (
        escape(
            "following object IDs have running jobs and can't be frozen: "
            "5, 10"
        ).encode("utf-8") in result.data
    )
def test_delete_jobs_for_object_id(redis):
    queue_a = get_queue(QueueType.DOWNLOAD_OBJECT)
    queue_b = get_queue(QueueType.SUBMIT_SIP)

    queue_a.enqueue(successful_job, job_id="download_object_123456")
    queue_b.enqueue(failing_job, job_id="submit_sip_123456")
    SimpleWorker([queue_b], connection=queue_b.connection).work(burst=True)

    # Both the pending and failed jobs should be cancelled
    assert delete_jobs_for_object_id(123456) == 2

    assert len(queue_a.job_ids) == 0
    assert len(queue_b.job_ids) == 0

    # Second run does nothing
    assert delete_jobs_for_object_id(123456) == 0
def update_sips(sip_results, sftp):
    """
    Update processed SIPs one-by-one
    """
    # TODO: We could process SIPs in chunks to reduce DB load
    # (eg. 50 SIPs per DB session). However, this requires a bit more
    # complexity and may not be necessary performance-wise.
    queue = get_queue(QueueType.CONFIRM_SIP)

    for sip in sip_results:
        update_sip(sip, sftp=sftp, queue=queue)
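# The TODO above suggests batching SIP updates. Below is a minimal sketch of
# what such a chunked variant could look like; it is only an illustration and
# not part of the current codebase. It assumes 'update_sip' keeps its current
# signature, and the chunk size of 50 is simply the example value from the
# TODO comment.
from itertools import islice


def update_sips_in_chunks(sip_results, sftp, chunk_size=50):
    """
    Hypothetical chunked variant of 'update_sips' (see the TODO above)
    """
    queue = get_queue(QueueType.CONFIRM_SIP)

    iterator = iter(sip_results)
    while True:
        # Take up to 'chunk_size' SIPs at a time
        chunk = list(islice(iterator, chunk_size))
        if not chunk:
            break

        # A single DB session could be scoped around each chunk here if
        # 'update_sip' were extended to accept a shared session
        for sip in chunk:
            update_sip(sip, sftp=sftp, queue=queue)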
def deferred_enqueue_objects(object_count):
    """
    Enqueue the given number of objects to the preservation workflow using
    a background RQ job

    :param int object_count: How many objects to enqueue at most
    """
    queue = get_queue(QueueType.ENQUEUE_OBJECTS)
    queue.enqueue(enqueue_objects, kwargs={"object_count": object_count})

    print(f"{object_count} object(s) will be enqueued")

    return object_count
def enqueue_object(object_id):
    """
    Enqueue a single object. This can be called separately outside of
    'enqueue_objects'. In this case, the caller needs to ensure the
    workflow is locked.
    """
    object_id = int(object_id)

    queue = get_queue(QueueType.DOWNLOAD_OBJECT)
    job_id = f"download_object_{object_id}"

    return queue.enqueue(
        download_object, kwargs={"object_id": object_id}, job_id=job_id)
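# A hedged usage sketch for calling 'enqueue_object' on its own, per the
# docstring above. 'lock_workflow' is a hypothetical context manager that
# stands in for whatever locking mechanism the caller uses; it is not
# defined in this section.
#
#     with lock_workflow():
#         job = enqueue_object(123456)
#         # The returned RQ job uses the deterministic job ID
#         assert job.id == "download_object_123456"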
def test_preservation_error(
        session, create_sip, monkeypatch, museum_package,
        museum_packages_dir, archive_dir):
    """
    Test that encountering a PreservationError during a 'create_sip' job
    will freeze the object and remove the object from the workflow
    """
    def mock_create_sip(
            object_id, package_dir, sip_id, create_date, modify_date,
            update):
        raise PreservationError(
            detail="Mock error message.",
            error="Unsupported file format: wad")

    # Create the fake museum package directory
    (museum_packages_dir / "123456" / "sip").mkdir(parents=True)
    (museum_packages_dir / "123456" / "reports").mkdir(parents=True)

    monkeypatch.setattr(
        "passari_workflow.jobs.create_sip.main", mock_create_sip)

    museum_package.downloaded = True
    session.commit()

    create_sip(123456, sip_id="testID")

    # Database should be updated
    db_museum_package = session.query(MuseumPackage).filter_by(
        sip_filename="fake_package-testID.tar").one()
    db_museum_object = session.query(MuseumObject).filter_by(id=123456).one()

    assert db_museum_package.downloaded
    assert not db_museum_package.packaged
    assert not db_museum_package.uploaded
    # The package was cancelled
    assert db_museum_package.cancelled

    assert db_museum_object.frozen
    assert db_museum_object.freeze_reason == "Unsupported file format: wad"
    assert db_museum_object.freeze_source == FreezeSource.AUTOMATIC

    # No new job was enqueued
    queue = get_queue(QueueType.SUBMIT_SIP)
    assert not queue.job_ids

    # The museum package directory was deleted
    assert not (museum_packages_dir / "123456").is_dir()

    # The log file was archived.
    # We only test for the existence of the directory since the actual
    # method is mocked and only creates a directory.
    assert (archive_dir / "123456").is_dir()
def test_get_enqueued_object_ids(redis):
    queue = get_queue(QueueType.CREATE_SIP)

    # Run two jobs to completion: one succeeds and one fails
    queue.enqueue(successful_job, job_id="create_sip_124578")
    queue.enqueue(failing_job, job_id="create_sip_998877")
    SimpleWorker([queue], connection=queue.connection).work(burst=True)

    # Don't finish this job
    queue.enqueue(successful_job, job_id="create_sip_555555")

    # Pending and failed object IDs should all be found;
    # the finished job ID won't be included
    object_ids = get_enqueued_object_ids()
    assert 124578 not in object_ids
    assert 998877 in object_ids
    assert 555555 in object_ids
def test_reenqueue_object_package_enqueued(
        reenqueue_object, session, redis, museum_object, museum_package):
    # If a task is already enqueued, nothing will be done
    museum_package.downloaded = True
    museum_package.packaged = True
    museum_package.uploaded = True
    museum_package.rejected = True
    session.commit()

    queue = get_queue(QueueType.CREATE_SIP)
    queue.enqueue(
        print, kwargs={"object_id": 123456}, job_id="create_sip_123456")

    with pytest.raises(ValueError) as exc:
        reenqueue_object(["123456"], success=False)

    assert "Object is still in the workflow" in str(exc.value)
def test_unfreeze_objects_enqueue(
        session, unfreeze_objects, museum_object_factory):
    """
    Test that an object is enqueued after unfreezing if the command-line
    flag is used
    """
    museum_object_factory(id=10, frozen=True, freeze_reason="Test reason")

    result = unfreeze_objects(["--with-reason", "Test reason", "--enqueue"])
    assert "1 object(s) were updated" in result.stdout
    assert session.query(
        MuseumObject
    ).filter_by(frozen=False, id=10).count() == 1

    queue = get_queue(QueueType.DOWNLOAD_OBJECT)

    # Job was enqueued
    assert "download_object_10" in queue.job_ids
def reenqueue_object(object_id: int):
    """
    Re-enqueue a rejected object into the workflow
    """
    object_id = int(object_id)
    connect_db()

    queue = get_queue(QueueType.DOWNLOAD_OBJECT)

    with scoped_session() as db:
        museum_object = (
            db.query(MuseumObject)
            .join(
                MuseumPackage,
                MuseumObject.latest_package_id == MuseumPackage.id
            )
            .filter(MuseumObject.id == object_id)
            .one()
        )
        if museum_object.latest_package and \
                not museum_object.latest_package.rejected:
            raise ValueError(
                f"Latest package {museum_object.latest_package.sip_filename} "
                f"wasn't rejected"
            )

        object_ids = get_enqueued_object_ids()
        if object_id in object_ids:
            raise ValueError(
                "Object is still in the workflow and can't be re-enqueued"
            )

        museum_object.latest_package = None

        delete_jobs_for_object_id(object_id)

        queue.enqueue(
            download_object,
            kwargs={"object_id": object_id},
            job_id=f"download_object_{object_id}"
        )
def test_enqueue_objects_with_object_ids(
        redis, session, enqueue_objects, museum_object_factory,
        museum_package_factory):
    """
    Enqueue two specific object IDs
    """
    for i in range(0, 20):
        museum_object_factory(
            id=i, preserved=False, metadata_hash="",
            attachment_metadata_hash=""
        )

    result = enqueue_objects(["--object-ids", "5,8"])
    assert "2 object(s) enqueued" in result.stdout

    queue = get_queue(QueueType.DOWNLOAD_OBJECT)

    assert len(queue.job_ids) == 2
    assert "download_object_5" in queue.job_ids
    assert "download_object_8" in queue.job_ids
def test_unfreeze_objects_enqueue(
        self, client, session, museum_object_factory):
    """
    Unfreeze an object and enqueue it
    """
    museum_object_factory(id=1, frozen=True, freeze_reason="Test reason A")
    museum_object_factory(id=2, frozen=True, freeze_reason="Test reason B")

    # Object 2 will be unfrozen
    result = client.post("/api/unfreeze-objects", data={
        "reason": "Test reason B",
        "enqueue": "true"
    })
    assert result.json == {"success": True, "count": 1}
    assert (
        session.query(MuseumObject)
        .filter_by(id=2, frozen=False)
        .count() == 1
    )

    queue = get_queue(QueueType.DOWNLOAD_OBJECT)
    assert set(["download_object_2"]) == set(queue.job_ids)
def test_unfreeze_objects_reason(
        self, client, session, museum_object_factory):
    """
    Unfreeze two objects using a reason as the filter
    """
    museum_object_factory(id=1, frozen=True, freeze_reason="Test reason A")
    museum_object_factory(id=2, frozen=True, freeze_reason="Test reason B")
    museum_object_factory(id=3, frozen=True, freeze_reason="Test reason A")
    museum_object_factory(id=4, frozen=True, freeze_reason="Test reason B")

    # Objects 1 and 3 will be unfrozen
    result = client.post(
        "/api/unfreeze-objects", data={"reason": "Test reason A"})
    assert result.json == {"success": True, "count": 2}
    assert (
        session.query(MuseumObject)
        .filter(MuseumObject.id.in_([1, 3]))
        .filter(MuseumObject.frozen == False)
        .count() == 2
    )

    queue = get_queue(QueueType.DOWNLOAD_OBJECT)

    # Object is not enqueued by default
    assert len(queue.job_ids) == 0
def test_museum_package_missing(
        redis, session, download_object, museum_object, freeze_time):
    """
    Download a museum object when the museum object directory doesn't
    exist yet
    """
    # Do the 'download_object' job.
    freeze_time("2019-02-03 12:00:00")
    download_object(123456)

    # MuseumPackage should be created
    db_museum_object = session.query(MuseumObject).filter(
        MuseumObject.id == 123456).first()
    latest_package = db_museum_object.latest_package

    assert latest_package in db_museum_object.packages

    # The current time "2019-02-03 12:00:00" is used as the SIP ID and in
    # the SIP filename
    assert latest_package.sip_filename == "fake_package-20190203-120000.tar"
    assert latest_package.sip_id == "20190203-120000"
    assert latest_package.downloaded
    assert not latest_package.packaged

    # Metadata hashes are copied from the latest version of the object
    assert latest_package.metadata_hash == museum_object.metadata_hash
    assert latest_package.attachment_metadata_hash == \
        museum_object.attachment_metadata_hash

    # MuseumAttachments are added
    assert len(latest_package.attachments) == 2
    assert latest_package.attachments[0].id == 1234560
    assert latest_package.attachments[1].id == 2469120

    # New job should be enqueued
    queue = get_queue(QueueType.CREATE_SIP)
    assert queue.jobs[0].id == "create_sip_123456"
    assert queue.jobs[0].kwargs == {
        "object_id": 123456,
        "sip_id": "20190203-120000"
    }
def test_freeze_objects_running_jobs(
        session, redis, freeze_objects, museum_object_factory):
    """
    Try freezing two objects when they have running jobs.
    """
    def successful_job():
        return ":)"

    museum_object_factory(id=123456)
    museum_object_factory(id=654321)

    queue = get_queue(QueueType.DOWNLOAD_OBJECT)
    started_registry = StartedJobRegistry(queue=queue)

    job_a = queue.enqueue(successful_job, job_id="download_object_123456")
    job_b = queue.enqueue(successful_job, job_id="download_object_654321")
    started_registry.add(job_a, -1)
    started_registry.add(job_b, -1)

    with pytest.raises(WorkflowJobRunningError) as exc:
        freeze_objects(
            ["--reason", "Won't succeed", "654321", "123456"],
            success=False)

    assert "can't be frozen: 123456, 654321" in str(exc.value)
def test_unfreeze_objects_enqueue(
        self, client, session, museum_object_factory):
    """
    Test unfreezing an object and enqueuing it immediately
    """
    museum_object_factory(id=1, frozen=True, freeze_reason="Test reason A")
    museum_object_factory(id=2, frozen=True, freeze_reason="Test reason B")

    # Unfreeze object 2
    result = client.post(
        "/web-ui/unfreeze-objects/",
        data={
            "reason": "Test reason B",
            "enqueue": True
        },
        follow_redirects=True)
    assert b"1 object(s) were unfrozen." in result.data
    assert (
        session.query(MuseumObject)
        .filter_by(id=2, frozen=False)
        .count() == 1
    )

    queue = get_queue(QueueType.DOWNLOAD_OBJECT)

    # Object was enqueued
    assert set(["download_object_2"]) == set(queue.job_ids)
def test_navbar_stats(self, session, client):
    # Create 1 'download_object' job
    get_queue(QueueType.DOWNLOAD_OBJECT).enqueue(
        successful_job, job_id="download_object_1")

    # Create 2 'create_sip' jobs
    for i in range(2, 4):
        get_queue(QueueType.CREATE_SIP).enqueue(
            successful_job, job_id=f"create_sip_{i}")

    # Create 1 failed 'submit_sip' job
    submit_queue = get_queue(QueueType.SUBMIT_SIP)
    submit_queue.enqueue(failing_job, job_id="submit_sip_4")
    SimpleWorker(
        [submit_queue], connection=submit_queue.connection
    ).work(burst=True)

    # Create 1 started 'confirm_sip' job
    confirm_queue = get_queue(QueueType.CONFIRM_SIP)
    started_registry = StartedJobRegistry(queue=confirm_queue)
    job = confirm_queue.enqueue(successful_job, job_id="confirm_sip_5")
    started_registry.add(job, -1)

    result = client.get("/api/navbar-stats").json

    assert result["queues"]["download_object"] == \
        {"processing": 0, "pending": 1}
    assert result["queues"]["create_sip"] == \
        {"processing": 0, "pending": 2}
    assert result["queues"]["submit_sip"] == \
        {"processing": 0, "pending": 0}
    # TODO: In practice, if one worker is working on a job and there are
    # no pending jobs, this should be 'processing': 1, 'pending': 0.
    # How can we mimic a similar situation in this test scenario?
    assert result["queues"]["confirm_sip"] == \
        {"processing": 1, "pending": 1}

    assert result["failed"] == 1
def test_preservation_error(
        session, download_object, monkeypatch, museum_packages_dir,
        archive_dir, museum_object, museum_package_factory,
        with_existing_package):
    """
    Test that encountering a PreservationError during a 'download_object'
    job will freeze the object and remove the object from the workflow.

    The test case has been parametrized with two different scenarios: one
    where a MuseumObject already has one preserved package, and a second
    one where no package has been created yet
    """
    def mock_download_object(object_id, package_dir, sip_id):
        raise PreservationError(
            detail="Mock detailed error message",
            error="Filename was not supported")

    # Create the fake museum package directory
    (museum_packages_dir / "123456" / "sip").mkdir(parents=True)

    monkeypatch.setattr(
        "passari_workflow.jobs.download_object.main", mock_download_object)

    # For the test case with an existing package, create a museum package
    # that was uploaded successfully earlier.
    # The PreservationError should *not* affect this package.
    if with_existing_package:
        db_museum_package = museum_package_factory(
            sip_filename="fake_package-testID2.tar",
            created_date=datetime.datetime(
                2018, 9, 1, 12, 0, 0, 0, tzinfo=datetime.timezone.utc),
            preserved=True,
            museum_object=museum_object)
        museum_object.latest_package = db_museum_package
        session.commit()

    download_object(123456)

    # Database should be updated
    db_museum_object = session.query(MuseumObject).get(123456)

    assert db_museum_object.frozen
    assert db_museum_object.freeze_reason == "Filename was not supported"
    assert db_museum_object.freeze_source == FreezeSource.AUTOMATIC

    # The previous successful package was not updated.
    # This is because a new package is not created unless the
    # 'download_object' job is successful
    if with_existing_package:
        latest_package = db_museum_object.latest_package
        assert not latest_package.cancelled
        assert latest_package.preserved
        assert latest_package.sip_filename == "fake_package-testID2.tar"
    else:
        assert not db_museum_object.latest_package

    # No new job was enqueued
    queue = get_queue(QueueType.CREATE_SIP)
    assert not queue.job_ids

    # The museum package directory was deleted
    assert not (museum_packages_dir / "123456").is_dir()
def download_object(object_id):
    """
    Download an object from MuseumPlus and enqueue the task 'create_sip'
    once the object is downloaded
    """
    object_id = int(object_id)
    connect_db()

    # Create a SIP ID from the current time
    sip_id = datetime.datetime.now(
        datetime.timezone.utc).strftime("%Y%m%d-%H%M%S")

    try:
        museum_package = main(
            object_id=int(object_id),
            package_dir=PACKAGE_DIR,
            # 'sip_id' is optional, but giving it as a kwarg ensures the
            # filename of the SIP is correct before it is created.
            sip_id=sip_id)
    except PreservationError as exc:
        # If a PreservationError was raised, freeze the object
        freeze_running_object(
            object_id=object_id, sip_id=sip_id, freeze_reason=exc.error)
        return
    except OSError as exc:
        if exc.errno == errno.ENOSPC:
            raise OSError(
                errno.ENOSPC,
                "Ran out of disk space. This may have happened because the "
                "package directory ran out of space while downloading a "
                "large attachment. Try removing packages from the directory "
                "and trying again, processing fewer packages at the same "
                "time.")
        raise

    filename = museum_package.sip_filename

    with scoped_session() as db:
        db_museum_object = db.query(MuseumObject).filter(
            MuseumObject.id == object_id).one()
        db_package = db.query(MuseumPackage).filter_by(
            sip_filename=filename).first()

        # Get the attachments that currently exist for this object
        # and add them to the new MuseumPackage
        attachment_ids = museum_package.museum_object.attachment_ids
        db_attachments = bulk_create_or_get(
            db, MuseumAttachment, attachment_ids)

        if not db_package:
            db_package = MuseumPackage(
                sip_filename=filename,
                sip_id=sip_id,
                object_modified_date=(
                    museum_package.museum_object.modified_date),
                downloaded=True,
                metadata_hash=db_museum_object.metadata_hash,
                attachment_metadata_hash=(
                    db_museum_object.attachment_metadata_hash),
                attachments=db_attachments)
            db_package.museum_object = db_museum_object
        else:
            raise EnvironmentError(
                f"Package with filename {filename} already exists")

        db_museum_object.latest_package = db_package

    queue = get_queue(QueueType.CREATE_SIP)
    queue.enqueue(
        create_sip,
        kwargs={
            "object_id": object_id,
            "sip_id": sip_id
        },
        job_id=f"create_sip_{object_id}")
def overview_stats():
    """
    Retrieve real-time statistics used in the 'Overview' page
    """
    # Check cache first
    redis = get_redis_connection()
    result = redis.get("overview_stats")
    if result:
        result = json.loads(result)
        return jsonify(result)

    queues = (
        get_queue(QueueType.DOWNLOAD_OBJECT),
        get_queue(QueueType.CREATE_SIP),
        get_queue(QueueType.SUBMIT_SIP),
        get_queue(QueueType.CONFIRM_SIP)
    )
    job_count = sum([queue.count for queue in queues])
    failed_count = sum(
        [FailedJobRegistry(queue=queue).count for queue in queues])

    total_count = db.session.query(MuseumObject).count()
    frozen_count = (
        db.session.query(MuseumObject)
        .filter(MuseumObject.frozen)
        .count())
    submitted_count = (
        db.session.query(MuseumObject)
        .join(
            MuseumPackage,
            MuseumObject.latest_package_id == MuseumPackage.id)
        .filter(
            and_(
                MuseumObject.latest_package,
                MuseumPackage.rejected == False,
                MuseumPackage.preserved == False,
                MuseumPackage.uploaded))
        .count())
    rejected_count = (
        db.session.query(MuseumObject)
        .join(
            MuseumPackage,
            MuseumObject.latest_package_id == MuseumPackage.id)
        .filter(and_(MuseumObject.latest_package, MuseumPackage.rejected))
        .count())
    preserved_count = (
        db.session.query(MuseumObject)
        .with_transformation(MuseumObject.exclude_preservation_pending)
        .filter(MuseumObject.preserved)
        .count())

    result = {
        "steps": {
            "pending": {
                "count": int(
                    total_count - job_count - failed_count - frozen_count
                    - rejected_count - submitted_count - preserved_count)
            },
        },
        "total_count": total_count
    }

    # Add the individual queues
    for queue in queues:
        result["steps"][queue.name] = {"count": queue.count}

    # Add counts outside of queues
    other_steps = [
        ("preserved", preserved_count),
        ("rejected", rejected_count),
        ("submitted", submitted_count),
        ("frozen", frozen_count),
        ("failed", failed_count)
    ]
    for name, count in other_steps:
        result["steps"][name] = {"count": count}

    # Cache result for 2 seconds
    redis.set("overview_stats", json.dumps(result), ex=2)

    return jsonify(result)
def create_sip(object_id, sip_id):
    """
    Create a SIP from a downloaded object and enqueue the task 'submit_sip'
    once the object is packaged into a SIP
    """
    object_id = int(object_id)
    connect_db()

    # Are we creating a SIP for the first time or updating a preserved
    # package?
    created_date, modified_date = None, None
    with scoped_session() as db:
        last_preserved_package = (
            db.query(MuseumPackage)
            .filter(MuseumPackage.museum_object_id == object_id)
            .filter(MuseumPackage.preserved == True)
            .order_by(MuseumPackage.created_date.desc())
            .first()
        )
        current_package = (
            db.query(MuseumObject)
            .join(
                MuseumPackage,
                MuseumObject.latest_package_id == MuseumPackage.id
            )
            .filter(MuseumObject.id == object_id)
            .one()
            .latest_package
        )

        if not last_preserved_package:
            # We haven't created a preserved SIP yet
            print(f"Creating submission SIP for Object {object_id}")
            created_date = current_package.created_date
        else:
            # We are updating an existing package
            print(f"Creating update SIP for Object {object_id}")
            created_date = last_preserved_package.created_date
            modified_date = current_package.created_date

    # Run the 'create_sip' script
    try:
        museum_package = main(
            object_id=object_id,
            package_dir=PACKAGE_DIR,
            sip_id=sip_id,
            create_date=created_date,
            modify_date=modified_date,
            update=bool(modified_date)
        )
    except PreservationError as exc:
        # If a PreservationError was raised, freeze the object and prevent
        # the object from going further in the workflow.
        freeze_running_object(
            object_id=object_id,
            sip_id=sip_id,
            freeze_reason=exc.error
        )
        return
    except OSError as exc:
        if exc.errno == errno.ENOSPC:
            raise OSError(
                errno.ENOSPC,
                "Ran out of disk space. This may have happened because the "
                "package directory ran out of space while downloading a "
                "large attachment. Try removing packages from the directory "
                "and trying again, processing fewer packages at the same "
                "time."
            )
        raise

    filename = museum_package.sip_filename
    print(f"Created SIP for Object {object_id}, updating database")

    with scoped_session() as db:
        db_package = db.query(MuseumPackage).filter(
            MuseumPackage.sip_filename == filename
        ).one()
        db_package.packaged = True
        db.query(MuseumObject).filter(
            MuseumObject.id == object_id
        ).update({MuseumObject.latest_package_id: db_package.id})

    queue = get_queue(QueueType.SUBMIT_SIP)
    queue.enqueue(
        submit_sip,
        kwargs={"object_id": object_id, "sip_id": sip_id},
        job_id=f"submit_sip_{object_id}"
    )