def test_packages(self, session):
    """
    Test that multiple MuseumPackages can be attached to a MuseumObject
    and that deleting a package updates the relationship
    """
    mus_object = MuseumObject(id=1337, preserved=True)
    mus_package_a = MuseumPackage(
        sip_filename="test_one.tar",
        museum_object=mus_object,
    )
    mus_package_b = MuseumPackage(
        sip_filename="test_two.tar",
        museum_object=mus_object,
        created_date=(
            datetime.datetime.now(datetime.timezone.utc)
            + datetime.timedelta(minutes=10)
        )
    )
    session.add_all([mus_package_a, mus_package_b])
    session.commit()

    mus_object = session.query(MuseumObject).filter_by(id=1337).first()

    assert mus_object.packages[0].sip_filename == "test_one.tar"
    assert mus_object.packages[1].sip_filename == "test_two.tar"
    assert len(mus_object.packages) == 2

    session.delete(mus_package_b)
    session.commit()

    assert len(mus_object.packages) == 1
    assert mus_object.packages[0].museum_object.id == 1337
def test_preservation_pending_museum_package_frozen(self, session):
    """
    Check the 'preservation_pending' status for a MuseumObject that
    would be eligible for preservation but is frozen
    """
    now = datetime.datetime.now(datetime.timezone.utc)
    mus_object = MuseumObject(
        id=1,
        preserved=True,
        frozen=True,
        modified_date=now - datetime.timedelta(days=15),
        created_date=now - datetime.timedelta(days=90)
    )
    mus_package = MuseumPackage(
        sip_filename="fake_package.tar",
        object_modified_date=now - datetime.timedelta(days=50),
        downloaded=True,
        packaged=True,
        uploaded=True
    )
    mus_object.packages.append(mus_package)
    mus_object.latest_package = mus_package

    assert mus_object.packages[0] == mus_object.latest_package

    session.add(mus_object)
    session.commit()

    assert not mus_object.preservation_pending
    assert_preservation_pending_count(session.query(MuseumObject), 0)
def test_preservation_pending_museum_package_no_date(self, session):
    """
    Check the 'preservation_pending' status when the latest package
    has no 'object_modified_date' set
    """
    now = (
        datetime.datetime.now(datetime.timezone.utc)
        - datetime.timedelta(days=50)
    )
    mus_object = MuseumObject(
        id=1,
        preserved=True,
        metadata_hash="new_hash",
        attachment_metadata_hash=""
    )
    mus_package = MuseumPackage(
        sip_filename="fake_package.tar",
        downloaded=True,
        packaged=True,
        uploaded=True,
        preserved=True,
        metadata_hash="old_hash",
        attachment_metadata_hash=""
    )
    mus_object.packages.append(mus_package)
    mus_object.latest_package = mus_package

    session.add(mus_object)
    session.commit()

    assert not mus_object.preservation_pending
    assert_preservation_pending_count(session.query(MuseumObject), 0)

    mus_object.modified_date = now
    session.commit()

    assert mus_object.preservation_pending
    assert_preservation_pending_count(session.query(MuseumObject), 1)
def func(**kwargs):
    # Default the modification date if the caller did not provide one
    if not kwargs.get("object_modified_date"):
        kwargs["object_modified_date"] = TEST_DATE

    museum_package = MuseumPackage(**kwargs)
    session.add(museum_package)
    session.commit()

    return museum_package
def test_preservation_pending_museum_package(self, session):
    """
    Check the 'preservation_pending' status for a MuseumObject that was
    modified after its latest package was created
    """
    now = datetime.datetime.now(datetime.timezone.utc)

    # MuseumObject has one package already, but it was modified again
    # 35 days later. This needs preservation again.
    mus_object = MuseumObject(
        id=1,
        preserved=True,
        modified_date=now - datetime.timedelta(days=15),
        created_date=now - datetime.timedelta(days=90),
        metadata_hash="new_hash",
        attachment_metadata_hash=""
    )
    mus_package = MuseumPackage(
        sip_filename="fake_package.tar",
        object_modified_date=now - datetime.timedelta(days=50),
        downloaded=True,
        packaged=True,
        uploaded=True,
        metadata_hash="old_hash",
        attachment_metadata_hash=""
    )
    mus_object.packages.append(mus_package)
    mus_object.latest_package = mus_package

    assert mus_object.packages[0] == mus_object.latest_package

    session.add(mus_object)
    session.add(
        MuseumObject(
            id=2,
            created_date=datetime.datetime.now(datetime.timezone.utc),
            modified_date=datetime.datetime.now(datetime.timezone.utc),
            preserved=True
        )
    )
    session.commit()

    # Check that only the object that is *not* pending preservation is found
    assert (
        session.query(MuseumObject)
        .with_transformation(MuseumObject.exclude_preservation_pending)
        .one().id == 2
    )

    assert mus_object.preservation_pending
    assert (
        session.query(MuseumObject)
        .with_transformation(MuseumObject.filter_preservation_pending)
        .one().id == 1
    )

    # If the modification date matches the latest package, no preservation
    # is needed
    mus_object.modified_date = now - datetime.timedelta(days=50)
    assert not mus_object.preservation_pending

    session.commit()
    assert_preservation_pending_count(session.query(MuseumObject), 0)
def download_object(object_id):
    """
    Download an object from MuseumPlus and enqueue the task 'create_sip'
    once the object is downloaded
    """
    object_id = int(object_id)

    connect_db()

    # Create a SIP id from the current time
    sip_id = datetime.datetime.now(
        datetime.timezone.utc).strftime("%Y%m%d-%H%M%S")

    try:
        museum_package = main(
            object_id=int(object_id),
            package_dir=PACKAGE_DIR,
            # 'sip_id' is optional, but giving it as a kwarg ensures the
            # filename of the SIP is correct before it is created.
            sip_id=sip_id)
    except PreservationError as exc:
        # If a PreservationError was raised, freeze the object
        freeze_running_object(
            object_id=object_id,
            sip_id=sip_id,
            freeze_reason=exc.error)
        return
    except OSError as exc:
        if exc.errno == errno.ENOSPC:
            raise OSError(
                errno.ENOSPC,
                "Ran out of disk space. This may have happened because the "
                "package directory ran out of space while downloading a "
                "large attachment. Try removing packages from the directory "
                "and trying again by processing fewer packages at the same "
                "time.")

        raise

    filename = museum_package.sip_filename

    with scoped_session() as db:
        db_museum_object = db.query(MuseumObject).filter(
            MuseumObject.id == object_id).one()
        db_package = db.query(MuseumPackage).filter_by(
            sip_filename=filename).first()

        # Get the attachments that currently exist for this object
        # and add them to the new MuseumPackage
        attachment_ids = museum_package.museum_object.attachment_ids
        db_attachments = bulk_create_or_get(
            db, MuseumAttachment, attachment_ids)

        if not db_package:
            db_package = MuseumPackage(
                sip_filename=filename,
                sip_id=sip_id,
                object_modified_date=(
                    museum_package.museum_object.modified_date),
                downloaded=True,
                metadata_hash=db_museum_object.metadata_hash,
                attachment_metadata_hash=(
                    db_museum_object.attachment_metadata_hash),
                attachments=db_attachments)
            db_package.museum_object = db_museum_object
        else:
            raise EnvironmentError(
                f"Package with filename {filename} already exists")

        db_museum_object.latest_package = db_package

    queue = get_queue(QueueType.CREATE_SIP)
    queue.enqueue(
        create_sip,
        kwargs={"object_id": object_id, "sip_id": sip_id},
        job_id=f"create_sip_{object_id}")
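# Illustrative sketch only: how 'download_object' itself could be enqueued,
# mirroring the 'create_sip' enqueue pattern used above. The queue name
# 'QueueType.DOWNLOAD_OBJECT' and the job id format are assumptions, not
# confirmed by this module.
def enqueue_download_object_example(object_id):
    queue = get_queue(QueueType.DOWNLOAD_OBJECT)  # hypothetical queue
    queue.enqueue(
        download_object,
        kwargs={"object_id": int(object_id)},
        job_id=f"download_object_{object_id}")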
def test_sync_processed_sips_accepted(
        session, museum_packages_dir, sftp_dir, redis, sftp_package_factory,
        sync_processed_sips):
    """
    Test that an accepted SIP is synced: the newest ingest report is
    downloaded, the package is marked as preserved and a 'confirm_sip'
    task is enqueued
    """
    # Create local package directory
    museum_packages_dir.joinpath("123456", "logs").mkdir(parents=True)
    museum_packages_dir.joinpath("123456", "sip", "reports").mkdir(
        parents=True)

    # Create two accepted SIPs on the mocked SFTP server.
    # The newer one will be selected according to its newer modification date
    new_package_dir = sftp_package_factory(
        status="accepted", date=datetime.datetime(2019, 5, 28),
        object_id=123456, sip_id="AABBCC2", transfer_id="aabbcc",
        content="New report")
    os.utime(
        new_package_dir
        / "20190102_Object_123456-AABBCC2.tar-aabbcc-ingest-report.xml",
        (time.time() - 600, time.time() - 600))

    old_package_dir = sftp_package_factory(
        status="accepted", date=datetime.datetime(2019, 5, 28),
        object_id=123456, sip_id="CCBBAA2", transfer_id="ccbbaa",
        content="Old report")
    os.utime(
        old_package_dir
        / "20190102_Object_123456-CCBBAA2.tar-ccbbaa-ingest-report.xml",
        (time.time() - 1200, time.time() - 1200))

    # Object.xml is required to load MuseumObjectPackage locally
    report_path = Path(__file__).parent.resolve() / "data" / "Object.xml"
    shutil.copyfile(
        report_path,
        museum_packages_dir / "123456" / "sip" / "reports" / "Object.xml")

    db_museum_object = MuseumObject(
        id=123456, created_date=TEST_DATE, modified_date=TEST_DATE)
    db_museum_package = MuseumPackage(
        sip_filename="20190102_Object_123456-AABBCC2.tar",
        sip_id="AABBCC2",
        object_modified_date=TEST_DATE,
        downloaded=True,
        packaged=True,
        uploaded=True,
        museum_object=db_museum_object)
    db_museum_object.latest_package = db_museum_package
    session.add(db_museum_object)
    session.commit()

    with freezegun.freeze_time("2019-06-01"):
        result = sync_processed_sips(["--days", "7"])

    assert "Found 2 on 2019-05-28" in result.stdout
    assert "Found 2 accepted SIPs" in result.stdout
    assert "Found 0 rejected SIPs" in result.stdout

    # Ingest reports are downloaded
    assert museum_packages_dir.joinpath(
        "123456", "logs", "ingest-report.xml"
    ).read_text() == "<xml><content>New report</content></xml>"
    assert museum_packages_dir.joinpath(
        "123456", "logs", "ingest-report.html"
    ).read_text() == "<html><body>New report</body></html>"

    # Museum package is updated
    db_museum_package = session.query(MuseumPackage).filter_by(
        sip_filename="20190102_Object_123456-AABBCC2.tar").one()
    assert db_museum_package.preserved

    # Status file is created
    assert (
        museum_packages_dir / "123456"
        / "20190102_Object_123456-AABBCC2.tar.status"
    ).read_text() == "accepted"

    # RQ task is enqueued
    queue = get_queue(QueueType.CONFIRM_SIP)
    job = queue.jobs[0]

    assert job.id == "confirm_sip_123456"
    assert job.kwargs == {"object_id": 123456, "sip_id": "AABBCC2"}
def func(**kwargs):
    museum_package = MuseumPackage(**kwargs)
    session.add(museum_package)
    session.commit()

    return museum_package