def test_packages(self, session):
        """
        Check that the packages attached to a MuseumObject can be read back
        in the expected order and that a deleted package disappears from
        the object's package list
        """
        parent = MuseumObject(id=1337, preserved=True)

        first_package = MuseumPackage(
            sip_filename="test_one.tar", museum_object=parent
        )

        # The second package gets a creation date 10 minutes in the future
        ten_minutes_ahead = (
            datetime.datetime.now(datetime.timezone.utc)
            + datetime.timedelta(minutes=10)
        )
        second_package = MuseumPackage(
            sip_filename="test_two.tar",
            museum_object=parent,
            created_date=ten_minutes_ahead,
        )

        session.add_all([first_package, second_package])
        session.commit()

        # Read the object back through a query instead of reusing 'parent'
        fetched = session.query(MuseumObject).filter_by(id=1337).first()

        packages = fetched.packages
        assert packages[0].sip_filename == "test_one.tar"
        assert packages[1].sip_filename == "test_two.tar"
        assert len(packages) == 2

        session.delete(second_package)
        session.commit()

        # Only the first package is left and it still points at the object
        remaining = fetched.packages
        assert len(remaining) == 1
        assert remaining[0].museum_object.id == 1337
    def test_preservation_pending_museum_package_frozen(self, session):
        """
        Verify that a frozen MuseumObject does not report
        'preservation_pending', although it was modified after the date
        recorded in its latest package
        """
        current_time = datetime.datetime.now(datetime.timezone.utc)
        one_day = datetime.timedelta(days=1)

        frozen_object = MuseumObject(
            id=1,
            preserved=True,
            frozen=True,
            modified_date=current_time - 15 * one_day,
            created_date=current_time - 90 * one_day,
        )
        package = MuseumPackage(
            sip_filename="fake_package.tar",
            object_modified_date=current_time - 50 * one_day,
            downloaded=True,
            packaged=True,
            uploaded=True,
        )

        # Attach the package and mark it as the latest one
        frozen_object.packages.append(package)
        frozen_object.latest_package = package
        assert frozen_object.packages[0] == frozen_object.latest_package

        session.add(frozen_object)
        session.commit()

        # Neither the instance attribute nor the query may report the object
        assert not frozen_object.preservation_pending
        assert_preservation_pending_count(session.query(MuseumObject), 0)
    def test_preservation_pending_museum_package_no_date(self, session):
        """
        Check that a MuseumObject created without a modification date is not
        'preservation_pending', and that it becomes pending once a
        modification date (50 days in the past) is set
        """
        fifty_days_ago = datetime.datetime.now(
            datetime.timezone.utc
        ) - datetime.timedelta(days=50)

        # The object's metadata hash differs from the one in its package
        museum_object = MuseumObject(
            id=1,
            preserved=True,
            metadata_hash="new_hash",
            attachment_metadata_hash="",
        )
        package = MuseumPackage(
            sip_filename="fake_package.tar",
            downloaded=True,
            packaged=True,
            uploaded=True,
            preserved=True,
            metadata_hash="old_hash",
            attachment_metadata_hash="",
        )
        museum_object.packages.append(package)
        museum_object.latest_package = package

        session.add(museum_object)
        session.commit()

        # No modification date yet: nothing is pending
        assert not museum_object.preservation_pending
        assert_preservation_pending_count(session.query(MuseumObject), 0)

        museum_object.modified_date = fifty_days_ago
        session.commit()

        # With the modification date in place the object is pending
        assert museum_object.preservation_pending
        assert_preservation_pending_count(session.query(MuseumObject), 1)
    def func(**kwargs):
        """
        Create a MuseumPackage from the given keyword arguments, add it to
        the session, commit and return it.

        'object_modified_date' falls back to TEST_DATE when it is missing
        or falsy.
        """
        kwargs["object_modified_date"] = (
            kwargs.get("object_modified_date") or TEST_DATE
        )

        package = MuseumPackage(**kwargs)
        session.add(package)
        session.commit()
        return package
    def test_preservation_pending_museum_package(self, session):
        """
        Check 'preservation_pending' for a MuseumObject that was modified
        after the date recorded in its latest package, and check that the
        status is cleared once both dates are the same
        """
        utc = datetime.timezone.utc
        current_time = datetime.datetime.now(utc)
        package_date = current_time - datetime.timedelta(days=50)

        # The object already has one package, but the object was modified
        # again 35 days after it. It has to be preserved again.
        pending_object = MuseumObject(
            id=1,
            preserved=True,
            modified_date=current_time - datetime.timedelta(days=15),
            created_date=current_time - datetime.timedelta(days=90),
            metadata_hash="new_hash",
            attachment_metadata_hash="",
        )
        package = MuseumPackage(
            sip_filename="fake_package.tar",
            object_modified_date=package_date,
            downloaded=True,
            packaged=True,
            uploaded=True,
            metadata_hash="old_hash",
            attachment_metadata_hash="",
        )
        pending_object.packages.append(package)
        pending_object.latest_package = package
        assert pending_object.packages[0] == pending_object.latest_package

        session.add(pending_object)

        # A second object without any packages
        other_object = MuseumObject(
            id=2,
            created_date=datetime.datetime.now(utc),
            modified_date=datetime.datetime.now(utc),
            preserved=True,
        )
        session.add(other_object)
        session.commit()

        # Excluding pending objects leaves only the second object
        not_pending = (
            session.query(MuseumObject)
            .with_transformation(MuseumObject.exclude_preservation_pending)
            .one()
        )
        assert not_pending.id == 2

        assert pending_object.preservation_pending

        # Filtering for pending objects returns only the first object
        pending = (
            session.query(MuseumObject)
            .with_transformation(MuseumObject.filter_preservation_pending)
            .one()
        )
        assert pending.id == 1

        # Once the modification date equals the package's date, the object
        # no longer needs preservation
        pending_object.modified_date = package_date
        assert not pending_object.preservation_pending

        session.commit()

        assert_preservation_pending_count(session.query(MuseumObject), 0)
Example #6
0
def download_object(object_id):
    """
    Download an object from MuseumPlus and enqueue the task 'create_sip'
    once the object is downloaded

    If the download raises a PreservationError, the object is frozen with
    the error as the freeze reason and nothing is enqueued.

    :param object_id: ID of the object to download; converted using `int()`

    :raises OSError: with `errno.ENOSPC` and an explanatory message if the
                     download failed due to lack of disk space. Any other
                     OSError is re-raised as-is.
    :raises EnvironmentError: if a package with the same SIP filename
                              already exists
    """
    object_id = int(object_id)
    connect_db()

    # Create a SIP id from the current time
    sip_id = datetime.datetime.now(
        datetime.timezone.utc).strftime("%Y%m%d-%H%M%S")

    try:
        museum_package = main(
            object_id=object_id,
            package_dir=PACKAGE_DIR,
            # 'sip_id' is optional, but giving it as a kwarg ensures the
            # filename of the SIP is correct before it is created.
            sip_id=sip_id)
    except PreservationError as exc:
        # If a PreservationError was raised, freeze the object
        freeze_running_object(object_id=object_id,
                              sip_id=sip_id,
                              freeze_reason=exc.error)
        return
    except OSError as exc:
        if exc.errno == errno.ENOSPC:
            # Chain explicitly so the traceback shows the original error as
            # the direct cause of this more descriptive one
            raise OSError(
                errno.ENOSPC,
                "Ran out of disk space. This may have happened because the "
                "package directory ran out of space while downloading a "
                "large attachment. Try removing packages from the directory "
                "and trying again by processing less packages at the same "
                "time.") from exc

        raise

    filename = museum_package.sip_filename

    with scoped_session() as db:
        db_museum_object = db.query(MuseumObject).filter(
            MuseumObject.id == object_id).one()

        db_package = db.query(MuseumPackage).filter_by(
            sip_filename=filename).first()

        # Get the attachments that currently exist for this object
        # and add them to the new MuseumPackage
        attachment_ids = museum_package.museum_object.attachment_ids
        db_attachments = bulk_create_or_get(db, MuseumAttachment,
                                            attachment_ids)

        # The SIP filename has to be unique; refuse to reuse an existing one
        if db_package:
            raise EnvironmentError(
                f"Package with filename {filename} already exists")

        db_package = MuseumPackage(
            sip_filename=filename,
            sip_id=sip_id,
            object_modified_date=(
                museum_package.museum_object.modified_date),
            downloaded=True,
            metadata_hash=db_museum_object.metadata_hash,
            attachment_metadata_hash=(
                db_museum_object.attachment_metadata_hash),
            attachments=db_attachments)
        db_package.museum_object = db_museum_object

        db_museum_object.latest_package = db_package

        queue = get_queue(QueueType.CREATE_SIP)
        queue.enqueue(create_sip,
                      kwargs={
                          "object_id": object_id,
                          "sip_id": sip_id
                      },
                      job_id=f"create_sip_{object_id}")
def test_sync_processed_sips_accepted(session, museum_packages_dir, sftp_dir,
                                      redis, sftp_package_factory,
                                      sync_processed_sips):
    """
    Run 'sync_processed_sips' against two accepted SIPs of the same object
    and check that the report with the newer modification time is
    downloaded, the package is marked as preserved, the status file is
    written and the 'confirm_sip' job is enqueued
    """
    object_dir = museum_packages_dir / "123456"
    logs_dir = object_dir / "logs"
    reports_dir = object_dir / "sip" / "reports"
    sip_filename = "20190102_Object_123456-AABBCC2.tar"

    # Local package directory
    logs_dir.mkdir(parents=True)
    reports_dir.mkdir(parents=True)

    # Two accepted SIPs are placed on the mocked SFTP server.
    # The one with the newer modification time is expected to be picked.
    newer_dir = sftp_package_factory(
        status="accepted",
        date=datetime.datetime(2019, 5, 28),
        object_id=123456,
        sip_id="AABBCC2",
        transfer_id="aabbcc",
        content="New report",
    )
    newer_report = (
        newer_dir
        / "20190102_Object_123456-AABBCC2.tar-aabbcc-ingest-report.xml"
    )
    os.utime(newer_report, (time.time() - 600, time.time() - 600))

    older_dir = sftp_package_factory(
        status="accepted",
        date=datetime.datetime(2019, 5, 28),
        object_id=123456,
        sip_id="CCBBAA2",
        transfer_id="ccbbaa",
        content="Old report",
    )
    older_report = (
        older_dir
        / "20190102_Object_123456-CCBBAA2.tar-ccbbaa-ingest-report.xml"
    )
    os.utime(older_report, (time.time() - 1200, time.time() - 1200))

    # Object.xml is required to load MuseumObjectPackage locally
    object_xml = Path(__file__).parent.resolve() / "data" / "Object.xml"
    shutil.copyfile(object_xml, reports_dir / "Object.xml")

    museum_object = MuseumObject(
        id=123456, created_date=TEST_DATE, modified_date=TEST_DATE
    )
    museum_package = MuseumPackage(
        sip_filename="20190102_Object_123456-AABBCC2.tar",
        sip_id="AABBCC2",
        object_modified_date=TEST_DATE,
        downloaded=True,
        packaged=True,
        uploaded=True,
        museum_object=museum_object,
    )
    museum_object.latest_package = museum_package

    session.add(museum_object)
    session.commit()

    with freezegun.freeze_time("2019-06-01"):
        result = sync_processed_sips(["--days", "7"])

    output = result.stdout
    assert "Found 2 on 2019-05-28" in output
    assert "Found 2 accepted SIPs" in output
    assert "Found 0 rejected SIPs" in output

    # The ingest reports of the newer SIP were downloaded
    xml_report = (logs_dir / "ingest-report.xml").read_text()
    assert xml_report == "<xml><content>New report</content></xml>"
    html_report = (logs_dir / "ingest-report.html").read_text()
    assert html_report == "<html><body>New report</body></html>"

    # The museum package was updated
    synced_package = (
        session.query(MuseumPackage)
        .filter_by(sip_filename=sip_filename)
        .one()
    )
    assert synced_package.preserved

    # The status file was created
    status_file = object_dir / "20190102_Object_123456-AABBCC2.tar.status"
    assert status_file.read_text() == "accepted"

    # The RQ task was enqueued
    confirm_queue = get_queue(QueueType.CONFIRM_SIP)
    first_job = confirm_queue.jobs[0]
    assert first_job.id == "confirm_sip_123456"
    assert first_job.kwargs == {"object_id": 123456, "sip_id": "AABBCC2"}
    def func(**kwargs):
        """
        Create a MuseumPackage from the given keyword arguments, add it to
        the session, commit and return it
        """
        package = MuseumPackage(**kwargs)
        session.add(package)
        session.commit()
        return package