def test_replicate_aip_when_file(self):
    """Ensure that a replica can be created and its resulting
    properties are consistent for one of Archivematica's file-like
    AIP package types, e.g. 7z.
    """
    space_dir = tempfile.mkdtemp(dir=self.tmp_dir, prefix="space")
    replication_dir = tempfile.mkdtemp(dir=self.tmp_dir,
                                       prefix="replication")
    aip = models.Package.objects.get(
        uuid="88deec53-c7dc-4828-865c-7356386e9399")
    aip.current_location.space.staging_path = space_dir
    aip.current_location.space.save()
    aip.current_location.replicators.create(
        space=aip.current_location.space,
        relative_path=replication_dir,
        purpose=models.Location.REPLICATOR,
    )
    # Guard against fixture data polluting the test: there must be no
    # replicas before replication is requested (matches the sibling
    # copy of this test).
    assert aip.replicas.count() == 0
    aip.create_replicas()
    assert aip.replicas.count() == 1
    replica = aip.replicas.first()
    assert replica is not None
    assert replica.origin_pipeline == aip.origin_pipeline
    # Replication must not recurse: a replica has no replicas itself.
    assert replica.replicas.count() == 0
    package_name = "working_bag.7z"
    dest_dir = os.path.join(replication_dir,
                            utils.uuid_to_path(replica.uuid))
    # dest_dir already contains replication_dir/uuid-path; no need to
    # re-join the components as the original did.
    repl_file_path = os.path.join(dest_dir, package_name)
    assert package_name in os.listdir(dest_dir)
    assert os.path.isfile(repl_file_path)
 def test_replicate_aip_when_file(self):
     """Replicating a file-like AIP (e.g. a 7z package) should produce
     exactly one replica whose properties mirror the original and whose
     payload lands inside the replicator location on disk.
     """
     staging_dir = tempfile.mkdtemp(dir=self.tmp_dir, prefix="space")
     replica_store = tempfile.mkdtemp(dir=self.tmp_dir,
                                      prefix="replication")
     aip = models.Package.objects.get(
         uuid="88deec53-c7dc-4828-865c-7356386e9399")
     location = aip.current_location
     location.space.staging_path = staging_dir
     location.space.save()
     location.replicators.create(
         space=location.space,
         relative_path=replica_store,
         purpose=models.Location.REPLICATOR,
     )
     # Nothing should have been replicated yet.
     assert aip.replicas.count() == 0
     aip.create_replicas()
     assert aip.replicas.count() == 1
     replica = aip.replicas.first()
     assert replica is not None
     assert replica.origin_pipeline == aip.origin_pipeline
     # A replica must not itself be replicated.
     assert replica.replicas.count() == 0
     expected_name = "working_bag.7z"
     replica_dir = os.path.join(replica_store,
                                utils.uuid_to_path(replica.uuid))
     replica_file = os.path.join(replica_store,
                                 utils.uuid_to_path(replica.uuid),
                                 expected_name)
     assert expected_name in os.listdir(replica_dir)
     assert os.path.isfile(replica_file)
 def _test_bagit_structure(replica, replication_dir):
     """Ensure that the contents of a bag are consistent with the
     contents that were created during testing so that we know
     structure is preserved accurately.

     :param replica: replica Package whose on-disk structure to verify.
     :param replication_dir: root of the replicator location on disk.
     """
     bag_contents = [
         "tagmanifest-md5.txt",
         "bagit.txt",
         "manifest-md5.txt",
         "bag-info.txt",
         os.path.join("data", "test.txt"),
     ]
     expected_bag_path = os.path.join(replication_dir,
                                      utils.uuid_to_path(replica.uuid),
                                      "working_bag")
     expected_bagit_structure = {
         os.path.join(expected_bag_path, bag_path)
         for bag_path in bag_contents
     }
     found_structure = set()
     for subdir, _, files in os.walk(replica.current_location.full_path):
         for file_ in files:
             found_structure.add(os.path.join(subdir, file_))
     # The original failure message ended in a dangling colon with no
     # detail; include both sets so a failure is actually diagnosable.
     assert found_structure == expected_bagit_structure, (
         "unexpected bag structure found: {} (expected: {})".format(
             sorted(found_structure), sorted(expected_bagit_structure)))
 def _test_bagit_structure(replica, replication_dir):
     """Ensure the replicated bagit structure remains."""
     expected_bag_path = os.path.join(replication_dir,
                                      utils.uuid_to_path(replica.uuid),
                                      "working_bag")
     # Relative paths that a well-formed replicated bag may contain.
     relative_contents = (
         "tagmanifest-md5.txt",
         "bagit.txt",
         "manifest-md5.txt",
         "bag-info.txt",
         os.path.join("data", "test.txt"),
     )
     allowed_paths = [
         os.path.join(expected_bag_path, name)
         for name in relative_contents
     ]
     # Walk the replica on disk; any file outside the allowed set is a
     # structural error and fails the test immediately.
     for subdir, _, files in os.walk(replica.current_location.full_path):
         for file_ in files:
             file_path = os.path.join(subdir, file_)
             if file_path in allowed_paths:
                 continue
             pytest.fail(
                 "Unexpected file in Bagit structure: {}".format(
                     file_path))
    def test_deletion_and_creation_of_replicas_uncompressed(self):
        """Ensure that an uncompressed package is created properly.
        Because replication seeks to also update the package we try
        adding new files, e.g. it could be through enabling
        normalization during partial-reingest. Through these tests we
        also verify some properties about those files which support the
        storage service's preservation functions.
        """
        PACKAGE = "working_bag"
        FILES = ["file_one", "file_two", "file_three"]
        AIP_LOC = "615103f0-0ee0-4a12-ba17-43192d1143ea"

        # Build an on-disk fixture we are allowed to mutate and point
        # the AIP storage location at it.
        new_aip_store = self._create_mutable_fixture_for_replication(
            PACKAGE, FILES)
        self._point_location_at_on_disk_storage(AIP_LOC, new_aip_store)

        original_dir = os.path.join(new_aip_store, PACKAGE)

        space_dir = tempfile.mkdtemp(dir=self.tmp_dir, prefix="space")
        replication_dir = tempfile.mkdtemp(dir=self.tmp_dir,
                                           prefix="replication")
        aip = models.Package.objects.get(
            uuid="0d4e739b-bf60-4b87-bc20-67a379b28cea")
        aip.current_location.space.staging_path = space_dir
        aip.current_location.space.save()
        aip.current_location.replicators.create(
            space=aip.current_location.space,
            relative_path=replication_dir,
            purpose=models.Location.REPLICATOR,
        )

        # Make sure there is no existing data polluting the tests.
        assert aip.replicas.count() == 0

        # Create the replica and assert some properties about it and
        # the original AIP's relationships.
        aip.create_replicas()
        assert aip.replicas.count() == 1
        replica = aip.replicas.first()
        assert replica is not None
        assert replica.origin_pipeline == aip.origin_pipeline
        assert replica.replicas.count() == 0

        # Ensure that our first replication was created as expected:
        # the replicated directory mirrors the fixture's files exactly.
        first_repl_uuid = replica.uuid
        first_expected_repl = os.path.join(replication_dir,
                                           utils.uuid_to_path(first_repl_uuid),
                                           PACKAGE)
        assert set(os.listdir(first_expected_repl)) == set(FILES)
        assert replica.status == models.Package.UPLOADED

        # Add some new data to our original package and create some
        # properties that we can then measure.
        FILE_TO_ADD = "new_normalization"
        DATA_TO_ADD = "new data"
        new_file = os.path.join(original_dir, FILE_TO_ADD)
        with open(new_file, "w") as normalize_example:
            normalize_example.write(DATA_TO_ADD)
        # Because we have a mutable store to play with, we can also
        # test preservation of modification times during replication.
        TEST_DATETIME = datetime.datetime(year=1970,
                                          month=1,
                                          day=1,
                                          hour=22,
                                          minute=13,
                                          second=0)
        DATE_FORMAT = "%Y-%m-%dT%H:%M:%S"
        DATE_STRING = "1970-01-01T22:13:00"
        mod_time = time.mktime(TEST_DATETIME.timetuple())
        # Backdate the new file so we can later verify that its mtime
        # survives replication.
        os.utime(new_file, (mod_time, mod_time))

        # Create the replica again and ensure the first one no longer
        # exists on disk (a new replication supersedes the old copy).
        aip.create_replicas()
        assert not os.path.isdir(first_expected_repl)

        # We're only creating a second version of a replica so last() is
        # available as a shortcut to get to it.
        replica = aip.replicas.last()
        second_repl_uuid = replica.uuid
        second_expected_repl = os.path.join(
            replication_dir, utils.uuid_to_path(second_repl_uuid), PACKAGE)

        # Ensure the replicated directory structure is what we expect,
        # including the file added after the first replication.
        assert set(os.listdir(second_expected_repl)) == set(FILES +
                                                            [FILE_TO_ADD])

        # Make sure the replicated statuses are correct: the superseded
        # replica is marked DELETED, the fresh one UPLOADED.
        assert aip.replicas.first().status == models.Package.DELETED
        assert aip.replicas.last().status == models.Package.UPLOADED

        new_replicated_file = os.path.join(second_expected_repl, FILE_TO_ADD)
        repl_file_timestamp = os.path.getmtime(new_replicated_file)

        # Ensure the timestamp is preserved.
        pretty_timestamp = datetime.datetime.fromtimestamp(
            repl_file_timestamp).strftime(DATE_FORMAT)
        assert pretty_timestamp == TEST_DATETIME.strftime(DATE_FORMAT)
        assert pretty_timestamp == DATE_STRING

        # Ensure data was copied as expected (byte count matches what
        # was written to the original).
        assert os.path.getsize(new_replicated_file) == len(DATA_TO_ADD)
# Example no. 6
    def store_aip(self, origin_location, origin_path):
        """ Stores an AIP in the correct Location.

        Invokes different transfer mechanisms depending on what the source and
        destination Spaces are.  Checks if there is space in the Space and
        Location for the AIP, and raises a StorageException if not.  All sizes
        expected to be in bytes.

        :param origin_location: Location the AIP currently resides in.
        :param origin_path: Path to the AIP, relative to origin_location.
        """
        self.origin_location = origin_location
        self.origin_path = origin_path
        # TODO Move some of the processing in archivematica
        # clientScripts/storeAIP to here?

        # Check if enough space on the space and location
        # All sizes expected to be in bytes
        src_space = self.origin_location.space
        dest_space = self.current_location.space
        self._check_quotas(dest_space, self.current_location)

        # Store AIP at
        # destination_location/uuid/split/into/chunks/destination_path
        uuid_path = utils.uuid_to_path(self.uuid)
        self.current_path = os.path.join(uuid_path, self.current_path)
        self.save()

        # Store AIP Pointer File at
        # internal_usage_location/uuid/split/into/chunks/pointer.uuid.xml
        if self.package_type in (Package.AIP, Package.AIC):
            self.pointer_file_location = Location.active.get(purpose=Location.STORAGE_SERVICE_INTERNAL)
            self.pointer_file_path = os.path.join(uuid_path, 'pointer.{}.xml'.format(self.uuid))
            pointer_file_src = os.path.join(self.origin_location.relative_path, os.path.dirname(self.origin_path), 'pointer.xml')
            pointer_file_dst = os.path.join(self.pointer_file_location.relative_path, self.pointer_file_path)

        self.status = Package.PENDING
        self.save()

        # Move pointer file.  A missing pointer file is tolerated (the
        # package is stored without one) but the cause is logged.
        if self.package_type in (Package.AIP, Package.AIC):
            try:
                src_space.move_to_storage_service(pointer_file_src, self.pointer_file_path, self.pointer_file_location.space)
                self.pointer_file_location.space.move_from_storage_service(self.pointer_file_path, pointer_file_dst)
            except Exception:
                # Was a bare ``except:``, which would also swallow
                # KeyboardInterrupt/SystemExit; narrowed and the
                # traceback is now logged for diagnosis.
                LOGGER.warning("No pointer file found", exc_info=True)
                self.pointer_file_location = None
                self.pointer_file_path = None
                self.save()

        # Move AIP
        src_space.move_to_storage_service(
            source_path=os.path.join(self.origin_location.relative_path, self.origin_path),
            destination_path=self.current_path,  # This should include Location.path
            destination_space=dest_space)
        self.status = Package.STAGING
        self.save()
        src_space.post_move_to_storage_service()

        dest_space.move_from_storage_service(
            source_path=self.current_path,  # This should include Location.path
            destination_path=os.path.join(self.current_location.relative_path, self.current_path),
        )
        # Update package status once transferred to SS.  LOM and Arkivum
        # spaces complete the upload later, so their status is left as-is.
        if dest_space.access_protocol not in (Space.LOM, Space.ARKIVUM):
            self.status = Package.UPLOADED
        self.save()
        dest_space.post_move_from_storage_service(
            staging_path=self.current_path,
            destination_path=os.path.join(self.current_location.relative_path, self.current_path),
            package=self)

        self._update_quotas(dest_space, self.current_location)

        # Update pointer file's location information
        if self.pointer_file_path and self.package_type in (Package.AIP, Package.AIC):
            pointer_absolute_path = self.full_pointer_file_path
            root = etree.parse(pointer_absolute_path)
            element = root.find('.//mets:file', namespaces=utils.NSMAP)
            flocat = element.find('mets:FLocat', namespaces=utils.NSMAP)
            if self.uuid in element.get('ID', '') and flocat is not None:
                flocat.set('{{{ns}}}href'.format(ns=utils.NSMAP['xlink']), self.full_path)
            # Add USE="Archival Information Package" to fileGrp.  Required for
            # LOCKSS, and not provided in Archivematica <=1.1
            # BUGFIX: the attribute must be added when it is *missing*;
            # the previous ``is not None`` test only re-set it on files
            # that already had it and never added it when absent.
            if root.find('.//mets:fileGrp[@USE="Archival Information Package"]', namespaces=utils.NSMAP) is None:
                root.find('.//mets:fileGrp', namespaces=utils.NSMAP).set('USE', 'Archival Information Package')

            # lxml's etree.tostring returns bytes, so write in binary
            # mode ('wb' was 'w', which breaks under Python 3).
            with open(pointer_absolute_path, 'wb') as f:
                f.write(etree.tostring(root, pretty_print=True))