コード例 #1
0
def test_record_backlog_event(tmp_path):
    """Recording a backlog event adds one PREMIS event to the file's METS entry.

    A transfer with a single file is created, a minimal transfer METS holding
    that file is written under ``tmp_path``, ``_record_backlog_event`` is
    called, and the METS is read back to inspect the event it gained.
    """
    event_date = "2019-03-12"

    transfer = Transfer.objects.create(uuid="756db89c-1380-459d-83bc-d3772f1e7dd8")
    user = User.objects.create(id=1)
    transfer.update_active_agent(user_id=user.id)
    file_obj = File.objects.create(
        uuid="3c567bc8-0847-4d12-a77d-0ed3a0361c0a", transfer=transfer
    )

    # The function under test reads an existing METS file, so one holding a
    # single file entry has to be on disk before it is called.
    docs_dir = tmp_path / "metadata/submissionDocumentation"
    docs_dir.mkdir(parents=True)
    mets_path = str(docs_dir / "METS.xml")
    document = metsrw.METSDocument()
    entry = metsrw.FSEntry(
        path="foobar.jpg", label="foobar", type="Item", file_uuid=file_obj.uuid
    )
    document.append_file(entry)
    document.write(mets_path, pretty_print=True)

    move_to_backlog._record_backlog_event(transfer.uuid, str(tmp_path), event_date)

    # Read the document back and check the PREMIS event on its only entry.
    reloaded = metsrw.METSDocument().fromfile(mets_path)
    recorded_events = next(iter(reloaded.all_files())).get_premis_events()
    assert len(recorded_events) == 1
    backlog_event = recorded_events[0]
    assert backlog_event.event_type == "placement in backlog"
    assert backlog_event.event_date_time == event_date
コード例 #2
0
 def test_remove_file(self):
     """It should remove an entry, and everything beneath it, from the document."""
     # Setup: root -> [dir1 -> [dir2 -> [level3.txt], level2.txt], level1.txt]
     level3_uuid = str(uuid.uuid4())
     level3 = metsrw.FSEntry('dir1/dir2/level3.txt', file_uuid=level3_uuid)
     dir2 = metsrw.FSEntry('dir1/dir2', type='Directory', children=[level3])
     level2 = metsrw.FSEntry('dir1/level2.txt', file_uuid=str(uuid.uuid4()))
     dir1 = metsrw.FSEntry('dir1', type='Directory', children=[dir2, level2])
     level1 = metsrw.FSEntry('level1.txt', file_uuid=str(uuid.uuid4()))
     root = metsrw.FSEntry('root', type='Directory', children=[dir1, level1])
     doc = metsrw.METSDocument()
     doc.append_file(root)
     assert len(doc.all_files()) == 6

     # Removing a single file detaches it from its parent and the document.
     doc.remove_entry(level3)
     assert len(doc.all_files()) == 5
     assert doc.get_file(file_uuid=level3_uuid) is None
     assert level3 not in dir2.children
     assert level3 not in doc.all_files()

     # Removing a directory also removes everything nested inside it.
     doc.remove_entry(dir1)
     assert len(doc.all_files()) == 2
     assert doc.get_file(path='dir1') is None
     assert dir1 not in root.children
     for removed in (dir1, level2, dir2):
         assert removed not in doc.all_files()
     assert level1 in root.children

     # Removing the root element leaves the document empty.
     doc.remove_entry(root)
     assert len(doc.all_files()) == 0
コード例 #3
0
 def test_files(self):
     """It should list every entry in the tree, including ones appended later."""
     # Build a tree that is several children deep.
     level3 = metsrw.FSEntry('level3.txt', file_uuid=str(uuid.uuid4()))
     dir2 = metsrw.FSEntry('dir2', type='Directory', children=[level3])
     level2 = metsrw.FSEntry('level2.txt', file_uuid=str(uuid.uuid4()))
     dir1 = metsrw.FSEntry('dir1', type='Directory', children=[dir2, level2])
     level1 = metsrw.FSEntry('level1.txt', file_uuid=str(uuid.uuid4()))
     root = metsrw.FSEntry('root', type='Directory', children=[dir1, level1])
     doc = metsrw.METSDocument()
     doc.append_file(root)

     collected = doc.all_files()
     assert collected
     assert len(collected) == 6
     for expected in (root, level1, dir1, level2, dir2, level3):
         assert expected in collected

     # A second top-level entry shows up in the next listing.
     extra = metsrw.FSEntry('file4.txt', file_uuid=str(uuid.uuid4()))
     doc.append_file(extra)
     collected = doc.all_files()
     assert len(collected) == 7
     assert extra in collected
コード例 #4
0
 def create_test_pointer_file(self):
     """Build and return a ``METSDocument`` holding one pointer-file entry.

     The entry carries a PREMIS object, the compression event and two PREMIS
     agents, all built from the example constants in ``c``.
     """
     # 1. PREMIS event and object as premisrw class instances.
     compression_event = premisrw.PREMISEvent(data=c.EX_COMPR_EVT)
     # ``compression_details`` unpacks to three values; the first is unused.
     _, tool_version, tool_name = compression_event.compression_details
     premis_object = premisrw.PREMISObject(
         xsi_type=c.EX_PTR_XSI_TYPE,
         identifier_value=c.EX_PTR_IDENTIFIER_VALUE,
         message_digest_algorithm=c.EX_PTR_MESSAGE_DIGEST_ALGORITHM,
         message_digest=c.EX_PTR_MESSAGE_DIGEST,
         size=c.EX_PTR_SIZE,
         format_name=c.EX_PTR_FORMAT_NAME,
         format_registry_key=c.EX_PTR_FORMAT_REGISTRY_KEY,
         creating_application_name=tool_name,
         creating_application_version=tool_version,
         date_created_by_application=c.EX_PTR_DATE_CREATED_BY_APPLICATION)
     transform_files = compression_event.get_decompression_transform_files()

     # 2. The METS pointer file itself.
     document = metsrw.METSDocument()
     pointer_entry = metsrw.FSEntry(
         path=c.EX_PTR_PATH,
         file_uuid=c.EX_PTR_IDENTIFIER_VALUE,
         use=c.EX_PTR_PACKAGE_TYPE,
         type=c.EX_PTR_PACKAGE_TYPE,
         transform_files=transform_files,
         mets_div_type=c.EX_PTR_AIP_SUBTYPE)
     pointer_entry.add_premis_object(premis_object.serialize())
     pointer_entry.add_premis_event(compression_event.serialize())
     for agent_data in (c.EX_AGT_1, c.EX_AGT_2):
         pointer_entry.add_premis_agent(premisrw.data_to_premis(agent_data))
     document.append_file(pointer_entry)
     return document
コード例 #5
0
def write_mets_to_file(sip, unit_path, output_md_path, output_md_name):
    """Serialize ``sip`` as a METS document and write it to disk as UTF-8.

    Args:
        sip: Entry appended to a new ``metsrw.METSDocument`` before it is
            serialized (passed straight to ``append_file``).
        unit_path: Base path, used only to derive the default output
            directory ``<unit_path>/metadata``.
        output_md_path: Directory to write into, or ``None`` for the default.
            It is created when it does not exist.
        output_md_name: Output file name, or ``None`` for ``METS.xml``.
    """
    metadata_path = output_md_path
    if metadata_path is None:
        metadata_path = os.path.join(unit_path, "metadata")
    if not os.path.exists(metadata_path):
        os.makedirs(metadata_path)

    metadata_name = output_md_name
    if metadata_name is None:
        metadata_name = "METS.xml"
    mets_path = os.path.join(metadata_path, metadata_name)

    # Write the data structure out to a file and ensure that the encoding is
    # purposely set to UTF-8. This pattern is used in ```create_mets_v2.py```.
    # Given the opportunity we should add an encoding feature to the metsrw
    # package.
    mets_f = metsrw.METSDocument()
    mets_f.append_file(sip)
    # Serialize before opening the file so a serialization error does not
    # leave a truncated file behind.
    serialized = etree.tostring(
        mets_f.serialize(),
        pretty_print=True,
        encoding="utf-8",
        xml_declaration=True,
    )
    # ``etree.tostring`` with an explicit encoding returns bytes, so the file
    # must be opened in binary mode; text mode raises TypeError on Python 3.
    with open(mets_path, "wb") as xml_file:
        xml_file.write(serialized)
コード例 #6
0
def _record_backlog_event(transfer_id, transfer_path, created_at):
    """Record backlog event in both the database and the transfer METS.

    For every ``File`` row of the transfer that has a matching entry in the
    transfer METS, a "placement in backlog" PREMIS event is added to the
    entry and stored through ``insertIntoEvents``. Files without a matching
    entry are skipped. The METS file is rewritten in place at the end.
    """
    mets_path = os.path.join(
        transfer_path, "metadata", "submissionDocumentation", "METS.xml"
    )
    mets = metsrw.METSDocument().fromfile(mets_path)

    # Index the entries by file UUID once so each lookup below is a dict hit.
    entries_by_uuid = {}
    for entry in mets.all_files():
        entries_by_uuid[entry.file_uuid] = entry

    # Assuming the same agents apply to all files.
    agents = _transfer_agents(transfer_id)
    event_type = "placement in backlog"

    for file_obj in File.objects.filter(transfer_id=transfer_id).iterator():
        if file_obj.uuid not in entries_by_uuid:
            continue
        fsentry = entries_by_uuid[file_obj.uuid]
        # The same fresh identifier is used for the METS and the database.
        event_id = str(uuid.uuid4())
        fsentry.add_premis_event(
            _premis_event_data(event_id, event_type, created_at, agents)
        )
        insertIntoEvents(
            fileUUID=file_obj.uuid,
            eventIdentifierUUID=event_id,
            eventType=event_type,
            eventDateTime=created_at,
            agents=agents,
        )

    mets.write(mets_path, pretty_print=True)
コード例 #7
0
    def test_full_mets(self):
        """Build a full SIP tree with metadata and write it out as METS.

        There are no assertions: the test passes when building and writing
        the document raise nothing. The written file is deleted afterwards.
        """
        mw = metsrw.METSDocument()

        # objects/ with two originals and their preservation derivatives.
        file1 = metsrw.FSEntry('objects/object1.ext', file_uuid=str(uuid.uuid4()))
        file2 = metsrw.FSEntry('objects/object2.ext', file_uuid=str(uuid.uuid4()))
        file1p = metsrw.FSEntry('objects/object1-preservation.ext', use='preservation', file_uuid=str(uuid.uuid4()), derived_from=file1)
        file2p = metsrw.FSEntry('objects/object2-preservation.ext', use='preservation', file_uuid=str(uuid.uuid4()), derived_from=file2)
        objects = metsrw.FSEntry('objects', type='Directory', children=[file1, file2, file1p, file2p])

        # metadata/ with an empty transfers/ directory and one CSV.
        metadata_children = [
            metsrw.FSEntry('transfers', type='Directory', children=[]),
            metsrw.FSEntry('metadata/metadata.csv', use='metadata', file_uuid=str(uuid.uuid4())),
        ]
        metadata = metsrw.FSEntry('metadata', type='Directory', children=metadata_children)

        # submissionDocumentation/ with a single METS file.
        sub_doc_children = [
            metsrw.FSEntry('submissionDocumentation/METS.xml', use='submissionDocumentation', file_uuid=str(uuid.uuid4())),
        ]
        sub_doc = metsrw.FSEntry('submissionDocumentation', type='Directory', children=sub_doc_children)

        sip = metsrw.FSEntry('sipname-uuid', type='Directory', children=[objects, metadata, sub_doc])
        sip.add_dublin_core('<dublincore>sip metadata</dublincore>')

        file1.add_premis_object('<premis>object</premis>')
        file1.add_premis_event('<premis>event</premis>')
        file1.add_premis_agent('<premis>agent</premis>')
        # Rights and Dublin Core are each added twice, the newer replacing
        # the older.
        old_rights = file1.add_premis_rights('<premis>rights</premis>')
        new_rights = file1.add_premis_rights('<premis>newer rights</premis>')
        old_rights.replace_with(new_rights)
        old_dc = file1.add_dublin_core('<dublincore>metadata</dublincore>')
        new_dc = file1.add_dublin_core('<dublincore>newer metadata</dublincore>')
        old_dc.replace_with(new_dc)

        mw.append_file(sip)
        mw.write('full_metsrw.xml', fully_qualified=True, pretty_print=True)
        os.remove('full_metsrw.xml')
コード例 #8
0
 def test_write(self):
     """It should write the serialized tree to disk, matching the fixture.

     ``serialize`` is replaced with a stub that returns the parsed fixture,
     so only ``write`` is exercised here.
     """
     fixture_path = 'fixtures/complete_mets.xml'
     output_path = 'test_write.xml'
     mw = metsrw.METSDocument()
     # mock serialize
     parser = etree.XMLParser(remove_blank_text=True)
     root = etree.parse(fixture_path, parser=parser).getroot()
     mw.serialize = lambda fully_qualified=True: root
     try:
         mw.write(output_path, pretty_print=True)
         assert filecmp.cmp(fixture_path, output_path, shallow=False)
     finally:
         # Clean up even when the write or the comparison fails, so a failed
         # run does not leave the output file in the working directory.
         if os.path.exists(output_path):
             os.remove(output_path)
コード例 #9
0
 def test_parse(self):
     """
     It should set the correct createdate.
     It should create FSEntrys for every file and directory.
     It should associate amdSec and dmdSec with the FSEntry.
     It should associate derived files.
     """
     xml_parser = etree.XMLParser(remove_blank_text=True)
     doc = metsrw.METSDocument()
     doc.tree = etree.parse('fixtures/complete_mets.xml', parser=xml_parser)
     doc._parse_tree()

     assert doc.createdate == '2014-07-23T21:48:33'
     assert len(doc.all_files()) == 14
     top_dir = doc.get_file(
         type='Directory',
         label='csv-48accdf3-e425-4874-aad3-67ade019a214')
     assert top_dir is not None
     objects_dir = doc.get_file(type='Directory', label='objects')
     assert objects_dir is not None

     # The original file.
     original = doc.get_file(type='Item', label='Landing_zone.jpg')
     assert original is not None
     assert original.path == 'objects/Landing_zone.jpg'
     assert original.use == 'original'
     assert original.parent == objects_dir
     assert original.children == []
     assert original.file_uuid == 'ab5c67fc-8f80-4e46-9f20-8d5ae29c43f2'
     assert original.derived_from is None
     assert original.admids == ['amdSec_1']
     assert original.dmdids == ['dmdSec_1']
     assert original.file_id() == 'file-ab5c67fc-8f80-4e46-9f20-8d5ae29c43f2'
     assert original.group_id() == 'Group-ab5c67fc-8f80-4e46-9f20-8d5ae29c43f2'

     # Its preservation derivative, which shares the original's group id.
     derivative = doc.get_file(
         type='Item',
         label='Landing_zone-fc33fc0e-40ef-4ad9-ba52-860368e8ce5a.tif')
     assert derivative is not None
     assert derivative.path == 'objects/Landing_zone-fc33fc0e-40ef-4ad9-ba52-860368e8ce5a.tif'
     assert derivative.use == 'preservation'
     assert derivative.parent == objects_dir
     assert derivative.children == []
     assert derivative.file_uuid == 'e284d015-cfb0-45dd-961d-512bf0f47cf6'
     source_file = doc.get_file(type='Item', label='Landing_zone.jpg')
     assert derivative.derived_from == source_file
     assert derivative.admids == ['amdSec_2']
     assert derivative.dmdids == []
     assert derivative.file_id() == 'file-e284d015-cfb0-45dd-961d-512bf0f47cf6'
     assert derivative.group_id() == 'Group-ab5c67fc-8f80-4e46-9f20-8d5ae29c43f2'

     # The remaining directories only need to be present.
     remaining_dir_labels = (
         'metadata',
         'transfers',
         'csv-55599568-90bd-46ac-b1be-d1a538793cae',
         'submissionDocumentation',
         'transfer-csv-55599568-90bd-46ac-b1be-d1a538793cae',
     )
     for dir_label in remaining_dir_labels:
         assert doc.get_file(type='Directory', label=dir_label) is not None
コード例 #10
0
 def test_mets_header_lastmoddate(self):
     """It should put both CREATEDATE and LASTMODDATE on the metsHdr element."""
     created = '2014-07-16T22:52:02.480108'
     modified = '3014-07-16T22:52:02.480108'
     doc = metsrw.METSDocument()
     doc.createdate = created
     mets_hdr = doc._mets_header(modified)
     assert mets_hdr.tag == '{http://www.loc.gov/METS/}metsHdr'
     attributes = mets_hdr.attrib
     assert attributes['CREATEDATE'] == created
     assert attributes['LASTMODDATE'] == modified
     # Both values are strings, so this is a string comparison.
     assert attributes['CREATEDATE'] < attributes['LASTMODDATE']
コード例 #11
0
    def test_parse_tree_no_createdate(self):
        mw = metsrw.METSDocument()
        mets_string = b"""<?xml version='1.0' encoding='ASCII'?>
<mets xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns="http://www.loc.gov/METS/" xsi:schemaLocation="http://www.loc.gov/METS/ http://www.loc.gov/standards/mets/version18/mets.xsd">
  <metsHdr/><structMap TYPE="physical"></structMap>
</mets>
"""
        root = etree.fromstring(mets_string)
        mw.tree = root
        mw._parse_tree()
        assert mw.createdate is None
コード例 #12
0
 def test_mets_root(self):
     """It should create a mets root with the schema location and namespaces."""
     expected_location = (
         "http://www.loc.gov/METS/ "
         "http://www.loc.gov/standards/mets/version18/mets.xsd"
     )
     expected_nsmap = {
         'mets': "http://www.loc.gov/METS/",
         'xsi': "http://www.w3.org/2001/XMLSchema-instance",
         'xlink': "http://www.w3.org/1999/xlink",
     }
     root_el = metsrw.METSDocument()._document_root()
     assert root_el.tag == '{http://www.loc.gov/METS/}mets'
     schema_location_attr = metsrw.lxmlns('xsi') + 'schemaLocation'
     assert root_el.attrib[schema_location_attr] == expected_location
     assert root_el.nsmap == expected_nsmap
コード例 #13
0
 def test_filesec(self):
     """It should group the given files into one fileGrp per USE value."""
     # NOTE: 'preservaton' is spelled this way in both the input and the
     # assertion below; it is only an arbitrary USE value for this test.
     original_1 = metsrw.FSEntry('objects/file1.txt', file_uuid=str(uuid.uuid4()))
     derivative = metsrw.FSEntry('objects/file1-preservation.txt', use='preservaton', file_uuid=str(uuid.uuid4()))
     original_2 = metsrw.FSEntry('objects/file2.txt', file_uuid=str(uuid.uuid4()))
     doc = metsrw.METSDocument()
     file_sec = doc._filesec([original_1, derivative, original_2])
     assert isinstance(file_sec, etree._Element)
     assert file_sec.tag == '{http://www.loc.gov/METS/}fileSec'
     assert len(file_sec) == 2  # 2 groups
     first_group = file_sec[0]
     assert first_group.tag == '{http://www.loc.gov/METS/}fileGrp'
     assert first_group.get('USE') == 'original'
     second_group = file_sec[1]
     assert second_group.tag == '{http://www.loc.gov/METS/}fileGrp'
     assert second_group.get('USE') == 'preservaton'
コード例 #14
0
 def test_collect_mdsec_elements(self):
     """It should collect dmdSec sections ahead of amdSec sections."""
     with_both = metsrw.FSEntry('file1.txt', file_uuid=str(uuid.uuid4()))
     with_both.amdsecs.append(metsrw.AMDSec())
     with_both.dmdsecs.append(metsrw.SubSection('dmdSec', None))
     dmd_only = metsrw.FSEntry('file2.txt', file_uuid=str(uuid.uuid4()))
     dmd_only.dmdsecs.append(metsrw.SubSection('dmdSec', None))
     doc = metsrw.METSDocument()
     collected = doc._collect_mdsec_elements([with_both, dmd_only])
     # Check ordering - dmdSec before amdSec
     assert isinstance(collected, list)
     assert len(collected) == 3
     for position in (0, 1):
         assert isinstance(collected[position], metsrw.SubSection)
         assert collected[position].subsection == 'dmdSec'
     assert isinstance(collected[2], metsrw.AMDSec)
コード例 #15
0
def write_mets(mets_path, transfer_dir_path, base_path_placeholder,
               transfer_uuid):
    """
    Build a METS document for a transfer and write it to ``mets_path``.

    Args:
        mets_path: Output path for METS XML output
        transfer_dir_path: Location of the files on disk
        base_path_placeholder: The placeholder string for the base path, e.g. 'transferDirectory'
        transfer_uuid: The UUID for the transfer

    Raises:
        Transfer.DoesNotExist: If no transfer with ``transfer_uuid`` is found;
            the miss is logged at debug level and the exception re-raised.
    """
    transfer_dir_path = os.path.normpath(os.path.expanduser(transfer_dir_path))

    # The placeholder wrapped in percent signs, as handed to FSEntriesTree.
    db_base_path = rf"%{base_path_placeholder}%"

    mets = metsrw.METSDocument()
    mets.objid = str(transfer_uuid)

    # Record the dashboard instance as the creating software agent, when set.
    instance_id = django_settings.INSTANCE_ID
    if instance_id:
        mets.agents.append(
            metsrw.Agent(
                "CREATOR",
                type="SOFTWARE",
                name=str(instance_id),
                notes=["Archivematica dashboard UUID"],
            )
        )

    try:
        transfer = Transfer.objects.get(uuid=transfer_uuid)
    except Transfer.DoesNotExist:
        logger.debug("No record in database for transfer: %s", transfer_uuid)
        raise

    # The accession id, when present, becomes an alternate record id.
    if transfer.accessionid:
        mets.alternate_ids.append(
            metsrw.AltRecordID(transfer.accessionid, type="Accession ID")
        )

    fsentry_tree = FSEntriesTree(transfer_dir_path, db_base_path, transfer)
    fsentry_tree.scan()
    mets.append_file(fsentry_tree.root_node)

    mets.write(mets_path, pretty_print=True)
コード例 #16
0
 def test_get_file(self):
     """It should find entries by UUID, path, label, or several criteria at once."""
     # Setup: root -> [dir1 -> [dir2 -> [level3.txt], level2.txt], level1.txt]
     level3_uuid = str(uuid.uuid4())
     level3 = metsrw.FSEntry('dir1/dir2/level3.txt', file_uuid=level3_uuid)
     dir2 = metsrw.FSEntry('dir1/dir2', type='Directory', children=[level3])
     level2_uuid = str(uuid.uuid4())
     level2 = metsrw.FSEntry('dir1/level2.txt', file_uuid=level2_uuid)
     dir1 = metsrw.FSEntry('dir1', type='Directory', children=[dir2, level2])
     level1_uuid = str(uuid.uuid4())
     level1 = metsrw.FSEntry('level1.txt', file_uuid=level1_uuid)
     root = metsrw.FSEntry('root', type='Directory', children=[dir1, level1])
     doc = metsrw.METSDocument()
     doc.append_file(root)

     # By UUID
     by_uuid = ((level3_uuid, level3), (level2_uuid, level2), (level1_uuid, level1))
     for wanted_uuid, expected in by_uuid:
         assert doc.get_file(file_uuid=wanted_uuid) == expected
     assert doc.get_file(file_uuid='does not exist') is None

     # By path
     by_path = (
         ('dir1/dir2/level3.txt', level3),
         ('dir1/dir2', dir2),
         ('dir1/level2.txt', level2),
         ('dir1', dir1),
         ('level1.txt', level1),
     )
     for wanted_path, expected in by_path:
         assert doc.get_file(path=wanted_path) == expected
     assert doc.get_file(path='does not exist') is None

     # By label
     by_label = (
         ('level3.txt', level3),
         ('dir2', dir2),
         ('level2.txt', level2),
         ('dir1', dir1),
         ('level1.txt', level1),
     )
     for wanted_label, expected in by_label:
         assert doc.get_file(label=wanted_label) == expected
     assert doc.get_file(label='does not exist') is None

     # By multiple
     assert doc.get_file(label='level3.txt',
                         path='dir1/dir2/level3.txt') == level3
     assert doc.get_file(label='dir2', type='Directory') == dir2
     assert doc.get_file(label='level2.txt', type='Item') == level2
     assert doc.get_file(file_uuid=None, type='Item') is None

     # Entries appended after the first lookups are found as well.
     level4_uuid = str(uuid.uuid4())
     level4 = metsrw.FSEntry('file4.txt', file_uuid=level4_uuid)
     doc.append_file(level4)
     assert doc.get_file(file_uuid=level4_uuid) == level4
     assert doc.get_file(path='file4.txt') == level4
コード例 #17
0
    def test_add_file_to_child(self):
        """It should list a file added to a nested directory after the fact."""
        # root -> [dir1 -> [level2.txt], level1.txt]
        level2 = metsrw.FSEntry('level2.txt', file_uuid=str(uuid.uuid4()))
        dir1 = metsrw.FSEntry('dir1', type='Directory', children=[level2])
        level1 = metsrw.FSEntry('level1.txt', file_uuid=str(uuid.uuid4()))
        root = metsrw.FSEntry('root', type='Directory', children=[dir1, level1])
        doc = metsrw.METSDocument()
        doc.append_file(root)

        listed = doc.all_files()
        assert listed
        assert len(listed) == 4
        for expected in (root, level1, dir1, level2):
            assert expected in listed

        # Add a child to a directory that is already part of the document.
        level3 = metsrw.FSEntry('level3.txt', file_uuid=str(uuid.uuid4()))
        dir1.add_child(level3)
        listed = doc.all_files()
        assert len(listed) == 5
        assert level3 in listed
コード例 #18
0
    def test_structmap(self):
        """
        It should create a structMap tag.
        It should have a div tag for the directory.
        It should have div tags for the children beneath the directory.
        It should not have div tags for deleted files (without label).
        """
        mets_ns = '{http://www.loc.gov/METS/}'
        objects_dir = metsrw.FSEntry(
            'objects',
            type='Directory',
            children=[
                metsrw.FSEntry('objects/file1.txt', file_uuid=str(uuid.uuid4())),
                metsrw.FSEntry('objects/file2.txt', file_uuid=str(uuid.uuid4())),
            ],
        )
        deleted_f = metsrw.FSEntry(use='deletion', file_uuid=str(uuid.uuid4()))

        writer = metsrw.METSDocument()
        writer.append_file(objects_dir)
        writer.append_file(deleted_f)
        struct_map = writer._structmap()

        assert struct_map.tag == mets_ns + 'structMap'
        assert struct_map.attrib['TYPE'] == 'physical'
        assert struct_map.attrib['ID'] == 'structMap_1'
        assert struct_map.attrib['LABEL'] == 'Archivematica default'
        assert len(struct_map.attrib) == 3
        # Only the directory gets a top-level div; the deleted file has none.
        assert len(struct_map) == 1

        objects_div = struct_map[0]
        assert objects_div.tag == mets_ns + 'div'
        assert objects_div.attrib['LABEL'] == 'objects'
        assert objects_div.attrib['TYPE'] == 'Directory'
        assert len(objects_div.attrib) == 2
        assert len(objects_div) == 2

        for index, expected_label in enumerate(('file1.txt', 'file2.txt')):
            file_div = objects_div[index]
            assert file_div.attrib['LABEL'] == expected_label
            assert file_div.attrib['TYPE'] == 'Item'
            assert len(file_div.attrib) == 2
            assert file_div.find(mets_ns + 'fptr') is not None
コード例 #19
0
    def test_dependency_injection(self):
        """Test the dependency injection (DI) infrastructure for metsrw plugins.

        - client: metsrw.FSEntry
        - services: classes for reading and writing metadata elements, e.g.,
          the PREMISObject class of metsrw.plugins.premisrw or other classes
          exposing the same interface.
        - injector: this test code or the code in metsrw/di.py which calls
          ``provide`` on the ``feature_broker`` singleton.

        The ``FSEntry`` class declares its dependency on the class attributes
        ``premis_object_class``, ``premis_event_class``, and
        ``premis_agent_class`` and further requires that these return classes
        with ``fromtree`` and ``serialize`` methods::

            >>> premis_object_class = Dependency(
            ...     has_methods('serialize'),
            ...     has_class_methods('fromtree'),
            ...     is_class)

        The feature broker is reset to its default state in a ``finally``
        clause, so a failure in this test does not break subsequent tests.
        """

        feature_broker = metsrw.feature_broker
        assert len(feature_broker) == 3
        try:
            # Clear the feature broker and then register/provide the premisrw
            # plugin classes (services) with the feature broker.
            feature_broker.clear()
            assert not feature_broker
            feature_broker.provide('premis_object_class', premisrw.PREMISObject)
            feature_broker.provide('premis_event_class', premisrw.PREMISEvent)
            feature_broker.provide('premis_agent_class', premisrw.PREMISAgent)
            assert len(feature_broker) == 3

            # Create premisrw instances.
            compression_premis_event = premisrw.PREMISEvent(data=EX_COMPR_EVT)
            premis_events = [compression_premis_event]
            premis_agents = [
                premisrw.PREMISAgent(data=x) for x in [EX_AGT_1, EX_AGT_2]
            ]
            _, compression_program_version, archive_tool = (
                compression_premis_event.compression_details)
            premis_object = premisrw.PREMISObject(
                xsi_type=EX_PTR_XSI_TYPE,
                identifier_value=EX_PTR_IDENTIFIER_VALUE,
                message_digest_algorithm=EX_PTR_MESSAGE_DIGEST_ALGORITHM,
                message_digest=EX_PTR_MESSAGE_DIGEST,
                size=EX_PTR_SIZE,
                format_name=EX_PTR_FORMAT_NAME,
                format_registry_key=EX_PTR_FORMAT_REGISTRY_KEY,
                creating_application_name=archive_tool,
                creating_application_version=compression_program_version,
                date_created_by_application=EX_PTR_DATE_CREATED_BY_APPLICATION)
            transform_files = (
                compression_premis_event.get_decompression_transform_files())

            # Create metsrw ``METSDocument`` and ``FSEntry`` instances.
            mets_doc = metsrw.METSDocument()
            fs_entry = metsrw.FSEntry(path=EX_PTR_PATH,
                                      file_uuid=EX_PTR_IDENTIFIER_VALUE,
                                      use=EX_PTR_PACKAGE_TYPE,
                                      type=EX_PTR_PACKAGE_TYPE,
                                      transform_files=transform_files,
                                      mets_div_type=EX_PTR_AIP_SUBTYPE)
            mets_doc.append_file(fs_entry)

            # Use the ``add_premis_...`` methods to add the PREMIS metadata
            # elements to the ``FSEntry`` instance. This will assert that each
            # PREMIS instance is of the correct type (e.g., that
            # ``premis_object`` is an instance of
            # ``FSEntry().premis_object_class``) and will call the instance's
            # ``serialize`` method and incorporate the resulting
            # ``lxml.etree._ElementTree`` instance into the ``FSEntry``
            # instance appropriately.
            fs_entry.add_premis_object(premis_object)
            for premis_event in premis_events:
                fs_entry.add_premis_event(premis_event)
            for premis_agent in premis_agents:
                fs_entry.add_premis_agent(premis_agent)

            # Assert that the instances returned by the
            # ``FSEntry().get_premis_...`` methods are of the anticipated type
            # and are equal to, but not the same objects as, the ones added.
            new_premis_agents = fs_entry.get_premis_agents()
            for new_premis_agent in new_premis_agents:
                assert isinstance(new_premis_agent, premisrw.PREMISAgent)
                assert new_premis_agent in premis_agents
                assert all(new_premis_agent is not pa for pa in premis_agents)
            new_premis_events = fs_entry.get_premis_events()
            for new_premis_event in new_premis_events:
                assert isinstance(new_premis_event, premisrw.PREMISEvent)
                assert new_premis_event in premis_events
                assert all(new_premis_event is not pe for pe in premis_events)
            new_premis_objects = fs_entry.get_premis_objects()
            for new_premis_object in new_premis_objects:
                assert isinstance(new_premis_object, premisrw.PREMISObject)
                assert new_premis_object == premis_object
                # Compare identities directly; comparing ``id(...)`` with the
                # object itself is always true and checks nothing.
                assert new_premis_object is not premis_object

            # Assert that the resulting mets XML contains a
            # premis:objectIdentifierValue in the anticipated location in the
            # structure with the anticipated value.
            mets_doc_el = mets_doc.serialize()
            xpath = ('mets:amdSec/mets:techMD/mets:mdWrap[@MDTYPE="PREMIS:OBJECT"]'
                     '/mets:xmlData/premis:object/premis:objectIdentifier/'
                     'premis:objectIdentifierValue')
            a = mets_doc_el.find(xpath, namespaces=metsrw.NAMESPACES)
            assert a.text == EX_PTR_IDENTIFIER_VALUE

            # Now change the feature broker so that ``FSEntry``'s dependency
            # on a ``premis_object_class`` class attribute is being fulfilled
            # by a new class: ``BetterPREMISObject``.
            feature_broker.provide('premis_object_class', BetterPREMISObject)

            # Now create a new PREMIS object
            premis_object_tree = premis_object.serialize()
            better_premis_object = BetterPREMISObject.fromtree(premis_object_tree)

            # And re-create the ``METSDocument`` and ``FSEntry`` instances.
            mets_doc = metsrw.METSDocument()
            fs_entry = metsrw.FSEntry(path=EX_PTR_PATH,
                                      file_uuid=EX_PTR_IDENTIFIER_VALUE,
                                      use=EX_PTR_PACKAGE_TYPE,
                                      type=EX_PTR_PACKAGE_TYPE,
                                      transform_files=transform_files,
                                      mets_div_type=EX_PTR_AIP_SUBTYPE)
            mets_doc.append_file(fs_entry)

            # Add the PREMIS metadata again, but this time use the instance of
            # ``BetterPREMISObject``.
            fs_entry.add_premis_object(better_premis_object)
            for premis_event in premis_events:
                fs_entry.add_premis_event(premis_event)
            for premis_agent in premis_agents:
                fs_entry.add_premis_agent(premis_agent)

            # Assert that the instances returned by the
            # ``FSEntry().get_premis_...`` methods are of the anticipated type.
            new_premis_objects = fs_entry.get_premis_objects()
            for new_premis_object in new_premis_objects:
                assert isinstance(new_premis_object, BetterPREMISObject)

            # Make sure we can still find the PREMIS object id value.
            mets_doc_el = mets_doc.serialize()
            assert (mets_doc_el.find(
                xpath,
                namespaces=metsrw.NAMESPACES).text == EX_PTR_IDENTIFIER_VALUE)
        finally:
            # Reset the feature broker to its default state so subsequent
            # tests don't break, whether or not this test passed.
            metsrw.set_feature_broker_to_default_state(feature_broker)
コード例 #20
0
    def test_pointer_file(self):
        """Test the creation of pointer files."""

        # Mocks of the AIP, its compression event, and other details.
        aip_uuid = str(uuid.uuid4())
        aip = {
            'current_path': '/path/to/myaip-{}.7z'.format(aip_uuid),
            'uuid': aip_uuid,
            'package_type': 'Archival Information Package',
            'checksum_algorithm': 'sha256',
            'checksum': '78e4509313928d2964fe877a6a82f1ba728c171eedf696e3f5b0aed61ec547f6',
            'size': '11854',
            'extension': '.7z',
            'archive_tool': '7-Zip',
            'archive_tool_version': '9.20',
            'transform_files': [
                {'algorithm': 'bzip2',
                 'order': '2',
                 'type': 'decompression'},
                {'algorithm': 'gpg',
                 'order': '1',
                 'type': 'decryption'}
            ]
        }
        compression_event = {
            'uuid': str(uuid.uuid4()),
            'detail': (
                'program=7z; version=p7zip Version 9.20'
                ' (locale=en_US.UTF-8,Utf16=on,HugeFiles=on,2 CPUs)'),
            'outcome': '',
            # This should be the output from 7-zip or other...
            'outcome_detail_note': '',
            'agents': [
                {'name': 'Archivematica',
                 'type': 'software',
                 'identifier_type': 'preservation system',
                 'identifier_value': 'Archivematica-1.6.1'},
                {'name': 'test',
                 'type': 'organization',
                 'identifier_type': 'repository code',
                 'identifier_value': 'test'}
            ]
        }
        now = datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S")
        pronom_conversion = {
            '.7z': {'puid': 'fmt/484', 'name': '7Zip format'},
            '.bz2': {'puid': 'x-fmt/268', 'name': 'BZIP2 Compressed Archive'},
        }

        # Create the METS using metsrw
        mw = metsrw.METSDocument()

        # TODO: metsrw will prefix "file-" to the AIP UUID when creating <mets:fptr
        # FILEID> and <mets:file ID> attr vals. However, we want "file-" to be
        # replaced by the AIP's (i.e., the transfer's) name.
        aip_fs_entry = metsrw.FSEntry(
            path=aip['current_path'],
            file_uuid=aip['uuid'],
            use=aip['package_type'],
            type=aip['package_type'],
            transform_files=aip['transform_files'])

        premis_schema_location = (
            'info:lc/xmlns/premis-v2'
            ' http://www.loc.gov/standards/premis/v2/premis-v2-2.xsd')
        nsmap = {
            'mets': metsrw.NAMESPACES['mets'],
            'xsi': metsrw.NAMESPACES['xsi'],
            'xlink': metsrw.NAMESPACES['xlink'],
        }
        E_P = ElementMaker(namespace=metsrw.NAMESPACES['premis'],
                           nsmap={'premis': metsrw.NAMESPACES['premis']})

        # Create the AIP's PREMIS:OBJECT using raw lxml
        aip_premis_object = E_P.object(
            E_P.objectIdentifier(
                E_P.objectIdentifierType('UUID'),
                E_P.objectIdentifierValue(aip['uuid']),
            ),
            E_P.objectCharacteristics(
                E_P.compositionLevel('1'),
                E_P.fixity(
                    E_P.messageDigestAlgorithm(aip['checksum_algorithm']),
                    E_P.messageDigest(aip['checksum']),
                ),
                E_P.size(str(aip['size'])),
                E_P.format(
                    E_P.formatDesignation(
                        E_P.formatName(
                            pronom_conversion[aip['extension']]['name']),
                        E_P.formatVersion(),
                    ),
                    E_P.formatRegistry(
                        E_P.formatRegistryName('PRONOM'),
                        E_P.formatRegistryKey(
                            pronom_conversion[aip['extension']]['puid'])
                    ),
                ),
                E_P.creatingApplication(
                    E_P.creatingApplicationName(aip['archive_tool']),
                    E_P.creatingApplicationVersion(aip['archive_tool_version']),
                    E_P.dateCreatedByApplication(now),
                ),
            ),
            version='2.2',
        )
        aip_premis_object.attrib['{' + nsmap['xsi'] + '}type'] = 'premis:file'
        aip_premis_object.attrib['{' + nsmap['xsi'] + '}schemaLocation'] = (
            premis_schema_location)
        aip_fs_entry.add_premis_object(aip_premis_object)

        # Create the AIP's PREMIS:EVENT for the compression using raw lxml
        aip_premis_compression_event = E_P.event(
            E_P.eventIdentifier(
                E_P.eventIdentifierType('UUID'),
                E_P.eventIdentifierValue(compression_event['uuid']),
            ),
            E_P.eventType('compression'),
            E_P.eventDateTime(now),
            E_P.eventDetail(compression_event['detail']),
            E_P.eventOutcomeInformation(
                E_P.eventOutcome(compression_event['outcome']),
                E_P.eventOutcomeDetail(
                    E_P.eventOutcomeDetailNote(
                        compression_event['outcome_detail_note'])
                ),
            ),
            *[E_P.linkingAgentIdentifier(
                E_P.linkingAgentIdentifierType(ag['identifier_type']),
                E_P.linkingAgentIdentifierValue(ag['identifier_value']))
              for ag in compression_event['agents']],
            version='2.2'
        )
        aip_premis_compression_event.attrib[
            '{' + nsmap['xsi'] + '}schemaLocation'] = (premis_schema_location)
        aip_fs_entry.add_premis_event(aip_premis_compression_event)

        # Create the AIP's PREMIS:AGENTs using raw lxml
        for agent in compression_event['agents']:
            agent_el = E_P.agent(
                E_P.agentIdentifier(
                    E_P.agentIdentifierType(agent['identifier_type']),
                    E_P.agentIdentifierValue(agent['identifier_value'])
                ),
                E_P.agentName(agent['name']),
                E_P.agentType(agent['type'])
            )
            agent_el.attrib['{' + nsmap['xsi'] + '}schemaLocation'] = (
                premis_schema_location)
            aip_fs_entry.add_premis_agent(agent_el)

        # TODO: we need metsrw to be able to set transformFile elements.
        # compression - 7z or tar.bz2
        """
        if extension == '.7z':
            etree.SubElement(file_, namespaces.metsBNS + "transformFile",
                            TRANSFORMORDER='1',
                            TRANSFORMTYPE='decompression',
                            TRANSFORMALGORITHM=algorithm)
        elif extension == '.bz2':
            etree.SubElement(file_, namespaces.metsBNS + "transformFile",
                            TRANSFORMORDER='1',
                            TRANSFORMTYPE='decompression',
                            TRANSFORMALGORITHM='bzip2')
            etree.SubElement(file_, namespaces.metsBNS + "transformFile",
                            TRANSFORMORDER='2',
                            TRANSFORMTYPE='decompression',
                            TRANSFORMALGORITHM='tar')
        """

        mw.append_file(aip_fs_entry)
        self.assert_pointer_valid(mw.serialize())
コード例 #21
0
 def test_parse_no_groupid(self):
     """Parsing should cope with file entries that have no GROUPID."""
     fixture_path = 'fixtures/mets_without_groupid_in_file.xml'
     document = metsrw.METSDocument().fromfile(fixture_path)
     # The lookup by UUID must still resolve to an entry after parsing.
     entry = document.get_file(
         file_uuid='db653873-d0ab-4bc1-9edb-2b6d2d84ab5a')
     assert entry is not None
コード例 #22
0
    def test_write_normative_structmap(self):
        """A normative logical structMap should be written, empty dirs included.

        Builds a small directory tree that contains an empty directory, then
        checks that the empty directory is absent from the physical structMap,
        present in the normative logical structMap, and linked (via DMDID) to
        a dmdSec holding its PREMIS object.
        """
        # The empty directory carries a simple PREMIS 3.0 object whose
        # identifier we look up again at the end of the test.
        empty_dir_uuid = str(uuid.uuid4())
        empty_dir = metsrw.FSEntry('EMPTY_DIR', type='Directory')
        empty_dir.add_premis_object(PREMISObject(
            identifier_value=empty_dir_uuid,
            premis_version='3.0',
            xsi_type='premis:intellectualEntity'))

        # Its parent directory holds one real file next to the empty directory
        # and gets a simple PREMIS object as well.
        level3 = metsrw.FSEntry('level3.txt', file_uuid=str(uuid.uuid4()))
        dir2 = metsrw.FSEntry(
            'dir2', type='Directory', children=[level3, empty_dir])
        dir2.add_premis_object(
            PREMISObject(identifier_value=str(uuid.uuid4())))

        # Remaining levels of the tree; only the root is appended to the
        # document.
        level2 = metsrw.FSEntry('level2.txt', file_uuid=str(uuid.uuid4()))
        dir1 = metsrw.FSEntry(
            'dir1', type='Directory', children=[dir2, level2])
        level1 = metsrw.FSEntry('level1.txt', file_uuid=str(uuid.uuid4()))
        root = metsrw.FSEntry(
            'root', type='Directory', children=[dir1, level1])
        document = metsrw.METSDocument()
        document.append_file(root)

        # ``all_files`` must report every entry, the empty directory included.
        known_entries = document.all_files()
        assert known_entries
        assert len(known_entries) == 7
        for entry in (root, level1, dir1, level2, dir2, level3, empty_dir):
            assert entry in known_entries

        # XPaths for the empty directory and its sibling file. Both live under
        # root/dir1/dir2; the file belongs in both structMaps, the empty
        # directory only in the normative logical one.
        dir2_prefix = ('mets:div[@LABEL="root"]/'
                       'mets:div[@LABEL="dir1"]/'
                       'mets:div[@LABEL="dir2"]/')
        sibling_file_xpath = dir2_prefix + 'mets:div[@LABEL="level3.txt"]'
        empty_dir_xpath = dir2_prefix + 'mets:div[@LABEL="EMPTY_DIR"]'
        ns = metsrw.NAMESPACES

        # Physical structMap: file present, empty directory absent.
        physical = document._structmap()
        assert physical.find(sibling_file_xpath, ns) is not None
        assert physical.find(empty_dir_xpath, ns) is None

        # Normative logical structMap: both present.
        normative = document._normative_structmap()
        assert normative.find(sibling_file_xpath, ns) is not None
        empty_dir_div = normative.find(empty_dir_xpath, ns)
        assert empty_dir_div is not None

        # The empty directory's div must point, through DMDID, at a dmdSec of
        # the serialized document that wraps a PREMIS object.
        dmdid = empty_dir_div.get('DMDID')
        assert dmdid.startswith('dmdSec_')
        serialized = document.serialize()
        dmd_sec = serialized.find(
            'mets:dmdSec[@ID="{}"]'.format(dmdid), ns)
        assert dmd_sec is not None
        xml_data = dmd_sec.find('mets:mdWrap/mets:xmlData', ns)
        premis_object = xml_data.find('premis:object', PREMIS_3_0_NAMESPACES)

        # The retrieved PREMIS object must round-trip identifier and xsi:type.
        identifier_value = premis_object.find(
            'premis:objectIdentifier/premis:objectIdentifierValue',
            PREMIS_3_0_NAMESPACES)
        assert identifier_value.text == empty_dir_uuid
        xsi_type_attr = '{}type'.format(lxmlns('xsi', premis_version='3.0'))
        assert premis_object.get(xsi_type_attr) == 'premis:intellectualEntity'
コード例 #23
0
 def test_mets_header(self):
     """``_mets_header`` should build a metsHdr with the given CREATEDATE."""
     created = '2014-07-16T22:52:02.480108'
     mets_hdr = metsrw.METSDocument()._mets_header(created)
     # The element must be namespaced METS and echo the date unchanged.
     assert mets_hdr.tag == '{http://www.loc.gov/METS/}metsHdr'
     assert mets_hdr.attrib['CREATEDATE'] == created
コード例 #24
0
 def test_parse_tree_createdate_too_new(self):
     """``_parse_tree`` should raise ParseError for the too-new CREATEDATE fixture."""
     document = metsrw.METSDocument()
     # Inject the parsed tree directly so only ``_parse_tree`` is exercised.
     document.tree = etree.parse('fixtures/createdate_too_new.xml')
     with pytest.raises(metsrw.ParseError):
         document._parse_tree()