def test_group_id_derived(self):
    """It should return the group ID for the derived from file."""
    source_uuid = str(uuid.uuid4())
    source = metsrw.FSEntry('level1.txt', file_uuid=source_uuid)
    derived = metsrw.FSEntry(
        'level3.txt', file_uuid=str(uuid.uuid4()), derived_from=source)
    # A derived file belongs to its source file's group.
    assert derived.group_id() == 'Group-' + source_uuid
    assert derived.group_id() == source.group_id()
def test_serialize_structmap_child_empty(self):
    """It should handle children with no structMap entry."""
    parent = metsrw.FSEntry('dir', type='Directory')
    child = metsrw.FSEntry(use='deletion', file_uuid=str(uuid.uuid4()))
    parent.add_child(child)
    div = parent.serialize_structmap(recurse=True)
    # The deletion-use child produces no div, leaving the directory empty.
    assert div.tag == '{http://www.loc.gov/METS/}div'
    assert div.attrib['TYPE'] == 'Directory'
    assert div.attrib['LABEL'] == 'dir'
    assert len(div.attrib) == 2
    assert len(div) == 0
def add_md_dir_to_structmap(sip):
    """Add the metadata directory to the structmap."""
    metadata_dir = metsrw.FSEntry(path="metadata", use=None, type="Directory")
    sip.add_child(metadata_dir)
    # Register dataset.json under the metadata fileSec group so it shows up
    # in the fileSec output.
    dataset_entry = metsrw.FSEntry(
        path="metadata/dataset.json", use="metadata", file_uuid=str(uuid.uuid4())
    )
    metadata_dir.add_child(dataset_entry)
    return sip
def test_filesec(self):
    """It should group fileSec entries into fileGrp elements by USE."""
    original = metsrw.FSEntry('objects/file1.txt', file_uuid=str(uuid.uuid4()))
    # Fixed typo: USE value 'preservaton' -> 'preservation' (the creation and
    # the assertion below were both misspelled, so the test still grouped
    # correctly but propagated the typo).
    preservation = metsrw.FSEntry('objects/file1-preservation.txt',
                                  use='preservation',
                                  file_uuid=str(uuid.uuid4()))
    original2 = metsrw.FSEntry('objects/file2.txt', file_uuid=str(uuid.uuid4()))
    mw = metsrw.METSDocument()
    element = mw._filesec([original, preservation, original2])
    assert isinstance(element, etree._Element)
    assert element.tag == '{http://www.loc.gov/METS/}fileSec'
    assert len(element) == 2  # 2 groups
    assert element[0].tag == '{http://www.loc.gov/METS/}fileGrp'
    assert element[0].get('USE') == 'original'
    assert element[1].tag == '{http://www.loc.gov/METS/}fileGrp'
    assert element[1].get('USE') == 'preservation'
def test_serialize_structmap_no_recurse(self):
    """
    It should produce a mets:div element.
    It should have a TYPE and LABEL.
    It should not have children.
    """
    parent = metsrw.FSEntry('dir', type='Directory')
    child = metsrw.FSEntry('file1.txt', file_uuid=str(uuid.uuid4()))
    parent.add_child(child)
    div = parent.serialize_structmap(recurse=False)
    assert div.tag == '{http://www.loc.gov/METS/}div'
    assert div.attrib['TYPE'] == 'Directory'
    assert div.attrib['LABEL'] == 'dir'
    # recurse=False must leave the child out of the serialization.
    assert len(div) == 0
def create_test_pointer_file(self):
    """Build and return a ``metsrw.METSDocument`` modelling an AIP pointer
    file, populated from the example constants in ``c``.
    """
    # 1. Get the PREMIS events and object as premisrw class instances.
    compression_event = premisrw.PREMISEvent(data=c.EX_COMPR_EVT)
    events = [compression_event]
    # Unpack compression details; the names suggest (format, program
    # version, tool) with the first element unused here — TODO confirm
    # against premisrw's ``compression_details``.
    _, compression_program_version, archive_tool = (
        compression_event.compression_details)
    premis_object = premisrw.PREMISObject(
        xsi_type=c.EX_PTR_XSI_TYPE,
        identifier_value=c.EX_PTR_IDENTIFIER_VALUE,
        message_digest_algorithm=c.EX_PTR_MESSAGE_DIGEST_ALGORITHM,
        message_digest=c.EX_PTR_MESSAGE_DIGEST,
        size=c.EX_PTR_SIZE,
        format_name=c.EX_PTR_FORMAT_NAME,
        format_registry_key=c.EX_PTR_FORMAT_REGISTRY_KEY,
        creating_application_name=archive_tool,
        creating_application_version=compression_program_version,
        date_created_by_application=c.EX_PTR_DATE_CREATED_BY_APPLICATION)
    transform_files = compression_event.get_decompression_transform_files()
    # 2. Construct the METS pointer file
    mw = metsrw.METSDocument()
    mets_fs_entry = metsrw.FSEntry(path=c.EX_PTR_PATH,
                                   file_uuid=c.EX_PTR_IDENTIFIER_VALUE,
                                   use=c.EX_PTR_PACKAGE_TYPE,
                                   type=c.EX_PTR_PACKAGE_TYPE,
                                   transform_files=transform_files,
                                   mets_div_type=c.EX_PTR_AIP_SUBTYPE)
    # The ``add_premis_*`` methods take serialized (lxml) trees here.
    mets_fs_entry.add_premis_object(premis_object.serialize())
    for event in events:
        mets_fs_entry.add_premis_event(event.serialize())
    for agent in [c.EX_AGT_1, c.EX_AGT_2]:
        mets_fs_entry.add_premis_agent(premisrw.data_to_premis(agent))
    mw.append_file(mets_fs_entry)
    return mw
def test_collect_mdsec_elements(self):
    """It should collect dmdSecs ahead of amdSecs."""
    first = metsrw.FSEntry('file1.txt', file_uuid=str(uuid.uuid4()))
    first.amdsecs.append(metsrw.AMDSec())
    first.dmdsecs.append(metsrw.SubSection('dmdSec', None))
    second = metsrw.FSEntry('file2.txt', file_uuid=str(uuid.uuid4()))
    second.dmdsecs.append(metsrw.SubSection('dmdSec', None))
    document = metsrw.METSDocument()
    elements = document._collect_mdsec_elements([first, second])
    # Check ordering - dmdSec before amdSec
    assert isinstance(elements, list)
    assert len(elements) == 3
    for dmd in elements[:2]:
        assert isinstance(dmd, metsrw.SubSection)
        assert dmd.subsection == 'dmdSec'
    assert isinstance(elements[2], metsrw.AMDSec)
def test_serialize_filesec_no_use(self):
    """It should not produce a mets:file element."""
    entry = metsrw.FSEntry('file1.txt', use=None, file_uuid=str(uuid.uuid4()))
    # Entries without a USE have no place in the fileSec.
    assert entry.serialize_filesec() is None
def test_add_metadata_to_fsentry(self):
    """Metadata helpers should populate dmdSecs and a single shared amdSec."""
    entry = metsrw.FSEntry('file1.txt', file_uuid=str(uuid.uuid4()))
    entry.add_dublin_core('<dc />')
    assert entry.dmdsecs
    assert len(entry.dmdsecs) == 1
    assert entry.dmdsecs[0].subsection == 'dmdSec'
    assert entry.dmdsecs[0].contents.mdtype == 'DC'
    # Can only have 1 amdSec, so subsequent subsections are children of AMDSec
    entry.add_premis_object('<premis>object</premis>')
    assert entry.amdsecs
    assert entry.amdsecs[0].subsections
    assert entry.amdsecs[0].subsections[0].subsection == 'techMD'
    assert entry.amdsecs[0].subsections[0].contents.mdtype == 'PREMIS:OBJECT'
    entry.add_premis_event('<premis>event</premis>')
    assert entry.amdsecs[0].subsections[1].subsection == 'digiprovMD'
    assert entry.amdsecs[0].subsections[1].contents.mdtype == 'PREMIS:EVENT'
    entry.add_premis_agent('<premis>agent</premis>')
    assert entry.amdsecs[0].subsections[2].subsection == 'digiprovMD'
    assert entry.amdsecs[0].subsections[2].contents.mdtype == 'PREMIS:AGENT'
    entry.add_premis_rights('<premis>rights</premis>')
    assert entry.amdsecs[0].subsections[3].subsection == 'rightsMD'
    assert entry.amdsecs[0].subsections[3].contents.mdtype == 'PREMIS:RIGHTS'
    assert len(entry.amdsecs[0].subsections) == 4
def test_record_backlog_event(tmp_path):
    """``_record_backlog_event`` should add a 'placement in backlog' PREMIS
    event to the file's entry in the transfer METS.
    """
    transfer = Transfer.objects.create(uuid="756db89c-1380-459d-83bc-d3772f1e7dd8")
    user = User.objects.create(id=1)
    transfer.update_active_agent(user_id=user.id)
    file_obj = File.objects.create(
        uuid="3c567bc8-0847-4d12-a77d-0ed3a0361c0a", transfer=transfer
    )
    # ``_record_backlog_event`` expects the METS file to exist already.
    # We're creating one with a single file in it.
    (tmp_path / "metadata/submissionDocumentation").mkdir(parents=True)
    mets_path = str(tmp_path / "metadata/submissionDocumentation/METS.xml")
    mets = metsrw.METSDocument()
    mets.append_file(
        metsrw.FSEntry(
            path="foobar.jpg", label="foobar", type="Item", file_uuid=file_obj.uuid
        )
    )
    mets.write(mets_path, pretty_print=True)
    move_to_backlog._record_backlog_event(transfer.uuid, str(tmp_path), "2019-03-12")
    # Load METS document again and test that the file has a PREMIS event.
    mets = metsrw.METSDocument().fromfile(mets_path)
    fsentry = next(iter(mets.all_files()))
    premis_events = fsentry.get_premis_events()
    assert len(premis_events) == 1
    assert premis_events[0].event_type == "placement in backlog"
    assert premis_events[0].event_date_time == "2019-03-12"
def __init__(self, root_path, db_base_path, transfer):
    """Set up the root Directory entry and empty lookup indexes."""
    self.root_path = root_path
    self.db_base_path = db_base_path
    self.transfer = transfer
    # The METS tree is rooted at a Directory entry named after root_path.
    self.root_node = metsrw.FSEntry(
        path=os.path.basename(root_path), type="Directory")
    # Maps from database paths to their FSEntry instances.
    self.file_index = {}
    self.dir_index = {}
def test_files(self):
    """all_files() should collect entries several levels deep."""
    leaf3 = metsrw.FSEntry('level3.txt', file_uuid=str(uuid.uuid4()))
    dir2 = metsrw.FSEntry('dir2', type='Directory', children=[leaf3])
    leaf2 = metsrw.FSEntry('level2.txt', file_uuid=str(uuid.uuid4()))
    dir1 = metsrw.FSEntry('dir1', type='Directory', children=[dir2, leaf2])
    leaf1 = metsrw.FSEntry('level1.txt', file_uuid=str(uuid.uuid4()))
    root = metsrw.FSEntry('root', type='Directory', children=[dir1, leaf1])
    document = metsrw.METSDocument()
    document.append_file(root)
    files = document.all_files()
    assert files
    assert len(files) == 6
    for entry in (root, leaf1, dir1, leaf2, dir2, leaf3):
        assert entry in files
    # Appending another file is reflected in subsequent calls.
    extra_uuid = str(uuid.uuid4())
    extra = metsrw.FSEntry('file4.txt', file_uuid=extra_uuid)
    document.append_file(extra)
    files = document.all_files()
    assert len(files) == 7
    assert extra in files
def test_is_empty_dir(self):
    """It should be able to determine whether it is an empty directory."""
    root = metsrw.FSEntry('dir', type='Directory')
    dir1 = metsrw.FSEntry('dir', type='Directory')
    dir2 = metsrw.FSEntry('dir', type='Directory')
    dir1a = metsrw.FSEntry('dir', type='Directory')
    dir2a = metsrw.FSEntry('dir', type='Directory')
    dir2b = metsrw.FSEntry('dir', type='Directory')
    leaf = metsrw.FSEntry('file1.txt', file_uuid=str(uuid.uuid4()))
    root.add_child(dir1)
    root.add_child(dir2)
    dir1.add_child(dir1a)
    dir2.add_child(dir2a)
    dir2.add_child(dir2b)
    dir1a.add_child(leaf)
    assert dir2a.is_empty_dir
    assert not dir2a.children
    assert not dir1a.is_empty_dir
    assert len(dir1a.children) == 1
    assert not dir1.is_empty_dir
    assert not root.is_empty_dir
    assert not leaf.is_empty_dir
    # dir2 counts as an empty directory because it contains nothing but
    # empty directories.
    assert dir2.is_empty_dir
    assert len(dir2.children) == 2
def test_admids(self):
    """It should return 0 or 1 amdSecs."""
    entry = metsrw.FSEntry('file1.txt', file_uuid=str(uuid.uuid4()))
    assert not entry.admids
    entry.add_premis_object('<premis>object</premis>')
    assert len(entry.admids) == 1
    # A second subsection reuses the single amdSec.
    entry.add_premis_event('<premis>event</premis>')
    assert len(entry.admids) == 1
def test_serialize_filesec_not_item(self):
    """It should not produce a mets:file element."""
    entry = metsrw.FSEntry(
        'file1.txt', type='Directory', file_uuid=str(uuid.uuid4()))
    # Only Item-type entries belong in the fileSec.
    assert entry.serialize_filesec() is None
def test_remove_child(self):
    """
    It should remove the child from the parent's children list.
    It should remove the parent from the child's parent link.
    """
    parent = metsrw.FSEntry('dir', type='Directory')
    first = metsrw.FSEntry('file1.txt', file_uuid=str(uuid.uuid4()))
    second = metsrw.FSEntry('file2.txt', file_uuid=str(uuid.uuid4()))
    parent.add_child(first)
    parent.add_child(second)
    assert first in parent.children
    assert first.parent is parent
    assert len(parent.children) == 2
    parent.remove_child(first)
    # Both directions of the link are severed.
    assert first not in parent.children
    assert first.parent is None
    assert len(parent.children) == 1
def convert_dataverse_to_mets(
    job,
    unit_path,
    dataset_md_name="dataset.json",
    output_md_path=None,
    output_md_name=None,
):
    """Create a transfer METS file from a Dataverse's dataset.json file.

    :param job: Job instance used for script output/logging context.
    :param unit_path: Path to the transfer unit directory.
    :param dataset_md_name: Name of the Dataverse dataset metadata file.
    :param output_md_path: Optional directory for the generated METS.
    :param output_md_name: Optional filename for the generated METS.
    :raises ConvertDataverseError: if the metadata cannot be loaded or the
        METS structure cannot be assembled.
    """
    logger.info(
        "Convert Dataverse structure called with '%s' unit directory", unit_path
    )
    json_metadata = load_md_and_return_json(unit_path, dataset_md_name)
    if json_metadata is None:
        # Fixed garbled error message ("Unable to the load ...").
        raise ConvertDataverseError("Unable to load the Dataverse metadata file")
    dataset_md_latest = get_latest_version_metadata(json_metadata)
    if dataset_md_latest is None:
        raise ConvertDataverseError(
            "Unable to find the dataset metadata section from dataset.json"
        )
    # If a dataset is restricted we may not have access to all the files. We
    # may also want to flag this dataset to the users of this service. We
    # can do this here and below. We do not yet know whether this microservice
    # should fail because we don't know how all datasets behave when some
    # restrictions are placed on them.
    contact_information = retrieve_terms_of_access(dataset_md_latest)
    # Create METS
    try:
        sip = metsrw.FSEntry(
            path="None",
            label=get_ddi_title(dataset_md_latest),
            use=None,
            type="Directory",
        )
    except TypeError as err:
        # Bug fix: the message was previously built as a tuple
        # ``("... %s", err)``, so a tuple (not a formatted string) was
        # logged and raised. Format it properly instead.
        citation_msg = (
            "Unable to gather citation data from dataset.json: %s" % err
        )
        logger.error(citation_msg)
        raise ConvertDataverseError(citation_msg)
    sip = add_ddi_xml(job, sip, json_metadata, dataset_md_latest)
    if sip is None:
        raise ConvertDataverseError("Error creating SIP from Dataverse DDI")
    sip = add_metadata_ref(sip, dataset_md_name, "metadata/{}".format(dataset_md_name))
    sip = add_dataset_files_to_md(job, sip, dataset_md_latest, contact_information)
    if sip is None:
        raise ConvertDataverseError("Error adding Dataset files to METS")
    # On success of the following two functions, the module will return None
    # to JobContext which expects non-zero as a failure code only.
    sip = add_md_dir_to_structmap(sip)
    write_mets_to_file(sip, unit_path, output_md_path, output_md_name)
def test_serialize_structmap_recurse(self):
    """
    It should produce a mets:div element.
    It should have a TYPE and LABEL.
    It should have a child mets:div with the file.
    """
    parent = metsrw.FSEntry('dir', type='Directory')
    child = metsrw.FSEntry('file1.txt', file_uuid=str(uuid.uuid4()))
    parent.add_child(child)
    div = parent.serialize_structmap(recurse=True)
    assert div.tag == '{http://www.loc.gov/METS/}div'
    assert div.attrib['TYPE'] == 'Directory'
    assert div.attrib['LABEL'] == 'dir'
    assert len(div) == 1
    child_div = div[0]
    assert child_div.tag == '{http://www.loc.gov/METS/}div'
    assert child_div.attrib['TYPE'] == 'Item'
    assert child_div.attrib['LABEL'] == 'file1.txt'
    assert len(child_div) == 1
    fptr = child_div[0]
    assert fptr.tag == '{http://www.loc.gov/METS/}fptr'
    assert fptr.attrib['FILEID'].startswith('file-')
def test_add_child(self):
    """
    It should add a new entry to the children list.
    It should add a parent link.
    It should handle duplicates.
    """
    parent = metsrw.FSEntry('dir', type='Directory')
    child = metsrw.FSEntry('file1.txt', file_uuid=str(uuid.uuid4()))
    # Adding the same child twice must not duplicate it.
    for _ in range(2):
        parent.add_child(child)
        assert child in parent.children
        assert len(parent.children) == 1
        assert child.parent is parent
    # A file entry cannot take children.
    with pytest.raises(ValueError):
        child.add_child(parent)
def build_tree(self, path, parent=None):
    """Recursively build FSEntry nodes for everything under ``path``.

    Each directory and file gets a ``metsrw.FSEntry`` keyed into
    ``self.dir_index`` / ``self.file_index`` by its database path.

    NOTE(review): ``parent`` defaults to None but every entry is attached
    via ``parent.add_child`` — presumably callers always pass the root
    FSEntry (e.g. ``self.root_node``); confirm, otherwise the first level
    would raise AttributeError.
    """
    # Sort by name for a deterministic tree order across platforms.
    dir_entries = sorted(os.scandir(path), key=lambda d: d.name)
    for dir_entry in dir_entries:
        # Path relative to the transfer root, as recorded in the METS.
        entry_relative_path = os.path.relpath(dir_entry.path, start=self.root_path)
        if dir_entry.is_dir():
            fsentry = metsrw.FSEntry(path=entry_relative_path,
                                     label=dir_entry.name,
                                     type="Directory")
            # Directory keys in the database carry a trailing separator.
            db_path = "".join(
                [self.db_base_path, entry_relative_path, os.path.sep])
            self.dir_index[db_path] = fsentry
            self.build_tree(dir_entry.path, parent=fsentry)
        else:
            fsentry = metsrw.FSEntry(path=entry_relative_path,
                                     label=dir_entry.name,
                                     type="Item")
            db_path = "".join([self.db_base_path, entry_relative_path])
            self.file_index[db_path] = fsentry
        # Attach the new entry (directory or file) to its parent.
        parent.add_child(fsentry)
def test_add_file_to_child(self):
    """Files added to a child directory should show up in all_files()."""
    leaf2 = metsrw.FSEntry('level2.txt', file_uuid=str(uuid.uuid4()))
    dir1 = metsrw.FSEntry('dir1', type='Directory', children=[leaf2])
    leaf1 = metsrw.FSEntry('level1.txt', file_uuid=str(uuid.uuid4()))
    root = metsrw.FSEntry('root', type='Directory', children=[dir1, leaf1])
    document = metsrw.METSDocument()
    document.append_file(root)
    files = document.all_files()
    assert files
    assert len(files) == 4
    for entry in (root, leaf1, dir1, leaf2):
        assert entry in files
    # Children added after append_file are picked up too.
    leaf3 = metsrw.FSEntry('level3.txt', file_uuid=str(uuid.uuid4()))
    dir1.add_child(leaf3)
    files = document.all_files()
    assert len(files) == 5
    assert leaf3 in files
def test_remove_file(self):
    """remove_entry() should drop files, directory subtrees and the root."""
    # Setup
    leaf3_uuid = str(uuid.uuid4())
    leaf3 = metsrw.FSEntry('dir1/dir2/level3.txt', file_uuid=leaf3_uuid)
    dir2 = metsrw.FSEntry('dir1/dir2', type='Directory', children=[leaf3])
    leaf2_uuid = str(uuid.uuid4())
    leaf2 = metsrw.FSEntry('dir1/level2.txt', file_uuid=leaf2_uuid)
    dir1 = metsrw.FSEntry('dir1', type='Directory', children=[dir2, leaf2])
    leaf1_uuid = str(uuid.uuid4())
    leaf1 = metsrw.FSEntry('level1.txt', file_uuid=leaf1_uuid)
    root = metsrw.FSEntry('root', type='Directory', children=[dir1, leaf1])
    document = metsrw.METSDocument()
    document.append_file(root)
    assert len(document.all_files()) == 6
    # Removing a file drops it from the tree and the lookups.
    document.remove_entry(leaf3)
    assert len(document.all_files()) == 5
    assert document.get_file(file_uuid=leaf3_uuid) is None
    assert leaf3 not in dir2.children
    assert leaf3 not in document.all_files()
    # Removing a directory removes its whole subtree.
    document.remove_entry(dir1)
    assert len(document.all_files()) == 2
    assert document.get_file(path='dir1') is None
    assert dir1 not in root.children
    assert dir1 not in document.all_files()
    assert leaf2 not in document.all_files()
    assert dir2 not in document.all_files()
    assert leaf1 in root.children
    # Removing the root empties the document.
    document.remove_entry(root)
    assert len(document.all_files()) == 0
def test_serialize_filesec_no_path(self):
    """
    It should produce a mets:file element.
    It should not have a child mets:FLocat.
    """
    entry_uuid = str(uuid.uuid4())
    entry = metsrw.FSEntry(file_uuid=entry_uuid, use='deletion')
    file_el = entry.serialize_filesec()
    assert file_el.tag == '{http://www.loc.gov/METS/}file'
    assert file_el.attrib['ID'] == 'file-' + entry_uuid
    assert file_el.attrib['GROUPID'] == 'Group-' + entry_uuid
    assert len(file_el.attrib) == 2
    # No path means no FLocat child.
    assert len(file_el) == 0
def test_structmap(self):
    """
    It should create a structMap tag.
    It should have a div tag for the directory.
    It should have div tags for the children beneath the directory.
    It should not have div tags for deleted files (without label).
    """
    children = [
        metsrw.FSEntry('objects/file1.txt', file_uuid=str(uuid.uuid4())),
        metsrw.FSEntry('objects/file2.txt', file_uuid=str(uuid.uuid4())),
    ]
    directory = metsrw.FSEntry('objects', type='Directory', children=children)
    deleted = metsrw.FSEntry(use='deletion', file_uuid=str(uuid.uuid4()))
    document = metsrw.METSDocument()
    document.append_file(directory)
    document.append_file(deleted)
    structmap = document._structmap()
    assert structmap.tag == '{http://www.loc.gov/METS/}structMap'
    assert structmap.attrib['TYPE'] == 'physical'
    assert structmap.attrib['ID'] == 'structMap_1'
    assert structmap.attrib['LABEL'] == 'Archivematica default'
    assert len(structmap.attrib) == 3
    # Only the directory produces a div; the deleted entry has none.
    assert len(structmap) == 1
    parent_div = structmap[0]
    assert parent_div.tag == '{http://www.loc.gov/METS/}div'
    assert parent_div.attrib['LABEL'] == 'objects'
    assert parent_div.attrib['TYPE'] == 'Directory'
    assert len(parent_div.attrib) == 2
    assert len(parent_div) == 2
    for index, label in enumerate(('file1.txt', 'file2.txt')):
        child_div = parent_div[index]
        assert child_div.attrib['LABEL'] == label
        assert child_div.attrib['TYPE'] == 'Item'
        assert len(child_div.attrib) == 2
        assert child_div.find('{http://www.loc.gov/METS/}fptr') is not None
def test_serialize_structmap_file(self):
    """
    It should produce a mets:div element.
    It should have a TYPE and LABEL.
    It should have a child mets:fptr element with FILEID.
    """
    entry = metsrw.FSEntry('file1.txt', file_uuid=str(uuid.uuid4()))
    entry.add_dublin_core('<dc />')
    div = entry.serialize_structmap(recurse=False)
    assert div.tag == '{http://www.loc.gov/METS/}div'
    assert div.attrib['TYPE'] == 'Item'
    assert div.attrib['LABEL'] == 'file1.txt'
    # The Dublin Core dmdSec is referenced via a single DMDID.
    assert len(div.attrib['DMDID'].split()) == 1
    assert len(div) == 1
    fptr = div[0]
    assert fptr.tag == '{http://www.loc.gov/METS/}fptr'
    assert fptr.attrib['FILEID'].startswith('file-')
def test_serialize_filesec_metadata(self):
    """
    It should produce a mets:file element.
    It should have an ID attribute.
    It should have one ADMID.
    It should have a child mets:FLocat element with the path.
    """
    entry = metsrw.FSEntry('file1.txt', file_uuid=str(uuid.uuid4()))
    entry.add_premis_object('<premis>object</premis>')
    file_el = entry.serialize_filesec()
    assert file_el.tag == '{http://www.loc.gov/METS/}file'
    assert file_el.attrib['ID'].startswith('file-')
    # Exactly one amdSec is referenced.
    assert len(file_el.attrib['ADMID'].split()) == 1
    assert len(file_el) == 1
    flocat = file_el[0]
    assert flocat.tag == '{http://www.loc.gov/METS/}FLocat'
    assert flocat.attrib['LOCTYPE'] == 'OTHER'
    assert flocat.attrib['OTHERLOCTYPE'] == 'SYSTEM'
    assert flocat.attrib['{http://www.w3.org/1999/xlink}href'] == 'file1.txt'
def test_get_file(self):
    """get_file() should look entries up by uuid, path, label, or combos."""
    # Setup
    leaf3_uuid = str(uuid.uuid4())
    leaf3 = metsrw.FSEntry('dir1/dir2/level3.txt', file_uuid=leaf3_uuid)
    dir2 = metsrw.FSEntry('dir1/dir2', type='Directory', children=[leaf3])
    leaf2_uuid = str(uuid.uuid4())
    leaf2 = metsrw.FSEntry('dir1/level2.txt', file_uuid=leaf2_uuid)
    dir1 = metsrw.FSEntry('dir1', type='Directory', children=[dir2, leaf2])
    leaf1_uuid = str(uuid.uuid4())
    leaf1 = metsrw.FSEntry('level1.txt', file_uuid=leaf1_uuid)
    root = metsrw.FSEntry('root', type='Directory', children=[dir1, leaf1])
    document = metsrw.METSDocument()
    document.append_file(root)
    # By UUID
    for fuuid, expected in (
            (leaf3_uuid, leaf3), (leaf2_uuid, leaf2), (leaf1_uuid, leaf1)):
        assert document.get_file(file_uuid=fuuid) == expected
    assert document.get_file(file_uuid='does not exist') is None
    # By path
    for path, expected in (
            ('dir1/dir2/level3.txt', leaf3),
            ('dir1/dir2', dir2),
            ('dir1/level2.txt', leaf2),
            ('dir1', dir1),
            ('level1.txt', leaf1)):
        assert document.get_file(path=path) == expected
    assert document.get_file(path='does not exist') is None
    # By label
    for label, expected in (
            ('level3.txt', leaf3),
            ('dir2', dir2),
            ('level2.txt', leaf2),
            ('dir1', dir1),
            ('level1.txt', leaf1)):
        assert document.get_file(label=label) == expected
    assert document.get_file(label='does not exist') is None
    # By multiple criteria
    assert document.get_file(
        label='level3.txt', path='dir1/dir2/level3.txt') == leaf3
    assert document.get_file(label='dir2', type='Directory') == dir2
    assert document.get_file(label='level2.txt', type='Item') == leaf2
    assert document.get_file(file_uuid=None, type='Item') is None
    # The lookup tracks files appended after the fact.
    leaf4_uuid = str(uuid.uuid4())
    leaf4 = metsrw.FSEntry('file4.txt', file_uuid=leaf4_uuid)
    document.append_file(leaf4)
    assert document.get_file(file_uuid=leaf4_uuid) == leaf4
    assert document.get_file(path='file4.txt') == leaf4
def create_dip_mets(aip_dir, aip_name, fsentries, mets, dip_mets_file):
    """Creates DIP METS file for AtoM/default upload.

    Strips the AIP METS down to the top-level AIP and "objects" entries,
    adds a single FSEntry for the DIP ZIP under "objects", and writes the
    result to ``dip_mets_file``. Errors are logged, not raised; the
    function always returns None.
    """
    LOGGER.info("Creating DIP METS file for AtoM/default upload.")
    objects_entry = None
    for fsentry in fsentries:
        # Do not delete AIP entry
        if (fsentry.label == os.path.basename(aip_dir)
                and fsentry.type.lower() == "directory"):
            continue
        # Do not delete objects entry and save it for parenting
        if fsentry.label == "objects" and fsentry.type.lower() == "directory":
            objects_entry = fsentry
            continue
        # Delete all the others
        mets.remove_entry(fsentry)
    if not objects_entry:
        LOGGER.error("Could not find objects entry in METS file")
        return
    # Create new entry for ZIP file
    entry = metsrw.FSEntry(
        label="{}.zip".format(aip_name),
        path="objects/{}.zip".format(aip_name),
        file_uuid=str(uuid.uuid4()),
    )
    # Add new entry to objects directory
    objects_entry.add_child(entry)
    # Create DIP METS file
    try:
        mets.write(dip_mets_file, fully_qualified=True, pretty_print=True)
    except Exception:
        # NOTE(review): broad catch looks like deliberate best-effort —
        # failures are logged and swallowed so upload continues elsewhere.
        LOGGER.error("Could not create DIP METS file")
        return
def test_serialize_filesec_basic(self):
    """
    It should produce a mets:file element.
    It should have an ID attribute.
    It should not have ADMIDs.
    It should have a child mets:FLocat element with the path.
    """
    entry = metsrw.FSEntry('file1.txt', file_uuid=str(uuid.uuid4()),
                           checksumtype='MD5',
                           checksum='daa05c683a4913b268653f7a7e36a5b4')
    file_el = entry.serialize_filesec()
    assert file_el.tag == '{http://www.loc.gov/METS/}file'
    assert file_el.attrib['ID'].startswith('file-')
    assert file_el.attrib['CHECKSUM'] == 'daa05c683a4913b268653f7a7e36a5b4'
    assert file_el.attrib['CHECKSUMTYPE'] == 'MD5'
    # No metadata was added, so no amdSec is referenced.
    assert file_el.attrib.get('ADMID') is None
    assert len(file_el) == 1
    flocat = file_el[0]
    assert flocat.tag == '{http://www.loc.gov/METS/}FLocat'
    assert flocat.attrib['LOCTYPE'] == 'OTHER'
    assert flocat.attrib['OTHERLOCTYPE'] == 'SYSTEM'
    assert flocat.attrib['{http://www.w3.org/1999/xlink}href'] == 'file1.txt'
def test_dependency_injection(self):
    """Test the dependency injection (DI) infrastructure for metsrw plugins.

    - client: metsrw.FSEntry
    - services: classes for reading and writing metadata elements, e.g., the
      PREMISObject class of metsrw.plugins.premisrw or other classes exposing
      the same interface.
    - injector: this test code or the code in metsrw/di.py which calls
      ``provide`` on the ``feature_broker`` singleton.

    The ``FSEntry`` class declares its dependency on the class attributes
    ``premis_object_class``, ``premis_event_class``, and
    ``premis_agent_class`` and further requires that these return classes
    with ``fromtree`` and ``serialize`` methods::

        >>> premis_object_class = Dependency(
        ...     has_methods('serialize'),
        ...     has_class_methods('fromtree'),
        ...     is_class)
    """
    # Clear the feature broker and then register/provide the premisrw
    # plugin classes (services) with the feature broker.
    feature_broker = metsrw.feature_broker
    assert len(feature_broker) == 3
    feature_broker.clear()
    assert not feature_broker
    feature_broker.provide('premis_object_class', premisrw.PREMISObject)
    feature_broker.provide('premis_event_class', premisrw.PREMISEvent)
    feature_broker.provide('premis_agent_class', premisrw.PREMISAgent)
    assert len(feature_broker) == 3

    # Create premisrw instances.
    compression_premis_event = premisrw.PREMISEvent(data=EX_COMPR_EVT)
    premis_events = [compression_premis_event]
    premis_agents = [
        premisrw.PREMISAgent(data=x) for x in [EX_AGT_1, EX_AGT_2]
    ]
    _, compression_program_version, archive_tool = (
        compression_premis_event.compression_details)
    premis_object = premisrw.PREMISObject(
        xsi_type=EX_PTR_XSI_TYPE,
        identifier_value=EX_PTR_IDENTIFIER_VALUE,
        message_digest_algorithm=EX_PTR_MESSAGE_DIGEST_ALGORITHM,
        message_digest=EX_PTR_MESSAGE_DIGEST,
        size=EX_PTR_SIZE,
        format_name=EX_PTR_FORMAT_NAME,
        format_registry_key=EX_PTR_FORMAT_REGISTRY_KEY,
        creating_application_name=archive_tool,
        creating_application_version=compression_program_version,
        date_created_by_application=EX_PTR_DATE_CREATED_BY_APPLICATION)
    transform_files = (
        compression_premis_event.get_decompression_transform_files())

    # Create metsrw ``METSDocument`` and ``FSEntry`` instances.
    mets_doc = metsrw.METSDocument()
    fs_entry = metsrw.FSEntry(path=EX_PTR_PATH,
                              file_uuid=EX_PTR_IDENTIFIER_VALUE,
                              use=EX_PTR_PACKAGE_TYPE,
                              type=EX_PTR_PACKAGE_TYPE,
                              transform_files=transform_files,
                              mets_div_type=EX_PTR_AIP_SUBTYPE)
    mets_doc.append_file(fs_entry)

    # Use the ``add_premis_...`` methods to add the PREMIS metadata
    # elements to the ``FSEntry`` instance. This will assert that each
    # PREMIS instance is of the correct type (e.g., that ``premis_object``
    # is an instance of ``FSEntry().premis_object_class``) and will call the
    # instance's ``serialize`` method and incorporate the resulting
    # ``lxml.etree._ElementTree`` instance into the ``FSEntry`` instance
    # appropriately.
    fs_entry.add_premis_object(premis_object)
    for premis_event in premis_events:
        fs_entry.add_premis_event(premis_event)
    for premis_agent in premis_agents:
        fs_entry.add_premis_agent(premis_agent)

    # Assert that the instances returned by the ``FSEntry().get_premis_...``
    # methods are of the anticipated type.
    new_premis_agents = fs_entry.get_premis_agents()
    for new_premis_agent in new_premis_agents:
        assert isinstance(new_premis_agent, premisrw.PREMISAgent)
        assert new_premis_agent in premis_agents
        assert id(new_premis_agent) not in [id(pa) for pa in premis_agents]
    new_premis_events = fs_entry.get_premis_events()
    for new_premis_event in new_premis_events:
        assert isinstance(new_premis_event, premisrw.PREMISEvent)
        assert new_premis_event in premis_events
        assert id(new_premis_event) not in [id(pa) for pa in premis_events]
    new_premis_objects = fs_entry.get_premis_objects()
    for new_premis_object in new_premis_objects:
        assert isinstance(new_premis_object, premisrw.PREMISObject)
        assert new_premis_object == premis_object
        # Bug fix: the original asserted ``id(new_premis_object) is not
        # premis_object``, comparing an int to an object, which is
        # vacuously true. The intent is an identity check: equal but
        # distinct objects.
        assert new_premis_object is not premis_object

    # Assert that the resulting mets XML contains a
    # premis:objectIdentifierValue in the anticipated location in the
    # structure with the anticipated value.
    mets_doc_el = mets_doc.serialize()
    xpath = ('mets:amdSec/mets:techMD/mets:mdWrap[@MDTYPE="PREMIS:OBJECT"]'
             '/mets:xmlData/premis:object/premis:objectIdentifier/'
             'premis:objectIdentifierValue')
    a = mets_doc_el.find(xpath, namespaces=metsrw.NAMESPACES)
    assert a.text == EX_PTR_IDENTIFIER_VALUE

    # Now change the feature broker so that ``FSEntry``'s dependency on a
    # ``premis_object_class`` class attribute is being fulfilled by a new
    # class: ``BetterPREMISObject``.
    feature_broker.provide('premis_object_class', BetterPREMISObject)
    # Now create a new PREMIS object
    premis_object_tree = premis_object.serialize()
    better_premis_object = BetterPREMISObject.fromtree(premis_object_tree)

    # And re-create the ``METSDocument`` and ``FSEntry`` instances.
    mets_doc = metsrw.METSDocument()
    fs_entry = metsrw.FSEntry(path=EX_PTR_PATH,
                              file_uuid=EX_PTR_IDENTIFIER_VALUE,
                              use=EX_PTR_PACKAGE_TYPE,
                              type=EX_PTR_PACKAGE_TYPE,
                              transform_files=transform_files,
                              mets_div_type=EX_PTR_AIP_SUBTYPE)
    mets_doc.append_file(fs_entry)
    # Add the PREMIS metadata again, but this time use the instance of
    # ``BetterPREMISObject``.
    fs_entry.add_premis_object(better_premis_object)
    for premis_event in premis_events:
        fs_entry.add_premis_event(premis_event)
    for premis_agent in premis_agents:
        fs_entry.add_premis_agent(premis_agent)
    # Assert that the instances returned by the ``FSEntry().get_premis_...``
    # methods are of the anticipated type.
    new_premis_objects = fs_entry.get_premis_objects()
    for new_premis_object in new_premis_objects:
        assert isinstance(new_premis_object, BetterPREMISObject)
    # Make sure we can still find the PREMIS object id value.
    mets_doc_el = mets_doc.serialize()
    assert (mets_doc_el.find(
        xpath, namespaces=metsrw.NAMESPACES).text == EX_PTR_IDENTIFIER_VALUE)
    # Reset the feature broker to its default state so subsequent tests
    # don't break.
    metsrw.set_feature_broker_to_default_state(feature_broker)