def parse_reingest_mets(transfer_uuid, transfer_path):
    """
    Parse the reingest METS of a transfer and load its metadata.

    The METS file is located with ``find_mets_file(transfer_path)`` and parsed
    with ``etree.parse``. If parsing raises, the failure is printed and logged
    (with traceback) and the function returns without doing anything else.
    Otherwise the parsed tree is handed to ``parse_mets_to_db.parse_dc`` and
    ``parse_mets_to_db.parse_rights`` together with the SIP UUID taken from the
    METS file name.

    :param transfer_uuid: UUID of the transfer (not used by this function).
    :param transfer_path: Path passed to ``find_mets_file`` to locate the METS.
    :return: None
    """
    mets_path = find_mets_file(transfer_path)
    try:
        mets_tree = etree.parse(mets_path)
    except Exception:
        # Best effort: an unparseable METS is reported and skipped, not fatal.
        print('Error parsing reingest METS', mets_path, ' - skipping')
        logger.info('Error parsing reingest mets %s - skipping', mets_path, exc_info=True)
    else:
        # The SIP UUID is what remains of the file name once the 'METS.' and
        # '.xml' parts are removed.
        mets_name = os.path.basename(mets_path)
        sip_uuid = mets_name.replace('METS.', '').replace('.xml', '')
        # Note: Because DublinCore and PREMIS rights are not database-level
        # foreign keys, this works even though the SIP may not exist yet
        parse_mets_to_db.parse_dc(sip_uuid, mets_tree)
        parse_mets_to_db.parse_rights(sip_uuid, mets_tree)
def test_no_sip_dc(self):
    """
    It should ignore file-level DC.

    Parses the 'mets_only_file_dc.xml' fixture and expects ``parse_dc`` to
    return None and no DublinCore row to exist for the SIP UUID.
    """
    sip_uuid = 'f35d2530-45eb-4eb1-aa09-fb30661e7dcd'
    fixture_path = os.path.join(THIS_DIR, 'fixtures', 'mets_only_file_dc.xml')
    root = etree.parse(fixture_path)
    dc = parse_mets_to_db.parse_dc(sip_uuid, root)
    assert dc is None
    stored = models.DublinCore.objects.filter(metadataappliestoidentifier=sip_uuid)
    assert stored.exists() is False
def test_none_found(self):
    """
    It should parse no DC if none is found.

    Parses the 'mets_no_metadata.xml' fixture and expects ``parse_dc`` to
    return None and no DublinCore row to exist for the SIP UUID.
    """
    sip_uuid = 'd481580e-53b9-4a52-96db-baa969e78adc'
    fixture_path = os.path.join(THIS_DIR, 'fixtures', 'mets_no_metadata.xml')
    root = etree.parse(fixture_path)
    dc = parse_mets_to_db.parse_dc(sip_uuid, root)
    assert dc is None
    stored = models.DublinCore.objects.filter(metadataappliestoidentifier=sip_uuid)
    assert stored.exists() is False
def test_multiple_sip_dc(self):
    """
    It should parse the most recent SIP DC if multiple exist.

    Parses the 'mets_multiple_sip_dc.xml' fixture, expects a DublinCore row to
    exist for the SIP UUID, and compares each attribute of the object returned
    by ``parse_dc`` against the expected value.
    """
    sip_uuid = 'eacbf65f-2528-4be0-8cb3-532f45fcdff8'
    fixture_path = os.path.join(THIS_DIR, 'fixtures', 'mets_multiple_sip_dc.xml')
    root = etree.parse(fixture_path)
    dc = parse_mets_to_db.parse_dc(sip_uuid, root)
    assert dc
    assert models.DublinCore.objects.filter(metadataappliestoidentifier=sip_uuid).exists()
    # Ordered (attribute, expected value) pairs; checked top to bottom.
    expected_fields = (
        ('title', 'Yamani Weapons'),
        ('creator', 'Keladry of Mindelan'),
        ('subject', 'Glaives'),
        ('description', 'Glaives are awesome'),
        ('publisher', 'Tortall Press'),
        ('contributor', 'Yuki'),
        ('date', '2014'),
        ('type', 'Archival Information Package'),
        ('format', 'palimpsest'),
        ('identifier', '42/1'),
        ('source', ''),
        ('relation', 'Everyone!'),
        ('language', 'en'),
        ('rights', 'Public Domain'),
        ('is_part_of', 'AIC#43'),
    )
    for attribute, expected in expected_fields:
        assert getattr(dc, attribute) == expected
def test_get_sip_dc_ignore_file_dc(self):
    """
    It should parse a SIP-level DC even if file-level DC is also present.

    Parses the 'mets_sip_and_file_dc.xml' fixture, expects a DublinCore row to
    exist for the SIP UUID, and compares each attribute of the object returned
    by ``parse_dc`` against the expected value.
    """
    sip_uuid = '55972e97-8d35-4b07-abaa-ae260c32d261'
    fixture_path = os.path.join(THIS_DIR, 'fixtures', 'mets_sip_and_file_dc.xml')
    root = etree.parse(fixture_path)
    dc = parse_mets_to_db.parse_dc(sip_uuid, root)
    assert dc
    assert models.DublinCore.objects.filter(metadataappliestoidentifier=sip_uuid).exists()
    # Ordered (attribute, expected value) pairs; checked top to bottom.
    expected_fields = (
        ('title', 'Yamani Weapons'),
        ('creator', 'Keladry of Mindelan'),
        ('subject', 'Glaives'),
        ('description', 'Glaives are cool'),
        ('publisher', 'Tortall Press'),
        ('contributor', 'Yuki'),
        ('date', '2014'),
        ('type', 'Archival Information Package'),
        ('format', 'parchement'),
        ('identifier', '42/1'),
        ('source', "Numair's library"),
        ('relation', 'None'),
        ('language', 'en'),
        ('rights', 'Public Domain'),
        ('is_part_of', 'AIC#43'),
    )
    for attribute, expected in expected_fields:
        assert getattr(dc, attribute) == expected
def test_only_original(self):
    """
    It should parse a SIP-level DC if found.

    Parses the 'mets_sip_dc.xml' fixture, expects a DublinCore row to exist
    for the SIP UUID, and compares each attribute of the object returned by
    ``parse_dc`` against the expected value.
    """
    sip_uuid = 'eacbf65f-2528-4be0-8cb3-532f45fcdff8'
    fixture_path = os.path.join(THIS_DIR, 'fixtures', 'mets_sip_dc.xml')
    root = etree.parse(fixture_path)
    dc = parse_mets_to_db.parse_dc(sip_uuid, root)
    assert dc
    assert models.DublinCore.objects.filter(metadataappliestoidentifier=sip_uuid).exists()
    # Ordered (attribute, expected value) pairs; checked top to bottom.
    expected_fields = (
        ('title', 'Yamani Weapons'),
        ('creator', 'Keladry of Mindelan'),
        ('subject', 'Glaives'),
        ('description', 'Glaives are cool'),
        ('publisher', 'Tortall Press'),
        ('contributor', 'Yuki'),
        ('date', '2014'),
        ('type', 'Archival Information Package'),
        ('format', 'parchement'),
        ('identifier', '42/1'),
        ('source', "Numair's library"),
        ('relation', 'None'),
        ('language', 'en'),
        ('rights', 'Public Domain'),
        ('is_part_of', 'AIC#43'),
    )
    for attribute, expected in expected_fields:
        assert getattr(dc, attribute) == expected