예제 #1
0
def parse_reingest_mets(transfer_uuid, transfer_path):
    """Load Dublin Core and rights metadata from a reingest METS file.

    The METS file is located with ``find_mets_file(transfer_path)`` and
    parsed; the parsed document is then handed to
    ``parse_mets_to_db.parse_dc`` and ``parse_mets_to_db.parse_rights`` so
    the information is available to later microservices.  If the file
    cannot be parsed, the failure is printed and logged and the function
    returns without calling either parser.

    :param transfer_uuid: UUID of the transfer (not used by this function).
    :param transfer_path: Path handed to ``find_mets_file`` to locate the METS.
    :return: None
    """
    mets_file = find_mets_file(transfer_path)
    try:
        mets_tree = etree.parse(mets_file)
    except Exception:
        # Best effort: an unparseable METS is reported and skipped, not fatal.
        print('Error parsing reingest METS', mets_file, ' - skipping')
        logger.info('Error parsing reingest mets %s - skipping', mets_file, exc_info=True)
        return None

    # The SIP UUID is what is left of the METS file name once the 'METS.'
    # and '.xml' parts have been removed.
    mets_name = os.path.basename(mets_file)
    sip_uuid = mets_name.replace('METS.', '').replace('.xml', '')

    # Note: Because DublinCore and PREMIS rights are not database-level
    # foreign keys, this works even though the SIP may not exist yet.
    parse_mets_to_db.parse_dc(sip_uuid, mets_tree)
    parse_mets_to_db.parse_rights(sip_uuid, mets_tree)
예제 #2
0
 def test_no_sip_dc(self):
     """ File-level DC on its own must not be parsed as SIP DC. """
     sip_uuid = 'f35d2530-45eb-4eb1-aa09-fb30661e7dcd'
     fixture = os.path.join(THIS_DIR, 'fixtures', 'mets_only_file_dc.xml')
     parsed = parse_mets_to_db.parse_dc(sip_uuid, etree.parse(fixture))
     # parse_dc returns nothing ...
     assert parsed is None
     # ... and no DublinCore row exists for this identifier.
     stored = models.DublinCore.objects.filter(metadataappliestoidentifier=sip_uuid)
     assert stored.exists() is False
예제 #3
0
 def test_none_found(self):
     """ A METS without metadata must yield no DC at all. """
     sip_uuid = 'd481580e-53b9-4a52-96db-baa969e78adc'
     fixture = os.path.join(THIS_DIR, 'fixtures', 'mets_no_metadata.xml')
     parsed = parse_mets_to_db.parse_dc(sip_uuid, etree.parse(fixture))
     # parse_dc returns nothing ...
     assert parsed is None
     # ... and no DublinCore row exists for this identifier.
     stored = models.DublinCore.objects.filter(metadataappliestoidentifier=sip_uuid)
     assert stored.exists() is False
예제 #4
0
 def test_multiple_sip_dc(self):
     """ When several SIP DC entries exist, the most recent one is parsed. """
     sip_uuid = 'eacbf65f-2528-4be0-8cb3-532f45fcdff8'
     fixture = os.path.join(THIS_DIR, 'fixtures', 'mets_multiple_sip_dc.xml')
     dc = parse_mets_to_db.parse_dc(sip_uuid, etree.parse(fixture))
     assert dc
     assert models.DublinCore.objects.filter(metadataappliestoidentifier=sip_uuid).exists()
     # (attribute, expected value) pairs, checked in this order.
     expected_fields = (
         ('title', 'Yamani Weapons'),
         ('creator', 'Keladry of Mindelan'),
         ('subject', 'Glaives'),
         ('description', 'Glaives are awesome'),
         ('publisher', 'Tortall Press'),
         ('contributor', 'Yuki'),
         ('date', '2014'),
         ('type', 'Archival Information Package'),
         ('format', 'palimpsest'),
         ('identifier', '42/1'),
         ('source', ''),
         ('relation', 'Everyone!'),
         ('language', 'en'),
         ('rights', 'Public Domain'),
         ('is_part_of', 'AIC#43'),
     )
     for attr, expected in expected_fields:
         assert getattr(dc, attr) == expected
예제 #5
0
 def test_get_sip_dc_ignore_file_dc(self):
     """ SIP-level DC is parsed even when file-level DC is present too. """
     sip_uuid = '55972e97-8d35-4b07-abaa-ae260c32d261'
     fixture = os.path.join(THIS_DIR, 'fixtures', 'mets_sip_and_file_dc.xml')
     dc = parse_mets_to_db.parse_dc(sip_uuid, etree.parse(fixture))
     assert dc
     assert models.DublinCore.objects.filter(metadataappliestoidentifier=sip_uuid).exists()
     # (attribute, expected value) pairs, checked in this order.
     expected_fields = (
         ('title', 'Yamani Weapons'),
         ('creator', 'Keladry of Mindelan'),
         ('subject', 'Glaives'),
         ('description', 'Glaives are cool'),
         ('publisher', 'Tortall Press'),
         ('contributor', 'Yuki'),
         ('date', '2014'),
         ('type', 'Archival Information Package'),
         ('format', 'parchement'),
         ('identifier', '42/1'),
         ('source', "Numair's library"),
         ('relation', 'None'),
         ('language', 'en'),
         ('rights', 'Public Domain'),
         ('is_part_of', 'AIC#43'),
     )
     for attr, expected in expected_fields:
         assert getattr(dc, attr) == expected
예제 #6
0
 def test_only_original(self):
     """ A SIP-level DC present in the METS is parsed. """
     sip_uuid = 'eacbf65f-2528-4be0-8cb3-532f45fcdff8'
     fixture = os.path.join(THIS_DIR, 'fixtures', 'mets_sip_dc.xml')
     dc = parse_mets_to_db.parse_dc(sip_uuid, etree.parse(fixture))
     assert dc
     assert models.DublinCore.objects.filter(metadataappliestoidentifier=sip_uuid).exists()
     # (attribute, expected value) pairs, checked in this order.
     expected_fields = (
         ('title', 'Yamani Weapons'),
         ('creator', 'Keladry of Mindelan'),
         ('subject', 'Glaives'),
         ('description', 'Glaives are cool'),
         ('publisher', 'Tortall Press'),
         ('contributor', 'Yuki'),
         ('date', '2014'),
         ('type', 'Archival Information Package'),
         ('format', 'parchement'),
         ('identifier', '42/1'),
         ('source', "Numair's library"),
         ('relation', 'None'),
         ('language', 'en'),
         ('rights', 'Public Domain'),
         ('is_part_of', 'AIC#43'),
     )
     for attr, expected in expected_fields:
         assert getattr(dc, attr) == expected