def test_get_dublincore_none_found(self):
    """getDublinCore should return None when no Dublin Core info is found."""
    # SIP UUID expected to have no Dublin Core rows in the test data.
    unknown_sip_uuid = 'dnednedn-5bd2-4249-84a1-2f00f725b981'
    result = archivematicaCreateMETS2.getDublinCore(
        self.siptypeuuid,
        unknown_sip_uuid,
    )
    assert result is None
def update_dublincore(mets, sip_uuid):
    """
    Record new, changed or removed SIP-level Dublin Core in the METS document.

    Outcomes, by state of the database and of the existing METS:

    * DC row still flagged METADATA_STATUS_REINGEST: nothing to do.
    * No DC element from the DB and no dmdSec on the objects directory:
      nothing to do.
    * No DC element from the DB but the objects directory has dmdSecs:
      an empty dublincore element is added to record the deletion.
    * Otherwise: the DC element built from the DB is added as a new dmdSec.

    Whenever a dmdSec is added and the objects directory then holds more than
    one, the previous last dmdSec is told it has been replaced by the new one.

    :param mets: METS document object; must provide ``get_file`` returning the
        'objects' directory entry (presumably a metsrw document - confirm
        against the caller).
    :param sip_uuid: UUID of the SIP whose Dublin Core is being checked.
    :return: The same ``mets`` object, modified in place when a dmdSec was added.
    """
    # A row still carrying the reingest status was never edited by the user.
    is_untouched = models.DublinCore.objects.filter(
        metadataappliestoidentifier=sip_uuid,
        metadataappliestotype_id=createmets2.SIPMetadataAppliesToType,
        status=models.METADATA_STATUS_REINGEST,
    ).exists()
    if is_untouched:
        print('No updated or new DC metadata found')
        return mets

    # structMap entry that carries the SIP-level DC references
    objects_div = mets.get_file(label='objects', type='Directory')
    print('Existing dmdIds for DC metadata:', objects_div.dmdids)

    # Build the DC element from whatever is currently in the database
    dc_elem = createmets2.getDublinCore(
        createmets2.SIPMetadataAppliesToType, sip_uuid)

    if dc_elem is None and not objects_div.dmdsecs:
        # Nothing in the DB and nothing in the METS
        print('No updated or new DC metadata found')
        return mets

    if dc_elem is None:
        # The METS had DC but the DB no longer does: emit an empty
        # dublincore element to stand in for the deleted metadata.
        print('DC metadata was deleted')
        schema_location = (
            ns.dctermsNS +
            " http://dublincore.org/schemas/xmls/qdc/2008/02/11/dcterms.xsd"
        )
        dc_elem = etree.Element(
            ns.dctermsBNS + "dublincore",
            nsmap={"dcterms": ns.dctermsNS, 'dc': ns.dcNS},
        )
        dc_elem.set(ns.xsiBNS + "schemaLocation", schema_location)

    dmdsec = objects_div.add_dublin_core(dc_elem)
    print('Adding new DC in dmdSec with ID', dmdsec.id_string())

    # The new dmdSec is now last; the one before it is the one it replaces.
    all_dmdsecs = objects_div.dmdsecs
    if len(all_dmdsecs) > 1:
        all_dmdsecs[-2].replace_with(dmdsec)
    return mets
def test_get_dublincore(self):
    """getDublinCore should build a dublincore element from the database rows."""
    sipuuid = '8b891d7c-5bd2-4249-84a1-2f00f725b981'
    siptypeuuid = '3e48343d-e2d2-4956-aaa3-b54d26eb9761'
    # Generate DC element from DB
    dc_elem = archivematicaCreateMETS2.getDublinCore(siptypeuuid, sipuuid)
    # Verify created correctly.
    # Compare against None explicitly: the truth value of an lxml element
    # reflects whether it has children, not whether it exists, so a bare
    # `assert dc_elem` would also fail for a valid but empty element.
    assert dc_elem is not None
    assert dc_elem.tag == '{http://purl.org/dc/terms/}dublincore'
    assert len(dc_elem) == 15
    assert dc_elem[0].tag == '{http://purl.org/dc/elements/1.1/}title'
    assert dc_elem[0].text == 'Yamani Weapons'
    assert dc_elem[1].tag == '{http://purl.org/dc/elements/1.1/}creator'
    assert dc_elem[1].text == 'Keladry of Mindelan'
    assert dc_elem[2].tag == '{http://purl.org/dc/elements/1.1/}subject'
    assert dc_elem[2].text == 'Glaives'
    assert dc_elem[3].tag == '{http://purl.org/dc/elements/1.1/}description'
    assert dc_elem[3].text == 'Glaives are cool'
    assert dc_elem[4].tag == '{http://purl.org/dc/elements/1.1/}publisher'
    assert dc_elem[4].text == 'Tortall Press'
    assert dc_elem[5].tag == '{http://purl.org/dc/elements/1.1/}contributor'
    assert dc_elem[5].text == 'Yuki'
    assert dc_elem[6].tag == '{http://purl.org/dc/elements/1.1/}date'
    assert dc_elem[6].text == '2015'
    assert dc_elem[7].tag == '{http://purl.org/dc/elements/1.1/}type'
    assert dc_elem[7].text == 'Archival Information Package'
    assert dc_elem[8].tag == '{http://purl.org/dc/elements/1.1/}format'
    assert dc_elem[8].text == 'parchement'
    assert dc_elem[9].tag == '{http://purl.org/dc/elements/1.1/}identifier'
    assert dc_elem[9].text == '42/1'
    assert dc_elem[10].tag == '{http://purl.org/dc/elements/1.1/}source'
    assert dc_elem[10].text == "Numair's library"
    assert dc_elem[11].tag == '{http://purl.org/dc/elements/1.1/}relation'
    assert dc_elem[11].text == 'None'
    assert dc_elem[12].tag == '{http://purl.org/dc/elements/1.1/}language'
    assert dc_elem[12].text == 'en'
    assert dc_elem[13].tag == '{http://purl.org/dc/elements/1.1/}rights'
    assert dc_elem[13].text == 'Public Domain'
    assert dc_elem[14].tag == '{http://purl.org/dc/terms/}isPartOf'
    assert dc_elem[14].text == 'AIC#42'
def test_get_dublincore(self):
    """getDublinCore should turn the database info into a Dublin Core element."""
    # Expected children of the dublincore element, in document order,
    # as (qualified tag, text) pairs.
    expected_children = [
        ('{http://purl.org/dc/elements/1.1/}title', 'Yamani Weapons'),
        ('{http://purl.org/dc/elements/1.1/}creator', 'Keladry of Mindelan'),
        ('{http://purl.org/dc/elements/1.1/}subject', 'Glaives'),
        ('{http://purl.org/dc/elements/1.1/}description', 'Glaives are cool'),
        ('{http://purl.org/dc/elements/1.1/}publisher', 'Tortall Press'),
        ('{http://purl.org/dc/elements/1.1/}contributor', 'Yuki'),
        ('{http://purl.org/dc/elements/1.1/}date', '2015'),
        ('{http://purl.org/dc/elements/1.1/}type', 'Archival Information Package'),
        ('{http://purl.org/dc/elements/1.1/}format', 'parchement'),
        ('{http://purl.org/dc/elements/1.1/}identifier', '42/1'),
        ('{http://purl.org/dc/elements/1.1/}source', "Numair's library"),
        ('{http://purl.org/dc/elements/1.1/}relation', 'None'),
        ('{http://purl.org/dc/elements/1.1/}language', 'en'),
        ('{http://purl.org/dc/elements/1.1/}rights', 'Public Domain'),
        ('{http://purl.org/dc/terms/}isPartOf', 'AIC#42'),
    ]

    # Build the element from the DB rows for the fixture SIP
    dc_elem = archivematicaCreateMETS2.getDublinCore(
        self.siptypeuuid, self.sipuuid)

    # Check the container first, then every child in order
    assert dc_elem is not None
    assert dc_elem.tag == '{http://purl.org/dc/terms/}dublincore'
    assert len(dc_elem) == 15
    for position, (expected_tag, expected_text) in enumerate(expected_children):
        assert dc_elem[position].tag == expected_tag
        assert dc_elem[position].text == expected_text
def test_get_dublincore_none_found(self):
    """getDublinCore should return None for a SIP UUID with no DC info.

    The UUID used here is presumably absent from the test data - confirm
    against the fixtures.
    """
    missing_uuid = 'dnednedn-5bd2-4249-84a1-2f00f725b981'
    element = archivematicaCreateMETS2.getDublinCore(
        self.siptypeuuid, missing_uuid)
    assert element is None
def create_mets_file(aic, aips):
    """
    Create the AIC METS file listing the AIPs that make up the collection.

    The document gets one dmdSec holding the AIC's Dublin Core (plus an
    <extent> with the AIP count), one <file> per AIP in the fileSec and a
    logical structMap with one div/fptr per AIP. The file is written to
    ``<aic dir>/metadata/METS.<uuid>.xml``, registered as a file of the SIP,
    and the AIP count is stored in a UnitVariable.

    :param aic: dict with at least 'uuid' (AIC UUID) and 'dir' (AIC directory).
    :param aips: sequence of dicts, each with 'name', 'uuid' and 'label'.
    """
    # Prepare constants
    nsmap = {
        'mets': ns.metsNS,
        'xlink': ns.xlinkNS,
        'xsi': ns.xsiNS,
    }
    now = timezone.now().strftime("%Y-%m-%dT%H:%M:%S")

    # Set up structure
    E = ElementMaker(namespace=ns.metsNS, nsmap=nsmap)
    mets = E.mets(
        E.metsHdr(CREATEDATE=now),
        E.dmdSec(
            E.mdWrap(
                E.xmlData(),
                MDTYPE="DC",
            ),
            ID='dmdSec_1',
        ),
        E.fileSec(E.fileGrp()),
        E.structMap(
            E.div(
                TYPE="Archival Information Collection",
                DMDID="dmdSec_1",
            ),
            TYPE='logical',
        ),
    )
    schema_location_attr = '{{{ns}}}schemaLocation'.format(ns=nsmap['xsi'])
    mets.attrib[schema_location_attr] = (
        "http://www.loc.gov/METS/ "
        "http://www.loc.gov/standards/mets/version18/mets.xsd"
    )

    # Add Dublin Core info
    xml_data = mets.find('mets:dmdSec/mets:mdWrap/mets:xmlData',
                         namespaces=ns.NSMAP)
    dublincore = archivematicaCreateMETS2.getDublinCore(
        archivematicaCreateMETS2.SIPMetadataAppliesToType, aic['uuid'])
    if dublincore is None:
        # getDublinCore may return None when nothing is stored for the unit.
        # Fall back to an empty container so the extent can still be recorded
        # instead of failing inside etree.SubElement with an opaque TypeError.
        print('No DC metadata found for AIC; creating empty dublincore element')
        dublincore = etree.Element(ns.dctermsBNS + 'dublincore',
                                   nsmap={'dcterms': ns.dctermsNS})
    # Add <extent> with number of AIPs
    extent = etree.SubElement(dublincore, ns.dctermsBNS + 'extent')
    extent.text = "{} AIPs".format(len(aips))
    xml_data.append(dublincore)

    # Add elements for each AIP
    file_grp = mets.find('mets:fileSec/mets:fileGrp', namespaces=ns.NSMAP)
    struct_div = mets.find('mets:structMap/mets:div', namespaces=ns.NSMAP)
    for aip in aips:
        file_id = '{name}-{uuid}'.format(name=aip['name'], uuid=aip['uuid'])
        etree.SubElement(file_grp, ns.metsBNS + 'file', ID=file_id)

        # Fall back to the AIP name when no label was provided
        label = aip['label'] or aip['name']
        div = etree.SubElement(struct_div, ns.metsBNS + 'div', LABEL=label)
        etree.SubElement(div, ns.metsBNS + 'fptr', FILEID=file_id)

    # Serialize once and reuse the result for both the log and the file.
    # tostring() returns a byte string (ASCII by default), so it is decoded
    # for printing and written through a binary-mode handle.
    serialized = etree.tostring(mets, pretty_print=True)
    print(serialized.decode('utf-8'))

    # Write out the file
    file_uuid = str(uuid.uuid4())
    basename = os.path.join('metadata', "METS.{}.xml".format(file_uuid))
    filename = os.path.join(aic['dir'], basename)
    with open(filename, 'wb') as f:
        f.write(serialized)
    fileOperations.addFileToSIP(
        filePathRelativeToSIP='%SIPDirectory%' + basename,
        fileUUID=file_uuid,
        sipUUID=aic['uuid'],
        taskUUID=str(uuid.uuid4()),  # Unsure what should go here
        date=now,
        sourceType="aip creation",
        use='metadata')
    # To make this work with the createMETS2 (for SIPs)
    databaseFunctions.insertIntoDerivations(file_uuid, file_uuid)

    # Insert the count of AIPs in the AIC into UnitVariables, so it can be
    # indexed later
    UnitVariable.objects.create(
        unittype="SIP",
        unituuid=aic['uuid'],
        variable="AIPsinAIC",
        variablevalue=str(len(aips)))
def update_dublincore(root, sip_uuid, now):
    """
    Add new dmdSec for updated Dublin Core info relating to entire SIP.

    Case: No DC in DB: Do nothing
    Case: DC in DB is untouched (METADATA_STATUS_REINGEST): Do nothing
    Case: New DC in DB with METADATA_STATUS_ORIGINAL: Add new DC
    Case: DC in DB with METADATA_STATUS_UPDATED: mark old, create updated

    :param root: lxml element for the METS document root; modified in place.
    :param sip_uuid: UUID of the SIP whose Dublin Core is being checked.
    :param now: Timestamp string stored in the new dmdSec's CREATED attribute.
    :return: ``root``, with the new dmdSec inserted when one was needed.
    """
    def dc_exists_with_status(status):
        # True if the SIP has a Dublin Core row carrying the given status
        return models.DublinCore.objects.filter(
            metadataappliestoidentifier=sip_uuid,
            metadataappliestotype_id=createmets2.SIPMetadataAppliesToType,
            status=status).exists()

    # Check for DC in DB with METADATA_STATUS_UPDATED; if there is none,
    # a newly added DC (METADATA_STATUS_ORIGINAL) is the only other trigger.
    updated = dc_exists_with_status(models.METADATA_STATUS_UPDATED)
    if not updated and not dc_exists_with_status(models.METADATA_STATUS_ORIGINAL):
        # No new or updated DC found - return early
        print('No updated or new DC metadata found')
        return root

    print('DC form metadata was updated:', updated)

    # Get structMap element related to SIP DC info
    # NOTE(review): assumes the objects directory div is present in the
    # structMap; find() returns None otherwise - confirm against callers.
    objects_div = root.find(
        'mets:structMap/mets:div[@TYPE="Directory"]/mets:div[@TYPE="Directory"][@LABEL="objects"]',
        namespaces=ns.NSMAP)
    ids = objects_div.get('DMDID', '')
    print('Existing dmdIds for DC metadata:', ids)

    # Create element
    dc_elem = createmets2.getDublinCore(createmets2.SIPMetadataAppliesToType,
                                        sip_uuid)
    count_dmdsecs = int(root.xpath('count(mets:dmdSec)', namespaces=ns.NSMAP))
    # DMDID should be larger than any existing one
    dmdid = "dmdSec_%d" % (count_dmdsecs + 1)
    dmd_sec = etree.Element(ns.metsBNS + "dmdSec", ID=dmdid, CREATED=now)
    print('Adding new DC in dmdSec with ID', dmdid)

    if updated:
        dmd_sec.set('STATUS', 'updated')
        # Update old DC: get dmdSecs associated with the SIP.
        # Skip the lookup when the div references no dmdSec, because an empty
        # predicate ("mets:dmdSec[]") is not a valid XPath expression.
        existing_ids = ids.split()
        if existing_ids:
            search_ids = ' or '.join('@ID="%s"' % x for x in existing_ids)
            dmdsecs = root.xpath('mets:dmdSec[%s]' % search_ids,
                                 namespaces=ns.NSMAP)
        else:
            dmdsecs = []
        for d in dmdsecs:
            # If no status (not updated), set to original
            if not d.get('STATUS'):
                print(d.get('ID'), 'status set to original')
                d.set('STATUS', 'original')

    mdWrap = etree.SubElement(dmd_sec, ns.metsBNS + "mdWrap", MDTYPE="DC")
    xmlData = etree.SubElement(mdWrap, ns.metsBNS + "xmlData")
    xmlData.append(dc_elem)

    # Append to document: after the last dmdSec, or after the metsHdr when
    # the document has no dmdSec yet
    existing_dmdsecs = root.findall('mets:dmdSec', namespaces=ns.NSMAP)
    if existing_dmdsecs:
        add_after = existing_dmdsecs[-1]
    else:
        add_after = root.find('mets:metsHdr', namespaces=ns.NSMAP)
    add_after.addnext(dmd_sec)

    # Update structMap
    ids = ids + ' ' + dmdid if ids else dmdid
    objects_div.set('DMDID', ids)

    return root
def test_get_dublincore_none_found(self):
    """getDublinCore should return None when the SIP UUID has no DC info.

    The SIP UUID below is presumably not present in the test data - confirm
    against the fixtures.
    """
    applies_to_type = '3e48343d-e2d2-4956-aaa3-b54d26eb9761'
    missing_sip = 'dnednedn-5bd2-4249-84a1-2f00f725b981'
    outcome = archivematicaCreateMETS2.getDublinCore(
        applies_to_type, missing_sip)
    assert outcome is None