def update_dublincore(job, mets, sip_uuid):
    """
    Add a new dmdSec for updated Dublin Core info relating to the entire SIP.

    Outcomes, depending on the database and the existing METS:

    * DC in DB is untouched (METADATA_STATUS_REINGEST): do nothing.
    * No DC in DB and no dmdSec on the objects directory: do nothing.
    * No DC in DB but a dmdSec exists in the METS: add an empty DC element
      to mark the metadata as deleted.
    * DC in DB (original or updated): add it as a new dmdSec and, when an
      earlier dmdSec exists, record that it has been replaced.

    :param job: Job whose ``pyprint`` method is used for progress output.
    :param mets: METS document object providing ``get_file``.
    :param sip_uuid: UUID of the SIP whose Dublin Core is inspected.
    :return: The ``mets`` object that was passed in.
    """
    nothing_found_msg = "No updated or new DC metadata found"
    sip_type = createmets2.SIPMetadataAppliesToType

    # A DC row still flagged as REINGEST has not been edited since reingest.
    reingest_rows = models.DublinCore.objects.filter(
        metadataappliestoidentifier=sip_uuid,
        metadataappliestotype_id=sip_type,
        status=models.METADATA_STATUS_REINGEST,
    )
    if reingest_rows.exists():
        job.pyprint(nothing_found_msg)
        return mets

    # structMap entry for the "objects" directory, which holds the SIP DC.
    sip_div = mets.get_file(label="objects", type="Directory")
    job.pyprint("Existing dmdIds for DC metadata:", sip_div.dmdids)

    dublin_core = createmets2.getDublinCore(sip_type, sip_uuid)
    if dublin_core is None:
        if not sip_div.dmdsecs:
            # Nothing in the DB and nothing in the METS.
            job.pyprint(nothing_found_msg)
            return mets

        # The METS had DC but the DB no longer does: emit an empty element.
        job.pyprint("DC metadata was deleted")
        dublin_core = etree.Element(
            ns.dctermsBNS + "dublincore",
            nsmap={"dcterms": ns.dctermsNS, "dc": ns.dcNS},
        )
        dublin_core.set(
            ns.xsiBNS + "schemaLocation",
            ns.dctermsNS + " http://dublincore.org/schemas/xmls/qdc/2008/02/11/dcterms.xsd",
        )

    new_dmdsec = sip_div.add_dublin_core(dublin_core)
    job.pyprint("Adding new DC in dmdSec with ID", new_dmdsec.id_string)

    # With at least two entries, the second-to-last one is treated as the
    # previous DC dmdSec (presumably add_dublin_core appends - confirm).
    if len(sip_div.dmdsecs) >= 2:
        sip_div.dmdsecs[-2].replace_with(new_dmdsec)
    return mets
def test_get_dublincore(self):
    """It should create a Dublin Core element from the database info."""
    # Expected children of the root element, in document order: (tag, text).
    expected_children = (
        ('{http://purl.org/dc/elements/1.1/}title', 'Yamani Weapons'),
        ('{http://purl.org/dc/elements/1.1/}creator', 'Keladry of Mindelan'),
        ('{http://purl.org/dc/elements/1.1/}subject', 'Glaives'),
        ('{http://purl.org/dc/elements/1.1/}description', 'Glaives are cool'),
        ('{http://purl.org/dc/elements/1.1/}publisher', 'Tortall Press'),
        ('{http://purl.org/dc/elements/1.1/}contributor', 'Yuki'),
        ('{http://purl.org/dc/elements/1.1/}date', '2015'),
        ('{http://purl.org/dc/elements/1.1/}type', 'Archival Information Package'),
        ('{http://purl.org/dc/elements/1.1/}format', 'parchement'),
        ('{http://purl.org/dc/elements/1.1/}identifier', '42/1'),
        ('{http://purl.org/dc/elements/1.1/}source', "Numair's library"),
        ('{http://purl.org/dc/elements/1.1/}relation', 'None'),
        ('{http://purl.org/dc/elements/1.1/}language', 'en'),
        ('{http://purl.org/dc/elements/1.1/}rights', 'Public Domain'),
        ('{http://purl.org/dc/terms/}isPartOf', 'AIC#42'),
    )

    # Generate DC element from DB
    root = create_mets_v2.getDublinCore(self.siptypeuuid, self.sipuuid)

    # Verify the root element, then each child in order
    assert root is not None
    assert root.tag == '{http://purl.org/dc/terms/}dublincore'
    assert len(root) == 15
    for position, (tag, text) in enumerate(expected_children):
        child = root[position]
        assert child.tag == tag
        assert child.text == text
def test_get_dublincore(self):
    """It should create a Dublin Core element from the database info."""
    # Expected (tag, text) of every child of the root, in document order.
    expected = [
        ("{http://purl.org/dc/elements/1.1/}title", "Yamani Weapons"),
        ("{http://purl.org/dc/elements/1.1/}creator", "Keladry of Mindelan"),
        ("{http://purl.org/dc/elements/1.1/}subject", "Glaives"),
        ("{http://purl.org/dc/elements/1.1/}description", "Glaives are cool"),
        ("{http://purl.org/dc/elements/1.1/}publisher", "Tortall Press"),
        ("{http://purl.org/dc/elements/1.1/}contributor", "Yuki"),
        ("{http://purl.org/dc/elements/1.1/}date", "2015"),
        ("{http://purl.org/dc/elements/1.1/}type", "Archival Information Package"),
        ("{http://purl.org/dc/elements/1.1/}format", "parchement"),
        ("{http://purl.org/dc/elements/1.1/}identifier", "42/1"),
        ("{http://purl.org/dc/elements/1.1/}source", "Numair's library"),
        ("{http://purl.org/dc/elements/1.1/}relation", "None"),
        ("{http://purl.org/dc/elements/1.1/}language", "en"),
        ("{http://purl.org/dc/elements/1.1/}rights", "Public Domain"),
        ("{http://purl.org/dc/terms/}isPartOf", "AIC#42"),
    ]

    # Build the element from the database rows.
    dublin_core = create_mets_v2.getDublinCore(self.siptypeuuid, self.sipuuid)

    # Check the root first, then compare all children in one go.
    assert dublin_core is not None
    assert dublin_core.tag == "{http://purl.org/dc/terms/}dublincore"
    assert len(dublin_core) == 15
    actual = [(child.tag, child.text) for child in dublin_core]
    assert actual == expected
def test_get_dublincore_none_found(self):
    """It should not create a Dublin Core element if no info found."""
    # Identifier expected to have no Dublin Core info (presumably absent
    # from the test fixtures).
    unknown_sip = 'dnednedn-5bd2-4249-84a1-2f00f725b981'
    assert create_mets_v2.getDublinCore(self.siptypeuuid, unknown_sip) is None
def create_mets_file(aic, aips, job):
    """
    Create AIC METS file with AIP information.

    Builds a METS document holding the AIC's Dublin Core (plus an ``extent``
    element with the number of AIPs) and one ``file``/``div``/``fptr`` entry
    per AIP, writes it to ``<aic["dir"]>/metadata/METS.<uuid>.xml``, registers
    that file with the SIP and stores the AIP count in a UnitVariable.

    :param aic: Dict describing the AIC; the keys ``"uuid"`` and ``"dir"``
        are read.
    :param aips: Sized iterable of dicts, one per AIP; the keys ``"name"``,
        ``"uuid"`` and ``"label"`` are read.
    :param job: Job whose ``pyprint`` method is used to log the METS.
    :return: None.
    """
    # Prepare constants
    nsmap = {"mets": ns.metsNS, "xlink": ns.xlinkNS, "xsi": ns.xsiNS}
    now = timezone.now().strftime("%Y-%m-%dT%H:%M:%S")

    # Set up structure
    E = ElementMaker(namespace=ns.metsNS, nsmap=nsmap)
    mets = E.mets(
        E.metsHdr(CREATEDATE=now),
        E.dmdSec(E.mdWrap(E.xmlData(), MDTYPE="DC"), ID="dmdSec_1"),
        E.fileSec(E.fileGrp()),
        E.structMap(
            E.div(TYPE="Archival Information Collection", DMDID="dmdSec_1"),
            TYPE="logical",
        ),
    )
    mets.attrib[
        "{{{ns}}}schemaLocation".format(ns=nsmap["xsi"])
    ] = "http://www.loc.gov/METS/ http://www.loc.gov/standards/mets/version1121/mets.xsd"

    # Add Dublin Core info
    xml_data = mets.find("mets:dmdSec/mets:mdWrap/mets:xmlData", namespaces=ns.NSMAP)
    # NOTE(review): getDublinCore can return None when no DC info exists for
    # the identifier; SubElement below would then fail. Confirm that an AIC
    # always has Dublin Core metadata by the time this runs.
    dublincore = create_mets_v2.getDublinCore(
        create_mets_v2.SIPMetadataAppliesToType, aic["uuid"]
    )
    # Add <extent> with number of AIPs
    extent = etree.SubElement(dublincore, ns.dctermsBNS + "extent")
    extent.text = "{} AIPs".format(len(aips))
    xml_data.append(dublincore)

    # Add elements for each AIP
    file_grp = mets.find("mets:fileSec/mets:fileGrp", namespaces=ns.NSMAP)
    struct_div = mets.find("mets:structMap/mets:div", namespaces=ns.NSMAP)
    for aip in aips:
        file_id = "{name}-{uuid}".format(name=aip["name"], uuid=aip["uuid"])
        etree.SubElement(file_grp, ns.metsBNS + "file", ID=file_id)

        # Fall back to the AIP name when no label is set.
        label = aip["label"] or aip["name"]
        div = etree.SubElement(struct_div, ns.metsBNS + "div", LABEL=label)
        etree.SubElement(div, ns.metsBNS + "fptr", FILEID=file_id)

    job.pyprint(etree.tostring(mets, pretty_print=True))

    # Write out the file
    file_uuid = str(uuid.uuid4())
    basename = os.path.join("metadata", "METS.{}.xml".format(file_uuid))
    filename = os.path.join(aic["dir"], basename)
    # tostring() with an explicit byte encoding returns bytes, so the file
    # must be opened in binary mode; text mode raises TypeError on Python 3.
    with open(filename, "wb") as f:
        f.write(
            etree.tostring(
                mets, pretty_print=True, xml_declaration=True, encoding="utf-8"
            )
        )

    fileOperations.addFileToSIP(
        filePathRelativeToSIP="%SIPDirectory%" + basename,
        fileUUID=file_uuid,
        sipUUID=aic["uuid"],
        taskUUID=str(uuid.uuid4()),  # Unsure what should go here
        date=now,
        sourceType="aip creation",
        use="metadata",
    )
    # To make this work with the createMETS2 (for SIPs)
    databaseFunctions.insertIntoDerivations(file_uuid, file_uuid)

    # Insert the count of AIPs in the AIC into UnitVariables, so it can be
    # indexed later
    UnitVariable.objects.create(
        unittype="SIP",
        unituuid=aic["uuid"],
        variable="AIPsinAIC",
        variablevalue=str(len(aips)),
    )