Exemple #1
0
def update_dublincore(job, mets, sip_uuid):
    """
    Add a dmdSec reflecting the current SIP-level Dublin Core in the database.

    Outcomes, in the order they are checked:

    * A DublinCore row for this SIP has status METADATA_STATUS_REINGEST:
      treated as untouched, the METS is returned without changes.
    * The database yields no DC element and the objects directory has no
      dmdSecs: nothing to record, the METS is returned without changes.
    * The database yields no DC element but the objects directory has
      dmdSecs: an empty dublincore element is added to mark the deletion.
    * The database yields a DC element: it is added as a new dmdSec.

    Whenever a dmdSec is added and the objects directory then holds more than
    one, replace_with() is called on the second-to-last dmdSec with the new one.

    :param job: Job whose pyprint() receives progress messages.
    :param mets: METS document object; its "objects" directory entry is
        looked up with get_file().
    :param sip_uuid: UUID of the SIP whose Dublin Core is being checked.
    :return: The ``mets`` object that was passed in.
    """
    sip_type = createmets2.SIPMetadataAppliesToType

    # A row still flagged METADATA_STATUS_REINGEST means the DC was not edited
    dc_is_unedited = models.DublinCore.objects.filter(
        metadataappliestoidentifier=sip_uuid,
        metadataappliestotype_id=sip_type,
        status=models.METADATA_STATUS_REINGEST,
    ).exists()
    if dc_is_unedited:
        job.pyprint("No updated or new DC metadata found")
        return mets

    # structMap entry for the objects directory, which carries the SIP's DC
    objects_dir = mets.get_file(label="objects", type="Directory")
    job.pyprint("Existing dmdIds for DC metadata:", objects_dir.dmdids)

    # Build the DC element from the database (None when there is none)
    dublincore = createmets2.getDublinCore(sip_type, sip_uuid)

    if dublincore is None and not objects_dir.dmdsecs:
        # Nothing in the database and nothing in the METS to supersede
        job.pyprint("No updated or new DC metadata found")
        return mets

    if dublincore is None:
        # The METS had DC but the database no longer does: record an empty
        # dublincore element to stand for the deletion
        job.pyprint("DC metadata was deleted")
        schema_location = (
            ns.dctermsNS
            + " http://dublincore.org/schemas/xmls/qdc/2008/02/11/dcterms.xsd"
        )
        dublincore = etree.Element(
            ns.dctermsBNS + "dublincore",
            nsmap={"dcterms": ns.dctermsNS, "dc": ns.dcNS},
        )
        dublincore.set(ns.xsiBNS + "schemaLocation", schema_location)

    new_dmdsec = objects_dir.add_dublin_core(dublincore)
    job.pyprint("Adding new DC in dmdSec with ID", new_dmdsec.id_string)

    # With an earlier dmdSec present, the one just before the new entry is
    # the one being superseded
    if len(objects_dir.dmdsecs) > 1:
        previous_dmdsec = objects_dir.dmdsecs[-2]
        previous_dmdsec.replace_with(new_dmdsec)

    return mets
    def test_get_dublincore(self):
        """getDublinCore should build the complete DC element from the database info."""
        dc_elem = create_mets_v2.getDublinCore(self.siptypeuuid, self.sipuuid)

        # Expected children in document order, as (qualified tag, text)
        expected_children = [
            ('{http://purl.org/dc/elements/1.1/}title', 'Yamani Weapons'),
            ('{http://purl.org/dc/elements/1.1/}creator', 'Keladry of Mindelan'),
            ('{http://purl.org/dc/elements/1.1/}subject', 'Glaives'),
            ('{http://purl.org/dc/elements/1.1/}description', 'Glaives are cool'),
            ('{http://purl.org/dc/elements/1.1/}publisher', 'Tortall Press'),
            ('{http://purl.org/dc/elements/1.1/}contributor', 'Yuki'),
            ('{http://purl.org/dc/elements/1.1/}date', '2015'),
            ('{http://purl.org/dc/elements/1.1/}type', 'Archival Information Package'),
            ('{http://purl.org/dc/elements/1.1/}format', 'parchement'),
            ('{http://purl.org/dc/elements/1.1/}identifier', '42/1'),
            ('{http://purl.org/dc/elements/1.1/}source', "Numair's library"),
            ('{http://purl.org/dc/elements/1.1/}relation', 'None'),
            ('{http://purl.org/dc/elements/1.1/}language', 'en'),
            ('{http://purl.org/dc/elements/1.1/}rights', 'Public Domain'),
            ('{http://purl.org/dc/terms/}isPartOf', 'AIC#42'),
        ]

        # The root element must exist, be dcterms:dublincore and hold 15 children
        assert dc_elem is not None
        assert dc_elem.tag == '{http://purl.org/dc/terms/}dublincore'
        assert len(dc_elem) == 15

        # Each child is checked by position: tag first, then text
        for position, (tag, text) in enumerate(expected_children):
            child = dc_elem[position]
            assert child.tag == tag
            assert child.text == text
Exemple #3
0
    def test_get_dublincore(self):
        """The Dublin Core element generated from the database has the expected content."""
        # Qualified tag and text expected at each child position
        expected = (
            ("{http://purl.org/dc/elements/1.1/}title", "Yamani Weapons"),
            ("{http://purl.org/dc/elements/1.1/}creator", "Keladry of Mindelan"),
            ("{http://purl.org/dc/elements/1.1/}subject", "Glaives"),
            ("{http://purl.org/dc/elements/1.1/}description", "Glaives are cool"),
            ("{http://purl.org/dc/elements/1.1/}publisher", "Tortall Press"),
            ("{http://purl.org/dc/elements/1.1/}contributor", "Yuki"),
            ("{http://purl.org/dc/elements/1.1/}date", "2015"),
            ("{http://purl.org/dc/elements/1.1/}type", "Archival Information Package"),
            ("{http://purl.org/dc/elements/1.1/}format", "parchement"),
            ("{http://purl.org/dc/elements/1.1/}identifier", "42/1"),
            ("{http://purl.org/dc/elements/1.1/}source", "Numair's library"),
            ("{http://purl.org/dc/elements/1.1/}relation", "None"),
            ("{http://purl.org/dc/elements/1.1/}language", "en"),
            ("{http://purl.org/dc/elements/1.1/}rights", "Public Domain"),
            ("{http://purl.org/dc/terms/}isPartOf", "AIC#42"),
        )

        # Build the element from the database
        dc_elem = create_mets_v2.getDublinCore(self.siptypeuuid, self.sipuuid)

        # Root element checks
        assert dc_elem is not None
        assert dc_elem.tag == "{http://purl.org/dc/terms/}dublincore"
        assert len(dc_elem) == 15

        # Child checks, position by position
        for index, (expected_tag, expected_text) in enumerate(expected):
            assert dc_elem[index].tag == expected_tag
            assert dc_elem[index].text == expected_text
    def test_get_dublincore_none_found(self):
        """getDublinCore should return None when there is no info for the SIP."""
        # Identifier the test expects to have no Dublin Core info
        unknown_sip_uuid = 'dnednedn-5bd2-4249-84a1-2f00f725b981'

        result = create_mets_v2.getDublinCore(self.siptypeuuid, unknown_sip_uuid)

        assert result is None
def create_mets_file(aic, aips, job):
    """Create the AIC METS file with AIP information and register it.

    Builds a METS document containing a Dublin Core dmdSec (with an added
    ``extent`` element holding the number of AIPs) plus one ``file`` entry and
    one ``div``/``fptr`` pair per AIP, writes it to
    ``<aic["dir"]>/metadata/METS.<uuid>.xml``, then records the new file and
    the AIP count in the database.

    :param aic: Mapping with the AIC's ``uuid`` and ``dir`` keys.
    :param aips: Sized iterable of mappings, each with ``name``, ``uuid`` and
        ``label`` keys; ``name`` is used as the label when ``label`` is falsy.
    :param job: Job whose pyprint() receives the generated METS.
    """

    # Prepare constants
    nsmap = {"mets": ns.metsNS, "xlink": ns.xlinkNS, "xsi": ns.xsiNS}
    now = timezone.now().strftime("%Y-%m-%dT%H:%M:%S")

    # Set up structure
    E = ElementMaker(namespace=ns.metsNS, nsmap=nsmap)
    mets = E.mets(
        E.metsHdr(CREATEDATE=now),
        E.dmdSec(E.mdWrap(E.xmlData(), MDTYPE="DC"), ID="dmdSec_1"),
        E.fileSec(E.fileGrp()),
        E.structMap(
            E.div(TYPE="Archival Information Collection", DMDID="dmdSec_1"),
            TYPE="logical",
        ),
    )
    mets.attrib["{{{ns}}}schemaLocation".format(
        ns=nsmap["xsi"]
    )] = "http://www.loc.gov/METS/ http://www.loc.gov/standards/mets/version1121/mets.xsd"

    # Add Dublin Core info
    xml_data = mets.find("mets:dmdSec/mets:mdWrap/mets:xmlData",
                         namespaces=ns.NSMAP)
    dublincore = create_mets_v2.getDublinCore(
        create_mets_v2.SIPMetadataAppliesToType, aic["uuid"])
    if dublincore is None:
        # getDublinCore returns None when no Dublin Core is found; start from
        # an empty element so the extent below can still be recorded instead
        # of failing inside SubElement
        job.pyprint("No DC metadata found for AIC", aic["uuid"])
        dublincore = etree.Element(
            ns.dctermsBNS + "dublincore",
            nsmap={"dcterms": ns.dctermsNS, "dc": ns.dcNS},
        )
    # Add <extent> with number of AIPs
    extent = etree.SubElement(dublincore, ns.dctermsBNS + "extent")
    extent.text = "{} AIPs".format(len(aips))
    xml_data.append(dublincore)

    # Add elements for each AIP
    file_grp = mets.find("mets:fileSec/mets:fileGrp", namespaces=ns.NSMAP)
    struct_div = mets.find("mets:structMap/mets:div", namespaces=ns.NSMAP)
    for aip in aips:
        file_id = "{name}-{uuid}".format(name=aip["name"], uuid=aip["uuid"])
        etree.SubElement(file_grp, ns.metsBNS + "file", ID=file_id)

        label = aip["label"] or aip["name"]
        div = etree.SubElement(struct_div, ns.metsBNS + "div", LABEL=label)
        etree.SubElement(div, ns.metsBNS + "fptr", FILEID=file_id)

    job.pyprint(etree.tostring(mets, pretty_print=True))

    # Write out the file
    file_uuid = str(uuid.uuid4())
    basename = os.path.join("metadata", "METS.{}.xml".format(file_uuid))
    filename = os.path.join(aic["dir"], basename)
    # tostring() with encoding="utf-8" yields bytes, so the file must be
    # opened in binary mode; text mode rejects bytes on Python 3
    with open(filename, "wb") as f:
        f.write(
            etree.tostring(mets,
                           pretty_print=True,
                           xml_declaration=True,
                           encoding="utf-8"))
    fileOperations.addFileToSIP(
        filePathRelativeToSIP="%SIPDirectory%" + basename,
        fileUUID=file_uuid,
        sipUUID=aic["uuid"],
        taskUUID=str(uuid.uuid4()),  # Unsure what should go here
        date=now,
        sourceType="aip creation",
        use="metadata",
    )
    # To make this work with the createMETS2 (for SIPs)
    databaseFunctions.insertIntoDerivations(file_uuid, file_uuid)

    # Insert the count of AIPs in the AIC into UnitVariables, so it can be
    # indexed later
    UnitVariable.objects.create(
        unittype="SIP",
        unituuid=aic["uuid"],
        variable="AIPsinAIC",
        variablevalue=str(len(aips)),
    )