def test_dmdsec_from_csv_parsed_metadata_no_data(self):
     """An empty parsed-metadata mapping should produce no dmdSecs at all."""
     # Test
     job = Job("stub", "stub", [])
     dmdsecs = create_mets_v2.createDmdSecsFromCSVParsedMetadata(job, {})
     # Verify: the result is an empty list
     assert dmdsecs == []
 def test_dmdsec_from_csv_parsed_metadata_other_only(self):
     """Non-DC columns alone should yield exactly one OTHER/CUSTOM dmdSec."""
     parsed = collections.OrderedDict()
     parsed["Title"] = ["Yamani Weapons"]
     parsed["Contributor"] = [u"雪 ユキ".encode('utf8')]
     parsed["Long Description"] = ['This is about how glaives are used in the Yamani Islands']
     # Test
     job = Job("stub", "stub", [])
     dmdsecs = create_mets_v2.createDmdSecsFromCSVParsedMetadata(job, parsed)
     # Verify
     assert dmdsecs
     assert len(dmdsecs) == 1
     section = dmdsecs[0]
     assert section.tag == '{http://www.loc.gov/METS/}dmdSec'
     assert 'ID' in section.attrib
     wrapper = section[0]
     assert wrapper.tag == '{http://www.loc.gov/METS/}mdWrap'
     for attr_name, attr_value in (('MDTYPE', 'OTHER'), ('OTHERMDTYPE', 'CUSTOM')):
         assert attr_name in wrapper.attrib
         assert wrapper.attrib[attr_name] == attr_value
     payload = wrapper[0]
     assert payload.tag == '{http://www.loc.gov/METS/}xmlData'
     # The custom elements are direct children of xmlData, in input order
     expected_children = [
         ('title', 'Yamani Weapons'),
         ('contributor', u'雪 ユキ'),
         ('long_description', 'This is about how glaives are used in the Yamani Islands'),
     ]
     assert len(payload) == len(expected_children)
     for child, (tag, text) in zip(payload, expected_children):
         assert child.tag == tag
         assert child.text == text
    def test_dmdsec_from_csv_parsed_metadata_both(self):
        """DC and non-DC columns together should give a DC dmdSec followed by an OTHER one."""
        columns = [
            ("dc.title", ["Yamani Weapons"]),
            ("dc.contributor", [u"雪 ユキ".encode('utf8')]),
            ("dcterms.isPartOf", ["AIC#42"]),
            ("Title", ["Yamani Weapons"]),
            ("Contributor", [u"雪 ユキ".encode('utf8')]),
            ("Long Description", ['This is about how glaives are used in the Yamani Islands']),
        ]
        parsed = collections.OrderedDict(columns)
        # Test
        state = create_mets_v2.MetsState()
        job = Job("stub", "stub", [])
        dmdsecs = create_mets_v2.createDmdSecsFromCSVParsedMetadata(job, parsed, state)
        # Verify
        assert dmdsecs
        assert len(dmdsecs) == 2

        # Either kind could come first in principle; here DC is expected first
        dc_section = dmdsecs[0]
        assert dc_section.tag == '{http://www.loc.gov/METS/}dmdSec'
        assert 'ID' in dc_section.attrib
        dc_wrap = dc_section[0]
        assert dc_wrap.tag == '{http://www.loc.gov/METS/}mdWrap'
        assert 'MDTYPE' in dc_wrap.attrib
        assert dc_wrap.attrib['MDTYPE'] == 'DC'
        dc_payload = dc_wrap[0]
        assert dc_payload.tag == '{http://www.loc.gov/METS/}xmlData'
        # DC elements live under a single dublincore container
        dublincore = dc_payload[0]
        assert dublincore.tag == '{http://purl.org/dc/terms/}dublincore'
        expected_dc = [
            ('{http://purl.org/dc/elements/1.1/}title', 'Yamani Weapons'),
            ('{http://purl.org/dc/elements/1.1/}contributor', u'雪 ユキ'),
            ('{http://purl.org/dc/terms/}isPartOf', 'AIC#42'),
        ]
        assert len(dublincore) == len(expected_dc)
        for child, (tag, text) in zip(dublincore, expected_dc):
            assert child.tag == tag
            assert child.text == text

        other_section = dmdsecs[1]
        assert other_section.tag == '{http://www.loc.gov/METS/}dmdSec'
        assert 'ID' in other_section.attrib
        other_wrap = other_section[0]
        assert other_wrap.tag == '{http://www.loc.gov/METS/}mdWrap'
        for attr_name, attr_value in (('MDTYPE', 'OTHER'), ('OTHERMDTYPE', 'CUSTOM')):
            assert attr_name in other_wrap.attrib
            assert other_wrap.attrib[attr_name] == attr_value
        other_payload = other_wrap[0]
        assert other_payload.tag == '{http://www.loc.gov/METS/}xmlData'
        # Custom elements are direct children of xmlData
        expected_other = [
            ('title', 'Yamani Weapons'),
            ('contributor', u'雪 ユキ'),
            ('long_description', 'This is about how glaives are used in the Yamani Islands'),
        ]
        assert len(other_payload) == len(expected_other)
        for child, (tag, text) in zip(other_payload, expected_other):
            assert child.tag == tag
            assert child.text == text
Beispiel #4
0
    def test_dmdsec_from_csv_parsed_metadata_repeats(self):
        """Every value of a repeated column should become its own element."""
        parsed = collections.OrderedDict()
        parsed["dc.contributor"] = ["Yuki", "雪 ユキ".encode("utf8")]
        parsed["Contributor"] = ["Yuki", "雪 ユキ".encode("utf8")]
        # Test
        state = create_mets_v2.MetsState()
        job = Job("stub", "stub", [])
        dmdsecs = create_mets_v2.createDmdSecsFromCSVParsedMetadata(job, parsed, state)
        # Verify
        assert dmdsecs
        assert len(dmdsecs) == 2

        # Either kind could come first in principle; here DC is expected first
        dc_section = dmdsecs[0]
        assert dc_section.tag == "{http://www.loc.gov/METS/}dmdSec"
        assert "ID" in dc_section.attrib
        dc_wrap = dc_section[0]
        assert dc_wrap.tag == "{http://www.loc.gov/METS/}mdWrap"
        assert "MDTYPE" in dc_wrap.attrib
        assert dc_wrap.attrib["MDTYPE"] == "DC"
        dc_payload = dc_wrap[0]
        assert dc_payload.tag == "{http://www.loc.gov/METS/}xmlData"
        # DC elements live under a single dublincore container
        dublincore = dc_payload[0]
        assert dublincore.tag == "{http://purl.org/dc/terms/}dublincore"
        expected_dc = [
            ("{http://purl.org/dc/elements/1.1/}contributor", "Yuki"),
            ("{http://purl.org/dc/elements/1.1/}contributor", "雪 ユキ"),
        ]
        assert len(dublincore) == len(expected_dc)
        for child, (tag, text) in zip(dublincore, expected_dc):
            assert child.tag == tag
            assert child.text == text

        other_section = dmdsecs[1]
        assert other_section.tag == "{http://www.loc.gov/METS/}dmdSec"
        assert "ID" in other_section.attrib
        other_wrap = other_section[0]
        assert other_wrap.tag == "{http://www.loc.gov/METS/}mdWrap"
        for attr_name, attr_value in (("MDTYPE", "OTHER"), ("OTHERMDTYPE", "CUSTOM")):
            assert attr_name in other_wrap.attrib
            assert other_wrap.attrib[attr_name] == attr_value
        other_payload = other_wrap[0]
        assert other_payload.tag == "{http://www.loc.gov/METS/}xmlData"
        # Custom elements are direct children of xmlData
        expected_other = [("contributor", "Yuki"), ("contributor", "雪 ユキ")]
        assert len(other_payload) == len(expected_other)
        for child, (tag, text) in zip(other_payload, expected_other):
            assert child.tag == tag
            assert child.text == text
    def test_dmdsec_from_csv_parsed_metadata_repeats(self):
        """Every value of a repeated column should become its own element."""
        columns = [
            ("dc.contributor", ["Yuki", u"雪 ユキ".encode('utf8')]),
            ("Contributor", ["Yuki", u"雪 ユキ".encode('utf8')]),
        ]
        parsed = collections.OrderedDict(columns)
        # Test
        job = Job("stub", "stub", [])
        dmdsecs = create_mets_v2.createDmdSecsFromCSVParsedMetadata(job, parsed)
        # Verify
        assert dmdsecs
        assert len(dmdsecs) == 2

        # Either kind could come first in principle; here DC is expected first
        dc_section = dmdsecs[0]
        assert dc_section.tag == '{http://www.loc.gov/METS/}dmdSec'
        assert 'ID' in dc_section.attrib
        dc_wrap = dc_section[0]
        assert dc_wrap.tag == '{http://www.loc.gov/METS/}mdWrap'
        assert 'MDTYPE' in dc_wrap.attrib
        assert dc_wrap.attrib['MDTYPE'] == 'DC'
        dc_payload = dc_wrap[0]
        assert dc_payload.tag == '{http://www.loc.gov/METS/}xmlData'
        # DC elements live under a single dublincore container
        dublincore = dc_payload[0]
        assert dublincore.tag == '{http://purl.org/dc/terms/}dublincore'
        assert len(dublincore) == 2
        for child, text in zip(dublincore, ['Yuki', u'雪 ユキ']):
            assert child.tag == '{http://purl.org/dc/elements/1.1/}contributor'
            assert child.text == text

        other_section = dmdsecs[1]
        assert other_section.tag == '{http://www.loc.gov/METS/}dmdSec'
        assert 'ID' in other_section.attrib
        other_wrap = other_section[0]
        assert other_wrap.tag == '{http://www.loc.gov/METS/}mdWrap'
        for attr_name, attr_value in (('MDTYPE', 'OTHER'), ('OTHERMDTYPE', 'CUSTOM')):
            assert attr_name in other_wrap.attrib
            assert other_wrap.attrib[attr_name] == attr_value
        other_payload = other_wrap[0]
        assert other_payload.tag == '{http://www.loc.gov/METS/}xmlData'
        # Custom elements are direct children of xmlData
        assert len(other_payload) == 2
        for child, text in zip(other_payload, ['Yuki', u'雪 ユキ']):
            assert child.tag == 'contributor'
            assert child.text == text
def update_metadata_csv(job, mets, metadata_csv, sip_uuid, sip_dir, state):
    """Add Dublin Core dmdSecs from a new metadata.csv to the METS file entries.

    The CSV path is ``metadata_csv.currentlocation`` with the first
    ``%SIPDirectory%`` placeholder replaced by ``sip_dir``.  For each entry in
    the parsed CSV, the matching ``File`` row is looked up (first by original
    location, then by current location), its METS entry is fetched, and a
    dmdSec is added for every Dublin Core block created from the CSV row.
    When the entry ends up with more than one dmdSec, ``replace_with`` is
    called on the one before the newly added dmdSec.

    Only Dublin Core metadata is written.  Generated dmdSecs that contain no
    ``dcterms:dublincore`` element are skipped instead of being passed on as
    ``None``.

    :param job: Job object; used for logging through ``pyprint`` and passed to
        the CSV parser and the dmdSec factory.
    :param mets: METS document object providing ``get_file``.
    :param metadata_csv: Object whose ``currentlocation`` holds the CSV path.
    :param sip_uuid: Value used as ``sip_id`` when querying ``models.File``.
    :param sip_dir: Replacement for the ``%SIPDirectory%`` placeholder.
    :param state: Passed through to ``createDmdSecsFromCSVParsedMetadata``.
    :return: The ``mets`` object that was passed in.
    """
    job.pyprint('Parse new metadata.csv')
    full_path = metadata_csv.currentlocation.replace('%SIPDirectory%', sip_dir,
                                                     1)
    csvmetadata = createmetscsv.parseMetadataCSV(job, full_path)

    # FIXME Non-DC metadata from metadata.csv is not written to the METS file;
    # only dmdSecs containing a dcterms:dublincore element are added below.
    for f, md in csvmetadata.items():
        # Verify file is in AIP
        job.pyprint('Looking for', f, 'from metadata.csv in SIP')
        # Find File with original or current location matching metadata.csv
        # Prepend % to match the end of %SIPDirectory% or %transferDirectory%
        try:
            file_obj = models.File.objects.get(sip_id=sip_uuid,
                                               originallocation__endswith='%' +
                                               f)
        except models.File.DoesNotExist:
            try:
                file_obj = models.File.objects.get(
                    sip_id=sip_uuid, currentlocation__endswith='%' + f)
            except models.File.DoesNotExist:
                job.pyprint(f, 'not found in database')
                continue
        job.pyprint(f, 'found in database')

        fsentry = mets.get_file(file_uuid=file_obj.uuid)
        if fsentry is None:
            # The file is known to the database but has no entry in the METS
            # document; there is nothing to attach the metadata to.
            job.pyprint(f, 'not found in METS file')
            continue
        job.pyprint(f, 'was associated with', fsentry.dmdids)

        # Create dmdSec
        new_dmdsecs = createmets2.createDmdSecsFromCSVParsedMetadata(
            job, md, state)
        for new_dmdsec in new_dmdsecs:
            # Strip the generated dmdSec down to just its DC part
            new_dc = new_dmdsec.find('.//dcterms:dublincore',
                                     namespaces=ns.NSMAP)
            if new_dc is None:
                # A dmdSec without a dublincore element (non-DC metadata):
                # find() returns None, which must not be added as Dublin Core.
                job.pyprint(f, 'has non-DC metadata that was not added')
                continue
            new_metsrw_dmdsec = fsentry.add_dublin_core(new_dc)
            if len(fsentry.dmdsecs) > 1:
                fsentry.dmdsecs[-2].replace_with(new_metsrw_dmdsec)

        job.pyprint(f, 'now associated with', fsentry.dmdids)

    return mets
Beispiel #7
0
 def test_dmdsec_from_csv_parsed_metadata_other_only(self):
     """Non-DC columns alone should yield exactly one OTHER/CUSTOM dmdSec."""
     parsed = collections.OrderedDict()
     parsed["Title"] = ["Yamani Weapons"]
     parsed["Contributor"] = ["雪 ユキ".encode("utf8")]
     parsed["Long Description"] = [
         "This is about how glaives are used in the Yamani Islands"
     ]
     # Test
     state = create_mets_v2.MetsState()
     job = Job("stub", "stub", [])
     dmdsecs = create_mets_v2.createDmdSecsFromCSVParsedMetadata(job, parsed, state)
     # Verify
     assert dmdsecs
     assert len(dmdsecs) == 1
     section = dmdsecs[0]
     assert section.tag == "{http://www.loc.gov/METS/}dmdSec"
     assert "ID" in section.attrib
     wrapper = section[0]
     assert wrapper.tag == "{http://www.loc.gov/METS/}mdWrap"
     for attr_name, attr_value in (("MDTYPE", "OTHER"), ("OTHERMDTYPE", "CUSTOM")):
         assert attr_name in wrapper.attrib
         assert wrapper.attrib[attr_name] == attr_value
     payload = wrapper[0]
     assert payload.tag == "{http://www.loc.gov/METS/}xmlData"
     # The custom elements are direct children of xmlData, in input order
     expected_children = [
         ("title", "Yamani Weapons"),
         ("contributor", "雪 ユキ"),
         (
             "long_description",
             "This is about how glaives are used in the Yamani Islands",
         ),
     ]
     assert len(payload) == len(expected_children)
     for child, (tag, text) in zip(payload, expected_children):
         assert child.tag == tag
         assert child.text == text
 def test_dmdsec_from_csv_parsed_metadata_dc_only(self):
     """DC columns alone should yield exactly one DC dmdSec."""
     columns = [
         ("dc.title", ["Yamani Weapons"]),
         ("dc.creator", ["Keladry of Mindelan"]),
         ("dc.subject", ["Glaives"]),
         ("dc.description", ["Glaives are cool"]),
         ("dc.publisher", ["Tortall Press"]),
         ("dc.contributor", [u"雪 ユキ".encode('utf8')]),
         ("dc.date", ["2015"]),
         ("dc.type", ["Archival Information Package"]),
         ("dc.format", ["parchement"]),
         ("dc.identifier", ["42/1"]),
         ("dc.source", ["Numair's library"]),
         ("dc.relation", ["None"]),
         ("dc.language", ["en"]),
         ("dc.rights", ["Public Domain"]),
         ("dcterms.isPartOf", ["AIC#42"]),
     ]
     parsed = collections.OrderedDict(columns)
     # Test
     job = Job("stub", "stub", [])
     dmdsecs = create_mets_v2.createDmdSecsFromCSVParsedMetadata(job, parsed)
     # Verify
     assert dmdsecs
     assert len(dmdsecs) == 1
     section = dmdsecs[0]
     assert section.tag == '{http://www.loc.gov/METS/}dmdSec'
     assert 'ID' in section.attrib
     wrapper = section[0]
     assert wrapper.tag == '{http://www.loc.gov/METS/}mdWrap'
     assert 'MDTYPE' in wrapper.attrib
     assert wrapper.attrib['MDTYPE'] == 'DC'
     payload = wrapper[0]
     assert payload.tag == '{http://www.loc.gov/METS/}xmlData'
     # DC elements live under a single dublincore container, in input order
     dublincore = payload[0]
     assert dublincore.tag == '{http://purl.org/dc/terms/}dublincore'
     expected_children = [
         ('{http://purl.org/dc/elements/1.1/}title', 'Yamani Weapons'),
         ('{http://purl.org/dc/elements/1.1/}creator', 'Keladry of Mindelan'),
         ('{http://purl.org/dc/elements/1.1/}subject', 'Glaives'),
         ('{http://purl.org/dc/elements/1.1/}description', 'Glaives are cool'),
         ('{http://purl.org/dc/elements/1.1/}publisher', 'Tortall Press'),
         ('{http://purl.org/dc/elements/1.1/}contributor', u'雪 ユキ'),
         ('{http://purl.org/dc/elements/1.1/}date', '2015'),
         ('{http://purl.org/dc/elements/1.1/}type', 'Archival Information Package'),
         ('{http://purl.org/dc/elements/1.1/}format', 'parchement'),
         ('{http://purl.org/dc/elements/1.1/}identifier', '42/1'),
         ('{http://purl.org/dc/elements/1.1/}source', "Numair's library"),
         ('{http://purl.org/dc/elements/1.1/}relation', 'None'),
         ('{http://purl.org/dc/elements/1.1/}language', 'en'),
         ('{http://purl.org/dc/elements/1.1/}rights', 'Public Domain'),
         ('{http://purl.org/dc/terms/}isPartOf', 'AIC#42'),
     ]
     assert len(dublincore) == 15
     for child, (tag, text) in zip(dublincore, expected_children):
         assert child.tag == tag
         assert child.text == text
Beispiel #9
0
    def test_dmdsec_from_csv_parsed_metadata_both(self):
        """DC and non-DC columns together should give a DC dmdSec followed by an OTHER one."""
        columns = [
            ("dc.title", ["Yamani Weapons"]),
            ("dc.contributor", ["雪 ユキ".encode("utf8")]),
            ("dcterms.isPartOf", ["AIC#42"]),
            ("Title", ["Yamani Weapons"]),
            ("Contributor", ["雪 ユキ".encode("utf8")]),
            (
                "Long Description",
                ["This is about how glaives are used in the Yamani Islands"],
            ),
        ]
        parsed = collections.OrderedDict(columns)
        # Test
        state = create_mets_v2.MetsState()
        job = Job("stub", "stub", [])
        dmdsecs = create_mets_v2.createDmdSecsFromCSVParsedMetadata(job, parsed, state)
        # Verify
        assert dmdsecs
        assert len(dmdsecs) == 2

        # Either kind could come first in principle; here DC is expected first
        dc_section = dmdsecs[0]
        assert dc_section.tag == "{http://www.loc.gov/METS/}dmdSec"
        assert "ID" in dc_section.attrib
        dc_wrap = dc_section[0]
        assert dc_wrap.tag == "{http://www.loc.gov/METS/}mdWrap"
        assert "MDTYPE" in dc_wrap.attrib
        assert dc_wrap.attrib["MDTYPE"] == "DC"
        dc_payload = dc_wrap[0]
        assert dc_payload.tag == "{http://www.loc.gov/METS/}xmlData"
        # DC elements live under a single dublincore container
        dublincore = dc_payload[0]
        assert dublincore.tag == "{http://purl.org/dc/terms/}dublincore"
        expected_dc = [
            ("{http://purl.org/dc/elements/1.1/}title", "Yamani Weapons"),
            ("{http://purl.org/dc/elements/1.1/}contributor", "雪 ユキ"),
            ("{http://purl.org/dc/terms/}isPartOf", "AIC#42"),
        ]
        assert len(dublincore) == len(expected_dc)
        for child, (tag, text) in zip(dublincore, expected_dc):
            assert child.tag == tag
            assert child.text == text

        other_section = dmdsecs[1]
        assert other_section.tag == "{http://www.loc.gov/METS/}dmdSec"
        assert "ID" in other_section.attrib
        other_wrap = other_section[0]
        assert other_wrap.tag == "{http://www.loc.gov/METS/}mdWrap"
        for attr_name, attr_value in (("MDTYPE", "OTHER"), ("OTHERMDTYPE", "CUSTOM")):
            assert attr_name in other_wrap.attrib
            assert other_wrap.attrib[attr_name] == attr_value
        other_payload = other_wrap[0]
        assert other_payload.tag == "{http://www.loc.gov/METS/}xmlData"
        # Custom elements are direct children of xmlData
        expected_other = [
            ("title", "Yamani Weapons"),
            ("contributor", "雪 ユキ"),
            (
                "long_description",
                "This is about how glaives are used in the Yamani Islands",
            ),
        ]
        assert len(other_payload) == len(expected_other)
        for child, (tag, text) in zip(other_payload, expected_other):
            assert child.tag == tag
            assert child.text == text
Beispiel #10
0
 def test_dmdsec_from_csv_parsed_metadata_dc_only(self):
     """DC columns alone should yield exactly one DC dmdSec."""
     columns = [
         ("dc.title", ["Yamani Weapons"]),
         ("dc.creator", ["Keladry of Mindelan"]),
         ("dc.subject", ["Glaives"]),
         ("dc.description", ["Glaives are cool"]),
         ("dc.publisher", ["Tortall Press"]),
         ("dc.contributor", ["雪 ユキ".encode("utf8")]),
         ("dc.date", ["2015"]),
         ("dc.type", ["Archival Information Package"]),
         ("dc.format", ["parchement"]),
         ("dc.identifier", ["42/1"]),
         ("dc.source", ["Numair's library"]),
         ("dc.relation", ["None"]),
         ("dc.language", ["en"]),
         ("dc.rights", ["Public Domain"]),
         ("dcterms.isPartOf", ["AIC#42"]),
     ]
     parsed = collections.OrderedDict(columns)
     # Test
     state = create_mets_v2.MetsState()
     job = Job("stub", "stub", [])
     dmdsecs = create_mets_v2.createDmdSecsFromCSVParsedMetadata(job, parsed, state)
     # Verify
     assert dmdsecs
     assert len(dmdsecs) == 1
     section = dmdsecs[0]
     assert section.tag == "{http://www.loc.gov/METS/}dmdSec"
     assert "ID" in section.attrib
     wrapper = section[0]
     assert wrapper.tag == "{http://www.loc.gov/METS/}mdWrap"
     assert "MDTYPE" in wrapper.attrib
     assert wrapper.attrib["MDTYPE"] == "DC"
     payload = wrapper[0]
     assert payload.tag == "{http://www.loc.gov/METS/}xmlData"
     # DC elements live under a single dublincore container, in input order
     dublincore = payload[0]
     assert dublincore.tag == "{http://purl.org/dc/terms/}dublincore"
     expected_children = [
         ("{http://purl.org/dc/elements/1.1/}title", "Yamani Weapons"),
         ("{http://purl.org/dc/elements/1.1/}creator", "Keladry of Mindelan"),
         ("{http://purl.org/dc/elements/1.1/}subject", "Glaives"),
         ("{http://purl.org/dc/elements/1.1/}description", "Glaives are cool"),
         ("{http://purl.org/dc/elements/1.1/}publisher", "Tortall Press"),
         ("{http://purl.org/dc/elements/1.1/}contributor", "雪 ユキ"),
         ("{http://purl.org/dc/elements/1.1/}date", "2015"),
         ("{http://purl.org/dc/elements/1.1/}type", "Archival Information Package"),
         ("{http://purl.org/dc/elements/1.1/}format", "parchement"),
         ("{http://purl.org/dc/elements/1.1/}identifier", "42/1"),
         ("{http://purl.org/dc/elements/1.1/}source", "Numair's library"),
         ("{http://purl.org/dc/elements/1.1/}relation", "None"),
         ("{http://purl.org/dc/elements/1.1/}language", "en"),
         ("{http://purl.org/dc/elements/1.1/}rights", "Public Domain"),
         ("{http://purl.org/dc/terms/}isPartOf", "AIC#42"),
     ]
     assert len(dublincore) == 15
     for child, (tag, text) in zip(dublincore, expected_children):
         assert child.tag == tag
         assert child.text == text
Beispiel #11
0
def update_metadata_csv(job, mets, metadata_csv, sip_uuid, sip_dir, state):
    """Apply the metadata from a new metadata.csv to entries of ``mets``.

    The CSV path is ``metadata_csv.currentlocation`` with the first
    ``%SIPDirectory%`` placeholder replaced by ``sip_dir``.  Each parsed entry
    is matched to a ``File`` row (original location first, then current
    location).  If no row matches, ``_get_directory_fsentry`` is asked for an
    entry instead.  Entries that cannot be resolved are logged and skipped.

    For every resolved entry, new dmdSecs are generated from the CSV row.
    Dublin Core content is added with ``add_dublin_core`` and OTHER/CUSTOM
    content with ``add_dmdsec``.  Each newly added dmdSec is handed to
    ``_replace_original_dmdsec`` together with the entry's existing dmdSecs
    of the same kind.

    :param job: Job object; used for logging through ``pyprint`` and passed to
        the CSV parser and the dmdSec factory.
    :param mets: METS document object providing ``get_file``.
    :param metadata_csv: Object whose ``currentlocation`` holds the CSV path.
    :param sip_uuid: Value used as ``sip_id`` when querying ``models.File``.
    :param sip_dir: Replacement for the ``%SIPDirectory%`` placeholder.
    :param state: Passed through to ``createDmdSecsFromCSVParsedMetadata``.
    :return: The ``mets`` object that was passed in.
    """
    job.pyprint("Parse new metadata.csv")
    csv_path = metadata_csv.currentlocation.replace("%SIPDirectory%", sip_dir, 1)
    parsed_rows = createmetscsv.parseMetadataCSV(job, csv_path)

    # NOTE(review): an earlier FIXME here said DC and non-DC metadata together
    # were unsupported; the loop below handles both kinds - confirm.
    for f, md in parsed_rows.items():
        # Verify file is in AIP
        job.pyprint("Looking for", f, "from metadata.csv in SIP")
        # Match on the original location first, then the current location.
        # The leading % matches the end of %SIPDirectory% or %transferDirectory%
        location_suffix = "%" + f
        file_obj = None
        for lookup in ("originallocation__endswith", "currentlocation__endswith"):
            try:
                file_obj = models.File.objects.get(
                    sip_id=sip_uuid, **{lookup: location_suffix}
                )
            except models.File.DoesNotExist:
                continue
            break

        if file_obj is None:
            # No database row: try to resolve the path through the helper
            fsentry = _get_directory_fsentry(mets, f)
        else:
            fsentry = mets.get_file(file_uuid=file_obj.uuid)
        if fsentry is None:
            job.pyprint(f, "not found in database or METS file")
            continue

        job.pyprint(f, "found in database or METS file")
        job.pyprint(f, "was associated with", fsentry.dmdids)

        # Sort the dmdSecs already on the entry by kind before adding new ones
        existing_dc = []
        existing_custom = []
        for existing in fsentry.dmdsecs:
            contents = existing.contents
            mdtype = contents.mdtype
            if mdtype == "DC":
                existing_dc.append(existing)
                continue
            if mdtype == "OTHER" and getattr(contents, "othermdtype", None) == "CUSTOM":
                existing_custom.append(existing)

        # Create the new dmdSecs and attach each one according to its kind
        created_dmdsecs = createmets2.createDmdSecsFromCSVParsedMetadata(job, md, state)
        for created in created_dmdsecs:
            # Only the inner payload is added, not the generated dmdSec wrapper
            dublin_core = created.find(".//dcterms:dublincore", namespaces=ns.NSMAP)
            if dublin_core is not None:
                added = fsentry.add_dublin_core(dublin_core)
                _replace_original_dmdsec(existing_dc, added)
                continue
            custom_xml = created.find(
                './/mets:mdWrap[@MDTYPE="OTHER"][@OTHERMDTYPE="CUSTOM"]/mets:xmlData',
                namespaces=ns.NSMAP,
            )
            if custom_xml is None:
                continue
            added = fsentry.add_dmdsec(custom_xml, "OTHER", othermdtype="CUSTOM")
            _replace_original_dmdsec(existing_custom, added)
        job.pyprint(f, "now associated with", fsentry.dmdids)

    return mets