コード例 #1
0
 def test_create_dc_dmdsec_no_dc_no_transfers_dir(self):
     """No dmdSec is returned when THIS_DIR is used as the base directory.

     Per the test name, THIS_DIR is presumably a location without a
     transfers directory, and the SIP UUID below is presumably unknown to
     the database; the call must simply return None.
     """
     unknown_sip_uuid = "dnednedn-5bd2-4249-84a1-2f00f725b981"
     mets_state = create_mets_v2.MetsState()
     stub_job = Job("stub", "stub", [])
     result = create_mets_v2.createDublincoreDMDSecFromDBData(
         stub_job,
         self.siptypeuuid,
         unknown_sip_uuid,
         THIS_DIR,
         mets_state,
     )
     # Nothing to describe, so no element may come back.
     assert result is None
コード例 #2
0
 def test_normative_structmap_creation(self):
     """get_normative_structmap should hand back an etree element."""
     mets_state = create_mets_v2.MetsState()
     # The SIP path is passed with a trailing path separator appended.
     sip_path = str(self.sip_dir) + os.sep
     objects_path = str(self.sip_object_dir)
     structmap = create_mets_v2.get_normative_structmap(
         sip_path, objects_path, {}, mets_state
     )
     assert isinstance(structmap, etree._Element)
コード例 #3
0
 def test_create_rights_granted(self):
     """getrightsGranted should append one populated rightsGranted child."""
     premis = "{info:lc/xmlns/premis-v2}"

     def check_leaf(node, name, text):
         # A leaf carries text only: no attributes and no children.
         assert node.tag == premis + name
         assert node.text == text
         assert len(node.attrib) == 0
         assert len(node) == 0

     # Setup
     parent = etree.Element(
         premis + "rightsStatement", nsmap={"premis": NSMAP["premis"]}
     )
     rights_statement = RightsStatement.objects.get(id=1)
     # Test
     mets_state = create_mets_v2.MetsState()
     archivematicaCreateMETSRights.getrightsGranted(
         Job("stub", "stub", []), rights_statement, parent, mets_state
     )
     # Verify
     assert len(parent) == 1
     granted = parent[0]
     assert granted.tag == premis + "rightsGranted"
     assert len(granted.attrib) == 0
     assert len(granted) == 4
     act, restriction, term, note = granted
     check_leaf(act, "act", "Disseminate")
     check_leaf(restriction, "restriction", "Allow")
     # termOfGrant is the only child with children of its own.
     assert term.tag == premis + "termOfGrant"
     assert len(term.attrib) == 0
     assert len(term) == 2
     start_date, end_date = term
     assert start_date.tag == premis + "startDate"
     assert start_date.text == "2000"
     assert end_date.tag == premis + "endDate"
     assert end_date.text == "OPEN"
     check_leaf(note, "rightsGrantedNote", "Attribution required")
コード例 #4
0
 def test_dmdsec_from_csv_parsed_metadata_other_only(self):
     """Non-DC columns alone should yield a single OTHER/CUSTOM dmdSec."""
     mets_ns = "{http://www.loc.gov/METS/}"
     long_description = 'This is about how glaives are used in the Yamani Islands'
     parsed = collections.OrderedDict()
     parsed["Title"] = ["Yamani Weapons"]
     # The contributor is supplied as UTF-8 bytes and expected back as text.
     parsed["Contributor"] = [u"雪 ユキ".encode('utf8')]
     parsed["Long Description"] = [long_description]
     # Test
     mets_state = create_mets_v2.MetsState()
     dmdsecs = create_mets_v2.createDmdSecsFromCSVParsedMetadata(
         Job("stub", "stub", []), parsed, mets_state
     )
     # Verify
     assert dmdsecs
     assert len(dmdsecs) == 1
     section = dmdsecs[0]
     assert section.tag == mets_ns + "dmdSec"
     assert "ID" in section.attrib
     wrap = section[0]
     assert wrap.tag == mets_ns + "mdWrap"
     assert "MDTYPE" in wrap.attrib
     assert wrap.attrib["MDTYPE"] == "OTHER"
     assert "OTHERMDTYPE" in wrap.attrib
     assert wrap.attrib["OTHERMDTYPE"] == "CUSTOM"
     payload = wrap[0]
     assert payload.tag == mets_ns + "xmlData"
     # Elements are direct children of xmlData
     expected_children = [
         ("title", "Yamani Weapons"),
         ("contributor", u"雪 ユキ"),
         ("long_description", long_description),
     ]
     assert len(payload) == 3
     for child, (tag, text) in zip(payload, expected_children):
         assert child.tag == tag
         assert child.text == text
コード例 #5
0
 def test_dmdsec_from_csv_parsed_metadata_no_data(self):
     """Empty parsed metadata should produce an empty list of dmdSecs."""
     # Test
     mets_state = create_mets_v2.MetsState()
     dmdsecs = create_mets_v2.createDmdSecsFromCSVParsedMetadata(
         Job("stub", "stub", []), {}, mets_state
     )
     # Verify
     assert dmdsecs == []
コード例 #6
0
    def test_dmdsec_from_csv_parsed_metadata_both(self):
        """Mixed DC and custom columns should yield a DC and an OTHER dmdSec."""
        mets_ns = "{http://www.loc.gov/METS/}"
        dc_ns = "{http://purl.org/dc/elements/1.1/}"
        dcterms_ns = "{http://purl.org/dc/terms/}"
        contributor = u"雪 ユキ"
        long_description = 'This is about how glaives are used in the Yamani Islands'
        parsed = collections.OrderedDict()
        parsed["dc.title"] = ["Yamani Weapons"]
        parsed["dc.contributor"] = [contributor.encode('utf8')]
        parsed["dcterms.isPartOf"] = ["AIC#42"]
        parsed["Title"] = ["Yamani Weapons"]
        parsed["Contributor"] = [contributor.encode('utf8')]
        parsed["Long Description"] = [long_description]
        # Test
        mets_state = create_mets_v2.MetsState()
        dmdsecs = create_mets_v2.createDmdSecsFromCSVParsedMetadata(
            Job("stub", "stub", []), parsed, mets_state
        )
        # Verify
        assert dmdsecs
        assert len(dmdsecs) == 2
        # Return can be DC or OTHER first, but in this case DC should be first
        dc_section, other_section = dmdsecs

        assert dc_section.tag == mets_ns + "dmdSec"
        assert "ID" in dc_section.attrib
        dc_wrap = dc_section[0]
        assert dc_wrap.tag == mets_ns + "mdWrap"
        assert "MDTYPE" in dc_wrap.attrib
        assert dc_wrap.attrib["MDTYPE"] == "DC"
        dc_payload = dc_wrap[0]
        assert dc_payload.tag == mets_ns + "xmlData"
        dc_root = dc_payload[0]
        # Elements are children of dublincore tag
        assert dc_root.tag == dcterms_ns + "dublincore"
        expected_dc = [
            (dc_ns + "title", "Yamani Weapons"),
            (dc_ns + "contributor", contributor),
            (dcterms_ns + "isPartOf", "AIC#42"),
        ]
        assert len(dc_root) == 3
        for child, (tag, text) in zip(dc_root, expected_dc):
            assert child.tag == tag
            assert child.text == text

        assert other_section.tag == mets_ns + "dmdSec"
        assert "ID" in other_section.attrib
        other_wrap = other_section[0]
        assert other_wrap.tag == mets_ns + "mdWrap"
        assert "MDTYPE" in other_wrap.attrib
        assert other_wrap.attrib["MDTYPE"] == "OTHER"
        assert "OTHERMDTYPE" in other_wrap.attrib
        assert other_wrap.attrib["OTHERMDTYPE"] == "CUSTOM"
        other_payload = other_wrap[0]
        assert other_payload.tag == mets_ns + "xmlData"
        # Elements are direct children of xmlData
        expected_other = [
            ("title", "Yamani Weapons"),
            ("contributor", contributor),
            ("long_description", long_description),
        ]
        assert len(other_payload) == 3
        for child, (tag, text) in zip(other_payload, expected_other):
            assert child.tag == tag
            assert child.text == text
コード例 #7
0
    def test_dmdsec_from_csv_parsed_metadata_repeats(self):
        """Each repeated value of a column should become its own element."""
        mets_ns = "{http://www.loc.gov/METS/}"
        dc_contributor_tag = "{http://purl.org/dc/elements/1.1/}contributor"
        names = ("Yuki", "雪 ユキ")
        # The second value goes in as UTF-8 bytes; each column gets its own list.
        csv_values = [names[0], names[1].encode("utf8")]
        parsed = collections.OrderedDict()
        parsed["dc.contributor"] = list(csv_values)
        parsed["Contributor"] = list(csv_values)
        # Test
        mets_state = create_mets_v2.MetsState()
        dmdsecs = create_mets_v2.createDmdSecsFromCSVParsedMetadata(
            Job("stub", "stub", []), parsed, mets_state
        )
        # Verify
        assert dmdsecs
        assert len(dmdsecs) == 2
        # Return can be DC or OTHER first, but in this case DC should be first
        dc_section, other_section = dmdsecs

        assert dc_section.tag == mets_ns + "dmdSec"
        assert "ID" in dc_section.attrib
        dc_wrap = dc_section[0]
        assert dc_wrap.tag == mets_ns + "mdWrap"
        assert "MDTYPE" in dc_wrap.attrib
        assert dc_wrap.attrib["MDTYPE"] == "DC"
        dc_payload = dc_wrap[0]
        assert dc_payload.tag == mets_ns + "xmlData"
        dc_root = dc_payload[0]
        # Elements are children of dublincore tag
        assert dc_root.tag == "{http://purl.org/dc/terms/}dublincore"
        assert len(dc_root) == 2
        for child, name in zip(dc_root, names):
            assert child.tag == dc_contributor_tag
            assert child.text == name

        assert other_section.tag == mets_ns + "dmdSec"
        assert "ID" in other_section.attrib
        other_wrap = other_section[0]
        assert other_wrap.tag == mets_ns + "mdWrap"
        assert "MDTYPE" in other_wrap.attrib
        assert other_wrap.attrib["MDTYPE"] == "OTHER"
        assert "OTHERMDTYPE" in other_wrap.attrib
        assert other_wrap.attrib["OTHERMDTYPE"] == "CUSTOM"
        other_payload = other_wrap[0]
        assert other_payload.tag == mets_ns + "xmlData"
        # Elements are direct children of xmlData
        assert len(other_payload) == 2
        for child, name in zip(other_payload, names):
            assert child.tag == "contributor"
            assert child.text == name
コード例 #8
0
    def generate_aip_mets_v2_state(self):
        """Generate fileSec state.

        Builds the state used to test the units involved in creating a
        custom structmap in the AIP METS. After the call:

        * ``self.transfer_dir`` / ``self.objects_dir`` point at the
          custom-structmap fixture and its ``objects`` directory;
        * ``self.state`` is a fresh ``MetsState`` whose
          ``globalStructMapCounter`` is a random integer in
          ``[0, arbitrary_max_structmaps)``;
        * ``self.structmap_div_element`` holds the return value of
          ``create_mets_v2.createFileSec`` for the fixture.
        """
        arbitrary_max_structmaps = 10
        self.transfer_dir = os.path.join(
            THIS_DIR,
            "fixtures",
            "custom_structmaps",
            "custom-structmap-3a915449-d1bb-4920-b274-c917c7bb5929",
            "",  # empty last component gives the path a trailing separator
        )
        self.objects_dir = os.path.join(self.transfer_dir, "objects")
        structMap = etree.Element(
            ns.metsBNS + "structMap",
            TYPE="physical",
            ID="structMap_1",
            LABEL="Archivematica default",
        )
        # Input to create_file_sec:
        #
        # <ns0:div xmlns:ns0="http://www.loc.gov/METS/"
        #          LABEL="3-031927e0-63bb-430c-8b37-fc799c132ca9"
        #          TYPE="Directory"
        #          DMDID="dmdSec_1"
        # />
        #
        sip_dir_name = os.path.basename(self.objects_dir.rstrip(os.path.sep))
        structMapDiv = etree.SubElement(
            structMap, ns.metsBNS + "div", TYPE="Directory", LABEL=sip_dir_name
        )
        self.state = create_mets_v2.MetsState()
        # Any starting counter below the arbitrary maximum will do; randrange
        # picks one directly instead of building a list to choose from.
        self.state.globalStructMapCounter = random.randrange(
            arbitrary_max_structmaps
        )
        self.structmap_div_element = create_mets_v2.createFileSec(
            job=Job("stub", "stub", []),
            directoryPath=self.objects_dir,
            parentDiv=structMapDiv,
            baseDirectoryPath=self.transfer_dir,
            baseDirectoryName="%SIPDirectory%",
            fileGroupIdentifier="3a915449-d1bb-4920-b274-c917c7bb5929",
            fileGroupType="sip_id",
            directories={},
            state=self.state,
            includeAmdSec=True,
        )
コード例 #9
0
 def test_create_dc_dmdsec_no_dc_no_transfers(self):
     """It should not fail if no dublincore.xml exists from transfers.

     The ``.gitignore`` placeholder is removed from the fixture's transfers
     directory for the duration of the call and is always recreated
     afterwards, even if the call raises or the assertion fails, so the
     fixture tree is never left modified.
     """
     badsipuuid = 'dnednedn-5bd2-4249-84a1-2f00f725b981'
     empty_transfers_sip = os.path.join(THIS_DIR, 'fixtures', 'emptysip')
     placeholder = os.path.join(
         empty_transfers_sip, 'objects', 'metadata', 'transfers', '.gitignore'
     )
     state = create_mets_v2.MetsState()
     # Make sure directory is empty
     try:
         os.remove(placeholder)
     except OSError:
         # Best effort: the placeholder may already be absent.
         pass
     try:
         dmdsec_elem = create_mets_v2.createDublincoreDMDSecFromDBData(
             Job("stub", "stub", []),
             self.siptypeuuid,
             badsipuuid,
             empty_transfers_sip,
             state,
         )
         assert dmdsec_elem is None
     finally:
         # Reset directory state
         with open(placeholder, 'w'):
             pass
コード例 #10
0
 def test_creates_events(self):
     """
     createDigiprovMD should return six Event sections followed by three
     Agent sections for the file, with every Event linked to three Agents.
     """
     premis = './/{info:lc/xmlns/premis-v2}'
     expected_event_types = (
         'ingestion',
         'message digest calculation',
         'virus check',
         'name cleanup',
         'format identification',
         'validation',
     )
     agent_fields = (
         'agentIdentifierType',
         'agentIdentifierValue',
         'agentName',
         'agentType',
     )
     expected_agents = (
         ('preservation system', 'Archivematica-1.4.0', 'Archivematica', 'software'),
         ('repository code', 'demo', 'demo', 'organization'),
         (
             'Archivematica user pk',
             '1',
             'username="******", first_name="Keladry", last_name="Mindelan"',
             'Archivematica user',
         ),
     )
     mets_state = create_mets_v2.MetsState()
     sections = create_mets_v2.createDigiprovMD(
         "ae8d4290-fe52-4954-b72a-0f591bee2e2f", mets_state
     )
     assert len(sections) == 9
     # Events
     for position, event_type in enumerate(expected_event_types):
         section = sections[position]
         assert section[0].attrib['MDTYPE'] == 'PREMIS:EVENT'
         assert section.find(premis + 'eventType').text == event_type
         assert len(section.findall(premis + 'linkingAgentIdentifier')) == 3
     # Agents come right after the events
     first_agent = len(expected_event_types)
     for position, values in enumerate(expected_agents, first_agent):
         section = sections[position]
         assert section[0].attrib['MDTYPE'] == 'PREMIS:AGENT'
         for field, value in zip(agent_fields, values):
             assert section.find(premis + field).text == value
コード例 #11
0
 def test_create_dc_dmdsec_dc_exists(self):
     """A dmdSec wrapping a dublincore element is built when DC data exists."""
     mets_ns = "{http://www.loc.gov/METS/}"
     mets_state = create_mets_v2.MetsState()
     section, section_id = create_mets_v2.createDublincoreDMDSecFromDBData(
         Job("stub", "stub", []),
         self.siptypeuuid,
         self.sipuuid,
         THIS_DIR,
         mets_state,
     )
     assert section is not None
     assert section.tag == mets_ns + "dmdSec"
     assert section.attrib["ID"] == section_id
     # Walk the single-child chain dmdSec > mdWrap > xmlData > dublincore.
     assert len(section) == 1
     wrap = section[0]
     assert wrap.tag == mets_ns + "mdWrap"
     assert wrap.attrib["MDTYPE"] == "DC"
     assert len(wrap) == 1
     payload = wrap[0]
     assert payload.tag == mets_ns + "xmlData"
     assert len(payload) == 1
     assert payload[0].tag == "{http://purl.org/dc/terms/}dublincore"
コード例 #12
0
 def test_dmdsec_from_csv_parsed_metadata_other_only(self):
     """Non-DC columns alone should yield a single OTHER/CUSTOM dmdSec."""
     mets_ns = "{http://www.loc.gov/METS/}"
     long_description = "This is about how glaives are used in the Yamani Islands"
     parsed = collections.OrderedDict()
     parsed["Title"] = ["Yamani Weapons"]
     # The contributor is supplied as UTF-8 bytes and expected back as text.
     parsed["Contributor"] = ["雪 ユキ".encode("utf8")]
     parsed["Long Description"] = [long_description]
     # Test
     mets_state = create_mets_v2.MetsState()
     dmdsecs = create_mets_v2.createDmdSecsFromCSVParsedMetadata(
         Job("stub", "stub", []), parsed, mets_state
     )
     # Verify
     assert dmdsecs
     assert len(dmdsecs) == 1
     section = dmdsecs[0]
     assert section.tag == mets_ns + "dmdSec"
     assert "ID" in section.attrib
     wrap = section[0]
     assert wrap.tag == mets_ns + "mdWrap"
     assert "MDTYPE" in wrap.attrib
     assert wrap.attrib["MDTYPE"] == "OTHER"
     assert "OTHERMDTYPE" in wrap.attrib
     assert wrap.attrib["OTHERMDTYPE"] == "CUSTOM"
     payload = wrap[0]
     assert payload.tag == mets_ns + "xmlData"
     # Elements are direct children of xmlData
     expected_children = [
         ("title", "Yamani Weapons"),
         ("contributor", "雪 ユキ"),
         ("long_description", long_description),
     ]
     assert len(payload) == 3
     for child, (tag, text) in zip(payload, expected_children):
         assert child.tag == tag
         assert child.text == text
コード例 #13
0
 def test_create_dc_dmdsec_no_dc_no_transfers(self):
     """No dmdSec is returned for a SIP copy without its transfers placeholder.

     The ``emptysip`` fixture is copied into a scratch directory so the
     ``.gitignore`` placeholder can be deleted without touching the
     checked-in fixture; the scratch directory is removed afterwards.
     """
     unknown_sip_uuid = "dnednedn-5bd2-4249-84a1-2f00f725b981"
     scratch_root = Path(tempfile.mkdtemp())
     sip_copy = scratch_root / "emptysip"
     try:
         fixture = os.path.join(THIS_DIR, "fixtures", "emptysip")
         shutil.copytree(fixture, str(sip_copy))
         # Make sure directory is empty
         placeholder = sip_copy / "objects" / "metadata" / "transfers" / ".gitignore"
         placeholder.unlink()
         mets_state = create_mets_v2.MetsState()
         result = create_mets_v2.createDublincoreDMDSecFromDBData(
             Job("stub", "stub", []),
             self.siptypeuuid,
             unknown_sip_uuid,
             str(sip_copy),
             mets_state,
         )
         assert result is None
     finally:
         shutil.rmtree(str(scratch_root))
コード例 #14
0
 def test_get_included_structmap_incomplete_mets(self):
     """include_custom_structmap should reject incomplete custom structMaps.

     Every fixture listed below is first passed to ``self.validate_mets``;
     the call under test must then return an empty list and leave the
     fresh state's error counter at exactly one.
     """
     self.generate_aip_mets_v2_state()
     self._fixup_fileid_state()
     default_structmap = "mets_structmap.xml"
     Result = collections.namedtuple("Result", "structmap_name structmap_id")
     incomplete_names = (
         "no-contentids.xml",
         "file_does_not_exist.xml",
         "empty_filenames.xml",
         "missing_contentid.xml",
     )
     results = [Result(name, "custom_structmap") for name in incomplete_names]
     for res in results:
         # Start from a clean state so the error count is per structMap.
         self.state = create_mets_v2.MetsState()
         structmap_file = res.structmap_name or default_structmap
         structmap_path = os.path.join(
             self.objects_dir,
             "metadata",
             "transfers",
             "custom-structmap-41ab1f1a-34d0-4a83-a2a3-0ad1b1ee1c51",
             structmap_file,
         )
         assert os.path.isfile(structmap_path)
         assert os.path.isfile(self.mets_xsd_path)
         self.validate_mets(self.mets_xsd_path, structmap_path)
         returned = create_mets_v2.include_custom_structmap(
             job=Job("stub", "stub", []),
             baseDirectoryPath=self.transfer_dir,
             state=self.state,
             custom_structmap=res.structmap_name,
         )
         empty_message = (
             "Return from include_custom_structmap should be an empty array: {}"
         )
         assert returned == [], empty_message.format(returned)
         error_count = self.state.error_accumulator.error_count
         assert error_count == 1, "error counter should be incremented on error"
コード例 #15
0
 def test_create_rights_granted(self):
     """getrightsGranted should append one populated rightsGranted child."""
     premis = "{http://www.loc.gov/premis/v3}"

     def check_leaf(node, name, text):
         # A leaf carries text only: no attributes and no children.
         assert node.tag == premis + name
         assert node.text == text
         assert len(node.attrib) == 0
         assert len(node) == 0

     # Setup
     parent = etree.Element(
         premis + "rightsStatement",
         nsmap={"premis": NSMAP["premis"]},
     )
     rights_statement = RightsStatement.objects.get(id=1)
     # Test
     mets_state = create_mets_v2.MetsState()
     archivematicaCreateMETSRights.getrightsGranted(
         Job("stub", "stub", []), rights_statement, parent, mets_state
     )
     # Verify
     assert len(parent) == 1
     granted = parent[0]
     assert granted.tag == premis + "rightsGranted"
     assert len(granted.attrib) == 0
     assert len(granted) == 4
     act, restriction, term, note = granted
     check_leaf(act, "act", "Disseminate")
     check_leaf(restriction, "restriction", "Allow")
     # termOfGrant is the only child with children of its own.
     assert term.tag == premis + "termOfGrant"
     assert len(term.attrib) == 0
     assert len(term) == 2
     start_date, end_date = term
     assert start_date.tag == premis + "startDate"
     assert start_date.text == "2000"
     assert end_date.tag == premis + "endDate"
     assert end_date.text == "OPEN"
     check_leaf(note, "rightsGrantedNote", "Attribution required")
コード例 #16
0
def add_new_files(job, mets, sip_uuid, sip_dir):
    """
    Register files found on disk that the METS does not know about yet.

    Walks ``<sip_dir>/objects``. Every file without a METS entry (other
    than the path returned by ``_get_old_mets_rel_path``) is looked up as a
    ``models.File`` and gets an amdSec plus a new "Item" FSEntry, with any
    missing "Directory" entries created on the way down.

    If ``objects/metadata/metadata.csv`` is among the new files, it is
    handed to ``update_metadata_csv`` at the end.

    Returns ``mets`` untouched when nothing new was found, otherwise the
    updated METS (as returned by ``update_metadata_csv`` when it ran).
    """
    # Open questions carried over from the original implementation:
    # - How to tell a new file from an old one with the same name? Hash?
    # - Should metadata.csv be parsed and only applied if it differs, even
    #   when one already existed?
    sip_placeholder = "%SIPDirectory%"
    new_files = []
    old_mets_rel_path = _get_old_mets_rel_path(sip_uuid)
    metadata_csv = None

    for dirpath, _, filenames in os.walk(os.path.join(sip_dir, "objects")):
        for filename in filenames:
            # Find in METS
            on_disk = os.path.join(dirpath, filename)
            current_loc = on_disk.replace(sip_dir, sip_placeholder, 1)
            rel_path = current_loc.replace(sip_placeholder, "", 1)
            job.pyprint("Looking for", rel_path, "in METS")
            if mets.get_file(path=rel_path) is not None:
                job.pyprint(rel_path, "found in METS, no further work needed")
                continue
            if rel_path == old_mets_rel_path:
                # The old METS file itself is never treated as new.
                continue
            # Not in METS: fetch the File object and keep it for later.
            job.pyprint(rel_path, "not found in METS, must be new file")
            file_obj = models.File.objects.get(
                currentlocation=current_loc, sip_id=sip_uuid
            )
            new_files.append(file_obj)
            if rel_path == "objects/metadata/metadata.csv":
                metadata_csv = file_obj

    if not new_files:
        return mets

    # Set global counters so getAMDSec will work
    state = createmets2.MetsState(
        globalAmdSecCounter=metsrw.AMDSec.get_current_id_count(),
        globalTechMDCounter=metsrw.SubSection.get_current_id_count("techMD"),
        globalDigiprovMDCounter=metsrw.SubSection.get_current_id_count("digiprovMD"),
    )

    objects_fsentry = mets.get_file(label="objects", type="Directory")

    for file_obj in new_files:
        # Create amdSecs
        job.pyprint(
            "Adding amdSec for", file_obj.currentlocation, "(", file_obj.uuid, ")"
        )
        # The arguments passed as None are only needed if use=original.
        amdsec, amdid = createmets2.getAMDSec(
            job,
            fileUUID=file_obj.uuid,
            filePath=None,
            use=file_obj.filegrpuse,
            sip_uuid=sip_uuid,
            transferUUID=None,
            itemdirectoryPath=None,
            typeOfTransfer=None,
            baseDirectoryPath=sip_dir,
            state=state,
        )
        job.pyprint(file_obj.uuid, "has amdSec with ID", amdid)

        # Create parent directories if needed
        path_in_objects = file_obj.currentlocation.replace(
            sip_placeholder + "objects/", "", 1
        )
        parent_fsentry = objects_fsentry
        for dirname in os.path.dirname(path_in_objects).split("/"):
            if not dirname:
                continue
            child = mets.get_file(type="Directory", label=dirname)
            if child is None:
                child = metsrw.FSEntry(path=None, type="Directory", label=dirname)
                parent_fsentry.add_child(child)
            parent_fsentry = child

        # Link back to the source file when this file was derived from one.
        derived_from = None
        derivations = file_obj.original_file_set
        if derivations.exists():
            source_file = derivations.get().source_file
            derived_from = mets.get_file(file_uuid=source_file.uuid)

        entry = metsrw.FSEntry(
            path=file_obj.currentlocation.replace(sip_placeholder, "", 1),
            use=file_obj.filegrpuse,
            type="Item",
            file_uuid=file_obj.uuid,
            derived_from=derived_from,
        )
        entry.amdsecs.append(metsrw.AMDSec(tree=amdsec, section_id=amdid))
        parent_fsentry.add_child(entry)

    # Parse metadata.csv and add dmdSecs
    if metadata_csv:
        mets = update_metadata_csv(job, mets, metadata_csv, sip_uuid, sip_dir, state)

    return mets
コード例 #17
0
 def test_dmdsec_from_csv_parsed_metadata_dc_only(self):
     """DC columns alone should yield a single DC dmdSec, in input order."""
     mets_ns = "{http://www.loc.gov/METS/}"
     dc_ns = "{http://purl.org/dc/elements/1.1/}"
     dcterms_ns = "{http://purl.org/dc/terms/}"
     # (CSV column, expected element tag, expected element text)
     fields = [
         ("dc.title", dc_ns + "title", "Yamani Weapons"),
         ("dc.creator", dc_ns + "creator", "Keladry of Mindelan"),
         ("dc.subject", dc_ns + "subject", "Glaives"),
         ("dc.description", dc_ns + "description", "Glaives are cool"),
         ("dc.publisher", dc_ns + "publisher", "Tortall Press"),
         ("dc.contributor", dc_ns + "contributor", "雪 ユキ"),
         ("dc.date", dc_ns + "date", "2015"),
         ("dc.type", dc_ns + "type", "Archival Information Package"),
         ("dc.format", dc_ns + "format", "parchement"),
         ("dc.identifier", dc_ns + "identifier", "42/1"),
         ("dc.source", dc_ns + "source", "Numair's library"),
         ("dc.relation", dc_ns + "relation", "None"),
         ("dc.language", dc_ns + "language", "en"),
         ("dc.rights", dc_ns + "rights", "Public Domain"),
         ("dcterms.isPartOf", dcterms_ns + "isPartOf", "AIC#42"),
     ]
     parsed = collections.OrderedDict(
         (column, [text]) for column, _, text in fields
     )
     # The contributor goes in as UTF-8 bytes; re-assigning keeps its position.
     parsed["dc.contributor"] = ["雪 ユキ".encode("utf8")]
     # Test
     mets_state = create_mets_v2.MetsState()
     dmdsecs = create_mets_v2.createDmdSecsFromCSVParsedMetadata(
         Job("stub", "stub", []), parsed, mets_state
     )
     # Verify
     assert dmdsecs
     assert len(dmdsecs) == 1
     section = dmdsecs[0]
     assert section.tag == mets_ns + "dmdSec"
     assert "ID" in section.attrib
     wrap = section[0]
     assert wrap.tag == mets_ns + "mdWrap"
     assert "MDTYPE" in wrap.attrib
     assert wrap.attrib["MDTYPE"] == "DC"
     payload = wrap[0]
     assert payload.tag == mets_ns + "xmlData"
     # Elements are children of dublincore tag
     dc_root = payload[0]
     assert dc_root.tag == dcterms_ns + "dublincore"
     assert len(dc_root) == 15
     for child, (_, tag, text) in zip(dc_root, fields):
         assert child.tag == tag
         assert child.text == text
コード例 #18
0
 def test_creates_events(self):
     """
     createDigiprovMD should return six Event sections followed by three
     Agent sections for the file, with every Event linked to three Agents.
     """
     premis = ".//{http://www.loc.gov/premis/v3}"
     expected_event_types = (
         "ingestion",
         "message digest calculation",
         "virus check",
         "name cleanup",
         "format identification",
         "validation",
     )
     agent_fields = (
         "agentIdentifierType",
         "agentIdentifierValue",
         "agentName",
         "agentType",
     )
     expected_agents = (
         ("preservation system", "Archivematica-1.4.0", "Archivematica", "software"),
         ("repository code", "demo", "demo", "organization"),
         (
             "Archivematica user pk",
             "1",
             'username="******", first_name="Keladry", last_name="Mindelan"',
             "Archivematica user",
         ),
     )
     mets_state = create_mets_v2.MetsState()
     sections = create_mets_v2.createDigiprovMD(
         "ae8d4290-fe52-4954-b72a-0f591bee2e2f", mets_state
     )
     assert len(sections) == 9
     # Events
     for position, event_type in enumerate(expected_event_types):
         section = sections[position]
         assert section[0].attrib["MDTYPE"] == "PREMIS:EVENT"
         assert section.find(premis + "eventType").text == event_type
         linked_agents = section.findall(premis + "linkingAgentIdentifier")
         assert len(linked_agents) == 3
     # Agents come right after the events
     first_agent = len(expected_event_types)
     for position, values in enumerate(expected_agents, first_agent):
         section = sections[position]
         assert section[0].attrib["MDTYPE"] == "PREMIS:AGENT"
         for field, value in zip(agent_fields, values):
             assert section.find(premis + field).text == value
コード例 #19
0
    def test_dmdsec_from_csv_parsed_metadata_both(self):
        """Mixed DC and custom CSV columns should produce two dmdSecs.

        The ``dc.*``/``dcterms.*`` columns are expected in a Dublin Core
        dmdSec and the remaining columns in an OTHER/CUSTOM dmdSec.
        """
        # Setup: insertion order is significant, the assertions below expect
        # the generated elements in the same order as these columns.
        contributor = "雪 ユキ".encode("utf8")
        data = collections.OrderedDict()
        data["dc.title"] = ["Yamani Weapons"]
        data["dc.contributor"] = [contributor]
        data["dcterms.isPartOf"] = ["AIC#42"]
        data["Title"] = ["Yamani Weapons"]
        data["Contributor"] = [contributor]
        data["Long Description"] = [
            "This is about how glaives are used in the Yamani Islands"
        ]

        # Test
        state = create_mets_v2.MetsState()
        job = Job("stub", "stub", [])
        ret = create_mets_v2.createDmdSecsFromCSVParsedMetadata(job, data, state)

        # Verify
        assert ret
        assert len(ret) == 2
        # Return can be DC or OTHER first, but in this case DC should be first
        dc_dmdsec, other_dmdsec = ret

        # --- Dublin Core dmdSec: dmdSec > mdWrap > xmlData > dublincore ---
        assert dc_dmdsec.tag == "{http://www.loc.gov/METS/}dmdSec"
        assert "ID" in dc_dmdsec.attrib
        dc_wrap = dc_dmdsec[0]
        assert dc_wrap.tag == "{http://www.loc.gov/METS/}mdWrap"
        for attr_name, attr_value in [("MDTYPE", "DC")]:
            assert attr_name in dc_wrap.attrib
            assert dc_wrap.attrib[attr_name] == attr_value
        dc_xmldata = dc_wrap[0]
        assert dc_xmldata.tag == "{http://www.loc.gov/METS/}xmlData"
        dc_root = dc_xmldata[0]
        # Elements are children of dublincore tag
        assert dc_root.tag == "{http://purl.org/dc/terms/}dublincore"
        # One ordered comparison covers the child count, tags and text.
        assert [(child.tag, child.text) for child in dc_root] == [
            ("{http://purl.org/dc/elements/1.1/}title", "Yamani Weapons"),
            ("{http://purl.org/dc/elements/1.1/}contributor", "雪 ユキ"),
            ("{http://purl.org/dc/terms/}isPartOf", "AIC#42"),
        ]

        # --- Custom dmdSec: dmdSec > mdWrap > xmlData > plain elements ---
        assert other_dmdsec.tag == "{http://www.loc.gov/METS/}dmdSec"
        assert "ID" in other_dmdsec.attrib
        other_wrap = other_dmdsec[0]
        assert other_wrap.tag == "{http://www.loc.gov/METS/}mdWrap"
        for attr_name, attr_value in [("MDTYPE", "OTHER"), ("OTHERMDTYPE", "CUSTOM")]:
            assert attr_name in other_wrap.attrib
            assert other_wrap.attrib[attr_name] == attr_value
        other_xmldata = other_wrap[0]
        assert other_xmldata.tag == "{http://www.loc.gov/METS/}xmlData"
        # Elements are direct children of xmlData; the expected tags are the
        # CSV headers lowercased with spaces turned into underscores.
        assert [(child.tag, child.text) for child in other_xmldata] == [
            ("title", "Yamani Weapons"),
            ("contributor", "雪 ユキ"),
            (
                "long_description",
                "This is about how glaives are used in the Yamani Islands",
            ),
        ]