def test_dmdsec_from_csv_parsed_metadata_both(self):
    """It should emit one DC dmdSec and one OTHER dmdSec for mixed input."""
    # Setup: two dc.* keys plus three non-DC keys, in insertion order.
    parsed = collections.OrderedDict()
    parsed["dc.title"] = ["Yamani Weapons"]
    parsed["dc.contributor"] = ["雪 ユキ"]
    parsed["Title"] = ["Yamani Weapons"]
    parsed["Contributor"] = ["雪 ユキ"]
    parsed["Long Description"] = [
        "This is about how glaives are used in the Yamani Islands"
    ]

    # Test
    mets_state = create_mets_v2.MetsState()
    dmdsecs = create_mets_v2.createDmdSecsFromCSVParsedMetadata(
        Job("stub", "stub", []), parsed, mets_state
    )

    # Verify
    assert dmdsecs
    assert len(dmdsecs) == 2

    # Return can be DC or OTHER first, but in this case DC should be first
    dc_section = dmdsecs[0]
    assert dc_section.tag == "{http://www.loc.gov/METS/}dmdSec"
    assert "ID" in dc_section.attrib
    dc_wrap = dc_section[0]
    assert dc_wrap.tag == "{http://www.loc.gov/METS/}mdWrap"
    assert "MDTYPE" in dc_wrap.attrib
    assert dc_wrap.attrib["MDTYPE"] == "DC"
    dc_xmldata = dc_wrap[0]
    assert dc_xmldata.tag == "{http://www.loc.gov/METS/}xmlData"
    # Elements are children of dublincore tag
    dublincore = dc_xmldata[0]
    assert dublincore.tag == "{http://purl.org/dc/terms/}dublincore"
    assert len(dublincore) == 2
    expected_dc = [
        ("{http://purl.org/dc/elements/1.1/}title", "Yamani Weapons"),
        ("{http://purl.org/dc/elements/1.1/}contributor", "雪 ユキ"),
    ]
    for position, (tag, text) in enumerate(expected_dc):
        assert dublincore[position].tag == tag
        assert dublincore[position].text == text

    other_section = dmdsecs[1]
    assert other_section.tag == "{http://www.loc.gov/METS/}dmdSec"
    assert "ID" in other_section.attrib
    other_wrap = other_section[0]
    assert other_wrap.tag == "{http://www.loc.gov/METS/}mdWrap"
    assert "MDTYPE" in other_wrap.attrib
    assert other_wrap.attrib["MDTYPE"] == "OTHER"
    assert "OTHERMDTYPE" in other_wrap.attrib
    assert other_wrap.attrib["OTHERMDTYPE"] == "CUSTOM"
    other_xmldata = other_wrap[0]
    assert other_xmldata.tag == "{http://www.loc.gov/METS/}xmlData"
    # Elements are direct children of xmlData
    assert len(other_xmldata) == 3
    expected_other = [
        ("title", "Yamani Weapons"),
        ("contributor", "雪 ユキ"),
        (
            "long_description",
            "This is about how glaives are used in the Yamani Islands",
        ),
    ]
    for position, (tag, text) in enumerate(expected_other):
        assert other_xmldata[position].tag == tag
        assert other_xmldata[position].text == text
def test_dmdsec_from_csv_parsed_metadata_no_data(self):
    """It should return an empty list when there is no parsed metadata."""
    empty_metadata = {}

    # Test
    mets_state = create_mets_v2.MetsState()
    stub_job = Job("stub", "stub", [])
    dmdsecs = create_mets_v2.createDmdSecsFromCSVParsedMetadata(
        stub_job, empty_metadata, mets_state
    )

    # Verify
    assert dmdsecs == []
def test_create_dc_dmdsec_no_dc_no_transfers_dir(self):
    """It should return None, not raise, when no transfers directory exists."""
    unknown_sip_uuid = "dnednedn-5bd2-4249-84a1-2f00f725b981"
    mets_state = create_mets_v2.MetsState()
    stub_job = Job("stub", "stub", [])

    result = create_mets_v2.createDublincoreDMDSecFromDBData(
        stub_job,
        self.siptypeuuid,
        unknown_sip_uuid,
        THIS_DIR,
        mets_state,
    )

    # Expect no element
    assert result is None
def generate_aip_mets_v2_state(self):
    """Generate fileSec state.

    Builds the state needed to test the units involved with creating a
    custom structmap in the AIP METS. On return the following attributes
    are set on the test instance: ``transfer_dir``, ``objects_dir``,
    ``state`` and ``structmap_div_element`` (the value returned by
    ``create_mets_v2.createFileSec``).
    """
    arbitrary_max_structmaps = 10
    # The trailing "" component makes os.path.join end the path with a
    # separator.
    self.transfer_dir = os.path.join(
        THIS_DIR,
        "fixtures",
        "custom_structmaps",
        "custom-structmap-3a915449-d1bb-4920-b274-c917c7bb5929",
        "",
    )
    self.objects_dir = os.path.join(self.transfer_dir, "objects")
    structMap = etree.Element(
        ns.metsBNS + "structMap",
        TYPE="physical",
        ID="structMap_1",
        LABEL="Archivematica default",
    )
    # Input to create_file_sec:
    #
    # <ns0:div xmlns:ns0="http://www.loc.gov/METS/"
    #          LABEL="3-031927e0-63bb-430c-8b37-fc799c132ca9"
    #          TYPE="Directory"
    #          DMDID="dmdSec_1"
    # />
    #
    sip_dir_name = os.path.basename(self.objects_dir.rstrip(os.path.sep))
    structMapDiv = etree.SubElement(
        structMap, ns.metsBNS + "div", TYPE="Directory", LABEL=sip_dir_name
    )
    self.state = create_mets_v2.MetsState()
    # Start the counter at an arbitrary value in [0, arbitrary_max_structmaps);
    # randrange draws it directly instead of materialising a list for
    # random.choice.
    self.state.globalStructMapCounter = random.randrange(
        arbitrary_max_structmaps
    )
    self.structmap_div_element = create_mets_v2.createFileSec(
        job=Job("stub", "stub", []),
        directoryPath=self.objects_dir,
        parentDiv=structMapDiv,
        baseDirectoryPath=self.transfer_dir,
        baseDirectoryName="%SIPDirectory%",
        fileGroupIdentifier="3a915449-d1bb-4920-b274-c917c7bb5929",
        fileGroupType="sip_id",
        directories={},
        state=self.state,
        includeAmdSec=True,
    )
def test_dmdsec_from_csv_parsed_metadata_repeats(self):
    """It should emit one element per value when a key has repeated values."""
    # Setup: the same two values under a DC key and under a non-DC key.
    parsed = collections.OrderedDict()
    parsed["dc.contributor"] = ["Yuki", "雪 ユキ"]
    parsed["Contributor"] = ["Yuki", "雪 ユキ"]

    # Test
    mets_state = create_mets_v2.MetsState()
    dmdsecs = create_mets_v2.createDmdSecsFromCSVParsedMetadata(
        Job("stub", "stub", []), parsed, mets_state
    )

    # Verify
    assert dmdsecs
    assert len(dmdsecs) == 2

    # Return can be DC or OTHER first, but in this case DC should be first
    dc_section = dmdsecs[0]
    assert dc_section.tag == "{http://www.loc.gov/METS/}dmdSec"
    assert "ID" in dc_section.attrib
    dc_wrap = dc_section[0]
    assert dc_wrap.tag == "{http://www.loc.gov/METS/}mdWrap"
    assert "MDTYPE" in dc_wrap.attrib
    assert dc_wrap.attrib["MDTYPE"] == "DC"
    dc_xmldata = dc_wrap[0]
    assert dc_xmldata.tag == "{http://www.loc.gov/METS/}xmlData"
    # Elements are children of dublincore tag
    dublincore = dc_xmldata[0]
    assert dublincore.tag == "{http://purl.org/dc/terms/}dublincore"
    assert len(dublincore) == 2
    for position, text in enumerate(["Yuki", "雪 ユキ"]):
        assert (
            dublincore[position].tag
            == "{http://purl.org/dc/elements/1.1/}contributor"
        )
        assert dublincore[position].text == text

    other_section = dmdsecs[1]
    assert other_section.tag == "{http://www.loc.gov/METS/}dmdSec"
    assert "ID" in other_section.attrib
    other_wrap = other_section[0]
    assert other_wrap.tag == "{http://www.loc.gov/METS/}mdWrap"
    assert "MDTYPE" in other_wrap.attrib
    assert other_wrap.attrib["MDTYPE"] == "OTHER"
    assert "OTHERMDTYPE" in other_wrap.attrib
    assert other_wrap.attrib["OTHERMDTYPE"] == "CUSTOM"
    other_xmldata = other_wrap[0]
    assert other_xmldata.tag == "{http://www.loc.gov/METS/}xmlData"
    # Elements are direct children of xmlData
    assert len(other_xmldata) == 2
    for position, text in enumerate(["Yuki", "雪 ユキ"]):
        assert other_xmldata[position].tag == "contributor"
        assert other_xmldata[position].text == text
def test_create_dc_dmdsec_no_dc_no_transfers(self):
    """It should not fail if no dublincore.xml exists from transfers.

    Copies the ``emptysip`` fixture into a scratch directory, removes the
    ``.gitignore`` placeholder so the transfers directory is empty, and
    expects ``createDublincoreDMDSecFromDBData`` to return ``None``.
    """
    badsipuuid = "dnednedn-5bd2-4249-84a1-2f00f725b981"
    # The context manager ties creation and removal of the scratch directory
    # together, so it is cleaned up whether the body passes or raises.
    with tempfile.TemporaryDirectory() as scratch_dir:
        sip_dir = Path(scratch_dir) / "emptysip"
        shutil.copytree(
            os.path.join(THIS_DIR, "fixtures", "emptysip"), str(sip_dir)
        )
        # Make sure directory is empty
        (sip_dir / "objects/metadata/transfers/.gitignore").unlink()
        state = create_mets_v2.MetsState()
        dmdsec_elem = create_mets_v2.createDublincoreDMDSecFromDBData(
            Job("stub", "stub", []),
            self.siptypeuuid,
            badsipuuid,
            str(sip_dir),
            state,
        )
        assert dmdsec_elem is None
def test_create_dc_dmdsec_dc_exists(self):
    """It should build a dmdSec wrapping a dublincore element when DC exists."""
    # Generate dmdSec if DC exists
    mets_state = create_mets_v2.MetsState()
    stub_job = Job("stub", "stub", [])
    section, section_id = create_mets_v2.createDublincoreDMDSecFromDBData(
        stub_job, self.siptypeuuid, self.sipuuid, THIS_DIR, mets_state
    )

    # Verify created correctly: dmdSec > mdWrap > xmlData > dublincore,
    # with exactly one child at each level.
    assert section is not None
    assert section.tag == "{http://www.loc.gov/METS/}dmdSec"
    assert section.attrib["ID"] == section_id
    assert len(section) == 1

    wrapper = section[0]
    assert wrapper.tag == "{http://www.loc.gov/METS/}mdWrap"
    assert wrapper.attrib["MDTYPE"] == "DC"
    assert len(wrapper) == 1

    payload = wrapper[0]
    assert payload.tag == "{http://www.loc.gov/METS/}xmlData"
    assert len(payload) == 1
    assert payload[0].tag == "{http://purl.org/dc/terms/}dublincore"
def test_create_rights_granted(self):
    """It should append a single rightsGranted element to the statement.

    The element is expected to hold act, restriction, termOfGrant (with
    startDate and endDate) and rightsGrantedNote children, in that order.
    """
    # Setup
    parent = etree.Element(
        "{http://www.loc.gov/premis/v3}rightsStatement",
        nsmap={"premis": NSMAP["premis"]},
    )
    rights_statement = RightsStatement.objects.get(id=1)

    # Test
    mets_state = create_mets_v2.MetsState()
    archivematicaCreateMETSRights.getrightsGranted(
        Job("stub", "stub", []), rights_statement, parent, mets_state
    )

    # Verify
    assert len(parent) == 1
    granted = parent[0]
    assert granted.tag == "{http://www.loc.gov/premis/v3}rightsGranted"
    assert len(granted.attrib) == 0
    assert len(granted) == 4

    act = granted[0]
    assert act.tag == "{http://www.loc.gov/premis/v3}act"
    assert act.text == "Disseminate"
    assert len(act.attrib) == 0
    assert len(act) == 0

    restriction = granted[1]
    assert restriction.tag == "{http://www.loc.gov/premis/v3}restriction"
    assert restriction.text == "Allow"
    assert len(restriction.attrib) == 0
    assert len(restriction) == 0

    term = granted[2]
    assert term.tag == "{http://www.loc.gov/premis/v3}termOfGrant"
    assert len(term.attrib) == 0
    assert len(term) == 2
    assert term[0].tag == "{http://www.loc.gov/premis/v3}startDate"
    assert term[0].text == "2000"
    assert term[1].tag == "{http://www.loc.gov/premis/v3}endDate"
    assert term[1].text == "OPEN"

    note = granted[3]
    assert note.tag == "{http://www.loc.gov/premis/v3}rightsGrantedNote"
    assert note.text == "Attribution required"
    assert len(note.attrib) == 0
    assert len(note) == 0
def test_get_included_structmap_incomplete_mets(self):
    """Check include_custom_structmap against incomplete custom structMaps.

    Each fixture file is first validated against the METS XSD; the call is
    then expected to return an empty list and to record exactly one error
    in a fresh state's error accumulator.
    """
    self.generate_aip_mets_v2_state()
    self._fixup_fileid_state()
    default_structmap = "mets_structmap.xml"
    Result = collections.namedtuple("Result", "structmap_name structmap_id")
    results = [
        Result(file_name, "custom_structmap")
        for file_name in (
            "no-contentids.xml",
            "file_does_not_exist.xml",
            "empty_filenames.xml",
            "missing_contentid.xml",
        )
    ]
    for res in results:
        # Fresh state per fixture so the error count starts from zero.
        self.state = create_mets_v2.MetsState()
        structmap_path = os.path.join(
            self.objects_dir,
            "metadata",
            "transfers",
            "custom-structmap-41ab1f1a-34d0-4a83-a2a3-0ad1b1ee1c51",
            res.structmap_name or default_structmap,
        )
        assert os.path.isfile(structmap_path)
        assert os.path.isfile(self.mets_xsd_path)
        self.validate_mets(self.mets_xsd_path, structmap_path)
        custom_structmap = create_mets_v2.include_custom_structmap(
            job=Job("stub", "stub", []),
            baseDirectoryPath=self.transfer_dir,
            state=self.state,
            custom_structmap=res.structmap_name,
        )
        assert custom_structmap == [], (
            "Return from include_custom_structmap should be an empty array: {}"
        ).format(custom_structmap)
        assert (
            self.state.error_accumulator.error_count == 1
        ), "error counter should be incremented on error"
def test_dmdsec_from_csv_parsed_metadata_other_only(self):
    """It should emit a single OTHER dmdSec when no dc.* keys are present."""
    # Setup
    parsed = collections.OrderedDict()
    parsed["Title"] = ["Yamani Weapons"]
    parsed["Contributor"] = ["雪 ユキ"]
    parsed["Long Description"] = [
        "This is about how glaives are used in the Yamani Islands"
    ]

    # Test
    mets_state = create_mets_v2.MetsState()
    dmdsecs = create_mets_v2.createDmdSecsFromCSVParsedMetadata(
        Job("stub", "stub", []), parsed, mets_state
    )

    # Verify
    assert dmdsecs
    assert len(dmdsecs) == 1
    section = dmdsecs[0]
    assert section.tag == "{http://www.loc.gov/METS/}dmdSec"
    assert "ID" in section.attrib
    wrapper = section[0]
    assert wrapper.tag == "{http://www.loc.gov/METS/}mdWrap"
    assert "MDTYPE" in wrapper.attrib
    assert wrapper.attrib["MDTYPE"] == "OTHER"
    assert "OTHERMDTYPE" in wrapper.attrib
    assert wrapper.attrib["OTHERMDTYPE"] == "CUSTOM"
    payload = wrapper[0]
    assert payload.tag == "{http://www.loc.gov/METS/}xmlData"
    # Elements are direct children of xmlData
    assert len(payload) == 3
    expected_children = [
        ("title", "Yamani Weapons"),
        ("contributor", "雪 ユキ"),
        (
            "long_description",
            "This is about how glaives are used in the Yamani Islands",
        ),
    ]
    for position, (tag, text) in enumerate(expected_children):
        assert payload[position].tag == tag
        assert payload[position].text == text
def test_normative_structmap_creation(self):
    """It should produce an lxml element for the normative structMap."""
    mets_state = create_mets_v2.MetsState()
    # The base directory is passed with a trailing separator appended.
    base_dir = str(self.sip_dir) + os.sep
    objects_dir = str(self.sip_object_dir)

    structmap = create_mets_v2.get_normative_structmap(
        base_dir, objects_dir, {}, mets_state
    )

    assert isinstance(structmap, etree._Element)
def test_creates_events(self):
    """
    It should create Events
    It should create Agents
    It should link Events only with Agents for that Event
    It should only include Agents used by that file
    """
    mets_state = create_mets_v2.MetsState()
    sections = create_mets_v2.createDigiprovMD(
        "ae8d4290-fe52-4954-b72a-0f591bee2e2f", mets_state
    )
    # NOTE(review): nine sections are expected, but only the first eight
    # are inspected below.
    assert len(sections) == 9

    # Events: sections 0-5, each linked to three agents.
    expected_event_types = [
        "ingestion",
        "message digest calculation",
        "virus check",
        "name cleanup",
        "format identification",
        "validation",
    ]
    for position, event_type in enumerate(expected_event_types):
        section = sections[position]
        assert section[0].attrib["MDTYPE"] == "PREMIS:EVENT"
        found_type = section.find(
            ".//{http://www.loc.gov/premis/v3}eventType"
        )
        assert found_type.text == event_type
        linked_agents = section.findall(
            ".//{http://www.loc.gov/premis/v3}linkingAgentIdentifier"
        )
        assert len(linked_agents) == 3

    # Agents: sections 6 and 7.
    expected_agents = [
        ("preservation system", "Archivematica-1.4.0", "Archivematica", "software"),
        ("repository code", "demo", "demo", "organization"),
    ]
    for position, (id_type, id_value, name, agent_type) in enumerate(
        expected_agents, start=6
    ):
        section = sections[position]
        assert section[0].attrib["MDTYPE"] == "PREMIS:AGENT"
        assert (
            section.find(
                ".//{http://www.loc.gov/premis/v3}agentIdentifierType"
            ).text
            == id_type
        )
        assert (
            section.find(
                ".//{http://www.loc.gov/premis/v3}agentIdentifierValue"
            ).text
            == id_value
        )
        assert (
            section.find(".//{http://www.loc.gov/premis/v3}agentName").text
            == name
        )
        assert (
            section.find(".//{http://www.loc.gov/premis/v3}agentType").text
            == agent_type
        )
def test_dmdsec_from_csv_parsed_metadata_dc_only(self):
    """It should emit a single DC dmdSec when only dc.* keys are present."""
    # Setup: fourteen dc.* keys, one value each, in insertion order.
    parsed = collections.OrderedDict()
    parsed["dc.title"] = ["Yamani Weapons"]
    parsed["dc.creator"] = ["Keladry of Mindelan"]
    parsed["dc.subject"] = ["Glaives"]
    parsed["dc.description"] = ["Glaives are cool"]
    parsed["dc.publisher"] = ["Tortall Press"]
    parsed["dc.contributor"] = ["雪 ユキ"]
    parsed["dc.date"] = ["2015"]
    parsed["dc.type"] = ["Archival Information Package"]
    parsed["dc.format"] = ["parchement"]
    parsed["dc.identifier"] = ["42/1"]
    parsed["dc.source"] = ["Numair's library"]
    parsed["dc.relation"] = ["None"]
    parsed["dc.language"] = ["en"]
    parsed["dc.rights"] = ["Public Domain"]

    # Test
    mets_state = create_mets_v2.MetsState()
    dmdsecs = create_mets_v2.createDmdSecsFromCSVParsedMetadata(
        Job("stub", "stub", []), parsed, mets_state
    )

    # Verify
    assert dmdsecs
    assert len(dmdsecs) == 1
    section = dmdsecs[0]
    assert section.tag == "{http://www.loc.gov/METS/}dmdSec"
    assert "ID" in section.attrib
    wrapper = section[0]
    assert wrapper.tag == "{http://www.loc.gov/METS/}mdWrap"
    assert "MDTYPE" in wrapper.attrib
    assert wrapper.attrib["MDTYPE"] == "DC"
    payload = wrapper[0]
    assert payload.tag == "{http://www.loc.gov/METS/}xmlData"
    # Elements are children of dublincore tag
    dublincore = payload[0]
    assert dublincore.tag == "{http://purl.org/dc/terms/}dublincore"
    assert len(dublincore) == 14
    expected_children = [
        ("{http://purl.org/dc/elements/1.1/}title", "Yamani Weapons"),
        ("{http://purl.org/dc/elements/1.1/}creator", "Keladry of Mindelan"),
        ("{http://purl.org/dc/elements/1.1/}subject", "Glaives"),
        ("{http://purl.org/dc/elements/1.1/}description", "Glaives are cool"),
        ("{http://purl.org/dc/elements/1.1/}publisher", "Tortall Press"),
        ("{http://purl.org/dc/elements/1.1/}contributor", "雪 ユキ"),
        ("{http://purl.org/dc/elements/1.1/}date", "2015"),
        (
            "{http://purl.org/dc/elements/1.1/}type",
            "Archival Information Package",
        ),
        ("{http://purl.org/dc/elements/1.1/}format", "parchement"),
        ("{http://purl.org/dc/elements/1.1/}identifier", "42/1"),
        ("{http://purl.org/dc/elements/1.1/}source", "Numair's library"),
        ("{http://purl.org/dc/elements/1.1/}relation", "None"),
        ("{http://purl.org/dc/elements/1.1/}language", "en"),
        ("{http://purl.org/dc/elements/1.1/}rights", "Public Domain"),
    ]
    for position, (tag, text) in enumerate(expected_children):
        assert dublincore[position].tag == tag
        assert dublincore[position].text == text