def test_create_dc_dmdsec_no_dc_no_transfers_dir(self):
    """It should not fail if no transfers directory exists.

    THIS_DIR is passed as the SIP directory together with a SIP UUID the
    test treats as bad; the call is expected to return None instead of
    raising.
    """
    unknown_sip_uuid = 'dnednedn-5bd2-4249-84a1-2f00f725b981'
    mets_state = create_mets_v2.MetsState()
    stub_job = Job("stub", "stub", [])
    result = create_mets_v2.createDublincoreDMDSecFromDBData(
        stub_job, self.siptypeuuid, unknown_sip_uuid, THIS_DIR, mets_state
    )
    # No element is expected back.
    assert result is None
def test_normative_structmap_creation(self):
    """get_normative_structmap should hand back an lxml element."""
    mets_state = create_mets_v2.MetsState()
    # The SIP directory is passed with a trailing separator, the objects
    # directory without one.
    sip_path = str(self.sip_dir) + os.sep
    objects_path = str(self.sip_object_dir)
    structmap = create_mets_v2.get_normative_structmap(
        sip_path, objects_path, {}, mets_state
    )
    assert isinstance(structmap, etree._Element)
def test_create_rights_granted(self):
    """getrightsGranted should append one complete PREMIS v2 rightsGranted.

    The element must carry, in order: act, restriction, termOfGrant
    (startDate + endDate) and rightsGrantedNote, none with attributes.
    """
    premis = '{info:lc/xmlns/premis-v2}'

    # Setup
    parent = etree.Element(
        premis + "rightsStatement", nsmap={'premis': NSMAP['premis']}
    )
    rights_statement = RightsStatement.objects.get(id=1)

    # Test
    mets_state = create_mets_v2.MetsState()
    archivematicaCreateMETSRights.getrightsGranted(
        Job("stub", "stub", []), rights_statement, parent, mets_state
    )

    # Verify
    assert len(parent) == 1
    granted = parent[0]
    assert granted.tag == premis + 'rightsGranted'
    assert len(granted.attrib) == 0
    assert len(granted) == 4

    # Leaf children before termOfGrant: (index, local name, text).
    for position, local_name, text in (
        (0, 'act', 'Disseminate'),
        (1, 'restriction', 'Allow'),
    ):
        leaf = granted[position]
        assert leaf.tag == premis + local_name
        assert leaf.text == text
        assert len(leaf.attrib) == 0
        assert len(leaf) == 0

    term = granted[2]
    assert term.tag == premis + 'termOfGrant'
    assert len(term.attrib) == 0
    assert len(term) == 2
    assert term[0].tag == premis + 'startDate'
    assert term[0].text == '2000'
    assert term[1].tag == premis + 'endDate'
    assert term[1].text == 'OPEN'

    note = granted[3]
    assert note.tag == premis + 'rightsGrantedNote'
    assert note.text == 'Attribution required'
    assert len(note.attrib) == 0
    assert len(note) == 0
def test_dmdsec_from_csv_parsed_metadata_other_only(self):
    """Non-DC columns alone should yield a single OTHER/CUSTOM dmdSec."""
    mets = '{http://www.loc.gov/METS/}'
    long_description = 'This is about how glaives are used in the Yamani Islands'
    parsed = collections.OrderedDict([
        ("Title", ["Yamani Weapons"]),
        ("Contributor", [u"雪 ユキ".encode('utf8')]),
        ("Long Description", [long_description]),
    ])

    # Test
    mets_state = create_mets_v2.MetsState()
    dmdsecs = create_mets_v2.createDmdSecsFromCSVParsedMetadata(
        Job("stub", "stub", []), parsed, mets_state
    )

    # Verify
    assert dmdsecs
    assert len(dmdsecs) == 1
    section = dmdsecs[0]
    assert section.tag == mets + 'dmdSec'
    assert 'ID' in section.attrib
    wrap = section[0]
    assert wrap.tag == mets + 'mdWrap'
    assert 'MDTYPE' in wrap.attrib
    assert wrap.attrib['MDTYPE'] == 'OTHER'
    assert 'OTHERMDTYPE' in wrap.attrib
    assert wrap.attrib['OTHERMDTYPE'] == 'CUSTOM'
    payload = wrap[0]
    assert payload.tag == mets + 'xmlData'

    # Elements are direct children of xmlData
    expected_children = [
        ('title', 'Yamani Weapons'),
        ('contributor', u'雪 ユキ'),
        ('long_description', long_description),
    ]
    assert len(payload) == 3
    for position, (tag, text) in enumerate(expected_children):
        assert payload[position].tag == tag
        assert payload[position].text == text
def test_dmdsec_from_csv_parsed_metadata_no_data(self):
    """An empty parsed-metadata mapping should produce an empty list."""
    no_metadata = {}

    # Test
    mets_state = create_mets_v2.MetsState()
    stub_job = Job("stub", "stub", [])
    dmdsecs = create_mets_v2.createDmdSecsFromCSVParsedMetadata(
        stub_job, no_metadata, mets_state
    )

    # Verify
    assert dmdsecs == []
def test_dmdsec_from_csv_parsed_metadata_both(self):
    """DC and non-DC columns together should yield two dmdSecs.

    The first is expected to be the DC section, the second the
    OTHER/CUSTOM one.
    """
    mets = '{http://www.loc.gov/METS/}'
    long_description = 'This is about how glaives are used in the Yamani Islands'
    parsed = collections.OrderedDict([
        ("dc.title", ["Yamani Weapons"]),
        ("dc.contributor", [u"雪 ユキ".encode('utf8')]),
        ("dcterms.isPartOf", ["AIC#42"]),
        ("Title", ["Yamani Weapons"]),
        ("Contributor", [u"雪 ユキ".encode('utf8')]),
        ("Long Description", [long_description]),
    ])

    # Test
    mets_state = create_mets_v2.MetsState()
    dmdsecs = create_mets_v2.createDmdSecsFromCSVParsedMetadata(
        Job("stub", "stub", []), parsed, mets_state
    )

    # Verify
    assert dmdsecs
    assert len(dmdsecs) == 2

    # Return can be DC or OTHER first, but in this case DC should be first
    dc_section = dmdsecs[0]
    assert dc_section.tag == mets + 'dmdSec'
    assert 'ID' in dc_section.attrib
    wrap = dc_section[0]
    assert wrap.tag == mets + 'mdWrap'
    assert 'MDTYPE' in wrap.attrib
    assert wrap.attrib['MDTYPE'] == 'DC'
    payload = wrap[0]
    assert payload.tag == mets + 'xmlData'

    # Elements are children of dublincore tag
    dublincore = payload[0]
    assert dublincore.tag == '{http://purl.org/dc/terms/}dublincore'
    expected_dc = [
        ('{http://purl.org/dc/elements/1.1/}title', 'Yamani Weapons'),
        ('{http://purl.org/dc/elements/1.1/}contributor', u'雪 ユキ'),
        ('{http://purl.org/dc/terms/}isPartOf', 'AIC#42'),
    ]
    assert len(dublincore) == 3
    for position, (tag, text) in enumerate(expected_dc):
        assert dublincore[position].tag == tag
        assert dublincore[position].text == text

    other_section = dmdsecs[1]
    assert other_section.tag == mets + 'dmdSec'
    assert 'ID' in other_section.attrib
    wrap = other_section[0]
    assert wrap.tag == mets + 'mdWrap'
    assert 'MDTYPE' in wrap.attrib
    assert wrap.attrib['MDTYPE'] == 'OTHER'
    assert 'OTHERMDTYPE' in wrap.attrib
    assert wrap.attrib['OTHERMDTYPE'] == 'CUSTOM'
    payload = wrap[0]
    assert payload.tag == mets + 'xmlData'

    # Elements are direct children of xmlData
    expected_other = [
        ('title', 'Yamani Weapons'),
        ('contributor', u'雪 ユキ'),
        ('long_description', long_description),
    ]
    assert len(payload) == 3
    for position, (tag, text) in enumerate(expected_other):
        assert payload[position].tag == tag
        assert payload[position].text == text
def test_dmdsec_from_csv_parsed_metadata_repeats(self):
    """Each value of a repeated column should become its own element."""
    mets = "{http://www.loc.gov/METS/}"
    dc_contributor = "{http://purl.org/dc/elements/1.1/}contributor"
    parsed = collections.OrderedDict(
        [
            ("dc.contributor", ["Yuki", "雪 ユキ".encode("utf8")]),
            ("Contributor", ["Yuki", "雪 ユキ".encode("utf8")]),
        ]
    )
    expected_texts = ["Yuki", "雪 ユキ"]

    # Test
    mets_state = create_mets_v2.MetsState()
    dmdsecs = create_mets_v2.createDmdSecsFromCSVParsedMetadata(
        Job("stub", "stub", []), parsed, mets_state
    )

    # Verify
    assert dmdsecs
    assert len(dmdsecs) == 2

    # Return can be DC or OTHER first, but in this case DC should be first
    dc_section = dmdsecs[0]
    assert dc_section.tag == mets + "dmdSec"
    assert "ID" in dc_section.attrib
    wrap = dc_section[0]
    assert wrap.tag == mets + "mdWrap"
    assert "MDTYPE" in wrap.attrib
    assert wrap.attrib["MDTYPE"] == "DC"
    payload = wrap[0]
    assert payload.tag == mets + "xmlData"

    # Elements are children of dublincore tag
    dublincore = payload[0]
    assert dublincore.tag == "{http://purl.org/dc/terms/}dublincore"
    assert len(dublincore) == 2
    for position, text in enumerate(expected_texts):
        assert dublincore[position].tag == dc_contributor
        assert dublincore[position].text == text

    other_section = dmdsecs[1]
    assert other_section.tag == mets + "dmdSec"
    assert "ID" in other_section.attrib
    wrap = other_section[0]
    assert wrap.tag == mets + "mdWrap"
    assert "MDTYPE" in wrap.attrib
    assert wrap.attrib["MDTYPE"] == "OTHER"
    assert "OTHERMDTYPE" in wrap.attrib
    assert wrap.attrib["OTHERMDTYPE"] == "CUSTOM"
    payload = wrap[0]
    assert payload.tag == mets + "xmlData"

    # Elements are direct children of xmlData
    assert len(payload) == 2
    for position, text in enumerate(expected_texts):
        assert payload[position].tag == "contributor"
        assert payload[position].text == text
def generate_aip_mets_v2_state(self):
    """Generate the fileSec state used by the custom structMap tests.

    Populates ``self.transfer_dir``, ``self.objects_dir``, ``self.state``
    and ``self.structmap_div_element`` by running
    ``create_mets_v2.createFileSec`` over the ``custom_structmaps``
    fixture, so the units involved in creating a custom structMap in the
    AIP METS can be tested.
    """
    structmap_counter_limit = 10
    fixture_uuid = "3a915449-d1bb-4920-b274-c917c7bb5929"

    # The trailing "" component makes the joined path end with a separator.
    self.transfer_dir = os.path.join(
        THIS_DIR,
        "fixtures",
        "custom_structmaps",
        "custom-structmap-3a915449-d1bb-4920-b274-c917c7bb5929",
        "",
    )
    self.objects_dir = os.path.join(self.transfer_dir, "objects")

    # createFileSec is handed a Directory div nested in a physical
    # structMap; the div is labelled with the last component of the
    # objects directory.
    physical_structmap = etree.Element(
        ns.metsBNS + "structMap",
        TYPE="physical",
        ID="structMap_1",
        LABEL="Archivematica default",
    )
    directory_label = os.path.basename(self.objects_dir.rstrip(os.path.sep))
    top_level_div = etree.SubElement(
        physical_structmap,
        ns.metsBNS + "div",
        TYPE="Directory",
        LABEL=directory_label,
    )

    self.state = create_mets_v2.MetsState()
    # Start the structMap counter at an arbitrary value below the limit.
    candidate_counters = list(range(structmap_counter_limit))
    self.state.globalStructMapCounter = random.choice(candidate_counters)

    self.structmap_div_element = create_mets_v2.createFileSec(
        job=Job("stub", "stub", []),
        directoryPath=self.objects_dir,
        parentDiv=top_level_div,
        baseDirectoryPath=self.transfer_dir,
        baseDirectoryName="%SIPDirectory%",
        fileGroupIdentifier=fixture_uuid,
        fileGroupType="sip_id",
        directories={},
        state=self.state,
        includeAmdSec=True,
    )
def test_create_dc_dmdsec_no_dc_no_transfers(self):
    """It should not fail if no dublincore.xml exists from transfers.

    The ``.gitignore`` file inside the fixture's ``transfers`` directory is
    removed so the directory is empty while the function under test runs.
    It is recreated in a ``finally`` clause, so a raising call or a failing
    assertion cannot leave the shared fixture tree modified.
    """
    badsipuuid = 'dnednedn-5bd2-4249-84a1-2f00f725b981'
    empty_transfers_sip = os.path.join(THIS_DIR, 'fixtures', 'emptysip')
    placeholder = os.path.join(
        empty_transfers_sip, 'objects', 'metadata', 'transfers', '.gitignore'
    )
    state = create_mets_v2.MetsState()

    # Make sure directory is empty
    try:
        os.remove(placeholder)
    except OSError:
        # Removal is best-effort, e.g. the file may already be absent.
        pass

    try:
        dmdsec_elem = create_mets_v2.createDublincoreDMDSecFromDBData(
            Job("stub", "stub", []),
            self.siptypeuuid,
            badsipuuid,
            empty_transfers_sip,
            state,
        )
        assert dmdsec_elem is None
    finally:
        # Reset directory state, whether or not the checks above passed.
        with open(placeholder, 'w'):
            pass
def test_creates_events(self):
    """createDigiprovMD should emit six events followed by three agents.

    It should create Events
    It should create Agents
    It should link Events only with Agents for that Event
    It should only include Agents used by that file
    """
    premis = './/{info:lc/xmlns/premis-v2}'
    mets_state = create_mets_v2.MetsState()
    sections = create_mets_v2.createDigiprovMD(
        "ae8d4290-fe52-4954-b72a-0f591bee2e2f", mets_state
    )
    assert len(sections) == 9

    # Events: positions 0-5, each linked to three agents.
    expected_event_types = [
        'ingestion',
        'message digest calculation',
        'virus check',
        'name cleanup',
        'format identification',
        'validation',
    ]
    for position, event_type in enumerate(expected_event_types):
        section = sections[position]
        assert section[0].attrib['MDTYPE'] == 'PREMIS:EVENT'
        assert section.find(premis + 'eventType').text == event_type
        assert len(section.findall(premis + 'linkingAgentIdentifier')) == 3

    # Agents: positions 6-8, as
    # (identifier type, identifier value, name, agent type).
    expected_agents = [
        ('preservation system', 'Archivematica-1.4.0', 'Archivematica', 'software'),
        ('repository code', 'demo', 'demo', 'organization'),
        (
            'Archivematica user pk',
            '1',
            'username="******", first_name="Keladry", last_name="Mindelan"',
            'Archivematica user',
        ),
    ]
    for position, agent in enumerate(expected_agents, 6):
        id_type, id_value, name, agent_type = agent
        section = sections[position]
        assert section[0].attrib['MDTYPE'] == 'PREMIS:AGENT'
        assert section.find(premis + 'agentIdentifierType').text == id_type
        assert section.find(premis + 'agentIdentifierValue').text == id_value
        assert section.find(premis + 'agentName').text == name
        assert section.find(premis + 'agentType').text == agent_type
def test_create_dc_dmdsec_dc_exists(self):
    """A dmdSec wrapping a dublincore element is built when DC data exists."""
    mets = '{http://www.loc.gov/METS/}'

    # Generate dmdSec if DC exists
    mets_state = create_mets_v2.MetsState()
    stub_job = Job("stub", "stub", [])
    section, section_id = create_mets_v2.createDublincoreDMDSecFromDBData(
        stub_job, self.siptypeuuid, self.sipuuid, THIS_DIR, mets_state
    )

    # Verify created correctly: dmdSec > mdWrap > xmlData > dublincore,
    # with exactly one child at every level.
    assert section is not None
    assert section.tag == mets + 'dmdSec'
    assert section.attrib['ID'] == section_id
    assert len(section) == 1

    wrap = section[0]
    assert wrap.tag == mets + 'mdWrap'
    assert wrap.attrib['MDTYPE'] == 'DC'
    assert len(wrap) == 1

    payload = wrap[0]
    assert payload.tag == mets + 'xmlData'
    assert len(payload) == 1
    assert payload[0].tag == '{http://purl.org/dc/terms/}dublincore'
def test_dmdsec_from_csv_parsed_metadata_other_only(self):
    """Non-DC columns alone should yield a single OTHER/CUSTOM dmdSec."""
    mets = "{http://www.loc.gov/METS/}"
    long_description = "This is about how glaives are used in the Yamani Islands"
    parsed = collections.OrderedDict(
        [
            ("Title", ["Yamani Weapons"]),
            ("Contributor", ["雪 ユキ".encode("utf8")]),
            ("Long Description", [long_description]),
        ]
    )

    # Test
    mets_state = create_mets_v2.MetsState()
    dmdsecs = create_mets_v2.createDmdSecsFromCSVParsedMetadata(
        Job("stub", "stub", []), parsed, mets_state
    )

    # Verify
    assert dmdsecs
    assert len(dmdsecs) == 1
    section = dmdsecs[0]
    assert section.tag == mets + "dmdSec"
    assert "ID" in section.attrib
    wrap = section[0]
    assert wrap.tag == mets + "mdWrap"
    assert "MDTYPE" in wrap.attrib
    assert wrap.attrib["MDTYPE"] == "OTHER"
    assert "OTHERMDTYPE" in wrap.attrib
    assert wrap.attrib["OTHERMDTYPE"] == "CUSTOM"
    payload = wrap[0]
    assert payload.tag == mets + "xmlData"

    # Elements are direct children of xmlData
    expected_children = [
        ("title", "Yamani Weapons"),
        ("contributor", "雪 ユキ"),
        ("long_description", long_description),
    ]
    assert len(payload) == 3
    for position, (tag, text) in enumerate(expected_children):
        assert payload[position].tag == tag
        assert payload[position].text == text
def test_create_dc_dmdsec_no_dc_no_transfers(self):
    """It should not fail if no dublincore.xml exists from transfers.

    Works on a throwaway copy of the ``emptysip`` fixture so the
    ``.gitignore`` file in its transfers directory can be deleted without
    touching the checked-in fixture tree.
    """
    unknown_sip_uuid = "dnednedn-5bd2-4249-84a1-2f00f725b981"
    scratch_root = Path(tempfile.mkdtemp())
    sip_copy = scratch_root / "emptysip"
    try:
        fixture_sip = os.path.join(THIS_DIR, "fixtures", "emptysip")
        shutil.copytree(fixture_sip, str(sip_copy))

        # Make sure directory is empty
        gitignore = sip_copy / "objects/metadata/transfers/.gitignore"
        gitignore.unlink()

        mets_state = create_mets_v2.MetsState()
        result = create_mets_v2.createDublincoreDMDSecFromDBData(
            Job("stub", "stub", []),
            self.siptypeuuid,
            unknown_sip_uuid,
            str(sip_copy),
            mets_state,
        )
        assert result is None
    finally:
        # Drop the whole temporary directory, not just the copied SIP.
        shutil.rmtree(str(sip_copy.parent))
def test_get_included_structmap_incomplete_mets(self):
    """Incomplete custom structMaps should be rejected with one error.

    Each fixture file still validates against the METS schema, but
    ``include_custom_structmap`` is expected to return an empty list and
    to bump the state's error counter to 1.
    """
    self.generate_aip_mets_v2_state()
    self._fixup_fileid_state()
    fallback_name = "mets_structmap.xml"
    Result = collections.namedtuple("Result", "structmap_name structmap_id")
    incomplete_structmaps = [
        Result("no-contentids.xml", "custom_structmap"),
        Result("file_does_not_exist.xml", "custom_structmap"),
        Result("empty_filenames.xml", "custom_structmap"),
        Result("missing_contentid.xml", "custom_structmap"),
    ]
    for case in incomplete_structmaps:
        # Fresh state per case so the error count starts from zero.
        self.state = create_mets_v2.MetsState()
        file_name = case.structmap_name if case.structmap_name else fallback_name
        structmap_path = os.path.join(
            self.objects_dir,
            "metadata",
            "transfers",
            "custom-structmap-41ab1f1a-34d0-4a83-a2a3-0ad1b1ee1c51",
            file_name,
        )
        assert os.path.isfile(structmap_path)
        assert os.path.isfile(self.mets_xsd_path)
        self.validate_mets(self.mets_xsd_path, structmap_path)

        returned = create_mets_v2.include_custom_structmap(
            job=Job("stub", "stub", []),
            baseDirectoryPath=self.transfer_dir,
            state=self.state,
            custom_structmap=case.structmap_name,
        )
        assert (
            returned == []
        ), "Return from include_custom_structmap should be an empty array: {}".format(
            returned
        )
        errors_seen = self.state.error_accumulator.error_count
        assert errors_seen == 1, "error counter should be incremented on error"
def test_create_rights_granted(self):
    """getrightsGranted should append one complete PREMIS v3 rightsGranted.

    The element must carry, in order: act, restriction, termOfGrant
    (startDate + endDate) and rightsGrantedNote, none with attributes.
    """
    premis = "{http://www.loc.gov/premis/v3}"

    # Setup
    parent = etree.Element(
        premis + "rightsStatement",
        nsmap={"premis": NSMAP["premis"]},
    )
    rights_statement = RightsStatement.objects.get(id=1)

    # Test
    mets_state = create_mets_v2.MetsState()
    archivematicaCreateMETSRights.getrightsGranted(
        Job("stub", "stub", []), rights_statement, parent, mets_state
    )

    # Verify
    assert len(parent) == 1
    granted = parent[0]
    assert granted.tag == premis + "rightsGranted"
    assert len(granted.attrib) == 0
    assert len(granted) == 4

    # Leaf children before termOfGrant: (index, local name, text).
    for position, local_name, text in (
        (0, "act", "Disseminate"),
        (1, "restriction", "Allow"),
    ):
        leaf = granted[position]
        assert leaf.tag == premis + local_name
        assert leaf.text == text
        assert len(leaf.attrib) == 0
        assert len(leaf) == 0

    term = granted[2]
    assert term.tag == premis + "termOfGrant"
    assert len(term.attrib) == 0
    assert len(term) == 2
    assert term[0].tag == premis + "startDate"
    assert term[0].text == "2000"
    assert term[1].tag == premis + "endDate"
    assert term[1].text == "OPEN"

    note = granted[3]
    assert note.tag == premis + "rightsGrantedNote"
    assert note.text == "Attribution required"
    assert len(note.attrib) == 0
    assert len(note) == 0
def add_new_files(job, mets, sip_uuid, sip_dir):
    """
    Add new files to structMap, fileSec.

    This supports adding new metadata or preservation files.

    If a new file is a metadata.csv, parse it to create dmdSecs.

    Walks ``<sip_dir>/objects`` and treats every file that ``mets`` has no
    entry for (other than the old METS file itself) as new. Each new file
    gets an amdSec and an Item entry under the "objects" directory entry,
    with intermediate Directory entries created as needed.

    :param job: Job object; only its ``pyprint`` method is used here, plus
        being passed through to the METS helpers.
    :param mets: METS document object exposing ``get_file``; it is modified
        in place and also returned.
    :param sip_uuid: UUID of the SIP, used for the File lookups.
    :param sip_dir: Path of the SIP on disk.
        NOTE(review): presumably expected to end with a path separator,
        since the computed relative path is compared against
        "objects/metadata/metadata.csv" with no leading slash - confirm.
    :return: ``mets``, or whatever ``update_metadata_csv`` returns when a
        new metadata.csv was found.
    """
    # Find new files
    # How tell new file from old with same name? Check hash?
    # QUESTION should the metadata.csv be parsed and only updated if different
    # even if one already existed?
    new_files = []
    old_mets_rel_path = _get_old_mets_rel_path(sip_uuid)
    metadata_csv = None
    objects_dir = os.path.join(sip_dir, "objects")
    for dirpath, _, filenames in os.walk(objects_dir):
        for filename in filenames:
            # Find in METS
            # current_loc is the "%SIPDirectory%"-prefixed form used for the
            # File lookup below; rel_path is the same path without the
            # placeholder, used for the METS lookup.
            current_loc = os.path.join(dirpath, filename).replace(
                sip_dir, "%SIPDirectory%", 1
            )
            rel_path = current_loc.replace("%SIPDirectory%", "", 1)
            job.pyprint("Looking for", rel_path, "in METS")
            fsentry = mets.get_file(path=rel_path)
            if fsentry is None:
                # If not in METS (and is not old METS), get File object and
                # store for later
                if rel_path != old_mets_rel_path:
                    job.pyprint(rel_path, "not found in METS, must be new file")
                    f = models.File.objects.get(
                        currentlocation=current_loc, sip_id=sip_uuid
                    )
                    new_files.append(f)
                    # Remember a new metadata.csv so it can be parsed at the
                    # end.
                    if rel_path == "objects/metadata/metadata.csv":
                        metadata_csv = f
            else:
                job.pyprint(rel_path, "found in METS, no further work needed")

    # Nothing new on disk: hand the METS back untouched.
    if not new_files:
        return mets

    # Set global counters so getAMDSec will work
    # The counters are seeded from metsrw's current ID counts.
    state = createmets2.MetsState(
        globalAmdSecCounter=metsrw.AMDSec.get_current_id_count(),
        globalTechMDCounter=metsrw.SubSection.get_current_id_count("techMD"),
        globalDigiprovMDCounter=metsrw.SubSection.get_current_id_count("digiprovMD"),
    )

    objects_fsentry = mets.get_file(label="objects", type="Directory")

    for f in new_files:
        # Create amdSecs
        job.pyprint("Adding amdSec for", f.currentlocation, "(", f.uuid, ")")
        amdsec, amdid = createmets2.getAMDSec(
            job,
            fileUUID=f.uuid,
            filePath=None,  # Only needed if use=original
            use=f.filegrpuse,
            sip_uuid=sip_uuid,
            transferUUID=None,  # Only needed if use=original
            itemdirectoryPath=None,  # Only needed if use=original
            typeOfTransfer=None,  # Only needed if use=original
            baseDirectoryPath=sip_dir,
            state=state,
        )
        job.pyprint(f.uuid, "has amdSec with ID", amdid)

        # Create parent directories if needed
        # dirs holds the directory components of the file's location below
        # "objects/"; empty components are skipped in the loop.
        dirs = os.path.dirname(
            f.currentlocation.replace("%SIPDirectory%objects/", "", 1)
        ).split("/")
        parent_fsentry = objects_fsentry
        for dirname in (d for d in dirs if d):
            # NOTE(review): the lookup is by label only, not scoped to
            # parent_fsentry - confirm how metsrw resolves directories that
            # share a name at different depths.
            child = mets.get_file(type="Directory", label=dirname)
            if child is None:
                child = metsrw.FSEntry(path=None, type="Directory", label=dirname)
                parent_fsentry.add_child(child)
            parent_fsentry = child

        # Link the new file to the entry of the file it was derived from,
        # when the database records one.
        derived_from = None
        if f.original_file_set.exists():
            original_f = f.original_file_set.get().source_file
            derived_from = mets.get_file(file_uuid=original_f.uuid)
        entry = metsrw.FSEntry(
            path=f.currentlocation.replace("%SIPDirectory%", "", 1),
            use=f.filegrpuse,
            type="Item",
            file_uuid=f.uuid,
            derived_from=derived_from,
        )
        # Attach the freshly generated amdSec to the new entry.
        metsrw_amdsec = metsrw.AMDSec(tree=amdsec, section_id=amdid)
        entry.amdsecs.append(metsrw_amdsec)
        parent_fsentry.add_child(entry)

    # Parse metadata.csv and add dmdSecs
    if metadata_csv:
        mets = update_metadata_csv(job, mets, metadata_csv, sip_uuid, sip_dir, state)

    return mets
def test_dmdsec_from_csv_parsed_metadata_dc_only(self):
    """DC columns alone should yield a single DC dmdSec.

    All fifteen input columns must come back as children of the dublincore
    element, in input order and in the matching DC namespace.
    """
    mets = "{http://www.loc.gov/METS/}"
    dc = "{http://purl.org/dc/elements/1.1/}"
    dcterms = "{http://purl.org/dc/terms/}"
    parsed = collections.OrderedDict(
        [
            ("dc.title", ["Yamani Weapons"]),
            ("dc.creator", ["Keladry of Mindelan"]),
            ("dc.subject", ["Glaives"]),
            ("dc.description", ["Glaives are cool"]),
            ("dc.publisher", ["Tortall Press"]),
            ("dc.contributor", ["雪 ユキ".encode("utf8")]),
            ("dc.date", ["2015"]),
            ("dc.type", ["Archival Information Package"]),
            ("dc.format", ["parchement"]),
            ("dc.identifier", ["42/1"]),
            ("dc.source", ["Numair's library"]),
            ("dc.relation", ["None"]),
            ("dc.language", ["en"]),
            ("dc.rights", ["Public Domain"]),
            ("dcterms.isPartOf", ["AIC#42"]),
        ]
    )

    # Test
    mets_state = create_mets_v2.MetsState()
    dmdsecs = create_mets_v2.createDmdSecsFromCSVParsedMetadata(
        Job("stub", "stub", []), parsed, mets_state
    )

    # Verify
    assert dmdsecs
    assert len(dmdsecs) == 1
    section = dmdsecs[0]
    assert section.tag == mets + "dmdSec"
    assert "ID" in section.attrib
    wrap = section[0]
    assert wrap.tag == mets + "mdWrap"
    assert "MDTYPE" in wrap.attrib
    assert wrap.attrib["MDTYPE"] == "DC"
    payload = wrap[0]
    assert payload.tag == mets + "xmlData"

    # Elements are children of dublincore tag
    dublincore = payload[0]
    assert dublincore.tag == dcterms + "dublincore"
    expected_children = [
        (dc + "title", "Yamani Weapons"),
        (dc + "creator", "Keladry of Mindelan"),
        (dc + "subject", "Glaives"),
        (dc + "description", "Glaives are cool"),
        (dc + "publisher", "Tortall Press"),
        (dc + "contributor", "雪 ユキ"),
        (dc + "date", "2015"),
        (dc + "type", "Archival Information Package"),
        (dc + "format", "parchement"),
        (dc + "identifier", "42/1"),
        (dc + "source", "Numair's library"),
        (dc + "relation", "None"),
        (dc + "language", "en"),
        (dc + "rights", "Public Domain"),
        (dcterms + "isPartOf", "AIC#42"),
    ]
    assert len(dublincore) == 15
    for position, (tag, text) in enumerate(expected_children):
        assert dublincore[position].tag == tag
        assert dublincore[position].text == text
def test_creates_events(self):
    """createDigiprovMD should emit six events followed by three agents.

    It should create Events
    It should create Agents
    It should link Events only with Agents for that Event
    It should only include Agents used by that file
    """
    premis = ".//{http://www.loc.gov/premis/v3}"
    mets_state = create_mets_v2.MetsState()
    sections = create_mets_v2.createDigiprovMD(
        "ae8d4290-fe52-4954-b72a-0f591bee2e2f", mets_state
    )
    assert len(sections) == 9

    # Events: positions 0-5, each linked to three agents.
    expected_event_types = [
        "ingestion",
        "message digest calculation",
        "virus check",
        "name cleanup",
        "format identification",
        "validation",
    ]
    for position, event_type in enumerate(expected_event_types):
        section = sections[position]
        assert section[0].attrib["MDTYPE"] == "PREMIS:EVENT"
        assert section.find(premis + "eventType").text == event_type
        assert len(section.findall(premis + "linkingAgentIdentifier")) == 3

    # Agents: positions 6-8, as
    # (identifier type, identifier value, name, agent type).
    expected_agents = [
        ("preservation system", "Archivematica-1.4.0", "Archivematica", "software"),
        ("repository code", "demo", "demo", "organization"),
        (
            "Archivematica user pk",
            "1",
            'username="******", first_name="Keladry", last_name="Mindelan"',
            "Archivematica user",
        ),
    ]
    for position, agent in enumerate(expected_agents, 6):
        id_type, id_value, name, agent_type = agent
        section = sections[position]
        assert section[0].attrib["MDTYPE"] == "PREMIS:AGENT"
        assert section.find(premis + "agentIdentifierType").text == id_type
        assert section.find(premis + "agentIdentifierValue").text == id_value
        assert section.find(premis + "agentName").text == name
        assert section.find(premis + "agentType").text == agent_type
def test_dmdsec_from_csv_parsed_metadata_both(self):
    """DC and non-DC columns together should yield two dmdSecs.

    The first is expected to be the DC section, the second the
    OTHER/CUSTOM one.
    """
    mets = "{http://www.loc.gov/METS/}"
    long_description = "This is about how glaives are used in the Yamani Islands"
    parsed = collections.OrderedDict(
        [
            ("dc.title", ["Yamani Weapons"]),
            ("dc.contributor", ["雪 ユキ".encode("utf8")]),
            ("dcterms.isPartOf", ["AIC#42"]),
            ("Title", ["Yamani Weapons"]),
            ("Contributor", ["雪 ユキ".encode("utf8")]),
            ("Long Description", [long_description]),
        ]
    )

    # Test
    mets_state = create_mets_v2.MetsState()
    dmdsecs = create_mets_v2.createDmdSecsFromCSVParsedMetadata(
        Job("stub", "stub", []), parsed, mets_state
    )

    # Verify
    assert dmdsecs
    assert len(dmdsecs) == 2

    # Return can be DC or OTHER first, but in this case DC should be first
    dc_section = dmdsecs[0]
    assert dc_section.tag == mets + "dmdSec"
    assert "ID" in dc_section.attrib
    wrap = dc_section[0]
    assert wrap.tag == mets + "mdWrap"
    assert "MDTYPE" in wrap.attrib
    assert wrap.attrib["MDTYPE"] == "DC"
    payload = wrap[0]
    assert payload.tag == mets + "xmlData"

    # Elements are children of dublincore tag
    dublincore = payload[0]
    assert dublincore.tag == "{http://purl.org/dc/terms/}dublincore"
    expected_dc = [
        ("{http://purl.org/dc/elements/1.1/}title", "Yamani Weapons"),
        ("{http://purl.org/dc/elements/1.1/}contributor", "雪 ユキ"),
        ("{http://purl.org/dc/terms/}isPartOf", "AIC#42"),
    ]
    assert len(dublincore) == 3
    for position, (tag, text) in enumerate(expected_dc):
        assert dublincore[position].tag == tag
        assert dublincore[position].text == text

    other_section = dmdsecs[1]
    assert other_section.tag == mets + "dmdSec"
    assert "ID" in other_section.attrib
    wrap = other_section[0]
    assert wrap.tag == mets + "mdWrap"
    assert "MDTYPE" in wrap.attrib
    assert wrap.attrib["MDTYPE"] == "OTHER"
    assert "OTHERMDTYPE" in wrap.attrib
    assert wrap.attrib["OTHERMDTYPE"] == "CUSTOM"
    payload = wrap[0]
    assert payload.tag == mets + "xmlData"

    # Elements are direct children of xmlData
    expected_other = [
        ("title", "Yamani Weapons"),
        ("contributor", "雪 ユキ"),
        ("long_description", long_description),
    ]
    assert len(payload) == 3
    for position, (tag, text) in enumerate(expected_other):
        assert payload[position].tag == tag
        assert payload[position].text == text