def addFile(self, file_name, mets_filegroup):
    """
    Describe a file inside a METS file group and hand back its generated ID.

    A <file> element (MIME type, SHA-256 checksum, CREATED value, size, ID)
    is appended to mets_filegroup and receives one <FLocat> child whose
    xlink:href is the file location relative to self.root_path.

    @param file_name: path of the file to register
    @param mets_filegroup: element the new <file> entry is appended to
    @return: the generated file ID ("ID" followed by a uuid4)
    """
    location = "file://./%s" % os.path.relpath(file_name, self.root_path)
    # guess_type yields a (type, encoding) pair; only the type is recorded
    mimetype = self.mime.guess_type(location)[0]
    checksum = self.sha256(file_name)
    size = os.path.getsize(file_name)
    created = get_file_ctime_iso_date_str(file_name, DT_ISO_FMT_SEC_PREC)
    file_id = "ID" + str(uuid.uuid4())

    file_attributes = {
        "MIMETYPE": mimetype,
        "CHECKSUMTYPE": "SHA-256",
        "CREATED": created,
        "CHECKSUM": checksum,
        "USE": "Datafile",
        "ID": file_id,
        "SIZE": size
    }
    mets_file = M.file(file_attributes)
    mets_filegroup.append(mets_file)

    locator_attributes = {
        q(XLINK_NS, 'href'): location,
        "LOCTYPE": "URL",
        q(XLINK_NS, 'type'): 'simple'
    }
    mets_file.append(M.FLocat(locator_attributes))

    return file_id
def setParentRelation(self, identifier):
    """
    Record a reference to a parent package in <self.root_path>/METS.xml.

    The existing METS.xml is parsed, a <div LABEL="parent <type>"> holding an
    <mptr> to "urn:uuid:<identifier>" is added below the structMap labelled
    'parent <type>' (inside its 'parent <type> identifiers' div), and the
    file is rewritten. The structMap and the identifiers div are created
    when they do not exist yet. If METS.xml is missing, only a message is
    printed.

    @param identifier: UUID string of the parent; it is prefixed with "urn:uuid:"
    @return: None
    """
    parentmets = os.path.join(self.root_path, 'METS.xml')
    packagetype = self.mets_data['type']

    if not os.path.exists(parentmets):
        print('Couldn\'t find the parent %ss Mets file.' % packagetype)
        return

    parser = etree.XMLParser(resolve_entities=False,
                             remove_blank_text=True,
                             strip_cdata=False)
    parent_root = etree.parse(parentmets, parser).getroot()

    # the new entry: a div wrapping the pointer to the parent
    parent = M.div({'LABEL': "parent %s" % packagetype})
    pointer = M.mptr({
        "LOCTYPE": "OTHER",
        "OTHERLOCTYPE": "UUID",
        q(XLINK_NS, "title"): ("Referencing a parent %s." % packagetype),
        q(XLINK_NS, "href"): "urn:uuid:" + identifier,
        "ID": "ID" + uuid.uuid4().__str__()
    })
    parent.append(pointer)

    parent_map = parent_root.find(
        "%s[@LABEL='parent %s']" % (q(METS_NS, 'structMap'), packagetype))
    if parent_map is None:
        parent_map = M.structMap({
            'LABEL': 'parent %s' % packagetype,
            'TYPE': 'logical'
        })
        parent_root.append(parent_map)
        parent_div = None
    else:
        parent_div = parent_map.find(
            "%s[@LABEL='parent %s identifiers']" % (q(METS_NS, 'div'),
                                                    packagetype))

    if parent_div is None:
        # Also reached when the structMap exists but lacks the identifiers
        # div; previously that case failed with an AttributeError on None.
        parent_div = M.div({'LABEL': 'parent %s identifiers' % packagetype})
        parent_map.append(parent_div)
    parent_div.append(parent)

    serialized = etree.tostring(parent_root,
                                encoding='UTF-8',
                                pretty_print=True,
                                xml_declaration=True)
    # tostring() with an explicit encoding produces an encoded byte string,
    # so the file is opened in binary mode.
    with open(parentmets, 'wb') as output_file:
        output_file.write(serialized)
def addObject(self, abs_path):
    '''
    Build a Premis object of type 'file' for one file.
    Must be called with the absolute path to the file.

    The object carries the location relative to self.root_path as a
    'filepath' identifier, plus the SHA-256 digest, the size and the result
    of self.fid.identify_file() as the PRONOM format registry key.

    @param abs_path: absolute file path
    @return: Premis object
    '''
    digest = self.sha256(abs_path)
    location = "file://./%s" % os.path.relpath(abs_path, self.root_path)
    format_key = self.fid.identify_file(abs_path)
    byte_count = os.path.getsize(abs_path)
    premis_id = 'ID' + str(uuid.uuid4())

    identifier = P.objectIdentifier(
        P.objectIdentifierType('filepath'),
        P.objectIdentifierValue(location))

    fixity = P.fixity(
        P.messageDigestAlgorithm("SHA-256"),
        P.messageDigest(digest),
        P.messageDigestOriginator("hashlib"))

    file_format = P.format(
        P.formatRegistry(
            P.formatRegistryName("PRONOM"),
            P.formatRegistryKey(format_key),
            P.formatRegistryRole("identification")))

    characteristics = P.objectCharacteristics(
        P.compositionLevel(0),
        fixity,
        P.size(byte_count),
        file_format)

    object_attributes = {q(XSI_NS, 'type'): 'file', "xmlID": premis_id}
    return P.object(object_attributes, identifier, characteristics)
def make_mdref(self, path, file, id, mdtype):
    """
    Assemble the attribute dictionary for a METS <mdRef> element.

    @param path: directory containing the metadata file
    @param file: name of the metadata file inside path
    @param id: value for the "ID" attribute
    @param mdtype: value for the "MDTYPE" attribute
    @return: dict of attribute names to values (location relative to
             self.root_path, guessed MIME type, current timestamp and the
             SHA-256 checksum of the file)
    """
    target = os.path.join(path, file)
    mimetype = self.mime.guess_type(target)[0]
    href = "file://./%s" % os.path.relpath(target, self.root_path)

    return {
        "LOCTYPE": "URL",
        "MIMETYPE": mimetype,
        "CREATED": current_timestamp(),
        q(XLINK_NS, "type"): "simple",
        q(XLINK_NS, "href"): href,
        "CHECKSUMTYPE": "SHA-256",
        "CHECKSUM": get_sha256_hash(target),
        "ID": id,
        "MDTYPE": mdtype
    }
def validate_file(self, file):
    '''
    Validates one file referenced in a Mets: existence, size and checksum.
    If the file exists, the counter self.total_files is decreased by one.
    Every problem found is printed and appended to self.validation_errors.

    @param file: XML Element of a file that will be validated.
    @return: None
    '''
    problems = []
    forced_results = []  # notes about overridden results; only collected here

    # what the Mets claims about the file
    href = file.getchildren()[0].attrib[q(XLINK_NS, 'href')]
    expected_size = file.attrib['SIZE']
    expected_checksum = file.attrib['CHECKSUM']
    checksum_type = file.attrib['CHECKSUMTYPE']

    relative_item = remove_protocol(href)
    file_path = os.path.join(self.rootpath, relative_item).replace('\\', '/')

    if os.path.exists(file_path):
        self.total_files -= 1

        # workaround for earkweb.log in AIP metadata/ folder on IP root level:
        # size and checksum mismatches of that file are not reported
        is_earkweb_log = file_path[-22:] == './metadata/earkweb.log'

        # TODO: is the size check even needed?
        actual_size = os.path.getsize(file_path)
        if int(actual_size) != int(expected_size):
            if is_earkweb_log:
                forced_results.append(
                    'Forced validation result \'True\' for file: %s' %
                    (file_path))
            else:
                problems.append(
                    "Actual file size %s does not equal file size attribute value %s"
                    % (actual_size, expected_size))

        validator = ChecksumValidation()
        checksum_ok = validator.validate_checksum(file_path,
                                                  expected_checksum,
                                                  checksum_type)
        if is_earkweb_log:
            checksum_ok = True
        if not checksum_ok == True:
            problems.append('Checksum validation failed for: %s' % file_path)
    else:
        problems.append(
            "Unable to find file referenced in delivery METS file: %s" %
            file_path)

    for problem in problems:
        print('File validation error: ' + problem)
        self.validation_errors.append(problem)
def __init__(self, f=None):
    """
    Set self.root to a Premis root element.

    @param f: source handed to objectify.parse(); when None, a new premis
              root with schemaLocation and version '2.0' is created instead
    """
    if f is not None:
        self.root = objectify.parse(f).getroot()
        return

    schema_location = PREMIS_NS + ' ../schemas/premis-v2-2.xsd'
    self.root = P.premis({q(XSI_NS, 'schemaLocation'): schema_location})
    self.root.set('version', '2.0')
def add_object(self, identifier_value):
    """
    Insert a Premis object of type 'file' with a 'LOCAL' identifier into
    self.root via sequence_insert().

    @param identifier_value: text of the objectIdentifierValue element
    @return: None
    """
    identifier = P.objectIdentifier(
        P.objectIdentifierType('LOCAL'),
        P.objectIdentifierValue(identifier_value))

    # format registry name and key are left without content; the key is
    # passed as the element factory itself, not as a called element
    registry = P.formatRegistry(P.formatRegistryName(), P.formatRegistryKey)
    characteristics = P.objectCharacteristics(
        P.compositionLevel(0),
        P.format(registry))

    premis_object = P.object({q(XSI_NS, 'type'): 'file'}, identifier,
                             characteristics)
    sequence_insert(self.root, premis_object, self.premis_successor_sections)
def validate_mets(self, mets):
    '''
    Validates a Mets file. The Mets file is parsed with etree.iterparse()
    against self.schema_mets, which allows event-driven parsing of large
    files. On the end of certain elements actions are taken:

    - <file>: counted in self.total_files and checked with validate_file()
    - <div> labelled 'representations/<name>' where <name> matches '*_mig-*':
      every <mptr> child is queued in self.subsequent_mets as a tuple
      (<name>, href) so that it can be evaluated later on
    - <amdSec>: referenced Premis files ('file://./' links with MDTYPE
      'PREMIS') are validated against self.schema_premis

    Nothing is returned; schema and file problems are collected in
    self.validation_errors. self.rootpath is changed to the directory of the
    Mets file so that relative paths found in it can be resolved.

    @param mets: Path leading to the Mets file; a 'file://./' reference is
                 resolved against the current self.rootpath first.
    @return: None
    '''
    if mets.startswith('file://./'):
        mets = os.path.join(self.rootpath, mets[9:])
    # change self.rootpath so it fits any relative path found in the current
    # (subsequent) mets
    self.rootpath = mets.rsplit('/', 1)[0]

    file_tag = q(METS_NS, 'file')
    div_tag = q(METS_NS, 'div')
    mptr_tag = q(METS_NS, 'mptr')
    amdsec_tag = q(METS_NS, 'amdSec')
    href_attr = q(XLINK_NS, 'href')

    try:
        # binary mode leaves encoding detection to the parser; the context
        # manager closes the handle even when validation aborts
        with open(mets, 'rb') as mets_file:
            parsed_mets = etree.iterparse(mets_file,
                                          events=('start', 'end'),
                                          schema=self.schema_mets)
            for event, element in parsed_mets:
                if event != 'end':
                    continue

                if element.tag == file_tag:
                    self.total_files += 1
                    self.validate_file(element)
                    self._release_parsed_element(element)

                elif element.tag == div_tag:
                    # LABEL is looked up defensively: a div without it is
                    # simply not a representation div
                    label = element.attrib.get('LABEL', '')
                    if not label.startswith('representations/'):
                        continue
                    rep = label.rsplit('/', 1)[1]
                    if fnmatch.fnmatch(rep, '*_mig-*'):
                        # representation mets files
                        for child in element.getchildren():
                            if child.tag == mptr_tag:
                                self.subsequent_mets.append(
                                    (rep, child.attrib[href_attr]))
                        self._release_parsed_element(element)

                elif element.tag == amdsec_tag:
                    self._validate_referenced_premis(element, href_attr)
    except etree.XMLSyntaxError as e:
        self.validation_errors.append(e.error_log)

def _release_parsed_element(self, element):
    '''Empty a handled element and delete the siblings before it from its parent.'''
    element.clear()
    while element.getprevious() is not None:
        del element.getparent()[0]

def _validate_referenced_premis(self, amdsec, href_attr):
    '''Validate every Premis file referenced via 'file://./' below an amdSec element.'''
    for md_section in amdsec.getchildren():
        for md_entry in md_section.getchildren():
            # comments and processing instructions also count as children
            if md_entry.tag is etree.Comment or md_entry.tag is etree.PI:
                continue
            if md_entry.get('MDTYPE') != 'PREMIS':
                continue
            # entries without a link (e.g. embedded metadata) yield ''
            rel_path = md_entry.get(href_attr, '')
            if rel_path.startswith('file://./'):
                self._validate_premis_file(
                    os.path.join(self.rootpath, rel_path[9:]))

def _validate_premis_file(self, premis):
    '''Parse one Premis file against self.schema_premis and record schema errors.'''
    try:
        with open(premis, 'rb') as premis_file:
            for _ in etree.iterparse(premis_file,
                                     events=('start', ),
                                     schema=self.schema_premis):
                pass
        print('Successfully validated Premis file: %s' % premis)
    except etree.XMLSyntaxError as e:
        print('VALIDATION ERROR: The Premis file %s yielded errors:' % premis)
        print(e.error_log)
        self.validation_errors.append(e.error_log)
def createMets(self, mets_data):
    """
    Build the METS description of the package below self.root_path and write
    it to <self.root_path>/METS.xml.

    The method first creates the skeleton (root, header with agent, amdSec,
    fileSec with one file group, a physical structMap and a logical
    structMap with divs for metadata, schema and content files, plus an
    optional structMap pointing to the parent). It then walks self.root_path
    and fills the skeleton.

    @param mets_data: dict read with the keys 'packageid', 'type', 'schemas'
                      (folder checked for mets_1_11.xsd and xlink.xsd) and
                      'parent' ('' means no parent link is created); it is
                      also stored in self.mets_data
    @return: None
    """
    self.mets_data = mets_data
    packageid = mets_data['packageid']
    packagetype = mets_data['type']
    schemafolder = mets_data['schemas']
    parent = mets_data['parent']

    print 'creating Mets'

    ###########################
    # create METS skeleton
    ###########################

    # create Mets root
    METS_ATTRIBUTES = {
        "OBJID": "urn:uuid:" + packageid,
        "LABEL": "METS file describing the %s matching the OBJID." % packagetype,
        "PROFILE": "http://www.ra.ee/METS/v01/IP.xml",
        "TYPE": packagetype
    }
    root = M.mets(METS_ATTRIBUTES)

    # schema locations are written relative to the package root; the literal
    # 'empty' is used when a schema file is not present in schemafolder
    if os.path.isfile(os.path.join(schemafolder, 'mets_1_11.xsd')):
        mets_schema_location = os.path.relpath(
            os.path.join(schemafolder, 'mets_1_11.xsd'), self.root_path)
    else:
        mets_schema_location = 'empty'
    if os.path.isfile(os.path.join(schemafolder, 'xlink.xsd')):
        xlink_schema_loaction = os.path.relpath(
            os.path.join(schemafolder, 'xlink.xsd'), self.root_path)
    else:
        xlink_schema_loaction = 'empty'

    root.attrib[
        '{%s}schemaLocation' %
        XSI_NS] = "http://www.loc.gov/METS/ %s http://www.w3.org/1999/xlink %s" % (
            mets_schema_location, xlink_schema_loaction)

    # create Mets header
    mets_hdr = M.metsHdr({
        "CREATEDATE": current_timestamp(),
        "RECORDSTATUS": "NEW"
    })
    root.append(mets_hdr)

    # add an agent
    mets_hdr.append(
        self.createAgent("CREATOR", "OTHER", "SOFTWARE", "E-ARK earkweb",
                         "VERSION=0.0.1"))

    # add document ID
    mets_hdr.append(M.metsDocumentID("METS.xml"))

    # create amdSec
    mets_amdSec = M.amdSec({"ID": "ID" + uuid.uuid4().__str__()})
    root.append(mets_amdSec)

    # create fileSec
    mets_fileSec = M.fileSec()
    root.append(mets_fileSec)

    # general filegroup
    mets_filegroup = M.fileGrp({
        "ID": "ID" + uuid.uuid4().__str__(),
        "USE": "general filegroup"
    })
    mets_fileSec.append(mets_filegroup)

    # structMap 'E-ARK structural map' - default, physical structure
    mets_earkstructmap = M.structMap({
        "LABEL": "E-ARK structural map",
        "TYPE": "physical"
    })
    root.append(mets_earkstructmap)
    package_div = M.div({"LABEL": packageid})
    # append physical structMap
    mets_earkstructmap.append(package_div)

    # structMap and div for the whole package (metadata, schema and /data)
    mets_structmap = M.structMap({
        "LABEL": "Simple %s structuring" % packagetype,
        "TYPE": "logical"
    })
    root.append(mets_structmap)
    mets_structmap_div = M.div({"LABEL": "Package structure"})
    mets_structmap.append(mets_structmap_div)

    # metadata structmap - IP root level!
    mets_structmap_metadata_div = M.div({"LABEL": "metadata files"})
    mets_structmap_div.append(mets_structmap_metadata_div)

    # structmap for schema files
    mets_structmap_schema_div = M.div({"LABEL": "schema files"})
    mets_structmap_div.append(mets_structmap_schema_div)

    # content structmap - all representations! (is only filled if no separate METS exists for the rep)
    mets_structmap_content_div = M.div({"LABEL": "content files"})
    mets_structmap_div.append(mets_structmap_content_div)

    # create structmap for parent/child relation, if applicable
    if parent != '':
        print 'creating link to parent %s' % packagetype
        mets_structmap_relation = M.structMap({
            'TYPE': 'logical',
            'LABEL': 'parent'
        })
        root.append(mets_structmap_relation)
        mets_div_rel = M.div(
            {'LABEL': '%s parent identifier' % packagetype})
        mets_structmap_relation.append(mets_div_rel)
        parent_pointer = M.mptr({
            "LOCTYPE": "OTHER",
            "OTHERLOCTYPE": "UUID",
            q(XLINK_NS, "title"):
            ("Referencing the parent %s of this (urn:uuid:%s) %s." %
             (packagetype, packageid, packagetype)),
            q(XLINK_NS, "href"): "urn:uuid:" + parent,
            "ID": "ID" + uuid.uuid4().__str__()
        })
        mets_div_rel.append(parent_pointer)

    ###########################
    # add to Mets skeleton
    ###########################

    # add the package content to the Mets skeleton
    for directory, subdirectories, filenames in os.walk(self.root_path):
        # build the earkstructmap: one div per directory, labelled with its
        # path relative to the package root
        path = os.path.relpath(directory, self.root_path)
        # NOTE(review): for the package root itself physical_div stays '';
        # the branches below only append to it for sub-directories.
        physical_div = ''
        if path != '.':
            physical_div = M.div({"LABEL": path})
            package_div.append(physical_div)

        # emptying both lists in place makes os.walk skip the files and the
        # sub-folders of these directories
        if directory.endswith('submission/metadata') or directory.endswith(
                'submission/schemas'):
            del filenames[:]
            del subdirectories[:]

        if directory == os.path.join(self.root_path, 'metadata'):
            # Metadata on IP root level - if there are folders for representation-specific metadata,
            # check if the corresponding representation has a Mets file. If yes, skip; if no, add to IP root Mets.
            for filename in filenames:
                if filename == 'earkweb.log':
                    mets_digiprovmd = M.digiprovMD(
                        {"ID": "ID" + uuid.uuid4().__str__()})
                    mets_amdSec.append(mets_digiprovmd)
                    id = "ID" + uuid.uuid4().__str__()
                    ref = self.make_mdref(directory, filename, id, 'OTHER')
                    mets_mdref = M.mdRef(ref)
                    mets_digiprovmd.append(mets_mdref)
                    mets_structmap_metadata_div.append(
                        M.fptr({"FILEID": id}))
                    physical_div.append(M.fptr({"FILEID": id}))
            del subdirectories[:]  # prevent loop to iterate subfolders outside of this if statement
            dirlist = os.listdir(os.path.join(self.root_path, 'metadata'))
            for dirname in dirlist:
                if fnmatch.fnmatch(dirname, '*_mig-*'):
                    # TODO: maybe list it all the time?
                    # this folder contains metadata for a representation/migration, currently:
                    # only listed if no representation Mets file exists
                    # NOTE(review): '%' is applied to the already joined
                    # path, so a '%' inside self.root_path would break it.
                    if os.path.isfile(
                            os.path.join(self.root_path,
                                         'representations/%s/METS.xml') %
                            dirname):
                        pass
                    else:
                        for dir, subdir, files in os.walk(
                                os.path.join(self.root_path, 'metadata/%s') %
                                dirname):
                            for filename in files:
                                if dir.endswith('descriptive'):
                                    mets_dmd = M.dmdSec({
                                        "ID": "ID" + uuid.uuid4().__str__()
                                    })
                                    # dmdSec elements are inserted at index 1
                                    # of the root, not appended at the end
                                    root.insert(1, mets_dmd)
                                    id = "ID" + uuid.uuid4().__str__()
                                    ref = self.make_mdref(
                                        dir, filename, id, 'OTHER')
                                    mets_mdref = M.mdRef(ref)
                                    mets_dmd.append(mets_mdref)
                                    mets_structmap_metadata_div.append(
                                        M.fptr({"FILEID": id}))
                                    physical_div.append(
                                        M.fptr({"FILEID": id}))
                                elif dir.endswith('preservation'):
                                    mets_digiprovmd = M.digiprovMD({
                                        "ID": "ID" + uuid.uuid4().__str__()
                                    })
                                    mets_amdSec.append(mets_digiprovmd)
                                    id = "ID" + uuid.uuid4().__str__()
                                    mdtype = ''
                                    if filename.startswith(
                                            'premis') or filename.endswith(
                                                'premis.xml'):
                                        mdtype = 'PREMIS'
                                    else:
                                        mdtype = 'OTHER'
                                    ref = self.make_mdref(
                                        dir, filename, id, mdtype)
                                    mets_mdref = M.mdRef(ref)
                                    mets_digiprovmd.append(mets_mdref)
                                    mets_structmap_metadata_div.append(
                                        M.fptr({"FILEID": id}))
                                    physical_div.append(
                                        M.fptr({"FILEID": id}))
                                elif filename:
                                    print 'Unclassified metadata file %s in %s.' % (
                                        filename, dir)
                else:
                    # metadata that should be listed in the Mets
                    for dir, subdir, files in os.walk(
                            os.path.join(self.root_path, 'metadata/%s') %
                            dirname):
                        if len(files) > 0:
                            for filename in files:
                                # classification uses the top-level folder
                                # name below metadata/, not the walked dir
                                if dirname == 'descriptive':
                                    mets_dmd = M.dmdSec({
                                        "ID": "ID" + uuid.uuid4().__str__()
                                    })
                                    root.insert(1, mets_dmd)
                                    id = "ID" + uuid.uuid4().__str__()
                                    # TODO: change MDTYPE
                                    ref = self.make_mdref(
                                        dir, filename, id, 'OTHER')
                                    mets_mdref = M.mdRef(ref)
                                    mets_dmd.append(mets_mdref)
                                    mets_structmap_metadata_div.append(
                                        M.fptr({"FILEID": id}))
                                    physical_div.append(
                                        M.fptr({"FILEID": id}))
                                elif dirname == 'preservation' or dirname == 'earkweb':
                                    mets_digiprovmd = M.digiprovMD({
                                        "ID": "ID" + uuid.uuid4().__str__()
                                    })
                                    mets_amdSec.append(mets_digiprovmd)
                                    id = "ID" + uuid.uuid4().__str__()
                                    mdtype = ''
                                    if filename.startswith(
                                            'premis') or filename.endswith(
                                                'premis.xml'):
                                        mdtype = 'PREMIS'
                                    elif filename:
                                        mdtype = 'OTHER'
                                    ref = self.make_mdref(
                                        dir, filename, id, mdtype)
                                    mets_mdref = M.mdRef(ref)
                                    mets_digiprovmd.append(mets_mdref)
                                    mets_structmap_metadata_div.append(
                                        M.fptr({"FILEID": id}))
                                    physical_div.append(
                                        M.fptr({"FILEID": id}))
                                elif filename:
                                    print 'Unclassified metadata file %s in %s.' % (
                                        filename, dir)
        else:
            # Any other folder outside of /<root>/metadata
            for filename in filenames:
                if directory == self.root_path:
                    # ignore files on IP root level
                    del filename
                else:
                    # TODO: list rep metadata only in the rep Mets?
                    rel_path_file = "file://./%s" % os.path.relpath(
                        os.path.join(directory, filename), self.root_path)
                    if filename.lower() == 'mets.xml':
                        # delete the subdirectories list to stop os.walk from traversing further;
                        # mets file should be added as <mets:mptr> to <structMap> for corresponding rep
                        del subdirectories[:]
                        # NOTE(review): assumes '/' as path separator
                        rep_name = directory.rsplit('/', 1)[1]
                        # add mets file as <mets:mptr>
                        metspointer = M.mptr({
                            "LOCTYPE": "URL",
                            q(XLINK_NS, "title"):
                            ("Mets file describing representation: %s of %s: urn:uuid:%s."
                             % (rep_name, packagetype, packageid)),
                            q(XLINK_NS, "href"): rel_path_file,
                            "ID": "ID" + uuid.uuid4().__str__()
                        })
                        physical_div.append(
                            metspointer
                        )  # IMPORTANT: The <mptr> element needs to be the first entry in a <div>, or the Mets will be invalid!
                        # also create a <fptr> for the Mets file
                        id = self.addFile(
                            os.path.join(directory, filename),
                            mets_filegroup)
                        physical_div.append(M.fptr({"FILEID": id}))
                    elif filename and directory.endswith('schemas'):
                        # schema files
                        id = self.addFile(
                            os.path.join(directory, filename),
                            mets_filegroup)
                        mets_structmap_schema_div.append(
                            M.fptr({'FILEID': id}))
                        physical_div.append(M.fptr({'FILEID': id}))
                    elif filename:
                        # everything else is listed as a content file
                        id = self.addFile(
                            os.path.join(directory, filename),
                            mets_filegroup)
                        mets_structmap_content_div.append(
                            M.fptr({'FILEID': id}))
                        physical_div.append(M.fptr({'FILEID': id}))

    # NOTE(review): the local name 'str' hides the builtin inside this
    # method, and the serialized document is written in text mode ('w').
    str = etree.tostring(root,
                         encoding='UTF-8',
                         pretty_print=True,
                         xml_declaration=True)

    path_mets = os.path.join(self.root_path, 'METS.xml')
    with open(path_mets, 'w') as output_file:
        output_file.write(str)
def createPremis(self):
    """
    Create <self.root_path>/metadata/preservation/premis.xml.

    The document contains one object of type 'representation' (so that at
    least one object exists even without data files), one object per file
    found below <self.root_path>/data (built by addObject) and the 'earkweb'
    software agent. The preservation folder, including missing parent
    folders, is created when it does not exist.

    @return: None
    """
    premis = P.premis({"version": "2.0"})
    premis.attrib[
        '{%s}schemaLocation' %
        XSI_NS] = "info:lc/xmlns/premis-v2 ../../schemas/premis-v2-2.xsd"

    # if there are no /data files, this will ensure that there is at least
    # one object (the IP itself)
    premis_id = 'ID' + uuid.uuid4().__str__()
    package_object = P.object(
        {
            q(XSI_NS, 'type'): 'representation',
            "xmlID": premis_id
        },
        P.objectIdentifier(
            P.objectIdentifierType('repository'),
            P.objectIdentifierValue('package-id-goes-here-?')),
    )
    premis.append(package_object)

    # create premis objects for files in this representation (self.root_path/data)
    data_dir = os.path.join(self.root_path, 'data')
    for directory, _subdirectories, filenames in os.walk(data_dir):
        for filename in filenames:
            premis.append(self.addObject(os.path.join(directory, filename)))

    # add agent
    identifier_value = 'earkweb'
    premis.append(
        P.agent(
            P.agentIdentifier(P.agentIdentifierType('LOCAL'),
                              P.agentIdentifierValue(identifier_value)),
            P.agentName('E-ARK AIP to DIP Converter'),
            P.agentType('Software')))

    serialized = etree.tostring(premis,
                                encoding='UTF-8',
                                pretty_print=True,
                                xml_declaration=True)

    preservation_dir = os.path.join(self.root_path, './metadata/preservation')
    if not os.path.exists(preservation_dir):
        # makedirs also creates a missing 'metadata' folder, where a plain
        # mkdir would fail
        os.makedirs(preservation_dir)

    path_premis = os.path.join(self.root_path,
                               './metadata/preservation/premis.xml')
    # tostring() with an explicit encoding produces an encoded byte string,
    # so the file is opened in binary mode.
    with open(path_premis, 'wb') as output_file:
        output_file.write(serialized)

    return
def createMigrationPremis(self, premis_info):
    """
    Create <self.root_path>/metadata/preservation/premis.xml for a migrated
    representation.

    The migration file named by premis_info['info'] is read; every
    <migration> element whose 'targetrep' attribute is a suffix of
    self.root_path yields a 'migration' event and an object for the output
    file, linked to its source file and to the event through a 'derivation'
    relationship. Objects are written first, then all events, then the
    'earkweb' software agent.

    @param premis_info: dict; only the key 'info' (path of the migration
                        XML file) is read here
    @return: None
    """
    premis = P.premis({"version": "2.0"})
    premis.attrib[
        '{%s}schemaLocation' %
        XSI_NS] = "info:lc/xmlns/premis-v2 ../../schemas/premis-v2-2.xsd"

    # creates an object that references the package or representation
    # TODO: identifier!
    premis_id = 'ID' + uuid.uuid4().__str__()
    package_object = P.object(
        {
            q(XSI_NS, 'type'): 'representation',
            "xmlID": premis_id
        },
        P.objectIdentifier(
            P.objectIdentifierType('repository'),
            P.objectIdentifierValue('package-id-goes-here-?')),
    )
    premis.append(package_object)

    # parse the migration.xml, add events and objects
    migration_events = []
    # the context manager closes the migration file once parsing is done
    with open(premis_info['info'], 'rb') as migration_file:
        for _, element in etree.iterparse(migration_file,
                                          events=('start', )):
            if element.tag != 'migration':
                continue
            if not self.root_path.endswith(element.attrib['targetrep']):
                continue

            event_id = 'ID' + uuid.uuid4().__str__()
            source_object_abs = os.path.join(element.attrib['sourcedir'],
                                             element.attrib['file'])
            source_object_rel = "file://./%s" % os.path.relpath(
                source_object_abs, self.root_path)
            target_object_abs = os.path.join(element.attrib['targetdir'],
                                             element.attrib['output'])
            target_object_rel = "file://./%s" % os.path.relpath(
                target_object_abs, self.root_path)

            # event
            migration_event = P.event(
                P.eventIdentifier(P.eventIdentifierType('local'),
                                  P.eventIdentifierValue(event_id)),
                P.eventType('migration'),
                # TODO: use event start or event end time?
                P.eventDateTime(element.attrib['starttime']),
                P.eventOutcomeInformation(P.eventOutcome('success')),
                P.linkingAgentIdentifier(
                    P.linkingAgentIdentifierType('software'),
                    P.linkingAgentIdentifierValue(
                        'should probably come from migrations.xml')),
                P.linkingObjectIdentifier(
                    P.linkingObjectIdentifierType('filepath'),
                    P.linkingObjectIdentifierValue(target_object_rel)))
            migration_events.append(migration_event)

            # object
            target_object = self.addObject(target_object_abs)
            # add the relationship to the migration event and the source file
            relationship = P.relationship(
                P.relationshipType('derivation'),
                P.relationshipSubType('has source'),
                P.relatedObjectIdentification(
                    P.relatedObjectIdentifierType('filepath'),
                    P.relatedObjectIdentifierValue(source_object_rel),
                    P.relatedObjectSequence('0')),
                P.relatedEventIdentification(
                    P.relatedEventIdentifierType('local'),
                    P.relatedEventIdentifierValue(event_id),
                    P.relatedEventSequence('1')),
            )
            target_object.append(relationship)
            premis.append(target_object)

    # append all events to premis root - they must be below the objects (due to validation)
    for migration_event in migration_events:
        premis.append(migration_event)

    # add agent
    identifier_value = 'earkweb'
    premis.append(
        P.agent(
            P.agentIdentifier(P.agentIdentifierType('LOCAL'),
                              P.agentIdentifierValue(identifier_value)),
            P.agentName('E-ARK AIP to DIP Converter'),
            P.agentType('Software')))

    # create the Premis file
    serialized = etree.tostring(premis,
                                encoding='UTF-8',
                                pretty_print=True,
                                xml_declaration=True)

    preservation_dir = os.path.join(self.root_path, 'metadata/preservation')
    if not os.path.exists(preservation_dir):
        os.makedirs(preservation_dir)

    path_premis = os.path.join(self.root_path,
                               'metadata/preservation/premis.xml')
    # tostring() with an explicit encoding produces an encoded byte string,
    # so the file is opened in binary mode.
    with open(path_premis, 'wb') as output_file:
        output_file.write(serialized)

    return