def get_events_from_db(uuid):
    """Collect the events stored for a file as premisrw-style data tuples.

    :param uuid: value matched against ``Event.file_uuid_id``.
    :return: list with one tuple per event. Each tuple starts with
        ``'event'`` and ``premisrw.PREMIS_META``, followed by the event
        fields and one ``linking_agent_identifier`` entry per linked agent.
    """
    collected = []
    for record in Event.objects.filter(file_uuid_id=uuid):
        identifier = (
            'event_identifier',
            ('event_identifier_type', 'UUID'),
            ('event_identifier_value', record.event_id),
        )
        kind = ('event_type', record.event_type)
        timestamp = ('event_date_time', record.event_datetime.isoformat())
        # String detailing the program and algorithm used and the program's
        # version (and any notable parameters passed).
        detail = ('event_detail', escape(record.event_detail))
        outcome = (
            'event_outcome_information',
            ('event_outcome', record.event_outcome),
            (
                'event_outcome_detail',
                ('event_outcome_detail_note',
                 escape(record.event_outcome_detail)),
            ),
        )
        # One linking entry per agent attached to the event, in query order.
        agent_links = tuple(
            (
                'linking_agent_identifier',
                ('linking_agent_identifier_type', agent.identifiertype),
                ('linking_agent_identifier_value', agent.identifiervalue),
            )
            for agent in record.agents.all()
        )
        collected.append(
            ('event', premisrw.PREMIS_META, identifier, kind, timestamp,
             detail, outcome) + agent_links
        )
    return collected
def tasks(request, uuid):
    """Render the paginated task list of the job identified by ``uuid``.

    When the job has no tasks the request is handed to ``tasks_subjobs``.

    NOTE: the template context is ``locals()``, so every local variable name
    in this function is visible to ``main/tasks.html``; the names are kept
    exactly as they were.
    """
    job = models.Job.objects.get(jobuuid=uuid)
    objects = job.task_set.all().order_by(
        '-exitcode', '-endtime', '-starttime', '-createdtime')

    if not len(objects):
        return tasks_subjobs(request, uuid)

    # Filenames can be any encoding - we want to be able to display
    # unicode, while just displaying unicode replacement characters
    # for any other encoding present.
    for item in objects:
        item.filename, item.arguments, item.stdout, item.stderror = (
            escape(item.filename),
            escape(item.arguments),
            escape(item.stdout),
            escape(item.stderror),
        )

    page = helpers.pager(
        objects, django_settings.TASKS_PER_PAGE, request.GET.get('page'))
    objects = page.object_list

    # Attach the duration in seconds to each task on the current page.
    for object in objects:
        object.duration = helpers.task_duration_in_seconds(object)

    return render(request, 'main/tasks.html', locals())
def createFileSec(path, file_group_identifier, base_path, base_path_name, parentBranch, structMapParent, sip_uuid):
    """Recursively fill the METS fileSec and structMap for ``path``.

    :param path: directory currently being described.
    :param file_group_identifier: passed through to ``each_child``.
    :param base_path: root directory of the recursion; when ``path`` equals
        it, only its ``objects/`` sub-directory is descended into.
    :param base_path_name: passed through to ``each_child``.
    :param parentBranch: fileSec element that receives the ``mets:file``
        entries.
    :param structMapParent: structMap ``div`` describing ``path``; it gets
        TYPE/LABEL attributes plus ``fptr``/``div`` children.
    :param sip_uuid: passed through to ``each_child``.
    :return: None; the two element trees are modified in place.
    """
    print("createFileSec: ", path, parentBranch, structMapParent, file=sys.stderr)
    doneFirstRun = True
    pathSTR = str(path)
    if pathSTR == base_path + "objects/":
        # The SIP's objects folder is always labelled "objects".
        pathSTR = "objects"
    if path == base_path:
        # Very first call of the recursion: label this div after the SIP
        # directory and descend only into its objects/ folder.
        pathSTR = os.path.basename(os.path.dirname(base_path))
        div = newChild(structMapParent, ns.metsBNS + "div")
        createFileSec(os.path.join(path, "objects/"), file_group_identifier, base_path, base_path_name, parentBranch, div, sip_uuid)
        doneFirstRun = False

    structMapParent.set("TYPE", "directory")
    structMapParent.set("LABEL", escape(os.path.basename(pathSTR)))

    if not doneFirstRun:
        return

    for item in each_child(path, file_group_identifier, base_path, base_path_name, sip_uuid):
        if not isinstance(item, File):
            # Anything that is not a File is joined onto the path and
            # described by a nested div.
            div = newChild(structMapParent, ns.metsBNS + "div")
            createFileSec(os.path.join(path, item), file_group_identifier, base_path, base_path_name, parentBranch, div, sip_uuid)
            continue

        location = item.currentlocation.replace('%transferDirectory%', "", 1)
        file_uuid = str(item.uuid)
        file_id = "file-" + file_uuid

        # structMap file pointer
        fptr = newChild(structMapParent, ns.metsBNS + "fptr")
        fptr.set("FILEID", escape(file_id))

        # If the file already exists in the fileSec, don't create
        # a second entry.
        fileI = parentBranch.find('./mets:file[@ID="{}"]'.format(file_id), namespaces=ns.NSMAP)
        if fileI is None:
            fileI = etree.SubElement(parentBranch, ns.metsBNS + "file")
            fileI.set("ID", escape(file_id))

            Flocat = newChild(fileI, ns.metsBNS + "FLocat")
            Flocat.set(ns.xlinkBNS + "href", escape(location))
            Flocat.set("LOCTYPE", "OTHER")
            Flocat.set("OTHERLOCTYPE", "SYSTEM")

            # used when adding amdSecs at a later time
            fileI.set("ADMID", "digiprov-" + file_uuid)
def createDigiprovMD(fileUUID):
    """Build the digiprovMD elements describing the events of one file.

    One ``digiprovMD`` element wrapping a PREMIS 2.2 ``event`` is created
    for each ``Events`` row whose ``fileUUID`` matches. Every event also
    receives a ``linkingAgentIdentifier`` for each row of ``Agents``.

    :param fileUUID: UUID string of the file whose events are exported.
    :return: list of ``digiprovMD`` elements, in query order.
    """
    global globalDigiprovMDCounter
    ret = []
    # NOTE(review): fileUUID is concatenated into the SQL text. If it can
    # ever come from untrusted input this must become a parameterized query;
    # whether databaseInterface supports parameters is not visible here.
    sql = "SELECT pk, fileUUID, eventIdentifierUUID, eventType, eventDateTime, eventDetail, eventOutcome, eventOutcomeDetailNote, linkingAgentIdentifier FROM Events WHERE fileUUID = '" + fileUUID + "';"
    agent_sql = "SELECT agentIdentifierType, agentIdentifierValue, agentName, agentType FROM Agents;"
    for row in databaseInterface.queryAllSQL(sql):
        globalDigiprovMDCounter += 1
        digiprovMD = etree.Element("digiprovMD")
        ret.append(digiprovMD)
        digiprovMD.set("ID", "digiprovMD_" + str(globalDigiprovMDCounter))

        mdWrap = newChild(digiprovMD, "mdWrap")
        mdWrap.set("MDTYPE", "PREMIS:EVENT")
        xmlData = newChild(mdWrap, "xmlData")

        event = etree.SubElement(xmlData, "event", nsmap={None: premisNS})
        event.set(xsiBNS + "schemaLocation", premisNS + " http://www.loc.gov/standards/premis/v2/premis-v2-2.xsd")
        event.set("version", "2.2")

        eventIdentifier = etree.SubElement(event, "eventIdentifier")
        etree.SubElement(eventIdentifier, "eventIdentifierType").text = "UUID"
        etree.SubElement(eventIdentifier, "eventIdentifierValue").text = row[2]

        etree.SubElement(event, "eventType").text = row[3]
        # The stored date/time is rendered with a space separator; PREMIS
        # output uses "T" between date and time.
        etree.SubElement(event, "eventDateTime").text = str(row[4]).replace(" ", "T")
        etree.SubElement(event, "eventDetail").text = escape(row[5])

        eventOutcomeInformation = etree.SubElement(event, "eventOutcomeInformation")
        etree.SubElement(eventOutcomeInformation, "eventOutcome").text = row[6]
        eventOutcomeDetail = etree.SubElement(eventOutcomeInformation, "eventOutcomeDetail")
        etree.SubElement(eventOutcomeDetail, "eventOutcomeDetailNote").text = escape(row[7])

        if row[8]:
            # The event row carries its own linking agent value.
            linkingAgentIdentifier = etree.SubElement(event, "linkingAgentIdentifier")
            etree.SubElement(linkingAgentIdentifier, "linkingAgentIdentifierType").text = "Archivematica user pk"
            etree.SubElement(linkingAgentIdentifier, "linkingAgentIdentifierValue").text = str(row[8])

        # Link every row of the Agents table to the event.
        c, sqlLock = databaseInterface.querySQL(agent_sql)
        try:
            agent = c.fetchone()
            while agent is not None:
                linkingAgentIdentifier = etree.SubElement(event, "linkingAgentIdentifier")
                etree.SubElement(linkingAgentIdentifier, "linkingAgentIdentifierType").text = agent[0]
                etree.SubElement(linkingAgentIdentifier, "linkingAgentIdentifierValue").text = agent[1]
                agent = c.fetchone()
        finally:
            # Always hand back the lock returned by querySQL, even when
            # building an element raises.
            sqlLock.release()
    return ret
def createEvent(event_record):
    """Return a PREMIS 2.2 ``event`` element built from ``event_record``.

    The element carries the event identifier, type, date/time, detail and
    outcome information, followed by one ``linkingAgentIdentifier`` per
    agent in ``event_record.agents``.
    """
    premis = ns.premisBNS

    def leaf(parent, tag, text):
        # Append a premis-namespaced child holding ``text``.
        etree.SubElement(parent, premis + tag).text = text

    root = etree.Element(premis + "event", nsmap={'premis': ns.premisNS})
    root.set(
        ns.xsiBNS + "schemaLocation",
        ns.premisNS + " http://www.loc.gov/standards/premis/v2/premis-v2-2.xsd",
    )
    root.set("version", "2.2")

    identifier = etree.SubElement(root, premis + "eventIdentifier")
    leaf(identifier, "eventIdentifierType", "UUID")
    leaf(identifier, "eventIdentifierValue", event_record.event_id)

    leaf(root, "eventType", event_record.event_type)
    leaf(root, "eventDateTime", event_record.event_datetime.isoformat())
    leaf(root, "eventDetail", escape(event_record.event_detail))

    outcome_info = etree.SubElement(root, premis + "eventOutcomeInformation")
    leaf(outcome_info, "eventOutcome", event_record.event_outcome)
    outcome_detail = etree.SubElement(outcome_info, premis + "eventOutcomeDetail")
    leaf(outcome_detail, "eventOutcomeDetailNote",
         escape(event_record.event_outcome_detail))

    # linkingAgentIdentifier: one per agent attached to the event record.
    for linked in event_record.agents.all():
        link = etree.SubElement(root, premis + "linkingAgentIdentifier")
        leaf(link, "linkingAgentIdentifierType", linked.identifiertype)
        leaf(link, "linkingAgentIdentifierValue", linked.identifiervalue)

    return root
def dir_obj_to_premis(dir_obj):
    """Convert a Directory model object into a PREMIS object via metsrw.

    The PREMIS data consists of the object header, the identifiers returned
    by ``get_premis_object_identifiers`` and the escaped original location.

    Returns:
        lxml.etree._Element
    """
    name_entry = ("original_name", escape(dir_obj.originallocation))
    identifiers = get_premis_object_identifiers(
        dir_obj.uuid, dir_obj.identifiers.all())

    header = ("object", IE_PREMIS_META)
    premis_data = header + identifiers + (name_entry,)

    return metsrw.plugins.premisrw.data_to_premis(
        premis_data, premis_version=IE_PREMIS_META["version"])
def file_obj_to_premis(file_obj):
    """Convert a File model object into a PREMIS object via metsrw.

    The resulting data holds the object identifiers, the object
    characteristics (fixity, size, format, creating application and any
    characteristics extensions), the original name and the relationships.

    Returns:
        lxml.etree._Element
    """
    digest_algorithm = convert_to_premis_hash_function(file_obj.checksumtype)
    format_data = get_premis_format_data(file_obj.fileid_set.all())
    name_entry = ("original_name", escape(file_obj.originallocation))
    identifiers = get_premis_object_identifiers(
        file_obj.uuid, file_obj.identifiers.all())
    extensions = get_premis_object_characteristics_extension(
        file_obj.characterization_documents)
    relationships = get_premis_relationship_data(
        file_obj.related_is_source_of, file_obj.related_has_source)

    fixity = (
        "fixity",
        ("message_digest_algorithm", digest_algorithm),
        ("message_digest", file_obj.checksum),
    )
    creating_application = (
        "creating_application",
        (
            "date_created_by_application",
            file_obj.modificationtime.strftime("%Y-%m-%d"),
        ),
    )
    characteristics = (
        "object_characteristics",
        ("composition_level", "0"),
        fixity,
        ("size", str(file_obj.size)),
        format_data,
        creating_application,
    )
    # Extensions are appended only when there is something to append.
    if extensions:
        characteristics += extensions

    premis_data = (
        ("object", FILE_PREMIS_META)
        + identifiers
        + (characteristics, name_entry)
        + relationships
    )
    return metsrw.plugins.premisrw.data_to_premis(
        premis_data, premis_version=FILE_PREMIS_META["version"])
def ingest_normalization_report(request, uuid, current_page=None):
    """Render the normalization report page for a SIP.

    :param request: request object passed through to ``render``.
    :param uuid: SIP UUID used to select the jobs and the report rows.
    :param current_page: page to show; the first page when omitted.
    :return: result of ``render`` for ``ingest/normalization_report.html``.

    NOTE: the template context is ``locals()``, so the local variable names
    below are visible to the template and are kept unchanged.
    """
    jobs = models.Job.objects.filter(sipuuid=uuid, subjobof='')
    sipname = utils.get_directory_name_from_job(jobs)

    objects = getNormalizationReportQuery(sipUUID=uuid)
    for o in objects:
        o['location'] = archivematicaFunctions.escape(o['location'])

    results_per_page = 10

    # None is a singleton: test identity rather than equality.
    if current_page is None:
        current_page = 1

    page = helpers.pager(objects, results_per_page, current_page)
    hit_count = len(objects)

    return render(request, 'ingest/normalization_report.html', locals())
def createDigiprovMD(fileUUID):
    """Build the digiprovMD elements describing the events of one file.

    One METS ``digiprovMD`` element wrapping a PREMIS 2.2 ``event`` is
    created for each ``Event`` whose ``file_uuid_id`` matches. Every event
    also receives a ``linkingAgentIdentifier`` for each ``Agent`` object.

    :param fileUUID: UUID of the file whose events are exported.
    :return: list of ``digiprovMD`` elements, in query order.
    """
    global globalDigiprovMDCounter
    premis = ns.premisBNS
    ret = []

    # The same agents are linked to every event. The queryset is lazy, so
    # no query runs unless there is at least one event; Django then caches
    # the rows after the first iteration instead of re-querying per event.
    agents = Agent.objects.all()

    for event_record in Event.objects.filter(file_uuid_id=fileUUID):
        globalDigiprovMDCounter += 1
        digiprovMD = etree.Element(ns.metsBNS + "digiprovMD")
        ret.append(digiprovMD)
        digiprovMD.set("ID", "digiprovMD_" + str(globalDigiprovMDCounter))

        mdWrap = etree.SubElement(digiprovMD, ns.metsBNS + "mdWrap")
        mdWrap.set("MDTYPE", "PREMIS:EVENT")
        xmlData = etree.SubElement(mdWrap, ns.metsBNS + "xmlData")

        event = etree.SubElement(xmlData, premis + "event", nsmap={'premis': ns.premisNS})
        event.set(ns.xsiBNS + "schemaLocation", ns.premisNS + " http://www.loc.gov/standards/premis/v2/premis-v2-2.xsd")
        event.set("version", "2.2")

        eventIdentifier = etree.SubElement(event, premis + "eventIdentifier")
        etree.SubElement(eventIdentifier, premis + "eventIdentifierType").text = "UUID"
        etree.SubElement(eventIdentifier, premis + "eventIdentifierValue").text = event_record.event_id

        etree.SubElement(event, premis + "eventType").text = event_record.event_type
        etree.SubElement(event, premis + "eventDateTime").text = event_record.event_datetime.isoformat()
        etree.SubElement(event, premis + "eventDetail").text = escape(event_record.event_detail)

        eventOutcomeInformation = etree.SubElement(event, premis + "eventOutcomeInformation")
        etree.SubElement(eventOutcomeInformation, premis + "eventOutcome").text = event_record.event_outcome
        eventOutcomeDetail = etree.SubElement(eventOutcomeInformation, premis + "eventOutcomeDetail")
        etree.SubElement(eventOutcomeDetail, premis + "eventOutcomeDetailNote").text = escape(event_record.event_outcome_detail)

        if event_record.linking_agent:
            # The event carries its own linking agent value.
            linkingAgentIdentifier = etree.SubElement(event, premis + "linkingAgentIdentifier")
            etree.SubElement(linkingAgentIdentifier, premis + "linkingAgentIdentifierType").text = "Archivematica user pk"
            etree.SubElement(linkingAgentIdentifier, premis + "linkingAgentIdentifierValue").text = str(event_record.linking_agent)

        # linkingAgentIdentifier for every known agent
        for agent in agents:
            linkingAgentIdentifier = etree.SubElement(event, premis + "linkingAgentIdentifier")
            etree.SubElement(linkingAgentIdentifier, premis + "linkingAgentIdentifierType").text = agent.identifiertype
            etree.SubElement(linkingAgentIdentifier, premis + "linkingAgentIdentifierValue").text = agent.identifiervalue
    return ret
def create_premis_object(fileUUID):
    """
    Build the PREMIS:OBJECT element for the file identified by fileUUID.

    Reads the File record directly; format, characteristics-extension and
    derivation children come from the ``create_premis_object_*`` helpers.

    :param str fileUUID: UUID of the File to create an object for
    :return: premis:object Element, suitable for inserting into mets:xmlData
    """
    premis = ns.premisBNS
    record = File.objects.get(uuid=fileUUID)

    # PREMIS:OBJECT root
    root = etree.Element(premis + "object", nsmap={'premis': ns.premisNS})
    root.set(ns.xsiBNS + "type", "premis:file")
    root.set(
        ns.xsiBNS + "schemaLocation",
        ns.premisNS + " http://www.loc.gov/standards/premis/v2/premis-v2-2.xsd",
    )
    root.set("version", "2.2")

    identifier = etree.SubElement(root, premis + "objectIdentifier")
    etree.SubElement(identifier, premis + "objectIdentifierType").text = "UUID"
    etree.SubElement(identifier, premis + "objectIdentifierValue").text = fileUUID

    characteristics = etree.SubElement(root, premis + "objectCharacteristics")
    etree.SubElement(characteristics, premis + "compositionLevel").text = "0"

    fixity = etree.SubElement(characteristics, premis + "fixity")
    etree.SubElement(fixity, premis + "messageDigestAlgorithm").text = record.checksumtype
    etree.SubElement(fixity, premis + "messageDigest").text = record.checksum

    etree.SubElement(characteristics, premis + "size").text = str(record.size)

    # Formats first, then characteristics extensions, in helper order.
    characteristics.extend(create_premis_object_formats(fileUUID))
    characteristics.extend(
        create_premis_object_characteristics_extensions(fileUUID))

    etree.SubElement(root, premis + "originalName").text = escape(record.originallocation)

    # Derivation relationships go last, directly under the object.
    root.extend(create_premis_object_derivations(fileUUID))

    return root
def ingest_normalization_report(request, uuid, current_page=None):
    """Render the normalization report of a SIP, with validation flags.

    Each report row gets its location escaped and four derivative
    validation values from ``derivative_validation_report``.

    NOTE: the template receives ``locals()``, so the local variable names in
    this function are part of the template context and are kept as they were.
    """
    if current_page is None:
        current_page = 1
    results_per_page = 10

    jobs = models.Job.objects.filter(sipuuid=uuid, subjobof='')
    sipname = jobs.get_directory_name()

    objects = getNormalizationReportQuery(sipUUID=uuid)
    for o in objects:
        o['location'] = archivematicaFunctions.escape(o['location'])
        # The helper's four values map onto these four keys in this order.
        (
            o['preservation_derivative_validation_attempted'],
            o['preservation_derivative_validation_failed'],
            o['access_derivative_validation_attempted'],
            o['access_derivative_validation_failed'],
        ) = derivative_validation_report(o)

    page = helpers.pager(objects, results_per_page, current_page)
    hit_count = len(objects)

    return render(request, 'ingest/normalization_report.html', locals())
def ingest_normalization_report(request, uuid, current_page=None):
    """Render the normalization report for the SIP identified by ``uuid``.

    Report rows come from ``getNormalizationReportQuery``; each row has its
    location escaped and receives the four derivative validation values
    returned by ``derivative_validation_report``.

    NOTE: ``locals()`` is the template context, so the local names below are
    deliberately left untouched.
    """
    if current_page is None:
        current_page = 1
    results_per_page = 10

    jobs = models.Job.objects.filter(sipuuid=uuid)
    sipname = jobs.get_directory_name()

    objects = getNormalizationReportQuery(sipUUID=uuid)
    for o in objects:
        o["location"] = escape(o["location"])
        # Unpack the helper's result straight into the row, in this order.
        (
            o["preservation_derivative_validation_attempted"],
            o["preservation_derivative_validation_failed"],
            o["access_derivative_validation_attempted"],
            o["access_derivative_validation_failed"],
        ) = derivative_validation_report(o)

    page = helpers.pager(objects, results_per_page, current_page)
    hit_count = len(objects)

    return render(request, "ingest/normalization_report.html", locals())
def _techmd_query_rows(sql):
    """Run ``sql`` and return all rows, always releasing the query lock."""
    c, sqlLock = databaseInterface.querySQL(sql)
    try:
        rows = []
        row = c.fetchone()
        while row is not None:
            rows.append(row)
            row = c.fetchone()
        return rows
    finally:
        sqlLock.release()


def createTechMD(fileUUID):
    """Build the techMD element holding the PREMIS object of one file.

    The PREMIS 2.2 ``object`` contains the identifier, fixity, size, format
    information, FITS output, original name and derivation relationships
    read from the database for ``fileUUID``.

    :param fileUUID: UUID string of the file to describe.
    :return: the ``techMD`` element.
    """
    global globalTechMDCounter

    def add(parent, tag, text=None):
        # Append a child element; leave its text unset when none is given.
        child = etree.SubElement(parent, tag)
        if text is not None:
            child.text = text
        return child

    ret = etree.Element("techMD")
    globalTechMDCounter += 1
    ret.set("ID", "techMD_" + str(globalTechMDCounter))

    mdWrap = newChild(ret, "mdWrap")
    mdWrap.set("MDTYPE", "PREMIS:OBJECT")
    xmlData = newChild(mdWrap, "xmlData")

    # NOTE(review): fileUUID is spliced into every SQL statement below. If it
    # can come from untrusted input these must become parameterized queries;
    # whether databaseInterface supports parameters is not visible here.
    # NOTE(review): when no Files row matches, fileSize/checksum are never
    # bound and the fixity lines below raise, exactly as before this change.
    sql = "SELECT fileSize, checksum FROM Files WHERE fileUUID = '%s';" % (fileUUID)
    for row in _techmd_query_rows(sql):
        fileSize = str(row[0])
        checksum = str(row[1])

    # OBJECT
    premis_object = etree.SubElement(xmlData, "object", nsmap={None: premisNS})
    premis_object.set(xsiBNS + "type", "file")
    premis_object.set(xsiBNS + "schemaLocation", premisNS + " http://www.loc.gov/standards/premis/v2/premis-v2-2.xsd")
    premis_object.set("version", "2.2")

    objectIdentifier = add(premis_object, "objectIdentifier")
    add(objectIdentifier, "objectIdentifierType", "UUID")
    add(objectIdentifier, "objectIdentifierValue", fileUUID)

    objectCharacteristics = add(premis_object, "objectCharacteristics")
    add(objectCharacteristics, "compositionLevel", "0")

    fixity = add(objectCharacteristics, "fixity")
    add(fixity, "messageDigestAlgorithm", "sha256")
    add(fixity, "messageDigest", checksum)
    add(objectCharacteristics, "size", fileSize)

    sql = "SELECT formatName, formatVersion, formatRegistryName, formatRegistryKey FROM FilesIDs WHERE fileUUID = '%s';" % (fileUUID)
    format_rows = _techmd_query_rows(sql)
    if not format_rows:
        # No identification recorded: emit a placeholder format.
        format_elem = add(objectCharacteristics, "format")
        add(add(format_elem, "formatDesignation"), "formatName", "Unknown")
    for row in format_rows:
        format_elem = add(objectCharacteristics, "format")
        formatDesignation = add(format_elem, "formatDesignation")
        add(formatDesignation, "formatName", row[0])
        add(formatDesignation, "formatVersion", row[1])
        formatRegistry = add(format_elem, "formatRegistry")
        add(formatRegistry, "formatRegistryName", row[2])
        add(formatRegistry, "formatRegistryKey", row[3])

    objectCharacteristicsExtension = add(objectCharacteristics, "objectCharacteristicsExtension")
    sql = "SELECT FilesFits.FITSxml FROM FilesFits WHERE fileUUID = '" + fileUUID + "';"
    parser = etree.XMLParser(remove_blank_text=True)
    for row in _techmd_query_rows(sql):
        objectCharacteristicsExtension.append(etree.XML(row[0], parser))

    sql = "SELECT Files.originalLocation FROM Files WHERE Files.fileUUID = '" + fileUUID + "';"
    location_rows = _techmd_query_rows(sql)
    if not location_rows:
        # This lookup is for the original location, not FITS output, so the
        # message now says so.
        sys.stderr.write("Error: no location found.\n")
    for row in location_rows:
        add(premis_object, "originalName", escape(row[0]))

    # Derivations: (query, relationship sub type, column of the related file)
    derivation_queries = (
        ("SELECT sourceFileUUID, derivedFileUUID, relatedEventUUID FROM Derivations WHERE sourceFileUUID = '" + fileUUID + "';", "is source of", 1),
        ("SELECT sourceFileUUID, derivedFileUUID, relatedEventUUID FROM Derivations WHERE derivedFileUUID = '" + fileUUID + "';", "has source", 0),
    )
    for sql, sub_type, related_index in derivation_queries:
        for row in _techmd_query_rows(sql):
            relationship = add(premis_object, "relationship")
            add(relationship, "relationshipType", "derivation")
            add(relationship, "relationshipSubType", sub_type)

            relatedObjectIdentification = add(relationship, "relatedObjectIdentification")
            add(relatedObjectIdentification, "relatedObjectIdentifierType", "UUID")
            add(relatedObjectIdentification, "relatedObjectIdentifierValue", row[related_index])

            relatedEventIdentification = add(relationship, "relatedEventIdentification")
            add(relatedEventIdentification, "relatedEventIdentifierType", "UUID")
            add(relatedEventIdentification, "relatedEventIdentifierValue", row[2])

    return ret
def createTechMD(fileUUID):
    """Build the METS techMD element holding the PREMIS object of one file.

    The PREMIS 2.2 ``object`` contains the identifier, fixity, size, format
    information, characterization output, original name and derivation
    relationships stored for ``fileUUID``.

    :param fileUUID: UUID of the File to describe.
    :return: the ``techMD`` element.
    :raises File.DoesNotExist: when no File has this UUID.
    """
    global globalTechMDCounter
    premis = ns.premisBNS

    def add(parent, tag, text=None):
        # Append a premis-namespaced child; leave text unset when not given.
        child = etree.SubElement(parent, premis + tag)
        if text is not None:
            child.text = text
        return child

    ret = etree.Element(ns.metsBNS + "techMD")
    globalTechMDCounter += 1
    ret.set("ID", "techMD_" + str(globalTechMDCounter))

    mdWrap = etree.SubElement(ret, ns.metsBNS + "mdWrap")
    mdWrap.set("MDTYPE", "PREMIS:OBJECT")
    xmlData = etree.SubElement(mdWrap, ns.metsBNS + "xmlData")

    # Single lookup of the File record; it is reused for the original name
    # below instead of being fetched a second time.
    f = File.objects.get(uuid=fileUUID)

    # OBJECT
    premis_object = etree.SubElement(xmlData, premis + "object", nsmap={'premis': ns.premisNS})
    premis_object.set(ns.xsiBNS + "type", "premis:file")
    premis_object.set(ns.xsiBNS + "schemaLocation", ns.premisNS + " http://www.loc.gov/standards/premis/v2/premis-v2-2.xsd")
    premis_object.set("version", "2.2")

    objectIdentifier = add(premis_object, "objectIdentifier")
    add(objectIdentifier, "objectIdentifierType", "UUID")
    add(objectIdentifier, "objectIdentifierValue", fileUUID)

    objectCharacteristics = add(premis_object, "objectCharacteristics")
    add(objectCharacteristics, "compositionLevel", "0")

    fixity = add(objectCharacteristics, "fixity")
    add(fixity, "messageDigestAlgorithm", "sha256")
    add(fixity, "messageDigest", f.checksum)
    add(objectCharacteristics, "size", str(f.size))

    files = FileID.objects.filter(file_id=fileUUID)
    if not files.exists():
        # No identification recorded: emit a placeholder format.
        format_elem = add(objectCharacteristics, "format")
        add(add(format_elem, "formatDesignation"), "formatName", "Unknown")
    for row in files.values_list('format_name', 'format_version', 'format_registry_name', 'format_registry_key'):
        format_elem = add(objectCharacteristics, "format")
        formatDesignation = add(format_elem, "formatDesignation")
        add(formatDesignation, "formatName", row[0])
        add(formatDesignation, "formatVersion", row[1])
        formatRegistry = add(format_elem, "formatRegistry")
        add(formatRegistry, "formatRegistryName", row[2])
        add(formatRegistry, "formatRegistryKey", row[3])

    objectCharacteristicsExtension = add(objectCharacteristics, "objectCharacteristicsExtension")
    parser = etree.XMLParser(remove_blank_text=True)
    documents = FPCommandOutput.objects.filter(
        file_id=fileUUID,
        rule__purpose__in=['characterization', 'default_characterization'],
    ).values_list('content')
    for document, in documents:
        # This needs to be converted into an str because lxml doesn't accept
        # XML documents in unicode strings if the document contains an
        # encoding declaration.
        objectCharacteristicsExtension.append(etree.XML(document.encode("utf-8"), parser))

    add(premis_object, "originalName", escape(f.originallocation))

    # Derivations: (relationship sub type, lazy queryset, attribute holding
    # the UUID of the file on the other side of the relationship).
    derivation_sets = (
        ("is source of", Derivation.objects.filter(source_file_id=fileUUID, event__isnull=False), "derived_file_id"),
        ("has source", Derivation.objects.filter(derived_file_id=fileUUID, event__isnull=False), "source_file_id"),
    )
    for sub_type, derivations, related_attr in derivation_sets:
        for derivation in derivations:
            relationship = add(premis_object, "relationship")
            add(relationship, "relationshipType", "derivation")
            add(relationship, "relationshipSubType", sub_type)

            relatedObjectIdentification = add(relationship, "relatedObjectIdentification")
            add(relatedObjectIdentification, "relatedObjectIdentifierType", "UUID")
            add(relatedObjectIdentification, "relatedObjectIdentifierValue", getattr(derivation, related_attr))

            relatedEventIdentification = add(relationship, "relatedEventIdentification")
            add(relatedEventIdentification, "relatedEventIdentifierType", "UUID")
            add(relatedEventIdentification, "relatedEventIdentifierValue", derivation.event_id)

    return ret
def _techmd_query_rows(sql):
    """Run ``sql`` and return all rows, always releasing the query lock."""
    c, sqlLock = databaseInterface.querySQL(sql)
    try:
        rows = []
        row = c.fetchone()
        while row is not None:
            rows.append(row)
            row = c.fetchone()
        return rows
    finally:
        sqlLock.release()


def createTechMD(fileUUID):
    """Build the techMD element holding the PREMIS object of one file.

    The PREMIS 2.2 ``object`` contains the identifier, fixity, size, format
    information, FITS output, original name and derivation relationships
    read from the database for ``fileUUID``.

    :param fileUUID: UUID string of the file to describe.
    :return: the ``techMD`` element.
    """
    global globalTechMDCounter

    def add(parent, tag, text=None):
        # Append a child element; leave its text unset when none is given.
        child = etree.SubElement(parent, tag)
        if text is not None:
            child.text = text
        return child

    ret = etree.Element("techMD")
    globalTechMDCounter += 1
    ret.set("ID", "techMD_" + str(globalTechMDCounter))

    mdWrap = newChild(ret, "mdWrap")
    mdWrap.set("MDTYPE", "PREMIS:OBJECT")
    xmlData = newChild(mdWrap, "xmlData")

    # NOTE(review): fileUUID is spliced into every SQL statement below. If it
    # can come from untrusted input these must become parameterized queries;
    # whether databaseInterface supports parameters is not visible here.
    # NOTE(review): when no Files row matches, fileSize/checksum are never
    # bound and the fixity lines below raise, exactly as before this change.
    sql = "SELECT fileSize, checksum FROM Files WHERE fileUUID = '%s';" % (fileUUID)
    for row in _techmd_query_rows(sql):
        fileSize = str(row[0])
        checksum = str(row[1])

    # OBJECT
    premis_object = etree.SubElement(xmlData, "object", nsmap={None: premisNS})
    premis_object.set(xsiBNS + "type", "file")
    premis_object.set(xsiBNS + "schemaLocation", premisNS + " http://www.loc.gov/standards/premis/v2/premis-v2-2.xsd")
    premis_object.set("version", "2.2")

    objectIdentifier = add(premis_object, "objectIdentifier")
    add(objectIdentifier, "objectIdentifierType", "UUID")
    add(objectIdentifier, "objectIdentifierValue", fileUUID)

    objectCharacteristics = add(premis_object, "objectCharacteristics")
    add(objectCharacteristics, "compositionLevel", "0")

    fixity = add(objectCharacteristics, "fixity")
    add(fixity, "messageDigestAlgorithm", "sha256")
    add(fixity, "messageDigest", checksum)
    add(objectCharacteristics, "size", fileSize)

    sql = "SELECT formatName, formatVersion, formatRegistryName, formatRegistryKey FROM FilesIDs WHERE fileUUID = '%s';" % (fileUUID)
    format_rows = _techmd_query_rows(sql)
    if not format_rows:
        # No identification recorded: emit a placeholder format.
        format_elem = add(objectCharacteristics, "format")
        add(add(format_elem, "formatDesignation"), "formatName", "Unknown")
    for row in format_rows:
        format_elem = add(objectCharacteristics, "format")
        formatDesignation = add(format_elem, "formatDesignation")
        add(formatDesignation, "formatName", row[0])
        add(formatDesignation, "formatVersion", row[1])
        formatRegistry = add(format_elem, "formatRegistry")
        add(formatRegistry, "formatRegistryName", row[2])
        add(formatRegistry, "formatRegistryKey", row[3])

    objectCharacteristicsExtension = add(objectCharacteristics, "objectCharacteristicsExtension")
    sql = "SELECT FilesFits.FITSxml FROM FilesFits WHERE fileUUID = '" + fileUUID + "';"
    parser = etree.XMLParser(remove_blank_text=True)
    for row in _techmd_query_rows(sql):
        objectCharacteristicsExtension.append(etree.XML(row[0], parser))

    sql = "SELECT Files.originalLocation FROM Files WHERE Files.fileUUID = '" + fileUUID + "';"
    location_rows = _techmd_query_rows(sql)
    if not location_rows:
        # This lookup is for the original location, not FITS output, so the
        # message now says so.
        sys.stderr.write("Error: no location found.\n")
    for row in location_rows:
        add(premis_object, "originalName", escape(row[0]))

    # Derivations: (query, relationship sub type, column of the related file)
    derivation_queries = (
        ("SELECT sourceFileUUID, derivedFileUUID, relatedEventUUID FROM Derivations WHERE sourceFileUUID = '" + fileUUID + "';", "is source of", 1),
        ("SELECT sourceFileUUID, derivedFileUUID, relatedEventUUID FROM Derivations WHERE derivedFileUUID = '" + fileUUID + "';", "has source", 0),
    )
    for sql, sub_type, related_index in derivation_queries:
        for row in _techmd_query_rows(sql):
            relationship = add(premis_object, "relationship")
            add(relationship, "relationshipType", "derivation")
            add(relationship, "relationshipSubType", sub_type)

            relatedObjectIdentification = add(relationship, "relatedObjectIdentification")
            add(relatedObjectIdentification, "relatedObjectIdentifierType", "UUID")
            add(relatedObjectIdentification, "relatedObjectIdentifierValue", row[related_index])

            relatedEventIdentification = add(relationship, "relatedEventIdentification")
            add(relatedEventIdentification, "relatedEventIdentifierType", "UUID")
            add(relatedEventIdentification, "relatedEventIdentifierValue", row[2])

    return ret