Esempio n. 1
0
def get_events_from_db(uuid):
    """Collect PREMIS event data for every ``Event`` row linked to a file.

    Each event is returned as a nested tuple whose first two items are the
    string ``'event'`` and ``premisrw.PREMIS_META``. They are followed by the
    identifier, type, date/time, detail and outcome entries, and then by one
    ``linking_agent_identifier`` entry per agent related to the event.

    :param uuid: value matched against ``Event.file_uuid_id``.
    :return: list of event tuples, in queryset iteration order.
    """
    collected = []
    for record in Event.objects.filter(file_uuid_id=uuid):
        meta = premisrw.PREMIS_META
        identifier = (
            'event_identifier',
            ('event_identifier_type', 'UUID'),
            ('event_identifier_value', record.event_id),
        )
        kind = ('event_type', record.event_type)
        timestamp = ('event_date_time', record.event_datetime.isoformat())
        # String detailing the program and algorithm used and the program's
        # version (and any notable parameters passed).
        detail = ('event_detail', escape(record.event_detail))
        outcome = (
            'event_outcome_information',
            ('event_outcome', record.event_outcome),
            (
                'event_outcome_detail',
                ('event_outcome_detail_note',
                 escape(record.event_outcome_detail)),
            ),
        )
        # One trailing entry per agent attached to this event.
        linking_agents = tuple(
            (
                'linking_agent_identifier',
                ('linking_agent_identifier_type', agent.identifiertype),
                ('linking_agent_identifier_value', agent.identifiervalue),
            )
            for agent in record.agents.all()
        )
        fixed_part = ('event', meta, identifier, kind, timestamp, detail,
                      outcome)
        collected.append(fixed_part + linking_agents)
    return collected
Esempio n. 2
0
def tasks(request, uuid):
    """Render the task list page for the job whose ``jobuuid`` is ``uuid``.

    If the job has no tasks, the request is handed to ``tasks_subjobs``.
    Otherwise the tasks are escaped, paginated, given a ``duration``
    attribute and rendered with the ``main/tasks.html`` template.

    NOTE: the template context is ``locals()``, so every local name defined
    here (``job``, ``objects``, ``page``, ...) is exposed to the template.
    Renaming or removing a local changes that context.
    """
    job = models.Job.objects.get(jobuuid=uuid)
    objects = job.task_set.all().order_by('-exitcode', '-endtime',
                                          '-starttime', '-createdtime')

    # No tasks on this job: let the sub-jobs view handle the request.
    if (len(objects) == 0):
        return tasks_subjobs(request, uuid)

    # Filenames can be any encoding - we want to be able to display
    # unicode, while just displaying unicode replacement characters
    # for any other encoding present.
    for item in objects:
        item.filename = escape(item.filename)
        item.arguments = escape(item.arguments)
        item.stdout = escape(item.stdout)
        item.stderror = escape(item.stderror)

    page = helpers.pager(objects, django_settings.TASKS_PER_PAGE,
                         request.GET.get('page', None))
    # From here on only the current page's tasks are processed.
    objects = page.object_list

    # figure out duration in seconds
    for object in objects:
        object.duration = helpers.task_duration_in_seconds(object)

    # locals() is the template context (see the docstring note).
    return render(request, 'main/tasks.html', locals())
Esempio n. 3
0
def createFileSec(path, file_group_identifier, base_path, base_path_name, parentBranch, structMapParent, sip_uuid):
    """Recursively add the contents of ``path`` to the METS fileSec and structMap.

    On the first call (``path == base_path``) a child ``div`` is created under
    ``structMapParent`` and the function recurses into ``<path>/objects/``;
    the children of ``path`` itself are not visited on that call. On every
    other call each item yielded by ``each_child`` is handled as follows:

    * a ``File`` gets an ``fptr`` in the structMap and, unless a ``mets:file``
      with the same ID already exists in ``parentBranch``, a new ``file``
      element with an ``FLocat`` child;
    * anything else is joined onto ``path`` and recursed into under a new
      ``div``.

    In every call ``structMapParent`` is marked ``TYPE="directory"`` and
    labelled with the base name of the directory.

    :param path: directory being processed.
    :param file_group_identifier: passed through to ``each_child``.
    :param base_path: root path; equality with ``path`` marks the first call.
    :param base_path_name: passed through to ``each_child``.
    :param parentBranch: element that receives the ``file`` entries.
    :param structMapParent: structMap ``div`` describing ``path``.
    :param sip_uuid: passed through to ``each_child``.
    :return: None; the two element trees are modified in place.
    """
    print("createFileSec: ", path, parentBranch, structMapParent, file=sys.stderr)
    doneFirstRun = True
    pathSTR = str(path)
    if pathSTR == base_path + "objects/":  # If it's the SIP folder, it's OBJECTS
        pathSTR = "objects"
    if path == base_path:  # the very first run through (recursive function)
        pathSTR = os.path.basename(os.path.dirname(base_path))

        # structMap directory
        div = newChild(structMapParent, ns.metsBNS + "div")
        createFileSec(os.path.join(path, "objects/"), file_group_identifier, base_path, base_path_name, parentBranch, div, sip_uuid)
        doneFirstRun = False

    structMapParent.set("TYPE", "directory")
    structMapParent.set("LABEL", escape(os.path.basename(pathSTR)))

    if not doneFirstRun:
        # The first call only descends into "objects/"; nothing more to do.
        return

    for item in each_child(path, file_group_identifier, base_path, base_path_name, sip_uuid):
        if not isinstance(item, File):
            div = newChild(structMapParent, ns.metsBNS + "div")
            createFileSec(os.path.join(path, item), file_group_identifier, base_path, base_path_name, parentBranch, div, sip_uuid)
            continue

        location = item.currentlocation.replace('%transferDirectory%', "", 1)
        # Coerce once so the ID, FILEID and ADMID all use the same text form.
        file_uuid = str(item.uuid)
        file_id = "file-" + file_uuid

        # structMap file
        fptr = newChild(structMapParent, ns.metsBNS + "fptr")
        fptr.set("FILEID", escape(file_id))

        # If the file already exists in the fileSec, don't create
        # a second entry.
        fileI = parentBranch.find('./mets:file[@ID="{}"]'.format(file_id), namespaces=ns.NSMAP)
        if fileI is not None:
            continue

        fileI = etree.SubElement(parentBranch, ns.metsBNS + "file")
        fileI.set("ID", escape(file_id))

        Flocat = newChild(fileI, ns.metsBNS + "FLocat")
        Flocat.set(ns.xlinkBNS + "href", escape(location))
        Flocat.set("LOCTYPE", "OTHER")
        Flocat.set("OTHERLOCTYPE", "SYSTEM")

        # used when adding amdSecs at a later time
        fileI.set("ADMID", "digiprov-" + file_uuid)
def createDigiprovMD(fileUUID):
    """Build one ``digiprovMD`` element per event stored for ``fileUUID``.

    Every row selected from the ``Events`` table becomes a ``digiprovMD``
    element wrapping a PREMIS 2.2 ``event``. Each event links to the user in
    the row's ``linkingAgentIdentifier`` column (when set) and to every row
    of the ``Agents`` table. The module-level ``globalDigiprovMDCounter`` is
    incremented once per event and used to build the element ID.

    :param fileUUID: file UUID interpolated into the ``Events`` query.
    :return: list of ``digiprovMD`` elements, possibly empty.
    """
    global globalDigiprovMDCounter
    ret = []

    # NOTE(review): fileUUID is concatenated straight into the SQL text. If
    # it can ever come from untrusted input this is injectable; switch to a
    # parameterized query if databaseInterface offers one - TODO confirm.
    sql = "SELECT pk, fileUUID, eventIdentifierUUID, eventType, eventDateTime, eventDetail, eventOutcome, eventOutcomeDetailNote, linkingAgentIdentifier FROM Events WHERE fileUUID = '" + fileUUID + "';"
    agents_sql = """SELECT agentIdentifierType, agentIdentifierValue, agentName, agentType FROM Agents;"""

    for row in databaseInterface.queryAllSQL(sql):
        digiprovMD = etree.Element("digiprovMD")
        ret.append(digiprovMD)
        globalDigiprovMDCounter += 1
        digiprovMD.set("ID", "digiprovMD_" + str(globalDigiprovMDCounter))

        mdWrap = newChild(digiprovMD, "mdWrap")
        mdWrap.set("MDTYPE", "PREMIS:EVENT")
        xmlData = newChild(mdWrap, "xmlData")
        event = etree.SubElement(xmlData, "event", nsmap={None: premisNS})
        event.set(xsiBNS+"schemaLocation", premisNS + " http://www.loc.gov/standards/premis/v2/premis-v2-2.xsd")
        event.set("version", "2.2")

        eventIdentifier = etree.SubElement(event, "eventIdentifier")
        etree.SubElement(eventIdentifier, "eventIdentifierType").text = "UUID"
        etree.SubElement(eventIdentifier, "eventIdentifierValue").text = row[2]

        etree.SubElement(event, "eventType").text = row[3]
        # Turn the "date time" text form into the "dateTtime" form.
        etree.SubElement(event, "eventDateTime").text = str(row[4]).replace(" ", "T")
        etree.SubElement(event, "eventDetail").text = escape(row[5])

        eventOutcomeInformation = etree.SubElement(event, "eventOutcomeInformation")
        etree.SubElement(eventOutcomeInformation, "eventOutcome").text = row[6]
        eventOutcomeDetail = etree.SubElement(eventOutcomeInformation, "eventOutcomeDetail")
        etree.SubElement(eventOutcomeDetail, "eventOutcomeDetailNote").text = escape(row[7])

        if row[8]:
            linkingAgentIdentifier = etree.SubElement(event, "linkingAgentIdentifier")
            etree.SubElement(linkingAgentIdentifier, "linkingAgentIdentifierType").text = "Archivematica user pk"
            etree.SubElement(linkingAgentIdentifier, "linkingAgentIdentifierValue").text = str(row[8])

        # linkingAgentIdentifier for every known agent.
        c, sqlLock = databaseInterface.querySQL(agents_sql)
        try:
            # A distinct name keeps the event row intact; fetchone() returns
            # None once the cursor is exhausted, which ends the iteration.
            for agent_row in iter(c.fetchone, None):
                linkingAgentIdentifier = etree.SubElement(event, "linkingAgentIdentifier")
                etree.SubElement(linkingAgentIdentifier, "linkingAgentIdentifierType").text = agent_row[0]
                etree.SubElement(linkingAgentIdentifier, "linkingAgentIdentifierValue").text = agent_row[1]
        finally:
            # Always hand the lock back, even if building the XML raised.
            sqlLock.release()
    return ret
Esempio n. 5
0
def createEvent(event_record):
    """Serialize ``event_record`` as a ``premis:event`` element (version 2.2).

    The element carries the event identifier, type, date/time, escaped
    detail, outcome information and one ``linkingAgentIdentifier`` per agent
    in ``event_record.agents``.

    :param event_record: object exposing ``event_id``, ``event_type``,
        ``event_datetime``, ``event_detail``, ``event_outcome``,
        ``event_outcome_detail`` and ``agents``.
    :return: the ``premis:event`` element.
    """
    def branch(parent, local_name):
        """Append an empty premis-namespaced child and return it."""
        return etree.SubElement(parent, ns.premisBNS + local_name)

    def leaf(parent, local_name, text):
        """Append a premis-namespaced child holding ``text``."""
        branch(parent, local_name).text = text

    root = etree.Element(ns.premisBNS + "event", nsmap={'premis': ns.premisNS})
    root.set(ns.xsiBNS + "schemaLocation", ns.premisNS + " http://www.loc.gov/standards/premis/v2/premis-v2-2.xsd")
    root.set("version", "2.2")

    identifier = branch(root, "eventIdentifier")
    leaf(identifier, "eventIdentifierType", "UUID")
    leaf(identifier, "eventIdentifierValue", event_record.event_id)

    leaf(root, "eventType", event_record.event_type)
    leaf(root, "eventDateTime", event_record.event_datetime.isoformat())
    leaf(root, "eventDetail", escape(event_record.event_detail))

    outcome_info = branch(root, "eventOutcomeInformation")
    leaf(outcome_info, "eventOutcome", event_record.event_outcome)
    outcome_detail = branch(outcome_info, "eventOutcomeDetail")
    leaf(outcome_detail, "eventOutcomeDetailNote", escape(event_record.event_outcome_detail))

    # One linkingAgentIdentifier per agent attached to the event.
    for linked_agent in event_record.agents.all():
        agent_identifier = branch(root, "linkingAgentIdentifier")
        leaf(agent_identifier, "linkingAgentIdentifierType", linked_agent.identifiertype)
        leaf(agent_identifier, "linkingAgentIdentifierValue", linked_agent.identifiervalue)
    return root
Esempio n. 6
0
def dir_obj_to_premis(dir_obj):
    """
    Convert a Directory model object to a PREMIS object via metsrw.

    The PREMIS data is the ``("object", IE_PREMIS_META)`` header, followed by
    the identifiers from ``get_premis_object_identifiers`` and the escaped
    original location.

    Returns:
        lxml.etree._Element
    """
    escaped_location = escape(dir_obj.originallocation)
    identifiers = get_premis_object_identifiers(
        dir_obj.uuid, dir_obj.identifiers.all()
    )

    header = ("object", IE_PREMIS_META)
    name_entry = ("original_name", escaped_location)
    data = header + identifiers + (name_entry,)

    to_premis = metsrw.plugins.premisrw.data_to_premis
    return to_premis(data, premis_version=IE_PREMIS_META["version"])
Esempio n. 7
0
def file_obj_to_premis(file_obj):
    """
    Convert a File model object to a PREMIS object via metsrw.

    The PREMIS data is made of the ``("object", FILE_PREMIS_META)`` header,
    the object identifiers, the object characteristics (fixity, size, format,
    creating application and any characteristics extensions), the escaped
    original name and the derivation relationships.

    Returns:
        lxml.etree._Element
    """
    digest_algorithm = convert_to_premis_hash_function(file_obj.checksumtype)
    formats = get_premis_format_data(file_obj.fileid_set.all())
    escaped_location = escape(file_obj.originallocation)
    identifiers = get_premis_object_identifiers(
        file_obj.uuid, file_obj.identifiers.all()
    )
    extensions = get_premis_object_characteristics_extension(
        file_obj.characterization_documents
    )
    relationships = get_premis_relationship_data(
        file_obj.related_is_source_of, file_obj.related_has_source
    )

    fixity = (
        "fixity",
        ("message_digest_algorithm", digest_algorithm),
        ("message_digest", file_obj.checksum),
    )
    size = ("size", str(file_obj.size))
    creating_application = (
        "creating_application",
        (
            "date_created_by_application",
            file_obj.modificationtime.strftime("%Y-%m-%d"),
        ),
    )
    characteristics = (
        "object_characteristics",
        ("composition_level", "0"),
        fixity,
        size,
        formats,
        creating_application,
    )
    # Extensions, when present, are appended to the characteristics tuple.
    if extensions:
        characteristics = characteristics + extensions

    header = ("object", FILE_PREMIS_META)
    tail = (characteristics, ("original_name", escaped_location))
    data = header + identifiers + tail + relationships

    to_premis = metsrw.plugins.premisrw.data_to_premis
    return to_premis(data, premis_version=FILE_PREMIS_META["version"])
Esempio n. 8
0
def ingest_normalization_report(request, uuid, current_page=None):
    """Render the normalization report page for the SIP ``uuid``.

    :param request: request object passed on to ``render``.
    :param uuid: SIP UUID used to select the jobs and the report rows.
    :param current_page: page to display; ``None`` means the first page.
    :return: the response produced by ``render`` for
        ``ingest/normalization_report.html``.

    NOTE: the template context is ``locals()``, so the local names in this
    function (``jobs``, ``sipname``, ``objects``, ``page``, ``hit_count``,
    ...) are part of the template's interface and must not be renamed.
    """
    jobs = models.Job.objects.filter(sipuuid=uuid, subjobof='')
    sipname = utils.get_directory_name_from_job(jobs)

    objects = getNormalizationReportQuery(sipUUID=uuid)
    for o in objects:
        o['location'] = archivematicaFunctions.escape(o['location'])

    results_per_page = 10

    # None is a singleton, so test identity rather than equality.
    if current_page is None:
        current_page = 1

    page = helpers.pager(objects, results_per_page, current_page)
    hit_count = len(objects)

    return render(request, 'ingest/normalization_report.html', locals())
def createDigiprovMD(fileUUID):
    """Build one METS ``digiprovMD`` element per event recorded for a file.

    Every ``Event`` whose ``file_uuid_id`` equals ``fileUUID`` becomes a
    ``digiprovMD`` element wrapping a PREMIS 2.2 ``event``. Each event links
    to ``event_record.linking_agent`` (when set) and to every ``Agent`` row.
    The module-level ``globalDigiprovMDCounter`` is incremented once per
    event and used to build the element ID.

    :param fileUUID: UUID of the file whose events are serialized.
    :return: list of ``digiprovMD`` elements, possibly empty.
    """
    global globalDigiprovMDCounter
    ret = []

    # The same agents are linked to every event. Reusing one queryset lets
    # its result cache serve every iteration after the first, instead of
    # running the query again for each event.
    agents = Agent.objects.all()

    for event_record in Event.objects.filter(file_uuid_id=fileUUID):
        digiprovMD = etree.Element(ns.metsBNS + "digiprovMD")
        ret.append(digiprovMD)
        globalDigiprovMDCounter += 1
        digiprovMD.set("ID", "digiprovMD_" + str(globalDigiprovMDCounter))

        mdWrap = etree.SubElement(digiprovMD, ns.metsBNS + "mdWrap")
        mdWrap.set("MDTYPE", "PREMIS:EVENT")
        xmlData = etree.SubElement(mdWrap, ns.metsBNS + "xmlData")
        event = etree.SubElement(xmlData, ns.premisBNS + "event", nsmap={'premis': ns.premisNS})
        event.set(ns.xsiBNS+"schemaLocation", ns.premisNS + " http://www.loc.gov/standards/premis/v2/premis-v2-2.xsd")
        event.set("version", "2.2")

        eventIdentifier = etree.SubElement(event, ns.premisBNS + "eventIdentifier")
        etree.SubElement(eventIdentifier, ns.premisBNS + "eventIdentifierType").text = "UUID"
        etree.SubElement(eventIdentifier, ns.premisBNS + "eventIdentifierValue").text = event_record.event_id

        etree.SubElement(event, ns.premisBNS + "eventType").text = event_record.event_type
        etree.SubElement(event, ns.premisBNS + "eventDateTime").text = event_record.event_datetime.isoformat()
        etree.SubElement(event, ns.premisBNS + "eventDetail").text = escape(event_record.event_detail)

        eventOutcomeInformation = etree.SubElement(event, ns.premisBNS + "eventOutcomeInformation")
        etree.SubElement(eventOutcomeInformation, ns.premisBNS + "eventOutcome").text = event_record.event_outcome
        eventOutcomeDetail = etree.SubElement(eventOutcomeInformation, ns.premisBNS + "eventOutcomeDetail")
        etree.SubElement(eventOutcomeDetail, ns.premisBNS + "eventOutcomeDetailNote").text = escape(event_record.event_outcome_detail)

        if event_record.linking_agent:
            linkingAgentIdentifier = etree.SubElement(event, ns.premisBNS + "linkingAgentIdentifier")
            etree.SubElement(linkingAgentIdentifier, ns.premisBNS + "linkingAgentIdentifierType").text = "Archivematica user pk"
            etree.SubElement(linkingAgentIdentifier, ns.premisBNS + "linkingAgentIdentifierValue").text = str(event_record.linking_agent)

        # linkingAgentIdentifier for every known agent.
        for agent in agents:
            linkingAgentIdentifier = etree.SubElement(event, ns.premisBNS + "linkingAgentIdentifier")
            etree.SubElement(linkingAgentIdentifier, ns.premisBNS + "linkingAgentIdentifierType").text = agent.identifiertype
            etree.SubElement(linkingAgentIdentifier, ns.premisBNS + "linkingAgentIdentifierValue").text = agent.identifiervalue
    return ret
Esempio n. 10
0
def create_premis_object(fileUUID):
    """
    Build the PREMIS:OBJECT element describing the File with ``fileUUID``.

    Identifier, fixity, size and original name come from the File model.
    Formats, characteristics extensions and derivations are the elements
    returned by ``create_premis_object_formats``,
    ``create_premis_object_characteristics_extensions`` and
    ``create_premis_object_derivations``.

    :param str fileUUID: UUID of the File to create an object for
    :return: premis:object Element, suitable for inserting into mets:xmlData
    """
    def branch(parent, local_name):
        """Append an empty premis-namespaced child and return it."""
        return etree.SubElement(parent, ns.premisBNS + local_name)

    def leaf(parent, local_name, text):
        """Append a premis-namespaced child holding ``text``."""
        branch(parent, local_name).text = text

    f = File.objects.get(uuid=fileUUID)

    # PREMIS:OBJECT
    root = etree.Element(ns.premisBNS + "object", nsmap={'premis': ns.premisNS})
    root.set(ns.xsiBNS + "type", "premis:file")
    root.set(ns.xsiBNS + "schemaLocation", ns.premisNS + " http://www.loc.gov/standards/premis/v2/premis-v2-2.xsd")
    root.set("version", "2.2")

    identifier = branch(root, "objectIdentifier")
    leaf(identifier, "objectIdentifierType", "UUID")
    leaf(identifier, "objectIdentifierValue", fileUUID)

    characteristics = branch(root, "objectCharacteristics")
    leaf(characteristics, "compositionLevel", "0")

    fixity = branch(characteristics, "fixity")
    leaf(fixity, "messageDigestAlgorithm", f.checksumtype)
    leaf(fixity, "messageDigest", f.checksum)

    leaf(characteristics, "size", str(f.size))

    # Formats first, then extensions, both inside objectCharacteristics.
    for format_elem in create_premis_object_formats(fileUUID):
        characteristics.append(format_elem)
    for extension_elem in create_premis_object_characteristics_extensions(fileUUID):
        characteristics.append(extension_elem)

    leaf(root, "originalName", escape(f.originallocation))

    # Derivation relationships hang directly off the object element.
    for derivation_elem in create_premis_object_derivations(fileUUID):
        root.append(derivation_elem)

    return root
Esempio n. 11
0
def ingest_normalization_report(request, uuid, current_page=None):
    """Render the normalization report page for the SIP ``uuid``.

    Each report row gets its ``location`` escaped and four derivative
    validation values filled in from ``derivative_validation_report``.

    :param request: request object passed on to ``render``.
    :param uuid: SIP UUID used to select the jobs and the report rows.
    :param current_page: page to display; ``None`` means the first page.
    :return: the response produced by ``render`` for
        ``ingest/normalization_report.html``.

    NOTE: the template context is ``locals()``, so every local name in this
    function is visible to the template.
    """
    jobs = models.Job.objects.filter(sipuuid=uuid, subjobof='')
    sipname = jobs.get_directory_name()

    objects = getNormalizationReportQuery(sipUUID=uuid)
    for o in objects:
        o['location'] = archivematicaFunctions.escape(o['location'])
        # derivative_validation_report must return exactly four values,
        # unpacked in this order.
        (o['preservation_derivative_validation_attempted'],
         o['preservation_derivative_validation_failed'],
         o['access_derivative_validation_attempted'],
         o['access_derivative_validation_failed']
         ) = derivative_validation_report(o)

    results_per_page = 10

    # Default to the first page when no page was requested.
    if current_page is None:
        current_page = 1

    page = helpers.pager(objects, results_per_page, current_page)
    hit_count = len(objects)

    # locals() is the template context (see the docstring note).
    return render(request, 'ingest/normalization_report.html', locals())
Esempio n. 12
0
def ingest_normalization_report(request, uuid, current_page=None):
    """Render the normalization report page for the SIP ``uuid``.

    Each report row gets its ``location`` escaped and four derivative
    validation values filled in from ``derivative_validation_report``.

    :param request: request object passed on to ``render``.
    :param uuid: SIP UUID used to select the jobs and the report rows.
    :param current_page: page to display; ``None`` means the first page.
    :return: the response produced by ``render`` for
        ``ingest/normalization_report.html``.

    NOTE: the template context is ``locals()``, so every local name in this
    function is visible to the template.
    """
    jobs = models.Job.objects.filter(sipuuid=uuid)
    sipname = jobs.get_directory_name()

    objects = getNormalizationReportQuery(sipUUID=uuid)
    for o in objects:
        o["location"] = escape(o["location"])
        # derivative_validation_report must return exactly four values,
        # unpacked in this order.
        (
            o["preservation_derivative_validation_attempted"],
            o["preservation_derivative_validation_failed"],
            o["access_derivative_validation_attempted"],
            o["access_derivative_validation_failed"],
        ) = derivative_validation_report(o)

    results_per_page = 10

    # Default to the first page when no page was requested.
    if current_page is None:
        current_page = 1

    page = helpers.pager(objects, results_per_page, current_page)
    hit_count = len(objects)

    # locals() is the template context (see the docstring note).
    return render(request, "ingest/normalization_report.html", locals())
Esempio n. 13
0
def _fetch_all_rows(sql):
    """Run ``sql`` via ``databaseInterface.querySQL`` and return all its rows.

    The lock returned by ``querySQL`` is released in a ``finally`` clause so
    it cannot stay held when reading from the cursor raises.
    """
    c, sqlLock = databaseInterface.querySQL(sql)
    try:
        # fetchone() returns None once the cursor is exhausted.
        return list(iter(c.fetchone, None))
    finally:
        sqlLock.release()


def _append_derivation_relationship(parent, sub_type, related_object_uuid, related_event_uuid):
    """Append a PREMIS "derivation" ``relationship`` element to ``parent``."""
    relationship = etree.SubElement(parent, "relationship")
    etree.SubElement(relationship, "relationshipType").text = "derivation"
    etree.SubElement(relationship, "relationshipSubType").text = sub_type

    relatedObjectIdentification = etree.SubElement(
        relationship, "relatedObjectIdentification")
    etree.SubElement(relatedObjectIdentification,
                     "relatedObjectIdentifierType").text = "UUID"
    etree.SubElement(relatedObjectIdentification,
                     "relatedObjectIdentifierValue").text = related_object_uuid

    relatedEventIdentification = etree.SubElement(
        relationship, "relatedEventIdentification")
    etree.SubElement(relatedEventIdentification,
                     "relatedEventIdentifierType").text = "UUID"
    etree.SubElement(relatedEventIdentification,
                     "relatedEventIdentifierValue").text = related_event_uuid


def createTechMD(fileUUID):
    """Build the ``techMD`` element holding the PREMIS object for a file.

    The element wraps a PREMIS 2.2 ``object`` with the file's identifier,
    fixity, size, formats (``Unknown`` when none are recorded), FITS output,
    original name and derivation relationships, all read through
    ``databaseInterface``. The module-level ``globalTechMDCounter`` is
    incremented and used to build the element ID.

    :param fileUUID: UUID of the file, interpolated into the SQL queries.
    :return: the ``techMD`` element.
    :raises ValueError: if the ``Files`` table has no row for ``fileUUID``
        (previously this surfaced later as an unbound-variable error).
    """
    global globalTechMDCounter

    # NOTE(review): fileUUID is interpolated straight into the SQL text
    # below. If it can ever come from untrusted input this is injectable;
    # switch to parameterized queries if databaseInterface offers them -
    # TODO confirm.
    ret = etree.Element("techMD")
    globalTechMDCounter += 1
    ret.set("ID", "techMD_" + str(globalTechMDCounter))

    mdWrap = newChild(ret, "mdWrap")
    mdWrap.set("MDTYPE", "PREMIS:OBJECT")
    xmlData = newChild(mdWrap, "xmlData")

    file_rows = _fetch_all_rows(
        "SELECT fileSize, checksum FROM Files WHERE fileUUID = '%s';" % fileUUID)
    if not file_rows:
        raise ValueError("No Files row found for fileUUID '%s'" % fileUUID)
    # As before, the last matching row wins.
    fileSize = str(file_rows[-1][0])
    checksum = str(file_rows[-1][1])

    # OBJECT
    premis_object = etree.SubElement(xmlData, "object", nsmap={None: premisNS})
    premis_object.set(xsiBNS + "type", "file")
    premis_object.set(
        xsiBNS + "schemaLocation",
        premisNS + " http://www.loc.gov/standards/premis/v2/premis-v2-2.xsd")
    premis_object.set("version", "2.2")

    objectIdentifier = etree.SubElement(premis_object, "objectIdentifier")
    etree.SubElement(objectIdentifier, "objectIdentifierType").text = "UUID"
    etree.SubElement(objectIdentifier, "objectIdentifierValue").text = fileUUID

    objectCharacteristics = etree.SubElement(premis_object, "objectCharacteristics")
    etree.SubElement(objectCharacteristics, "compositionLevel").text = "0"

    fixity = etree.SubElement(objectCharacteristics, "fixity")
    etree.SubElement(fixity, "messageDigestAlgorithm").text = "sha256"
    etree.SubElement(fixity, "messageDigest").text = checksum

    etree.SubElement(objectCharacteristics, "size").text = fileSize

    format_rows = _fetch_all_rows(
        "SELECT formatName, formatVersion, formatRegistryName, formatRegistryKey FROM FilesIDs WHERE fileUUID = '%s';" % fileUUID)
    if not format_rows:
        # No identification recorded: emit a placeholder format.
        format_elem = etree.SubElement(objectCharacteristics, "format")
        formatDesignation = etree.SubElement(format_elem, "formatDesignation")
        etree.SubElement(formatDesignation, "formatName").text = "Unknown"
    for format_row in format_rows:
        format_elem = etree.SubElement(objectCharacteristics, "format")

        formatDesignation = etree.SubElement(format_elem, "formatDesignation")
        etree.SubElement(formatDesignation, "formatName").text = format_row[0]
        etree.SubElement(formatDesignation, "formatVersion").text = format_row[1]

        formatRegistry = etree.SubElement(format_elem, "formatRegistry")
        etree.SubElement(formatRegistry, "formatRegistryName").text = format_row[2]
        etree.SubElement(formatRegistry, "formatRegistryKey").text = format_row[3]

    objectCharacteristicsExtension = etree.SubElement(
        objectCharacteristics, "objectCharacteristicsExtension")

    fits_rows = _fetch_all_rows(
        "SELECT FilesFits.FITSxml FROM FilesFits WHERE fileUUID = '" + fileUUID + "';")
    parser = etree.XMLParser(remove_blank_text=True)
    for fits_row in fits_rows:
        objectCharacteristicsExtension.append(etree.XML(fits_row[0], parser))

    location_rows = _fetch_all_rows(
        "SELECT Files.originalLocation FROM Files WHERE Files.fileUUID = '" + fileUUID + "';")
    if not location_rows:
        # write() works on both Python 2 and 3, unlike ``print >>``.
        sys.stderr.write("Error: no location found.\n")
    for location_row in location_rows:
        etree.SubElement(premis_object, "originalName").text = escape(location_row[0])

    # Derivations: rows are (sourceFileUUID, derivedFileUUID, relatedEventUUID).
    source_rows = _fetch_all_rows(
        "SELECT sourceFileUUID, derivedFileUUID, relatedEventUUID FROM Derivations WHERE sourceFileUUID = '" + fileUUID + "';")
    for derivation_row in source_rows:
        _append_derivation_relationship(
            premis_object, "is source of", derivation_row[1], derivation_row[2])

    derived_rows = _fetch_all_rows(
        "SELECT sourceFileUUID, derivedFileUUID, relatedEventUUID FROM Derivations WHERE derivedFileUUID = '" + fileUUID + "';")
    for derivation_row in derived_rows:
        _append_derivation_relationship(
            premis_object, "has source", derivation_row[0], derivation_row[2])

    return ret
def createTechMD(fileUUID):
    """Build the METS ``techMD`` element holding the PREMIS object for a file.

    The element wraps a PREMIS 2.2 ``premis:object`` with the file's
    identifier, fixity, size, formats (``Unknown`` when no ``FileID`` rows
    exist), characterization output, original name and derivation
    relationships. The module-level ``globalTechMDCounter`` is incremented
    and used to build the element ID.

    :param fileUUID: UUID of the file to describe.
    :return: the ``techMD`` element.
    :raises File.DoesNotExist: if no ``File`` has this UUID.
    """
    global globalTechMDCounter

    def branch(parent, local_name):
        """Append an empty premis-namespaced child and return it."""
        return etree.SubElement(parent, ns.premisBNS + local_name)

    def leaf(parent, local_name, text):
        """Append a premis-namespaced child holding ``text``."""
        branch(parent, local_name).text = text

    def add_derivation(parent, sub_type, related_file_id, event_id):
        """Append a "derivation" relationship element to ``parent``."""
        relationship = branch(parent, "relationship")
        leaf(relationship, "relationshipType", "derivation")
        leaf(relationship, "relationshipSubType", sub_type)

        related_object = branch(relationship, "relatedObjectIdentification")
        leaf(related_object, "relatedObjectIdentifierType", "UUID")
        leaf(related_object, "relatedObjectIdentifierValue", related_file_id)

        related_event = branch(relationship, "relatedEventIdentification")
        leaf(related_event, "relatedEventIdentifierType", "UUID")
        leaf(related_event, "relatedEventIdentifierValue", event_id)

    ret = etree.Element(ns.metsBNS + "techMD")
    globalTechMDCounter += 1
    ret.set("ID", "techMD_" + str(globalTechMDCounter))

    mdWrap = etree.SubElement(ret, ns.metsBNS + "mdWrap")
    mdWrap.set("MDTYPE", "PREMIS:OBJECT")
    xmlData = etree.SubElement(mdWrap, ns.metsBNS + "xmlData")

    # Fetched once and reused below for the original name. A missing file
    # raises here, so no later "not found" handling is needed.
    f = File.objects.get(uuid=fileUUID)

    # OBJECT
    premis_object = etree.SubElement(xmlData, ns.premisBNS + "object", nsmap={'premis': ns.premisNS})
    premis_object.set(ns.xsiBNS + "type", "premis:file")
    premis_object.set(ns.xsiBNS + "schemaLocation", ns.premisNS + " http://www.loc.gov/standards/premis/v2/premis-v2-2.xsd")
    premis_object.set("version", "2.2")

    objectIdentifier = branch(premis_object, "objectIdentifier")
    leaf(objectIdentifier, "objectIdentifierType", "UUID")
    leaf(objectIdentifier, "objectIdentifierValue", fileUUID)

    objectCharacteristics = branch(premis_object, "objectCharacteristics")
    leaf(objectCharacteristics, "compositionLevel", "0")

    fixity = branch(objectCharacteristics, "fixity")
    # NOTE(review): the algorithm is hard-coded rather than read from the
    # File record - confirm every checksum stored is really sha256.
    leaf(fixity, "messageDigestAlgorithm", "sha256")
    leaf(fixity, "messageDigest", f.checksum)

    leaf(objectCharacteristics, "size", str(f.size))

    files = FileID.objects.filter(file_id=fileUUID)
    if not files.exists():
        # No identification recorded: emit a placeholder format.
        format_elem = branch(objectCharacteristics, "format")
        formatDesignation = branch(format_elem, "formatDesignation")
        leaf(formatDesignation, "formatName", "Unknown")
    for row in files.values_list('format_name', 'format_version', 'format_registry_name', 'format_registry_key'):
        format_elem = branch(objectCharacteristics, "format")

        formatDesignation = branch(format_elem, "formatDesignation")
        leaf(formatDesignation, "formatName", row[0])
        leaf(formatDesignation, "formatVersion", row[1])

        formatRegistry = branch(format_elem, "formatRegistry")
        leaf(formatRegistry, "formatRegistryName", row[2])
        leaf(formatRegistry, "formatRegistryKey", row[3])

    objectCharacteristicsExtension = branch(objectCharacteristics, "objectCharacteristicsExtension")

    parser = etree.XMLParser(remove_blank_text=True)
    documents = FPCommandOutput.objects.filter(file_id=fileUUID, rule__purpose__in=['characterization', 'default_characterization']).values_list('content')
    for document, in documents:
        # This needs to be converted into an str because lxml doesn't accept
        # XML documents in unicode strings if the document contains an
        # encoding declaration.
        output = etree.XML(document.encode("utf-8"), parser)
        objectCharacteristicsExtension.append(output)

    leaf(premis_object, "originalName", escape(f.originallocation))

    # Derivations in which this file is the source.
    for derivation in Derivation.objects.filter(source_file_id=fileUUID, event__isnull=False):
        add_derivation(premis_object, "is source of", derivation.derived_file_id, derivation.event_id)

    # Derivations in which this file is the derived copy.
    for derivation in Derivation.objects.filter(derived_file_id=fileUUID, event__isnull=False):
        add_derivation(premis_object, "has source", derivation.source_file_id, derivation.event_id)

    return ret
def _fetchAllRows(sql):
    """Run *sql* via ``databaseInterface.querySQL`` and return all rows as a list.

    The lock returned alongside the cursor is released in a ``finally``
    clause, so it is freed even when fetching a row raises.
    """
    c, sqlLock = databaseInterface.querySQL(sql)
    try:
        # fetchone() yields None once the result set is exhausted.
        return list(iter(c.fetchone, None))
    finally:
        sqlLock.release()


def _appendDerivationRelationship(parent, subType, relatedFileUUID, eventUUID):
    """Append a "derivation" relationship element to *parent*.

    *subType* is the relationshipSubType text ("is source of" / "has source"),
    *relatedFileUUID* the UUID of the file on the other side of the
    relationship and *eventUUID* the UUID of the related event.
    """
    relationship = etree.SubElement(parent, "relationship")
    etree.SubElement(relationship, "relationshipType").text = "derivation"
    etree.SubElement(relationship, "relationshipSubType").text = subType

    relatedObject = etree.SubElement(relationship, "relatedObjectIdentification")
    etree.SubElement(relatedObject, "relatedObjectIdentifierType").text = "UUID"
    etree.SubElement(relatedObject, "relatedObjectIdentifierValue").text = relatedFileUUID

    relatedEvent = etree.SubElement(relationship, "relatedEventIdentification")
    etree.SubElement(relatedEvent, "relatedEventIdentifierType").text = "UUID"
    etree.SubElement(relatedEvent, "relatedEventIdentifierValue").text = eventUUID


def createTechMD(fileUUID):
    """Build and return a ``techMD`` element describing the file *fileUUID*.

    The element wraps a PREMIS 2.2 ``object`` (mdWrap MDTYPE "PREMIS:OBJECT")
    holding the file's identifier, sha256 fixity, size, format
    identification, FITS output, original name and derivation relationships,
    all read from the database.

    Increments the module-level ``globalTechMDCounter`` and uses the new
    value in the element's ``ID`` attribute.

    Raises:
        ValueError: if the Files table has no row for *fileUUID*, since the
            mandatory size and checksum cannot be filled in.
    """
    # NOTE(review): every query below interpolates fileUUID straight into the
    # SQL text. Callers are presumably passing trusted, generated UUIDs; switch
    # to parameterized queries if databaseInterface supports them.
    global globalTechMDCounter
    ret = etree.Element("techMD")
    globalTechMDCounter += 1
    ret.set("ID", "techMD_" + str(globalTechMDCounter))

    mdWrap = newChild(ret, "mdWrap")
    mdWrap.set("MDTYPE", "PREMIS:OBJECT")
    xmlData = newChild(mdWrap, "xmlData")

    sql = "SELECT fileSize, checksum FROM Files WHERE fileUUID = '%s';" % (fileUUID)
    rows = _fetchAllRows(sql)
    if not rows:
        # Fail with a clear message instead of an UnboundLocalError further down.
        raise ValueError("No Files row found for fileUUID '%s'" % (fileUUID,))
    # If several rows come back, the last one wins.
    fileSize = str(rows[-1][0])
    checksum = str(rows[-1][1])

    # OBJECT
    premisObject = etree.SubElement(xmlData, "object", nsmap={None: premisNS})
    premisObject.set(xsiBNS + "type", "file")
    premisObject.set(xsiBNS + "schemaLocation", premisNS + " http://www.loc.gov/standards/premis/v2/premis-v2-2.xsd")
    premisObject.set("version", "2.2")

    objectIdentifier = etree.SubElement(premisObject, "objectIdentifier")
    etree.SubElement(objectIdentifier, "objectIdentifierType").text = "UUID"
    etree.SubElement(objectIdentifier, "objectIdentifierValue").text = fileUUID

    objectCharacteristics = etree.SubElement(premisObject, "objectCharacteristics")
    etree.SubElement(objectCharacteristics, "compositionLevel").text = "0"

    fixity = etree.SubElement(objectCharacteristics, "fixity")
    etree.SubElement(fixity, "messageDigestAlgorithm").text = "sha256"
    etree.SubElement(fixity, "messageDigest").text = checksum

    etree.SubElement(objectCharacteristics, "size").text = fileSize

    # Format identification: one <format> per FilesIDs row, or a single
    # "Unknown" placeholder when the file has no identification at all.
    sql = "SELECT formatName, formatVersion, formatRegistryName, formatRegistryKey FROM FilesIDs WHERE fileUUID = '%s';" % (fileUUID)
    formatRows = _fetchAllRows(sql)
    if not formatRows:
        formatElement = etree.SubElement(objectCharacteristics, "format")
        formatDesignation = etree.SubElement(formatElement, "formatDesignation")
        etree.SubElement(formatDesignation, "formatName").text = "Unknown"
    for formatName, formatVersion, registryName, registryKey in formatRows:
        formatElement = etree.SubElement(objectCharacteristics, "format")

        formatDesignation = etree.SubElement(formatElement, "formatDesignation")
        etree.SubElement(formatDesignation, "formatName").text = formatName
        etree.SubElement(formatDesignation, "formatVersion").text = formatVersion

        formatRegistry = etree.SubElement(formatElement, "formatRegistry")
        etree.SubElement(formatRegistry, "formatRegistryName").text = registryName
        etree.SubElement(formatRegistry, "formatRegistryKey").text = registryKey

    objectCharacteristicsExtension = etree.SubElement(objectCharacteristics, "objectCharacteristicsExtension")

    # Embed each stored FITS document; parsing happens after the database
    # lock has been released, so malformed XML cannot leave it held.
    sql = "SELECT FilesFits.FITSxml FROM FilesFits WHERE fileUUID = '" + fileUUID + "';"
    parser = etree.XMLParser(remove_blank_text=True)
    for row in _fetchAllRows(sql):
        objectCharacteristicsExtension.append(etree.XML(row[0], parser))

    sql = "SELECT Files.originalLocation FROM Files WHERE Files.fileUUID = '" + fileUUID + "';"
    locationRows = _fetchAllRows(sql)
    if not locationRows:
        sys.stderr.write("Error no original location for file %s.\n" % (fileUUID,))
    for row in locationRows:
        etree.SubElement(premisObject, "originalName").text = escape(row[0])

    # Derivations: first the files derived from this one, then its sources.
    sql = "SELECT sourceFileUUID, derivedFileUUID, relatedEventUUID FROM Derivations WHERE sourceFileUUID = '" + fileUUID + "';"
    for row in _fetchAllRows(sql):
        _appendDerivationRelationship(premisObject, "is source of", row[1], row[2])

    sql = "SELECT sourceFileUUID, derivedFileUUID, relatedEventUUID FROM Derivations WHERE derivedFileUUID = '" + fileUUID + "';"
    for row in _fetchAllRows(sql):
        _appendDerivationRelationship(premisObject, "has source", row[0], row[2])

    return ret