Example #1
0
def verifyChecksum(fileUUID, filePath, date, eventIdentifierUUID):
    """Compare a file's digest with the checksum stored for it.

    Fetches the ``File`` row whose ``uuid`` equals ``fileUUID``, hashes
    ``filePath`` with ``sha_for_file`` and records a "fixity check" event
    (dated ``date``, identified by ``eventIdentifierUUID``) with the outcome.

    This function does not return normally; it ends the process with
    exit status:
      0 -- the digests match ("Pass" event recorded),
      2 -- the digests differ ("Fail" event recorded),
      1 -- no checksum is stored for the file (no event recorded).
    """
    f = File.objects.get(uuid=fileUUID)

    # None must be rejected together with the "" and "None" placeholders;
    # otherwise a missing checksum would reach the comparison below and be
    # reported as a mismatch instead of as missing data.
    if f.checksum in (None, "", "None"):
        print >> sys.stderr, "No checksum found in database for file:", fileUUID, filePath
        sys.exit(1)
    checksumFile = sha_for_file(filePath)

    if checksumFile != f.checksum:
        eventOutcomeDetailNote = "%s != %s" % (checksumFile, f.checksum)
        eventOutcome = "Fail"
        exitCode = 2
        print >> sys.stderr, "Checksums do not match:", fileUUID, filePath
        print >> sys.stderr, eventOutcomeDetailNote
    else:
        eventOutcomeDetailNote = "%s %s" % (checksumFile, "verified")
        eventOutcome = "Pass"
        exitCode = 0

    databaseFunctions.insertIntoEvents(
        fileUUID=fileUUID,
        eventIdentifierUUID=eventIdentifierUUID,
        eventType="fixity check",
        eventDateTime=date,
        eventOutcome=eventOutcome,
        eventOutcomeDetailNote=eventOutcomeDetailNote,
        eventDetail="program=\"python\"; module=\"hashlib.sha256()\"")

    sys.exit(exitCode)
Example #2
0
def verifyMetsFileSecChecksums(metsFile,
                               date,
                               taskUUID,
                               relativeDirectory="./"):
    print metsFile
    exitCode = 0
    tree = etree.parse(metsFile)
    root = tree.getroot()
    for item in root.findall(
            "{http://www.loc.gov/METS/}fileSec/{http://www.loc.gov/METS/}fileGrp/{http://www.loc.gov/METS/}file"
    ):
        #print etree.tostring(item)
        #print item

        checksum = item.get("CHECKSUM")
        checksumType = item.get("CHECKSUMTYPE")
        for item2 in item:
            if item2.tag == "{http://www.loc.gov/METS/}FLocat":
                #print "floc: ", item2.tag, etree.tostring(item2)
                #print item2.attrib
                fileLocation = item2.get("{http://www.w3.org/1999/xlink}href")
        #print "%s - %s - %s " % (checksumType, checksum, fileLocation)
        fileFullPath = os.path.join(relativeDirectory, fileLocation)
        if checksumType == "MD5":
            checksum2 = md5_for_file(fileFullPath)
            eventDetail = "program=\"python\"; module=\"hashlib.sha256()\""
        elif checksumType == "sha256":
            checksum2 = sha_for_file(fileFullPath)
            eventDetail = "program=\"python\"; module=\"hashlib.md5()\""
        else:
            print >> sys.stderr, "Unsupported checksum type: %s" % (
                checksumType.__str__())
            exit(300)

        if checksum != checksum2:
            #eventOutcomeDetailNote = checksumFile.__str__() + " != " + checksumDB.__str__()
            eventOutcome = "Fail"
            print "%s - %s - %s" % ((checksum == checksum2).__str__(),
                                    checksum.__str__(), checksum2.__str__())
            print >> sys.stderr, eventOutcome, fileFullPath
            exitCode = exitCode + 22
        else:
            #eventOutcomeDetailNote = checksumFile.__str__() + "verified"
            eventOutcome = "Pass"
            print eventOutcome, fileLocation

    return exitCode

    #insertIntoEvents(fileUUID="", eventIdentifierUUID="", eventType="", eventDateTime=databaseInterface.getUTCDate(), eventDetail="", eventOutcome="", eventOutcomeDetailNote="")
    databaseFunctions.insertIntoEvents(fileUUID=fileUUID, \
                 eventIdentifierUUID=eventIdentifierUUID, \
                 eventType="fixity check", \
                 eventDateTime=date, \
                 eventOutcome=eventOutcome, \
                 eventOutcomeDetailNote=eventOutcomeDetailNote, \
                 eventDetail=eventDetail)
def verifyMetsFileSecChecksums(metsFile, date, taskUUID, relativeDirectory="./"):
    print metsFile
    exitCode = 0
    tree = etree.parse(metsFile)
    root = tree.getroot()
    for item in root.findall("{http://www.loc.gov/METS/}fileSec/{http://www.loc.gov/METS/}fileGrp/{http://www.loc.gov/METS/}file"):
        #print etree.tostring(item)
        #print item

        checksum = item.get("CHECKSUM")
        checksumType = item.get("CHECKSUMTYPE")
        for item2 in item:
            if item2.tag == "{http://www.loc.gov/METS/}FLocat":
                #print "floc: ", item2.tag, etree.tostring(item2)
                #print item2.attrib
                fileLocation = item2.get("{http://www.w3.org/1999/xlink}href")
        #print "%s - %s - %s " % (checksumType, checksum, fileLocation)
        fileFullPath = os.path.join(relativeDirectory, fileLocation)
        if checksumType == "MD5":
            checksum2 = md5_for_file(fileFullPath)
            eventDetail = "program=\"python\"; module=\"hashlib.sha256()\""
        elif checksumType == "sha256":
            checksum2 = sha_for_file(fileFullPath)
            eventDetail = "program=\"python\"; module=\"hashlib.md5()\""
        else:
            print >>sys.stderr, "Unsupported checksum type: %s" % (checksumType.__str__())
            exit(300)


        if checksum != checksum2:
            #eventOutcomeDetailNote = checksumFile.__str__() + " != " + checksumDB.__str__()
            eventOutcome="Fail"
            print "%s - %s - %s" % ((checksum == checksum2).__str__(), checksum.__str__(), checksum2.__str__())
            print >>sys.stderr, eventOutcome,  fileFullPath
            exitCode = exitCode + 22
        else:
            #eventOutcomeDetailNote = checksumFile.__str__() + "verified"
            eventOutcome="Pass"
            print eventOutcome, fileLocation






    return exitCode

    #insertIntoEvents(fileUUID="", eventIdentifierUUID="", eventType="", eventDateTime=databaseInterface.getUTCDate(), eventDetail="", eventOutcome="", eventOutcomeDetailNote="")
    databaseFunctions.insertIntoEvents(fileUUID=fileUUID, \
                 eventIdentifierUUID=eventIdentifierUUID, \
                 eventType="fixity check", \
                 eventDateTime=date, \
                 eventOutcome=eventOutcome, \
                 eventOutcomeDetailNote=eventOutcomeDetailNote, \
                 eventDetail=eventDetail)
def updateSizeAndChecksum(fileUUID, filePath, date, eventIdentifierUUID):
    """Store the size and digest of ``filePath`` and log the calculation.

    The size comes from ``os.path.getsize`` and the digest from
    ``sha_for_file``; both are written to the ``File`` rows whose ``uuid``
    equals ``fileUUID``.  A "message digest calculation" event dated ``date``
    is then inserted with the digest as its outcome detail note.
    """
    # NOTE(review): eventIdentifierUUID is accepted but not forwarded to
    # insertIntoEvents -- confirm whether that is intentional.
    sizeInBytes = os.path.getsize(filePath)
    digest = str(sha_for_file(filePath))

    matchingFiles = File.objects.filter(uuid=fileUUID)
    matchingFiles.update(size=sizeInBytes, checksum=digest)

    eventFields = {
        "fileUUID": fileUUID,
        "eventType": "message digest calculation",
        "eventDateTime": date,
        "eventDetail": "program=\"python\"; module=\"hashlib.sha256()\"",
        "eventOutcomeDetailNote": digest,
    }
    insertIntoEvents(**eventFields)
Example #5
0
def updateSizeAndChecksum(fileUUID, filePath, date, eventIdentifierUUID):
    """Write the size and digest of ``filePath`` to the Files table.

    Runs an ``UPDATE Files`` statement setting ``fileSize`` and ``checksum``
    for the row whose ``fileUUID`` matches, then inserts a
    "message digest calculation" event (identified by
    ``eventIdentifierUUID``, dated ``date``) carrying the digest as its
    outcome detail note.
    """
    sizeText = str(os.path.getsize(filePath))
    digest = str(sha_for_file(filePath))

    # NOTE(review): the statement is assembled by string concatenation;
    # switch to bound parameters if databaseInterface.runSQL supports them.
    statement = "".join([
        "UPDATE Files ",
        "SET fileSize='", sizeText, "', checksum='", digest, "' ",
        "WHERE fileUUID='", fileUUID, "'",
    ])
    databaseInterface.runSQL(statement)

    eventFields = dict(
        fileUUID=fileUUID,
        eventIdentifierUUID=eventIdentifierUUID,
        eventType="message digest calculation",
        eventDateTime=date,
        eventDetail="program=\"python\"; module=\"hashlib.sha256()\"",
        eventOutcomeDetailNote=digest,
    )
    insertIntoEvents(**eventFields)
Example #6
0
def checksumFile(filePath, fileUUID):
    """Hash a transfer file and record the digest as an event.

    The first ``"transfer/"`` in ``filePath`` is replaced by the module-level
    ``transferDirectory`` to obtain the path handed to ``sha_for_file``.  A
    "message digest calculation" event with a freshly generated UUID and the
    current ``databaseInterface.getUTCDate()`` value is then inserted for
    ``fileUUID``, with the digest as its outcome detail note.
    """
    global transferDirectory
    resolvedPath = filePath.replace("transfer/", transferDirectory, 1)
    digest = sha_for_file(resolvedPath)
    timestamp = databaseInterface.getUTCDate()

    # Create Event
    databaseInterface.insertIntoEvents(
        fileUUID=fileUUID,
        eventIdentifierUUID=str(uuid.uuid4()),
        eventType="message digest calculation",
        eventDateTime=timestamp,
        eventDetail='program="python"; module="hashlib.sha256()" ; file="/usr/lib/python2.6/hashlib.pyc"',
        eventOutcome="",
        eventOutcomeDetailNote=str(digest))
def checksumFile(filePath, fileUUID):
    """Compute the digest of a transfer file and log it as an event.

    ``filePath`` has its first ``"transfer/"`` occurrence swapped for the
    module-level ``transferDirectory`` before being passed to
    ``sha_for_file``.  The resulting digest is stored as the outcome detail
    note of a new "message digest calculation" event for ``fileUUID``, dated
    with ``databaseInterface.getUTCDate()``.
    """
    global transferDirectory
    onDiskPath = filePath.replace("transfer/", transferDirectory, 1)
    fileDigest = sha_for_file(onDiskPath)
    eventDate = databaseInterface.getUTCDate()

    # Create Event
    eventFields = {
        "fileUUID": fileUUID,
        "eventIdentifierUUID": str(uuid.uuid4()),
        "eventType": "message digest calculation",
        "eventDateTime": eventDate,
        "eventDetail": 'program="python"; module="hashlib.sha256()" ; file="/usr/lib/python2.6/hashlib.pyc"',
        "eventOutcome": "",
        "eventOutcomeDetailNote": str(fileDigest),
    }
    databaseInterface.insertIntoEvents(**eventFields)
def verifyChecksum(fileUUID, filePath, date, eventIdentifierUUID):
    """Compare a file's digest with the checksum stored in the Files table.

    Reads ``checksum`` for ``fileUUID`` from ``Files`` (the last row wins if
    several are returned), hashes ``filePath`` with ``sha_for_file`` and
    records a "fixity check" event (dated ``date``, identified by
    ``eventIdentifierUUID``) with the outcome.

    This function does not return normally; it ends the process with
    exit status:
      0 -- the digests match ("Pass" event recorded),
      2 -- the digests differ ("Fail" event recorded),
      1 -- no checksum is stored for the file (no event recorded).
    """
    # NOTE(review): the query is assembled by string concatenation; switch to
    # bound parameters if databaseInterface.querySQL supports them.
    sql = """SELECT checksum FROM Files WHERE fileUUID = '""" + fileUUID + "'"
    c, sqlLock = databaseInterface.querySQL(sql)
    checksumDB = ""
    try:
        row = c.fetchone()
        while row is not None:
            checksumDB = row[0]
            row = c.fetchone()
    finally:
        # Release even when fetching fails, so the lock is never left held.
        sqlLock.release()

    if checksumDB in (None, "", "None"):
        print >> sys.stderr, "No checksum found in database for file:", fileUUID, filePath
        sys.exit(1)
    checksumFile = sha_for_file(filePath)

    if checksumFile != checksumDB:
        eventOutcomeDetailNote = str(checksumFile) + " != " + str(checksumDB)
        eventOutcome = "Fail"
        exitCode = 2
        print >> sys.stderr, "Checksums do not match:", fileUUID, filePath
        print >> sys.stderr, eventOutcomeDetailNote
    else:
        eventOutcomeDetailNote = "%s %s" % (str(checksumFile), "verified")
        eventOutcome = "Pass"
        exitCode = 0

    databaseFunctions.insertIntoEvents(
        fileUUID=fileUUID,
        eventIdentifierUUID=eventIdentifierUUID,
        eventType="fixity check",
        eventDateTime=date,
        eventOutcome=eventOutcome,
        eventOutcomeDetailNote=eventOutcomeDetailNote,
        eventDetail="program=\"python\"; module=\"hashlib.sha256()\"")

    sys.exit(exitCode)
def verifyChecksum(fileUUID, filePath, date, eventIdentifierUUID):
    """Run a fixity check of ``filePath`` against the Files table.

    The stored ``checksum`` for ``fileUUID`` is read from ``Files`` (when
    several rows come back the last one is used) and compared with the
    digest ``sha_for_file`` computes for ``filePath``.  The result is
    recorded as a "fixity check" event dated ``date`` and identified by
    ``eventIdentifierUUID``.

    The process is always terminated from here, with exit status:
      0 -- digests match ("Pass" event recorded),
      2 -- digests differ ("Fail" event recorded),
      1 -- no stored checksum was found (no event recorded).
    """
    # NOTE(review): the query is assembled by string concatenation; switch to
    # bound parameters if databaseInterface.querySQL supports them.
    sql = """SELECT checksum FROM Files WHERE fileUUID = '""" + fileUUID + "'"
    c, sqlLock = databaseInterface.querySQL(sql)
    checksumDB = ""
    try:
        row = c.fetchone()
        while row is not None:
            checksumDB = row[0]
            row = c.fetchone()
    finally:
        # The lock must be given back even if reading the cursor raises.
        sqlLock.release()

    if checksumDB in (None, "", "None"):
        print >>sys.stderr, "No checksum found in database for file:", fileUUID, filePath
        sys.exit(1)
    checksumFile = sha_for_file(filePath)

    if checksumFile != checksumDB:
        eventOutcomeDetailNote = str(checksumFile) + " != " + str(checksumDB)
        eventOutcome = "Fail"
        exitCode = 2
        print >>sys.stderr, "Checksums do not match:", fileUUID, filePath
        print >>sys.stderr, eventOutcomeDetailNote
    else:
        eventOutcomeDetailNote = "%s %s" % (str(checksumFile), "verified")
        eventOutcome = "Pass"
        exitCode = 0

    databaseFunctions.insertIntoEvents(
        fileUUID=fileUUID,
        eventIdentifierUUID=eventIdentifierUUID,
        eventType="fixity check",
        eventDateTime=date,
        eventOutcome=eventOutcome,
        eventOutcomeDetailNote=eventOutcomeDetailNote,
        eventDetail="program=\"python\"; module=\"hashlib.sha256()\"")

    sys.exit(exitCode)
Example #10
0
def main(aip_uuid, aip_name, compression, sip_dir, aip_filename):

    # Prep work
    mets_schema_location = 'http://www.loc.gov/METS/ http://www.loc.gov/standards/mets/version18/mets.xsd'
    premis_schema_location = 'info:lc/xmlns/premis-v2 http://www.loc.gov/standards/premis/v2/premis-v2-2.xsd'
    # Datetime format string from http://docs.python.org/2/library/datetime.html
    # %Y = 4 digit year, %m = 2 digit month, %d = 2 digit day
    # %H = 24-hour hour, %M = 2-digit minute, %S = 2 digit second
    now = timezone.now().strftime("%Y-%m-%dT%H:%M:%S")
    aip_identifier = aip_name + '-' + aip_uuid
    aip_path = os.path.join(sip_dir, aip_filename)
    # Get archive tool and version
    program, algorithm = compression.split('-')

    # Pointer files are not written for uncompressed AIPs;
    # the purpose of the pointer file is primarily to provide information
    # on how to read a compressed AIP file, so there isn't anything for
    # it to do when pointing at an uncompressed AIP.
    if program == 'None':
        return 0

    if program == '7z':
        archive_tool = '7-Zip'
        archive_tool_version = '9.20'  # TODO get this dynamically
    elif program == 'pbzip2':
        archive_tool = program
        archive_tool_version = '1.1.6'  # TODO get this dynamically
    # Format / file extension
    _, extension = os.path.splitext(aip_filename)
    # PRONOM ID and PRONOM name for each file extension
    pronom_conversion = {
        '.7z': {
            'puid': 'fmt/484',
            'name': '7Zip format'
        },
        '.bz2': {
            'puid': 'x-fmt/268',
            'name': 'BZIP2 Compressed Archive'
        },
    }
    num_files = 1
    # Get size
    try:
        aip_size = os.path.getsize(aip_path)
    except os.error:
        print >> sys.stderr, "File {} does not exist or is inaccessible.  Aborting.".format(
            aip_path)
        return -1
    # Calculate checksum
    checksum_algorithm = 'sha256'
    checksum = checksummingTools.sha_for_file(aip_path)
    # Get package type (AIP, AIC)
    sip_metadata_uuid = '3e48343d-e2d2-4956-aaa3-b54d26eb9761'

    try:
        dc = DublinCore.objects.get(metadataappliestotype_id=sip_metadata_uuid,
                                    metadataappliestoidentifier=aip_uuid)
    except DublinCore.DoesNotExist:
        package_type = "Archival Information Package"
    else:
        package_type = dc.type

    # Namespaces
    nsmap = {
        # Default, unprefixed namespace
        None: namespaces.metsNS,
        'xsi': namespaces.xsiNS,
        'xlink': namespaces.xlinkNS,
    }
    # Set up structure
    E = ElementMaker(namespace=namespaces.metsNS, nsmap=nsmap)
    E_P = ElementMaker(namespace=namespaces.premisNS,
                       nsmap={None: namespaces.premisNS})

    root = (
        E.mets(
            E.metsHdr(CREATEDATE=now),
            # amdSec goes here
            E.fileSec(E.fileGrp(USE='Archival Information Package'), ),
            E.structMap(TYPE='physical'),
        ))
    # Namespaced attributes have to be added separately - don't know how to do
    # inline with E
    root.attrib[namespaces.xsiBNS + 'schemaLocation'] = mets_schema_location

    add_amdsec_after = root.find('mets:metsHdr', namespaces=namespaces.NSMAP)
    filegrp = root.find('.//mets:fileGrp', namespaces=namespaces.NSMAP)
    structmap = root.find('.//mets:structMap', namespaces=namespaces.NSMAP)
    # For each file, add amdSec, file, fptr
    for admin_id in range(1, num_files + 1):

        # amdSec
        amdsec_id = 'amdSec_{}'.format(admin_id)
        amdsec = E.amdSec(
            E.techMD(
                E.mdWrap(
                    E.xmlData(),
                    MDTYPE='PREMIS:OBJECT',  # mdWrap
                ),
                ID='techMD_1',  # techMD
            ),
            ID=amdsec_id,  # amdSec
        )
        # Add PREMIS:OBJECT
        obj = E_P.object(
            E_P.objectIdentifier(
                E_P.objectIdentifierType('UUID'),
                E_P.objectIdentifierValue(aip_uuid),
            ),
            E_P.objectCharacteristics(
                E_P.compositionLevel('1'),
                E_P.fixity(
                    E_P.messageDigestAlgorithm(checksum_algorithm),
                    E_P.messageDigest(checksum),
                ),
                E_P.size(str(aip_size)),
                E_P.format(
                    E_P.formatDesignation(
                        E_P.formatName(pronom_conversion[extension]['name']),
                        E_P.formatVersion(),
                    ),
                    E_P.formatRegistry(
                        E_P.formatRegistryName('PRONOM'),
                        E_P.formatRegistryKey(
                            pronom_conversion[extension]['puid'])),
                ),
                E_P.creatingApplication(
                    E_P.creatingApplicationName(archive_tool),
                    E_P.creatingApplicationVersion(archive_tool_version),
                    E_P.dateCreatedByApplication(now),
                ),
            ),
            version='2.2',
        )
        obj.attrib[namespaces.xsiBNS + 'type'] = 'file'
        obj.attrib[namespaces.xsiBNS +
                   'schemaLocation'] = premis_schema_location

        # Add as child of xmldata
        amdsec.find('.//mets:mdWrap[@MDTYPE="PREMIS:OBJECT"]/mets:xmlData',
                    namespaces=namespaces.NSMAP).append(obj)

        # Add PREMIS:EVENT for compression, use archivematicaCreateMETS2 code
        elements = archivematicaCreateMETS2.createDigiprovMD(aip_uuid)
        for element in elements:
            amdsec.append(element)
        # Add PREMIS:AGENT for Archivematica
        elements = archivematicaCreateMETS2.createDigiprovMDAgents()
        for element in elements:
            amdsec.append(element)

        # add amdSec after previous amdSec (or metsHdr if first one)
        add_amdsec_after.addnext(amdsec)
        add_amdsec_after = amdsec

        # fileGrp
        file_ = E.file(E.FLocat(
            LOCTYPE="OTHER",
            OTHERLOCTYPE="SYSTEM",
        ),
                       ID=aip_identifier)
        filegrp.append(file_)
        flocat = file_.find('mets:FLocat', namespaces=namespaces.NSMAP)
        flocat.attrib['{{{ns}}}href'.format(ns=namespaces.xlinkNS)] = aip_path

        # compression - 7z or tar.bz2
        if extension == '.7z':
            etree.SubElement(file_,
                             "transformFile",
                             TRANSFORMORDER='1',
                             TRANSFORMTYPE='decompression',
                             TRANSFORMALGORITHM=algorithm)
        elif extension == '.bz2':
            etree.SubElement(file_,
                             "transformFile",
                             TRANSFORMORDER='1',
                             TRANSFORMTYPE='decompression',
                             TRANSFORMALGORITHM='bzip2')
            etree.SubElement(file_,
                             "transformFile",
                             TRANSFORMORDER='2',
                             TRANSFORMTYPE='decompression',
                             TRANSFORMALGORITHM='tar')

        # structMap
        div = etree.SubElement(structmap,
                               namespaces.metsBNS + 'div',
                               ADMID=amdsec_id,
                               TYPE=package_type)
        etree.SubElement(div,
                         namespaces.metsBNS + 'fptr',
                         FILEID=aip_identifier)

    print etree.tostring(root, pretty_print=True)

    # Write out pointer.xml
    xml_filename = 'pointer.xml'
    filename = os.path.join(os.path.dirname(aip_path), xml_filename)
    with open(filename, 'w') as f:
        f.write(etree.tostring(root, pretty_print=True))
    fileOperations.addFileToSIP(
        filePathRelativeToSIP='%SIPDirectory%' + xml_filename,
        fileUUID=str(uuid.uuid4()),
        sipUUID=aip_uuid,
        taskUUID=str(uuid.uuid4()),  # Unsure what should go here
        date=now,
        sourceType="aip creation",
    )
    return 0