Beispiel #1
0
def main(job, shared_path, file_uuid, file_path, date, event_uuid):
    """Record the size and checksum of one file, reusing METS data if possible.

    Looks up the ``File`` row for ``file_uuid``. When the file belongs to a
    transfer of type "Archivematica AIP" and has no SIP, the size, checksum
    and checksum type are taken from ``get_file_info_from_mets`` and handed
    to ``updateSizeAndChecksum`` together with ``add_event=False``. A
    derivation or format version reported by the METS lookup is stored too.

    Returns 1 when no ``File`` with that UUID exists, otherwise 0.
    """
    try:
        file_ = File.objects.get(uuid=file_uuid)
    except File.DoesNotExist:
        logger.exception("File with UUID %s cannot be found.", file_uuid)
        return 1

    transfer = file_.transfer
    from_aip_transfer = (
        transfer and not file_.sip and transfer.type == "Archivematica AIP"
    )

    # Extra keyword arguments stay empty unless the original METS document
    # can supply the values.
    extra = {}
    if from_aip_transfer:
        mets_info = get_file_info_from_mets(job, shared_path, file_)
        extra["fileSize"] = mets_info["file_size"]
        extra["checksum"] = mets_info["checksum"]
        extra["checksumType"] = mets_info["checksum_type"]
        extra["add_event"] = False

        derived_uuid = mets_info.get("derivation")
        if derived_uuid:
            insertIntoDerivations(
                sourceFileUUID=file_uuid, derivedFileUUID=derived_uuid
            )

        format_version = mets_info.get("format_version")
        if format_version:
            FileFormatVersion.objects.create(
                file_uuid_id=file_uuid, format_version=format_version
            )

    updateSizeAndChecksum(file_uuid, file_path, date, event_uuid, **extra)
    return 0
def assign_uuid(
    job,
    filename,
    extracted_file_original_location,
    package_uuid,
    transfer_uuid,
    date,
    task_uuid,
    sip_directory,
    package_filename,
):
    """Register one extracted file in the transfer under a fresh UUID.

    The file is added to the transfer with source type "unpacking" and an
    event detail naming the package it came from. Its size and checksum are
    then recorded under a second, newly generated event UUID, and the
    assignment is reported through ``job.pyprint``.
    """
    new_file_uuid = str(uuid.uuid4())

    def to_transfer_relative(path):
        # Swap the first occurrence of the SIP directory for the transfer
        # directory placeholder.
        return path.replace(sip_directory, TRANSFER_DIRECTORY, 1)

    relative_path = to_transfer_relative(filename)
    package_detail = "{} ({})".format(
        to_transfer_relative(package_filename), package_uuid
    )

    addFileToTransfer(
        relative_path,
        new_file_uuid,
        transfer_uuid,
        task_uuid,
        date,
        sourceType="unpacking",
        eventDetail="Unpacked from: " + package_detail,
        originalLocation=extracted_file_original_location,
    )

    checksum_event_uuid = str(uuid.uuid4())
    updateSizeAndChecksum(new_file_uuid, filename, date, checksum_event_uuid)
    job.pyprint("Assigning new file UUID:", new_file_uuid, "to file", filename)
def insert_file_into_database(file_uuid, sip_uuid, event_uuid, rule, output_path, relative_path):
    """Register a newly created "text/ocr" file and link it to its source.

    A fresh UUID is generated for the new file, which is added to the SIP at
    ``relative_path``; its size and checksum are computed from
    ``output_path``; finally a derivation row ties it to ``file_uuid`` via
    ``event_uuid``.

    NOTE(review): ``task_uuid`` is not a parameter and is resolved from the
    enclosing scope -- confirm it is defined at module level. ``rule`` is
    accepted but not used here.
    """
    new_file_uuid = str(uuid4())
    now = timezone.now()

    fileOperations.addFileToSIP(
        relative_path, new_file_uuid, sip_uuid, task_uuid, now,
        sourceType="creation", use="text/ocr",
    )

    # The checksum event gets its own UUID, independent of the file UUID.
    checksum_event_uuid = str(uuid4())
    fileOperations.updateSizeAndChecksum(
        new_file_uuid, output_path, now, checksum_event_uuid
    )

    databaseFunctions.insertIntoDerivations(
        sourceFileUUID=file_uuid,
        derivedFileUUID=new_file_uuid,
        relatedEventUUID=event_uuid,
    )
def addFile(filePath, transferPath, transferUUID, date, eventDetail="", fileUUID=None):
    """Add a file to a transfer and record its size and checksum.

    filePath      -- full path of the file on disk.
    transferPath  -- transfer root; its first occurrence in ``filePath`` is
                     replaced with "%transferDirectory%".
    transferUUID  -- UUID of the transfer the file belongs to.
    date          -- timestamp passed through to both database helpers.
    eventDetail   -- optional detail text for the "unpacking" record.
    fileUUID      -- UUID to register the file under; a new one is generated
                     for every call when omitted.
    """
    # The default must be created per call: a UUID expression in the
    # signature is evaluated once at definition time and would be shared by
    # every call that relies on it.
    if fileUUID is None:
        fileUUID = str(uuid.uuid4())
    taskUUID = str(uuid.uuid4())
    filePathRelativeToSIP = filePath.replace(transferPath, "%transferDirectory%", 1)
    addFileToTransfer(
        filePathRelativeToSIP, fileUUID, transferUUID, taskUUID, date, sourceType="unpacking", eventDetail=eventDetail
    )
    # uuid4 has to be *called*; str() of the bare function would store its
    # repr instead of an event UUID.
    updateSizeAndChecksum(fileUUID, filePath, date, str(uuid.uuid4()))
def xmlCreateFileAssociationBetween(originalFileFullPath, outputFromNormalizationFileFullPath, SIPFullPath, sipUUID, eventDetailText, eventOutcomeDetailNote, outputFileUUID=""):
    #assign file UUID

    date = databaseInterface.getUTCDate()
    if outputFileUUID == "":
        outputFileUUID = uuid.uuid4().__str__()

    originalFilePathRelativeToSIP = originalFileFullPath.replace(SIPFullPath,"%SIPDirectory%", 1)
    sql = "SELECT Files.fileUUID FROM Files WHERE removedTime = 0 AND Files.currentLocation = '" + MySQLdb.escape_string(originalFilePathRelativeToSIP) + "' AND Files.sipUUID = '" + sipUUID + "';"
    print sql
    rows = databaseInterface.queryAllSQL(sql)
    print rows
    fileUUID = rows[0][0]


    filePathRelativeToSIP = outputFromNormalizationFileFullPath.replace(SIPFullPath,"%SIPDirectory%", 1)
    addFileToSIP(filePathRelativeToSIP, outputFileUUID, sipUUID, uuid.uuid4().__str__(), date, sourceType="creation", use="preservation")
    updateSizeAndChecksum(outputFileUUID, outputFromNormalizationFileFullPath, date, uuid.uuid4().__str__())

    taskUUID = uuid.uuid4().__str__()
    insertIntoEvents(fileUUID=fileUUID, \
               eventIdentifierUUID=taskUUID, \
               eventType="normalization", \
               eventDateTime=date, \
               eventDetail=eventDetailText, \
               eventOutcome="", \
               eventOutcomeDetailNote=eventOutcomeDetailNote)

    insertIntoDerivations(sourceFileUUID=fileUUID, derivedFileUUID=outputFileUUID, relatedEventUUID=taskUUID)
def onceNormalized(command, opts, replacementDic):
    """Record the outputs of a finished normalization command in the database.

    Collects the file at ``command.outputLocation`` (or every regular file
    below it when it is a directory). When
    ``opts["commandClassifications"]`` is "preservation", each collected file
    is added to the SIP, a "normalization" event is inserted for the source
    file, the new file's size and checksum are recorded and a derivation row
    links source and output. After each such file a fresh
    ``%outputFileUUID%`` / ``%postfix%`` pair is stored in ``replacementDic``.

    If an output location is set but is neither a file nor a directory, the
    problem is reported on stderr and ``command.exitCode`` is set to -2.
    """
    transcodedFiles = []
    if not command.outputLocation:
        # Use "" for a missing location so it can be passed to os.path below.
        command.outputLocation = ""
    if os.path.isfile(command.outputLocation):
        transcodedFiles.append(command.outputLocation)
    elif os.path.isdir(command.outputLocation):
        for path, directories, files in os.walk(command.outputLocation):
            for p in files:
                p = os.path.join(path, p)
                if os.path.isfile(p):
                    transcodedFiles.append(p)
    elif command.outputLocation:
        print >> sys.stderr, command
        print >> sys.stderr, "Error - output file does not exist [" + command.outputLocation + "]"
        command.exitCode = -2

    # One event UUID is shared by every output of this command.
    derivationEventUUID = str(uuid.uuid4())
    eventDetail = ""
    if command.eventDetailCommand is not None:
        eventDetail = command.eventDetailCommand.stdOut
    for ef in transcodedFiles:
        if opts["commandClassifications"] != "preservation":
            continue

        outputFileUUID = replacementDic["%outputFileUUID%"]

        # Add the new file to the SIP.
        filePathRelativeToSIP = ef.replace(opts["sipPath"], "%SIPDirectory%", 1)
        addFileToSIP(
            filePathRelativeToSIP,
            outputFileUUID,
            opts["sipUUID"],
            str(uuid.uuid4()),
            opts["date"],
            sourceType="creation",
            use="preservation",
        )
        # Add event information to the source file.
        insertIntoEvents(
            fileUUID=opts["fileUUID"],
            eventIdentifierUUID=derivationEventUUID,
            eventType="normalization",
            eventDateTime=opts["date"],
            eventDetail=eventDetail,
            eventOutcome="",
            eventOutcomeDetailNote=filePathRelativeToSIP,
        )

        # Calculate the new file's size and checksum.
        updateSizeAndChecksum(outputFileUUID, ef, opts["date"], str(uuid.uuid4()))

        # Add linking information between the files.
        insertIntoDerivations(
            sourceFileUUID=opts["fileUUID"],
            derivedFileUUID=outputFileUUID,
            relatedEventUUID=derivationEventUUID,
        )

        # Prepare a new UUID (and matching postfix) for the next output.
        replacementDic["%outputFileUUID%"] = str(uuid.uuid4())
        replacementDic["%postfix%"] = "-" + replacementDic["%outputFileUUID%"]
def onceNormalized(command):
    """Record the outputs of a finished normalization command in the database.

    Works on the module-level ``opts``, ``replacementDic`` and
    ``outputFileUUID``. Collects the file at ``command.outputLocation`` (or
    every regular file below it when it is a directory). When
    ``opts.commandClassifications`` is "preservation", each file is added to
    the SIP, a "normalization" event is inserted for the source file, size
    and checksum are recorded and a derivation row links source and output;
    ``outputFileUUID`` and ``replacementDic["%postfix%"]`` are then renewed.

    If an output location is set but is neither a file nor a directory, the
    problem is reported on stderr and ``command.exitCode`` is set to -2.
    """
    global outputFileUUID
    global replacementDic
    global opts

    transcodedFiles = []
    if not command.outputLocation:
        command.outputLocation = ""
    elif os.path.isfile(command.outputLocation):
        transcodedFiles.append(command.outputLocation)
    elif os.path.isdir(command.outputLocation):
        for path, directories, files in os.walk(command.outputLocation):
            for p in files:
                p = os.path.join(path, p)
                if os.path.isfile(p):
                    transcodedFiles.append(p)
    elif command.outputLocation:
        print >>sys.stderr, command
        print >>sys.stderr, "Error - output file does not exist [" + command.outputLocation + "]"
        command.exitCode = -2

    # One event UUID is shared by every output of this command.
    derivationEventUUID = uuid.uuid4().__str__()
    for ef in transcodedFiles:
        if opts.commandClassifications == "preservation":
            # A command without an event-detail command has no detail text;
            # use "" rather than failing on the missing attribute.
            eventDetail = ""
            if command.eventDetailCommand is not None:
                eventDetail = command.eventDetailCommand.stdOut

            #Add the new file to the sip
            filePathRelativeToSIP = ef.replace(opts.sipPath, "%SIPDirectory%", 1)
            addFileToSIP(filePathRelativeToSIP, outputFileUUID, opts.sipUUID, uuid.uuid4().__str__(), opts.date, sourceType="creation", use="preservation")
            #Calculate new file checksum
            print >>sys.stderr, "TODO: calculate new file checksum"
            #Add event information to current file
            insertIntoEvents(fileUUID=opts.fileUUID,
                             eventIdentifierUUID=derivationEventUUID,
                             eventType="normalization",
                             eventDateTime=opts.date,
                             eventDetail=eventDetail,
                             eventOutcome="",
                             eventOutcomeDetailNote=filePathRelativeToSIP)

            updateSizeAndChecksum(outputFileUUID, ef, opts.date, uuid.uuid4().__str__())

            #Add linking information between files
            insertIntoDerivations(sourceFileUUID=opts.fileUUID, derivedFileUUID=outputFileUUID, relatedEventUUID=derivationEventUUID)

            # Prepare a new UUID (and matching postfix) for the next output.
            outputFileUUID = uuid.uuid4().__str__()
            replacementDic["%postfix%"] = "-" + outputFileUUID
def preservation():
    """Add a SIP file entry and update a checksum once per item in ``files``.

    Reads the module-level ``files``, ``filePathRelativeToSIP`` and ``opts``.

    NOTE(review): the items of ``files`` are not used inside the loop, and
    the checksum update targets ``opts.fileUUID`` / ``opts.filePath`` rather
    than the newly generated UUID -- confirm this is intended.
    """
    for _ in files:
        # Create an entry for the file under a new UUID.
        new_file_uuid = str(uuid.uuid4())
        addFileToSIP(
            filePathRelativeToSIP,
            new_file_uuid,
            opts.sipUUID,
            opts.eventIdentifierUUID,
            opts.date,
            use=opts.use,
        )
        updateSizeAndChecksum(
            opts.fileUUID, opts.filePath, opts.date, opts.eventIdentifierUUID
        )
def onceExtracted(command):
    extractedFiles = []
    print "TODO - Metadata regarding removal of extracted archive"
    if removeOnceExtracted:
        packageFileUUID = sys.argv[6].__str__()
        sipDirectory = sys.argv[2].__str__()
        os.remove(replacementDic["%inputFile%"])
        currentLocation =  replacementDic["%inputFile%"].replace(sipDirectory, "%transferDirectory%", 1)
        fileWasRemoved(packageFileUUID, eventOutcomeDetailNote = "removed from: " + currentLocation)

    print "OUTPUT DIRECTORY: ", replacementDic["%outputDirectory%"]
    for w in os.walk(replacementDic["%outputDirectory%"].replace("*", "asterisk*")):
        path, directories, files = w
        for p in files:
            p = os.path.join(path, p)
            #print "path: ", p
            if os.path.isfile(p):
                extractedFiles.append(p)
    for ef in extractedFiles:
        fileUUID = uuid.uuid4().__str__()
        #print "File Extracted:", ef
        if True: #Add the file to the SIP
            #<arguments>"%relativeLocation%" "%SIPObjectsDirectory%" "%SIPLogsDirectory%" "%date%" "%taskUUID%" "%fileUUID%"</arguments>
            sipDirectory = sys.argv[2].__str__()
            transferUUID = sys.argv[3].__str__()
            date = sys.argv[4].__str__()
            taskUUID = sys.argv[5].__str__()
            packageFileUUID = sys.argv[6].__str__()

            filePathRelativeToSIP = ef.replace(sipDirectory,"%transferDirectory%", 1)
            print "File Extracted:: {" + fileUUID + "} ", filePathRelativeToSIP
            eventDetail="Unpacked from: {" + packageFileUUID + "}" + filePathRelativeToSIP
            addFileToTransfer(filePathRelativeToSIP, fileUUID, transferUUID, taskUUID, date, sourceType="unpacking", eventDetail=eventDetail)
            updateSizeAndChecksum(fileUUID, ef, date, uuid.uuid4.__str__())


        run = sys.argv[0].__str__() + \
        " \"" + transcoder.escapeForCommand(ef) + "\""
        if True: #Add the file to the SIP
            run = run + " \"" + transcoder.escapeForCommand(sys.argv[2].__str__()) + "\"" + \
            " \"" + transcoder.escapeForCommand(sys.argv[3].__str__()) + "\"" + \
            " \"" + transcoder.escapeForCommand(sys.argv[4].__str__()) + "\"" + \
            " \"" + transcoder.escapeForCommand(sys.argv[5].__str__()) + "\"" + \
            " \"" + fileUUID + "\""

        exitCode, stdOut, stdError = executeOrRun("command", run)
        print stdOut
        print >>sys.stderr, stdError
        if exitCode != 0 and command.exitCode == 0:
            command.exitCode = exitCode

    global extractedCount
    date = sys.argv[4].__str__().split(".", 1)[0]
    extractedCount = extractedCount + 1
    replacementDic["%outputDirectory%"] = transcoder.fileFullName + '-' + extractedCount.__str__() + '-' + date
def onceExtracted(command):
    extractedFiles = []
    print "TODO - Metadata regarding removal of extracted archive"
    if removeOnceExtracted:
        packageFileUUID = sys.argv[6].__str__()
        sipDirectory = sys.argv[2].__str__()
        os.remove(replacementDic["%inputFile%"])
        currentLocation =  replacementDic["%inputFile%"].replace(sipDirectory, "%transferDirectory%", 1)
        fileWasRemoved(packageFileUUID, eventOutcomeDetailNote = "removed from: " + currentLocation)

    print "OUTPUT DIRECTORY: ", replacementDic["%outputDirectory%"]
    for w in os.walk(replacementDic["%outputDirectory%"].replace("*", "asterisk*")):
        path, directories, files = w
        for p in files:
            p = os.path.join(path, p)
            #print "path: ", p
            if os.path.isfile(p):
                extractedFiles.append(p)
    for ef in extractedFiles:
        fileUUID = uuid.uuid4().__str__()
        #print "File Extracted:", ef
        if True: #Add the file to the SIP
            #<arguments>"%relativeLocation%" "%SIPObjectsDirectory%" "%SIPLogsDirectory%" "%date%" "%taskUUID%" "%fileUUID%"</arguments>
            sipDirectory = sys.argv[2].__str__()
            transferUUID = sys.argv[3].__str__()
            date = sys.argv[4].__str__()
            taskUUID = sys.argv[5].__str__()
            packageFileUUID = sys.argv[6].__str__()

            filePathRelativeToSIP = ef.replace(sipDirectory,"%transferDirectory%", 1)
            print "File Extracted:: {" + fileUUID + "} ", filePathRelativeToSIP
            eventDetail="Unpacked from: {" + packageFileUUID + "}" + filePathRelativeToSIP
            addFileToTransfer(filePathRelativeToSIP, fileUUID, transferUUID, taskUUID, date, sourceType="unpacking", eventDetail=eventDetail)
            updateSizeAndChecksum(fileUUID, ef, date, uuid.uuid4.__str__())


        run = sys.argv[0].__str__() + \
        " \"" + transcoder.escapeForCommand(ef) + "\""
        if True: #Add the file to the SIP
            run = run + " \"" + transcoder.escapeForCommand(sys.argv[2].__str__()) + "\"" + \
            " \"" + transcoder.escapeForCommand(sys.argv[3].__str__()) + "\"" + \
            " \"" + transcoder.escapeForCommand(sys.argv[4].__str__()) + "\"" + \
            " \"" + transcoder.escapeForCommand(sys.argv[5].__str__()) + "\"" + \
            " \"" + fileUUID + "\""

        exitCode, stdOut, stdError = executeOrRun("command", run)
        print stdOut
        print >>sys.stderr, stdError
        if exitCode != 0 and command.exitCode == 0:
            command.exitCode = exitCode

    global extractedCount
    date = sys.argv[4].__str__().split(".", 1)[0]
    extractedCount = extractedCount + 1
    replacementDic["%outputDirectory%"] = transcoder.fileFullName + '-' + extractedCount.__str__() + '-' + date
Beispiel #11
0
def assign_uuid(filename, package_uuid, transfer_uuid, date, task_uuid, sip_directory, package_filename):
    """Register one extracted file in the transfer under a fresh UUID.

    The file is added with source type "unpacking" and an event detail that
    names the package it was unpacked from; its size and checksum are then
    recorded under a second new UUID, and the assignment is printed.
    """
    placeholder = "%transferDirectory%"
    new_file_uuid = str(uuid.uuid4())

    # Paths are stored with the first occurrence of the SIP directory
    # replaced by the placeholder.
    relative_path = filename.replace(sip_directory, placeholder, 1)
    package_detail = "{} ({})".format(
        package_filename.replace(sip_directory, placeholder, 1), package_uuid
    )

    addFileToTransfer(
        relative_path,
        new_file_uuid,
        transfer_uuid,
        task_uuid,
        date,
        sourceType="unpacking",
        eventDetail="Unpacked from: " + package_detail,
    )
    checksum_event_uuid = str(uuid.uuid4())
    updateSizeAndChecksum(new_file_uuid, filename, date, checksum_event_uuid)

    print('Assigning new file UUID:', new_file_uuid, 'to file', filename)
def onceNormalized(command, opts, replacementDic):
    """Record the outputs of a finished normalization command in the database.

    Collects the file at ``command.outputLocation`` (or every regular file
    below it when it is a directory). When
    ``opts["commandClassifications"]`` is "preservation", each collected file
    is added to the SIP, a "normalization" event is inserted for the source
    file, size and checksum are recorded, a derivation row links source and
    output, and a FilesIDs row stores ``command.outputFormat`` as the format
    name. After each such file a fresh ``%outputFileUUID%`` / ``%postfix%``
    pair is stored in ``replacementDic``.

    If an output location is set but is neither a file nor a directory, the
    problem is reported on stderr and ``command.exitCode`` is set to -2.
    """
    transcodedFiles = []
    if not command.outputLocation:
        # Use "" for a missing location so it can be passed to os.path below.
        command.outputLocation = ""
    if os.path.isfile(command.outputLocation):
        transcodedFiles.append(command.outputLocation)
    elif os.path.isdir(command.outputLocation):
        for w in os.walk(command.outputLocation):
            path, directories, files = w
            for p in files:
                p = os.path.join(path, p)
                if os.path.isfile(p):
                    transcodedFiles.append(p)
    elif command.outputLocation:
        print >>sys.stderr, command
        print >>sys.stderr, "Error - output file does not exist [" + command.outputLocation + "]"
        command.exitCode = -2

    # One event UUID is shared by every output of this command.
    derivationEventUUID = uuid.uuid4().__str__()
    # The event detail starts with the command's primary key and is extended
    # with the event-detail command's stdout when there is one.
    eventDetail = "ArchivematicaFPRCommandID=\"%s\"" % (command.pk)
    if command.eventDetailCommand != None:
        eventDetail = '%s; %s' % (eventDetail, command.eventDetailCommand.stdOut)
    for ef in transcodedFiles:
        if opts["commandClassifications"] == "preservation":
            # TODO Add manual normalization for files of same name mapping
            # Add the new file to the SIP
            filePathRelativeToSIP = ef.replace(opts["sipPath"], "%SIPDirectory%", 1)
            # addFileToSIP(filePathRelativeToSIP, fileUUID, sipUUID, taskUUID, date, sourceType="ingestion"):
            addFileToSIP(filePathRelativeToSIP, replacementDic["%outputFileUUID%"], opts["sipUUID"], uuid.uuid4().__str__(), opts["date"], sourceType="creation", use="preservation")
            # Add event information to the source file
            insertIntoEvents(fileUUID=opts["fileUUID"], \
               eventIdentifierUUID=derivationEventUUID, \
               eventType="normalization", \
               eventDateTime=opts["date"], \
               eventDetail=eventDetail, \
               eventOutcome="", \
               eventOutcomeDetailNote=filePathRelativeToSIP)

            # Calculate the new file's size and checksum
            updateSizeAndChecksum(replacementDic["%outputFileUUID%"], ef, opts["date"], uuid.uuid4().__str__())

            # Add linking information between files
            insertIntoDerivations(sourceFileUUID=opts["fileUUID"], derivedFileUUID=replacementDic["%outputFileUUID%"], relatedEventUUID=derivationEventUUID)

            # NOTE(review): the statement is built by string interpolation and
            # command.outputFormat is not escaped -- a quote in the format
            # name would break it. Confirm whether databaseInterface offers a
            # parameterized call.
            sql = "INSERT INTO FilesIDs (fileUUID, formatName, formatVersion, formatRegistryName, formatRegistryKey) VALUES ('%s', '%s', NULL, NULL, NULL);" % (replacementDic["%outputFileUUID%"], command.outputFormat)
            databaseInterface.runSQL(sql)

            # Prepare a new UUID (and matching postfix) for the next output.
            replacementDic["%outputFileUUID%"] = uuid.uuid4().__str__()
            replacementDic["%postfix%"] = "-" + replacementDic["%outputFileUUID%"]
Beispiel #13
0
def preservation():
    """Add a SIP file entry and update a checksum once per item in ``files``.

    Reads the module-level ``files``, ``filePathRelativeToSIP`` and ``opts``.

    NOTE(review): the items of ``files`` are not used inside the loop, and
    the checksum update targets ``opts.fileUUID`` / ``opts.filePath`` rather
    than the newly generated UUID -- confirm this is intended.
    """
    for _unused in files:
        # Create an entry for the file under a new UUID.
        entry_uuid = str(uuid.uuid4())
        addFileToSIP(filePathRelativeToSIP, entry_uuid, opts.sipUUID,
                     opts.eventIdentifierUUID, opts.date, use=opts.use)
        updateSizeAndChecksum(opts.fileUUID, opts.filePath, opts.date,
                              opts.eventIdentifierUUID)
Beispiel #14
0
def xmlCreateFileAssociationBetween(originalFileFullPath,
                                    outputFromNormalizationFileFullPath,
                                    SIPFullPath,
                                    sipUUID,
                                    eventDetailText,
                                    eventOutcomeDetailNote,
                                    outputFileUUID=""):
    #assign file UUID

    date = databaseInterface.getUTCDate()
    if outputFileUUID == "":
        outputFileUUID = uuid.uuid4().__str__()

    originalFilePathRelativeToSIP = originalFileFullPath.replace(
        SIPFullPath, "%SIPDirectory%", 1)
    sql = "SELECT Files.fileUUID FROM Files WHERE removedTime = 0 AND Files.currentLocation = '" + MySQLdb.escape_string(
        originalFilePathRelativeToSIP
    ) + "' AND Files.sipUUID = '" + sipUUID + "';"
    print sql
    rows = databaseInterface.queryAllSQL(sql)
    print rows
    fileUUID = rows[0][0]

    filePathRelativeToSIP = outputFromNormalizationFileFullPath.replace(
        SIPFullPath, "%SIPDirectory%", 1)
    addFileToSIP(filePathRelativeToSIP,
                 outputFileUUID,
                 sipUUID,
                 uuid.uuid4().__str__(),
                 date,
                 sourceType="creation",
                 use="preservation")
    updateSizeAndChecksum(outputFileUUID, outputFromNormalizationFileFullPath,
                          date,
                          uuid.uuid4().__str__())

    taskUUID = uuid.uuid4().__str__()
    insertIntoEvents(fileUUID=fileUUID, \
               eventIdentifierUUID=taskUUID, \
               eventType="normalization", \
               eventDateTime=date, \
               eventDetail=eventDetailText, \
               eventOutcome="", \
               eventOutcomeDetailNote=eventOutcomeDetailNote)

    insertIntoDerivations(sourceFileUUID=fileUUID,
                          derivedFileUUID=outputFileUUID,
                          relatedEventUUID=taskUUID)
def addFile(filePath,
            transferPath,
            transferUUID,
            date,
            eventDetail="",
            fileUUID=None):
    """Add a file to a transfer and record its size and checksum.

    filePath      -- full path of the file on disk.
    transferPath  -- transfer root; its first occurrence in ``filePath`` is
                     replaced with "%transferDirectory%".
    transferUUID  -- UUID of the transfer the file belongs to.
    date          -- timestamp passed through to both database helpers.
    eventDetail   -- optional detail text for the "unpacking" record.
    fileUUID      -- UUID to register the file under; a new one is generated
                     for every call when omitted.
    """
    # The default must be created per call: a UUID expression in the
    # signature is evaluated once at definition time and would be shared by
    # every call that relies on it.
    if fileUUID is None:
        fileUUID = str(uuid.uuid4())
    taskUUID = str(uuid.uuid4())
    filePathRelativeToSIP = filePath.replace(transferPath,
                                             "%transferDirectory%", 1)
    addFileToTransfer(filePathRelativeToSIP,
                      fileUUID,
                      transferUUID,
                      taskUUID,
                      date,
                      sourceType="unpacking",
                      eventDetail=eventDetail)
    # uuid4 has to be *called*; str() of the bare function would store its
    # repr instead of an event UUID.
    updateSizeAndChecksum(fileUUID, filePath, date, str(uuid.uuid4()))
# (at your option) any later version.
#
# Archivematica is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Archivematica.  If not, see <http://www.gnu.org/licenses/>.

# @package Archivematica
# @subpackage archivematicaClientScript
# @author Joseph Perry <*****@*****.**>
import sys
from optparse import OptionParser
sys.path.append("/usr/lib/archivematica/archivematicaCommon")
from fileOperations import updateSizeAndChecksum

if __name__ == '__main__':
    # Command-line entry point: parse the four string options (each
    # defaulting to "") and pass them straight to updateSizeAndChecksum.
    parser = OptionParser()
    for short_flag, long_flag, dest_name in (
        ("-i", "--fileUUID", "fileUUID"),
        ("-p", "--filePath", "filePath"),
        ("-d", "--date", "date"),
        ("-u", "--eventIdentifierUUID", "eventIdentifierUUID"),
    ):
        parser.add_option(short_flag, long_flag, action="store", dest=dest_name, default="")
    opts, args = parser.parse_args()

    updateSizeAndChecksum(
        opts.fileUUID, opts.filePath, opts.date, opts.eventIdentifierUUID
    )
def onceNormalized(command):
    """Record the outputs of a finished normalization command in the database.

    Works on the module-level ``opts``, ``replacementDic`` and
    ``outputFileUUID``. Collects the file at ``command.outputLocation`` (or
    every regular file below it when it is a directory). When
    ``opts.commandClassifications`` is "preservation", each file is added to
    the SIP, a "normalization" event is inserted for the source file, size
    and checksum are recorded and a derivation row links source and output;
    ``outputFileUUID`` and ``replacementDic["%postfix%"]`` are then renewed.

    If an output location is set but is neither a file nor a directory, the
    problem is reported on stderr and ``command.exitCode`` is set to -2.
    """
    global outputFileUUID
    global replacementDic
    global opts

    transcodedFiles = []
    if not command.outputLocation:
        command.outputLocation = ""
    elif os.path.isfile(command.outputLocation):
        transcodedFiles.append(command.outputLocation)
    elif os.path.isdir(command.outputLocation):
        for path, directories, files in os.walk(command.outputLocation):
            for p in files:
                p = os.path.join(path, p)
                if os.path.isfile(p):
                    transcodedFiles.append(p)
    elif command.outputLocation:
        print >> sys.stderr, command
        print >> sys.stderr, "Error - output file does not exist [" + command.outputLocation + "]"
        command.exitCode = -2

    # One event UUID is shared by every output of this command.
    derivationEventUUID = uuid.uuid4().__str__()
    for ef in transcodedFiles:
        if opts.commandClassifications == "preservation":
            # A command without an event-detail command has no detail text;
            # use "" rather than failing on the missing attribute.
            eventDetail = ""
            if command.eventDetailCommand is not None:
                eventDetail = command.eventDetailCommand.stdOut

            #Add the new file to the sip
            filePathRelativeToSIP = ef.replace(opts.sipPath, "%SIPDirectory%",
                                               1)
            addFileToSIP(filePathRelativeToSIP,
                         outputFileUUID,
                         opts.sipUUID,
                         uuid.uuid4().__str__(),
                         opts.date,
                         sourceType="creation",
                         use="preservation")
            #Calculate new file checksum
            print >> sys.stderr, "TODO: calculate new file checksum"
            #Add event information to current file
            insertIntoEvents(fileUUID=opts.fileUUID,
                             eventIdentifierUUID=derivationEventUUID,
                             eventType="normalization",
                             eventDateTime=opts.date,
                             eventDetail=eventDetail,
                             eventOutcome="",
                             eventOutcomeDetailNote=filePathRelativeToSIP)

            updateSizeAndChecksum(outputFileUUID, ef, opts.date,
                                  uuid.uuid4().__str__())

            #Add linking information between files
            insertIntoDerivations(sourceFileUUID=opts.fileUUID,
                                  derivedFileUUID=outputFileUUID,
                                  relatedEventUUID=derivationEventUUID)

            # Prepare a new UUID (and matching postfix) for the next output.
            outputFileUUID = uuid.uuid4().__str__()
            replacementDic["%postfix%"] = "-" + outputFileUUID
Beispiel #18
0
def once_normalized(job, command, opts, replacement_dict):
    """Persist the results of a completed normalization command.

    Invoked as a callback by transcoder.Command.

    Every output file found at ``command.output_location`` is added to the
    SIP and has its size and checksum recorded, unless ``opts.purpose``
    contains "thumbnails". Preservation outputs additionally get a
    derivation event; all other outputs get a plain Derivation row without
    an event. The output format of the command is stored as the file's
    format version and as a FileID row.
    """
    if not command.output_location:
        command.output_location = ""
    location = command.output_location

    # Gather the produced files: either the single output file or every
    # regular file below the output directory.
    produced = []
    if os.path.isfile(location):
        produced.append(location)
    elif os.path.isdir(location):
        for root, _, names in os.walk(location):
            candidates = (os.path.join(root, name) for name in names)
            produced.extend(c for c in candidates if os.path.isfile(c))
    elif location:
        job.print_error("Error - output file does not exist [", location, "]")
        command.exit_code = -2

    # One derivation event UUID and one event detail string are shared by
    # all outputs of this command.
    derivation_event_uuid = str(uuid.uuid4())
    event_detail_output = 'ArchivematicaFPRCommandID="{}"'.format(
        command.fpcommand.uuid)
    if command.event_detail_command is not None:
        event_detail_output = event_detail_output + '; {}'.format(
            command.event_detail_command.std_out)

    for transcoded_path in produced:
        if "thumbnails" in opts.purpose:
            continue

        now = timezone.now()
        # The new file's UUID is the task UUID, matching the UUID on disk.
        new_file_uuid = opts.task_uuid
        sip_relative_path = transcoded_path.replace(
            opts.sip_path, "%SIPDirectory%", 1)

        # TODO Add manual normalization for files of same name mapping?
        fileOperations.addFileToSIP(
            sip_relative_path,
            new_file_uuid,
            opts.sip_uuid,
            opts.task_uuid,
            now,
            sourceType="creation",
            use=opts.purpose,
        )

        # The checksum event gets a UUID of its own.
        fileOperations.updateSizeAndChecksum(
            new_file_uuid, transcoded_path, now, str(uuid.uuid4()))

        if "preservation" not in opts.purpose:
            # Other derivatives are linked in the Derivations table but get
            # no PREMIS event because they will not appear in the METS.
            Derivation(
                source_file_id=opts.file_uuid,
                derived_file_id=new_file_uuid,
                event=None,
            ).save()
        else:
            # Preservation copies get both the event and the derivation.
            insert_derivation_event(
                original_uuid=opts.file_uuid,
                output_uuid=new_file_uuid,
                derivation_uuid=derivation_event_uuid,
                event_detail_output=event_detail_output,
                outcome_detail_note=sip_relative_path,
                today=now,
            )

        # Store the identification taken from the normalization command.
        FileFormatVersion(
            file_uuid_id=new_file_uuid,
            format_version=command.fpcommand.output_format,
        ).save()

        FileID.objects.create(
            file_id=new_file_uuid,
            format_name=command.fpcommand.output_format.format.description)
from custom_handlers import get_script_logger
from fileOperations import updateSizeAndChecksum

if __name__ == '__main__':
    # Command-line entry point: set up the script logger, parse the four
    # string options (each defaulting to "") and pass them straight to
    # updateSizeAndChecksum.
    logger = get_script_logger(
        "archivematica.mcp.client.updateSizeAndChecksum")

    parser = OptionParser()
    for short_flag, long_flag, dest_name in (
        ("-i", "--fileUUID", "fileUUID"),
        ("-p", "--filePath", "filePath"),
        ("-d", "--date", "date"),
        ("-u", "--eventIdentifierUUID", "eventIdentifierUUID"),
    ):
        parser.add_option(short_flag,
                          long_flag,
                          action="store",
                          dest=dest_name,
                          default="")
    opts, args = parser.parse_args()

    updateSizeAndChecksum(opts.fileUUID, opts.filePath, opts.date,
                          opts.eventIdentifierUUID)
def onceNormalized(command, opts, replacementDic):
    """Record the outputs of a finished normalization command in the database.

    Collects the file at ``command.outputLocation`` (or every regular file
    below it when it is a directory). When
    ``opts["commandClassifications"]`` is "preservation", each collected file
    is added to the SIP, a "normalization" event is inserted for the source
    file, size and checksum are recorded, a derivation row links source and
    output, and a FilesIDs row stores ``command.outputFormat`` as the format
    name. After each such file a fresh ``%outputFileUUID%`` / ``%postfix%``
    pair is stored in ``replacementDic``.

    If an output location is set but is neither a file nor a directory, the
    problem is reported on stderr and ``command.exitCode`` is set to -2.
    """
    transcodedFiles = []
    if not command.outputLocation:
        # Use "" for a missing location so it can be passed to os.path below.
        command.outputLocation = ""
    if os.path.isfile(command.outputLocation):
        transcodedFiles.append(command.outputLocation)
    elif os.path.isdir(command.outputLocation):
        for w in os.walk(command.outputLocation):
            path, directories, files = w
            for p in files:
                p = os.path.join(path, p)
                if os.path.isfile(p):
                    transcodedFiles.append(p)
    elif command.outputLocation:
        print >> sys.stderr, command
        print >> sys.stderr, "Error - output file does not exist [" + command.outputLocation + "]"
        command.exitCode = -2

    # One event UUID is shared by every output of this command.
    derivationEventUUID = uuid.uuid4().__str__()
    # The event detail starts with the command's primary key and is extended
    # with the event-detail command's stdout when there is one.
    eventDetail = "ArchivematicaFPRCommandID=\"%s\"" % (command.pk)
    if command.eventDetailCommand != None:
        eventDetail = '%s; %s' % (eventDetail,
                                  command.eventDetailCommand.stdOut)
    for ef in transcodedFiles:
        if opts["commandClassifications"] == "preservation":
            # TODO Add manual normalization for files of same name mapping
            # Add the new file to the SIP
            filePathRelativeToSIP = ef.replace(opts["sipPath"],
                                               "%SIPDirectory%", 1)
            # addFileToSIP(filePathRelativeToSIP, fileUUID, sipUUID, taskUUID, date, sourceType="ingestion"):
            addFileToSIP(filePathRelativeToSIP,
                         replacementDic["%outputFileUUID%"],
                         opts["sipUUID"],
                         uuid.uuid4().__str__(),
                         opts["date"],
                         sourceType="creation",
                         use="preservation")
            # Add event information to the source file
            insertIntoEvents(fileUUID=opts["fileUUID"], \
               eventIdentifierUUID=derivationEventUUID, \
               eventType="normalization", \
               eventDateTime=opts["date"], \
               eventDetail=eventDetail, \
               eventOutcome="", \
               eventOutcomeDetailNote=filePathRelativeToSIP)

            # Calculate the new file's size and checksum
            updateSizeAndChecksum(replacementDic["%outputFileUUID%"], ef,
                                  opts["date"],
                                  uuid.uuid4().__str__())

            # Add linking information between files
            insertIntoDerivations(
                sourceFileUUID=opts["fileUUID"],
                derivedFileUUID=replacementDic["%outputFileUUID%"],
                relatedEventUUID=derivationEventUUID)

            # NOTE(review): the statement is built by string interpolation and
            # command.outputFormat is not escaped -- a quote in the format
            # name would break it. Confirm whether databaseInterface offers a
            # parameterized call.
            sql = "INSERT INTO FilesIDs (fileUUID, formatName, formatVersion, formatRegistryName, formatRegistryKey) VALUES ('%s', '%s', NULL, NULL, NULL);" % (
                replacementDic["%outputFileUUID%"], command.outputFormat)
            databaseInterface.runSQL(sql)

            # Prepare a new UUID (and matching postfix) for the next output.
            replacementDic["%outputFileUUID%"] = uuid.uuid4().__str__()
            replacementDic[
                "%postfix%"] = "-" + replacementDic["%outputFileUUID%"]