Example #1
0
def addFileToTransfer(
    filePathRelativeToSIP,
    fileUUID,
    transferUUID,
    taskUUID,
    date,
    sourceType="ingestion",
    eventDetail="",
    use="original",
    originalLocation=None,
):
    """Register a file that belongs to a transfer and record its first events.

    The file row is created through ``insertIntoFiles``; an event of type
    ``sourceType`` is then stored with ``insertIntoEvents`` and finally
    ``addAccessionEvent`` is called for the file/transfer pair.

    ``taskUUID`` is accepted but not used by this function. A falsy
    ``originalLocation`` falls back to ``filePathRelativeToSIP``.

    Returns the value produced by ``insertIntoFiles``.
    """
    location = originalLocation or filePathRelativeToSIP

    created = insertIntoFiles(
        fileUUID,
        filePathRelativeToSIP,
        date,
        transferUUID=transferUUID,
        use=use,
        originalLocation=location,
    )

    # The initial event carries no outcome information.
    event_fields = {
        "fileUUID": fileUUID,
        "eventType": sourceType,
        "eventDateTime": date,
        "eventDetail": eventDetail,
        "eventOutcome": "",
        "eventOutcomeDetailNote": "",
    }
    insertIntoEvents(**event_fields)

    addAccessionEvent(fileUUID, transferUUID, date)
    return created
def create_db_entries(job, mapping, dataverse_agent_id):
    """
    Record a "derivation" event and a derivation link for every derivative
    entry found in ``mapping``.

    ``mapping`` is iterated as ``entry -> file_entry`` pairs; only entries
    with a truthy ``derived_from`` and ``use == 'derivative'`` are processed.
    The source file is looked up in the same mapping via
    ``entry.derived_from``.
    """
    for entry, derived_file in mapping.items():
        if not (entry.derived_from and entry.use == 'derivative'):
            continue

        source_uuid = mapping[entry.derived_from].uuid
        derivation_event_id = uuid.uuid4()

        # Event on the source file; date/detail/outcome are left blank
        # (the original code asks whether Dataverse could supply them).
        databaseFunctions.insertIntoEvents(
            source_uuid,
            eventIdentifierUUID=derivation_event_id,
            eventType="derivation",
            eventDateTime=None,
            eventDetail="",
            eventOutcome="",
            eventOutcomeDetailNote=derived_file.currentlocation,
            agents=[dataverse_agent_id],
        )

        # Link source and derived file through the event just created.
        databaseFunctions.insertIntoDerivations(
            sourceFileUUID=source_uuid,
            derivedFileUUID=derived_file.uuid,
            relatedEventUUID=derivation_event_id,
        )

        job.pyprint(
            'Added derivation from', source_uuid, 'to', derived_file.uuid)
def includeFits(fits, xmlFile, date, eventUUID, fileUUID):
    """Record format-identification and validation events from FITS output.

    One "format identification" event is inserted for each detail note
    returned by ``formatIdentificationFITSAssist``, followed by a single
    "validation" event built from ``formatValidationFITSAssist``. If the
    validation helper raises, a "Failed" validation event is recorded and
    the module-level ``exitCode`` is increased by 3.

    ``xmlFile`` and ``eventUUID`` are accepted but not used here.
    """
    global exitCode
    # TODO: glean the event outcome information from the output.

    eventDetailText, eventOutcomeText, eventOutcomeDetailNotes = formatIdentificationFITSAssist(fits, fileUUID)

    for eventOutcomeDetailNote in eventOutcomeDetailNotes:
        insertIntoEvents(fileUUID=fileUUID,
                         eventIdentifierUUID=str(uuid.uuid4()),
                         eventType="format identification",
                         eventDateTime=date,
                         eventDetail=eventDetailText,
                         eventOutcome=eventOutcomeText,
                         eventOutcomeDetailNote=eventOutcomeDetailNote)

    try:
        eventDetailText, eventOutcomeText, eventOutcomeDetailNote = formatValidationFITSAssist(fits)
    except Exception:
        # Only ordinary errors are turned into a "Failed" event; a bare
        # ``except:`` would also swallow SystemExit / KeyboardInterrupt.
        eventDetailText = "Failed"
        eventOutcomeText = "Failed"
        eventOutcomeDetailNote = "Failed"
        exitCode += 3
    insertIntoEvents(fileUUID=fileUUID,
                     eventIdentifierUUID=str(uuid.uuid4()),
                     eventType="validation",
                     eventDateTime=date,
                     eventDetail=eventDetailText,
                     eventOutcome=eventOutcomeText,
                     eventOutcomeDetailNote=eventOutcomeDetailNote)
Example #4
0
def includeFits(fits, xmlFile, date, eventUUID, fileUUID):
    """Record format-identification and validation events from FITS output.

    One "format identification" event is inserted for each detail note
    returned by ``formatIdentificationFITSAssist``, followed by a single
    "validation" event built from ``formatValidationFITSAssist``. If the
    validation helper raises, a "Failed" validation event is recorded and
    the module-level ``exitCode`` is increased by 3.

    ``xmlFile`` and ``eventUUID`` are accepted but not used here.
    """
    global exitCode
    # TODO: glean the event outcome information from the output.

    eventDetailText, eventOutcomeText, eventOutcomeDetailNotes = formatIdentificationFITSAssist(
        fits, fileUUID)

    for eventOutcomeDetailNote in eventOutcomeDetailNotes:
        insertIntoEvents(fileUUID=fileUUID,
                         eventIdentifierUUID=str(uuid.uuid4()),
                         eventType="format identification",
                         eventDateTime=date,
                         eventDetail=eventDetailText,
                         eventOutcome=eventOutcomeText,
                         eventOutcomeDetailNote=eventOutcomeDetailNote)

    try:
        eventDetailText, eventOutcomeText, eventOutcomeDetailNote = formatValidationFITSAssist(
            fits)
    except Exception:
        # Only ordinary errors are turned into a "Failed" event; a bare
        # ``except:`` would also swallow SystemExit / KeyboardInterrupt.
        eventDetailText = "Failed"
        eventOutcomeText = "Failed"
        eventOutcomeDetailNote = "Failed"
        exitCode += 3
    insertIntoEvents(fileUUID=fileUUID,
                     eventIdentifierUUID=str(uuid.uuid4()),
                     eventType="validation",
                     eventDateTime=date,
                     eventDetail=eventDetailText,
                     eventOutcome=eventOutcomeText,
                     eventOutcomeDetailNote=eventOutcomeDetailNote)
Example #5
0
def verifyChecksum(fileUUID, filePath, date, eventIdentifierUUID):
    """Compare a file's stored checksum with a freshly computed one.

    The stored value is read from the ``File`` row with ``uuid=fileUUID``;
    the current value comes from ``sha_for_file(filePath)``. A "fixity
    check" event with outcome "Pass" or "Fail" is inserted, then the process
    exits with status 0 (match) or 2 (mismatch). When the database holds no
    checksum (``None``, ``""`` or the string ``"None"``) nothing is recorded
    and the process exits with status 1.

    This function never returns normally: every path ends in ``sys.exit``.
    """
    f = File.objects.get(uuid=fileUUID)

    # ``None`` is included so a NULL checksum is reported here instead of
    # raising TypeError in the string concatenation further down.
    if f.checksum in (None, "", "None"):
        print >> sys.stderr, "No checksum found in database for file:", fileUUID, filePath
        sys.exit(1)
    checksumFile = sha_for_file(filePath)

    if checksumFile != f.checksum:
        eventOutcomeDetailNote = str(checksumFile) + " != " + f.checksum
        eventOutcome = "Fail"
        exitCode = 2
        print >> sys.stderr, "Checksums do not match:", fileUUID, filePath
        print >> sys.stderr, eventOutcomeDetailNote
    else:
        eventOutcomeDetailNote = "%s %s" % (str(checksumFile), "verified")
        eventOutcome = "Pass"
        exitCode = 0

    databaseFunctions.insertIntoEvents(fileUUID=fileUUID,
                 eventIdentifierUUID=eventIdentifierUUID,
                 eventType="fixity check",
                 eventDateTime=date,
                 eventOutcome=eventOutcome,
                 eventOutcomeDetailNote=eventOutcomeDetailNote,
                 eventDetail="program=\"python\"; module=\"hashlib.sha256()\"")

    # sys.exit rather than the site-provided ``exit`` helper, which is meant
    # for interactive use and is not guaranteed to exist.
    sys.exit(exitCode)
Example #6
0
def create_db_entries(job, mapping, dataverse_agent_id):
    """
    Record a "derivation" event and a derivation link for each derivative
    entry in ``mapping``.

    Only entries with a truthy ``derived_from`` and ``use == "derivative"``
    are processed; the source file is looked up in ``mapping`` through
    ``entry.derived_from``. A ``django.db.IntegrityError`` raised while
    writing is re-raised as ``ParseDataverseError``.
    """
    for entry, derived_file in mapping.items():
        if not (entry.derived_from and entry.use == "derivative"):
            continue

        source_uuid = mapping[entry.derived_from].uuid
        derivation_event_id = uuid.uuid4()
        try:
            # Event on the source file.
            databaseFunctions.insertIntoEvents(
                source_uuid,
                eventIdentifierUUID=derivation_event_id,
                eventType="derivation",
                eventDateTime=None,
                eventDetail="",
                eventOutcome="",
                eventOutcomeDetailNote=derived_file.currentlocation,
                agents=[dataverse_agent_id],
            )
            # Link between source and derived file.
            databaseFunctions.insertIntoDerivations(
                sourceFileUUID=source_uuid,
                derivedFileUUID=derived_file.uuid,
                relatedEventUUID=derivation_event_id,
            )
            job.pyprint(
                "Added derivation from", source_uuid, "to", derived_file.uuid
            )
        except django.db.IntegrityError:
            message = "Database integrity error, entry: {} for file {}".format(
                derived_file.currentlocation, derived_file.originallocation
            )
            raise ParseDataverseError(message)
Example #7
0
def insert_derivation_event(original_uuid,
                            output_uuid,
                            derivation_uuid,
                            event_detail_output,
                            outcome_detail_note,
                            today=None):
    """Store a "normalization" event on the original file and link it to the
    derived file.

    ``today`` defaults to ``timezone.now()`` when it is ``None``. A falsy
    ``outcome_detail_note`` is stored as an empty string.
    """
    event_time = timezone.now() if today is None else today
    detail_note = outcome_detail_note or ""

    # Event recorded against the original (source) file.
    databaseFunctions.insertIntoEvents(
        fileUUID=original_uuid,
        eventIdentifierUUID=derivation_uuid,
        eventType="normalization",
        eventDateTime=event_time,
        eventDetail=event_detail_output,
        eventOutcome="",
        eventOutcomeDetailNote=detail_note,
    )

    # Derivation row tying source, output and the event together.
    databaseFunctions.insertIntoDerivations(
        sourceFileUUID=original_uuid,
        derivedFileUUID=output_uuid,
        relatedEventUUID=derivation_uuid,
    )
Example #8
0
def _record_backlog_event(transfer_id, transfer_path, created_at):
    """Add a "placement in backlog" event for each transfer file, both to the
    transfer METS document and to the database.

    The METS file is read from
    ``<transfer_path>/metadata/submissionDocumentation/METS.xml`` and written
    back to the same path after all files have been processed. Files that
    have no matching entry in the METS (matched on file UUID) are skipped.
    """
    event_type = "placement in backlog"
    mets_path = os.path.join(
        transfer_path, "metadata", "submissionDocumentation", "METS.xml"
    )
    mets = metsrw.METSDocument().fromfile(mets_path)

    # Index the METS entries by file UUID once, so the loop below does
    # dictionary lookups instead of re-scanning all_files().
    entries_by_uuid = dict((e.file_uuid, e) for e in mets.all_files())

    # The same agents are attached to every file's event.
    agents = _transfer_agents(transfer_id)

    for file_obj in File.objects.filter(transfer_id=transfer_id).iterator():
        if file_obj.uuid not in entries_by_uuid:
            continue
        fsentry = entries_by_uuid[file_obj.uuid]

        event_id = str(uuid.uuid4())
        premis_event = _premis_event_data(
            event_id, event_type, created_at, agents
        )
        fsentry.add_premis_event(premis_event)
        insertIntoEvents(
            fileUUID=file_obj.uuid,
            eventIdentifierUUID=event_id,
            eventType=event_type,
            eventDateTime=created_at,
            agents=agents,
        )

    mets.write(mets_path, pretty_print=True)
Example #9
0
def main(file_path, file_uuid, sip_uuid):
    """Run the validation rules for a file and record a validation event per
    successful command.

    Rules are the active ``FPRule`` objects with purpose ``'validation'`` for
    the file's format; when the format is unknown or has no such rules, the
    ``'default_validation'`` rules are used instead.

    Each command is expected to print a mapping with the keys
    ``eventOutcomeInformation`` and ``eventOutcomeDetailNote``, e.g.::

        { "eventOutcomeInformation": "pass",
          "eventOutcomeDetailNote": "format=\\"JPEG\\"; version=\\"1.01\\"; result=\\"Well-Formed and valid\\"" }

    Returns 0 when every command exited with status 0, otherwise -1. A
    command with a non-zero exit status is reported on stderr and produces no
    event. Output that cannot be parsed raises ``ValueError``.
    """
    # Function-scope import: only needed for the JSON fallback below.
    import json

    failed = False
    rules = None

    # Get file format
    try:
        fmt = FormatVersion.active.get(fileformatversion__file_uuid=file_uuid)
    except FormatVersion.DoesNotExist:
        fmt = None

    if fmt:
        rules = FPRule.active.filter(format=fmt.uuid, purpose='validation')

    # Fall back to the default rules when none apply to this format.
    if not rules:
        rules = FPRule.active.filter(purpose='default_validation')

    for rule in rules:
        if rule.command.script_type in ('bashScript', 'command'):
            command_to_execute = replace_string_values(rule.command.command,
                file_=file_uuid, sip=sip_uuid, type_='file')
            args = []
        else:
            command_to_execute = rule.command.command
            args = [file_path]

        print('Running', rule.command.description)
        exitstatus, stdout, stderr = executeOrRun(rule.command.script_type,
            command_to_execute, arguments=args)
        if exitstatus != 0:
            print('Command {} failed with exit status {}; stderr:'.format(rule.command.description, exitstatus),
                stderr, file=sys.stderr)
            failed = True
            continue

        print('Command {} completed with output {}'.format(rule.command.description, stdout))

        # literal_eval is tried first so output that parsed before keeps
        # parsing identically; it rejects JSON-only tokens such as
        # true/false/null, so JSON parsing is the fallback.
        try:
            output = ast.literal_eval(stdout)
        except (ValueError, SyntaxError):
            output = json.loads(stdout)
        event_detail = 'program="{tool.description}"; version="{tool.version}"'.format(tool=rule.command.tool)

        print('Creating validation event for {} ({})'.format(file_path, file_uuid))

        databaseFunctions.insertIntoEvents(
            fileUUID=file_uuid,
            eventType='validation',
            eventDetail=event_detail,
            eventOutcome=output.get('eventOutcomeInformation'),
            eventOutcomeDetailNote=output.get('eventOutcomeDetailNote'),
        )

    return -1 if failed else 0
Example #10
0
def xmlCreateFileAssociationBetween(originalFileFullPath, outputFromNormalizationFileFullPath, SIPFullPath, sipUUID, eventDetailText, eventOutcomeDetailNote, outputFileUUID=""):
    """Register a normalized output file and link it to its original.

    The original file's UUID is looked up in the ``Files`` table by its
    SIP-relative location and ``sipUUID``. The output file is added to the
    SIP with use "preservation", its size and checksum are updated, a
    "normalization" event is recorded on the original file, and a derivation
    row links the two files through that event.

    When ``outputFileUUID`` is the empty string a new UUID is generated.

    Raises ``IndexError`` if no ``Files`` row matches the original location.
    """
    date = databaseInterface.getUTCDate()
    if outputFileUUID == "":
        outputFileUUID = str(uuid.uuid4())

    originalFilePathRelativeToSIP = originalFileFullPath.replace(SIPFullPath, "%SIPDirectory%", 1)
    # Both interpolated values are escaped; the original left sipUUID raw.
    sql = ("SELECT Files.fileUUID FROM Files WHERE removedTime = 0 AND Files.currentLocation = '"
           + MySQLdb.escape_string(originalFilePathRelativeToSIP)
           + "' AND Files.sipUUID = '" + MySQLdb.escape_string(sipUUID) + "';")
    # Single-argument print(...) produces the same output under Python 2's
    # print statement and Python 3's print function.
    print(sql)
    rows = databaseInterface.queryAllSQL(sql)
    print(rows)
    fileUUID = rows[0][0]

    filePathRelativeToSIP = outputFromNormalizationFileFullPath.replace(SIPFullPath, "%SIPDirectory%", 1)
    addFileToSIP(filePathRelativeToSIP, outputFileUUID, sipUUID, str(uuid.uuid4()), date, sourceType="creation", use="preservation")
    updateSizeAndChecksum(outputFileUUID, outputFromNormalizationFileFullPath, date, str(uuid.uuid4()))

    # The same identifier is used for the event and the derivation link.
    taskUUID = str(uuid.uuid4())
    insertIntoEvents(fileUUID=fileUUID,
               eventIdentifierUUID=taskUUID,
               eventType="normalization",
               eventDateTime=date,
               eventDetail=eventDetailText,
               eventOutcome="",
               eventOutcomeDetailNote=eventOutcomeDetailNote)

    insertIntoDerivations(sourceFileUUID=fileUUID, derivedFileUUID=outputFileUUID, relatedEventUUID=taskUUID)
Example #11
0
def includeFits(fits, xmlFile, date, eventUUID, fileUUID):
    """Record format-identification and validation events from FITS output.

    One "format identification" event is inserted for each detail note
    returned by ``formatIdentificationFITSAssist``, followed by a single
    "validation" event built from ``formatValidationFITSAssist``. If the
    validation helper raises, a "Failed" validation event is recorded and
    the module-level ``exitCode`` is increased by 3.

    ``xmlFile`` and ``eventUUID`` are accepted but not used here.
    """
    global exitCode
    # TODO: glean the event outcome information from the output.

    eventDetailText, eventOutcomeText, eventOutcomeDetailNotes = formatIdentificationFITSAssist(
        fits, fileUUID)

    # The return value is not used; the call is kept because any side effects
    # of createOutcomeInformation are not visible from here.
    # TODO: confirm it is side-effect free and remove.
    createOutcomeInformation("To be removed", eventOutcomeText)

    for eventOutcomeDetailNote in eventOutcomeDetailNotes:
        insertIntoEvents(fileUUID=fileUUID,
                         eventIdentifierUUID=str(uuid.uuid4()),
                         eventType="format identification",
                         eventDateTime=date,
                         eventDetail=eventDetailText,
                         eventOutcome=eventOutcomeText,
                         eventOutcomeDetailNote=eventOutcomeDetailNote)

    try:
        eventDetailText, eventOutcomeText, eventOutcomeDetailNote = formatValidationFITSAssist(
            fits)
    except Exception:
        eventDetailText = "Failed"
        eventOutcomeText = "Failed"
        # Must be the singular name read by the insert below. The original
        # assigned ``eventOutcomeDetailNotes``, so a failed validation reused
        # the last identification note (or hit NameError if there were none).
        eventOutcomeDetailNote = "Failed"
        exitCode += 3
    insertIntoEvents(fileUUID=fileUUID,
                     eventIdentifierUUID=str(uuid.uuid4()),
                     eventType="validation",
                     eventDateTime=date,
                     eventDetail=eventDetailText,
                     eventOutcome=eventOutcomeText,
                     eventOutcomeDetailNote=eventOutcomeDetailNote)
Example #12
0
def call(jobs):
    """Verify and restructure a bag for each job, all inside one database
    transaction.

    For every job, ``job.args[1]`` is the bag path and ``job.args[2]`` the
    transfer UUID. A non-zero result from ``verify_bag`` is reported and set
    as the job status. Otherwise the bag is restructured and a passing
    "fixity check" event is recorded for every non-removed file of the
    transfer located under ``%transferDirectory%objects/``.
    """
    with transaction.atomic():
        for job in jobs:
            with job.JobContext():
                target, transferUUID = job.args[1], job.args[2]

                exitCode = verify_bag(job, target)
                if exitCode != 0:
                    job.pyprint("Failed bagit compliance. Not restructuring.", file=sys.stderr)
                    job.set_status(exitCode)
                    continue

                try:
                    restructureBagForComplianceFileUUIDsAssigned(job, target, transferUUID)
                except fileOperations.UpdateFileLocationFailed as e:
                    job.set_status(e.code)
                    continue

                payload_files = File.objects.filter(
                    removedtime__isnull=True,
                    transfer_id=transferUUID,
                    currentlocation__startswith="%transferDirectory%objects/",
                ).values_list('uuid')
                # values_list yields 1-tuples, hence the unpacking.
                for (file_uuid,) in payload_files:
                    insertIntoEvents(fileUUID=file_uuid,
                                     eventType="fixity check",
                                     eventDetail="Bagit - verifypayloadmanifests",
                                     eventOutcome="Pass")

                job.set_status(exitCode)
Example #13
0
def updateSizeAndChecksum(fileUUID,
                          filePath,
                          date,
                          eventIdentifierUUID,
                          fileSize=None,
                          checksum=None,
                          checksumType=None,
                          add_event=True):
    """
    Store size, checksum and checksum type on the File identified by
    ``fileUUID``.

    Any of the three values that is falsy is computed here: the size from
    ``os.path.getsize(filePath)``, the checksum type from the
    ``checksum_type`` setting (default ``sha256``) and the checksum from
    ``get_file_checksum``.

    Unless ``add_event`` is false, a "message digest calculation" event is
    inserted afterwards. ``eventIdentifierUUID`` is accepted but not used by
    this function.
    """
    fileSize = fileSize or os.path.getsize(filePath)
    checksumType = checksumType or get_setting('checksum_type', 'sha256')
    checksum = checksum or get_file_checksum(filePath, checksumType)

    matching_files = File.objects.filter(uuid=fileUUID)
    matching_files.update(size=fileSize,
                          checksum=checksum,
                          checksumtype=checksumType)

    if not add_event:
        return

    detail = 'program="python"; module="hashlib.{}()"'.format(checksumType)
    insertIntoEvents(
        fileUUID=fileUUID,
        eventType='message digest calculation',
        eventDateTime=date,
        eventDetail=detail,
        eventOutcomeDetailNote=checksum)
 def test_insert_into_events(self):
     """Inserting an event with a new identifier adds exactly one Event."""
     event_id = "new_event"

     def stored_events():
         # A fresh query each time so the count reflects the current rows.
         return Event.objects.filter(event_id=event_id).count()

     assert stored_events() == 0
     databaseFunctions.insertIntoEvents(
         fileUUID="88c8f115-80bc-4da4-a1e6-0158f5df13b9",
         eventIdentifierUUID=event_id,
     )
     assert stored_events() == 1
Example #15
0
def updateFileLocation(src, dst, eventType, eventDateTime, eventDetail, eventIdentifierUUID=None, fileUUID="None", sipUUID=None, transferUUID=None, eventOutcomeDetailNote=""):
    """Record an event for a file move/rename and update its current location.

    If ``fileUUID`` is empty or the string "None", the file is looked up in
    the ``Files`` table by ``src`` and ``sipUUID`` (preferred) or
    ``transferUUID``; when several rows match, the last one fetched wins.
    If no file UUID can be determined, a message is written to stderr and the
    process exits with status 6.

    ``eventIdentifierUUID`` defaults to a new UUID generated on each call.
    An empty ``eventOutcomeDetailNote`` is replaced by a note naming the
    original and cleaned-up paths.
    """
    # Generated per call: a ``uuid.uuid4()`` default in the signature is
    # evaluated only once, so every defaulted event shared one identifier.
    if eventIdentifierUUID is None:
        eventIdentifierUUID = str(uuid.uuid4())

    src = unicodeToStr(src)
    dst = unicodeToStr(dst)
    fileUUID = unicodeToStr(fileUUID)
    if not fileUUID or fileUUID == "None":
        # Reset so an unsuccessful lookup reaches the "unable to find" guard
        # below instead of carrying the truthy placeholder "None" forward.
        fileUUID = ""
        sql = None
        if sipUUID:
            sql = "SELECT Files.fileUUID FROM Files WHERE removedTime = 0 AND Files.currentLocation = '" + MySQLdb.escape_string(src) + "' AND Files.sipUUID = '" + MySQLdb.escape_string(sipUUID) + "';"
        elif transferUUID:
            sql = "SELECT Files.fileUUID FROM Files WHERE removedTime = 0 AND Files.currentLocation = '" + MySQLdb.escape_string(src) + "' AND Files.transferUUID = '" + MySQLdb.escape_string(transferUUID) + "';"
        else:
            # Previously this message was sent to the database as a query.
            print >>sys.stderr, "Need to define transferUUID or sipUUID"
        if sql is not None:
            c, sqlLock = databaseInterface.querySQL(sql)
            try:
                row = c.fetchone()
                while row is not None:
                    fileUUID = unicodeToStr(row[0])
                    row = c.fetchone()
            finally:
                # Release the lock even if fetching fails.
                sqlLock.release()

    if eventOutcomeDetailNote == "":
        eventOutcomeDetailNote = "Original name=\"%s\"; cleaned up name=\"%s\"" %(src, dst)
    #CREATE THE EVENT
    if not fileUUID:
        print >>sys.stderr, "Unable to find file uuid for: ", src, " -> ", dst
        sys.exit(6)
    insertIntoEvents(fileUUID=fileUUID, eventIdentifierUUID=eventIdentifierUUID, eventType=eventType, eventDateTime=eventDateTime, eventDetail=eventDetail, eventOutcome="", eventOutcomeDetailNote=eventOutcomeDetailNote)

    #UPDATE THE CURRENT FILE PATH
    sql =  """UPDATE Files SET currentLocation='%s' WHERE fileUUID='%s';""" % (MySQLdb.escape_string(dst), MySQLdb.escape_string(fileUUID))
    databaseInterface.runSQL(sql)
Example #16
0
 def _execute_rule_command(self, rule):
     """Run the FPR command of ``rule`` against this script's file and
     return ``'passed'`` or ``'failed'``.

     The command must print JSON; malformed output is logged and the
     ``ValueError`` re-raised. For ``preservation`` and ``original`` files
     the parsed output is handed to ``_save_to_logs_dir``. A non-zero exit
     status is reported and yields ``'failed'`` without creating an event.
     Otherwise a validation event is stored (except for manually normalized
     access derivatives) and the result reflects whether the command
     reported a ``pass`` outcome.
     """
     command_to_execute, args = self._get_command_to_execute(rule)
     description = rule.command.description
     self.job.pyprint('Running', description)
     exitstatus, stdout, stderr = executeOrRun(
         rule.command.script_type,
         command_to_execute,
         arguments=args,
         printing=False,
         capture_output=True)

     try:
         output = json.loads(stdout)
     except ValueError:
         logger.exception(
             'Unable to load an object from the malformed JSON in\n%s',
             stdout)
         raise

     if self.file_type in ('preservation', 'original'):
         self._save_to_logs_dir(output)

     if exitstatus != 0:
         self.job.print_error(
             'Command {} failed with exit status {}; stderr:'.format(
                 description, exitstatus), stderr)
         return 'failed'
     self.job.pyprint('Command {} completed with output {}'.format(
         description, stdout))

     event_detail = ('program="{tool.description}";'
                     ' version="{tool.version}"'.format(
                         tool=rule.command.tool))
     outcome = output.get('eventOutcomeInformation')
     details = output.get('eventOutcomeDetailNote')

     if outcome == 'pass':
         result = 'passed'
     else:
         self.job.print_error(
             'Command {descr} returned a non-pass outcome for the policy'
             ' check;\n\noutcome: {outcome}\n\ndetails: {details}.'.format(
                 descr=description,
                 outcome=outcome,
                 details=details))
         result = 'failed'

     self.job.pyprint('Creating policy checking event for {} ({})'.format(
         self.file_path, self.file_uuid))

     # Manually-normalized access derivatives have no file UUID, so no
     # validation event can be created for them. Open question from the
     # original author: should the UUID assigned to the derivative during
     # transfer be used instead?
     if self.is_manually_normalized_access_derivative:
         return result

     databaseFunctions.insertIntoEvents(
         fileUUID=self.file_uuid,
         eventType='validation',  # From PREMIS controlled vocab.
         eventDetail=event_detail,
         eventOutcome=outcome,
         eventOutcomeDetailNote=details,
     )
     return result
def onceNormalized(command, opts, replacementDic):
    """Register the files produced by a normalization command.

    ``command.outputLocation`` may be a single file or a directory (walked
    recursively). A non-empty location that is neither is reported on stderr
    and ``command.exitCode`` is set to -2; a falsy location is normalized to
    the empty string.

    When ``opts["commandClassifications"]`` is "preservation", each output
    file is added to the SIP, a "normalization" event is recorded on the
    source file, size/checksum are updated for the output, and a derivation
    row links the two. After each file a fresh ``%outputFileUUID%`` (and
    matching ``%postfix%``) is stored in ``replacementDic``.
    """
    if not command.outputLocation:
        command.outputLocation = ""
    output_location = command.outputLocation

    transcodedFiles = []
    if os.path.isfile(output_location):
        transcodedFiles.append(output_location)
    elif os.path.isdir(output_location):
        for root, _dirs, names in os.walk(output_location):
            candidates = [os.path.join(root, name) for name in names]
            transcodedFiles.extend(c for c in candidates if os.path.isfile(c))
    elif output_location:
        print >> sys.stderr, command
        print >> sys.stderr, "Error - output file does not exist [" + command.outputLocation + "]"
        command.exitCode = -2

    # One event identifier is shared by every output file of this command.
    derivationEventUUID = str(uuid.uuid4())
    eventDetail = ""
    if command.eventDetailCommand is not None:
        eventDetail = command.eventDetailCommand.stdOut

    for produced in transcodedFiles:
        if opts["commandClassifications"] != "preservation":
            continue

        output_uuid = replacementDic["%outputFileUUID%"]

        # Add the new file to the SIP.
        filePathRelativeToSIP = produced.replace(opts["sipPath"], "%SIPDirectory%", 1)
        addFileToSIP(
            filePathRelativeToSIP,
            output_uuid,
            opts["sipUUID"],
            str(uuid.uuid4()),
            opts["date"],
            sourceType="creation",
            use="preservation",
        )

        # Event recorded against the source file.
        insertIntoEvents(
            fileUUID=opts["fileUUID"],
            eventIdentifierUUID=derivationEventUUID,
            eventType="normalization",
            eventDateTime=opts["date"],
            eventDetail=eventDetail,
            eventOutcome="",
            eventOutcomeDetailNote=filePathRelativeToSIP,
        )

        # Size and checksum of the new file.
        updateSizeAndChecksum(output_uuid, produced, opts["date"], str(uuid.uuid4()))

        # Link source and output file.
        insertIntoDerivations(
            sourceFileUUID=opts["fileUUID"],
            derivedFileUUID=output_uuid,
            relatedEventUUID=derivationEventUUID,
        )

        # Prepare a fresh UUID for the next output file.
        next_uuid = str(uuid.uuid4())
        replacementDic["%outputFileUUID%"] = next_uuid
        replacementDic["%postfix%"] = "-" + next_uuid
Example #18
0
def addFileToSIP(filePathRelativeToSIP, fileUUID, sipUUID, taskUUID, date, sourceType="ingestion", use="original"):
    """Insert a file belonging to a SIP and record its initial event.

    The file row is created with ``insertIntoFiles``; an event of type
    ``sourceType`` with empty detail and outcome fields is then stored with
    ``insertIntoEvents``. ``taskUUID`` is accepted but not used here.
    """
    insertIntoFiles(fileUUID, filePathRelativeToSIP, date, sipUUID=sipUUID, use=use)

    blank = ""
    insertIntoEvents(
        fileUUID=fileUUID,
        eventType=sourceType,
        eventDateTime=date,
        eventDetail=blank,
        eventOutcome=blank,
        eventOutcomeDetailNote=blank,
    )
 def test_insert_into_event_fetches_correct_agent_from_file(self):
     """An event inserted without explicit agents ends up linked to the
     three agents with ids 1, 2 and 5."""
     event_id = "event_agent_id"
     databaseFunctions.insertIntoEvents(
         fileUUID="88c8f115-80bc-4da4-a1e6-0158f5df13b9",
         eventIdentifierUUID=event_id)

     linked_agents = Event.objects.get(event_id=event_id).agents
     assert linked_agents.count() == 3
     for expected_id in (1, 2, 5):
         assert linked_agents.get(id=expected_id)
def includeFits(fits, xmlFile, date, eventUUID, fileUUID):
    """Record format-identification and validation events from FITS output.

    One "format identification" event is inserted for each detail note
    returned by ``formatIdentificationFITSAssist``, followed by a single
    "validation" event built from ``formatValidationFITSAssist``. If the
    validation helper raises, a "Failed" validation event is recorded and
    the module-level ``exitCode`` is increased by 3.

    ``xmlFile`` and ``eventUUID`` are accepted but not used here.
    """
    global exitCode
    # TODO: glean the event outcome information from the output.

    eventDetailText, eventOutcomeText, eventOutcomeDetailNotes = formatIdentificationFITSAssist(fits, fileUUID)

    # The return value is not used; the call is kept because any side effects
    # of createOutcomeInformation are not visible from here.
    # TODO: confirm it is side-effect free and remove.
    createOutcomeInformation("To be removed", eventOutcomeText)

    for eventOutcomeDetailNote in eventOutcomeDetailNotes:
        insertIntoEvents(fileUUID=fileUUID,
                         eventIdentifierUUID=str(uuid.uuid4()),
                         eventType="format identification",
                         eventDateTime=date,
                         eventDetail=eventDetailText,
                         eventOutcome=eventOutcomeText,
                         eventOutcomeDetailNote=eventOutcomeDetailNote)

    try:
        eventDetailText, eventOutcomeText, eventOutcomeDetailNote = formatValidationFITSAssist(fits)
    except Exception:
        eventDetailText = "Failed"
        eventOutcomeText = "Failed"
        # Must be the singular name read by the insert below. The original
        # assigned ``eventOutcomeDetailNotes``, so a failed validation reused
        # the last identification note (or hit NameError if there were none).
        eventOutcomeDetailNote = "Failed"
        exitCode += 3
    insertIntoEvents(fileUUID=fileUUID,
                     eventIdentifierUUID=str(uuid.uuid4()),
                     eventType="validation",
                     eventDateTime=date,
                     eventDetail=eventDetailText,
                     eventOutcome=eventOutcomeText,
                     eventOutcomeDetailNote=eventOutcomeDetailNote)
Example #21
0
def updateFileLocation(
    src,
    dst,
    eventType="",
    eventDateTime="",
    eventDetail="",
    eventIdentifierUUID=None,
    fileUUID="None",
    sipUUID=None,
    transferUUID=None,
    eventOutcomeDetailNote="",
    createEvent=True,
):
    """
    Updates file location in the database, and optionally writes an event for the sanitization to the database.
    Note that this does not actually move a file on disk.
    If the file uuid is not provided, will use the SIP uuid (or, failing that, the transfer uuid) and the old path to find the file uuid.
    To suppress creation of an event, pass createEvent=False (for example, if the file moved due to the renaming of a parent directory and not the file itself).

    :param src: Old location of the file; used for the lookup by path and in the default outcome note.
    :param dst: New location, stored as the file's current location.
    :param eventType: Passed through to insertIntoEvents.
    :param eventDateTime: Passed through to insertIntoEvents.
    :param eventDetail: Passed through to insertIntoEvents.
    :param eventIdentifierUUID: Accepted for backward compatibility only; this function does not use it.
    :param fileUUID: UUID of the file; an empty value or the string "None" triggers the lookup by path.
    :param sipUUID: SIP used to scope the lookup by path.
    :param transferUUID: Transfer used to scope the lookup by path when no sipUUID is given.
    :param eventOutcomeDetailNote: Outcome note for the event; when empty, a note with the old and new names is generated.
    :param createEvent: When false, only the location is updated and no event is written.
    :raises ValueError: if none of fileUUID, sipUUID or transferUUID is provided.
    """
    # The eventIdentifierUUID default used to be uuid.uuid4().__str__(), which
    # Python evaluates once at definition time, so every call shared the same
    # "random" UUID. The value is never used below, so None is a safe default.

    src = unicodeToStr(src)
    dst = unicodeToStr(dst)
    fileUUID = unicodeToStr(fileUUID)
    if not fileUUID or fileUUID == "None":
        # No usable file UUID: find the not-yet-removed file by its old path,
        # scoped to the SIP or, failing that, to the transfer.
        kwargs = {"removedtime__isnull": True, "currentlocation": src}

        if sipUUID:
            kwargs["sip_id"] = sipUUID
        elif transferUUID:
            kwargs["transfer_id"] = transferUUID
        else:
            raise ValueError(
                "One of fileUUID, sipUUID, or transferUUID must be provided")

        f = File.objects.get(**kwargs)
    else:
        f = File.objects.get(uuid=fileUUID)

    # UPDATE THE CURRENT FILE PATH
    f.currentlocation = dst
    f.save()

    if not createEvent:
        return

    if eventOutcomeDetailNote == "":
        eventOutcomeDetailNote = 'Original name="%s"; cleaned up name="%s"' % (
            src, dst)
    # CREATE THE EVENT
    insertIntoEvents(
        fileUUID=f.uuid,
        eventType=eventType,
        eventDateTime=eventDateTime,
        eventDetail=eventDetail,
        eventOutcome="",
        eventOutcomeDetailNote=eventOutcomeDetailNote,
    )
Example #22
0
def addFileToSIP(filePathRelativeToSIP, fileUUID, sipUUID, taskUUID, date, sourceType="ingestion", use="original"):
    """Record a file as belonging to a SIP and write the event that introduced it.

    The file row is created through insertIntoFiles; an event of type
    ``sourceType`` is then written through insertIntoEvents, using
    ``taskUUID`` as the event identifier and empty detail/outcome fields.
    """
    insertIntoFiles(fileUUID, filePathRelativeToSIP, date, sipUUID=sipUUID, use=use)

    eventFields = dict(
        fileUUID=fileUUID,
        eventIdentifierUUID=taskUUID,
        eventType=sourceType,
        eventDateTime=date,
        eventDetail="",
        eventOutcome="",
        eventOutcomeDetailNote="",
    )
    insertIntoEvents(**eventFields)
def onceNormalized(command):
    """Register the files produced by a finished normalization command.

    Collects the command's output (a single file, or every file below an
    output directory). When the module-level ``opts.commandClassifications``
    is "preservation", each output file is added to the SIP, a
    "normalization" event is written for the source file, the output's size
    and checksum are recorded, and the source is linked to the output as a
    derivation. After each file a fresh ``outputFileUUID`` is generated and
    the "%postfix%" replacement is updated to match.

    If the output location is set but is neither a file nor a directory, an
    error is printed to stderr and ``command.exitCode`` is set to -2.
    """
    global outputFileUUID
    global replacementDic
    global opts

    location = command.outputLocation
    produced = []
    if not location:
        command.outputLocation = ""
    elif os.path.isfile(location):
        produced.append(location)
    elif os.path.isdir(location):
        for root, _subdirs, names in os.walk(location):
            for name in names:
                candidate = os.path.join(root, name)
                if os.path.isfile(candidate):
                    produced.append(candidate)
    else:
        print >>sys.stderr, command
        print >>sys.stderr, "Error - output file does not exist [" + location + "]"
        command.exitCode = -2

    # One event UUID is generated up front and reused for every output file.
    derivationEventUUID = str(uuid.uuid4())
    for produced_path in produced:
        if opts.commandClassifications != "preservation":
            continue

        # Add the new file to the SIP.
        filePathRelativeToSIP = produced_path.replace(opts.sipPath, "%SIPDirectory%", 1)
        addFileToSIP(
            filePathRelativeToSIP,
            outputFileUUID,
            opts.sipUUID,
            str(uuid.uuid4()),
            opts.date,
            sourceType="creation",
            use="preservation",
        )
        print >>sys.stderr, "TODO: calculate new file checksum"

        # Record the normalization against the source file.
        insertIntoEvents(
            fileUUID=opts.fileUUID,
            eventIdentifierUUID=derivationEventUUID,
            eventType="normalization",
            eventDateTime=opts.date,
            eventDetail=command.eventDetailCommand.stdOut,
            eventOutcome="",
            eventOutcomeDetailNote=filePathRelativeToSIP,
        )

        updateSizeAndChecksum(outputFileUUID, produced_path, opts.date, str(uuid.uuid4()))

        # Link the source file to the file derived from it.
        insertIntoDerivations(
            sourceFileUUID=opts.fileUUID,
            derivedFileUUID=outputFileUUID,
            relatedEventUUID=derivationEventUUID,
        )

        # Prepare the UUID (and matching postfix) for the next output file.
        outputFileUUID = str(uuid.uuid4())
        replacementDic["%postfix%"] = "-" + outputFileUUID
Example #24
0
def addAccessionEvent(fileUUID, transferUUID, date):
    """Write a "registration" event for a file when its transfer has an accession ID.

    The transfer is loaded by UUID; if its ``accessionid`` is empty nothing
    is written. Otherwise the event's outcome detail note is "accession#"
    followed by the escaped accession ID.
    """
    accession = Transfer.objects.get(uuid=transferUUID).accessionid
    if not accession:
        return

    note = "accession#" + MySQLdb.escape_string(accession)
    insertIntoEvents(
        fileUUID=fileUUID,
        eventType="registration",
        eventDateTime=date,
        eventDetail="",
        eventOutcome="",
        eventOutcomeDetailNote=note,
    )
Example #25
0
def addFileToTransfer(filePathRelativeToSIP, fileUUID, transferUUID, taskUUID, date, sourceType="ingestion", eventDetail="", use="original"):
    """Record a file as belonging to a transfer and write its initial events.

    Creates the file row via insertIntoFiles, writes an event of type
    ``sourceType`` (no explicit event identifier is passed; ``taskUUID`` is
    not used here), then calls addAccessionEvent for the same file and
    transfer.
    """
    insertIntoFiles(fileUUID, filePathRelativeToSIP, date, transferUUID=transferUUID, use=use)

    eventFields = {
        "fileUUID": fileUUID,
        "eventType": sourceType,
        "eventDateTime": date,
        "eventDetail": eventDetail,
        "eventOutcome": "",
        "eventOutcomeDetailNote": "",
    }
    insertIntoEvents(**eventFields)

    addAccessionEvent(fileUUID, transferUUID, date)
def call(jobs):
    """Run scan_file for every job, then write all queued events in one transaction.

    Each job is processed inside its own JobContext; scan_file receives the
    shared event queue plus the job's arguments after the first, and its
    return value becomes the job status. The queued entries are keyword
    dictionaries that are passed to insertIntoEvents once all jobs are done.
    """
    pending_events = []

    for job in jobs:
        with job.JobContext(logger=logger):
            status = scan_file(pending_events, *job.args[1:])
            job.set_status(status)

    with transaction.atomic():
        for event_kwargs in pending_events:
            insertIntoEvents(**event_kwargs)
Example #27
0
def addFileToTransfer(filePathRelativeToSIP, fileUUID, transferUUID, taskUUID, date, sourceType="ingestion", eventDetail="", use="original"):
    """Record a file as belonging to a transfer and write its initial events.

    Creates the file row via insertIntoFiles, writes an event of type
    ``sourceType`` identified by ``taskUUID``, then calls addAccessionEvent
    for the same file and transfer.
    """
    insertIntoFiles(fileUUID, filePathRelativeToSIP, date, transferUUID=transferUUID, use=use)

    eventFields = {
        "fileUUID": fileUUID,
        "eventIdentifierUUID": taskUUID,
        "eventType": sourceType,
        "eventDateTime": date,
        "eventDetail": eventDetail,
        "eventOutcome": "",
        "eventOutcomeDetailNote": "",
    }
    insertIntoEvents(**eventFields)

    addAccessionEvent(fileUUID, transferUUID, date)
Example #28
0
def updateSizeAndChecksum(fileUUID, filePath, date, eventIdentifierUUID):
    """Store a file's size and checksum and write a "message digest calculation" event.

    The size comes from the file on disk and the checksum from sha_for_file;
    both are written to the matching File row. The checksum is also recorded
    as the event's outcome detail note. ``eventIdentifierUUID`` is accepted
    but not passed on to insertIntoEvents.
    """
    size_on_disk = os.path.getsize(filePath)
    digest = str(sha_for_file(filePath))

    matching_files = File.objects.filter(uuid=fileUUID)
    matching_files.update(size=size_on_disk, checksum=digest)

    insertIntoEvents(
        fileUUID=fileUUID,
        eventType="message digest calculation",
        eventDateTime=date,
        eventDetail="program=\"python\"; module=\"hashlib.sha256()\"",
        eventOutcomeDetailNote=digest,
    )
Example #29
0
def addAccessionEvent(fileUUID, transferUUID, date):
    """Write a "registration" event for a file when its transfer has an accession ID.

    The accession ID is read from the first row returned for the transfer in
    the Transfers table. If it is empty nothing is written; otherwise an
    event with a freshly generated identifier is inserted whose outcome
    detail note is "accession#" followed by the escaped accession ID.
    """
    sql = """SELECT accessionID FROM Transfers WHERE transferUUID = '%s';""" % (transferUUID)
    rows = databaseInterface.queryAllSQL(sql)
    accessionID = rows[0][0]
    if not accessionID:
        return

    eventIdentifierUUID = str(uuid.uuid4())
    note = "accession#" + MySQLdb.escape_string(accessionID)
    insertIntoEvents(
        fileUUID=fileUUID,
        eventIdentifierUUID=eventIdentifierUUID,
        eventType="registration",
        eventDateTime=date,
        eventDetail="",
        eventOutcome="",
        eventOutcomeDetailNote=note,
    )
def onceNormalized(command, opts, replacementDic):
    """Register the files produced by a finished normalization command.

    Collects the command's output (a single file, or every file below an
    output directory). When ``opts["commandClassifications"]`` is
    "preservation", each output file is added to the SIP, a "normalization"
    event is written for the source file, the output's size and checksum are
    recorded, the source is linked to the output as a derivation, and a
    FilesIDs row carrying the command's output format is inserted. After each
    file, ``replacementDic`` receives a new "%outputFileUUID%" and a matching
    "%postfix%".

    If the output location is set but is neither a file nor a directory, an
    error is printed to stderr and ``command.exitCode`` is set to -2.
    """
    if not command.outputLocation:
        command.outputLocation = ""
    location = command.outputLocation

    transcodedFiles = []
    if os.path.isfile(location):
        transcodedFiles.append(location)
    elif os.path.isdir(location):
        for root, _subdirs, names in os.walk(location):
            for name in names:
                candidate = os.path.join(root, name)
                if os.path.isfile(candidate):
                    transcodedFiles.append(candidate)
    elif location:
        print >>sys.stderr, command
        print >>sys.stderr, "Error - output file does not exist [" + location + "]"
        command.exitCode = -2

    # One event UUID is generated up front and reused for every output file.
    derivationEventUUID = str(uuid.uuid4())
    eventDetail = "ArchivematicaFPRCommandID=\"%s\"" % (command.pk)
    if command.eventDetailCommand != None:
        eventDetail = '%s; %s' % (eventDetail, command.eventDetailCommand.stdOut)

    for transcoded in transcodedFiles:
        if opts["commandClassifications"] != "preservation":
            continue

        # TODO Add manual normalization for files of same name mapping
        # Add the new file to the SIP.
        filePathRelativeToSIP = transcoded.replace(opts["sipPath"], "%SIPDirectory%", 1)
        derivedUUID = replacementDic["%outputFileUUID%"]
        addFileToSIP(
            filePathRelativeToSIP,
            derivedUUID,
            opts["sipUUID"],
            str(uuid.uuid4()),
            opts["date"],
            sourceType="creation",
            use="preservation",
        )

        # Record the normalization against the source file.
        insertIntoEvents(
            fileUUID=opts["fileUUID"],
            eventIdentifierUUID=derivationEventUUID,
            eventType="normalization",
            eventDateTime=opts["date"],
            eventDetail=eventDetail,
            eventOutcome="",
            eventOutcomeDetailNote=filePathRelativeToSIP,
        )

        updateSizeAndChecksum(derivedUUID, transcoded, opts["date"], str(uuid.uuid4()))

        # Link the source file to the file derived from it.
        insertIntoDerivations(
            sourceFileUUID=opts["fileUUID"],
            derivedFileUUID=derivedUUID,
            relatedEventUUID=derivationEventUUID,
        )

        sql = "INSERT INTO FilesIDs (fileUUID, formatName, formatVersion, formatRegistryName, formatRegistryKey) VALUES ('%s', '%s', NULL, NULL, NULL);" % (derivedUUID, command.outputFormat)
        databaseInterface.runSQL(sql)

        # Prepare the UUID (and matching postfix) for the next output file.
        nextUUID = str(uuid.uuid4())
        replacementDic["%outputFileUUID%"] = nextUUID
        replacementDic["%postfix%"] = "-" + nextUUID
Example #31
0
def updateSizeAndChecksum(fileUUID, filePath, date, eventIdentifierUUID):
    """Store a file's size and checksum and write a "message digest calculation" event.

    The size comes from the file on disk and the checksum from sha_for_file;
    both are written to the Files row for ``fileUUID`` with an UPDATE
    statement. The checksum is also recorded as the outcome detail note of
    the event, which is identified by ``eventIdentifierUUID``.
    """
    fileSize = str(os.path.getsize(filePath))
    checksum = str(sha_for_file(filePath))

    # NOTE(review): the statement is assembled by string concatenation, so the
    # values are assumed to contain no quote characters.
    statementParts = [
        "UPDATE Files ",
        "SET fileSize='", fileSize, "', checksum='", checksum, "' ",
        "WHERE fileUUID='", fileUUID, "'",
    ]
    databaseInterface.runSQL("".join(statementParts))

    insertIntoEvents(
        fileUUID=fileUUID,
        eventIdentifierUUID=eventIdentifierUUID,
        eventType="message digest calculation",
        eventDateTime=date,
        eventDetail="program=\"python\"; module=\"hashlib.sha256()\"",
        eventOutcomeDetailNote=checksum,
    )
Example #32
0
def xmlCreateFileAssociationBetween(originalFileFullPath,
                                    outputFromNormalizationFileFullPath,
                                    SIPFullPath,
                                    sipUUID,
                                    eventDetailText,
                                    eventOutcomeDetailNote,
                                    outputFileUUID=""):
    #assign file UUID

    date = databaseInterface.getUTCDate()
    if outputFileUUID == "":
        outputFileUUID = uuid.uuid4().__str__()

    originalFilePathRelativeToSIP = originalFileFullPath.replace(
        SIPFullPath, "%SIPDirectory%", 1)
    sql = "SELECT Files.fileUUID FROM Files WHERE removedTime = 0 AND Files.currentLocation = '" + MySQLdb.escape_string(
        originalFilePathRelativeToSIP
    ) + "' AND Files.sipUUID = '" + sipUUID + "';"
    print sql
    rows = databaseInterface.queryAllSQL(sql)
    print rows
    fileUUID = rows[0][0]

    filePathRelativeToSIP = outputFromNormalizationFileFullPath.replace(
        SIPFullPath, "%SIPDirectory%", 1)
    addFileToSIP(filePathRelativeToSIP,
                 outputFileUUID,
                 sipUUID,
                 uuid.uuid4().__str__(),
                 date,
                 sourceType="creation",
                 use="preservation")
    updateSizeAndChecksum(outputFileUUID, outputFromNormalizationFileFullPath,
                          date,
                          uuid.uuid4().__str__())

    taskUUID = uuid.uuid4().__str__()
    insertIntoEvents(fileUUID=fileUUID, \
               eventIdentifierUUID=taskUUID, \
               eventType="normalization", \
               eventDateTime=date, \
               eventDetail=eventDetailText, \
               eventOutcome="", \
               eventOutcomeDetailNote=eventOutcomeDetailNote)

    insertIntoDerivations(sourceFileUUID=fileUUID,
                          derivedFileUUID=outputFileUUID,
                          relatedEventUUID=taskUUID)
Example #33
0
def write_premis_event(job, sip_uuid, checksum_type, event_outcome,
                       event_outcome_detail_note):
    """Write the AIP-level "fixity check" PREMIS event.

    Returns ``event_outcome_detail_note`` when the event was written. If
    writing raises, the error is reported through ``job.pyprint`` and the
    function returns None.
    """
    try:
        event_fields = {
            'fileUUID': sip_uuid,
            'eventType': 'fixity check',
            'eventDetail': 'program="python, bag"; module="hashlib.{}()"'.format(
                checksum_type),
            'eventOutcome': event_outcome,
            'eventOutcomeDetailNote': event_outcome_detail_note,
        }
        databaseFunctions.insertIntoEvents(**event_fields)
    except Exception as err:
        message = 'Failed to write PREMIS event to database. Error: {error}'.format(
            error=err)
        job.pyprint(message)
        return None
    return event_outcome_detail_note
def insert_transcription_event(status, file_uuid, rule, relative_location):
    """Write a "transcription" event for a file and return the event's UUID.

    :param status: Exit status of the transcription command; 0 means success.
    :param file_uuid: UUID of the file the event is recorded against.
    :param rule: Rule whose ``command`` (and that command's ``tool``) describe
        the program that was run; used to build the event detail.
    :param relative_location: Stored as the event's outcome detail note.
    :returns: The newly generated event UUID, as a string.
    """
    # Compare by value: `status is 0` tested object identity, which only
    # works for small ints as a CPython implementation detail and raises a
    # SyntaxWarning on Python 3.8+.
    outcome = "transcribed" if status == 0 else "not transcribed"

    tool = rule.command.tool
    # Double quotes inside the command are backslash-escaped so the quoted
    # command="..." field stays unambiguous.
    event_detail = u"program={}; version={}; command=\"{}\"".format(tool.description, tool.version, rule.command.command.replace('"', r'\"'))

    event_uuid = str(uuid4())

    databaseFunctions.insertIntoEvents(
        fileUUID=file_uuid,
        eventIdentifierUUID=event_uuid,
        eventType="transcription",
        eventDetail=event_detail,
        eventOutcome=outcome,
        eventOutcomeDetailNote=relative_location
    )

    return event_uuid
def write_identification_event(file_uuid, command, format=None, success=True):
    """Write a "format identification" event for a file.

    The event detail names the identifying tool and its version. The outcome
    is "Positive" when ``success`` is true and "Not identified" otherwise;
    the outcome detail note is ``format``, or 'No Matching Format' when no
    format is given. The event is dated with getUTCDate().
    """
    tool = command.tool
    detail = 'program="{}"; version="{}"'.format(tool.description, tool.version)
    outcome = "Positive" if success else "Not identified"
    format = format or 'No Matching Format'

    insertIntoEvents(
        fileUUID=file_uuid,
        eventIdentifierUUID=str(uuid.uuid4()),
        eventType="format identification",
        eventDateTime=getUTCDate(),
        eventDetail=detail,
        eventOutcome=outcome,
        eventOutcomeDetailNote=format,
    )
Example #36
0
def verify_checksum(
    job, file_uuid, path, checksum, checksumtype, event_id=None, date=None
):
    """
    Compare a file's checksum with an expected value and record a fixity event.

    The result ("Checksum passed" / "Checksum failed") is reported through
    ``job.pyprint`` and a "fixity check" event with outcome "Pass" or "Fail"
    is written for the file.

    :param job: Job object used for output (``pyprint``)
    :param str file_uuid: UUID of the file to verify
    :param str path: Path of the file to verify
    :param str checksum: Checksum to compare against
    :param str checksumtype: Type of the provided checksum (md5, sha256, etc);
        lower-cased before use
    :param str event_id: Event ID; a new UUID is generated when omitted
    :param str date: Date of the event; defaults to the current time
    """
    event_id = str(uuid.uuid4()) if event_id is None else event_id
    date = timezone.now().isoformat(" ") if date is None else date

    checksumtype = checksumtype.lower()
    generated_checksum = get_file_checksum(path, checksumtype)
    event_detail = 'program="python"; module="hashlib.{}()"'.format(checksumtype)

    if checksum != generated_checksum:
        report = "Checksum failed"
        event_outcome = "Fail"
        note_template = "Dataverse checksum %s verification failed"
    else:
        report = "Checksum passed"
        event_outcome = "Pass"
        note_template = "Dataverse checksum %s verified"
    job.pyprint(report)
    detail_note = note_template % checksum

    event_fields = dict(
        fileUUID=file_uuid,
        eventIdentifierUUID=event_id,
        eventType="fixity check",
        eventDateTime=date,
        eventDetail=event_detail,
        eventOutcome=event_outcome,
        eventOutcomeDetailNote=detail_note,
    )
    databaseFunctions.insertIntoEvents(**event_fields)
def verifyChecksum(fileUUID, filePath, date, eventIdentifierUUID):
    """Compare a file's SHA checksum with the one stored in the database.

    Exits the process with status 1 when no checksum is stored for the file.
    Otherwise a "fixity check" event with outcome "Pass" or "Fail" is
    written, and the process exits with status 0 on a match or 2 on a
    mismatch (the mismatch is also reported on stderr).
    """
    sql = """SELECT checksum FROM Files WHERE fileUUID = '""" + fileUUID + "'"
    cursor, sqlLock = databaseInterface.querySQL(sql)
    checksumDB = ""
    # Drain the cursor; if several rows come back, the last one wins.
    for row in iter(cursor.fetchone, None):
        checksumDB = row[0]
    sqlLock.release()

    if checksumDB in (None, "", "None"):
        print >> sys.stderr, "No checksum found in database for file:", fileUUID, filePath
        exit(1)

    checksumFile = sha_for_file(filePath)
    if checksumFile != checksumDB:
        eventOutcome = "Fail"
        exitCode = 2
        eventOutcomeDetailNote = str(checksumFile) + " != " + str(checksumDB)
        print >> sys.stderr, "Checksums do not match:", fileUUID, filePath
        print >> sys.stderr, eventOutcomeDetailNote
    else:
        eventOutcome = "Pass"
        exitCode = 0
        eventOutcomeDetailNote = "%s %s" % (str(checksumFile), "verified")

    databaseFunctions.insertIntoEvents(
        fileUUID=fileUUID,
        eventIdentifierUUID=eventIdentifierUUID,
        eventType="fixity check",
        eventDateTime=date,
        eventOutcome=eventOutcome,
        eventOutcomeDetailNote=eventOutcomeDetailNote,
        eventDetail="program=\"python\"; module=\"hashlib.sha256()\"",
    )

    exit(exitCode)
def verifyChecksum(fileUUID, filePath, date, eventIdentifierUUID):
    """Check a file on disk against the checksum recorded for it in the Files table.

    When the table holds no checksum for the file, a message is printed to
    stderr and the process exits with status 1. Otherwise the result is
    recorded as a "fixity check" event ("Pass" or "Fail") and the process
    exits with status 0 for a match or 2 for a mismatch.
    """
    sql = """SELECT checksum FROM Files WHERE fileUUID = '""" + fileUUID + "'"
    c, sqlLock = databaseInterface.querySQL(sql)
    storedChecksum = ""
    # Read every returned row; the value from the last row is kept.
    for fetched in iter(c.fetchone, None):
        storedChecksum = fetched[0]
    sqlLock.release()

    if storedChecksum in (None, "", "None"):
        print >>sys.stderr, "No checksum found in database for file:", fileUUID, filePath
        exit(1)

    computedChecksum = sha_for_file(filePath)
    if computedChecksum != storedChecksum:
        outcome, exitCode = "Fail", 2
        note = str(computedChecksum) + " != " + str(storedChecksum)
        print >>sys.stderr, "Checksums do not match:", fileUUID, filePath
        print >>sys.stderr, note
    else:
        outcome, exitCode = "Pass", 0
        note = "%s %s" % (str(computedChecksum), "verified")

    eventFields = dict(
        fileUUID=fileUUID,
        eventIdentifierUUID=eventIdentifierUUID,
        eventType="fixity check",
        eventDateTime=date,
        eventOutcome=outcome,
        eventOutcomeDetailNote=note,
        eventDetail="program=\"python\"; module=\"hashlib.sha256()\"",
    )
    databaseFunctions.insertIntoEvents(**eventFields)

    exit(exitCode)
def verifyChecksum(fileUUID, filePath, date, eventIdentifierUUID):
    """Compare a file's checksum with the one stored on its File record.

    The checksum is recomputed with the algorithm named by the record's
    ``checksumtype`` and the result is written as a "fixity check" event.
    This function always terminates the process.

    :param fileUUID: UUID of the File record to verify.
    :param filePath: Path of the file on disk.
    :param date: Date/time recorded on the event.
    :param eventIdentifierUUID: Accepted but not used; the event gets a
        freshly generated identifier.
    :raises SystemExit: status 1 when the record has no checksum, 2 when the
        checksums differ, 0 when they match.
    """
    f = File.objects.get(uuid=fileUUID)

    # Treat None like the empty string and the literal 'None'. Without this a
    # record with no checksum value would fall through to the comparison
    # below and fail with a TypeError while building the mismatch note.
    if f.checksum in (None, '', 'None'):
        print('No checksum found in database for file:',
              fileUUID,
              filePath,
              file=sys.stderr)
        sys.exit(1)

    checksumFile = get_file_checksum(filePath, f.checksumtype)

    if checksumFile != f.checksum:
        eventOutcomeDetailNote = str(checksumFile) + ' != ' + f.checksum
        eventOutcome = 'Fail'
        exitCode = 2
        print('Checksums do not match:', fileUUID, filePath, file=sys.stderr)
        print(eventOutcomeDetailNote, file=sys.stderr)
    else:
        eventOutcomeDetailNote = '%s %s' % (str(checksumFile), 'verified')
        eventOutcome = 'Pass'
        exitCode = 0

    databaseFunctions.insertIntoEvents(
        fileUUID=fileUUID,
        eventIdentifierUUID=str(uuid.uuid4()),
        eventType='fixity check',
        eventDateTime=date,
        eventOutcome=eventOutcome,
        eventOutcomeDetailNote=eventOutcomeDetailNote,
        eventDetail='program="python"; module="hashlib.{}()"'.format(
            f.checksumtype))

    sys.exit(exitCode)
        except:
            # The XML could not be parsed (the matching try is above this
            # excerpt): report it, count the failure, go on to the next item.
            print >>sys.stderr, "Error parsing: ", xmlFilePath 
            exitCode += 1
            continue
        #if extension.lower() != file[i+1:].lower():
        #    print >>sys.stderr, "Warning, extension mismatch(file/xml): ", file[:i], extension , file[i+1:] 
        
        # Recompute the MD5 of the file and compare it with xmlMD5, which is
        # set earlier in the loop (not visible here).
        objectMD5 = md5_for_file(filePath)
        
        if objectMD5 == xmlMD5:
            print "File OK: ", xmlMD5, filePath.replace(transferPath, "%TransferDirectory%")
            
            # Write a passing "fixity check" event for every file UUID that
            # getFileUUIDLike returns for this path.
            fileID = getFileUUIDLike(filePath, transferPath, transferUUID, "transferUUID", "%transferDirectory%")
            for path, fileUUID in fileID.iteritems():
                eventDetail = "program=\"python\"; module=\"hashlib.md5()\""
                eventOutcome="Pass"
                # NOTE(review): the note records xmlFile rather than the
                # checksum value — confirm this is intended.
                eventOutcomeDetailNote = "%s %s" % (xmlFile.__str__(), "verified")
                eventIdentifierUUID=uuid.uuid4().__str__()
                databaseFunctions.insertIntoEvents(fileUUID=fileUUID, \
                     eventIdentifierUUID=eventIdentifierUUID, \
                     eventType="fixity check", \
                     eventDateTime=date, \
                     eventOutcome=eventOutcome, \
                     eventOutcomeDetailNote=eventOutcomeDetailNote, \
                     eventDetail=eventDetail)
        else:
            # Mismatch: reported on stderr and counted; no event is written.
            print >>sys.stderr, "Checksum mismatch: ", filePath.replace(transferPath, "%TransferDirectory%")
            exitCode += 1
                 
# The exit status is the number of failures counted in exitCode.
quit(exitCode)
        # NOTE(review): `exit` is presumably the return code of the command
        # run just above this excerpt (shadowing the builtin) — confirm.
        if exit != 0:
            # Report the failing command and its stderr, then count the failure.
            print >>sys.stderr, "Failed test: ", command
            print >>sys.stderr, stdErr
            print >>sys.stderr
            exitCode += 1
        else:
            print "Passed test: ", command
    
if __name__ == '__main__':
    target = sys.argv[1]
    transferUUID =  sys.argv[2]
    verifyBag(target)
    if exitCode != 0:
        print >>sys.stderr, "Failed bagit compliance. Not restructuring."
        exit(exitCode) 
    restructureBagForComplianceFileUUIDsAssigned(target, transferUUID, "transferUUID")
    for i in range(len(verificationCommands)):
        print verificationCommands[i]
        print verificationCommandsOutputs[i]
        print
        
    sql = "SELECT Files.fileUUID FROM Files WHERE removedTime = 0 AND Files.currentLocation LIKE '\%transferDirectory\%objects/%' AND transferUUID = '" + transferUUID + "';"
    rows = databaseInterface.queryAllSQL(sql)
    for row in rows:
        insertIntoEvents(fileUUID=row[0], \
                     eventType="fixity check", \
                     eventDetail="Bagit - verifypayloadmanifests", \
                     eventOutcome="Pass")
    
    exit(exitCode)
# @package Archivematica
# @subpackage archivematicaClientScript
# @author Joseph Perry <*****@*****.**>
# @version svn: $Id$
from optparse import OptionParser
import sys
sys.path.append("/usr/lib/archivematica/archivematicaCommon")
from databaseFunctions import insertIntoEvents


if __name__ == '__main__':
    # Command-line wrapper around insertIntoEvents: every event field is an
    # optional string option that defaults to "".
    parser = OptionParser()
    optionSpecs = (
        ("-i", "fileUUID"),
        ("-t", "eventType"),
        ("-d", "eventDateTime"),
        ("-e", "eventDetail"),
        ("-o", "eventOutcome"),
        ("-n", "eventOutcomeDetailNote"),
        ("-u", "eventIdentifierUUID"),
    )
    for shortFlag, optionName in optionSpecs:
        # The long flag and the destination attribute share the option name.
        parser.add_option(shortFlag, "--" + optionName, action="store", dest=optionName, default="")

    (opts, args) = parser.parse_args()

    insertIntoEvents(
        fileUUID=opts.fileUUID,
        eventIdentifierUUID=opts.eventIdentifierUUID,
        eventType=opts.eventType,
        eventDateTime=opts.eventDateTime,
        eventDetail=opts.eventDetail,
        eventOutcome=opts.eventOutcome,
        eventOutcomeDetailNote=opts.eventOutcomeDetailNote,
    )
# Build the destination name by inserting "-<fileUUID>" in front of the
# file's extension, in the directory of the original file.
basename = os.path.basename(filePath)
i = basename.rfind(".")
if i == -1:
    # No "." in the name: rfind() returns -1, which as a slice index would
    # split off the last character ("README" -> "READM-<uuid>E"). Treat the
    # whole name as the stem so the UUID is appended at the end instead.
    i = len(basename)
dstFile = basename[:i] + "-" + fileUUID + basename[i:]
dstDir = os.path.dirname(originalFilePath.replace("%SIPDirectory%", SIPDirectory, 1))
dst = os.path.join(dstDir, dstFile)
# Same path with the SIP directory replaced by its placeholder, as stored in the database.
dstR = dst.replace(SIPDirectory, "%SIPDirectory%", 1)

# Refuse to overwrite anything that already exists at the destination.
if os.path.isfile(dst) or os.path.isdir(dst):
    print >>sys.stderr, "already exists:", dstR
    exit(2)

#Rename the file or directory src to dst. If dst is a directory, OSError will be raised. On Unix, if dst exists and is a file, it will be replaced silently if the user has permission. The operation may fail on some Unix flavors if src and dst are on different filesystems.
#see http://docs.python.org/2/library/os.html
os.rename(filePath, dst)
# NOTE(review): dstR is interpolated into the statement unescaped; a path
# containing a quote character would break it — confirm how paths are sanitized.
sql =  """UPDATE Files SET currentLocation='%s' WHERE fileUUID='%s';""" % (dstR, fileUUID)
databaseInterface.runSQL(sql)

# Record the manual normalization against the original file; the same UUID
# identifies the event and relates the derivation to it.
derivationEventUUID = uuid.uuid4().__str__()
insertIntoEvents(fileUUID=originalFileUUID, \
               eventIdentifierUUID=derivationEventUUID, \
               eventType="normalization", \
               eventDateTime=date, \
               eventDetail="manual normalization", \
               eventOutcome="", \
               eventOutcomeDetailNote=dstR)

#Add linking information between files
insertIntoDerivations(sourceFileUUID=originalFileUUID, derivedFileUUID=fileUUID, relatedEventUUID=derivationEventUUID)


exit(0)
if __name__ == '__main__':
    # Identify a file's format with Fido, store the resulting file ID and
    # write a "format identification" event for the file.
    (opts, args) = parseArgs() 
    while False: #used to stall the mcp and stop the client for testing this module
        import time
        time.sleep(10)
    
    # Files with these fileGrpUse values are not identified; exit successfully.
    if opts.fileGrpUse in ["DSPACEMETS", "maildirFile"]:
        print "file's fileGrpUse in exclusion list, skipping"
        exit(0)
        
    # Map Fido's identifier (for this Fido version) to an Archivematica file
    # ID and record it for the file.
    FidoFileID = getFidoID(opts.filePath)
    FidoVersion = getFidoVersion()
    fileID = getArchivematicaFileID(FidoFileID, FidoVersion)
    print "Found file ID {%s}: %s" % (fileID, FidoFileID) 
    insertIntoFileIds(opts.fileUUID, fileID)
    
    # The event is always written with a "Positive" outcome; the Fido
    # identifier is kept as the outcome detail note.
    eventDetailText = 'program="Fido"; version="%s"' % (FidoVersion)
    eventOutcomeText='Positive'
    eventOutcomeDetailNote=FidoFileID
    insertIntoEvents(fileUUID=opts.fileUUID, \
                         eventIdentifierUUID=uuid.uuid4().__str__(), \
                         eventType="format identification", \
                         eventDateTime=opts.date, \
                         eventDetail=eventDetailText, \
                         eventOutcome=eventOutcomeText, \
                         eventOutcomeDetailNote=eventOutcomeDetailNote)
    
    
    

    # The fourth command-line argument is used as the event identifier below.
    taskUUID = sys.argv[4]

    # Scan the target by feeding it to clamdscan on stdin; a second command
    # asks clamdscan for its version.
    command = 'clamdscan  - <"' + escapeForCommand(target) + '"'
    print >>sys.stderr, command
    commandVersion = "clamdscan -V"
    eventOutcome = "Pass"

    # NOTE(review): executeOrRun presumably returns a sequence whose first item
    # is the exit status and second item the standard output — confirm.
    clamscanOutput = executeOrRun("bashScript", command, printing=False)
    clamscanVersionOutput = executeOrRun("command", commandVersion, printing=False)

    if clamscanOutput[0] or clamscanVersionOutput[0]:
        if clamscanVersionOutput[0]:
            # The version query itself failed: abort with status 2.
            print >>sys.stderr, clamscanVersionOutput
            exit(2)
        else:
            eventOutcome = "Fail"

    # The scan also fails when its output lacks the expected result text
    # (clamscanResultShouldBe is defined above this excerpt).
    if eventOutcome == "Fail" or clamscanOutput[1].find(clamscanResultShouldBe) == -1:
        eventOutcome = "Fail"
        print >>sys.stderr, fileUUID, " - ", os.path.basename(target)
        print >>sys.stderr, clamscanOutput

    # The version output is split on "/" into exactly three parts; the last
    # two are rejoined as the virus definitions string.
    version, virusDefs, virusDefsDate = clamscanVersionOutput[1].split("/")
    virusDefs = virusDefs + "/" + virusDefsDate
    eventDetailText = "program=\"Clam AV\"; version=\"" + version + "\"; virusDefinitions=\"" + virusDefs + "\""

    # An event is only written when a real file UUID was supplied.
    if fileUUID != "None":
        insertIntoEvents(fileUUID=fileUUID, eventIdentifierUUID=taskUUID, eventType="virus check", eventDateTime=date, eventDetail=eventDetailText, eventOutcome=eventOutcome, eventOutcomeDetailNote="")
    # Any outcome other than "Pass" makes the script exit with status 3.
    if eventOutcome != "Pass":
        exit(3)