def addFileToTransfer(
    filePathRelativeToSIP,
    fileUUID,
    transferUUID,
    taskUUID,
    date,
    sourceType="ingestion",
    eventDetail="",
    use="original",
    originalLocation=None,
):
    """Register a file with a transfer and record its ingestion events.

    Creates the File row, writes an ingestion-type event for it, and adds
    the accession (registration) event via addAccessionEvent.  Returns the
    created File object.
    """
    location = originalLocation or filePathRelativeToSIP
    new_file = insertIntoFiles(
        fileUUID,
        filePathRelativeToSIP,
        date,
        transferUUID=transferUUID,
        use=use,
        originalLocation=location,
    )
    insertIntoEvents(
        fileUUID=fileUUID,
        eventType=sourceType,
        eventDateTime=date,
        eventDetail=eventDetail,
        eventOutcome="",
        eventOutcomeDetailNote="",
    )
    addAccessionEvent(fileUUID, transferUUID, date)
    return new_file
def create_db_entries(job, mapping, dataverse_agent_id):
    """
    Create event and derivatives entries for the derived tabular data in the
    database.
    """
    for entry, file_entry in mapping.items():
        # Only derived tabular files get a derivation record.
        if not (entry.derived_from and entry.use == 'derivative'):
            continue
        source_uuid = mapping[entry.derived_from].uuid
        derivation_event_uuid = uuid.uuid4()
        # Event on the source file describing the derivation.
        databaseFunctions.insertIntoEvents(
            source_uuid,
            eventIdentifierUUID=derivation_event_uuid,
            eventType="derivation",
            eventDateTime=None,  # From Dataverse?
            eventDetail="",  # From Dataverse?
            eventOutcome="",  # From Dataverse?
            eventOutcomeDetailNote=file_entry.currentlocation,
            agents=[dataverse_agent_id],
        )
        # Link source and derived file through the event.
        databaseFunctions.insertIntoDerivations(
            sourceFileUUID=source_uuid,
            derivedFileUUID=file_entry.uuid,
            relatedEventUUID=derivation_event_uuid,
        )
        job.pyprint(
            'Added derivation from', source_uuid, 'to', file_entry.uuid)
def includeFits(fits, xmlFile, date, eventUUID, fileUUID):
    """Record FITS-derived "format identification" and "validation" events
    for the file.

    On validation failure the placeholder value "Failed" is stored for all
    event fields and the module-level exitCode is incremented by 3.
    """
    global exitCode
    # TODO: glean the event outcome information from the FITS output.
    eventDetailText, eventOutcomeText, eventOutcomeDetailNotes = \
        formatIdentificationFITSAssist(fits, fileUUID)
    # One "format identification" event per outcome detail note.
    for eventOutcomeDetailNote in eventOutcomeDetailNotes:
        insertIntoEvents(fileUUID=fileUUID,
                         eventIdentifierUUID=str(uuid.uuid4()),
                         eventType="format identification",
                         eventDateTime=date,
                         eventDetail=eventDetailText,
                         eventOutcome=eventOutcomeText,
                         eventOutcomeDetailNote=eventOutcomeDetailNote)
    try:
        eventDetailText, eventOutcomeText, eventOutcomeDetailNote = \
            formatValidationFITSAssist(fits)
    except Exception:
        # BUG FIX: was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt.  Keep the best-effort "Failed" fallback values.
        eventDetailText = "Failed"
        eventOutcomeText = "Failed"
        eventOutcomeDetailNote = "Failed"
        exitCode += 3
    insertIntoEvents(fileUUID=fileUUID,
                     eventIdentifierUUID=str(uuid.uuid4()),
                     eventType="validation",
                     eventDateTime=date,
                     eventDetail=eventDetailText,
                     eventOutcome=eventOutcomeText,
                     eventOutcomeDetailNote=eventOutcomeDetailNote)
def includeFits(fits, xmlFile, date, eventUUID, fileUUID):
    """Record FITS-derived "format identification" and "validation" events
    for the file.

    On validation failure the placeholder value "Failed" is stored for all
    event fields and the module-level exitCode is incremented by 3.
    """
    global exitCode
    # TODO: glean the event outcome information from the FITS output.
    eventDetailText, eventOutcomeText, eventOutcomeDetailNotes = \
        formatIdentificationFITSAssist(fits, fileUUID)
    # One "format identification" event per outcome detail note.
    for eventOutcomeDetailNote in eventOutcomeDetailNotes:
        insertIntoEvents(fileUUID=fileUUID,
                         eventIdentifierUUID=str(uuid.uuid4()),
                         eventType="format identification",
                         eventDateTime=date,
                         eventDetail=eventDetailText,
                         eventOutcome=eventOutcomeText,
                         eventOutcomeDetailNote=eventOutcomeDetailNote)
    try:
        eventDetailText, eventOutcomeText, eventOutcomeDetailNote = \
            formatValidationFITSAssist(fits)
    except Exception:
        # BUG FIX: was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt.  Keep the best-effort "Failed" fallback values.
        eventDetailText = "Failed"
        eventOutcomeText = "Failed"
        eventOutcomeDetailNote = "Failed"
        exitCode += 3
    insertIntoEvents(fileUUID=fileUUID,
                     eventIdentifierUUID=str(uuid.uuid4()),
                     eventType="validation",
                     eventDateTime=date,
                     eventDetail=eventDetailText,
                     eventOutcome=eventOutcomeText,
                     eventOutcomeDetailNote=eventOutcomeDetailNote)
def verifyChecksum(fileUUID, filePath, date, eventIdentifierUUID):
    # Compare the checksum stored for the file in the database against a
    # freshly computed checksum of the file on disk, then record a PREMIS
    # "fixity check" event with the outcome.  Exits the process:
    #   1 - no stored checksum, 2 - mismatch, 0 - verified.
    f = File.objects.get(uuid=fileUUID)
    if f.checksum in ("", "None"):
        print >> sys.stderr, "No checksum found in database for file:", fileUUID, filePath
        exit(1)
    checksumFile = sha_for_file(filePath)
    eventOutcome = ""
    eventOutcomeDetailNote = ""
    exitCode = 0
    if checksumFile != f.checksum:
        # Mismatch: record both values in the outcome note for diagnosis.
        eventOutcomeDetailNote = str(checksumFile) + " != " + f.checksum
        eventOutcome = "Fail"
        exitCode = 2
        print >> sys.stderr, "Checksums do not match:", fileUUID, filePath
        print >> sys.stderr, eventOutcomeDetailNote
    else:
        eventOutcomeDetailNote = "%s %s" % (str(checksumFile), "verified")
        eventOutcome = "Pass"
        exitCode = 0
    databaseFunctions.insertIntoEvents(fileUUID=fileUUID, \
        eventIdentifierUUID=eventIdentifierUUID, \
        eventType="fixity check", \
        eventDateTime=date, \
        eventOutcome=eventOutcome, \
        eventOutcomeDetailNote=eventOutcomeDetailNote, \
        eventDetail="program=\"python\"; module=\"hashlib.sha256()\"")
    exit(exitCode)
def create_db_entries(job, mapping, dataverse_agent_id):
    """
    Create derivation event and derivative entries for the tabular bundle
    data in the transfer.
    """
    for entry, file_entry in mapping.items():
        if not (entry.derived_from and entry.use == "derivative"):
            continue
        source_uuid = mapping[entry.derived_from].uuid
        derivation_event_uuid = uuid.uuid4()
        try:
            # Derivation event on the source file.
            databaseFunctions.insertIntoEvents(
                source_uuid,
                eventIdentifierUUID=derivation_event_uuid,
                eventType="derivation",
                eventDateTime=None,
                eventDetail="",
                eventOutcome="",
                eventOutcomeDetailNote=file_entry.currentlocation,
                agents=[dataverse_agent_id],
            )
            # Add derivation
            databaseFunctions.insertIntoDerivations(
                sourceFileUUID=source_uuid,
                derivedFileUUID=file_entry.uuid,
                relatedEventUUID=derivation_event_uuid,
            )
            job.pyprint(
                "Added derivation from", source_uuid, "to", file_entry.uuid
            )
        except django.db.IntegrityError:
            # Re-raise as a parser-specific error with context for the log.
            err_log = "Database integrity error, entry: {} for file {}".format(
                file_entry.currentlocation, file_entry.originallocation
            )
            raise ParseDataverseError(err_log)
def insert_derivation_event(original_uuid, output_uuid, derivation_uuid,
                            event_detail_output, outcome_detail_note,
                            today=None):
    """
    Record a "normalization" event on the source file and link it to the
    derived (preservation) file via a derivation row.
    """
    event_time = timezone.now() if today is None else today
    # Add event information to current file
    databaseFunctions.insertIntoEvents(
        fileUUID=original_uuid,
        eventIdentifierUUID=derivation_uuid,
        eventType="normalization",
        eventDateTime=event_time,
        eventDetail=event_detail_output,
        eventOutcome="",
        eventOutcomeDetailNote=outcome_detail_note or "",
    )
    # Add linking information between files
    databaseFunctions.insertIntoDerivations(
        sourceFileUUID=original_uuid,
        derivedFileUUID=output_uuid,
        relatedEventUUID=derivation_uuid,
    )
def _record_backlog_event(transfer_id, transfer_path, created_at):
    """Record backlog event in both the database and the transfer METS."""
    mets_path = os.path.join(
        transfer_path, "metadata", "submissionDocumentation", "METS.xml"
    )
    mets = metsrw.METSDocument().fromfile(mets_path)
    # Index METS entries by file UUID once so each lookup below is O(1).
    entries_by_uuid = {entry.file_uuid: entry for entry in mets.all_files()}
    # Assuming the same agents apply to all files.
    agents = _transfer_agents(transfer_id)
    for file_obj in File.objects.filter(transfer_id=transfer_id).iterator():
        fsentry = entries_by_uuid.get(file_obj.uuid)
        if fsentry is None:
            # File not represented in the METS; skip it.
            continue
        event_id = str(uuid.uuid4())
        event_type = "placement in backlog"
        fsentry.add_premis_event(
            _premis_event_data(event_id, event_type, created_at, agents)
        )
        insertIntoEvents(
            fileUUID=file_obj.uuid,
            eventIdentifierUUID=event_id,
            eventType=event_type,
            eventDateTime=created_at,
            agents=agents,
        )
    mets.write(mets_path, pretty_print=True)
def main(file_path, file_uuid, sip_uuid):
    """Run every applicable FPR validation command against the file and
    record a PREMIS "validation" event per successful run.

    Returns 0 when all commands succeeded, -1 when any failed.
    """
    import json  # local import: validation command stdout is JSON

    failed = False
    # Get file format
    try:
        fmt = FormatVersion.active.get(fileformatversion__file_uuid=file_uuid)
    except FormatVersion.DoesNotExist:
        rules = fmt = None
    if fmt:
        rules = FPRule.active.filter(format=fmt.uuid, purpose='validation')
    # Check for a default rule exists
    if not rules:
        rules = FPRule.active.filter(purpose='default_validation')
    for rule in rules:
        # Scripts/commands get variable substitution; other types get the
        # file path as an argument instead.
        if rule.command.script_type in ('bashScript', 'command'):
            command_to_execute = replace_string_values(
                rule.command.command, file_=file_uuid, sip=sip_uuid,
                type_='file')
            args = []
        else:
            command_to_execute = rule.command.command
            args = [file_path]
        print('Running', rule.command.description)
        exitstatus, stdout, stderr = executeOrRun(
            rule.command.script_type, command_to_execute, arguments=args)
        if exitstatus != 0:
            print('Command {} failed with exit status {}; stderr:'.format(
                rule.command.description, exitstatus), stderr,
                file=sys.stderr)
            failed = True
            continue
        print('Command {} completed with output {}'.format(
            rule.command.description, stdout))
        # Parse output and generate an Event
        # Output is JSON in format:
        # { "eventOutcomeInformation": "pass",
        #   "eventOutcomeDetailNote": "format=\"JPEG\"; version=\"1.01\"; result=\"Well-Formed and valid\"" }
        # Or
        # { "eventOutcomeInformation": "fail",
        #   "eventOutcomeDetailNote": "format=\"Not detected\"; result=\"Not well-formed\"" }
        # BUG FIX: parse with json.loads rather than ast.literal_eval --
        # literal_eval cannot handle JSON literals such as true/false/null
        # (and the sibling policy-check code already uses json.loads).
        output = json.loads(stdout)
        event_detail = ('program="{tool.description}";'
                        ' version="{tool.version}"'.format(
                            tool=rule.command.tool))
        print('Creating validation event for {} ({})'.format(
            file_path, file_uuid))
        databaseFunctions.insertIntoEvents(
            fileUUID=file_uuid,
            eventType='validation',
            eventDetail=event_detail,
            eventOutcome=output.get('eventOutcomeInformation'),
            eventOutcomeDetailNote=output.get('eventOutcomeDetailNote'),
        )
    if failed:
        return -1
    return 0
def xmlCreateFileAssociationBetween(originalFileFullPath, outputFromNormalizationFileFullPath, SIPFullPath, sipUUID, eventDetailText, eventOutcomeDetailNote, outputFileUUID=""):
    # Register a normalization output file in the SIP and link it to its
    # source file with a "normalization" event plus a derivation row.
    # NOTE(review): sipUUID is interpolated into the SQL unescaped -- SQL
    # injection risk if it can contain untrusted input.
    #assign file UUID
    date = databaseInterface.getUTCDate()
    if outputFileUUID == "":
        outputFileUUID = uuid.uuid4().__str__()
    originalFilePathRelativeToSIP = originalFileFullPath.replace(SIPFullPath,"%SIPDirectory%", 1)
    sql = "SELECT Files.fileUUID FROM Files WHERE removedTime = 0 AND Files.currentLocation = '" + MySQLdb.escape_string(originalFilePathRelativeToSIP) + "' AND Files.sipUUID = '" + sipUUID + "';"
    print sql
    rows = databaseInterface.queryAllSQL(sql)
    print rows
    # Assumes exactly one matching row; IndexError if the lookup fails.
    fileUUID = rows[0][0]
    filePathRelativeToSIP = outputFromNormalizationFileFullPath.replace(SIPFullPath,"%SIPDirectory%", 1)
    addFileToSIP(filePathRelativeToSIP, outputFileUUID, sipUUID, uuid.uuid4().__str__(), date, sourceType="creation", use="preservation")
    updateSizeAndChecksum(outputFileUUID, outputFromNormalizationFileFullPath, date, uuid.uuid4().__str__())
    taskUUID = uuid.uuid4().__str__()
    insertIntoEvents(fileUUID=fileUUID, \
        eventIdentifierUUID=taskUUID, \
        eventType="normalization", \
        eventDateTime=date, \
        eventDetail=eventDetailText, \
        eventOutcome="", \
        eventOutcomeDetailNote=eventOutcomeDetailNote)
    insertIntoDerivations(sourceFileUUID=fileUUID, derivedFileUUID=outputFileUUID, relatedEventUUID=taskUUID)
def includeFits(fits, xmlFile, date, eventUUID, fileUUID):
    # Record FITS-derived "format identification" and "validation" events
    # for the file; increments the module-level exitCode on failure.
    global exitCode
    ##eventOutcome = createOutcomeInformation( eventOutcomeDetailNote = uuid)
    #TO DO... Gleam the event outcome information from the output
    #print etree.tostring(fits, pretty_print=True)
    #</CREATE formatIdentificationFITSAssist EVENTS>
    #try:
    eventDetailText, eventOutcomeText, eventOutcomeDetailNotes = formatIdentificationFITSAssist( fits, fileUUID)
    #except:
    # Dead branch left over from a try/except conversion -- never executes.
    if 0:
        eventDetailText = "Failed"
        eventOutcomeText = "Failed"
        eventOutcomeDetailNotes = ["Failed"]
        exitCode += 4
    outcomeInformation = createOutcomeInformation("To be removed", eventOutcomeText)
    #formatIdentificationEvent = createEvent( eventUUID, "format identification", \
    #    eventDateTime=date, \
    #    eventDetailText=eventDetailText, \
    #    eOutcomeInformation=outcomeInformation)
    #eventOutcomeInformation = getTagged(formatIdentificationEvent, "eventOutcomeInformation")[0]
    #eventOutcomeDetail = getTagged(eventOutcomeInformation, "eventOutcomeDetail")[0]
    #eventOutcomeInformation.remove(eventOutcomeDetail)
    # One "format identification" event per outcome detail note.
    for eventOutcomeDetailNote in eventOutcomeDetailNotes:
        #eventOutcomeDetail = etree.SubElement(eventOutcomeInformation, "eventOutcomeDetail")
        #etree.SubElement(eventOutcomeDetail, "eventOutcomeDetailNote").text = eventOutcomeDetailNote
        insertIntoEvents(fileUUID=fileUUID, \
            eventIdentifierUUID=uuid.uuid4().__str__(), \
            eventType="format identification", \
            eventDateTime=date, \
            eventDetail=eventDetailText, \
            eventOutcome=eventOutcomeText, \
            eventOutcomeDetailNote=eventOutcomeDetailNote)
    #</CREATE formatIdentificationFITSAssist EVENTS>
    try:
        eventDetailText, eventOutcomeText, eventOutcomeDetailNote = formatValidationFITSAssist( fits)
    except:
        eventDetailText = "Failed"
        eventOutcomeText = "Failed"
        # NOTE(review): this assigns eventOutcomeDetailNotes (plural), so on
        # failure eventOutcomeDetailNote used below may be stale or unbound
        # -- looks like a typo; confirm the intended variable.
        eventOutcomeDetailNotes = "Failed"
        exitCode += 3
    #outcomeInformation = createOutcomeInformation( eventOutcomeDetailNote, eventOutcomeText)
    #formatValidationEvent = createEvent( uuid.uuid4().__str__(), "validation", \
    #    eventDateTime=date, \
    #    eventDetailText=eventDetailText, \
    #    eOutcomeInformation=outcomeInformation)
    insertIntoEvents(fileUUID=fileUUID, \
        eventIdentifierUUID=uuid.uuid4().__str__(), \
        eventType="validation", \
        eventDateTime=date, \
        eventDetail=eventDetailText, \
        eventOutcome=eventOutcomeText, \
        eventOutcomeDetailNote=eventOutcomeDetailNote)
def call(jobs):
    """Verify each transfer's bag, restructure it for compliance, and write
    a "fixity check" event for every file under objects/.

    Job args: [1] the bag path, [2] the transfer UUID.  The job status is
    set to the bag-verification exit code, or to the failure code of the
    restructuring step.
    """
    with transaction.atomic():
        for job in jobs:
            with job.JobContext():
                target = job.args[1]
                transferUUID = job.args[2]
                exitCode = verify_bag(job, target)
                if exitCode != 0:
                    job.pyprint("Failed bagit compliance. Not restructuring.", file=sys.stderr)
                    job.set_status(exitCode)
                else:
                    try:
                        restructureBagForComplianceFileUUIDsAssigned(job, target, transferUUID)
                    except fileOperations.UpdateFileLocationFailed as e:
                        job.set_status(e.code)
                        continue
                    # BUG FIX: the loop variable was named `uuid`, shadowing
                    # the uuid module; also use flat=True instead of
                    # unpacking 1-tuples.
                    file_uuids = File.objects.filter(
                        removedtime__isnull=True,
                        transfer_id=transferUUID,
                        currentlocation__startswith="%transferDirectory%objects/",
                    ).values_list('uuid', flat=True)
                    for file_uuid in file_uuids:
                        insertIntoEvents(fileUUID=file_uuid,
                                         eventType="fixity check",
                                         eventDetail="Bagit - verifypayloadmanifests",
                                         eventOutcome="Pass")
                    job.set_status(exitCode)
def updateSizeAndChecksum(fileUUID, filePath, date, eventIdentifierUUID,
                          fileSize=None, checksum=None, checksumType=None,
                          add_event=True):
    """
    Update a File with its size, checksum and checksum type.  These are
    parameters that can be either generated or provided via keywords.

    Finally, insert the corresponding Event.  This behavior can be cancelled
    using the boolean keyword 'add_event'.
    """
    # Compute whichever values the caller did not supply.
    size = fileSize if fileSize else os.path.getsize(filePath)
    algorithm = (checksumType if checksumType
                 else get_setting('checksum_type', 'sha256'))
    digest = checksum if checksum else get_file_checksum(filePath, algorithm)
    File.objects.filter(uuid=fileUUID).update(
        size=size, checksum=digest, checksumtype=algorithm)
    if not add_event:
        return
    insertIntoEvents(
        fileUUID=fileUUID,
        eventType='message digest calculation',
        eventDateTime=date,
        eventDetail='program="python"; module="hashlib.{}()"'.format(
            algorithm),
        eventOutcomeDetailNote=digest)
def test_insert_into_events(self):
    """insertIntoEvents creates exactly one Event row with the given id."""
    matching = Event.objects.filter(event_id="new_event")
    assert matching.count() == 0
    databaseFunctions.insertIntoEvents(
        fileUUID="88c8f115-80bc-4da4-a1e6-0158f5df13b9",
        eventIdentifierUUID="new_event",
    )
    # Queryset is lazy, so count() re-evaluates after the insert.
    assert matching.count() == 1
def updateFileLocation(src, dst, eventType, eventDateTime, eventDetail, eventIdentifierUUID=None, fileUUID="None", sipUUID=None, transferUUID=None, eventOutcomeDetailNote=""):
    """Update a file's current location in the database and record an event.

    If the file uuid is not provided, will use the sip uuid (or transfer
    uuid) and old path to find the file uuid.
    """
    # BUG FIX: eventIdentifierUUID previously defaulted to
    # uuid.uuid4().__str__() in the signature, which Python evaluates once
    # at import time -- every defaulted call shared the same event UUID.
    # Generate a fresh one per call instead.
    if eventIdentifierUUID is None:
        eventIdentifierUUID = uuid.uuid4().__str__()
    src = unicodeToStr(src)
    dst = unicodeToStr(dst)
    fileUUID = unicodeToStr(fileUUID)
    if not fileUUID or fileUUID == "None":
        sql = "Need to define transferUUID or sipUUID"
        # NOTE(review): only src is escaped; sipUUID/transferUUID are
        # interpolated raw -- injection risk if they can be untrusted.
        if sipUUID:
            sql = "SELECT Files.fileUUID FROM Files WHERE removedTime = 0 AND Files.currentLocation = '" + MySQLdb.escape_string(src) + "' AND Files.sipUUID = '" + sipUUID + "';"
        elif transferUUID:
            sql = "SELECT Files.fileUUID FROM Files WHERE removedTime = 0 AND Files.currentLocation = '" + MySQLdb.escape_string(src) + "' AND Files.transferUUID = '" + transferUUID + "';"
        c, sqlLock = databaseInterface.querySQL(sql)
        row = c.fetchone()
        while row != None:
            fileUUID = unicodeToStr(row[0])
            row = c.fetchone()
        sqlLock.release()
    if eventOutcomeDetailNote == "":
        eventOutcomeDetailNote = "Original name=\"%s\"; cleaned up name=\"%s\"" % (src, dst)
    #CREATE THE EVENT
    if not fileUUID:
        print >>sys.stderr, "Unable to find file uuid for: ", src, " -> ", dst
        exit(6)
    insertIntoEvents(fileUUID=fileUUID,
                     eventIdentifierUUID=eventIdentifierUUID,
                     eventType=eventType,
                     eventDateTime=eventDateTime,
                     eventDetail=eventDetail,
                     eventOutcome="",
                     eventOutcomeDetailNote=eventOutcomeDetailNote)
    #UPDATE THE CURRENT FILE PATH
    sql = """UPDATE Files SET currentLocation='%s' WHERE fileUUID='%s';""" % (MySQLdb.escape_string(dst), fileUUID)
    databaseInterface.runSQL(sql)
def _execute_rule_command(self, rule):
    """Execute the FPR command of FPR rule ``rule`` against the file passed
    in to this client script. The output of that command determines what we
    print to stdout and stderr, and the nature of the validation event that
    we save to the db. We also copy the MediaConch policy file to the logs/
    directory of the AIP if it has not already been copied there.

    Returns 'passed' or 'failed'.
    """
    result = 'passed'
    command_to_execute, args = self._get_command_to_execute(rule)
    self.job.pyprint('Running', rule.command.description)
    exitstatus, stdout, stderr = executeOrRun(rule.command.script_type,
                                              command_to_execute,
                                              arguments=args,
                                              printing=False,
                                              capture_output=True)
    try:
        output = json.loads(stdout)
    except ValueError:
        # The command is expected to emit JSON; surface malformed output.
        logger.exception(
            'Unable to load an object from the malformed JSON in\n%s',
            stdout)
        raise
    if self.file_type in ('preservation', 'original'):
        self._save_to_logs_dir(output)
    if exitstatus == 0:
        self.job.pyprint('Command {} completed with output {}'.format(
            rule.command.description, stdout))
    else:
        # Non-zero exit: report and short-circuit as a failure.
        self.job.print_error(
            'Command {} failed with exit status {}; stderr:'.format(
                rule.command.description, exitstatus), stderr)
        return 'failed'
    event_detail = ('program="{tool.description}";'
                    ' version="{tool.version}"'.format(
                        tool=rule.command.tool))
    # A clean exit can still carry a non-pass policy outcome.
    if output.get('eventOutcomeInformation') != 'pass':
        self.job.print_error(
            'Command {descr} returned a non-pass outcome for the policy'
            ' check;\n\noutcome: {outcome}\n\ndetails: {details}.'.format(
                descr=rule.command.description,
                outcome=output.get('eventOutcomeInformation'),
                details=output.get('eventOutcomeDetailNote')))
        result = 'failed'
    self.job.pyprint('Creating policy checking event for {} ({})'.format(
        self.file_path, self.file_uuid))
    # Manually-normalized access derivatives have no file UUID so we can't
    # create a validation event for them. TODO/QUESTION: should we use the
    # UUID that was assigned to the manually normalized derivative during
    # transfer, i.e., the one that we retrieve in
    # ``_get_manually_normalized_access_derivative_file_uuid`` above?
    if not self.is_manually_normalized_access_derivative:
        databaseFunctions.insertIntoEvents(
            fileUUID=self.file_uuid,
            eventType='validation',  # From PREMIS controlled vocab.
            eventDetail=event_detail,
            eventOutcome=output.get('eventOutcomeInformation'),
            eventOutcomeDetailNote=output.get('eventOutcomeDetailNote'),
        )
    return result
def onceNormalized(command, opts, replacementDic):
    """Register the output file(s) of a normalization command.

    Collects files found at command.outputLocation; for preservation
    normalization, adds each to the SIP, records the "normalization" event,
    updates size/checksum, and links source and derivative.  Rotates
    %outputFileUUID%/%postfix% in replacementDic for the next output.
    """
    transcodedFiles = []
    if not command.outputLocation:
        command.outputLocation = ""
    if os.path.isfile(command.outputLocation):
        transcodedFiles.append(command.outputLocation)
    elif os.path.isdir(command.outputLocation):
        for path, directories, files in os.walk(command.outputLocation):
            for p in files:
                p = os.path.join(path, p)
                if os.path.isfile(p):
                    transcodedFiles.append(p)
    elif command.outputLocation:
        print >> sys.stderr, command
        print >> sys.stderr, "Error - output file does not exist [" + command.outputLocation + "]"
        command.exitCode = -2
    derivationEventUUID = uuid.uuid4().__str__()
    eventDetail = ""
    # BUG FIX: was the duplicated assignment
    # `eventDetail = eventDetail = command.eventDetailCommand.stdOut`;
    # also use `is not None` for the None test.
    if command.eventDetailCommand is not None:
        eventDetail = command.eventDetailCommand.stdOut
    for ef in transcodedFiles:
        if opts["commandClassifications"] == "preservation":
            # Add the new file to the sip
            filePathRelativeToSIP = ef.replace(opts["sipPath"], "%SIPDirectory%", 1)
            # addFileToSIP(filePathRelativeToSIP, fileUUID, sipUUID, taskUUID, date, sourceType="ingestion"):
            addFileToSIP(
                filePathRelativeToSIP,
                replacementDic["%outputFileUUID%"],
                opts["sipUUID"],
                uuid.uuid4().__str__(),
                opts["date"],
                sourceType="creation",
                use="preservation",
            )
            # Calculate new file checksum
            # Add event information to current file
            insertIntoEvents(
                fileUUID=opts["fileUUID"],
                eventIdentifierUUID=derivationEventUUID,
                eventType="normalization",
                eventDateTime=opts["date"],
                eventDetail=eventDetail,
                eventOutcome="",
                eventOutcomeDetailNote=filePathRelativeToSIP,
            )
            updateSizeAndChecksum(replacementDic["%outputFileUUID%"], ef, opts["date"], uuid.uuid4().__str__())
            # Add linking information between files
            insertIntoDerivations(
                sourceFileUUID=opts["fileUUID"],
                derivedFileUUID=replacementDic["%outputFileUUID%"],
                relatedEventUUID=derivationEventUUID,
            )
        # Rotate the output UUID/postfix for the next transcoded file.
        replacementDic["%outputFileUUID%"] = uuid.uuid4().__str__()
        replacementDic["%postfix%"] = "-" + replacementDic["%outputFileUUID%"]
def addFileToSIP(filePathRelativeToSIP, fileUUID, sipUUID, taskUUID, date, sourceType="ingestion", use="original"):
    """Insert a File row for the SIP and record its ingestion event.

    taskUUID is accepted for interface compatibility but is not used by
    this variant.
    """
    insertIntoFiles(fileUUID, filePathRelativeToSIP, date, sipUUID=sipUUID, use=use)
    insertIntoEvents(
        fileUUID=fileUUID,
        eventType=sourceType,
        eventDateTime=date,
        eventDetail="",
        eventOutcome="",
        eventOutcomeDetailNote="",
    )
def test_insert_into_event_fetches_correct_agent_from_file(self):
    """The created event is linked to the three agents for the file."""
    databaseFunctions.insertIntoEvents(
        fileUUID="88c8f115-80bc-4da4-a1e6-0158f5df13b9",
        eventIdentifierUUID="event_agent_id",
    )
    event_agents = Event.objects.get(event_id="event_agent_id").agents
    assert event_agents.count() == 3
    for agent_id in (1, 2, 5):
        assert event_agents.get(id=agent_id)
def includeFits(fits, xmlFile, date, eventUUID, fileUUID):
    # Record FITS-derived "format identification" and "validation" events
    # for the file; increments the module-level exitCode on failure.
    global exitCode
    ##eventOutcome = createOutcomeInformation( eventOutcomeDetailNote = uuid)
    #TO DO... Gleam the event outcome information from the output
    #print etree.tostring(fits, pretty_print=True)
    #</CREATE formatIdentificationFITSAssist EVENTS>
    #try:
    eventDetailText, eventOutcomeText, eventOutcomeDetailNotes = formatIdentificationFITSAssist(fits, fileUUID)
    #except:
    # Dead branch left over from a try/except conversion -- never executes.
    if 0:
        eventDetailText = "Failed"
        eventOutcomeText = "Failed"
        eventOutcomeDetailNotes = ["Failed"]
        exitCode += 4
    outcomeInformation = createOutcomeInformation( "To be removed", eventOutcomeText)
    #formatIdentificationEvent = createEvent( eventUUID, "format identification", \
    #    eventDateTime=date, \
    #    eventDetailText=eventDetailText, \
    #    eOutcomeInformation=outcomeInformation)
    #eventOutcomeInformation = getTagged(formatIdentificationEvent, "eventOutcomeInformation")[0]
    #eventOutcomeDetail = getTagged(eventOutcomeInformation, "eventOutcomeDetail")[0]
    #eventOutcomeInformation.remove(eventOutcomeDetail)
    # One "format identification" event per outcome detail note.
    for eventOutcomeDetailNote in eventOutcomeDetailNotes:
        #eventOutcomeDetail = etree.SubElement(eventOutcomeInformation, "eventOutcomeDetail")
        #etree.SubElement(eventOutcomeDetail, "eventOutcomeDetailNote").text = eventOutcomeDetailNote
        insertIntoEvents(fileUUID=fileUUID, \
            eventIdentifierUUID=uuid.uuid4().__str__(), \
            eventType="format identification", \
            eventDateTime=date, \
            eventDetail=eventDetailText, \
            eventOutcome=eventOutcomeText, \
            eventOutcomeDetailNote=eventOutcomeDetailNote)
    #</CREATE formatIdentificationFITSAssist EVENTS>
    try:
        eventDetailText, eventOutcomeText, eventOutcomeDetailNote = formatValidationFITSAssist(fits)
    except:
        eventDetailText = "Failed"
        eventOutcomeText = "Failed"
        # NOTE(review): this assigns eventOutcomeDetailNotes (plural), so on
        # failure eventOutcomeDetailNote used below may be stale or unbound
        # -- looks like a typo; confirm the intended variable.
        eventOutcomeDetailNotes = "Failed"
        exitCode += 3
    #outcomeInformation = createOutcomeInformation( eventOutcomeDetailNote, eventOutcomeText)
    #formatValidationEvent = createEvent( uuid.uuid4().__str__(), "validation", \
    #    eventDateTime=date, \
    #    eventDetailText=eventDetailText, \
    #    eOutcomeInformation=outcomeInformation)
    insertIntoEvents(fileUUID=fileUUID, \
        eventIdentifierUUID=uuid.uuid4().__str__(), \
        eventType="validation", \
        eventDateTime=date, \
        eventDetail=eventDetailText, \
        eventOutcome=eventOutcomeText, \
        eventOutcomeDetailNote=eventOutcomeDetailNote)
def updateFileLocation(
    src,
    dst,
    eventType="",
    eventDateTime="",
    eventDetail="",
    eventIdentifierUUID=None,
    fileUUID="None",
    sipUUID=None,
    transferUUID=None,
    eventOutcomeDetailNote="",
    createEvent=True,
):
    """
    Updates file location in the database, and optionally writes an event
    for the sanitization to the database.  Note that this does not actually
    move a file on disk.

    If the file uuid is not provided, will use the SIP uuid and the old
    path to find the file uuid.

    To suppress creation of an event, pass the createEvent keyword argument
    (for example, if the file moved due to the renaming of a parent
    directory and not the file itself).

    Raises ValueError when neither fileUUID, sipUUID nor transferUUID is
    given; File.DoesNotExist propagates if the lookup fails.
    """
    # BUG FIX: eventIdentifierUUID previously defaulted to
    # uuid.uuid4().__str__(), which is evaluated once at import time so all
    # defaulted calls shared a single UUID.  The parameter is kept for
    # backward compatibility; as before, it is not forwarded to
    # insertIntoEvents by this function.
    src = unicodeToStr(src)
    dst = unicodeToStr(dst)
    fileUUID = unicodeToStr(fileUUID)
    if not fileUUID or fileUUID == "None":
        kwargs = {"removedtime__isnull": True, "currentlocation": src}
        if sipUUID:
            kwargs["sip_id"] = sipUUID
        elif transferUUID:
            kwargs["transfer_id"] = transferUUID
        else:
            raise ValueError(
                "One of fileUUID, sipUUID, or transferUUID must be provided")
        f = File.objects.get(**kwargs)
    else:
        f = File.objects.get(uuid=fileUUID)
    # UPDATE THE CURRENT FILE PATH
    f.currentlocation = dst
    f.save()
    if not createEvent:
        return
    if eventOutcomeDetailNote == "":
        eventOutcomeDetailNote = 'Original name="%s"; cleaned up name="%s"' % (
            src, dst)
    # CREATE THE EVENT
    insertIntoEvents(
        fileUUID=f.uuid,
        eventType=eventType,
        eventDateTime=eventDateTime,
        eventDetail=eventDetail,
        eventOutcome="",
        eventOutcomeDetailNote=eventOutcomeDetailNote,
    )
def addFileToSIP(filePathRelativeToSIP, fileUUID, sipUUID, taskUUID, date, sourceType="ingestion", use="original"):
    """Insert a File row for the SIP and record its ingestion event, using
    taskUUID as the event identifier."""
    insertIntoFiles(fileUUID, filePathRelativeToSIP, date, sipUUID=sipUUID, use=use)
    insertIntoEvents(
        fileUUID=fileUUID,
        eventIdentifierUUID=taskUUID,
        eventType=sourceType,
        eventDateTime=date,
        eventDetail="",
        eventOutcome="",
        eventOutcomeDetailNote="",
    )
def onceNormalized(command):
    # Legacy variant: registers each transcoded output as a preservation
    # file, records the normalization event and the derivation link, and
    # rotates the module-level outputFileUUID for the next output.
    transcodedFiles = []
    if not command.outputLocation:
        command.outputLocation = ""
    elif os.path.isfile(command.outputLocation):
        transcodedFiles.append(command.outputLocation)
    elif os.path.isdir(command.outputLocation):
        for w in os.walk(command.outputLocation):
            path, directories, files = w
            for p in files:
                p = os.path.join(path, p)
                if os.path.isfile(p):
                    transcodedFiles.append(p)
    elif command.outputLocation:
        print >>sys.stderr, command
        print >>sys.stderr, "Error - output file does not exist [" + command.outputLocation + "]"
        command.exitCode = -2
    derivationEventUUID = uuid.uuid4().__str__()
    for ef in transcodedFiles:
        # Module-level state mutated below.
        global outputFileUUID
        global replacementDic
        global opts
        if opts.commandClassifications == "preservation":
            # Dead string retained from the pre-database xmlNormalize path.
            old = """xmlNormalize(outputFileUUID, \
                ef, \
                command.eventDetailCommand.stdOut, \
                opts.fileUUID, \
                opts.objectsDirectory, \
                opts.taskUUID, \
                opts.date, \
                opts.logsDirectory, \
                ) # {normalized; not normalized}"""
            #Add the new file to the sip
            filePathRelativeToSIP = ef.replace(opts.sipPath, "%SIPDirectory%", 1)
            # addFileToSIP(filePathRelativeToSIP, fileUUID, sipUUID, taskUUID, date, sourceType="ingestion"):
            addFileToSIP(filePathRelativeToSIP, outputFileUUID, opts.sipUUID, uuid.uuid4().__str__(), opts.date, sourceType="creation", use="preservation")
            #Calculate new file checksum
            print >>sys.stderr, "TODO: calculate new file checksum"
            #Add event information to current file
            insertIntoEvents(fileUUID=opts.fileUUID, \
                eventIdentifierUUID=derivationEventUUID, \
                eventType="normalization", \
                eventDateTime=opts.date, \
                eventDetail=command.eventDetailCommand.stdOut, \
                eventOutcome="", \
                eventOutcomeDetailNote=filePathRelativeToSIP)
            updateSizeAndChecksum(outputFileUUID, ef, opts.date, uuid.uuid4().__str__())
            #Add linking information between files
            insertIntoDerivations(sourceFileUUID=opts.fileUUID, derivedFileUUID=outputFileUUID, relatedEventUUID=derivationEventUUID)
            # Rotate the UUID/postfix for the next transcoded output.
            outputFileUUID = uuid.uuid4().__str__()
            replacementDic["%postfix%"] = "-" + outputFileUUID
def addAccessionEvent(fileUUID, transferUUID, date):
    """Record a "registration" event carrying the transfer's accession
    number; does nothing when the transfer has no accession ID."""
    transfer = Transfer.objects.get(uuid=transferUUID)
    if not transfer.accessionid:
        return
    note = "accession#" + MySQLdb.escape_string(transfer.accessionid)
    insertIntoEvents(
        fileUUID=fileUUID,
        eventType="registration",
        eventDateTime=date,
        eventDetail="",
        eventOutcome="",
        eventOutcomeDetailNote=note,
    )
def addFileToTransfer(filePathRelativeToSIP, fileUUID, transferUUID, taskUUID, date, sourceType="ingestion", eventDetail="", use="original"):
    """Register a file with a transfer: create the File row, write the
    ingestion event, and record the accession (registration) event."""
    insertIntoFiles(fileUUID, filePathRelativeToSIP, date, transferUUID=transferUUID, use=use)
    insertIntoEvents(
        fileUUID=fileUUID,
        eventType=sourceType,
        eventDateTime=date,
        eventDetail=eventDetail,
        eventOutcome="",
        eventOutcomeDetailNote="",
    )
    addAccessionEvent(fileUUID, transferUUID, date)
def call(jobs):
    """Scan each job's file, collecting event rows in a queue, then flush
    the queued events to the database in a single transaction."""
    queued_events = []
    for job in jobs:
        with job.JobContext(logger=logger):
            job.set_status(scan_file(queued_events, *job.args[1:]))
    with transaction.atomic():
        for event_kwargs in queued_events:
            insertIntoEvents(**event_kwargs)
def addFileToTransfer(filePathRelativeToSIP, fileUUID, transferUUID, taskUUID, date, sourceType="ingestion", eventDetail="", use="original"):
    """Register a file with a transfer: create the File row, write the
    ingestion event (identified by taskUUID), and record the accession
    (registration) event."""
    insertIntoFiles(fileUUID, filePathRelativeToSIP, date, transferUUID=transferUUID, use=use)
    insertIntoEvents(
        fileUUID=fileUUID,
        eventIdentifierUUID=taskUUID,
        eventType=sourceType,
        eventDateTime=date,
        eventDetail=eventDetail,
        eventOutcome="",
        eventOutcomeDetailNote="",
    )
    addAccessionEvent(fileUUID, transferUUID, date)
def updateSizeAndChecksum(fileUUID, filePath, date, eventIdentifierUUID):
    """Store the file's on-disk size and SHA checksum, then record the
    "message digest calculation" event.

    eventIdentifierUUID is accepted for interface compatibility but is not
    used by this variant.
    """
    size = os.path.getsize(filePath)
    digest = str(sha_for_file(filePath))
    File.objects.filter(uuid=fileUUID).update(size=size, checksum=digest)
    insertIntoEvents(
        fileUUID=fileUUID,
        eventType="message digest calculation",
        eventDateTime=date,
        eventDetail='program="python"; module="hashlib.sha256()"',
        eventOutcomeDetailNote=digest,
    )
def addAccessionEvent(fileUUID, transferUUID, date):
    # Record a "registration" event carrying the transfer's accession
    # number, if the transfer has one.
    # NOTE(review): transferUUID is interpolated into the SQL unescaped --
    # SQL injection risk if it can contain untrusted input.
    sql = """SELECT accessionID FROM Transfers WHERE transferUUID = '%s';""" % (transferUUID)
    accessionID=databaseInterface.queryAllSQL(sql)[0][0]
    if accessionID:
        eventIdentifierUUID = uuid.uuid4().__str__()
        eventOutcomeDetailNote = "accession#" + MySQLdb.escape_string(accessionID)
        insertIntoEvents(fileUUID=fileUUID, \
            eventIdentifierUUID=eventIdentifierUUID, \
            eventType="registration", \
            eventDateTime=date, \
            eventDetail="", \
            eventOutcome="", \
            eventOutcomeDetailNote=eventOutcomeDetailNote)
def onceNormalized(command, opts, replacementDic):
    # Post-normalization hook: find the file(s) the transcoder command just
    # produced, register each preservation derivative in the database, and
    # refresh replacementDic's output UUID/postfix for the next output.
    transcodedFiles = []
    if not command.outputLocation:
        command.outputLocation = ""
    if os.path.isfile(command.outputLocation):
        transcodedFiles.append(command.outputLocation)
    elif os.path.isdir(command.outputLocation):
        # Output is a directory: collect every regular file beneath it.
        for w in os.walk(command.outputLocation):
            path, directories, files = w
            for p in files:
                p = os.path.join(path, p)
                if os.path.isfile(p):
                    transcodedFiles.append(p)
    elif command.outputLocation:
        # An output location was configured but nothing exists there.
        print >>sys.stderr, command
        print >>sys.stderr, "Error - output file does not exist [" + command.outputLocation + "]"
        command.exitCode = -2
    # One derivation event UUID shared by the event and the derivation row.
    derivationEventUUID = uuid.uuid4().__str__()
    eventDetail = "ArchivematicaFPRCommandID=\"%s\"" % (command.pk)
    if command.eventDetailCommand != None:
        eventDetail = '%s; %s' % (eventDetail, command.eventDetailCommand.stdOut)
    for ef in transcodedFiles:
        if opts["commandClassifications"] == "preservation":
            # TODO Add manual normalization for files of same name mapping
            #Add the new file to the sip
            filePathRelativeToSIP = ef.replace(opts["sipPath"], "%SIPDirectory%", 1)
            # addFileToSIP(filePathRelativeToSIP, fileUUID, sipUUID, taskUUID, date, sourceType="ingestion"):
            addFileToSIP(filePathRelativeToSIP, replacementDic["%outputFileUUID%"], opts["sipUUID"], uuid.uuid4().__str__(), opts["date"], sourceType="creation", use="preservation")
            #Calculate new file checksum
            #Add event information to current file
            insertIntoEvents(fileUUID=opts["fileUUID"],
                             eventIdentifierUUID=derivationEventUUID,
                             eventType="normalization",
                             eventDateTime=opts["date"],
                             eventDetail=eventDetail,
                             eventOutcome="",
                             eventOutcomeDetailNote=filePathRelativeToSIP)
            updateSizeAndChecksum(replacementDic["%outputFileUUID%"], ef, opts["date"], uuid.uuid4().__str__())
            #Add linking information between files
            insertIntoDerivations(sourceFileUUID=opts["fileUUID"], derivedFileUUID=replacementDic["%outputFileUUID%"], relatedEventUUID=derivationEventUUID)
            # Record the output's format ID (only name is known; version and
            # registry fields are left NULL).
            sql = "INSERT INTO FilesIDs (fileUUID, formatName, formatVersion, formatRegistryName, formatRegistryKey) VALUES ('%s', '%s', NULL, NULL, NULL);" % (replacementDic["%outputFileUUID%"], command.outputFormat)
            databaseInterface.runSQL(sql)
            # Fresh UUID/postfix so any further output gets a new identity.
            replacementDic["%outputFileUUID%"] = uuid.uuid4().__str__()
            replacementDic["%postfix%"] = "-" + replacementDic["%outputFileUUID%"]
def updateSizeAndChecksum(fileUUID, filePath, date, eventIdentifierUUID):
    """Store a file's size and SHA checksum on its Files row and record a
    "message digest calculation" PREMIS event carrying the checksum.

    NOTE(review): values are concatenated into the SQL unescaped; assumed
    to be system-generated (UUID / digest / size) — confirm.
    """
    fileSize = str(os.path.getsize(filePath))
    checksum = str(sha_for_file(filePath))
    sql = ("UPDATE Files SET fileSize='%s', checksum='%s' WHERE fileUUID='%s'"
           % (fileSize, checksum, fileUUID))
    databaseInterface.runSQL(sql)
    insertIntoEvents(
        fileUUID=fileUUID,
        eventIdentifierUUID=eventIdentifierUUID,
        eventType="message digest calculation",
        eventDateTime=date,
        eventDetail="program=\"python\"; module=\"hashlib.sha256()\"",
        eventOutcomeDetailNote=checksum,
    )
def xmlCreateFileAssociationBetween(originalFileFullPath, outputFromNormalizationFileFullPath, SIPFullPath, sipUUID, eventDetailText, eventOutcomeDetailNote, outputFileUUID=""):
    # Associate a normalized output file with its original: add the output
    # to the SIP as a preservation file, record its size/checksum, then link
    # the pair with a "normalization" event and a derivation row.
    #assign file UUID
    date = databaseInterface.getUTCDate()
    if outputFileUUID == "":
        outputFileUUID = uuid.uuid4().__str__()
    originalFilePathRelativeToSIP = originalFileFullPath.replace(
        SIPFullPath, "%SIPDirectory%", 1)
    # Look up the original file's UUID by its SIP-relative location.
    sql = "SELECT Files.fileUUID FROM Files WHERE removedTime = 0 AND Files.currentLocation = '" + MySQLdb.escape_string(
        originalFilePathRelativeToSIP
    ) + "' AND Files.sipUUID = '" + sipUUID + "';"
    print sql
    rows = databaseInterface.queryAllSQL(sql)
    print rows
    # NOTE(review): raises IndexError when no matching row exists — confirm
    # callers guarantee the original file is registered.
    fileUUID = rows[0][0]
    filePathRelativeToSIP = outputFromNormalizationFileFullPath.replace(
        SIPFullPath, "%SIPDirectory%", 1)
    # Register the normalized output as a preservation file in the SIP.
    addFileToSIP(filePathRelativeToSIP, outputFileUUID, sipUUID,
                 uuid.uuid4().__str__(), date, sourceType="creation",
                 use="preservation")
    updateSizeAndChecksum(outputFileUUID, outputFromNormalizationFileFullPath,
                          date, uuid.uuid4().__str__())
    # One UUID shared by the normalization event and the derivation link.
    taskUUID = uuid.uuid4().__str__()
    insertIntoEvents(fileUUID=fileUUID,
                     eventIdentifierUUID=taskUUID,
                     eventType="normalization",
                     eventDateTime=date,
                     eventDetail=eventDetailText,
                     eventOutcome="",
                     eventOutcomeDetailNote=eventOutcomeDetailNote)
    insertIntoDerivations(sourceFileUUID=fileUUID,
                          derivedFileUUID=outputFileUUID,
                          relatedEventUUID=taskUUID)
def write_premis_event(job, sip_uuid, checksum_type, event_outcome, event_outcome_detail_note):
    """Write the AIP-level "fixity check" PREMIS event.

    Returns the detail note on success; logs via the job and returns None
    when the database write fails.
    """
    event_detail = 'program="python, bag"; module="hashlib.{}()"'.format(
        checksum_type)
    try:
        databaseFunctions.insertIntoEvents(
            fileUUID=sip_uuid,
            eventType='fixity check',
            eventDetail=event_detail,
            eventOutcome=event_outcome,
            eventOutcomeDetailNote=event_outcome_detail_note)
    except Exception as err:
        job.pyprint(
            'Failed to write PREMIS event to database. Error: {error}'.format(
                error=err))
        return None
    return event_outcome_detail_note
def insert_transcription_event(status, file_uuid, rule, relative_location):
    """Record a "transcription" PREMIS event for a file.

    :param int status: exit status of the transcription command; 0 = success.
    :param str file_uuid: UUID of the transcribed file.
    :param rule: FPR rule whose command produced the transcription.
    :param str relative_location: location of the transcription output,
        stored as the event's outcome detail note.
    :return: the new event's UUID (str).
    """
    # Bug fix: `status is 0` compared identity, not value — it only worked
    # because CPython interns small ints (and is a SyntaxWarning on 3.8+).
    outcome = "transcribed" if status == 0 else "not transcribed"
    tool = rule.command.tool
    event_detail = u"program={}; version={}; command=\"{}\"".format(
        tool.description, tool.version,
        rule.command.command.replace('"', r'\"'))
    event_uuid = str(uuid4())
    databaseFunctions.insertIntoEvents(
        fileUUID=file_uuid,
        eventIdentifierUUID=event_uuid,
        eventType="transcription",
        eventDetail=event_detail,
        eventOutcome=outcome,
        eventOutcomeDetailNote=relative_location
    )
    return event_uuid
def write_identification_event(file_uuid, command, format=None, success=True):
    """Record a "format identification" PREMIS event for a file.

    ``success`` selects the "Positive" / "Not identified" outcome; a falsy
    ``format`` is replaced by 'No Matching Format' in the detail note.
    """
    detail = 'program="{}"; version="{}"'.format(
        command.tool.description, command.tool.version)
    outcome = "Positive" if success else "Not identified"
    insertIntoEvents(fileUUID=file_uuid,
                     eventIdentifierUUID=str(uuid.uuid4()),
                     eventType="format identification",
                     eventDateTime=getUTCDate(),
                     eventDetail=detail,
                     eventOutcome=outcome,
                     eventOutcomeDetailNote=format or 'No Matching Format')
def verify_checksum(
    job, file_uuid, path, checksum, checksumtype, event_id=None, date=None
):
    """
    Verify the checksum of a given file, and create a fixity event.

    :param str file_uuid: UUID of the file to verify
    :param str path: Path of the file to verify
    :param str checksum: Checksum to compare against
    :param str checksumtype: Type of the provided checksum (md5, sha256, etc)
    :param str event_id: Event ID (generated when omitted)
    :param str date: Date of the event (now, when omitted)
    """
    if event_id is None:
        event_id = str(uuid.uuid4())
    if date is None:
        date = timezone.now().isoformat(" ")
    checksumtype = checksumtype.lower()
    generated_checksum = get_file_checksum(path, checksumtype)
    event_detail = 'program="python"; module="hashlib.{}()"'.format(checksumtype)
    if checksum == generated_checksum:
        job.pyprint("Checksum passed")
        event_outcome = "Pass"
        detail_note = "Dataverse checksum %s verified" % checksum
    else:
        job.pyprint("Checksum failed")
        event_outcome = "Fail"
        detail_note = "Dataverse checksum %s verification failed" % checksum
    databaseFunctions.insertIntoEvents(
        fileUUID=file_uuid,
        eventIdentifierUUID=event_id,
        eventType="fixity check",
        eventDateTime=date,
        eventDetail=event_detail,
        eventOutcome=event_outcome,
        eventOutcomeDetailNote=detail_note,
    )
def verifyChecksum(fileUUID, filePath, date, eventIdentifierUUID):
    # Fixity check for a single file: compare the checksum stored on the
    # Files row against a freshly computed one, record a "fixity check"
    # PREMIS event, then exit the script: 0 match, 1 no stored checksum,
    # 2 mismatch.
    sql = """SELECT checksum FROM Files WHERE fileUUID = '""" + fileUUID + "'"
    c, sqlLock = databaseInterface.querySQL(sql)
    row = c.fetchone()
    checksumDB = ""
    # Drain the cursor; the last row's value wins (at most one expected).
    while row != None:
        checksumDB = row[0]
        row = c.fetchone()
    sqlLock.release()
    if checksumDB == None or checksumDB == "" or checksumDB == "None":
        print >> sys.stderr, "No checksum found in database for file:", fileUUID, filePath
        exit(1)
    checksumFile = sha_for_file(filePath)
    eventOutcome = ""
    eventOutcomeDetailNote = ""
    exitCode = 0
    if checksumFile != checksumDB:
        eventOutcomeDetailNote = checksumFile.__str__(
        ) + " != " + checksumDB.__str__()
        eventOutcome = "Fail"
        exitCode = 2
        print >> sys.stderr, "Checksums do not match:", fileUUID, filePath
        print >> sys.stderr, eventOutcomeDetailNote
    else:
        eventOutcomeDetailNote = "%s %s" % (checksumFile.__str__(), "verified")
        eventOutcome = "Pass"
        exitCode = 0
    # Record the outcome regardless of pass/fail.
    #insertIntoEvents(fileUUID="", eventIdentifierUUID="", eventType="", eventDateTime=databaseInterface.getUTCDate(), eventDetail="", eventOutcome="", eventOutcomeDetailNote="")
    databaseFunctions.insertIntoEvents(fileUUID=fileUUID,
                                       eventIdentifierUUID=eventIdentifierUUID,
                                       eventType="fixity check",
                                       eventDateTime=date,
                                       eventOutcome=eventOutcome,
                                       eventOutcomeDetailNote=eventOutcomeDetailNote,
                                       eventDetail="program=\"python\"; module=\"hashlib.sha256()\"")
    exit(exitCode)
def verifyChecksum(fileUUID, filePath, date, eventIdentifierUUID):
    # Fixity check for a single file (near-duplicate of a sibling legacy
    # implementation): compare the stored Files checksum with a freshly
    # computed one, record a "fixity check" PREMIS event, then exit:
    # 0 match, 1 no stored checksum, 2 mismatch.
    sql = """SELECT checksum FROM Files WHERE fileUUID = '""" + fileUUID + "'"
    c, sqlLock = databaseInterface.querySQL(sql)
    row = c.fetchone()
    checksumDB = ""
    # Drain the cursor; the last row's value wins (at most one expected).
    while row != None:
        checksumDB = row[0]
        row = c.fetchone()
    sqlLock.release()
    if checksumDB == None or checksumDB == "" or checksumDB == "None":
        print >>sys.stderr, "No checksum found in database for file:", fileUUID, filePath
        exit(1)
    checksumFile = sha_for_file(filePath)
    eventOutcome=""
    eventOutcomeDetailNote=""
    exitCode = 0
    if checksumFile != checksumDB:
        eventOutcomeDetailNote = checksumFile.__str__() + " != " + checksumDB.__str__()
        eventOutcome="Fail"
        exitCode = 2
        print >>sys.stderr, "Checksums do not match:", fileUUID, filePath
        print >>sys.stderr, eventOutcomeDetailNote
    else:
        eventOutcomeDetailNote = "%s %s" % (checksumFile.__str__(), "verified")
        eventOutcome="Pass"
        exitCode = 0
    # Record the outcome regardless of pass/fail.
    #insertIntoEvents(fileUUID="", eventIdentifierUUID="", eventType="", eventDateTime=databaseInterface.getUTCDate(), eventDetail="", eventOutcome="", eventOutcomeDetailNote="")
    databaseFunctions.insertIntoEvents(fileUUID=fileUUID,
                                       eventIdentifierUUID=eventIdentifierUUID,
                                       eventType="fixity check",
                                       eventDateTime=date,
                                       eventOutcome=eventOutcome,
                                       eventOutcomeDetailNote=eventOutcomeDetailNote,
                                       eventDetail="program=\"python\"; module=\"hashlib.sha256()\"")
    exit(exitCode)
def verifyChecksum(fileUUID, filePath, date, eventIdentifierUUID):
    """Verify a file's stored checksum against the file on disk.

    Records a "fixity check" PREMIS event with the result, then exits the
    script: 0 on match, 1 if no checksum is stored, 2 on mismatch.

    :param str fileUUID: UUID of the File row holding the expected checksum.
    :param str filePath: path of the file to hash.
    :param str date: event date/time.
    :param str eventIdentifierUUID: UUID to use as the event identifier.
    """
    f = File.objects.get(uuid=fileUUID)
    if f.checksum in ('', 'None'):
        print('No checksum found in database for file:', fileUUID, filePath,
              file=sys.stderr)
        exit(1)
    checksumFile = get_file_checksum(filePath, f.checksumtype)
    if checksumFile != f.checksum:
        eventOutcomeDetailNote = str(checksumFile) + ' != ' + f.checksum
        eventOutcome = 'Fail'
        exitCode = 2
        print('Checksums do not match:', fileUUID, filePath, file=sys.stderr)
        print(eventOutcomeDetailNote, file=sys.stderr)
    else:
        eventOutcomeDetailNote = '%s %s' % (str(checksumFile), 'verified')
        eventOutcome = 'Pass'
        exitCode = 0
    # Bug fix: the eventIdentifierUUID parameter was ignored and a freshly
    # generated UUID recorded instead, so callers could never correlate the
    # event they asked for; use the caller-supplied identifier.
    databaseFunctions.insertIntoEvents(
        fileUUID=fileUUID,
        eventIdentifierUUID=eventIdentifierUUID,
        eventType='fixity check',
        eventDateTime=date,
        eventOutcome=eventOutcome,
        eventOutcomeDetailNote=eventOutcomeDetailNote,
        eventDetail='program="python"; module="hashlib.{}()"'.format(
            f.checksumtype))
    exit(exitCode)
except: print >>sys.stderr, "Error parsing: ", xmlFilePath exitCode += 1 continue #if extension.lower() != file[i+1:].lower(): # print >>sys.stderr, "Warning, extension mismatch(file/xml): ", file[:i], extension , file[i+1:] objectMD5 = md5_for_file(filePath) if objectMD5 == xmlMD5: print "File OK: ", xmlMD5, filePath.replace(transferPath, "%TransferDirectory%") fileID = getFileUUIDLike(filePath, transferPath, transferUUID, "transferUUID", "%transferDirectory%") for path, fileUUID in fileID.iteritems(): eventDetail = "program=\"python\"; module=\"hashlib.md5()\"" eventOutcome="Pass" eventOutcomeDetailNote = "%s %s" % (xmlFile.__str__(), "verified") eventIdentifierUUID=uuid.uuid4().__str__() databaseFunctions.insertIntoEvents(fileUUID=fileUUID, \ eventIdentifierUUID=eventIdentifierUUID, \ eventType="fixity check", \ eventDateTime=date, \ eventOutcome=eventOutcome, \ eventOutcomeDetailNote=eventOutcomeDetailNote, \ eventDetail=eventDetail) else: print >>sys.stderr, "Checksum mismatch: ", filePath.replace(transferPath, "%TransferDirectory%") exitCode += 1 quit(exitCode)
if exit != 0: print >>sys.stderr, "Failed test: ", command print >>sys.stderr, stdErr print >>sys.stderr exitCode += 1 else: print "Passed test: ", command if __name__ == '__main__': target = sys.argv[1] transferUUID = sys.argv[2] verifyBag(target) if exitCode != 0: print >>sys.stderr, "Failed bagit compliance. Not restructuring." exit(exitCode) restructureBagForComplianceFileUUIDsAssigned(target, transferUUID, "transferUUID") for i in range(len(verificationCommands)): print verificationCommands[i] print verificationCommandsOutputs[i] print sql = "SELECT Files.fileUUID FROM Files WHERE removedTime = 0 AND Files.currentLocation LIKE '\%transferDirectory\%objects/%' AND transferUUID = '" + transferUUID + "';" rows = databaseInterface.queryAllSQL(sql) for row in rows: insertIntoEvents(fileUUID=row[0], \ eventType="fixity check", \ eventDetail="Bagit - verifypayloadmanifests", \ eventOutcome="Pass") exit(exitCode)
# @package Archivematica # @subpackage archivematicaClientScript # @author Joseph Perry <*****@*****.**> # @version svn: $Id$ from optparse import OptionParser import sys sys.path.append("/usr/lib/archivematica/archivematicaCommon") from databaseFunctions import insertIntoEvents if __name__ == '__main__': parser = OptionParser() parser.add_option("-i", "--fileUUID", action="store", dest="fileUUID", default="") parser.add_option("-t", "--eventType", action="store", dest="eventType", default="") parser.add_option("-d", "--eventDateTime", action="store", dest="eventDateTime", default="") parser.add_option("-e", "--eventDetail", action="store", dest="eventDetail", default="") parser.add_option("-o", "--eventOutcome", action="store", dest="eventOutcome", default="") parser.add_option("-n", "--eventOutcomeDetailNote", action="store", dest="eventOutcomeDetailNote", default="") parser.add_option("-u", "--eventIdentifierUUID", action="store", dest="eventIdentifierUUID", default="") (opts, args) = parser.parse_args() insertIntoEvents(fileUUID=opts.fileUUID, \ eventIdentifierUUID=opts.eventIdentifierUUID, \ eventType=opts.eventType, \ eventDateTime=opts.eventDateTime, \ eventDetail=opts.eventDetail, \ eventOutcome=opts.eventOutcome, \ eventOutcomeDetailNote=opts.eventOutcomeDetailNote)
# Manual-normalization bookkeeping: rename the produced file to embed its
# UUID, update its location in the database, then link it to the original
# file via a "normalization" event and a derivation row.
basename = os.path.basename(filePath)
i = basename.rfind(".")
# Insert "-<fileUUID>" before the extension.
# NOTE(review): assumes basename contains a "." — rfind() == -1 would
# splice the UUID in the wrong place; confirm callers guarantee this.
dstFile = basename[:i] + "-" + fileUUID + basename[i:]
dstDir = os.path.dirname(originalFilePath.replace("%SIPDirectory%", SIPDirectory, 1))
dst = os.path.join(dstDir, dstFile)
# SIP-relative form of the destination, as stored in the database.
dstR = dst.replace(SIPDirectory, "%SIPDirectory%", 1)
if os.path.isfile(dst) or os.path.isdir(dst):
    print >>sys.stderr, "already exists:", dstR
    exit(2)
#Rename the file or directory src to dst. If dst is a directory, OSError will be raised. On Unix, if dst exists and is a file, it will be replaced silently if the user has permission. The operation may fail on some Unix flavors if src and dst are on different filesystems.
#see http://docs.python.org/2/library/os.html
os.rename(filePath, dst)
sql = """UPDATE Files SET currentLocation='%s' WHERE fileUUID='%s';""" % (dstR, fileUUID)
databaseInterface.runSQL(sql)
# One UUID shared by the normalization event and the derivation link.
derivationEventUUID = uuid.uuid4().__str__()
insertIntoEvents(fileUUID=originalFileUUID,
                 eventIdentifierUUID=derivationEventUUID,
                 eventType="normalization",
                 eventDateTime=date,
                 eventDetail="manual normalization",
                 eventOutcome="",
                 eventOutcomeDetailNote=dstR)
#Add linking information between files
insertIntoDerivations(sourceFileUUID=originalFileUUID, derivedFileUUID=fileUUID, relatedEventUUID=derivationEventUUID)
exit(0)
if __name__ == '__main__':
    # Identify a file's format with Fido and record the result as a
    # "format identification" PREMIS event plus a FileIDs entry.
    (opts, args) = parseArgs()
    # Debug aid: change the condition to True to stall the MCP client while
    # testing this module.
    while False:
        #used to stall the mcp and stop the client for testing this module
        import time
        time.sleep(10)
    # These group uses are handled elsewhere; skip identification.
    if opts.fileGrpUse in ["DSPACEMETS", "maildirFile"]:
        print "file's fileGrpUse in exclusion list, skipping"
        exit(0)
    FidoFileID = getFidoID(opts.filePath)
    FidoVersion = getFidoVersion()
    # Map Fido's identifier to Archivematica's internal file ID.
    fileID = getArchivematicaFileID(FidoFileID, FidoVersion)
    print "Found file ID {%s}: %s" % (fileID, FidoFileID)
    insertIntoFileIds(opts.fileUUID, fileID)
    eventDetailText = 'program="Fido"; version="%s"' % (FidoVersion)
    eventOutcomeText = 'Positive'
    eventOutcomeDetailNote = FidoFileID
    insertIntoEvents(fileUUID=opts.fileUUID,
                     eventIdentifierUUID=uuid.uuid4().__str__(),
                     eventType="format identification",
                     eventDateTime=opts.date,
                     eventDetail=eventDetailText,
                     eventOutcome=eventOutcomeText,
                     eventOutcomeDetailNote=eventOutcomeDetailNote)
# Virus-scan `target` with clamdscan (reading the file via stdin) and record
# a "virus check" PREMIS event; exit 2 if the scanner is unreachable, 3 on a
# failed scan.
taskUUID = sys.argv[4]
command = 'clamdscan - <"' + escapeForCommand(target) + '"'
print >>sys.stderr, command
commandVersion = "clamdscan -V"
eventOutcome = "Pass"
# executeOrRun returns a tuple whose first element is the exit code and
# second the captured output.
clamscanOutput = executeOrRun("bashScript", command, printing=False)
clamscanVersionOutput = executeOrRun("command", commandVersion, printing=False)
if clamscanOutput[0] or clamscanVersionOutput[0]:
    if clamscanVersionOutput[0]:
        # Could not even query the scanner version: abort without an event.
        print >>sys.stderr, clamscanVersionOutput
        exit(2)
    else:
        eventOutcome = "Fail"
# Also fail when the scan output lacks the expected "OK" marker.
if eventOutcome == "Fail" or clamscanOutput[1].find(clamscanResultShouldBe) == -1:
    eventOutcome = "Fail"
    print >>sys.stderr, fileUUID, " - ", os.path.basename(target)
    print >>sys.stderr, clamscanOutput
# Version output is "ClamAV <ver>/<defs>/<defs date>"; keep version and the
# combined definitions info for the event detail.
version, virusDefs, virusDefsDate = clamscanVersionOutput[1].split("/")
virusDefs = virusDefs + "/" + virusDefsDate
eventDetailText = "program=\"Clam AV\"; version=\"" + version + "\"; virusDefinitions=\"" + virusDefs + "\""
if fileUUID != "None":
    insertIntoEvents(fileUUID=fileUUID, eventIdentifierUUID=taskUUID, eventType="virus check", eventDateTime=date, eventDetail=eventDetailText, eventOutcome=eventOutcome, eventOutcomeDetailNote="")
if eventOutcome != "Pass":
    exit(3)