def postprocess(dbPath): ''' This is the post process module ''' if not os.path.exists(dbPath): logging.debug('PreProcess: can\'t find database at path') return datastore = DataStore(dbPath) loopcount = 0 while True: sleep(5) if loopcount % 10 == 0: logging.debug('PostProcess is alive') loopcount += 1 #calculate checksums on decrypted files data = datastore.recordsForReHashing() processRecordsReadyToBeHashed(data, datastore) #delete associated files as the job was successful amRecords = datastore.archiveManagerJobsReadyToComplete() for amRecord in amRecords: dataStoreRecords = datastore.recordsForUUID(amRecord.uuid) for record in dataStoreRecords: recordPath = record.fileName if configurationOptions().shouldDeleteOriginal == True: try: os.remove(recordPath) except OSError as e: info = 'PostProcess: Unable to delete the file %s' % (recordPath,) logging.debug(info) datastore.updateArchiveManagerJobAsComplete(amRecord) #move the associated files to the error box as the job had problems amRecords = datastore.archiveManagerJobsThatErrored() for amRecord in amRecords: logging.debug('performing clean up with ' + amRecord.amNumber) batchName = amRecord.amNumber destinationAMFolder = '' errorPath = '' dataStoreRecords = datastore.recordsForUUID(amRecord.uuid) for record in dataStoreRecords: pathStructureName = record.pathStructureName filePath = record.fileName currentPathStructure = configurationOptions().pathStructureWithName(pathStructureName) errorPath = currentPathStructure['errorBox'] print filePath destinationAMFolder = os.path.join(os.path.dirname(filePath), batchName) print 'This is where the working files will go.', destinationAMFolder if not os.path.exists(destinationAMFolder): try: os.mkdir(destinationAMFolder) except OSError as e: pass originalFileName = os.path.basename(filePath).split((batchName + "_"))[1] proposedAMPath = os.path.join(destinationAMFolder, originalFileName) try: # newPath = pathAfterSafelyMovingFileToDestinationFile(filePath, proposedAMPath) 
print filePath, proposedAMPath shutil.move(filePath, proposedAMPath) except Exception as e: info = 'There was an error moving a file at %s for Archive Manager job %s. This will need to be manually addressed.' % (filePath, batchName) sendFailureEmail(info) continue currentFiles = os.listdir(destinationAMFolder) filesInJob = amRecord.allFilesInRecord() areAllFilesInPlace = True for nFile in filesInJob: if nFile not in currentFiles: areAllFilesInPlace = False if areAllFilesInPlace == True: print "moving files to the error path" try: pathAfterSafelyMovingFolderToDestinationFolder(destinationAMFolder,errorPath) except Exception as e: info = 'PostProcess: Unable to move the file %s' % (filePath,) logging.debug(info) info = 'There was an error moving the folder %s into the outbox at %s' % (destinationAMFolder, errorPath) info = info + '\n' + 'This will need to be addressed manually' sendFailureEmail(info) continue datastore.updateArchiveManagerJobAsComplete(amRecord)
def checkArchiveManagerJobs(dbPath):
    '''
    Scan Archive Manager jobs that are ready to start.

    For each job: verify every expected file has arrived in the job's
    folder, confirm each file can be exclusively locked (i.e. nothing is
    still writing it), then prefix each file with the batch name, move it
    into the working box and mark its record as static at the new path.
    '''
    logging = DefaultLogger()

    datastore = DataStore(dbPath)
    amRecords = datastore.archiveManagerJobsReadyToStart()

    for amRecord in amRecords:
        areAllFilesAvailableAndReady = True

        recordsInAMRecord = datastore.recordsForUUID(amRecord.uuid)
        filesInAMRecord = [x.fileName for x in recordsInAMRecord]

        filesInCurrentFolder = []
        try:
            filesInCurrentFolder = os.listdir(amRecord.amPath)
        except Exception:
            # amPath may not exist yet; treat it as an empty folder.
            pass

        # BUG FIX: filesInAMRecord holds full paths while os.listdir
        # returns bare names, so the original membership test flagged
        # every present file as unknown. Compare basenames instead.
        knownFileNames = [os.path.basename(x) for x in filesInAMRecord]
        isThereAnUnknownFilePresent = False
        for currentFile in filesInCurrentFolder:
            if currentFile not in knownFileNames:
                isThereAnUnknownFilePresent = True

        if isThereAnUnknownFilePresent:
            logging.debug('Unknown files are present')
            # report error

        for currentFile in filesInAMRecord:
            logging.debug('%s' % (currentFile))
            lastComponent = os.path.basename(currentFile)
            if lastComponent not in filesInCurrentFolder:
                logging.debug('The following file is not yet available: %s' % (lastComponent))
                areAllFilesAvailableAndReady = False

        if not areAllFilesAvailableAndReady:
            logging.debug('Not all of the files are staged yet')
            continue

        # Require an exclusive lock on every file before touching any of
        # them, so a partially-written file stalls the whole job.
        canLockAllRecords = True
        data = datastore.recordsForUUID(amRecord.uuid)
        for record in data:
            filePath = record.fileName
            try:
                fileToCheck = open(filePath, 'rb')
                portalocker.lock(fileToCheck, portalocker.LOCK_EX)
                fileToCheck.close()
                logging.debug('Acquire File: proceeding to update the file status knowing that no one else is using it...')
            except Exception:
                logging.debug('Acquire File: unable to lock file as it is likely in use')
                canLockAllRecords = False

        if not canLockAllRecords:
            logging.debug('Can not lock all of the records yet')
            continue

        for record in data:
            key_id = record.id
            filePath = record.fileName
            batchName = record.batchName
            pathStructureName = record.pathStructureName

            newPath = filePath
            workingPath = configurationOptions().pathStructureWithName(pathStructureName)['workingBox']

            # We prepend the job name to the file here as it belongs to
            # a batch.
            proposedBatchName = batchName + "_" + os.path.basename(filePath)
            proposedPath = os.path.join(os.path.dirname(filePath), proposedBatchName)

            try:
                if os.path.exists(proposedPath):
                    raise Exception('file already exists')
                os.rename(filePath, proposedPath)
                filePath = proposedPath
            except Exception as e:
                # This is an unlikely occurrence.
                # BUG FIX: e.message is deprecated (PEP 352) and absent
                # on many exception types; use str(e) instead.
                info = 'There is a duplicate file in the queue for: ' + os.path.basename(filePath) + " " + str(e)
                logging.debug(info)
                sendFailureEmail(info)
                continue

            # This moves the file from the inbox to the working path.
            try:
                newPath = pathAfterSafelyMovingFileToDestinationFolder(filePath, workingPath)
            except Exception:
                logging.debug('This shouldn\'t happen as pathAfterSafelyMovingFileToDestinationFolder should create a unique name that avoids any collisions, otherwise the file has been moved')
                logging.debug('Acquire File: Error moving file')
                info = 'There was a problem moving the file into into the queue for: ' + os.path.basename(filePath)
                info = info + '\n' + 'This will require manual intervention as the occurrence is unique.'
                sendFailureEmail(info)
                continue

            logging.debug('Acquire File: updating record file status and path....')
            datastore.updateRecordAsStaticWithNewPath(newPath, key_id)
def checkArchiveManagerJobs(dbPath):
    '''
    Examine Archive Manager jobs ready to start and stage their files.

    A job proceeds only when all of its expected files are present in the
    job folder and every one can be exclusively locked. Each file is then
    renamed with a '<batch>_' prefix, moved into the working box, and its
    record updated as static at the new location.
    '''
    logging = DefaultLogger()

    datastore = DataStore(dbPath)
    amRecords = datastore.archiveManagerJobsReadyToStart()

    for amRecord in amRecords:
        areAllFilesAvailableAndReady = True

        recordsInAMRecord = datastore.recordsForUUID(amRecord.uuid)
        filesInAMRecord = [x.fileName for x in recordsInAMRecord]

        filesInCurrentFolder = []
        try:
            filesInCurrentFolder = os.listdir(amRecord.amPath)
        except Exception:
            # The job folder may not exist yet; treat as empty.
            pass

        # BUG FIX: the original compared os.listdir basenames against the
        # FULL paths stored in filesInAMRecord, so every file present was
        # reported as unknown. Compare against basenames.
        expectedNames = [os.path.basename(x) for x in filesInAMRecord]
        isThereAnUnknownFilePresent = False
        for currentFile in filesInCurrentFolder:
            if currentFile not in expectedNames:
                isThereAnUnknownFilePresent = True

        if isThereAnUnknownFilePresent:
            logging.debug('Unknown files are present')
            # report error

        for currentFile in filesInAMRecord:
            logging.debug('%s' % (currentFile))
            lastComponent = os.path.basename(currentFile)
            if lastComponent not in filesInCurrentFolder:
                logging.debug('The following file is not yet available: %s' % (lastComponent))
                areAllFilesAvailableAndReady = False

        if not areAllFilesAvailableAndReady:
            logging.debug('Not all of the files are staged yet')
            continue

        # All files must be lockable before any of them is moved.
        canLockAllRecords = True
        data = datastore.recordsForUUID(amRecord.uuid)
        for record in data:
            filePath = record.fileName
            try:
                fileToCheck = open(filePath, 'rb')
                portalocker.lock(fileToCheck, portalocker.LOCK_EX)
                fileToCheck.close()
                logging.debug('Acquire File: proceeding to update the file status knowing that no one else is using it...')
            except Exception:
                logging.debug('Acquire File: unable to lock file as it is likely in use')
                canLockAllRecords = False

        if not canLockAllRecords:
            logging.debug('Can not lock all of the records yet')
            continue

        for record in data:
            key_id = record.id
            filePath = record.fileName
            batchName = record.batchName
            pathStructureName = record.pathStructureName

            newPath = filePath
            workingPath = configurationOptions().pathStructureWithName(pathStructureName)['workingBox']

            # Prepend the job name to the file as it belongs to a batch.
            proposedBatchName = batchName + "_" + os.path.basename(filePath)
            proposedPath = os.path.join(os.path.dirname(filePath), proposedBatchName)

            try:
                if os.path.exists(proposedPath):
                    raise Exception('file already exists')
                os.rename(filePath, proposedPath)
                filePath = proposedPath
            except Exception as e:
                # Unlikely; a duplicate is already queued.
                # BUG FIX: replaced deprecated e.message with str(e).
                info = 'There is a duplicate file in the queue for: ' + os.path.basename(filePath) + " " + str(e)
                logging.debug(info)
                sendFailureEmail(info)
                continue

            # Move the file from the inbox into the working path.
            try:
                newPath = pathAfterSafelyMovingFileToDestinationFolder(filePath, workingPath)
            except Exception:
                logging.debug('This shouldn\'t happen as pathAfterSafelyMovingFileToDestinationFolder should create a unique name that avoids any collisions, otherwise the file has been moved')
                logging.debug('Acquire File: Error moving file')
                info = 'There was a problem moving the file into into the queue for: ' + os.path.basename(filePath)
                info = info + '\n' + 'This will require manual intervention as the occurrence is unique.'
                sendFailureEmail(info)
                continue

            logging.debug('Acquire File: updating record file status and path....')
            datastore.updateRecordAsStaticWithNewPath(newPath, key_id)
def postprocess(dbPath): ''' This is the post process module ''' if not os.path.exists(dbPath): logging.debug('PreProcess: can\'t find database at path') return datastore = DataStore(dbPath) loopcount = 0 while True: sleep(5) if loopcount % 10 == 0: logging.debug('PostProcess is alive') loopcount += 1 #calculate checksums on decrypted files data = datastore.recordsForReHashing() processRecordsReadyToBeHashed(data, datastore) #delete associated files as the job was successful amRecords = datastore.archiveManagerJobsReadyToComplete() for amRecord in amRecords: dataStoreRecords = datastore.recordsForUUID(amRecord.uuid) for record in dataStoreRecords: recordPath = record.fileName if configurationOptions().shouldDeleteOriginal == True: try: os.remove(recordPath) except OSError as e: info = 'PostProcess: Unable to delete the file %s' % ( recordPath, ) logging.debug(info) datastore.updateArchiveManagerJobAsComplete(amRecord) #move the associated files to the error box as the job had problems amRecords = datastore.archiveManagerJobsThatErrored() for amRecord in amRecords: logging.debug('performing clean up with ' + amRecord.amNumber) batchName = amRecord.amNumber destinationAMFolder = '' errorPath = '' dataStoreRecords = datastore.recordsForUUID(amRecord.uuid) for record in dataStoreRecords: pathStructureName = record.pathStructureName filePath = record.fileName currentPathStructure = configurationOptions( ).pathStructureWithName(pathStructureName) errorPath = currentPathStructure['errorBox'] print filePath destinationAMFolder = os.path.join(os.path.dirname(filePath), batchName) print 'This is where the working files will go.', destinationAMFolder if not os.path.exists(destinationAMFolder): try: os.mkdir(destinationAMFolder) except OSError as e: pass originalFileName = os.path.basename(filePath).split( (batchName + "_"))[1] proposedAMPath = os.path.join(destinationAMFolder, originalFileName) try: # newPath = pathAfterSafelyMovingFileToDestinationFile(filePath, 
proposedAMPath) print filePath, proposedAMPath shutil.move(filePath, proposedAMPath) except Exception as e: info = 'There was an error moving a file at %s for Archive Manager job %s. This will need to be manually addressed.' % ( filePath, batchName) sendFailureEmail(info) continue currentFiles = os.listdir(destinationAMFolder) filesInJob = amRecord.allFilesInRecord() areAllFilesInPlace = True for nFile in filesInJob: if nFile not in currentFiles: areAllFilesInPlace = False if areAllFilesInPlace == True: print "moving files to the error path" try: pathAfterSafelyMovingFolderToDestinationFolder( destinationAMFolder, errorPath) except Exception as e: info = 'PostProcess: Unable to move the file %s' % ( filePath, ) logging.debug(info) info = 'There was an error moving the folder %s into the outbox at %s' % ( destinationAMFolder, errorPath) info = info + '\n' + 'This will need to be addressed manually' sendFailureEmail(info) continue datastore.updateArchiveManagerJobAsComplete(amRecord)