import os
import shutil
import datetime
from time import sleep

import requests
import portalocker

# NOTE: project-local names used throughout (DataStore, DefaultLogger,
# configurationOptions, sendFailureEmail, sendSuccessEmail, hashForFile,
# DaisyMetadataLookup, getDaisyNumber, the path-moving helpers, and the
# verificationWaitTime constant) are assumed to be imported from elsewhere
# in this codebase.


def preprocess(dbPath):
    '''
    This is the preprocess module. It polls the database at dbPath for
    records that are ready to be hashed and computes a hash for each one.
    '''
    logging = DefaultLogger()

    if not os.path.exists(dbPath):
        logging.debug('PreProcess: can\'t find database at path')
        return

    datastore = DataStore(dbPath)
    loopcount = 0

    while True:
        sleep(5)

        if loopcount % 10 == 0:
            logging.debug('PreProcess is alive')
        loopcount += 1

        data = datastore.recordsForHashing()
        for record in data:
            logging.debug(record)
            key_id = record.id
            filePath = record.fileName

            if not os.path.exists(filePath):
                logging.debug('PreProcess: Will update record status as the file no longer exists')
                datastore.updateRecordAsMissingWithID(key_id)
                continue

            try:
                logging.debug('PreProcess: locking file to calculate hash...')
                # UPDATE HASH OPERATION STARTS HERE
                startTime = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                datastore.updateRecordWithHashStart(startTime, key_id)

                fileToHash = open(filePath, 'rb')
                portalocker.lock(fileToHash, portalocker.LOCK_SH)
                hashString = "NO_OP"  # hashForFile(fileToHash)
                endTime = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                fileToHash.close()

                logging.debug('PreProcess: unlocking file...')
                logging.debug('PreProcess: Will update record status with Hash string and times')
                datastore.updateRecordWithHashForStartTimeAndEndTime(hashString, startTime, endTime, key_id)
            except Exception as e:
                info = 'PreProcess: There was an error when calculating the hash for file: ' + os.path.basename(filePath) + ' ' + e.message
                sendFailureEmail(info)
                logging.error(e.message)
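
# DefaultLogger is instantiated in every worker but is not defined in this
# dump. A minimal sketch of what it might look like, assuming it is a thin
# wrapper around the standard `logging` module (the name and configuration
# below are assumptions, not the project's actual implementation):
import logging as _logging


class DefaultLogger(object):
    '''Hypothetical stand-in: forwards debug/error calls to a configured logger.'''

    def __init__(self, name='encryption-queue'):  # assumed logger name
        self._logger = _logging.getLogger(name)
        if not self._logger.handlers:
            handler = _logging.StreamHandler()
            handler.setFormatter(_logging.Formatter('%(asctime)s %(message)s'))
            self._logger.addHandler(handler)
            self._logger.setLevel(_logging.DEBUG)

    def debug(self, message):
        self._logger.debug(message)

    def error(self, message):
        self._logger.error(message)
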
def jsonStringForAMNumber(amNumber):
    if amNumber == 'X9X9X9X9X9':
        return testDataForX9X9X9X9X9()

    try:
        address = 'http://archivemanager.mts.inbcu.com/ArchiveManager/api/requests?folder=' + str(amNumber)
        r = requests.get(address)
        if r.status_code == 200:
            return r.text
    except Exception as e:
        info = 'The server is unable to retrieve data from Archive Manager at %s for the Archive Manager entry %s. This likely means that the number is incorrect or the connection to Archive Manager is faulty.'
        info = info % (address, amNumber)
        sendFailureEmail(info)
    return None
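
# sendFailureEmail is called throughout but is not defined in this dump.
# A minimal sketch, assuming a plain SMTP relay; the host, addresses, and
# subject line here are placeholders, not the project's real settings:
import smtplib
from email.mime.text import MIMEText


def sendFailureEmail(info):
    '''Hypothetical stand-in: mail the failure text to an operations alias.'''
    msg = MIMEText(info)
    msg['Subject'] = 'Encryption queue failure'  # assumed subject
    msg['From'] = 'noreply@example.com'          # placeholder address
    msg['To'] = 'ops@example.com'                # placeholder address
    server = smtplib.SMTP('localhost')           # assumed local relay
    try:
        server.sendmail(msg['From'], [msg['To']], msg.as_string())
    finally:
        server.quit()
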
def metaDataStringForNumber(self, numberString):
    if self.isDevMode == True:
        return self.mockDataAsString(numberString)

    try:
        # address = 'http://daisy.inbcu.com/daisy/asset/metadata/' + str(numberString)
        address = "http://qa.daisy.nbcuni.ge.com/daisy/asset/metadata/" + str(numberString)
        r = requests.get(address)
        if r.status_code == 200:
            return r.text
    except Exception as e:
        print e.message
        errorString = 'Unable to retrieve daisy asset metadata for ' + str(numberString) + "."
        errorString = errorString + " " + "Additional information: " + str(e.message)
        sendFailureEmail(errorString)
    return '[]'
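
# metaDataStringForNumber takes `self`, so it appears to be a method of the
# DaisyMetadataLookup class that post-processing calls below. A sketch of how
# its checksumForFile companion might look, assuming the Daisy endpoint
# returns a JSON list of entries keyed by 'fileName' and 'checksum' (the
# class shape and both key names are assumptions):
import json


class DaisyMetadataLookup(object):
    '''Hypothetical stand-in for the lookup class referenced in post-processing.'''

    metaDataStringForNumber = metaDataStringForNumber  # the method defined above

    def __init__(self, numberString):
        self.numberString = numberString
        self.isDevMode = False

    def checksumForFile(self, fileName):
        '''Return the stored checksum for fileName, or None if not found.'''
        try:
            entries = json.loads(self.metaDataStringForNumber(self.numberString) or '[]')
        except ValueError:
            return None
        for entry in entries:
            if entry.get('fileName') == fileName:  # assumed key name
                return entry.get('checksum')       # assumed key name
        return None
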
def processRecordsReadyToBeHashed(data, datastore):
    logging = DefaultLogger()

    for record in data:
        logging.debug(record)
        key_id = record.id
        sourceFilePath = record.fileName
        filePath = record.operationFileName
        recordOperationType = record.operationType
        pathStructureName = record.pathStructureName
        isBatch = record.isBatch
        batchName = record.batchName

        currentPathStructure = configurationOptions().pathStructureWithName(pathStructureName)
        finalPath = currentPathStructure['outBox']
        finalOriginalDestinationPath = currentPathStructure['originalBox']
        errorPath = currentPathStructure['errorBox']

        if not os.path.exists(filePath):
            # if the processed file doesn't exist, then update the record and move the source to the error box
            # ADD LOGIC FOR BATCH PROCESSING
            logging.debug('PostProcess: Will update record status as the encrypted file does not exist')
            newPath = pathAfterSafelyMovingFileToDestinationFolder(sourceFilePath, errorPath)
            datastore.updateRecordAsMissingWithFileNameAndID(newPath, key_id)
            continue

        # CALCULATE HASH
        startTime = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        datastore.updateRecordWithReHashStart(startTime, key_id)

        hashString = 'NO_HASH'
        # only hash files being decrypted
        if recordOperationType == 'Decrypt':
            try:
                fileToHash = open(filePath, 'rb')
                logging.debug('PostProcess: locked file to calculate hash...')
                portalocker.lock(fileToHash, portalocker.LOCK_SH)
                hashString = hashForFile(fileToHash)
                logging.debug('PostProcess Hasher: unlocking file...')
                fileToHash.close()
            except Exception as e:
                hashString = 'HASH_GEN_ERROR'
        else:
            hashString = "NO_HASH_FOR_ENCRYPTED_FILES"

        endTime = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        # ONLY DECRYPTED FILES' HASH IS CHECKED
        didChecksumFail = False
        checkSumErrorString = None

        if recordOperationType == 'Decrypt':
            fileBaseName = os.path.basename(filePath)
            if isBatch:
                fileBaseName = os.path.basename(filePath).split((batchName + "_"))[1]

            daisyNumber = getDaisyNumber(fileBaseName)

            try:
                errorRecordStatus = 0
                if daisyNumber == None:
                    errorString = 'There was an error Decrypting the file: ' + fileBaseName + '.\n'
                    errorString = errorString + 'Unable to retrieve Daisy Number for ' + filePath + ' ' + batchName
                    logging.debug(errorString)
                    errorRecordStatus = datastore.daisyEntryNotFoundStatusCode()
                    raise Exception(errorString)

                originalChecksum = DaisyMetadataLookup(daisyNumber).checksumForFile(fileBaseName)
                if originalChecksum == None:
                    errorString = 'There was an error Decrypting the file: ' + fileBaseName + '.\n'
                    errorString = errorString + 'Unable to retrieve Checksum for ' + filePath + ' ' + batchName
                    logging.debug(errorString)
                    errorRecordStatus = datastore.checksumLookupFailedStatusCode()
                    raise Exception(errorString)

                if originalChecksum.upper() != hashString.upper():
                    errorString = 'Checksums do not match for file ' + filePath + '\n'
                    errorString = errorString + ' ' + batchName + " expected the checksum: " + originalChecksum + '\n'
                    errorString = errorString + " but found this checksum instead: " + hashString
                    logging.debug(errorString)
                    errorRecordStatus = datastore.checksumComparisonFailedStatusCode()
                    raise Exception(errorString)
            except Exception as checksumException:
                # we have an error, so we must create a new folder in the error path
                logging.debug('PostProcess: The checksum failed. Please see the appropriate Error Box')
                checkSumErrorString = 'There was a checksum error.' + '\n' + checksumException.message
                didChecksumFail = True

                # If the file failed a checksum and is not a batch file, then move it to the error box
                if didChecksumFail == True and isBatch == False:
                    errorPathInformation = ''
                    try:
                        logging.debug('PostProcess: creating a Decrypted Checksum folder')
                        errorDestination = createSafeFolderInDestinationFolder(errorPath, 'DECRYPTED_CHECKSUM_ERROR')
                        try:
                            info = 'Moving the file that errored into the folder at ' + errorDestination
                            logging.debug(info)
                            shutil.move(filePath, os.path.join(errorDestination, fileBaseName))
                            errorPathInformation = info
                        except Exception as e:
                            info = "PostProcess: " + e.message + ' an error occurred moving the file: ' + fileBaseName + ' to ' + errorDestination
                            logging.debug(info)
                    except Exception as e:
                        info = 'PostProcess: An error occurred when moving the decrypted file in to the Error box'
                        logging.debug(info)

                    # THEN MOVE THE ENCRYPTED FILE ASIDE TO THE ERROR BOX
                    try:
                        info = 'Moving the source file into the error box at ' + errorPath
                        logging.debug(info)
                        newPath = pathAfterSafelyMovingFileToDestinationFolder(sourceFilePath, errorPath)
                        errorPathInformation = errorPathInformation + '\n' + info
                    except Exception as e:
                        info = "PostProcess: " + e.message + ' an error occurred moving the file: ' + sourceFilePath
                        logging.debug(info)

                    datastore.updateRecordStatusWithID(errorRecordStatus, key_id)
                    info = checksumException.message + '\n' + errorPathInformation
                    logging.debug(info)
                    sendFailureEmail(info)
                    continue

        # Lets now address the batch decrypted files
        newPath = filePath
        success = False

        if isBatch == True and recordOperationType == 'Decrypt':
            # create the destination folder for the Archive Manager Job
            amRecord = datastore.recordWithNumberFromAMJobsTable(batchName)
            if amRecord is None:
                # This should not happen as we don't even allow for the logic to proceed to this point without
                # a valid Archive Manager Record
                info = 'An error occurred where no data was found for the Archive Manager job ' + batchName + '\n'
                info = info + 'This error should not happen. Please check ' + os.path.dirname(filePath) + '\n'
                info = info + 'The files will need to be manually removed from the Decryption Queue.'
                logging.debug(info)
                sendFailureEmail(info)
                continue

            if didChecksumFail == True:
                # add checksum error string to archive manager job
                amRecord.errorString = amRecord.errorString + '\n' + checkSumErrorString
                datastore.updateArchiveManagerJobErrorString(amRecord, amRecord.errorString)

            # create the new folder in interim where we will push all of the batch files
            destinationAMFolder = os.path.join(os.path.dirname(filePath), batchName)
            if not os.path.exists(destinationAMFolder):
                try:
                    os.mkdir(destinationAMFolder)
                except OSError as e:
                    pass

            # get the file name, strip leading archive manager number
            originalFileName = os.path.basename(filePath)
            if isBatch == True:
                originalFileName = os.path.basename(filePath).split((batchName + "_"))[1]

            # this is where we will move the interim file, a new folder with its original name
            proposedAMPath = os.path.join(destinationAMFolder, originalFileName)

            # at this point the file should be in a folder named after the batch
            try:
                newPath = pathAfterSafelyMovingFileToDestinationFile(filePath, proposedAMPath)
            except Exception as e:
                info = 'There was an error moving a file at %s for Archive Manager job %s. This will need to be manually addressed.' % (filePath, batchName)
                sendFailureEmail(info)
                continue

            if os.path.basename(originalFileName) != os.path.basename(newPath):
                # there was a collision, there really is no reason why this should happen, but lets account for it
                errorString = 'For some reason, there already exists a file in %s labeled %s' % (destinationAMFolder, originalFileName) + '\n'
                amRecord.errorString = amRecord.errorString + '\n' + errorString
                datastore.updateArchiveManagerJobErrorString(amRecord, amRecord.errorString)

            success = datastore.updateRecordWithFinalEncryptedPathAndHashForStartTimeAndEndTime(newPath, hashString, startTime, endTime, key_id)

            currentFiles = visibleFilesInFolder(destinationAMFolder)
            amPath = amRecord.amPath
            filesInJob = amRecord.allFilesInRecord()

            # are we finished, are all the files in place for the batch job?
            try:
                areAllFilesInPlace = True
                for nFile in filesInJob:
                    if nFile not in currentFiles:
                        areAllFilesInPlace = False

                if areAllFilesInPlace == False:
                    continue

                logging.debug('All files are in place')

                try:
                    # remove old source folder
                    logging.debug('PostProcess: removing original inbox')
                    shutil.rmtree(amPath)
                except OSError as e:
                    info = "PostProcess: " + e.message
                    logging.debug(info)
                    info = 'There was a problem removing the folder %s from the inbox after decrypting all of the files in the job.' % (amPath)
                    sendFailureEmail(info)

                # refresh the record
                amRecord = datastore.recordWithNumberFromAMJobsTable(batchName)
                if amRecord is None:
                    # This should not happen as we don't even allow for the logic to proceed to this point without
                    # a valid Archive Manager Record
                    info = 'An error occurred where no data was found for the Archive Manager job ' + batchName + '\n'
                    info = info + 'This error should not happen. Please check ' + destinationAMFolder + '\n'
                    info = info + 'The files will need to be manually removed from the Decryption Queue.'
                    logging.debug(info)
                    sendFailureEmail(info)
                    continue

                # if there is an error, then redirect to the error box
                if amRecord.errorString != '':
                    finalPath = errorPath
                    # move the error files into a folder that indicates they are errors, it will live in the error box
                    try:
                        if datastore.updateArchiveManagerJobAsErrored(amRecord) == True:
                            logging.debug('Job has finished, but there were some errors')
                            logging.debug('PostProcess: will send email')
                            info = 'Job %s has some errors! Please see the ErrorBox at %s' % (batchName, errorPath)
                            info = info + '\n' + amRecord.errorString
                            sendFailureEmail(info)
                        else:
                            logging.debug('PostProcess: Error saving Job')

                        errDirname = os.path.dirname(destinationAMFolder)
                        errBasename = os.path.basename(destinationAMFolder) + '_DECRYPTED_ERROR'
                        os.rename(destinationAMFolder, os.path.join(errDirname, errBasename))
                        destinationAMFolder = os.path.join(errDirname, errBasename)
                        # shutil.move(destinationAMFolder, errorPath)
                        pathAfterSafelyMovingFolderToDestinationFolder(destinationAMFolder, errorPath)
                    except Exception as e:
                        info = 'An error occurred when moving the errored files to %s.' % (errorPath,)
                        logging.debug(info)
                        sendFailureEmail(info)
                else:
                    # No errors, move the files to the appropriate place
                    print "No Errors finalPath", finalPath
                    try:
                        logging.debug('PostProcess: moving archive manager folder to final destination')
                        if os.path.exists(os.path.join(finalPath, os.path.basename(destinationAMFolder))):
                            logging.debug('PostProcess: collision moving to duplicate box')
                            altPath = pathAfterSafelyMovingFileToDestinationFolder(destinationAMFolder, finalPath)
                        else:
                            shutil.move(destinationAMFolder, finalPath)

                        if datastore.updateArchiveManagerJobAsReadyToComplete(amRecord) == True:
                            logging.debug('PostProcess: job is ready to complete')
                            logging.debug('PostProcess: moving files and sending email')
                            info = 'Job %s is complete! All of the files are decrypted and have appropriate matching checksums.' % (batchName)
                            sendSuccessEmail(info)
                        else:
                            logging.debug('PostProcess: Error saving Job')
                    except OSError as e:
                        # again, I am accounting for this error, I just don't know why I would ever encounter a situation like this
                        info = 'There was a problem moving the folder %s to the outbox. You will have to move the file manually.' % (destinationAMFolder)
                        info = info + " " + e.message
                        sendFailureEmail(info)
                        logging.debug(info)
                        continue
            except Exception as e:
                info = 'An error occurred. Please check the Decryption Queue for job %s. See Error: %s' % (batchName, e.message)
                logging.debug(info)
                sendFailureEmail(info)
        else:
            # LAST CASE FOR SINGLE MODE FILES LIKE ENCRYPTION AND SINGLE MODE DECRYPTION
            newPath = pathAfterSafelyMovingFileToDestinationFolder(filePath, finalPath)
            if not os.path.exists(newPath):
                logging.debug('PostProcess: Error moving file')
                continue

            logging.debug('PostProcess: Will update record status with Hash string and times')
            success = datastore.updateRecordWithFinalEncryptedPathAndHashForStartTimeAndEndTime(newPath, hashString, startTime, endTime, key_id)

            if success == True:
                # move original file to original box
                try:
                    newPath = pathAfterSafelyMovingFileToDestinationFolder(sourceFilePath, finalOriginalDestinationPath)
                except Exception as e:
                    logging.debug('There was an error moving the file into place')
                    info = 'There was an error moving file %s into the outbox at %s' % (sourceFilePath, finalOriginalDestinationPath)
                    sendFailureEmail(info)

                if configurationOptions().shouldDeleteOriginal == True:
                    try:
                        os.remove(newPath)
                    except OSError as e:
                        logging.debug('PostProcess: Unable to delete the file ' + newPath)
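
# hashForFile is called above but is not defined in this dump. A minimal
# sketch, assuming it returns a hex digest computed in chunks so large media
# files are never loaded into memory at once (the MD5 choice is an
# assumption; the checksum comparison above is case-insensitive either way):
import hashlib


def hashForFile(openFile, blockSize=2 ** 20):
    '''Hypothetical stand-in: chunked MD5 over an already-opened file object.'''
    hasher = hashlib.md5()
    while True:
        chunk = openFile.read(blockSize)
        if not chunk:
            break
        hasher.update(chunk)
    return hasher.hexdigest()
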
def postprocess(dbPath):
    '''
    This is the post process module. It verifies checksums for decrypted
    files and finishes or fails Archive Manager batch jobs.
    '''
    logging = DefaultLogger()

    if not os.path.exists(dbPath):
        logging.debug('PostProcess: can\'t find database at path')
        return

    datastore = DataStore(dbPath)
    loopcount = 0

    while True:
        sleep(5)

        if loopcount % 10 == 0:
            logging.debug('PostProcess is alive')
        loopcount += 1

        # calculate checksums on decrypted files
        data = datastore.recordsForReHashing()
        processRecordsReadyToBeHashed(data, datastore)

        # delete associated files as the job was successful
        amRecords = datastore.archiveManagerJobsReadyToComplete()
        for amRecord in amRecords:
            dataStoreRecords = datastore.recordsForUUID(amRecord.uuid)
            for record in dataStoreRecords:
                recordPath = record.fileName
                if configurationOptions().shouldDeleteOriginal == True:
                    try:
                        os.remove(recordPath)
                    except OSError as e:
                        info = 'PostProcess: Unable to delete the file %s' % (recordPath,)
                        logging.debug(info)
            datastore.updateArchiveManagerJobAsComplete(amRecord)

        # move the associated files to the error box as the job had problems
        amRecords = datastore.archiveManagerJobsThatErrored()
        for amRecord in amRecords:
            logging.debug('performing clean up with ' + amRecord.amNumber)
            batchName = amRecord.amNumber
            destinationAMFolder = ''
            errorPath = ''

            dataStoreRecords = datastore.recordsForUUID(amRecord.uuid)
            for record in dataStoreRecords:
                pathStructureName = record.pathStructureName
                filePath = record.fileName
                currentPathStructure = configurationOptions().pathStructureWithName(pathStructureName)
                errorPath = currentPathStructure['errorBox']
                print filePath

                destinationAMFolder = os.path.join(os.path.dirname(filePath), batchName)
                print 'This is where the working files will go.', destinationAMFolder
                if not os.path.exists(destinationAMFolder):
                    try:
                        os.mkdir(destinationAMFolder)
                    except OSError as e:
                        pass

                originalFileName = os.path.basename(filePath).split((batchName + "_"))[1]
                proposedAMPath = os.path.join(destinationAMFolder, originalFileName)

                try:
                    # newPath = pathAfterSafelyMovingFileToDestinationFile(filePath, proposedAMPath)
                    print filePath, proposedAMPath
                    shutil.move(filePath, proposedAMPath)
                except Exception as e:
                    info = 'There was an error moving a file at %s for Archive Manager job %s. This will need to be manually addressed.' % (filePath, batchName)
                    sendFailureEmail(info)
                    continue

            currentFiles = os.listdir(destinationAMFolder)
            filesInJob = amRecord.allFilesInRecord()

            areAllFilesInPlace = True
            for nFile in filesInJob:
                if nFile not in currentFiles:
                    areAllFilesInPlace = False

            if areAllFilesInPlace == True:
                print "moving files to the error path"
                try:
                    pathAfterSafelyMovingFolderToDestinationFolder(destinationAMFolder, errorPath)
                except Exception as e:
                    info = 'PostProcess: Unable to move the file %s' % (filePath,)
                    logging.debug(info)
                    info = 'There was an error moving the folder %s into the outbox at %s' % (destinationAMFolder, errorPath)
                    info = info + '\n' + 'This will need to be addressed manually'
                    sendFailureEmail(info)
                    continue
                datastore.updateArchiveManagerJobAsComplete(amRecord)
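
# pathAfterSafelyMovingFolderToDestinationFolder is used above but is not
# defined in this dump. A sketch assuming "safely" means picking a
# non-colliding name in the destination before moving, applied to a whole
# folder (the numbering convention is an assumption; a file-level
# counterpart is sketched further down):
def pathAfterSafelyMovingFolderToDestinationFolder(sourceFolder, destinationFolder):
    '''Hypothetical stand-in: move sourceFolder into destinationFolder, avoiding collisions.'''
    base = os.path.basename(os.path.normpath(sourceFolder))
    candidate = os.path.join(destinationFolder, base)
    counter = 1
    while os.path.exists(candidate):
        candidate = os.path.join(destinationFolder, '%s_%d' % (base, counter))
        counter += 1
    shutil.move(sourceFolder, candidate)
    return candidate
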
def checkSingleFiles(dbPath):
    logging = DefaultLogger()

    if not os.path.exists(dbPath):
        logging.debug('Acquire File: can\'t find database at path')
        return

    datastore = DataStore(dbPath)
    data = datastore.recordsForVerifying()

    for record in data:
        key_id = record.id
        filePath = record.fileName
        recordSize = int(record.fileSize)
        dateModifiedString = record.dateModified
        pathStructureName = record.pathStructureName
        operationType = record.operationType
        isBatch = record.isBatch
        batchName = record.batchName

        dateLastModified = datetime.datetime.strptime(dateModifiedString, '%Y-%m-%d %H:%M:%S')
        timeDifference = datetime.datetime.now() - dateLastModified

        # This can change with an if/else should I decide I want to put temp files to be decrypted in another place
        sourcePath = configurationOptions().pathStructureWithName(pathStructureName)['inBox']
        workingPath = configurationOptions().pathStructureWithName(pathStructureName)['workingBox']

        if timeDifference.seconds < verificationWaitTime:
            continue

        lastSize = recordSize
        currentSize = 0

        if not os.path.exists(filePath):
            logging.debug('Acquire File: Will update record status as the file no longer exists')
            datastore.updateRecordAsMissingWithID(key_id)
            continue

        currentSize = os.path.getsize(filePath)

        if lastSize != currentSize:
            logging.debug(record)
            logging.debug('Acquire File: attempting db modify as file size has changed...')
            datastore.updateRecordWithCurrentSizeAndDateModifiedWithID(currentSize, datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"), key_id)
            continue

        # if the current size is zero, then continue until it isn't or never will be
        # it's likely the file has been queued to copy but no data has been moved yet (actual OSX case)
        if currentSize == 0:
            continue

        logging.debug('Acquire File: attempting to lock the file to see if I own the file yet...')
        try:
            fileToCheck = open(filePath, 'rb')
            portalocker.lock(fileToCheck, portalocker.LOCK_EX)
            fileToCheck.close()
            logging.debug('Acquire File: proceeding to update the file status knowing that no one else is using it...')
        except Exception as e:
            logging.debug('Acquire File: unable to lock file as it is likely in use')
            continue

        if datastore.doesTheFilePathExistElseWhereInThePathStructure(filePath, operationType, pathStructureName) == True:
            duplicatePath = configurationOptions().pathStructureWithName(pathStructureName)['duplicateBox']
            newPath = pathAfterSafelyMovingFileToDestinationFolder(filePath, duplicatePath)
            datastore.updateRecordAsDuplicateWithNewPath(newPath, key_id)
            continue

        newPath = filePath

        # Only update
        if isBatch == 1 and operationType == 'Decrypt':
            amRecord = None
            uuidString = fileNameForUUIDFileWithPath(os.path.dirname(filePath))
            if uuidString == None:
                # if I can't resolve the UUID, then resolve it through an AM Record
                # Does the file's Archive Manager job have data associated with it?
                amRecord = datastore.recordWithNumberFromAMJobsTable(batchName)
                if amRecord == None:
                    info = "Acquire File: Archive Manager data doesn't exist for " + filePath
                    info = info + " " + "Marking file as having no AM Data. File will not be moved through the processing queue."
                    logging.debug(info)
                    datastore.updateRecordStatusWithID(datastore.noArchiveManagerDataExistsForRecord(), key_id)
                    continue
            else:
                logging.debug('Updating record %s with UUID %s' % (filePath, uuidString))
                amRecord = datastore.archiveManagerJobsTableRecordWithUUID(uuidString)
                datastore.updateRecordAWithBatchUUIDReference(uuidString, key_id)
        else:
            # at this point, I need to subtract the file's main folder from the pathStructure['inBox']
            # this moves the file from the inbox to the working path
            try:
                newPath = pathAfterSafelyMovingFileToDestinationFolder(filePath, workingPath)
            except Exception as e:
                logging.debug('This shouldn\'t happen as pathAfterSafelyMovingFileToDestinationFolder should create a unique name that avoids any collisions, otherwise the file has been moved')
                logging.debug('Acquire File: Error moving file')
                info = 'There was a problem moving the file into the queue for: ' + os.path.basename(filePath)
                info = info + '\n' + 'This will require manual intervention as the occurrence is unique.'
                sendFailureEmail(info)
                continue

        logging.debug('Acquire File: updating record file status and path....')
        datastore.updateRecordAsStaticWithNewPath(newPath, key_id)
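
# fileNameForUUIDFileWithPath is used above to recover a batch job's UUID
# from its folder. A sketch assuming the convention visible in on_created
# below, where a marker file named 'UUID_<uuid>' is dropped into each batch
# folder (the exact naming scheme is an assumption):
def fileNameForUUIDFileWithPath(folderPath):
    '''Hypothetical stand-in: return the uuid encoded in a UUID_ marker file, else None.'''
    try:
        for name in os.listdir(folderPath):
            if name.startswith('UUID_'):
                return name.split('UUID_', 1)[1]
    except OSError:
        pass
    return None
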
def checkArchiveManagerJobs(dbPath):
    logging = DefaultLogger()
    datastore = DataStore(dbPath)

    amRecords = datastore.archiveManagerJobsReadyToStart()
    for amRecord in amRecords:
        areAllFilesAvailableAndReady = True

        recordsInAMRecord = datastore.recordsForUUID(amRecord.uuid)
        filesInAMRecord = [x.fileName for x in recordsInAMRecord]

        filesInCurrentFolder = []
        try:
            filesInCurrentFolder = os.listdir(amRecord.amPath)
        except Exception as e:
            pass

        isThereAnUnknownFilePresent = False
        for currentFile in filesInCurrentFolder:
            if currentFile not in filesInAMRecord:
                isThereAnUnknownFilePresent = True
        if isThereAnUnknownFilePresent == True:
            logging.debug('Unknown files are present')
            pass  # report error

        for currentFile in filesInAMRecord:
            logging.debug('%s' % (currentFile))
            lastComponent = os.path.basename(currentFile)
            if lastComponent not in filesInCurrentFolder:
                logging.debug('The following file is not yet available: %s' % (lastComponent))
                areAllFilesAvailableAndReady = False

        if areAllFilesAvailableAndReady == False:
            logging.debug('Not all of the files are staged yet')
            continue

        canLockAllRecords = True
        data = datastore.recordsForUUID(amRecord.uuid)
        for record in data:
            filePath = record.fileName
            try:
                fileToCheck = open(filePath, 'rb')
                portalocker.lock(fileToCheck, portalocker.LOCK_EX)
                fileToCheck.close()
                logging.debug('Acquire File: proceeding to update the file status knowing that no one else is using it...')
            except Exception as e:
                logging.debug('Acquire File: unable to lock file as it is likely in use')
                canLockAllRecords = False

        if canLockAllRecords == False:
            logging.debug('Can not lock all of the records yet')
            continue

        for record in data:
            key_id = record.id
            filePath = record.fileName
            recordSize = int(record.fileSize)
            dateModifiedString = record.dateModified
            pathStructureName = record.pathStructureName
            operationType = record.operationType
            isBatch = record.isBatch
            batchName = record.batchName

            newPath = filePath
            workingPath = configurationOptions().pathStructureWithName(pathStructureName)['workingBox']

            # we prepend the job name to the file here as it belongs to a batch
            proposedBatchName = batchName + "_" + os.path.basename(filePath)
            proposedPath = os.path.join(os.path.dirname(filePath), proposedBatchName)

            try:
                if os.path.exists(proposedPath):
                    raise Exception('file already exists')
                os.rename(filePath, proposedPath)
                filePath = proposedPath
            except Exception as e:
                # this is an unlikely occurrence
                info = 'There is a duplicate file in the queue for: ' + os.path.basename(filePath) + " " + e.message
                logging.debug(info)
                sendFailureEmail(info)
                continue

            # at this point, I need to subtract the file's main folder from the pathStructure['inBox']
            # this moves the file from the inbox to the working path
            try:
                newPath = pathAfterSafelyMovingFileToDestinationFolder(filePath, workingPath)
            except Exception as e:
                logging.debug('This shouldn\'t happen as pathAfterSafelyMovingFileToDestinationFolder should create a unique name that avoids any collisions, otherwise the file has been moved')
                logging.debug('Acquire File: Error moving file')
                info = 'There was a problem moving the file into the queue for: ' + os.path.basename(filePath)
                info = info + '\n' + 'This will require manual intervention as the occurrence is unique.'
                sendFailureEmail(info)
                continue

            logging.debug('Acquire File: updating record file status and path....')
            datastore.updateRecordAsStaticWithNewPath(newPath, key_id)
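
# pathAfterSafelyMovingFileToDestinationFolder is relied on throughout but is
# not defined in this dump. A minimal sketch, mirroring the folder variant
# above: pick a non-colliding name in the destination, then move (the
# numbering convention is an assumption):
def pathAfterSafelyMovingFileToDestinationFolder(sourcePath, destinationFolder):
    '''Hypothetical stand-in: move sourcePath into destinationFolder, avoiding collisions.'''
    base = os.path.basename(sourcePath)
    name, ext = os.path.splitext(base)
    candidate = os.path.join(destinationFolder, base)
    counter = 1
    while os.path.exists(candidate):
        candidate = os.path.join(destinationFolder, '%s_%d%s' % (name, counter, ext))
        counter += 1
    shutil.move(sourcePath, candidate)
    return candidate
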
def on_created(self, event):
    '''
    If the path is a folder, then retrieve the archive manager request and
    save the result to a new table called amjobs. The table will have the
    following columns: id, data, complete. The data column will hold the
    json data from which I can retrieve all of the files.
    '''
    if os.path.isdir(os.path.abspath(event.src_path)):
        info = "Created Folder: " + event.src_path + " " + str(getsizeFolder(event.src_path))
        logging.debug(info)
        try:
            droppedFolder = event.src_path.split(self.pathStructure['inBox'])[1].split(os.sep)[1]
            pathComponents = [elem for elem in droppedFolder.split(os.sep) if elem != '']
            if len(pathComponents) == 1:
                info = "will add " + droppedFolder + " to path"
                logging.debug(info)

                amDataAsString = jsonStringForAMNumber(droppedFolder)
                if isAMDataValid(amDataAsString) == False:
                    self.dataStore.addAndMarkArchiveManagerJobToDataBaseAsUnkown(droppedFolder, event.src_path)
                    errorString = '''A folder was added to the Decrypt Path %s for which no Archive Manager Data was found.
Check the name of the folder that was dropped and make sure that the Archive Manager request exists
and that the Archive Manager is accessible. Files added to this folder will not be Decrypted until
the error is resolved.'''
                    errorString = errorString % self.pathStructure['inBox']
                    raise Exception(errorString)

                uuid = createFileWithUUIDatPath(event.src_path)
                self.dataStore.addArchiveManagerJobToDataBaseWithUUID(droppedFolder, amDataAsString, event.src_path, uuid)
            elif len(pathComponents) > 1:
                logging.debug('This folder path is nested and will not be accepted')
                raise Exception('failed to get data from server')
        except Exception as e:
            info = e.message
            logging.debug(info)
            sendFailureEmail(info)
    else:
        # file
        try:
            droppedFile = event.src_path.split(self.pathStructure['inBox'])[1]
            pathComponents = [elem for elem in droppedFile.split(os.sep) if elem != '']
            if os.path.basename(event.src_path) in ['Thumbs.db', '.DS_Store']:
                pass
            elif os.path.basename(event.src_path).startswith('UUID_'):
                pass
            elif len(pathComponents) == 1:
                # single file
                pathToAdd = pathComponents[0]
                self.dataStore.addFilePathToDataBaseStoreWithType(os.path.abspath(event.src_path), self.pathStructure['watchType'], self.pathStructure['name'])
                info = "Created: " + pathToAdd + " " + str(os.path.getsize(event.src_path))
                logging.debug(info)
            elif len(pathComponents) == 2:
                # ADD BATCH FLAG AND AM FOLDER NAME
                batchName = pathComponents[0]
                self.dataStore.addBatchFilePathToDataBaseStoreWithType(os.path.abspath(event.src_path), self.pathStructure['watchType'], self.pathStructure['name'], batchName)
                info = "Created File: " + event.src_path + " " + str(os.path.getsize(event.src_path))
                logging.debug(info)
                info = "will add " + str(pathComponents) + " to path"
                logging.debug(info)
            else:
                raise Exception('This file path is nested OR incomplete and will not be accepted')
        except Exception as e:
            # GENERATE ERROR EMAIL
            info = e.message
            logging.debug(info)
            sendFailureEmail(info)
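
# createFileWithUUIDatPath and getsizeFolder are used above but are not
# defined in this dump. Sketches, assuming the UUID_ marker-file convention
# noted earlier and a simple recursive folder size:
import uuid as _uuid


def createFileWithUUIDatPath(folderPath):
    '''Hypothetical stand-in: drop a UUID_<uuid> marker file, return the uuid string.'''
    uuidString = str(_uuid.uuid4())
    markerPath = os.path.join(folderPath, 'UUID_' + uuidString)
    open(markerPath, 'w').close()
    return uuidString


def getsizeFolder(folderPath):
    '''Hypothetical stand-in: total size in bytes of all files under folderPath.'''
    total = 0
    for root, dirs, files in os.walk(folderPath):
        for name in files:
            total += os.path.getsize(os.path.join(root, name))
    return total
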
def encrypt(dbPath):
    '''
    This process examines the database pointed to by dbPath. It looks for
    any records which have status 2 and have had a hash value calculated.
    '''
    logging = DefaultLogger()

    if not os.path.exists(dbPath):
        logging.debug('can\'t find database at path')
        return

    datastore = DataStore(dbPath)
    loopcount = 0

    while True:
        sleep(5)

        if loopcount % 10 == 0:
            logging.debug('Encryptor Process is alive')
        loopcount += 1

        data = datastore.recordsReadyToEncrypt()
        for record in data:
            logging.debug(record)
            key_id = record.id
            filePath = record.fileName
            pathStructureName = record.pathStructureName

            if not os.path.exists(filePath):
                logging.debug('Encryptor: will update record status as the file no longer exists')
                datastore.updateRecordAsMissingWithID(key_id)
            else:
                options = configurationOptions()
                currentPathStructure = options.pathStructureWithName(pathStructureName)
                encryptionErrorPath = currentPathStructure['errorBox']
                encryptionInterimPath = currentPathStructure['interimBox']
                encryptedFilePath = os.path.join(encryptionInterimPath, os.path.basename(filePath))
                encryptionStart = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                nextStatusValue = datastore.operationCompleteStatusCode()

                options.inputPath = filePath
                options.destinationPath = os.path.dirname(encryptedFilePath)

                # UPDATE THAT ENCRYPTION STARTS HERE
                datastore.updateRecordStatusWithOperationStart(encryptionStart, key_id)

                message = 'Encryptor: encrypting file ' + filePath
                logging.debug(message)

                # there is a bug with MediaSeal when encrypting an encrypted file,
                # this checks for this so that MediaSeal doesn't blow away the file.
                returnCode = -7
                fileToEncrypt = None
                try:
                    fileToEncrypt = open(filePath, 'rb')
                    portalocker.lock(fileToEncrypt, portalocker.LOCK_SH)
                    returnCode = singleShotEncryptor(options)
                except Exception as e:
                    logging.debug('unable to lock file')
                if fileToEncrypt is not None:
                    fileToEncrypt.close()

                message = 'Encryptor: encrypted file with return code ' + str(returnCode)
                logging.debug(message)
                encryptionStop = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

                # try again should the connection be bad
                if returnCode == 2:
                    sleep(5)
                    returnCode = singleShotEncryptor(options)
                    encryptionStart = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

                # as we are encrypting single files, we can leave this logic the same
                if returnCode != 0:
                    info = "There was a problem encrypting " + filePath + ". Encountered Error Code: " + str(returnCode) + ". The file will be moved to the path's Error box: " + encryptionErrorPath
                    sendFailureEmail(info)
                    nextStatusValue = datastore.operationFailedStatusCode()
                    encryptionStart = datetime.datetime(2000, 1, 1)
                    encryptionStop = datetime.datetime(2000, 1, 1)
                    if os.path.abspath(os.path.dirname(filePath)) != os.path.abspath(encryptionErrorPath):
                        logging.debug('moving file to error path')
                        if os.path.exists(encryptionErrorPath):
                            try:
                                newPath = pathAfterSafelyMovingFileToDestinationFolder(filePath, encryptionErrorPath)
                            except Exception as e:
                                logging.debug('Encryptor: Error moving file')
                                nextStatusValue = datastore.errorMovingFileStatusCode()
                        else:
                            logging.debug('Encryptor: encryptionErrorPath doesn\'t exist')
                            nextStatusValue = datastore.errorPathDoesntExistStatusCode()

                datastore.updateRecordStatusWithEncryptedFileNameAndStartAndEndTime(nextStatusValue, encryptedFilePath, encryptionStart, encryptionStop, key_id)
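
# singleShotEncryptor is the MediaSeal encryption entry point used above; it
# is not part of this dump. A heavily hedged sketch, assuming it shells out
# to an encryptor binary and returns its exit code (the executable path and
# arguments below are placeholders, not MediaSeal's actual interface):
import subprocess


def singleShotEncryptor(options):
    '''Hypothetical stand-in: run the encryptor once, return its exit code.'''
    command = [
        '/path/to/encryptor',  # placeholder executable
        '--input', options.inputPath,
        '--output-dir', options.destinationPath,
    ]
    try:
        return subprocess.call(command)
    except OSError:
        return -1  # mirrors the "could not run" sentinel style used above
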
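
# preprocess, encrypt, and postprocess are all blocking poll loops, so they
# are presumably launched as separate long-running processes. The launcher
# itself is not part of this dump; a minimal sketch of one way it might be
# wired up (the db path is a placeholder):
from multiprocessing import Process

if __name__ == '__main__':
    dbPath = '/path/to/jobs.db'  # placeholder
    workers = [Process(target=fn, args=(dbPath,))
               for fn in (preprocess, encrypt, postprocess)]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()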