def dbConnection(self):
    '''
    Open a new SQLite connection to the file at ``self.storePath``.

    Returns the connection object, or None when ``sqlite3.connect``
    raises; in that case the exception message is written to the
    default logger at debug level.
    '''
    try:
        return sqlite3.connect(self.storePath)
    except Exception as err:
        DefaultLogger().debug(err.message)
    return None
def preprocess(dbPath):
    '''
    Poll the job database forever and stamp verified files with a hash.

    Returns immediately (after logging) when dbPath does not exist.
    Otherwise, every five seconds, each record returned by
    DataStore.recordsForHashing() is handled as follows:

    * if the file no longer exists the record is marked as missing;
    * otherwise the hash-start time is recorded, the file is opened and
      locked with a shared portalocker lock, and the record is updated
      with the hash string plus start/end times.

    Hashing itself is currently disabled: the stored hash string is the
    placeholder "NO_OP".

    Any exception while processing a record is reported through
    sendFailureEmail and logged; the loop then moves on to the next record.
    '''
    log = DefaultLogger()
    timeFormat = "%Y-%m-%d %H:%M:%S"

    if not os.path.exists(dbPath):
        log.debug('PreProcess: can\'t find database at path')
        return

    datastore = DataStore(dbPath)
    loopcount = 0

    while True:
        sleep(5)

        # Heartbeat on every tenth pass.
        if loopcount % 10 == 0:
            log.debug('PreProcess is alive')
        loopcount += 1

        for record in datastore.recordsForHashing():
            log.debug(record)

            key_id = record.id
            filePath = record.fileName

            if not os.path.exists(filePath):
                log.debug(
                    'PreProcess: Will update record status as the file no longer exists'
                )
                datastore.updateRecordAsMissingWithID(key_id)
                continue

            try:
                log.debug('PreProcess: locking file to calculate hash...')
                startTime = datetime.datetime.now().strftime(timeFormat)
                datastore.updateRecordWithHashStart(startTime, key_id)

                fileToHash = open(filePath, 'rb')
                try:
                    portalocker.lock(fileToHash, portalocker.LOCK_SH)
                    # Hashing is switched off; this used to be
                    # hashForFile(fileToHash).
                    hashString = "NO_OP"
                    endTime = datetime.datetime.now().strftime(timeFormat)
                finally:
                    # Closing the handle also drops the lock; doing it in
                    # a finally block keeps a failed lock() from leaking
                    # the open file.
                    fileToHash.close()

                log.debug('PreProcess: unlocking file...')
                log.debug(
                    'PreProcess: Will update record status with Hash string and times'
                )
                datastore.updateRecordWithHashForStartTimeAndEndTime(
                    hashString, startTime, endTime, key_id)
            except Exception as e:
                # str(e) rather than e.message: the latter is empty for
                # IOError/OSError and does not exist on Python 3.
                reason = str(e)
                info = 'PreProcess: There was an error when calculating the hash for file: ' + os.path.basename(
                    filePath) + ' ' + reason
                sendFailureEmail(info)
                log.error(reason)
def simpleFailureEmail(message=""):
    '''
    Send a plain-text "MediaSeal Process Error" email containing message.

    The SMTP server and the recipient list are read from
    configurationOptions(). Delivery is best effort: any exception is
    logged at debug level and never propagated to the caller.
    '''
    options = configurationOptions()
    smtpServer = options.smtpServer
    recipients = options.emailRecipients

    try:
        s = smtplib.SMTP(smtpServer)
        try:
            msg = "Subject: %s\n\n%s" % ('MediaSeal Process Error', message)
            s.sendmail("*****@*****.**", recipients, msg)
            s.quit()
        finally:
            # close() is a no-op after a successful quit(); after a failed
            # sendmail() it releases the socket that used to be leaked.
            s.close()
    except Exception as e:
        info = 'There was an Error sending a simpleFailureEmail email:' + message + " :" + str(e)
        DefaultLogger().debug(info)
def acquirefile(dbPath):
    '''
    Run the file-verification pass against dbPath forever.

    Every five seconds checkSingleFiles(dbPath) is called; that function
    does the actual per-record work. On every tenth pass (starting with
    the first) a heartbeat line is written to the default logger. This
    function never returns.
    '''
    log = DefaultLogger()
    iteration = 0

    while True:
        sleep(5)

        heartbeatDue = (iteration % 10 == 0)
        iteration += 1
        if heartbeatDue:
            log.debug('acquire loop is active...')

        checkSingleFiles(dbPath)
def failureEmail(message=""):
    '''
    Send a MIME "MediaSeal Process Error" email whose body is message.

    The SMTP server and the recipient list are read from
    configurationOptions(). When message is empty the body falls back to
    the subject text. Delivery is best effort: any exception is logged at
    debug level and never propagated to the caller.
    '''
    options = configurationOptions()
    smtpServer = options.smtpServer
    recipients = options.emailRecipients
    subject = "MediaSeal Process Error"

    try:
        s = smtplib.SMTP(smtpServer)
        try:
            # The message goes in the body. It used to be appended to the
            # Subject header after a newline, which produces a malformed
            # header (and is rejected outright by newer email packages).
            msg = MIMEText(message or subject)
            sender = '*****@*****.**'
            msg['Subject'] = subject
            msg['From'] = sender
            msg['To'] = ", ".join(recipients)
            s.sendmail(sender, recipients, msg.as_string())
            s.quit()
        finally:
            # No-op after quit(); releases the socket if sendmail() failed.
            s.close()
    except Exception as e:
        info = 'There was an Error sending a sendFailureEmail email:' + message + " :" + str(e)
        DefaultLogger().debug(info)
def singleShotEncryptor(options):
    '''
    Run one encryption job and return the encryptor's return code.

    options supplies destinationPath, jobStatusOuputPath and
    encryptorArguments(). The function returns None (after logging) when
    the destination path does not exist. Otherwise it:

    * makes sure the directory for the job-status file exists and removes
      a stale status file;
    * starts an EncryptorProcess and a StatusReader on separate threads;
    * logs every status the reader puts on the queue until the encryptor
      reports that it is no longer running, then drains what is left and
      terminates the reader.
    '''
    log = DefaultLogger()
    destinationPath = options.destinationPath
    jobStatusOuputPath = options.jobStatusOuputPath

    if not os.path.exists(destinationPath):
        log.debug('Encryptor: error as destination path doesn\'t exist')
        return

    # makedirs copes with several missing levels; the emptiness check
    # covers a status path that has no directory component at all.
    statusDirectory = os.path.dirname(jobStatusOuputPath)
    if statusDirectory and not os.path.exists(statusDirectory):
        os.makedirs(statusDirectory)

    if os.path.exists(jobStatusOuputPath):
        os.remove(jobStatusOuputPath)

    queue = Queue()
    encryptor = EncryptorProcess(options.encryptorArguments())
    reader = StatusReader(jobStatusOuputPath, queue)

    encryptThread = Thread(target=encryptor.run)
    readerThread = Thread(target=reader.run)
    encryptThread.start()
    readerThread.start()

    while encryptor.isRunning:
        sleep(0.5)
        # Log everything that arrived during the nap, not just one entry.
        while not queue.empty():
            log.debug(queue.get())

    # Pick up anything queued between the last poll and process exit.
    while not queue.empty():
        log.debug(queue.get())

    reader.terminate()

    if encryptor.returncode == 2:
        log.debug('unable to communicate with studio server')

    # Built as one string; a stray comma used to make this a tuple.
    statusString = "Encryption Process Return Code " + str(encryptor.returncode)
    log.debug(statusString)

    return encryptor.returncode
def main():
    '''
    Start the watcher, verifier and analyzer processes and keep them alive.

    Returns immediately when the configuration is not valid. Otherwise one
    folderObserver process, one acquirefile process and eight
    analyzeBWFFile processes are started. Every two seconds each process
    is checked and, if it is dead or has an exit code, replaced with a
    fresh one. On KeyboardInterrupt all processes are terminated and
    joined.
    '''
    options = configurationOptions()
    if not options.isValid():
        return

    log = DefaultLogger()
    dbPath = DefaultDatabasePath()
    cleanUpLockFiles()
    pathToWatch = options.defaultPathStructure()

    processObjects = [
        # paths
        {"target": folderObserver,
         "args": (pathToWatch, dbPath),
         "info": 'recreating folder observer process...'},
        # operations
        {"target": acquirefile,
         "args": (dbPath,),
         "info": 'recreating verifier process...'},
    ]

    # processors
    for x in range(0, 8):
        processObjects.append({
            "target": analyzeBWFFile,
            "args": (dbPath, 'process ' + str(x)),
            "info": 'recreating analyzer process %s...' % (str(x),),
        })

    for processObject in processObjects:
        processObject["process"] = Process(target=processObject['target'],
                                           args=processObject['args'])

    for processObject in processObjects:
        processObject["process"].start()

    try:
        while True:
            sleep(2)
            for processObject in processObjects:
                process = processObject['process']
                if not process.is_alive() or process.exitcode is not None:
                    log.debug(processObject['info'])
                    process.terminate()
                    processObject['process'] = Process(
                        target=processObject['target'],
                        args=processObject['args'])
                    processObject['process'].start()
    except KeyboardInterrupt:
        # multiprocessing.Process has no stop(); terminate() is the call
        # that actually ends the child.
        for processObject in processObjects:
            processObject['process'].terminate()

    for processObject in processObjects:
        processObject['process'].join()
def run(self):
    '''
    Run the command in self.args to completion and record its outcome.

    self.returncode receives the subprocess return code, or -10 when the
    process could not be started or waited for. self.isRunning is always
    set to False on the way out, including when something unexpected
    fails, so a caller polling that flag cannot wait forever.
    '''
    log = DefaultLogger()
    log.debug('Beginning Encryption')

    try:
        try:
            process = subprocess.Popen(self.args)
            process.communicate()
            self.returncode = process.returncode
        except Exception as e:
            log.debug(str(e))
            self.returncode = -10

        # Report self.returncode: the local process object does not exist
        # when Popen itself raised.
        message = 'Process Encryption Finished with code' + str(
            self.returncode)
        log.debug(message)
    finally:
        self.isRunning = False
def folderObserver(pathStructure, dbPath):
    '''
    Watch pathStructure['inBox'] (non-recursively) until interrupted.

    A singleFileWatcher built from pathStructure and dbPath is scheduled
    on a PollingObserver. The function blocks while the observer thread is
    alive. It returns early, after logging, when pathStructure is None or
    has no usable 'inBox' entry.
    '''
    log = DefaultLogger()

    inBox = None
    if pathStructure is not None:
        try:
            inBox = pathStructure['inBox']
        except (KeyError, TypeError):
            # A structure without an 'inBox' entry is treated the same as
            # an undefined one instead of crashing the process.
            inBox = None

    if inBox is None:
        message = 'Watch: Unable to run as pathStructure is undefined'
        log.debug(message)
        return

    event_handler = singleFileWatcher(pathStructure, dbPath)
    observer = PollingObserver()
    observer.schedule(event_handler, inBox, recursive=False)
    observer.start()

    try:
        while observer.is_alive():
            time.sleep(1)
    except KeyboardInterrupt:
        pass
    finally:
        # Always ask the observer to stop before joining, whichever way
        # the wait loop ended.
        observer.stop()

    observer.join()
def createJobsTable(self, pathToDBFolder):
    '''
    Create the jobs table if it does not exist yet.

    pathToDBFolder is accepted for interface compatibility and is not
    used; the connection comes from self.dbConnection(). Failures
    (including an unavailable connection) are written to self.debugLog
    and never raised.
    '''
    db = None
    try:
        db = self.dbConnection()
        if db is None:
            self.debugLog.debug(
                'Error: Unable to call createJobsTable: no database connection')
            return

        cursor = db.cursor()
        cursor.execute(
            'CREATE TABLE IF NOT EXISTS '
            'jobs(id INTEGER PRIMARY KEY, '
            'fileName TEXT, '
            'fileSize INTEGER, '
            'dateAdded DATETIME, '
            'dateModified DATETIME, '
            'status INTEGER, '
            'analysis TEXT, '
            'vendor TEXT, '
            'daisyComments TEXT, '
            'commentsUpdatedToDaisy INTEGER, '
            'daisyStatus TEXT, '
            'statusUpdatedToDaisy INTEGER)')
        db.commit()
    except Exception as e:
        self.debugLog.debug('Error: Unable to call createJobsTable: ' + str(e))
    finally:
        # Only close what was actually opened; closing unconditionally
        # used to crash when the connection could not be made.
        if db is not None:
            db.close()
def __init__(self, storePath):
    '''
    Remember the database path, attach a logger and ensure the jobs
    table exists.

    The logger is assigned before createJobsTable runs because that
    method reports its failures through self.debugLog.
    '''
    self.storePath = storePath
    self.debugLog = DefaultLogger()
    self.createJobsTable(storePath)
class DataStore():
    '''
    SQLite-backed store for the file job queue (table ``jobs``) and the
    archive manager job queue (table ``amjobs``).

    Every public method opens its own connection through dbConnection()
    and closes it again before returning. Database errors are written to
    self.debugLog and are not raised to the caller; methods signal failure
    through their return value where they have one.
    '''

    def __init__(self, storePath):
        '''Remember storePath, attach a logger and create both tables.'''
        # The logger has to exist first: the table creators report their
        # failures through self.debugLog.
        self.debugLog = DefaultLogger()
        self.storePath = storePath
        self.createJobsTable(storePath)
        self.createArchiveManagerJobsTable(storePath)

    def dbConnection(self):
        '''Return a new connection to self.storePath, or None on failure.'''
        db = None
        try:
            db = sqlite3.connect(self.storePath)
        except Exception as e:
            self.debugLog.debug(str(e))
        return db

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------

    def _executeWrite(self, sql, parameters=()):
        '''Run one modifying statement; return True once it is committed.'''
        db = self.dbConnection()
        if db is None:
            self.debugLog.debug('Error: unable to open database at ' + str(self.storePath))
            return False
        try:
            db.cursor().execute(sql, parameters)
            db.commit()
            return True
        except Exception as e:
            self.debugLog.debug(str(e))
            db.rollback()
            return False
        finally:
            db.close()

    def _fetchAll(self, sql, parameters=()):
        '''Return every row of a SELECT, or None when the query failed.'''
        db = self.dbConnection()
        if db is None:
            self.debugLog.debug('Error: unable to open database at ' + str(self.storePath))
            return None
        try:
            cursor = db.cursor()
            cursor.execute(sql, parameters)
            return cursor.fetchall()
        except Exception as e:
            self.debugLog.debug(str(e))
            return None
        finally:
            db.close()

    def _fetchRecords(self, sql, parameters, recordFactory):
        '''Return recordFactory(row) for each selected row; [] on any error.'''
        rows = self._fetchAll(sql, parameters)
        if rows is None:
            return []
        try:
            return [recordFactory(row) for row in rows]
        except Exception as e:
            self.debugLog.debug(str(e))
            return []

    def _fetchJobRecords(self, sql, parameters):
        '''Return DataStoreRecord objects for a SELECT on jobs.'''
        return self._fetchRecords(sql, parameters, lambda row: DataStoreRecord(row))

    def _fetchArchiveManagerRecords(self, completeValue):
        '''Return ArchiveManagerRecord objects with the given complete value.'''
        return self._fetchRecords(
            'SELECT * FROM amjobs WHERE complete=?', (completeValue,),
            lambda row: ArchiveManagerRecord(row))

    def _firstArchiveManagerRecord(self, rows, duplicateMessage):
        '''Wrap the first row as an ArchiveManagerRecord; None if no rows.'''
        if not rows:
            return None
        if len(rows) > 1:
            self.debugLog.debug(duplicateMessage)
        try:
            return ArchiveManagerRecord(rows[0])
        except Exception as e:
            self.debugLog.debug(str(e))
            return None

    def _setArchiveManagerJobCompletion(self, amRecord, completeValue):
        '''Set amjobs.complete for amRecord.id; return True on success.'''
        key_id = amRecord.id
        return self._executeWrite(
            'UPDATE amJobs SET complete=? WHERE id=?;', (completeValue, key_id))

    # ------------------------------------------------------------------
    # schema
    # ------------------------------------------------------------------

    def createJobsTable(self, pathToDBFolder):
        '''Create the jobs table if needed (pathToDBFolder is unused).'''
        created = self._executeWrite(
            'CREATE TABLE IF NOT EXISTS '
            'jobs(id INTEGER PRIMARY KEY, '
            'fileName TEXT, '
            'fileSize INTEGER, '
            'fileHash TEXT, '
            'operationFileHash TEXT, '
            'dateAdded DATETIME, '
            'dateModified DATETIME, '
            'dateOperationStart DATETIME, '
            'dateOperationEnd DATETIME, '
            'dateHashStart DATETIME, '
            'dateHashEnd DATETIME, '
            'dateOperationHashStart DATETIME, '
            'dateOperationHashEnd DATETIME, '
            'operationFileName TEXT, '
            'operationFileSize INTEGER, '
            'status INTEGER, '
            'processComplete INTEGER, '
            'operationType TEXT, '
            'pathStructureName TEXT, '
            'isBatch INTEGER, '
            'batchName TEXT, '
            'batchUUID TEXT)')
        if not created:
            self.debugLog.debug('Error: Unable to call createJobsTable')

    def createArchiveManagerJobsTable(self, pathToDBFolder):
        '''Create the amjobs table if needed (pathToDBFolder is unused).'''
        self._executeWrite(
            'CREATE TABLE IF NOT EXISTS '
            'amjobs(id INTEGER PRIMARY KEY, '
            'amNumber TEXT, '
            'amData TEXT, '
            'amPath TEXT, '
            'complete INTEGER, '
            'errorString TEXT, '
            'uuid TEXT)')

    # ------------------------------------------------------------------
    # archive manager jobs
    # ------------------------------------------------------------------

    def addArchiveManagerJobToDataBaseWithUUID(self, amNumber, dataString, amPath, uuid):
        '''
        add an archive manager job to the database and mark its completion
        status as zero
        '''
        self._executeWrite(
            'INSERT INTO amjobs('
            'amNumber, amData, amPath, complete, errorString, uuid) '
            'VALUES (?,?,?,?,?,?)',
            (amNumber, dataString, amPath, 0, '', uuid))

    def addAndMarkArchiveManagerJobToDataBaseAsUnkown(self, amNumber, amPath):
        '''
        add an archive manager job to the database, but as no information
        about the job can be retrieved, mark its completion status as -1
        and its data as unknown
        '''
        self._executeWrite(
            'INSERT INTO amjobs('
            'amNumber, amData, amPath, complete, errorString, uuid) '
            'VALUES (?,?,?,?,?,?)',
            (amNumber, 'unknown', amPath, -1, '', ''))

    def updateArchiveManagerJobAsErrored(self, amRecord):
        '''
        mark an archive manager job as errored (complete = -2);
        returns True when the update was committed
        '''
        return self._setArchiveManagerJobCompletion(amRecord, -2)

    def updateArchiveManagerJobAsReadyToComplete(self, amRecord):
        '''
        mark an archive manager job as ready to finish (complete = 2);
        returns True when the update was committed
        '''
        return self._setArchiveManagerJobCompletion(amRecord, 2)

    def updateArchiveManagerJobAsComplete(self, amRecord):
        '''
        mark an archive manager job as finished (complete = 1);
        returns True when the update was committed
        '''
        return self._setArchiveManagerJobCompletion(amRecord, 1)

    def updateArchiveManagerJobErrorString(self, amRecord, errorString):
        '''
        update an archive manager job's error string in the database;
        returns True when the update was committed
        '''
        key_id = amRecord.id
        return self._executeWrite(
            'UPDATE amJobs SET errorString=? WHERE id=?;', (errorString, key_id))

    # ------------------------------------------------------------------
    # file jobs: insertion and lookup helpers
    # ------------------------------------------------------------------

    def doesTheFilePathExistElseWhereInThePathStructure(self, filePath, operationType, pathStructureName):
        '''
        Return True when a file with the same base name already exists in
        any of the duplicate-check folders of the named path structure.
        operationType is not used.
        '''
        options = configurationOptions()
        currentPathStructure = options.pathStructureWithName(pathStructureName)
        baseName = os.path.basename(filePath)
        # the folders to check exclude the inBox
        return any(
            os.path.exists(os.path.join(currentPathStructure[path], baseName))
            for path in options.pathStructurePathsToCheckForDuplicates())

    def addBatchFilePathToDataBaseStoreWithType(self, filePath, operationType, pathStructureName, batchName):
        '''
        add a batch file to the database with status zero; nothing is
        added when the file does not exist
        '''
        self.addFilePathToDataBaseStoreWithType(
            filePath, operationType, pathStructureName, isBatch=1, batchName=batchName)

    def addFilePathToDataBaseStoreWithType(self, filePath, operationType, pathStructureName, isBatch=0, batchName=''):
        '''
        add a file to the database with status zero; nothing is added when
        the file does not exist
        '''
        if not os.path.exists(filePath):
            return

        fileSize = os.path.getsize(filePath)
        now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        values = (filePath, fileSize, 'HASH', 'OPER_HASH', now, now,
                  '0', '0', '0', '0', '0', '0',
                  'operationFilePath', 0, 0, 0,
                  operationType, pathStructureName, isBatch, batchName, 'NO_UUID')
        # One placeholder per value, so the two can never drift apart.
        placeholders = ','.join(['?'] * len(values))
        inserted = self._executeWrite(
            'INSERT INTO jobs('
            'fileName, fileSize, fileHash, operationFileHash, '
            'dateAdded, dateModified, dateOperationStart, dateOperationEnd, '
            'dateHashStart, dateHashEnd, dateOperationHashStart, '
            'dateOperationHashEnd, operationFileName, operationFileSize, '
            'status, processComplete, operationType, pathStructureName, '
            'isBatch, batchName, batchUUID) '
            'VALUES (' + placeholders + ')',
            values)
        if not inserted:
            self.debugLog.debug('addFilePathToDataBaseStoreWithType Error')

    def updateModificationDateForFilePath(self, filePath):
        '''
        Set dateModified to now for the single status-0 record whose
        fileName is filePath. Nothing is changed when there is no such
        record or when more than one matches.
        '''
        db = self.dbConnection()
        if db is None:
            self.debugLog.debug('Error: unable to open database at ' + str(self.storePath))
            return
        try:
            cursor = db.cursor()
            try:
                cursor.execute(
                    'SELECT * FROM jobs WHERE fileName=? AND status=?', (filePath, 0))
                data = cursor.fetchall()
            except Exception as e:
                self.debugLog.debug(str(e))
                return

            if len(data) > 1:
                self.debugLog.debug('Error: record collision')
                return
            if not data:
                # An empty result is not a collision; say what happened.
                self.debugLog.debug('Error: no pending record found for ' + str(filePath))
                return

            try:
                key_id = data[0][0]
                cursor.execute(
                    'UPDATE jobs SET dateModified=? WHERE id=?;',
                    (datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"), key_id))
                db.commit()
            except Exception as e:
                self.debugLog.debug(str(e))
                db.rollback()
        finally:
            db.close()

    def dataStoreRecordsForDataBaseRecords(self, records):
        '''Wrap each raw row in a DataStoreRecord.'''
        return [DataStoreRecord(record) for record in records]

    def displayRecordForFile(self, filePath):
        '''
        Query the jobs rows for filePath.

        NOTE(review): the rows are fetched but never shown or returned;
        this looks like an unfinished debugging aid.
        '''
        self._fetchAll('SELECT * FROM jobs WHERE fileName=?', (filePath,))

    # ------------------------------------------------------------------
    # status codes
    # ------------------------------------------------------------------

    def noArchiveManagerDataExistsForRecord(self):
        # NOTE(review): same value as errorGeneratingHash(); confirm the
        # two are meant to be indistinguishable.
        return -90

    def errorGeneratingHash(self):
        return -90

    def daisyEntryNotFoundStatusCode(self):
        return -80

    def checksumLookupFailedStatusCode(self):
        return -70

    def checksumComparisonFailedStatusCode(self):
        return -60

    def errorFileExistsInPathChain(self):
        return -50

    def errorMovingFileStatusCode(self):
        return -40

    def errorPathDoesntExistStatusCode(self):
        return -30

    def operationFailedStatusCode(self):
        return -20

    def missingRecordStatusCode(self):
        return -10

    def addedStatusCode(self):
        return 0

    def verifyStatusCode(self):
        return 10

    def hashStartStatusCode(self):
        return 15

    def hashStatusCode(self):
        return 20

    def operationStartedStatusCode(self):
        return 25

    def operationCompleteStatusCode(self):
        return 30

    def reHashStartStatusCode(self):
        return 35

    def reHashStatusCode(self):
        return 40

    # ------------------------------------------------------------------
    # queries
    # ------------------------------------------------------------------

    def recordsForHashing(self):
        return self.recordsForStatus(self.verifyStatusCode())

    def recordsForReHashing(self):
        return self.recordsForStatus(self.operationCompleteStatusCode())

    def recordsForVerifying(self):
        return self.recordsForStatus(self.addedStatusCode())

    def recordsReadyToEncrypt(self):
        return self.recordsForEncryptionStatus(self.hashStatusCode())

    def recordsReadyToDecrypt(self):
        return self.recordsForDecryptionStatus(self.hashStatusCode())

    def recordWithNumberFromAMJobsTable(self, amNumber):
        '''
        Return the first open (complete = 0) archive manager record with
        the given amNumber, or None when there is none or the lookup
        failed.
        '''
        rows = self._fetchAll(
            'SELECT * FROM amjobs WHERE complete=? AND amNumber=?', (0, amNumber))
        if rows is None:
            self.debugLog.debug('Error recordWithNumberFromAMJobsTable')
            # None, like the not-found case, so callers that test for
            # None do not mistake a failed lookup for a record.
            return None
        if rows:
            self.debugLog.debug('More than Zero Records')
        return self._firstArchiveManagerRecord(rows, 'More than 1 Record')

    def archiveManagerJobsTableRecordWithUUID(self, uuidString):
        '''
        Return the first open (complete = 0) archive manager record with
        the given uuid, or None when there is none or the lookup failed.
        '''
        rows = self._fetchAll(
            'SELECT * FROM amjobs WHERE complete=? AND uuid=?', (0, uuidString))
        if rows is None:
            return None
        return self._firstArchiveManagerRecord(
            rows, 'found one too many records for amNumber request!!!')

    def archiveManagerJobsReadyToStart(self):
        '''Archive manager records with complete = 0 ([] on error).'''
        return self._fetchArchiveManagerRecords(0)

    def archiveManagerJobsReadyToComplete(self):
        '''Archive manager records with complete = 2 ([] on error).'''
        return self._fetchArchiveManagerRecords(2)

    def archiveManagerJobsThatErrored(self):
        '''Archive manager records with complete = -2 ([] on error).'''
        return self._fetchArchiveManagerRecords(-2)

    def recordsForStatus(self, status):
        '''Job records with the given status ([] on error).'''
        return self._fetchJobRecords(
            'SELECT * FROM jobs WHERE status=?', (status,))

    def recordsForUUID(self, uuid):
        '''Job records with the given batchUUID ([] on error).'''
        return self._fetchJobRecords(
            'SELECT * FROM jobs WHERE batchUUID=?', (uuid,))

    def recordsForEncryptionStatus(self, status):
        '''Encrypt job records with the given status ([] on error).'''
        return self._fetchJobRecords(
            'SELECT * FROM jobs WHERE status=? AND operationType=?', (status, 'Encrypt'))

    def recordsForDecryptionStatus(self, status):
        '''Decrypt job records with the given status ([] on error).'''
        return self._fetchJobRecords(
            'SELECT * FROM jobs WHERE status=? AND operationType=?', (status, 'Decrypt'))

    # ------------------------------------------------------------------
    # job record updates
    # ------------------------------------------------------------------

    def updateRecordStatusWithID(self, status, key_id):
        self._executeWrite(
            'UPDATE jobs SET status=? WHERE id=?;', (status, key_id))

    def updateRecordAsMissingWithID(self, key_id):
        self._executeWrite(
            'UPDATE jobs SET status=? WHERE id=?;',
            (self.missingRecordStatusCode(), key_id))

    def updateRecordAsMissingWithFileNameAndID(self, filePath, key_id):
        # the name is updated too in case a moved source file collides
        # with another file
        self._executeWrite(
            'UPDATE jobs SET status=?, fileName=? WHERE id=?;',
            (self.missingRecordStatusCode(), filePath, key_id))

    def updateRecordWithCurrentSizeAndDateModifiedWithID(self, currentSize, dateModified, key_id):
        self._executeWrite(
            'UPDATE jobs SET fileSize=?, dateModified=? WHERE id=?;',
            (currentSize, dateModified, key_id))

    def updateRecordAsStaticWithNewPath(self, newPath, key_id):
        self._executeWrite(
            'UPDATE jobs SET fileName=?, status=? WHERE id=?;',
            (newPath, self.verifyStatusCode(), key_id))

    def updateRecordAWithBatchUUIDReference(self, uuidReference, key_id):
        updated = self._executeWrite(
            'UPDATE jobs SET batchUUID=? WHERE id=?;', (uuidReference, key_id))
        if not updated:
            self.debugLog.debug('Error in updateRecordAWithBatchUUIDReference')

    def updateRecordAsDuplicateWithNewPath(self, newPath, key_id):
        self._executeWrite(
            'UPDATE jobs SET fileName=?, status=? WHERE id=?;',
            (newPath, self.errorFileExistsInPathChain(), key_id))

    def updateRecordWithHashStart(self, startTime, key_id):
        self._executeWrite(
            'UPDATE jobs SET dateHashStart=?, status=? WHERE id=?;',
            (startTime, self.hashStartStatusCode(), key_id))

    def updateRecordWithReHashStart(self, startTime, key_id):
        self._executeWrite(
            'UPDATE jobs SET dateOperationHashStart=?, status=? WHERE id=?;',
            (startTime, self.reHashStartStatusCode(), key_id))

    def updateRecordWithHashForStartTimeAndEndTime(self, hashString, startTime, endTime, key_id):
        self._executeWrite(
            'UPDATE jobs SET fileHash=?, dateHashStart=?, dateHashEnd=?, status=? WHERE id=?;',
            (hashString, startTime, endTime, self.hashStatusCode(), key_id))

    def updateRecordWithFinalEncryptedPathAndHashForStartTimeAndEndTime(self, newPath, hashString, startTime, endTime, key_id):
        '''Returns True when the update was committed, False otherwise.'''
        return self._executeWrite(
            'UPDATE jobs SET operationFileName=?, operationFileHash=?, '
            'dateOperationHashStart=?, dateOperationHashEnd=?, status=? WHERE id=?;',
            (newPath, hashString, startTime, endTime, self.reHashStatusCode(), key_id))

    def updateRecordStatusWithOperationStart(self, startTime, key_id):
        self._executeWrite(
            'UPDATE jobs SET status=?, dateOperationStart=? WHERE id=?;',
            (self.operationStartedStatusCode(), startTime, key_id))

    def updateRecordStatusWithEncryptedFileNameAndStartAndEndTime(self, statusValue, encryptedFilePath, startTime, endTime, key_id):
        self._executeWrite(
            'UPDATE jobs SET status=?, operationFileName=?, '
            'dateOperationStart=?, dateOperationEnd=? WHERE id=?;',
            (statusValue, encryptedFilePath, startTime, endTime, key_id))

    def updateRecordStatusWithDecryptedFileNameAndStartAndEndTime(self, statusValue, decryptedFilePath, startTime, endTime, key_id):
        self._executeWrite(
            'UPDATE jobs SET status=?, operationFileName=?, '
            'dateOperationStart=?, dateOperationEnd=? WHERE id=?;',
            (statusValue, decryptedFilePath, startTime, endTime, key_id))
def encrypt(dbPath):
    '''
    Poll the job database forever and encrypt every record that is ready.

    Returns immediately (after logging) when dbPath does not exist.
    Otherwise, every five seconds, each record returned by
    DataStore.recordsReadyToEncrypt() is handled as follows:

    * if the source file no longer exists the record is marked missing;
    * otherwise the operation start is recorded, the file is opened and
      locked with a shared portalocker lock, and singleShotEncryptor is
      run with the interim box of the record's path structure as the
      destination;
    * a return code of 2 triggers one retry after five seconds;
    * on any non-zero return code a failure email is sent, the file is
      moved to the path structure's error box and the record receives a
      failure status with placeholder start/stop dates.

    The record is finally updated with the resulting status, the expected
    encrypted file path and the start/stop times.
    '''
    log = DefaultLogger()
    timeFormat = "%Y-%m-%d %H:%M:%S"

    if not os.path.exists(dbPath):
        log.debug('can\'t find database at path')
        return

    datastore = DataStore(dbPath)
    loopcount = 0

    while True:
        sleep(5)

        # Heartbeat on every tenth pass.
        if loopcount % 10 == 0:
            log.debug('Encryptor Process is alive')
        loopcount += 1

        for record in datastore.recordsReadyToEncrypt():
            log.debug(record)

            key_id = record.id
            filePath = record.fileName
            pathStructureName = record.pathStructureName

            if not os.path.exists(filePath):
                log.debug('Encryptor: will update record status as the file no longer exists')
                datastore.updateRecordAsMissingWithID(key_id)
                continue

            options = configurationOptions()
            currentPathStructure = options.pathStructureWithName(pathStructureName)
            encryptionErrorPath = currentPathStructure['errorBox']
            encryptionInterimPath = currentPathStructure['interimBox']

            encryptedFilePath = os.path.join(encryptionInterimPath, os.path.basename(filePath))
            encryptionStart = datetime.datetime.now().strftime(timeFormat)
            nextStatusValue = datastore.operationCompleteStatusCode()

            options.inputPath = filePath
            options.destinationPath = os.path.dirname(encryptedFilePath)

            datastore.updateRecordStatusWithOperationStart(encryptionStart, key_id)

            log.debug('Encryptor: encrypting file ' + filePath)

            # There is a bug with MediaSeal when encrypting an encrypted
            # file; the lock-and-run below guards against MediaSeal
            # blowing the file away. -7 stays in place when the file could
            # not be opened/locked or the encryptor raised.
            returnCode = -7
            fileToEncrypt = None
            try:
                fileToEncrypt = open(filePath, 'rb')
                portalocker.lock(fileToEncrypt, portalocker.LOCK_SH)
                returnCode = singleShotEncryptor(options)
            except Exception as e:
                log.debug('unable to lock file')
                # Keep the real reason; this handler also catches errors
                # raised by the encryptor itself.
                log.debug(str(e))
            finally:
                if fileToEncrypt is not None:
                    fileToEncrypt.close()

            log.debug('Encryptor: encrypted file with return code ' + str(returnCode))
            encryptionStop = datetime.datetime.now().strftime(timeFormat)

            # try again should the connection be bad
            if returnCode == 2:
                sleep(5)
                returnCode = singleShotEncryptor(options)
                # The retry moves the END of the operation; this used to
                # overwrite the start time instead.
                encryptionStop = datetime.datetime.now().strftime(timeFormat)

            # as single files are encrypted, this logic can stay the same
            if returnCode != 0:
                info = "There was a problem encrypting " + filePath + ". Encountered Error Code: " + str(returnCode) + ". The file will be moved to the path's Error box: " + encryptionErrorPath
                sendFailureEmail(info)

                nextStatusValue = datastore.operationFailedStatusCode()
                # Placeholder dates for failed operations, formatted like
                # every other timestamp written to the database.
                placeholderDate = datetime.datetime(2000, 1, 1).strftime(timeFormat)
                encryptionStart = placeholderDate
                encryptionStop = placeholderDate

                if os.path.abspath(os.path.dirname(filePath)) != os.path.abspath(encryptionErrorPath):
                    log.debug('moving file to error path')
                    if os.path.exists(encryptionErrorPath):
                        try:
                            pathAfterSafelyMovingFileToDestinationFolder(filePath, encryptionErrorPath)
                        except Exception as e:
                            log.debug('Encryptor: Error moving file')
                            log.debug(str(e))
                            nextStatusValue = datastore.errorMovingFileStatusCode()
                    else:
                        log.debug('Encryptor: encryptionErrorPath doesnt exist')
                        nextStatusValue = datastore.errorPathDoesntExistStatusCode()

            datastore.updateRecordStatusWithEncryptedFileNameAndStartAndEndTime(
                nextStatusValue, encryptedFilePath, encryptionStart, encryptionStop, key_id)
def main():
    '''
    Start every watcher and worker process and keep them alive.

    Returns immediately when the configuration is not valid. Otherwise it
    starts one encryptorWatch process for 'ArchivePath', one decryptorWatch
    process for each of 'DecryptPath' .. 'DecryptPath5', and the
    acquirefile, preprocess, encrypt, decrypt and postprocess workers.
    Every two seconds the process status is refreshed and any process that
    is dead or has an exit code is replaced with a fresh one. On
    KeyboardInterrupt all processes are terminated and joined.
    '''
    options = configurationOptions()
    if not options.isValid():
        return

    log = DefaultLogger()
    dbPath = DefaultDatabasePath()

    encryptionPathToWatch = options.pathStructureWithName('ArchivePath')
    decryptionPathNames = ('DecryptPath', 'DecryptPath2', 'DecryptPath3',
                           'DecryptPath4', 'DecryptPath5')
    decryptionPathsToWatch = [options.pathStructureWithName(name)
                              for name in decryptionPathNames]

    # Paths
    processObjects = [{
        "target": encryptorWatch,
        "args": (encryptionPathToWatch, dbPath),
        "info": 'recreating encryptorWatcher process...'
    }]
    for decryptionPathToWatch in decryptionPathsToWatch:
        processObjects.append({
            "target": decryptorWatch,
            "args": (decryptionPathToWatch, dbPath),
            "info": 'recreating decryptionWatcher process...'
        })

    # Operations
    operations = (
        (acquirefile, 'recreating verifier process...'),
        (preprocess, 'recreating the preprocess process...'),
        (encrypt, 'recreating encrypt process...'),
        (decrypt, 'recreating decrypt process...'),
        (postprocess, 'recreating the postProcess process...'),
    )
    for target, info in operations:
        processObjects.append({
            "target": target,
            "args": (dbPath, ),
            "info": info
        })

    for processObject in processObjects:
        processObject["process"] = Process(target=processObject['target'],
                                           args=processObject['args'])

    for processObject in processObjects:
        processObject["process"].start()

    try:
        while True:
            sleep(2)
            options.updateProcessStatus("MainProcess is up")

            for processObject in processObjects:
                process = processObject['process']
                if not process.is_alive() or process.exitcode is not None:
                    options.updateProcessStatus(processObject['info'])
                    log.debug(processObject['info'])
                    process.terminate()
                    processObject['process'] = Process(
                        target=processObject['target'],
                        args=processObject['args'])
                    processObject['process'].start()
    except KeyboardInterrupt:
        # multiprocessing.Process has no stop(); terminate() is the call
        # that actually ends the child.
        for processObject in processObjects:
            processObject['process'].terminate()

    for processObject in processObjects:
        processObject['process'].join()
def checkSingleFiles(dbPath):
    """Promote newly added files whose copy has finished.

    For every record returned by ``recordsForVerifying()`` the file is
    considered settled when all of the following hold:

    * at least ``verificationWaitTime`` seconds passed since ``dateModified``;
    * the file still exists (otherwise the record is marked missing);
    * its size equals the size stored in the record (otherwise the record is
      refreshed with the new size and the current time);
    * its size is not zero;
    * an exclusive lock can be taken on it.

    A settled file that already exists elsewhere in its path structure is
    moved to the ``duplicateBox``.  Batch decrypt files only get their UUID /
    Archive Manager bookkeeping updated here; every other file is moved into
    the ``workingBox`` and its record is updated with the new path.

    dbPath -- path of the database file; nothing is done when it is missing.
    """
    logging = DefaultLogger()

    if not os.path.exists(dbPath):
        logging.debug('Acquire File: can\'t find database at path')
        return

    datastore = DataStore(dbPath)
    data = datastore.recordsForVerifying()

    for record in data:
        key_id = record.id
        filePath = record.fileName
        lastSize = int(record.fileSize)
        dateModifiedString = record.dateModified
        pathStructureName = record.pathStructureName
        operationType = record.operationType
        isBatch = record.isBatch
        batchName = record.batchName

        dateLastModified = datetime.datetime.strptime(dateModifiedString,
                                                      '%Y-%m-%d %H:%M:%S')
        timeDifference = datetime.datetime.now() - dateLastModified

        # This can change with an if/else should I decide I want to put temp
        # files to be decrypted in another place
        pathStructure = configurationOptions().pathStructureWithName(
            pathStructureName)
        workingPath = pathStructure['workingBox']

        # total_seconds() is required here: timedelta.seconds is only the
        # seconds component and wraps around every 24 hours.
        if timeDifference.total_seconds() < verificationWaitTime:
            continue

        if not os.path.exists(filePath):
            logging.debug(
                'Acquire File: Will update record status as the file no longer exists'
            )
            datastore.updateRecordAsMissingWithID(key_id)
            continue

        currentSize = os.path.getsize(filePath)

        if lastSize != currentSize:
            logging.debug(record)
            logging.debug(
                'Acquire File: attempting db modify as file size has changed...'
            )
            datastore.updateRecordWithCurrentSizeAndDateModifiedWithID(
                currentSize,
                datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"), key_id)
            continue

        if currentSize == 0:
            # if the current size is zero, then continue until it isn't or never will be
            # its likely the file has been queued to copy but no data has been moved yet (actual OSX case)
            continue

        logging.debug(
            'Acquire File: attempting to lock the file to see if I own the file yet...'
        )

        try:
            # The with block closes the handle even when the lock attempt fails.
            with open(filePath, 'rb') as fileToCheck:
                portalocker.lock(fileToCheck, portalocker.LOCK_EX)
            logging.debug(
                'Acquire File: proceeding to update the file status knowing that no one else is using it...'
            )
        except Exception:
            logging.debug(
                'Acquire File: unable to lock file as it is likely in use')
            continue

        if datastore.doesTheFilePathExistElseWhereInThePathStructure(
                filePath, operationType, pathStructureName):
            duplicatePath = pathStructure['duplicateBox']
            newPath = pathAfterSafelyMovingFileToDestinationFolder(
                filePath, duplicatePath)
            datastore.updateRecordAsDuplicateWithNewPath(newPath, key_id)
            continue

        if isBatch == 1 and operationType == 'Decrypt':
            # Batch decrypt files are not moved here; only their link to the
            # Archive Manager job is resolved.
            uuidString = fileNameForUUIDFileWithPath(os.path.dirname(filePath))

            if uuidString is None:
                # if I can't resolve the UUID, then resolve it through an AM Record
                # Does file's Archive Manager have data associated with it
                amRecord = datastore.recordWithNumberFromAMJobsTable(batchName)
                if amRecord is None:
                    info = "Acquire File: Archive Manager data doesn't exist for " + filePath
                    info = info + " " + "Marking file as having no AM Data. File will not be moved through the processing queue."
                    logging.debug(info)
                    datastore.updateRecordStatusWithID(
                        datastore.noArchiveManagerDataExistsForRecord(),
                        key_id)
                    continue
            else:
                logging.debug('Updating record %s with UUID %s' %
                              (filePath, uuidString))
                datastore.archiveManagerJobsTableRecordWithUUID(uuidString)
                datastore.updateRecordAWithBatchUUIDReference(
                    uuidString, key_id)
        else:
            # at this point, I need to subtract the file's main folder from the pathStructure['inBox']
            # this moves the file from the inbox to the working path
            try:
                newPath = pathAfterSafelyMovingFileToDestinationFolder(
                    filePath, workingPath)
            except Exception:
                logging.debug(
                    'This shouldn\'t happen as pathAfterSafelyMovingFileToDestinationFolder should create a unique name that avoids any collisions, otherwise the file has been moved'
                )
                logging.debug('Acquire File: Error moving file')
                info = 'There was a problem moving the file into into the queue for: ' + os.path.basename(
                    filePath)
                info = info + '\n' + 'This will require manual intervention as the occurrence is unique.'
                sendFailureEmail(info)
                continue

            logging.debug(
                'Acquire File: updating record file status and path....')
            datastore.updateRecordAsStaticWithNewPath(newPath, key_id)
def checkArchiveManagerJobs(dbPath):
    """Move the files of fully staged Archive Manager jobs into the queue.

    For every job returned by ``archiveManagerJobsReadyToStart()``:

    1. every file recorded for the job's UUID must be present in the job
       folder (``amRecord.amPath``), otherwise the job is skipped;
    2. an exclusive lock must be obtainable on every one of those files,
       otherwise the job is skipped;
    3. each file is renamed to ``<batchName>_<original name>``, moved to the
       ``workingBox`` of its path structure, and its record is updated with
       the new path.

    Failures while renaming or moving a file are logged and e-mailed, and the
    remaining files of the job are still processed.

    dbPath -- path of the database file handed to ``DataStore``.
    """
    logging = DefaultLogger()
    datastore = DataStore(dbPath)
    amRecords = datastore.archiveManagerJobsReadyToStart()

    for amRecord in amRecords:
        recordsInAMRecord = datastore.recordsForUUID(amRecord.uuid)
        filesInAMRecord = [x.fileName for x in recordsInAMRecord]
        # os.listdir() yields bare names, so compare against bare names too.
        expectedFileNames = set(
            os.path.basename(x) for x in filesInAMRecord)

        try:
            filesInCurrentFolder = os.listdir(amRecord.amPath)
        except Exception as e:
            # Best effort: an unreadable folder is treated as an empty one,
            # which makes the staging check below skip this job.
            logging.debug('Unable to list %s: %s' % (amRecord.amPath, str(e)))
            filesInCurrentFolder = []

        if any(currentFile not in expectedFileNames
               for currentFile in filesInCurrentFolder):
            logging.debug('Unknown files are present')
            # report error

        areAllFilesAvailableAndReady = True
        for currentFile in filesInAMRecord:
            logging.debug('%s' % (currentFile))
            lastComponent = os.path.basename(currentFile)
            if lastComponent not in filesInCurrentFolder:
                logging.debug('The following file is not yet available: %s' %
                              (lastComponent))
                areAllFilesAvailableAndReady = False

        if not areAllFilesAvailableAndReady:
            logging.debug('Not all of the files are staged yet')
            continue

        canLockAllRecords = True
        data = datastore.recordsForUUID(amRecord.uuid)

        for record in data:
            filePath = record.fileName
            try:
                # The with block closes the handle even when locking fails.
                with open(filePath, 'rb') as fileToCheck:
                    portalocker.lock(fileToCheck, portalocker.LOCK_EX)
                logging.debug(
                    'Acquire File: proceeding to update the file status knowing that no one else is using it...'
                )
            except Exception:
                logging.debug(
                    'Acquire File: unable to lock file as it is likely in use')
                canLockAllRecords = False

        if not canLockAllRecords:
            logging.debug('Can not lock all of the records yet')
            continue

        for record in data:
            key_id = record.id
            filePath = record.fileName
            pathStructureName = record.pathStructureName
            batchName = record.batchName

            workingPath = configurationOptions().pathStructureWithName(
                pathStructureName)['workingBox']

            proposedBatchName = batchName + "_" + os.path.basename(filePath)
            proposedPath = os.path.join(os.path.dirname(filePath),
                                        proposedBatchName)

            # we prepend the job name to the file here as it belongs to a batch
            try:
                if os.path.exists(proposedPath):
                    raise Exception('file already exists')
                os.rename(filePath, proposedPath)
                filePath = proposedPath
            except Exception as e:
                # this is an unlikely occurrence
                # str(e) rather than e.message: the latter is empty for OSError.
                info = 'There is a duplicate file in the queue for: ' + os.path.basename(
                    filePath) + " " + str(e)
                logging.debug(info)
                sendFailureEmail(info)
                continue

            # at this point, I need to subtract the file's main folder from the pathStructure['inBox']
            # this moves the file from the inbox to the working path
            try:
                newPath = pathAfterSafelyMovingFileToDestinationFolder(
                    filePath, workingPath)
            except Exception:
                logging.debug(
                    'This shouldn\'t happen as pathAfterSafelyMovingFileToDestinationFolder should create a unique name that avoids any collisions, otherwise the file has been moved'
                )
                logging.debug('Acquire File: Error moving file')
                info = 'There was a problem moving the file into into the queue for: ' + os.path.basename(
                    filePath)
                info = info + '\n' + 'This will require manual intervention as the occurrence is unique.'
                sendFailureEmail(info)
                continue

            logging.debug(
                'Acquire File: updating record file status and path....')
            datastore.updateRecordAsStaticWithNewPath(newPath, key_id)
def decrypt(dbPath):
    '''
    Worker loop that decrypts queued files.

    Every five seconds the records returned by recordsReadyToDecrypt() are
    fetched from the database at dbPath.  Each file is handed to the
    configured decryptor application, which writes into the interimBox of
    the record's path structure, and the record is then updated with the
    resulting status, output path and start/stop times.

    When the decryptor exits with a non-zero code both times are replaced by
    2000-01-01.  A single (non batch) file is moved to the errorBox; for a
    batch file only the error string of its Archive Manager job is extended.

    Returns immediately when dbPath does not exist, otherwise never returns.
    '''
    log = DefaultLogger()
    if not os.path.exists(dbPath):
        log.debug('Decryptor: can\'t find database at path')
        return

    store = DataStore(dbPath)
    timeFormat = "%Y-%m-%d %H:%M:%S"
    passes = 0

    while True:
        sleep(5)
        if passes % 10 == 0:
            log.debug('Decryptor Process is alive')
        passes += 1

        for record in store.recordsReadyToDecrypt():
            log.debug(record)

            recordId = record.id
            sourcePath = record.fileName
            structureName = record.pathStructureName
            isBatch = record.isBatch
            batchName = record.batchName

            # Guard: nothing to decrypt when the file has disappeared.
            if not os.path.exists(sourcePath):
                log.debug('Decryptor: will update record status as the file no longer exists')
                store.updateRecordAsMissingWithID(recordId)
                continue

            options = configurationOptions()
            pathStructure = options.pathStructureWithName(structureName)
            errorBox = pathStructure['errorBox']
            interimBox = pathStructure['interimBox']
            options.inputPath = sourcePath
            outputPath = os.path.join(interimBox, os.path.basename(sourcePath))

            startedAt = datetime.datetime.now().strftime(timeFormat)
            status = store.operationCompleteStatusCode()
            log.debug('Decryptor: decrypting file ' + sourcePath)

            # Record the start of the operation before launching the tool.
            store.updateRecordStatusWithOperationStart(startedAt, recordId)

            decryptor = subprocess.Popen(
                [options.decryptorApplicationPath, sourcePath, outputPath])
            # No pipes are attached, so this only waits for the tool to exit.
            decryptor.communicate()
            exitStatus = decryptor.returncode
            log.debug('Decryptor: decrypted file with return code ' + str(exitStatus))

            finishedAt = datetime.datetime.now().strftime(timeFormat)

            if exitStatus != 0:
                log.debug('An error occurred with the Decryption operation when decrypting %s.' % (sourcePath))

                startedAt = datetime.datetime(2000, 1, 1)
                finishedAt = datetime.datetime(2000, 1, 1)

                if isBatch == 0:
                    status = store.operationFailedStatusCode()
                    currentFolder = os.path.abspath(os.path.dirname(sourcePath))
                    if currentFolder != os.path.abspath(errorBox):
                        log.debug('moving file to error path')
                        if not os.path.exists(errorBox):
                            log.debug('Decryptor: decryptionErrorPath doesnt exist')
                            status = store.errorPathDoesntExistStatusCode()
                        else:
                            movedPath = pathAfterSafelyMovingFileToDestinationFolder(
                                sourcePath, errorBox)
                            if not os.path.exists(movedPath):
                                log.debug('Decryptor: Error moving file')
                                status = store.errorMovingFileStatusCode()
                else:
                    # Batch files stay where they are: only the error string
                    # of the owning Archive Manager job is updated, and the
                    # file's checksum is expected to fail later on.
                    amRecord = store.recordWithNumberFromAMJobsTable(batchName)
                    if amRecord == None:
                        # Not expected: processing does not get this far
                        # without a valid Archive Manager record.
                        info = ('An error occurred where no data was found for the Archive Manager job ' + batchName + '\n'
                                + 'This error should not happen. Please check ' + os.path.dirname(sourcePath) + '\n'
                                + 'The files will need to be manually removed from the Decryption Queue.')
                        log.debug(info)
                        sendFailureEmail(info)
                        continue

                    problem = 'A problem was encountered while decrypting %s.' % (sourcePath)
                    problem += 'The file\'s checksum will be calculated and compared against that in Daisy should the error have occurred ater the file was decrypted.'

                    if amRecord.errorString == '':
                        amRecord.errorString = problem
                    else:
                        amRecord.errorString = amRecord.errorString + '\n' + problem

                    store.updateArchiveManagerJobErrorString(amRecord, amRecord.errorString)

            # Persist the final status together with the output path and times.
            store.updateRecordStatusWithDecryptedFileNameAndStartAndEndTime(
                status, outputPath, startedAt, finishedAt, recordId)
def _moveFileReportingFailure(filePath, destination, destinationLabel, subject, logging):
    """Move filePath into destination; on failure log and e-mail, then return None.

    destinationLabel is the text used for the destination in the failure
    message; subject is the e-mail subject.  Returns the new path on success.
    """
    try:
        return pathAfterSafelyMovingFileToDestinationFolder(filePath, destination)
    except Exception:
        logging.debug('Analyze File: Error moving file')
        logging.debug('This should not happen as pathAfterSafelyMovingFileToDestinationFolder should create a '
                      'unique name that avoids any collisions, otherwise the file has been moved')
        info = 'There was a problem moving the file into into ' + destinationLabel + ' for: ' + os.path.basename(filePath)
        info = info + '\n' + 'This will require manual intervention as the occurrence is unique.'
        sendProcessFailureMessage({'subject': subject, 'message': info})
        logging.debug(info)
        return None


def _recordDaisyCommentsOutcome(datastore, logging, success, key_id):
    """Store in the local database whether the comments reached Daisy."""
    # Only a value equal to True counts as success, as in the Daisy setters' callers.
    if success == True:
        logging.debug('updating comments successfully')
        datastore.successfullyUpdatedDaisyComments(key_id)
    else:
        logging.debug('not updating comments successfully')
        datastore.failedToUpdateDaisyComments(key_id)


def _recordDaisyStatusOutcome(datastore, logging, success, key_id):
    """Store in the local database whether the status change reached Daisy."""
    if success == True:
        logging.debug('updating status successfully')
        datastore.successfullyUpdatedDaisyStatus(key_id)
    else:
        logging.debug('not updating status successfully')
        datastore.failedToUpdateDaisyStatus(key_id)


def analyzeBWFFile(dbPath, identifier = 1):
    """Worker loop that analyzes one queued BWF file per pass.

    Each pass sleeps 61-70 seconds, then, provided the database file exists
    and Daisy is up, claims one record under the shared file lock, and:

    * moves the file to the errorBox when its name carries no Daisy number;
    * fetches (vendor, comments, status) for the file from Daisy and skips
      the file when nothing is returned;
    * runs ``multiChannelBWFFileAnalysis``; on an exception the record is
      marked as an analysis error and the file is moved to the errorBox;
    * pushes updated comments and status to Daisy and mirrors them, with the
      outcome of each push, in the local database;
    * stores the analysis result and vendor, then moves the file to the
      outBox (analysis succeeded) or failBox and marks the record complete.

    dbPath     -- path of the database file.
    identifier -- label of this worker, used in log messages and passed to
                  ``retrieveDataForDANumber``.

    Returns when the database file disappears.  Any exception escaping the
    loop is logged and e-mailed, which also ends the function.
    """
    logging = DefaultLogger()
    loopcount = 0
    datastore = DataStore(dbPath)

    try:
        while True:
            sleep(60 + random.randint(1, 10))

            if loopcount % 20 == 0:
                logging.debug('bwf analyzer loop {} is active...'.format(identifier))
            loopcount += 1

            if not os.path.exists(dbPath):
                logging.debug('Acquire File: can not find database at path')
                return

            record = None

            # if daisy is not up then just wait until it is
            if isDaisyUp() == False:
                logging.debug('Daisy does not appear to be up')
                continue

            # get a lock on the file
            lock = lockWithFile()
            try:
                lock.acquire(timeout=-1)
                try:
                    if lock.i_am_locking():
                        record = datastore.oneRecordReadyForProcessing()
                        if record is not None:
                            logging.debug('process {} is acquiring the lock'.format(identifier))
                            datastore.updateRecordAsInProcess(record.id)
                finally:
                    # Always give the lock back once it was acquired, even if
                    # claiming the record raised.
                    lock.release()
            except Exception as e:
                # The record was not claimed cleanly (or the lock is held by
                # another worker): skip this pass instead of processing it.
                logging.debug('process {} did not claim a record: {}'.format(identifier, str(e)))
                record = None

            if record is None:
                continue

            filePath = record.fileName
            fileName = os.path.basename(filePath)

            # lets check that is has a genuine Daisy Number
            if getDaisyNumber(fileName) is None:
                errorBox = configurationOptions().defaultPathStructure()['errorBox']
                errorBox = os.path.expanduser(errorBox)
                sendProcessFailureMessage({'subject': 'BWF Error: file added that has no DANumber',
                                           'message': 'A file, %s, was deposited that does not have a Daisy Number' % (fileName)})

                # move to errorBox
                print("Moving file %s into %s" % (filePath, errorBox))
                _moveFileReportingFailure(filePath, errorBox, 'the errorBox', 'BWF Error', logging)

                # The record is flagged whether or not the move succeeded.
                datastore.updateRecordAsNotHavingADaisyNumber(record.id)
                continue

            # lets look up metadata before we even proceed, if can't get the metadata we don't want to analyze files
            dataTuple = retrieveDataForDANumber(fileName, identifier)
            logging.debug('Data for {} Before: {}'.format(fileName, dataTuple))

            if dataTuple is None:
                # ok, lets send an email that will be sent at a maximum of 1 per 4 hours
                # No exception is in scope here, so name the affected file.
                result = "Process Error: Daisy Information not Available: " + fileName
                sendPeriodicFailureMessage(result)
                logging.debug('A Periodic Failure Message attempt was made.')
                continue

            result = None
            resultObject = None
            vendor = dataTuple[0]
            comments = dataTuple[1]
            status = dataTuple[2]

            # once we have the metadata, lets examine the file
            try:
                logging.debug('Will examine %s in loop %s' % (filePath, str(identifier)))
                resultObject = multiChannelBWFFileAnalysis(filePath)
                result = json.dumps(resultObject)
                if resultObject is None:
                    logging.debug('The analysis of the file %s is "None". This should not occur.' % (filePath))
                    raise Exception('The analysis of the file %s is "None". This should not occur.' % (filePath))
            except Exception as e:
                logging.debug('An exception occurred with %s in identifier %s.' % (filePath, str(identifier)))
                # mark as error
                datastore.updateRecordWithAnalysisError(record.id)
                errorBox = configurationOptions().defaultPathStructure()['errorBox']
                errorBox = os.path.expanduser(errorBox)

                # send email
                result = "Process Error: An Exception occurred when processing the file: %s. The file will be moved to %s" % (str(e), errorBox)
                logging.debug(result)
                sendProcessFailureMessage({'subject': 'Process Error', 'message': result})

                # move to errorBox
                print("Moving file %s into %s" % (filePath, errorBox))
                logging.debug("Moving file %s into %s" % (filePath, errorBox))
                _moveFileReportingFailure(filePath, errorBox, 'the errorBox', 'Process Error Moving File', logging)
                continue

            info = 'PreExisting Data for the following file %s: %s %s %s' % (fileName, comments, vendor, status)
            logging.debug(info)

            # Added after json.dumps above, so the stored analysis text does
            # not contain the vendor; it is saved separately further down.
            resultObject['vendor'] = vendor

            if comments is None:
                comments = ''
            # update Comments
            comments = stringMinusBWFAnalyzerInfo(comments)
            if comments != '':
                comments += " "

            # The result object is not None as we would have bailed otherwise
            if resultObject['result'] == 'success':
                comments += BWFAnalyzerInfoForSuccess(fileName)
                success = setComments(getDaisyNumber(fileName), comments)

                # update local datastore
                datastore.updateRecordWithComments(comments, record.id)
                _recordDaisyCommentsOutcome(datastore, logging, success, record.id)

                # update the status to pending fix
                # only if the status is Needs Attention, otherwise we don't have any further knowledge of what is going on
                nextStatus = 'NO CHANGE'
                success = True

                if status == "Needs Attention":
                    # ok to update status
                    success = setStatusAsPendingFix(getDaisyNumber(fileName))
                    nextStatus = 'Pending Fix'

                if status in ['Being Made', 'Ordered']:
                    # ok to update status
                    success = setStatusAsPendingArchive(getDaisyNumber(fileName))
                    nextStatus = 'Pending Archive'

                datastore.updateRecordWithDaisyStatus(nextStatus, record.id)
                _recordDaisyStatusOutcome(datastore, logging, success, record.id)
            else:
                sendAnalysisFailure(resultObject)

                comments += BWFAnalyzerInfoForErrors(resultObject['errors'])
                success = setComments(getDaisyNumber(fileName), comments)

                # update local datastore
                datastore.updateRecordWithComments(comments, record.id)
                _recordDaisyCommentsOutcome(datastore, logging, success, record.id)

                # update Status
                if status not in ['Being Made', 'Ordered', 'Pending Archive']:
                    # ok to update status
                    success = setStatusAsNeedsAttention(getDaisyNumber(fileName))
                    datastore.updateRecordWithDaisyStatus('Needs Attention', record.id)
                else:
                    success = setStatusAsPendingArchive(getDaisyNumber(fileName))
                    datastore.updateRecordWithDaisyStatus('Pending Archive', record.id)
                _recordDaisyStatusOutcome(datastore, logging, success, record.id)

            if datastore.updateRecordWithAnalysisData(result, record.id) == False:
                info = 'Unable to save record %d %s' % (record.id, result)
                sendProcessFailureMessage({'subject': 'Process Error Unable To Save Record', 'message': info})
                continue

            # update vendor info
            datastore.updateRecordWithVendor(vendor, record.id)

            dataTuple = retrieveDataForDANumber(fileName, identifier)
            logging.debug('Data for {} After: {}'.format(fileName, dataTuple))

            # now that we have saved the data, we are ready to move the file
            nextBox = configurationOptions().defaultPathStructure()['outBox']
            if resultObject['result'] != 'success':
                nextBox = configurationOptions().defaultPathStructure()['failBox']
            nextBox = os.path.expanduser(nextBox)

            newPath = _moveFileReportingFailure(filePath, nextBox, nextBox, 'Process Error Moving File', logging)
            if newPath is None:
                continue

            logging.debug('Analyze File: preparing to move file to final path...')
            datastore.updateRecordAsCompleteWithFinalPath(newPath, record.id)

    except Exception as e:
        info = 'Exception in analyzeBWFFile' + str(e)
        logging.debug(info)
        sendProcessFailureMessage({'subject': 'Exception!', 'message': info})
class DataStore():
    """SQLite-backed store for the ``jobs`` table.

    Every method opens its own connection to ``storePath`` and closes it
    before returning.  Database errors are written to the debug log instead
    of being raised; methods that report an outcome do so through their
    return value.
    """

    def __init__(self, storePath):
        """Remember ``storePath`` and create the jobs table if it is missing."""
        self.debugLog = DefaultLogger()
        self.storePath = storePath
        self.createJobsTable(storePath)

    # --- status codes stored in jobs.status -------------------------------

    def fileHasNoDaisyNumber(self):
        return -70

    def errorAnalyzingFile(self):
        return -60

    def errorFileExistsInPathChain(self):
        return -50

    def errorMovingFileStatusCode(self):
        return -40

    def errorPathDoesntExistStatusCode(self):
        return -30

    def operationFailedStatusCode(self):
        return -20

    def missingRecordStatusCode(self):
        return -10

    def addedStatusCode(self):
        return 0

    def verifyStatusCode(self):
        return 10

    def inProcessStatusCode(self):
        return 15

    def fileHasBeenAnalyzedStatusCode(self):
        return 20

    def fileHasBeenMovedToFinalLocation(self):
        return 30

    # --- connection helpers -------------------------------------------------

    def dbConnection(self):
        """Return a new connection to ``storePath``, or None when it cannot be opened."""
        db = None
        try:
            db = sqlite3.connect(self.storePath)
        except Exception as e:
            self.debugLog.debug(str(e))
        return db

    def _timestamp(self):
        """Return the current local time in the format used by the date columns."""
        return datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    def _executeUpdate(self, sql, params=()):
        """Run one write statement and commit it.

        Returns True on success.  On any failure the error is logged, the
        transaction is rolled back and False is returned.  The connection is
        always closed.
        """
        db = self.dbConnection()
        if db is None:
            # dbConnection() already logged why the database is unavailable.
            return False
        try:
            db.cursor().execute(sql, params)
            db.commit()
            return True
        except Exception as e:
            self.debugLog.debug(str(e))
            db.rollback()
            return False
        finally:
            db.close()

    def _fetchAll(self, sql, params=()):
        """Run one query and return all rows, or None when the query failed."""
        db = self.dbConnection()
        if db is None:
            return None
        try:
            cursor = db.cursor()
            cursor.execute(sql, params)
            return cursor.fetchall()
        except Exception as e:
            self.debugLog.debug(str(e))
            return None
        finally:
            db.close()

    # --- schema -------------------------------------------------------------

    def createJobsTable(self, pathToDBFolder):
        """Create the ``jobs`` table when it does not exist yet.

        ``pathToDBFolder`` is accepted for compatibility but not used; the
        database opened is always ``self.storePath``.
        """
        created = self._executeUpdate(
            '''CREATE TABLE IF NOT EXISTS jobs(
                id INTEGER PRIMARY KEY,
                fileName TEXT,
                fileSize INTEGER,
                dateAdded DATETIME,
                dateModified DATETIME,
                status INTEGER,
                analysis TEXT,
                vendor TEXT,
                daisyComments TEXT,
                commentsUpdatedToDaisy INTEGER,
                daisyStatus TEXT,
                statusUpdatedToDaisy INTEGER)''')
        if not created:
            self.debugLog.debug('Error: Unable to call createJobsTable')

    # --- queries ------------------------------------------------------------

    def doesTheFilePathExistElseWhereInThePathStructure(
            self, filePath, operationType, pathStructureName):
        '''
        Checks to make sure the file isn't already in the queue.

        Returns True when a file with the same base name exists in any of the
        folders listed by pathStructurePathsToCheckForDuplicates() for the
        given path structure, False otherwise.  operationType is not used.
        '''
        currentPathStructure = configurationOptions().pathStructureWithName(
            pathStructureName)
        fileName = os.path.basename(filePath)

        # exclude inBox
        return any(
            os.path.exists(os.path.join(currentPathStructure[path], fileName))
            for path in
            configurationOptions().pathStructurePathsToCheckForDuplicates())

    def addFileToDatabase(self, filePath):
        '''
        Add a file to the database with the "added" status.

        Nothing is inserted when the file does not exist.
        '''
        if not os.path.exists(filePath):
            return

        fileSize = os.path.getsize(filePath)
        now = self._timestamp()
        inserted = self._executeUpdate(
            '''INSERT INTO jobs(
                fileName, fileSize, dateAdded, dateModified, status, analysis,
                vendor, daisyComments, commentsUpdatedToDaisy, daisyStatus,
                statusUpdatedToDaisy)
            VALUES (?,?,?,?,?,?,?,?,?,?,?)''',
            (filePath, fileSize, now, now, self.addedStatusCode(), '',
             'UNKNOWN', '', '0', '', '0'))
        if not inserted:
            print('addBWFFileToDatabase Error')

    def updateModificationDateForFilePath(self, filePath):
        """Set dateModified to now for the single "added" record of filePath.

        Nothing is changed when no record or more than one record matches;
        both cases are logged.
        """
        db = self.dbConnection()
        if db is None:
            return
        try:
            cursor = db.cursor()
            cursor.execute(
                '''SELECT * FROM jobs WHERE fileName=? AND status=?''',
                (filePath, self.addedStatusCode()))
            data = cursor.fetchall()

            if len(data) > 1:
                self.debugLog.debug('Error: record collision')
            elif not data:
                self.debugLog.debug('Error: no record found to update')
            else:
                key_id = data[0][0]
                cursor.execute(
                    '''UPDATE jobs SET dateModified=? WHERE id=?;''',
                    (self._timestamp(), key_id))
                db.commit()
        except Exception as e:
            self.debugLog.debug(str(e))
            db.rollback()
        finally:
            db.close()

    def recordsForVerifying(self):
        return self.recordsForStatus(self.addedStatusCode())

    def recordsReadyForProcessing(self):
        return self.recordsForStatus(self.verifyStatusCode())

    def oneRecordReadyForProcessing(self):
        return self.oneRecordForStatus(self.verifyStatusCode())

    def recordsForStatus(self, status):
        """Return every record with the given status; an empty list on error."""
        rows = self._fetchAll('''SELECT * FROM jobs WHERE status=?''',
                              (status, ))
        if rows is None:
            return []
        try:
            return [DataStoreRecord(row) for row in rows]
        except Exception as e:
            self.debugLog.debug(str(e))
            return []

    def oneRecordForStatus(self, status):
        """Return one record with the given status, or None.

        None is returned both when no record matches and when the lookup
        fails, so callers can test the result against None in either case.
        """
        rows = self._fetchAll(
            '''SELECT * FROM jobs WHERE status=? LIMIT 1''', (status, ))
        if not rows:
            return None
        try:
            return DataStoreRecord(rows[0])
        except Exception as e:
            self.debugLog.debug(str(e))
            return None

    # --- updates ------------------------------------------------------------

    def updateRecordStatusWithID(self, status, key_id):
        self._executeUpdate('''UPDATE jobs SET status=? WHERE id=?;''',
                            (status, key_id))

    def updateRecordAsStaticWithNewPath(self, newPath, key_id):
        self._executeUpdate(
            '''UPDATE jobs SET fileName=?, status=? WHERE id=?;''',
            (newPath, self.verifyStatusCode(), key_id))

    def updateRecordAsCompleteWithFinalPath(self, newPath, key_id):
        self._executeUpdate(
            '''UPDATE jobs SET fileName=?, status=?, dateModified=? WHERE id=?;''',
            (newPath, self.fileHasBeenMovedToFinalLocation(),
             self._timestamp(), key_id))

    def updateRecordAsMissingWithID(self, key_id):
        self.updateRecordStatusWithID(self.missingRecordStatusCode(), key_id)

    def updateRecordAsInProcess(self, key_id):
        self.updateRecordStatusWithID(self.inProcessStatusCode(), key_id)

    def updateRecordWithAnalysisError(self, key_id):
        self.updateRecordStatusWithID(self.errorAnalyzingFile(), key_id)

    def updateRecordAsNotHavingADaisyNumber(self, key_id):
        self.updateRecordStatusWithID(self.fileHasNoDaisyNumber(), key_id)

    def updateRecordWithAnalysisData(self, analysisData, key_id):
        """Store the analysis text and mark the record analyzed; True on success."""
        return self._executeUpdate(
            '''UPDATE jobs SET analysis=?, status=? WHERE id=?;''',
            (analysisData, self.fileHasBeenAnalyzedStatusCode(), key_id))

    def updateRecordAsMissingWithFileNameAndID(self, filePath, key_id):
        # we update the name in case any source file that gets moved collides with another file
        self._executeUpdate(
            '''UPDATE jobs SET status=?, fileName=? WHERE id=?;''',
            (self.missingRecordStatusCode(), filePath, key_id))

    def updateRecordWithCurrentSizeAndDateModifiedWithID(
            self, currentSize, dateModified, key_id):
        self._executeUpdate(
            '''UPDATE jobs SET fileSize=?, dateModified=? WHERE id=?;''',
            (currentSize, dateModified, key_id))

    def updateRecordWithVendor(self, vendor, key_id):
        self._executeUpdate('''UPDATE jobs SET vendor=? WHERE id=?;''',
                            (vendor, key_id))

    def updateRecordWithComments(self, nComments, key_id):
        self._executeUpdate('''UPDATE jobs SET daisyComments=? WHERE id=?;''',
                            (nComments, key_id))

    def updateRecordWithDaisyStatus(self, nStatus, key_id):
        self._executeUpdate('''UPDATE jobs SET daisyStatus=? WHERE id=?;''',
                            (nStatus, key_id))

    def successfullyUpdatedDaisyComments(self, key_id):
        self.setDaisyCommentsPosted(1, key_id)

    def failedToUpdateDaisyComments(self, key_id):
        self.setDaisyCommentsPosted(-1, key_id)

    def setDaisyCommentsPosted(self, posted, key_id):
        self._executeUpdate(
            '''UPDATE jobs SET commentsUpdatedToDaisy=? WHERE id=?;''',
            (posted, key_id))

    def successfullyUpdatedDaisyStatus(self, key_id):
        self.setDaisyStatusPosted(1, key_id)

    def failedToUpdateDaisyStatus(self, key_id):
        self.setDaisyStatusPosted(-1, key_id)

    def setDaisyStatusPosted(self, posted, key_id):
        self._executeUpdate(
            '''UPDATE jobs SET statusUpdatedToDaisy=? WHERE id=?;''',
            (posted, key_id))
def __init__(self, storePath):
    """Set up the store for the database at ``storePath``.

    Keeps the path and a logger on the instance, then calls
    ``createJobsTable`` with that path.
    """
    self.storePath = storePath
    self.debugLog = DefaultLogger()
    self.createJobsTable(self.storePath)
class DataStore(object):
    """Persist and query file-processing jobs in the SQLite ``jobs`` table.

    Each public method opens its own short-lived connection to ``storePath``.
    Failures are written to ``debugLog`` rather than raised, and a connection
    that was opened is always closed again, on success and on error.
    """

    _TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S"

    def __init__(self, storePath):
        """Remember *storePath*, attach a logger and make sure the table exists."""
        self.debugLog = DefaultLogger()
        self.storePath = storePath
        self.createJobsTable(storePath)

    # ------------------------------------------------------------------
    # Status codes stored in the ``status`` column.
    # ------------------------------------------------------------------
    def fileHasNoDaisyNumber(self):
        return -70

    def errorAnalyzingFile(self):
        return -60

    def errorFileExistsInPathChain(self):
        return -50

    def errorMovingFileStatusCode(self):
        return -40

    def errorPathDoesntExistStatusCode(self):
        return -30

    def operationFailedStatusCode(self):
        return -20

    def missingRecordStatusCode(self):
        return -10

    def addedStatusCode(self):
        return 0

    def verifyStatusCode(self):
        return 10

    def inProcessStatusCode(self):
        return 15

    def fileHasBeenAnalyzedStatusCode(self):
        return 20

    def fileHasBeenMovedToFinalLocation(self):
        return 30

    # ------------------------------------------------------------------
    # Private plumbing shared by the query/update methods.
    # ------------------------------------------------------------------
    @staticmethod
    def _errorText(error):
        """Return a printable message for *error*.

        ``error.message`` only exists on Python 2 and is empty for
        multi-argument exceptions, so fall back to ``str(error)``.
        """
        return getattr(error, 'message', None) or str(error)

    def _now(self):
        """Return the current local time formatted for the DATETIME columns."""
        return datetime.datetime.now().strftime(self._TIMESTAMP_FORMAT)

    def dbConnection(self):
        """Open a connection to ``storePath``; return None if that fails."""
        db = None
        try:
            db = sqlite3.connect(self.storePath)
        except Exception as e:
            self.debugLog.debug(self._errorText(e))
        return db

    def _runWrite(self, sql, params=()):
        """Execute one write statement and commit it.

        Returns True on success. On any failure the error is logged, the
        transaction is rolled back and False is returned; nothing is raised.
        """
        db = self.dbConnection()
        if db is None:
            # dbConnection already logged why the connection failed.
            return False
        try:
            db.cursor().execute(sql, params)
            db.commit()
            return True
        except Exception as e:
            self.debugLog.debug(self._errorText(e))
            try:
                db.rollback()
            except Exception as rollbackError:
                self.debugLog.debug(self._errorText(rollbackError))
            return False
        finally:
            db.close()

    def _selectRecords(self, sql, params):
        """Run a SELECT and wrap each row in DataStoreRecord; [] on failure."""
        db = self.dbConnection()
        if db is None:
            return []
        try:
            cursor = db.cursor()
            cursor.execute(sql, params)
            return [DataStoreRecord(row) for row in cursor.fetchall()]
        except Exception as e:
            self.debugLog.debug(self._errorText(e))
            return []
        finally:
            db.close()

    # ------------------------------------------------------------------
    # Schema and inserts.
    # ------------------------------------------------------------------
    def createJobsTable(self, pathToDBFolder):
        """Create the ``jobs`` table if it does not exist yet.

        *pathToDBFolder* is accepted for interface compatibility only; the
        connection always targets ``self.storePath``.
        """
        db = self.dbConnection()
        if db is None:
            self.debugLog.debug('Error: Unable to call createJobsTable')
            return
        try:
            db.cursor().execute(
                '''CREATE TABLE IF NOT EXISTS jobs(id INTEGER PRIMARY KEY,
                fileName TEXT,
                fileSize INTEGER,
                dateAdded DATETIME,
                dateModified DATETIME,
                status INTEGER,
                analysis TEXT,
                vendor TEXT,
                daisyComments TEXT,
                commentsUpdatedToDaisy INTEGER,
                daisyStatus TEXT,
                statusUpdatedToDaisy INTEGER)''')
            db.commit()
        except Exception as e:
            self.debugLog.debug(
                'Error: Unable to call createJobsTable' + self._errorText(e))
        finally:
            db.close()

    def doesTheFilePathExistElseWhereInThePathStructure(
            self, filePath, operationType, pathStructureName):
        """Return True if a file with the same base name already sits in one
        of the configured duplicate-check folders of the named path structure.

        *operationType* is unused. Nothing is moved by this method.
        """
        currentPathStructure = configurationOptions().pathStructureWithName(
            pathStructureName)
        fileName = os.path.basename(filePath)
        # The configured list of folders to check excludes the inBox.
        return any(
            os.path.exists(os.path.join(currentPathStructure[path], fileName))
            for path in
            configurationOptions().pathStructurePathsToCheckForDuplicates())

    def addFileToDatabase(self, filePath):
        """Insert a new job row for *filePath* with the "added" status.

        Does nothing when the file does not exist.
        """
        if not os.path.exists(filePath):
            return

        fileSize = os.path.getsize(filePath)
        # One timestamp for both columns so they cannot straddle a second.
        timestamp = self._now()
        inserted = self._runWrite(
            '''INSERT INTO jobs(
            fileName,
            fileSize,
            dateAdded,
            dateModified,
            status,
            analysis,
            vendor,
            daisyComments,
            commentsUpdatedToDaisy,
            daisyStatus,
            statusUpdatedToDaisy)
            VALUES (?,?,?,?,?,?,?,?,?,?,?)''',
            (filePath, fileSize, timestamp, timestamp, 0, '', 'UNKNOWN', '',
             '0', '', '0'))
        if not inserted:
            print('addBWFFileToDatabase Error')

    def updateModificationDateForFilePath(self, filePath):
        """Stamp ``dateModified`` with the current time on the single "added"
        row whose fileName is *filePath*.

        Logs and leaves the table untouched when zero or several rows match.
        """
        db = self.dbConnection()
        if db is None:
            return
        try:
            cursor = db.cursor()
            try:
                cursor.execute(
                    '''SELECT * FROM jobs WHERE fileName=? AND status=?''',
                    (filePath, self.addedStatusCode()))
                data = cursor.fetchall()
            except Exception as e:
                self.debugLog.debug(self._errorText(e))
                return

            if len(data) > 1:
                self.debugLog.debug('Error: record collision')
                return
            if not data:
                self.debugLog.debug('Error: no record found for file path')
                return

            try:
                cursor.execute(
                    '''UPDATE jobs SET dateModified=? WHERE id=?;''',
                    (self._now(), data[0][0]))
                db.commit()
            except Exception as e:
                self.debugLog.debug(self._errorText(e))
                db.rollback()
        finally:
            db.close()

    # ------------------------------------------------------------------
    # Queries.
    # ------------------------------------------------------------------
    def recordsForVerifying(self):
        return self.recordsForStatus(self.addedStatusCode())

    def recordsReadyForProcessing(self):
        return self.recordsForStatus(self.verifyStatusCode())

    def oneRecordReadyForProcessing(self):
        return self.oneRecordForStatus(self.verifyStatusCode())

    def recordsForStatus(self, status):
        """Return every record with *status* (an empty list on failure)."""
        return self._selectRecords(
            '''SELECT * FROM jobs WHERE status=?''', (status,))

    def oneRecordForStatus(self, status):
        """Return one record with *status*, or None if none exists or the
        lookup failed."""
        records = self._selectRecords(
            '''SELECT * FROM jobs WHERE status=? LIMIT 1''', (status,))
        if not records:
            return None
        return records[0]

    # ------------------------------------------------------------------
    # Updates.
    # ------------------------------------------------------------------
    def updateRecordStatusWithID(self, status, key_id):
        self._runWrite('''UPDATE jobs SET status=? WHERE id=?;''',
                       (status, key_id))

    def updateRecordAsStaticWithNewPath(self, newPath, key_id):
        self._runWrite('''UPDATE jobs SET fileName=?, status=? WHERE id=?;''',
                       (newPath, self.verifyStatusCode(), key_id))

    def updateRecordAsCompleteWithFinalPath(self, newPath, key_id):
        self._runWrite(
            '''UPDATE jobs SET fileName=?, status=?, dateModified=? WHERE id=?;''',
            (newPath, self.fileHasBeenMovedToFinalLocation(), self._now(),
             key_id))

    def updateRecordAsMissingWithID(self, key_id):
        self.updateRecordStatusWithID(self.missingRecordStatusCode(), key_id)

    def updateRecordAsInProcess(self, key_id):
        self.updateRecordStatusWithID(self.inProcessStatusCode(), key_id)

    def updateRecordWithAnalysisError(self, key_id):
        self.updateRecordStatusWithID(self.errorAnalyzingFile(), key_id)

    def updateRecordAsNotHavingADaisyNumber(self, key_id):
        self.updateRecordStatusWithID(self.fileHasNoDaisyNumber(), key_id)

    def updateRecordWithAnalysisData(self, analysisData, key_id):
        """Store the analysis text and mark the record analyzed.

        Returns True when the row was written, False otherwise.
        """
        return self._runWrite(
            '''UPDATE jobs SET analysis=?, status=? WHERE id=?;''',
            (analysisData, self.fileHasBeenAnalyzedStatusCode(), key_id))

    def updateRecordAsMissingWithFileNameAndID(self, filePath, key_id):
        # The name is updated too, in case a moved source file collided with
        # another file and was renamed.
        self._runWrite('''UPDATE jobs SET status=?, fileName=? WHERE id=?;''',
                       (self.missingRecordStatusCode(), filePath, key_id))

    def updateRecordWithCurrentSizeAndDateModifiedWithID(
            self, currentSize, dateModified, key_id):
        self._runWrite(
            '''UPDATE jobs SET fileSize=?, dateModified=? WHERE id=?;''',
            (currentSize, dateModified, key_id))

    def updateRecordWithVendor(self, vendor, key_id):
        self._runWrite('''UPDATE jobs SET vendor=? WHERE id=?;''',
                       (vendor, key_id))

    def updateRecordWithComments(self, nComments, key_id):
        self._runWrite('''UPDATE jobs SET daisyComments=? WHERE id=?;''',
                       (nComments, key_id))

    def updateRecordWithDaisyStatus(self, nStatus, key_id):
        self._runWrite('''UPDATE jobs SET daisyStatus=? WHERE id=?;''',
                       (nStatus, key_id))

    def successfullyUpdatedDaisyComments(self, key_id):
        self.setDaisyCommentsPosted(1, key_id)

    def failedToUpdateDaisyComments(self, key_id):
        self.setDaisyCommentsPosted(-1, key_id)

    def setDaisyCommentsPosted(self, posted, key_id):
        self._runWrite(
            '''UPDATE jobs SET commentsUpdatedToDaisy=? WHERE id=?;''',
            (posted, key_id))

    def successfullyUpdatedDaisyStatus(self, key_id):
        self.setDaisyStatusPosted(1, key_id)

    def failedToUpdateDaisyStatus(self, key_id):
        self.setDaisyStatusPosted(-1, key_id)

    def setDaisyStatusPosted(self, posted, key_id):
        self._runWrite(
            '''UPDATE jobs SET statusUpdatedToDaisy=? WHERE id=?;''',
            (posted, key_id))
def processRecordsReadyToBeHashed(data, datastore):
    """Post-process each record in *data*: hash, verify and route its files.

    For every record the function:
      1. sends the source file to the error box if the processed file is gone;
      2. hashes the processed file (only for 'Decrypt' operations);
      3. for 'Decrypt', compares that hash with the checksum looked up through
         DaisyMetadataLookup; a failing non-batch file is moved to the error
         box and a failure email is sent;
      4. for batch decrypts, collects the file in a per-job folder and, once
         every file of the job is present, moves the folder to the out box
         (or the error box when the job accumulated errors);
      5. otherwise moves the processed file to the out box and the source
         file to the original box.

    data      -- iterable of records (attributes read: id, fileName,
                 operationFileName, operationType, pathStructureName,
                 isBatch, batchName).
    datastore -- store object used to persist status, hashes and job errors.

    Returns None. Problems are logged and emailed rather than raised, with
    the exception of calls that the original flow left unguarded.
    """
    logging = DefaultLogger()

    for record in data:
        logging.debug(record)

        key_id = record.id
        sourceFilePath = record.fileName
        filePath = record.operationFileName
        recordOperationType = record.operationType
        pathStructureName = record.pathStructureName
        isBatch = record.isBatch
        batchName = record.batchName

        currentPathStructure = configurationOptions().pathStructureWithName(
            pathStructureName)
        finalPath = currentPathStructure['outBox']
        finalOriginalDestinationPath = currentPathStructure['originalBox']
        errorPath = currentPathStructure['errorBox']

        if not os.path.exists(filePath):
            # The processed file is missing: park the source file in the
            # error box and record where it went.
            # ADD LOGIC FOR BATCH PROCESSING
            logging.debug(
                'PostProcess: Will update record status as the encrypted file does not exist'
            )
            newPath = pathAfterSafelyMovingFileToDestinationFolder(
                sourceFilePath, errorPath)
            datastore.updateRecordAsMissingWithFileNameAndID(newPath, key_id)
            continue

        # CALCULATE HASH
        startTime = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        datastore.updateRecordWithReHashStart(startTime, key_id)

        # Only files being decrypted are hashed.
        if recordOperationType == 'Decrypt':
            try:
                fileToHash = open(filePath, 'rb')
                try:
                    logging.debug(
                        'PostProcess: locked file to calculate hash...')
                    portalocker.lock(fileToHash, portalocker.LOCK_SH)
                    hashString = hashForFile(fileToHash)
                    logging.debug('PostProcess Hasher: unlocking file...')
                finally:
                    # Always close the handle, even when hashing failed.
                    fileToHash.close()
            except Exception as e:
                logging.debug('PostProcess: unable to hash file: ' + str(e))
                hashString = 'HASH_GEN_ERROR'
        else:
            hashString = "NO_HASH_FOR_ENCRYPTED_FILES"

        endTime = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        # ONLY DECRYPTED FILES' HASH IS CHECKED
        didChecksumFail = False
        checkSumErrorString = None
        checksumErrorMessage = ''
        errorRecordStatus = 0

        if recordOperationType == 'Decrypt':
            fileBaseName = os.path.basename(filePath)
            if isBatch:
                # Batch files carry a "<batchName>_" prefix; strip it.
                fileBaseName = os.path.basename(filePath).split(
                    (batchName + "_"))[1]

            daisyNumber = getDaisyNumber(fileBaseName)

            try:
                if daisyNumber is None:
                    errorString = 'There was an error Decrypting the file: ' + fileBaseName + '.\n'
                    errorString = errorString + 'Unable to retrieve Daisy Number for ' + filePath + ' ' + batchName
                    logging.debug(errorString)
                    errorRecordStatus = datastore.daisyEntryNotFoundStatusCode()
                    raise Exception(errorString)

                originalChecksum = DaisyMetadataLookup(
                    daisyNumber).checksumForFile(fileBaseName)

                if originalChecksum is None:
                    errorString = 'There was an error Decrypting the file: ' + fileBaseName + '.\n'
                    errorString = errorString + 'Unable to retrieve Checksum for ' + filePath + ' ' + batchName
                    logging.debug(errorString)
                    errorRecordStatus = datastore.checksumLookupFailedStatusCode()
                    raise Exception(errorString)

                if originalChecksum.upper() != hashString.upper():
                    errorString = 'Checksums do not match for file ' + filePath + '\n'
                    errorString = errorString + ' ' + batchName + " expected the checksum: " + originalChecksum + '\n'
                    errorString = errorString + " but found this checksum instead:" + hashString
                    logging.debug(errorString)
                    errorRecordStatus = datastore.checksumComparisonFailedStatusCode()
                    raise Exception(errorString)

            except Exception as checksumException:
                logging.debug(
                    'PostProcess: The checksum failed. Please see the appropriate Error Box'
                )
                # Capture the text here: the exception name is not available
                # after this block on Python 3.
                checksumErrorMessage = str(checksumException)
                checkSumErrorString = 'There was a checksum error.' + '\n' + checksumErrorMessage
                didChecksumFail = True

        # A non-batch file that failed its checksum goes to the error box.
        if didChecksumFail and isBatch == False:
            errorPathInformation = ''
            try:
                logging.debug(
                    'PostProcess: creating a Decrypted Checksum folder')
                errorDestination = createSafeFolderInDestinationFolder(
                    errorPath, 'DECRYPTED_CHECKSUM_ERROR')
                try:
                    info = 'Moving the file that errored into the folder at ' + errorDestination
                    logging.debug(info)
                    shutil.move(
                        filePath,
                        os.path.join(errorDestination, fileBaseName))
                    errorPathInformation = info
                except Exception as e:
                    info = "PostProcess: " + str(e) + ' an error occurred moving the file: ' + fileBaseName + ' to ' + errorDestination
                    logging.debug(info)
            except Exception as e:
                info = 'PostProcess: An error occurred when moving the decrypted file in to the Error box'
                logging.debug(info)

            # THEN MOVE THE ENCRYPTED FILE ASIDE TO THE ERROR BOX
            try:
                info = 'Moving the source file into the error box at ' + errorPath
                logging.debug(info)
                newPath = pathAfterSafelyMovingFileToDestinationFolder(
                    sourceFilePath, errorPath)
                errorPathInformation = errorPathInformation + '\n' + info
            except Exception as e:
                info = "PostProcess: " + str(e) + ' an error occurred moving the file: ' + sourceFilePath
                logging.debug(info)

            datastore.updateRecordStatusWithID(errorRecordStatus, key_id)
            info = checksumErrorMessage + '\n' + errorPathInformation
            logging.debug(info)
            sendFailureEmail(info)
            continue

        # Lets now address the batch decrypted files
        newPath = filePath
        success = False
        # Set only when the source file really was moved to the original box;
        # it is the only path the delete step below may remove.
        movedOriginalPath = None

        if isBatch == True and recordOperationType == 'Decrypt':
            amRecord = datastore.recordWithNumberFromAMJobsTable(batchName)
            if amRecord is None:
                # Should not happen: the flow does not get this far without
                # a valid Archive Manager record.
                info = 'An error occurred where no data was found for the Archive Manager job ' + batchName + '\n'
                info = info + 'This error should not happen. Please check ' + os.path.dirname(
                    filePath) + '\n'
                info = info + 'The files will need to be manually removed from the Decryption Queue.'
                logging.debug(info)
                sendFailureEmail(info)
                continue

            if didChecksumFail:
                # Accumulate the checksum error on the Archive Manager job.
                amRecord.errorString = amRecord.errorString + '\n' + checkSumErrorString
                datastore.updateArchiveManagerJobErrorString(
                    amRecord, amRecord.errorString)

            # Interim folder, named after the batch, that collects its files.
            destinationAMFolder = os.path.join(
                os.path.dirname(filePath), batchName)
            if not os.path.exists(destinationAMFolder):
                try:
                    os.mkdir(destinationAMFolder)
                except OSError as e:
                    # Best effort; the move below reports a real problem.
                    pass

            # File name without the leading archive manager number.
            originalFileName = os.path.basename(filePath).split(
                (batchName + "_"))[1]
            proposedAMPath = os.path.join(destinationAMFolder,
                                          originalFileName)

            try:
                newPath = pathAfterSafelyMovingFileToDestinationFile(
                    filePath, proposedAMPath)
            except Exception as e:
                info = 'There was an error moving a file at %s for Archive Manager job %s. This will need to be manually addressed.' % (
                    filePath, batchName)
                sendFailureEmail(info)
                continue

            if os.path.basename(originalFileName) != os.path.basename(newPath):
                # A name collision forced a rename; note it on the job.
                errorString = 'For some reason, there already exists a file in %s labeled %s' % (
                    destinationAMFolder, originalFileName) + '\n'
                amRecord.errorString = amRecord.errorString + '\n' + errorString
                datastore.updateArchiveManagerJobErrorString(
                    amRecord, amRecord.errorString)

            success = datastore.updateRecordWithFinalEncryptedPathAndHashForStartTimeAndEndTime(
                newPath, hashString, startTime, endTime, key_id)

            currentFiles = visibleFilesInFolder(destinationAMFolder)
            amPath = amRecord.amPath
            filesInJob = amRecord.allFilesInRecord()

            # Are all the files of the batch job in place?
            try:
                if not all(nFile in currentFiles for nFile in filesInJob):
                    continue

                logging.debug('All files are in place')
                try:
                    # remove old source folder
                    logging.debug('PostProcess: removing original inbox')
                    shutil.rmtree(amPath)
                except OSError as e:
                    info = "PostProcess: " + str(e)
                    logging.debug(info)
                    info = 'There was a problem removing the folder %s from the inbox after decrypting all of the files in the job.' % (
                        amPath)
                    sendFailureEmail(info)

                # refresh the record
                amRecord = datastore.recordWithNumberFromAMJobsTable(batchName)
                if amRecord is None:
                    info = 'An error occurred where no data was found for the Archive Manager job ' + batchName + '\n'
                    info = info + 'This error should not happen. Please check ' + destinationAMFolder + '\n'
                    info = info + 'The files will need to be manually removed from the Decryption Queue.'
                    logging.debug(info)
                    sendFailureEmail(info)
                    continue

                if amRecord.errorString != '':
                    # The job has errors: redirect everything to the error box.
                    finalPath = errorPath
                    try:
                        if datastore.updateArchiveManagerJobAsErrored(
                                amRecord) == True:
                            logging.debug(
                                'Job has finished, but there were some errors')
                            logging.debug('PostProcess: will send email')
                            info = 'Job %s has some errors! Please see the ErrorBox at %s' % (
                                batchName, errorPath)
                            info = info + '\n' + amRecord.errorString
                            sendFailureEmail(info)
                        else:
                            logging.debug('PostProcess: Error saving Job')

                        # Rename the folder so it is recognisable as errored,
                        # then move it into the error box.
                        errDirname = os.path.dirname(destinationAMFolder)
                        errBasename = os.path.basename(
                            destinationAMFolder) + '_DECRYPTED_ERROR'
                        os.rename(destinationAMFolder,
                                  os.path.join(errDirname, errBasename))
                        destinationAMFolder = os.path.join(
                            errDirname, errBasename)
                        pathAfterSafelyMovingFolderToDestinationFolder(
                            destinationAMFolder, errorPath)
                    except Exception as e:
                        info = 'An error occurred when moving the errored files to %s.' % (
                            errorPath, )
                        logging.debug(info)
                        sendFailureEmail(info)
                else:
                    # No errors, move the files to the appropriate place
                    print("No Errors finalPath %s" % (finalPath, ))
                    try:
                        logging.debug(
                            'PostProcess: moving archive mananger folder to final destination'
                        )
                        if os.path.exists(
                                os.path.join(
                                    finalPath,
                                    os.path.basename(destinationAMFolder))):
                            logging.debug(
                                'PostProcess: collision moving to duplicate box'
                            )
                            pathAfterSafelyMovingFileToDestinationFolder(
                                destinationAMFolder, finalPath)
                        else:
                            shutil.move(destinationAMFolder, finalPath)

                        if datastore.updateArchiveManagerJobAsReadyToComplete(
                                amRecord) == True:
                            logging.debug(
                                'PostProcess: job is ready to complete')
                            logging.debug(
                                'PostProcess: moving files and sending email')
                            info = 'Job %s is complete! All of the files are decrypted and have appropriate matching checksums.' % (
                                batchName)
                            sendSuccessEmail(info)
                        else:
                            logging.debug('PostProcess: Error saving Job')

                    except OSError as e:
                        info = 'There was a problem moving the folder %s to the outbox. You will have to move the file manually.' % (
                            destinationAMFolder)
                        info = info + " " + str(e)
                        sendFailureEmail(info)
                        logging.debug(info)
                        continue

            except Exception as e:
                info = 'An error occurred. Please see check the Decryption Queue for job %s. See Error: %s' % (
                    batchName, str(e))
                logging.debug(info)
                sendFailureEmail(info)

        else:
            # LAST CASE FOR SINGLE MODE FILES LIKE ENCRYPTION AND SINGLE MODE DECRYPTION
            newPath = pathAfterSafelyMovingFileToDestinationFolder(
                filePath, finalPath)

            if not os.path.exists(newPath):
                logging.debug('PostProcess: Error moving file')
                continue

            logging.debug(
                'PostProcess: Will update record status with Hash string and times'
            )

            success = datastore.updateRecordWithFinalEncryptedPathAndHashForStartTimeAndEndTime(
                newPath, hashString, startTime, endTime, key_id)

            if success == True:
                # move original file to original box
                try:
                    movedOriginalPath = pathAfterSafelyMovingFileToDestinationFolder(
                        sourceFilePath, finalOriginalDestinationPath)
                    newPath = movedOriginalPath
                except Exception as e:
                    logging.debug(
                        'There was an error moving the file into place')
                    info = 'There was an error moving file %s into the outbox at %s' % (
                        sourceFilePath, finalOriginalDestinationPath)
                    sendFailureEmail(info)

        # Delete only a source file that was actually moved above. Otherwise
        # newPath still names the processed output, which must be kept.
        if movedOriginalPath is not None and configurationOptions(
        ).shouldDeleteOriginal == True:
            try:
                os.remove(movedOriginalPath)
            except OSError as e:
                logging.debug('PostProcess: Unable to delete the file %s' %
                              (movedOriginalPath, ))
def checkSingleFiles(dbPath):
    """Move files that have stopped changing from the "added" state into the
    working box.

    For each record returned by ``recordsForVerifying`` the file is promoted
    only when all of the following hold:
      * at least ``verificationWaitTime`` seconds have passed since the
        record's ``dateModified``;
      * the file still exists (otherwise the record is marked missing);
      * its size equals the recorded size (otherwise the record is refreshed
        with the new size and the current time) and is not zero;
      * an exclusive lock on the file can be taken, which is used as the sign
        that no other process is still writing it.

    dbPath -- path to the database file; the function returns immediately
              when it does not exist.
    """
    logging = DefaultLogger()
    if not os.path.exists(dbPath):
        logging.debug('Acquire File: can\'t find database at path')
        return

    datastore = DataStore(dbPath)

    for record in datastore.recordsForVerifying():
        key_id = record.id
        filePath = record.fileName
        lastSize = int(record.fileSize)
        dateLastModified = datetime.datetime.strptime(record.dateModified,
                                                      '%Y-%m-%d %H:%M:%S')
        timeDifference = datetime.datetime.now() - dateLastModified

        # total_seconds() counts whole days as well; ``.seconds`` alone wraps
        # around every 24 hours.
        if timeDifference.total_seconds() < verificationWaitTime:
            continue

        if not os.path.exists(filePath):
            logging.debug(
                'Acquire File: Will update record status as the file no longer exists'
            )
            datastore.updateRecordAsMissingWithID(key_id)
            continue

        currentSize = os.path.getsize(filePath)

        if lastSize != currentSize:
            logging.debug(record)
            logging.debug(
                'Acquire File: attempting db modify as file size has changed...'
            )
            datastore.updateRecordWithCurrentSizeAndDateModifiedWithID(
                currentSize,
                datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"), key_id)
            continue

        if currentSize == 0:
            # Wait until the size is non-zero: the file has likely been
            # queued to copy but no data has been moved yet (actual OSX case).
            continue

        logging.debug(
            'Acquire File: attempting to lock the file to see if I own the file yet...'
        )
        try:
            fileToCheck = open(filePath, 'rb')
            try:
                portalocker.lock(fileToCheck, portalocker.LOCK_EX)
            finally:
                # Close the handle whether or not the lock was granted.
                fileToCheck.close()
            logging.debug(
                'Acquire File: proceeding to update the file status knowing that no one else is using it...'
            )
        except Exception as e:
            logging.debug(
                'Acquire File: unable to lock file as it is likely in use')
            continue

        # This can change with an if/else should temp files to be decrypted
        # ever be put in another place.
        workingPath = configurationOptions().defaultPathStructure()['workingBox']

        try:
            newPath = pathAfterSafelyMovingFileToDestinationFolder(
                filePath, workingPath)
        except Exception as e:
            logging.debug(
                'This shouldn\'t happen as pathAfterSafelyMovingFileToDestinationFolder '
                'should create a unique name that avoids any collisions, otherwise the file has been moved'
            )
            logging.debug('Acquire File: Error moving file')
            info = 'There was a problem moving the file into into the queue for: ' + os.path.basename(
                filePath)
            info = info + '\n' + 'This will require manual intervention as the occurrence is unique.'
            # No failure email is sent from here yet; at least record the
            # message instead of discarding it.
            logging.debug(info)
            continue

        logging.debug('Acquire File: updating record file status and path....')
        datastore.updateRecordAsStaticWithNewPath(newPath, key_id)
def _bwfReportMoveFailure(logging, filePath, boxDescription, subject):
    """Log and email a failed file move; return the emailed message text."""
    logging.debug('Analyze File: Error moving file')
    logging.debug(
        'This should not happen as pathAfterSafelyMovingFileToDestinationFolder '
        'should create a unique name that avoids any collisions, otherwise the file has been moved'
    )
    info = 'There was a problem moving the file into into ' + boxDescription + ' for: ' + os.path.basename(
        filePath)
    info = info + '\n' + 'This will require manual intervention as the occurrence is unique.'
    sendProcessFailureMessage({'subject': subject, 'message': info})
    return info


def _bwfRecordCommentsOutcome(datastore, logging, recordID, success):
    """Persist whether the comments were posted to Daisy."""
    if success == True:
        logging.debug('updating comments successfully')
        datastore.successfullyUpdatedDaisyComments(recordID)
    else:
        logging.debug('not updating comments successfully')
        datastore.failedToUpdateDaisyComments(recordID)


def _bwfRecordStatusOutcome(datastore, logging, recordID, success):
    """Persist whether the status was posted to Daisy."""
    if success == True:
        logging.debug('updating status successfully')
        datastore.successfullyUpdatedDaisyStatus(recordID)
    else:
        logging.debug('not updating status successfully')
        datastore.failedToUpdateDaisyStatus(recordID)


def analyzeBWFFile(dbPath, identifier=1):
    """Worker loop: claim one ready record at a time, analyze its file, post
    the outcome to Daisy and move the file to its next box.

    Each iteration sleeps 61-70 seconds, then:
      1. returns if the database file has disappeared, or waits another
         round if Daisy is not up;
      2. under the file lock, takes one record that is ready for processing
         and marks it in-process;
      3. moves files without a Daisy number to the error box;
      4. fetches the vendor / comments / status tuple for the file, runs
         ``multiChannelBWFFileAnalysis`` and updates comments and status both
         in Daisy and in the local datastore;
      5. moves the file to the out box on success or the fail box otherwise
         and marks the record complete.

    dbPath     -- path to the database file.
    identifier -- label for this worker, used in log messages and passed to
                  ``retrieveDataForDANumber``.

    An exception that escapes an iteration is logged and emailed, and the
    loop (and so the function) ends.
    """
    logging = DefaultLogger()
    loopcount = 0
    datastore = DataStore(dbPath)

    try:
        while True:
            sleep(60 + random.randint(1, 10))

            if loopcount % 20 == 0:
                logging.debug(
                    'bwf analyzer loop {} is active...'.format(identifier))
            loopcount += 1

            if not os.path.exists(dbPath):
                logging.debug('Acquire File: can not find database at path')
                return

            record = None

            # if daisy is not up then just wait until it is
            if isDaisyUp() == False:
                logging.debug('Daisy does not appear to be up')
                continue

            # Claim a record while holding the file lock.
            lock = lockWithFile()
            try:
                lock.acquire(timeout=-1)
                if lock.i_am_locking():
                    candidate = datastore.oneRecordReadyForProcessing()
                    # The datastore may hand back an empty list instead of
                    # None when its lookup fails; treat that as "no record".
                    if candidate is not None and not isinstance(
                            candidate, list):
                        record = candidate
                        logging.debug(
                            'process {} is acquiring the lock'.format(
                                identifier))
                        datastore.updateRecordAsInProcess(record.id)
            except Exception as e:
                # Failing to get the lock is routine; note it and retry on
                # the next iteration.
                logging.debug('Analyze File: no record claimed: ' + str(e))
            finally:
                # Never leave the lock held, whatever happened above.
                try:
                    if lock.i_am_locking():
                        lock.release()
                except Exception as e:
                    logging.debug('Analyze File: unable to release lock: ' +
                                  str(e))

            if record is None:
                continue

            filePath = record.fileName
            fileName = os.path.basename(filePath)

            # lets check that is has a genuine Daisy Number
            if getDaisyNumber(fileName) is None:
                errorBox = configurationOptions().defaultPathStructure(
                )['errorBox']
                errorBox = os.path.expanduser(errorBox)
                sendProcessFailureMessage({
                    'subject':
                    'BWF Error: file added that has no DANumber',
                    'message':
                    'A file, %s, was deposited that does not have a Daisy Number'
                    % (fileName)
                })

                # move to errorBox
                try:
                    print("Moving file %s into %s" % (filePath, errorBox))
                    pathAfterSafelyMovingFileToDestinationFolder(
                        filePath, errorBox)
                except Exception as e:
                    info = _bwfReportMoveFailure(logging, filePath,
                                                 'the errorBox', 'BWF Error')
                    logging.debug(info)

                datastore.updateRecordAsNotHavingADaisyNumber(record.id)
                continue

            # Look up metadata first: without it the file is not analyzed.
            dataTuple = retrieveDataForDANumber(fileName, identifier)
            logging.debug('Data for {} Before: {}'.format(fileName, dataTuple))

            if dataTuple is None:
                # Sent at a maximum of 1 per 4 hours. No exception is bound
                # at this point, so there is no error text to append.
                result = "Process Error: Daisy Information not Available:"
                sendPeriodicFailureMessage(result)
                logging.debug('A Periodic Failure Message attempt was made.')
                continue

            result = None
            resultObject = None
            vendor = dataTuple[0]
            comments = dataTuple[1]
            status = dataTuple[2]

            # once we have the metadata, lets examine the file
            try:
                logging.debug('Will examine %s in loop %s' %
                              (filePath, str(identifier)))
                resultObject = multiChannelBWFFileAnalysis(filePath)
                result = json.dumps(resultObject)
                if resultObject is None:
                    logging.debug(
                        'The analysis of the file %s is "None". This should not occur.'
                        % (filePath))
                    raise Exception(
                        'The analysis of the file %s is "None". This should not occur.'
                        % (filePath))
            except Exception as e:
                logging.debug(
                    'An exception occurred with %s in identifier %s.' %
                    (filePath, str(identifier)))
                # mark as error
                datastore.updateRecordWithAnalysisError(record.id)
                errorBox = configurationOptions().defaultPathStructure(
                )['errorBox']
                errorBox = os.path.expanduser(errorBox)

                # send email
                result = "Process Error: An Exception occurred when processing the file: %s. The file will be moved to %s" % (
                    str(e), errorBox)
                logging.debug(result)
                sendProcessFailureMessage({
                    'subject': 'Process Error',
                    'message': result
                })

                # move to errorBox
                try:
                    print("Moving file %s into %s" % (filePath, errorBox))
                    logging.debug("Moving file %s into %s" %
                                  (filePath, errorBox))
                    pathAfterSafelyMovingFileToDestinationFolder(
                        filePath, errorBox)
                except Exception as moveError:
                    info = _bwfReportMoveFailure(logging, filePath,
                                                 'the errorBox',
                                                 'Process Error Moving File')
                    logging.debug(info)
                continue

            info = 'PreExisting Data for the following file %s: %s %s %s' % (
                fileName, comments, vendor, status)
            logging.debug(info)

            # resultObject is not None here; that case bailed out above.
            resultObject['vendor'] = vendor
            analysisSucceeded = resultObject['result'] == 'success'

            if not analysisSucceeded:
                sendAnalysisFailure(resultObject)

            # Replace any earlier analyzer note in the comments with a fresh
            # one, then post the comments to Daisy and store them locally.
            if comments is None:
                comments = ''
            comments = stringMinusBWFAnalyzerInfo(comments)
            if comments != '':
                comments += " "
            if analysisSucceeded:
                comments += BWFAnalyzerInfoForSuccess(fileName)
            else:
                comments += BWFAnalyzerInfoForErrors(resultObject['errors'])

            success = setComments(getDaisyNumber(fileName), comments)
            datastore.updateRecordWithComments(comments, record.id)
            _bwfRecordCommentsOutcome(datastore, logging, record.id, success)

            # Work out and post the next Daisy status.
            if analysisSucceeded:
                # Only known statuses are advanced; anything else is left
                # alone and recorded as 'NO CHANGE'.
                nextStatus = 'NO CHANGE'
                success = True
                if status == "Needs Attention":
                    success = setStatusAsPendingFix(getDaisyNumber(fileName))
                    nextStatus = 'Pending Fix'
                if status in ['Being Made', 'Ordered']:
                    success = setStatusAsPendingArchive(
                        getDaisyNumber(fileName))
                    nextStatus = 'Pending Archive'
            elif status not in ['Being Made', 'Ordered', 'Pending Archive']:
                success = setStatusAsNeedsAttention(getDaisyNumber(fileName))
                nextStatus = 'Needs Attention'
            else:
                success = setStatusAsPendingArchive(getDaisyNumber(fileName))
                nextStatus = 'Pending Archive'

            datastore.updateRecordWithDaisyStatus(nextStatus, record.id)
            _bwfRecordStatusOutcome(datastore, logging, record.id, success)

            if datastore.updateRecordWithAnalysisData(result,
                                                      record.id) == False:
                info = 'Unable to save record %d %s' % (record.id, result)
                sendProcessFailureMessage({
                    'subject': 'Process Error Unable To Save Record',
                    'message': info
                })
                continue

            # update vendor info
            datastore.updateRecordWithVendor(vendor, record.id)

            dataTuple = retrieveDataForDANumber(fileName, identifier)
            logging.debug('Data for {} After: {}'.format(fileName, dataTuple))

            # now that we have saved the data, we are ready to move the file
            nextBox = configurationOptions().defaultPathStructure()['outBox']
            if not analysisSucceeded:
                nextBox = configurationOptions().defaultPathStructure(
                )['failBox']
            nextBox = os.path.expanduser(nextBox)

            try:
                newPath = pathAfterSafelyMovingFileToDestinationFolder(
                    filePath, nextBox)
            except Exception as e:
                _bwfReportMoveFailure(logging, filePath, nextBox,
                                      'Process Error Moving File')
                continue

            logging.debug(
                'Analyze File: preparing to move file to final path...')
            datastore.updateRecordAsCompleteWithFinalPath(newPath, record.id)

    except Exception as e:
        info = 'Exception in analyzeBWFFile' + str(e)
        logging.debug(info)
        sendProcessFailureMessage({'subject': 'Exception!', 'message': info})