def checkSingleFiles(dbPath):
	logging = DefaultLogger()

	if not os.path.exists(dbPath):
		logging.debug('Acquire File: can\'t find database at path')
		return
	
	datastore = DataStore(dbPath)
	data = datastore.recordsForVerifying()

	for record in data:

		key_id 				= record.id
		filePath 			= record.fileName
		recordSize 			= int(record.fileSize)
		dateModifiedString 	= record.dateModified
		pathStructureName 	= record.pathStructureName
		operationType		= record.operationType
		isBatch				= record.isBatch
		batchName			= record.batchName

		dateLastModified = datetime.datetime.strptime(dateModifiedString, '%Y-%m-%d %H:%M:%S')
		timeDifference = datetime.datetime.now() - dateLastModified

		#This can change with an if/else should I decide I want to put temp files to be decrypted in another place
		sourcePath = configurationOptions().pathStructureWithName(pathStructureName)['inBox']
		workingPath = configurationOptions().pathStructureWithName(pathStructureName)['workingBox']

		#use total_seconds(): timedelta.seconds wraps at one day, which would skip older files
		if timeDifference.total_seconds() < verificationWaitTime:
			continue

		lastSize = recordSize
		currentSize = 0

		if not os.path.exists(filePath):
			logging.debug('Acquire File: Will update record status as the file no longer exists')
			datastore.updateRecordAsMissingWithID(key_id)
			continue

		currentSize = os.path.getsize(filePath)

		if lastSize != currentSize:
			logging.debug(record)
			logging.debug('Acquire File: attempting db modify as file size has changed...')
			datastore.updateRecordWithCurrentSizeAndDateModifiedWithID(currentSize, datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"), key_id)
			continue

		if currentSize == 0:
			continue
			# if the current size is zero, then continue until it isn't (or never will be);
			# it's likely the file has been queued to copy but no data has been moved yet (an actual OS X case)
			
		logging.debug('Acquire File: attempting to lock the file to see if I own the file yet...')

		try:
			fileToCheck = open(filePath, 'rb')
			portalocker.lock(fileToCheck, portalocker.LOCK_EX)
			fileToCheck.close()
			logging.debug('Acquire File: proceeding to update the file status knowing that no one else is using it...')
		except Exception as e:
			logging.debug('Acquire File: unable to lock file as it is likely in use')
			continue

		if datastore.doesTheFilePathExistElseWhereInThePathStructure(filePath, operationType, pathStructureName) == True:
			duplicatePath = configurationOptions().pathStructureWithName(pathStructureName)['duplicateBox']
			newPath = pathAfterSafelyMovingFileToDestinationFolder(filePath, duplicatePath)
			datastore.updateRecordAsDuplicateWithNewPath(newPath, key_id)
			continue

		newPath = filePath
		#Batch decrypt files must resolve their Archive Manager data before they move on;
		#everything else is moved straight from the inbox to the working box.
		if isBatch == 1 and operationType == 'Decrypt':
			amRecord = None
			uuidString = fileNameForUUIDFileWithPath(os.path.dirname(filePath))

			if uuidString == None:
				#if I can't resolve the UUID, then resolve it through an AM Record
				#Does the file's Archive Manager job have data associated with it?
				amRecord = datastore.recordWithNumberFromAMJobsTable(batchName)
				if amRecord == None:
					info = "Acquire File: Archive Manager data doesn't exist for " + filePath
					info = info + " " + "Marking file as having no AM Data. File will not be moved through the processing queue."
					logging.debug(info)
					datastore.updateRecordStatusWithID(datastore.noArchiveManagerDataExistsForRecord(), key_id)
					continue
			else:
				logging.debug('Updating record %s with UUID %s' % (filePath, uuidString))
				amRecord = datastore.archiveManagerJobsTableRecordWithUUID(uuidString)
				datastore.updateRecordAWithBatchUUIDReference(uuidString, key_id)
		else:
			#at this point, I need to subtract the file's main folder from the pathStructure['inBox']
			#this moves the file from the inbox to the working path
			try:
				newPath = pathAfterSafelyMovingFileToDestinationFolder(filePath, workingPath)
			except Exception as e:
				logging.debug('This shouldn\'t happen: pathAfterSafelyMovingFileToDestinationFolder should create a unique name that avoids any collisions, unless the file has already been moved')
				logging.debug('Acquire File: Error moving file')
				info = 'There was a problem moving the file into the queue for: ' + os.path.basename(filePath)
				info = info + '\n' + 'This will require manual intervention as the occurrence is unique.'
				sendFailureEmail(info)
				continue

			logging.debug('Acquire File: updating record file status and path....')
			datastore.updateRecordAsStaticWithNewPath(newPath, key_id)
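
#The examples above lean on pathAfterSafelyMovingFileToDestinationFolder, which is not
#shown here. A minimal sketch of what it is assumed to do -- move a file into a folder,
#generating a unique name to avoid collisions -- might look like this; the real helper
#may differ:
import os
import shutil

def pathAfterSafelyMovingFileToDestinationFolder(filePath, destinationFolder):
	baseName = os.path.basename(filePath)
	name, extension = os.path.splitext(baseName)
	candidatePath = os.path.join(destinationFolder, baseName)
	counter = 1
	#append a numeric suffix until the destination name is free
	while os.path.exists(candidatePath):
		candidatePath = os.path.join(destinationFolder, '%s_%d%s' % (name, counter, extension))
		counter += 1
	shutil.move(filePath, candidatePath)
	return candidatePath
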
def processRecordsReadyToBeHashed(data, datastore):

	logging = DefaultLogger()

	for record in data:
		logging.debug(record)

		key_id 				= record.id
		sourceFilePath 		= record.fileName
		filePath 			= record.operationFileName
		recordOperationType = record.operationType
		pathStructureName 	= record.pathStructureName			
		isBatch				= record.isBatch
		batchName			= record.batchName

		currentPathStructure = configurationOptions().pathStructureWithName(pathStructureName)
		finalPath = currentPathStructure['outBox']
		finalOriginalDestinationPath = currentPathStructure['originalBox']
		errorPath = currentPathStructure['errorBox']

		if not os.path.exists(filePath):
			# if the processed file doesn't exist, then update the record and move the source to the error box
			# ADD LOGIC FOR BATCH PROCESSING
			logging.debug('PostProcess: Will update record status as the encrypted file does not exist')
			newPath = pathAfterSafelyMovingFileToDestinationFolder(sourceFilePath, errorPath)
			datastore.updateRecordAsMissingWithFileNameAndID(newPath, key_id)
			continue

		#CALCULATE HASH
		startTime = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
		datastore.updateRecordWithReHashStart(startTime, key_id)
		hashString = 'NO_HASH'

		#only hash files being decrypted
		if recordOperationType == 'Decrypt':
			try:
				fileToHash = open(filePath, 'rb')
				logging.debug('PostProcess: locked file to calculate hash...')
				portalocker.lock(fileToHash, portalocker.LOCK_SH)
				hashString = hashForFile(fileToHash)
				logging.debug('PostProcess Hasher: unlocking file...')
				fileToHash.close()
			except Exception as e:
				hashString = 'HASH_GEN_ERROR'
		else:
			hashString = "NO_HASH_FOR_ENCRYPTED_FILES"

		endTime = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
		
		#ONLY DECRYPTED FILES' HASH IS CHECKED
		didChecksumFail = False
		checkSumErrorString = None

		if recordOperationType == 'Decrypt':
			fileBaseName = os.path.basename(filePath)
			if isBatch:
				fileBaseName = os.path.basename(filePath).split((batchName + "_"))[1]

			daisyNumber = getDaisyNumber(fileBaseName)

			try:
				errorRecordStatus = 0
				if daisyNumber == None:
					errorString = 'There was an error Decrypting the file: ' + fileBaseName + '.\n'
					errorString = errorString + 'Unable to retrieve Daisy Number for ' + filePath + ' ' + batchName
					logging.debug(errorString)
					errorRecordStatus = datastore.daisyEntryNotFoundStatusCode()
					raise Exception(errorString)

				originalChecksum = DaisyMetadataLookup(daisyNumber).checksumForFile(fileBaseName)
				
				if originalChecksum == None:
					errorString = 'There was an error Decrypting the file: ' + fileBaseName + '.\n'
					errorString = errorString + 'Unable to retrieve Checksum for ' + filePath + ' ' + batchName
					logging.debug(errorString)
					errorRecordStatus = datastore.checksumLookupFailedStatusCode()
					raise Exception(errorString)

				if originalChecksum.upper() != hashString.upper():
					errorString = 'Checksums do not match for file ' + filePath + '\n'
					errorString = errorString + ' ' + batchName + " expected the checksum: " + originalChecksum + '\n'
					errorString = errorString + " but found this checksum instead:" + hashString
					logging.debug(errorString)
					errorRecordStatus = datastore.checksumComparisonFailedStatusCode()
					raise Exception(errorString)

			except Exception as checksumException:
				#we have an error, so we must create a new folder in the error path
				#if the file is non-batch, then it is moved aside to the error box below
				logging.debug('PostProcess: The checksum failed. Please see the appropriate Error Box')
				checkSumErrorString = 'There was a checksum error.' + '\n' + checksumException.message
				didChecksumFail = True

			#If the file failed a checksum and is not a batch file, then move it to the error box
			if didChecksumFail == True and isBatch == False:
				errorPathInformation = ''
				try:
					logging.debug('PostProcess: creating a Decrypted Checksum folder')
					errorDestination = createSafeFolderInDestinationFolder(errorPath, 'DECRYPTED_CHECKSUM_ERROR')
					try: 
						info = 'Moving the file that errored into the folder at ' + errorDestination
						logging.debug(info)
						shutil.move(filePath, os.path.join(errorDestination,fileBaseName))
						errorPathInformation = info
					except Exception as e:
						info = "PostProcess: " + e.message + ' an error occurred moving the file: ' + fileBaseName + ' to ' + errorDestination
						logging.debug(info)
				except Exception as e:
					info = 'PostProcess: An error occurred when moving the decrypted file in to the Error box'
					logging.debug(info)

				#THEN MOVE THE ENCRYPTED FILE ASIDE TO THE ERROR BOX
				try:
					info = 'Moving the source file into the error box at ' + errorPath
					logging.debug(info)
					newPath = pathAfterSafelyMovingFileToDestinationFolder(sourceFilePath, errorPath)
					errorPathInformation = errorPathInformation + '\n' + info
				except Exception as e:
					info = "PostProcess: " + e.message + ' an error occurred moving the file: ' + sourceFilePath
					logging.debug(info)

				datastore.updateRecordStatusWithID(errorRecordStatus, key_id)
				info = checksumException.message + '\n' + errorPathInformation
				logging.debug(info)
				sendFailureEmail(info)
				continue
	
		#Let's now address the batch decrypted files

		newPath = filePath
		success = False

		if isBatch == True and recordOperationType == 'Decrypt':
			#create the destination folder for the Archive Manager Job
			amRecord = datastore.recordWithNumberFromAMJobsTable(batchName)
			if amRecord is None:
				#This should not happen as we don't even allow for the logic to proceed to this point without
				#a valid Archive Manager Record
				info = 'An error occurred where no data was found for the Archive Manager job ' + batchName + '\n'
				info = info + 'This error should not happen. Please check ' + os.path.dirname(filePath) + '\n'
				info = info + 'The files will need to be manually removed from the Decryption Queue.'
				logging.debug(info)
				sendFailureEmail(info)
				continue

			if didChecksumFail == True:
				#add checksum error string to archive manager job
				amRecord.errorString = amRecord.errorString + '\n' + checkSumErrorString
				datastore.updateArchiveManagerJobErrorString(amRecord, amRecord.errorString)

			#create the new folder in interim where we will push all of the batch files
			destinationAMFolder = os.path.join(os.path.dirname(filePath), batchName)
			if not os.path.exists(destinationAMFolder):
				try:
					os.mkdir(destinationAMFolder)
				except OSError as e:
					pass
			
			#get the file name, stripping the leading archive manager number (we are in a batch here)
			originalFileName = os.path.basename(filePath).split((batchName + "_"))[1]

			#this is where we will move the interim file, a new folder with its original name
			proposedAMPath = os.path.join(destinationAMFolder, originalFileName)

			#at this point the file should be in a folder named after the batch
			try:
				newPath = pathAfterSafelyMovingFileToDestinationFile(filePath, proposedAMPath)
			except Exception as e:
				info = 'There was an error moving a file at %s for Archive Manager job %s. This will need to be manually addressed.' % (filePath, batchName)
				sendFailureEmail(info)
				continue

			if os.path.basename(originalFileName) != os.path.basename(newPath):
				#there was a collision; there really is no reason why this should happen, but let's account for it
				errorString = 'For some reason, there already exists a file in %s labeled %s' % (destinationAMFolder, originalFileName) + '\n'
				amRecord.errorString = amRecord.errorString + '\n' + errorString
				datastore.updateArchiveManagerJobErrorString(amRecord, amRecord.errorString)

			success = datastore.updateRecordWithFinalEncryptedPathAndHashForStartTimeAndEndTime(newPath, hashString, startTime, endTime, key_id)
			currentFiles = visibleFilesInFolder(destinationAMFolder)

			amPath = amRecord.amPath
			filesInJob = amRecord.allFilesInRecord()
			
			#are we finished? are all the files in place for the batch job?
			try:
				areAllFilesInPlace = True			
				for nFile in filesInJob:
					if nFile not in currentFiles:
						areAllFilesInPlace = False

				if areAllFilesInPlace == False:
					continue

				logging.debug('All files are in place')
				try:
					#remove old source folder
					logging.debug('PostProcess: removing original inbox')
					shutil.rmtree(amPath)
				except OSError as e:
					info = "PostProcess: " + e.message
					logging.debug(info)
					info = 'There was a problem removing the folder %s from the inbox after decrypting all of the files in the job.' % (amPath)
					sendFailureEmail(info)

				#refresh the record
				amRecord = datastore.recordWithNumberFromAMJobsTable(batchName)
				if amRecord is None:
					#This should not happen as we don't even allow for the logic to proceed to this point without
					#a valid Archive Manager Record
					info = 'An error occurred where no data was found for the Archive Manager job ' + batchName + '\n'
					info = info + 'This error should not happen. Please check ' + destinationAMFolder + '\n'
					info = info + 'The files will need to be manually removed from the Decryption Queue.'
					logging.debug(info)
					sendFailureEmail(info)
					continue

				#if there is an error, then redirect to the error box
				if amRecord.errorString != '':
					finalPath = errorPath
					#move the error files into a folder that indicates they are errors, it will live in the error box
					try:
						if datastore.updateArchiveManagerJobAsErrored(amRecord) == True:
							logging.debug('Job has finished, but there were some errors')
							logging.debug('PostProcess: will send email')
							info = 'Job %s has some errors! Please see the ErrorBox at %s' % (batchName, errorPath)
							info = info + '\n' + amRecord.errorString
							sendFailureEmail(info)
						else:
							logging.debug('PostProcess: Error saving Job')

						errDirname = os.path.dirname(destinationAMFolder)
						errBasename = os.path.basename(destinationAMFolder) + '_DECRYPTED_ERROR'
						os.rename(destinationAMFolder, os.path.join(errDirname, errBasename))
						destinationAMFolder = os.path.join(errDirname, errBasename)
						# shutil.move(destinationAMFolder, errorPath)
						pathAfterSafelyMovingFolderToDestinationFolder(destinationAMFolder, errorPath)

					except Exception as e:
						info = 'An error occurred when moving the errored files to %s.' % (errorPath,)
						logging.debug(info)
						sendFailureEmail(info)
				else:
					#No errors, move the files to the appropriate place
					print "No Errors finalPath", finalPath
					try:
						logging.debug('PostProcess: moving archive manager folder to final destination')
						if os.path.exists(os.path.join(finalPath, os.path.basename(destinationAMFolder))):
							logging.debug('PostProcess: name collision at the outbox; moving with a unique name')
							pathAfterSafelyMovingFileToDestinationFolder(destinationAMFolder, finalPath)
						else:
							shutil.move(destinationAMFolder, finalPath)

						if datastore.updateArchiveManagerJobAsReadyToComplete(amRecord) == True:
							logging.debug('PostProcess: job is ready to complete')
							logging.debug('PostProcess: moving files and sending email')
							info = 'Job %s is complete! All of the files are decrypted and have appropriate matching checksums.' % (batchName)
							sendSuccessEmail(info)
						else:
							logging.debug('PostProcess: Error saving Job')	

					except OSError as e:
						#again, I am accounting for this error, I just don't know why I would ever encounter a situation like this
						info = 'There was a problem moving the folder %s to the outbox. You will have to move the file manually.' % (destinationAMFolder)
						info = info + " " + e.message
						sendFailureEmail(info)
						logging.debug(info)
						continue

			except Exception as e:
				info = 'An error occurred. Please see check the Decryption Queue for job %s. See Error: %s' % (batchName, e.message)
				logging.debug(info)
				sendFailureEmail(info)

		else:
			#LAST CASE FOR SINGLE MODE FILES LIKE ENCRYPTION AND SINGLE MODE DECRYPTION 
			newPath = pathAfterSafelyMovingFileToDestinationFolder(filePath, finalPath)	

			if not os.path.exists(newPath):
				logging.debug('PostProcess: Error moving file')
				continue

			logging.debug('PostProcess: Will update record status with Hash string and times')

			success = datastore.updateRecordWithFinalEncryptedPathAndHashForStartTimeAndEndTime(newPath, 
				hashString, startTime, endTime, key_id)

			if success == True:
				# move original file to original box
				try:
					newPath = pathAfterSafelyMovingFileToDestinationFolder(sourceFilePath, finalOriginalDestinationPath)
				except Exception as e:
					logging.debug('There was an error moving the file into place')
					info = 'There was an error moving file %s into the outbox at %s' % (sourceFilePath, finalOriginalDestinationPath)
					sendFailureEmail(info)
					#skip the delete below: newPath was not updated, so it still points at the decrypted output
					continue

				if configurationOptions().shouldDeleteOriginal == True:
					try:
						os.remove(newPath)
					except OSError as e:
						logging.debug('PostProcess: Unable to delete the file %s' % (newPath,))
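
#hashForFile is used above but not shown in these examples. A minimal sketch, assuming it
#takes an already-open (and locked) file object and returns a hex digest; the real helper
#may use a different algorithm than MD5:
import hashlib

def hashForFile(openFile, blockSize=2 ** 20):
	hasher = hashlib.md5()
	openFile.seek(0)
	#read in chunks so large media files do not have to fit in memory
	while True:
		chunk = openFile.read(blockSize)
		if not chunk:
			break
		hasher.update(chunk)
	return hasher.hexdigest()
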
def checkArchiveManagerJobs(dbPath):
	logging = DefaultLogger()

	datastore = DataStore(dbPath)
	amRecords = datastore.archiveManagerJobsReadyToStart()

	for amRecord in amRecords:
		areAllFilesAvailableAndReady = True
		
		recordsInAMRecord = datastore.recordsForUUID(amRecord.uuid)
		filesInAMRecord = [x.fileName for x in recordsInAMRecord]
		filesInCurrentFolder = []

		try:
			filesInCurrentFolder = os.listdir(amRecord.amPath)
		except Exception as e:
			pass

		isThereAnUnknownFilePresent = False
		for currentFile in filesInCurrentFolder:
			if currentFile not in filesInAMRecord:
				isThereAnUnknownFilePresent = True

		if isThereAnUnknownFilePresent == True:
			logging.debug('Unknown files are present')
			#TODO: report this as an error

		for currentFile in filesInAMRecord:
			logging.debug('%s' % (currentFile))
			lastComponent = os.path.basename(currentFile)
			if lastComponent not in filesInCurrentFolder:
				logging.debug('The following file is not yet available: %s' % (lastComponent))
				areAllFilesAvailableAndReady = False

		if areAllFilesAvailableAndReady == False:
			logging.debug('Not all of the files are staged yet')
			continue

		canLockAllRecords = True

		data = datastore.recordsForUUID(amRecord.uuid)

		for record in data:
			
			filePath = record.fileName

			try:
				fileToCheck = open(filePath, 'rb')
				portalocker.lock(fileToCheck, portalocker.LOCK_EX)
				fileToCheck.close()
				logging.debug('Acquire File: proceeding to update the file status knowing that no one else is using it...')
			except Exception as e:
				logging.debug('Acquire File: unable to lock file as it is likely in use')
				canLockAllRecords = False

		if canLockAllRecords == False:
			logging.debug('Cannot lock all of the records yet')
			continue

		for record in data:

			key_id 				= record.id
			filePath 			= record.fileName
			recordSize 			= int(record.fileSize)
			dateModifiedString 	= record.dateModified
			pathStructureName 	= record.pathStructureName
			operationType		= record.operationType
			isBatch				= record.isBatch
			batchName			= record.batchName

			newPath = filePath
			workingPath = configurationOptions().pathStructureWithName(pathStructureName)['workingBox']

			proposedBatchName = batchName + "_" + os.path.basename(filePath)
			proposedPath = os.path.join(os.path.dirname(filePath), proposedBatchName) 

			#we prepend the job name to the file here as it belongs to a batch
			try:
				if os.path.exists(proposedPath):
					raise Exception('file already exists')
				os.rename(filePath, proposedPath)
				filePath = proposedPath
			except Exception as e:
				#this is an unlikely occurrence
				info = 'There is a duplicate file in the queue for: ' + os.path.basename(filePath) + " " + e.message
				logging.debug(info)
				sendFailureEmail(info)
				continue

			#at this point, I need to subtract the file's main folder from the pathStructure['inBox']
			#this moves the file from the inbox to the working path
			try:
				newPath = pathAfterSafelyMovingFileToDestinationFolder(filePath, workingPath)
			except Exception as e:
				logging.debug('This shouldn\'t happen: pathAfterSafelyMovingFileToDestinationFolder should create a unique name that avoids any collisions, unless the file has already been moved')
				logging.debug('Acquire File: Error moving file')
				info = 'There was a problem moving the file into the queue for: ' + os.path.basename(filePath)
				info = info + '\n' + 'This will require manual intervention as the occurrence is unique.'
				sendFailureEmail(info)
				continue

			logging.debug('Acquire File: updating record file status and path....')
			datastore.updateRecordAsStaticWithNewPath(newPath, key_id)
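
#The try/lock/close probe above appears in several of these examples. Factored out as a
#small helper (a sketch, not part of the original code), it would look like this:
import portalocker

def canObtainExclusiveLock(filePath):
	try:
		fileToCheck = open(filePath, 'rb')
		portalocker.lock(fileToCheck, portalocker.LOCK_EX)
		fileToCheck.close()
		return True
	except Exception:
		#another process still holds the file (e.g. a copy is in progress)
		return False
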
def decrypt(dbPath):
	'''
	This process examines the database pointed to by dbPath. It looks for any
	records which have status 2 and have had a hash value calculated for them.
	'''
	logging = DefaultLogger()

	if not os.path.exists(dbPath):
		logging.debug('Decryptor: can\'t find database at path')
		return

	datastore = DataStore(dbPath)
	loopcount = 0

	while True:
		sleep(5)

		if loopcount % 10 == 0:
			logging.debug('Decryptor Process is alive')
		loopcount += 1

		data = datastore.recordsReadyToDecrypt()
		for record in data:
			logging.debug(record)

			key_id 			   = record.id
			filePath 		   = record.fileName
			pathStructureName  = record.pathStructureName	
			isBatch			   = record.isBatch
			batchName		   = record.batchName

			if not os.path.exists(filePath):
				logging.debug('Decryptor: will update record status as the file no longer exists')
				datastore.updateRecordAsMissingWithID(key_id)
			else:
				options = configurationOptions()
				currentPathStructure = options.pathStructureWithName(pathStructureName)
				decryptionErrorPath = currentPathStructure['errorBox']
				decryptionInterimPath = currentPathStructure['interimBox']
				options.inputPath = filePath

				decryptedFilePath = os.path.join(decryptionInterimPath, os.path.basename(filePath))
				operationStart = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
				nextStatusValue = datastore.operationCompleteStatusCode()

				message = 'Decryptor: decrypting file ' + filePath
				logging.debug(message)

				##UPDATE OPERATION START
				datastore.updateRecordStatusWithOperationStart(operationStart, key_id)

				args = [options.decryptorApplicationPath, filePath, decryptedFilePath]
				process = subprocess.Popen(args)
				out, err = process.communicate()
				returnCode = process.returncode

				message = 'Decryptor: decrypted file with return code ' + str(returnCode)
				logging.debug(message)

				operationStop = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

				if returnCode != 0:
					info = 'An error occurred with the Decryption operation when decrypting %s.' % (filePath)
					logging.debug(info)
					
					operationStart = datetime.datetime(2000,1,1)
					operationStop = datetime.datetime(2000,1,1)

					if isBatch == 0:
						
						nextStatusValue = datastore.operationFailedStatusCode()

						if os.path.abspath(os.path.dirname(filePath)) != os.path.abspath(decryptionErrorPath):
							logging.debug('moving file to error path')
							if os.path.exists(decryptionErrorPath):
								# shutil.move(filePath, decryptionErrorPath)
								# newPath = os.path.join(decryptionErrorPath, os.path.basename(filePath))
								newPath = pathAfterSafelyMovingFileToDestinationFolder(filePath, decryptionErrorPath)
								if not os.path.exists(newPath):
									logging.debug('Decryptor: Error moving file')
									nextStatusValue = datastore.errorMovingFileStatusCode()
							else:
								logging.debug('Decryptor: decryptionErrorPath doesn\'t exist')
								nextStatusValue = datastore.errorPathDoesntExistStatusCode()
					else:
						#don't move batch files, just update the batch's am errorString to reflect the problem
						#the file's checksum will fail
						#we don't update the batch file's status
						amRecord = datastore.recordWithNumberFromAMJobsTable(batchName)
						if amRecord == None:
							#This should not happen as we don't even allow for the logic to proceed to this point without
							#a valid Archive Manager Record
							info = 'An error occurred where no data was found for the Archive Manager job ' + batchName + '\n'
							info = info + 'This error should not happen. Please check ' + os.path.dirname(filePath) + '\n'
							info = info + 'The files will need to be manually removed from the Decryption Queue.'
							logging.debug(info)
							sendFailureEmail(info)
							continue
						
						errorString = 'A problem was encountered while decrypting %s. ' % (filePath)
						errorString = errorString + 'The file\'s checksum will be calculated and compared against that in Daisy should the error have occurred after the file was decrypted.'
						if amRecord.errorString != '':
							amRecord.errorString = amRecord.errorString + '\n' + errorString
						else:
							amRecord.errorString = errorString

						datastore.updateArchiveManagerJobErrorString(amRecord, amRecord.errorString)

				# we update the status value
				datastore.updateRecordStatusWithDecryptedFileNameAndStartAndEndTime(nextStatusValue, decryptedFilePath, operationStart, operationStop, key_id)
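
#visibleFilesInFolder, used by processRecordsReadyToBeHashed above, is not shown in these
#examples. A plausible sketch, under the assumption that "visible" simply means "not a
#dotfile":
import os

def visibleFilesInFolder(folderPath):
	return [name for name in os.listdir(folderPath) if not name.startswith('.')]
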
def checkSingleFiles(dbPath):
	logging = DefaultLogger()

	if not os.path.exists(dbPath):
		logging.debug('Acquire File: can\'t find database at path')
		return
	
	datastore = DataStore(dbPath)
	data = datastore.recordsForVerifying()

	for record in data:

		key_id 				= record.id
		filePath 			= record.fileName
		recordSize 			= int(record.fileSize)
		dateModifiedString 	= record.dateModified

		dateLastModified = datetime.datetime.strptime(dateModifiedString, '%Y-%m-%d %H:%M:%S')
		timeDifference = datetime.datetime.now() - dateLastModified

		#This can change with an if/else should I decide I want to put temp files to be decrypted in another place
		sourcePath = configurationOptions().defaultPathStructure()['inBox']
		workingPath = configurationOptions().defaultPathStructure()['workingBox']

		#use total_seconds(): timedelta.seconds wraps at one day, which would skip older files
		if timeDifference.total_seconds() < verificationWaitTime:
			continue

		lastSize = recordSize
		currentSize = 0

		if not os.path.exists(filePath):
			logging.debug('Acquire File: Will update record status as the file no longer exists')
			datastore.updateRecordAsMissingWithID(key_id)
			continue

		currentSize = os.path.getsize(filePath)

		if lastSize != currentSize:
			logging.debug(record)
			logging.debug('Acquire File: attempting db modify as file size has changed...')
			datastore.updateRecordWithCurrentSizeAndDateModifiedWithID(currentSize, datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"), key_id)
			continue

		if currentSize == 0:
			continue
			# if the current size is zero, then continue until it isn't (or never will be);
			# it's likely the file has been queued to copy but no data has been moved yet (an actual OS X case)
			
		logging.debug('Acquire File: attempting to lock the file to see if I own the file yet...')

		try:
			fileToCheck = open(filePath, 'rb')
			portalocker.lock(fileToCheck, portalocker.LOCK_EX)
			fileToCheck.close()
			logging.debug('Acquire File: proceeding to update the file status knowing that no one else is using it...')
		except Exception as e:
			logging.debug('Acquire File: unable to lock file as it is likely in use')
			continue

		#must test that file doesn't exist elsewhere in the path

		newPath = filePath
		try:
			newPath = pathAfterSafelyMovingFileToDestinationFolder(filePath, workingPath)
		except Exception as e:
			info = '''This shouldn't happen: pathAfterSafelyMovingFileToDestinationFolder should create a unique name that avoids any collisions, unless the file has already been moved'''
			logging.debug(info)
			logging.debug('Acquire File: Error moving file')
			info = 'There was a problem moving the file into the queue for: ' + os.path.basename(filePath)
			info = info + '\n' + 'This will require manual intervention as the occurrence is unique.'
			#SEND FAILURE EMAIL
			continue

		logging.debug('Acquire File: updating record file status and path....')
		datastore.updateRecordAsStaticWithNewPath(newPath, key_id)
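
#checkSingleFiles does a single pass over the records; of these examples only decrypt()
#shows its polling loop. A sketch of how such a pass might be driven (the real scheduling
#is not shown in the source; sleep comes from time, as in decrypt() above):
from time import sleep

def runAcquireLoop(dbPath, pollInterval=5):
	logging = DefaultLogger()
	loopcount = 0
	while True:
		sleep(pollInterval)
		if loopcount % 10 == 0:
			logging.debug('Acquire process is alive')
		loopcount += 1
		checkSingleFiles(dbPath)
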
def analyzeBWFFile(dbPath, identifier=1):

    logging = DefaultLogger()
    loopcount = 0
    datastore = DataStore(dbPath)

    try:

        while True:

            sleep(60 + random.randint(1, 10))

            if loopcount % 20 == 0:
                logging.debug(
                    'bwf analyzer loop {} is active...'.format(identifier))
            loopcount += 1

            if not os.path.exists(dbPath):
                logging.debug('Analyze File: cannot find database at path')
                return

            record = None

            #if daisy is not up then just wait until it is
            if isDaisyUp() == False:
                logging.debug('Daisy does not appear to be up')
                continue

            #get a lock on the file
            lock = lockWithFile()
            try:
                lock.acquire(timeout=-1)
                if lock.i_am_locking():
                    record = datastore.oneRecordReadyForProcessing()
                    if record != None:
                        logging.debug(
                            'process {} is acquiring the lock'.format(
                                identifier))
                        datastore.updateRecordAsInProcess(record.id)
                lock.release()
            except Exception as e:
                pass

            if record == None:
                continue

            filePath = record.fileName

            #let's check that it has a genuine Daisy Number
            if getDaisyNumber(os.path.basename(filePath)) == None:
                errorBox = configurationOptions().defaultPathStructure(
                )['errorBox']
                errorBox = os.path.expanduser(errorBox)
                sendProcessFailureMessage({
                    'subject':
                    'BWF Error: file added that has no DANumber',
                    'message':
                    'A file, %s, was deposited that does not have a Daisy Number'
                    % (os.path.basename(filePath))
                })

                #move to errorBox
                try:
                    print "Moving file %s into %s" % (filePath, errorBox)
                    newPath = pathAfterSafelyMovingFileToDestinationFolder(
                        filePath, errorBox)
                except Exception as e:
                    logging.debug('Analyze File: Error moving file')
                    info = '''This should not happen as pathAfterSafelyMovingFileToDestinationFolder should create a unique name that avoids any collisions, otherwise the file has been moved'''
                    logging.debug(info)
                    info = 'There was a problem moving the file into the errorBox for: ' + os.path.basename(
                        filePath)
                    info = info + '\n' + 'This will require manual intervention as the occurrence is unique.'
                    sendProcessFailureMessage({
                        'subject': 'BWF Error',
                        'message': info
                    })
                    logging.debug(info)

                datastore.updateRecordAsNotHavingADaisyNumber(record.id)
                continue

            #let's look up the metadata before we even proceed; if we can't get the metadata we don't want to analyze files
            dataTuple = retrieveDataForDANumber(os.path.basename(filePath),
                                                identifier)
            logging.debug('Data for {} Before: {}'.format(
                os.path.basename(filePath), dataTuple))

            if dataTuple == None:
                #ok, let's send an email that will be sent at a maximum of 1 per 4 hours
                result = "Process Error: Daisy Information not Available for " + os.path.basename(filePath)
                sendPeriodicFailureMessage(result)
                logging.debug('A Periodic Failure Message attempt was made.')
                continue

            result = None
            resultObject = None
            vendor = dataTuple[0]
            comments = dataTuple[1]
            status = dataTuple[2]

            #once we have the metadata, lets examine the file
            try:
                logging.debug('Will examine %s in loop %s' %
                              (filePath, str(identifier)))
                resultObject = multiChannelBWFFileAnalysis(filePath)
                result = json.dumps(resultObject)
                if resultObject == None:
                    logging.debug(
                        'The analysis of the file %s is "None". This should not occur.'
                        % (filePath))
                    raise Exception(
                        'The analysis of the file %s is "None". This should not occur.'
                        % (filePath))
            except Exception as e:
                logging.debug(
                    'An exception occurred with %s in identifier %s.' %
                    (filePath, str(identifier)))
                #mark as error
                datastore.updateRecordWithAnalysisError(record.id)
                errorBox = configurationOptions().defaultPathStructure(
                )['errorBox']
                errorBox = os.path.expanduser(errorBox)

                #send email
                result = "Process Error: An Exception occurred when processing the file: %s. The file will be moved to %s" % (
                    e.message, errorBox)
                logging.debug(result)
                sendProcessFailureMessage({
                    'subject': 'Process Error',
                    'message': result
                })

                #move to errorBox
                try:
                    print "Moving file %s into %s" % (filePath, errorBox)
                    logging.debug("Moving file %s into %s" %
                                  (filePath, errorBox))
                    newPath = pathAfterSafelyMovingFileToDestinationFolder(
                        filePath, errorBox)
                except Exception as e:
                    logging.debug('Analyze File: Error moving file')
                    info = '''This should not happen as pathAfterSafelyMovingFileToDestinationFolder should create a unique name that avoids any collisions, otherwise the file has been moved'''
                    logging.debug(info)
                    info = 'There was a problem moving the file into the errorBox for: ' + os.path.basename(
                        filePath)
                    info = info + '\n' + 'This will require manual intervention as the occurrence is unique.'
                    sendProcessFailureMessage({
                        'subject': 'Process Error Moving File',
                        'message': info
                    })
                    logging.debug(info)
                continue

            info = 'PreExisting Data for the following file %s: %s %s %s' % (
                os.path.basename(filePath), comments, vendor, status)
            logging.debug(info)

            resultObject['vendor'] = vendor

            #The result object is not None as we would have bailed otherwise

            if resultObject['result'] == 'success':
                if comments == None:
                    comments = ''

                #update Comments
                comments = stringMinusBWFAnalyzerInfo(comments)
                if comments != '':
                    comments += " "
                comments += BWFAnalyzerInfoForSuccess(
                    os.path.basename(filePath))
                success = setComments(
                    getDaisyNumber(os.path.basename(filePath)), comments)
                #update local datastore
                datastore.updateRecordWithComments(comments, record.id)

                #did we successfully update the comments?
                if success == True:
                    #update comments field in db and set to success
                    logging.debug('updating comments successfully')
                    datastore.successfullyUpdatedDaisyComments(record.id)
                else:
                    #put info in db that you couldn't update Daisy
                    logging.debug('not updating comments successfully')
                    datastore.failedToUpdateDaisyComments(record.id)

                #update the status to pending fix
                #only if the status is Needs Attention, otherwise we don't have any further knowledge of what is going on
                nextStatus = 'NO CHANGE'
                success = True
                if status == "Needs Attention":
                    #ok to update status
                    success = setStatusAsPendingFix(
                        getDaisyNumber(os.path.basename(filePath)))
                    nextStatus = 'Pending Fix'

                if status in ['Being Made', 'Ordered']:
                    #ok to update status
                    success = setStatusAsPendingArchive(
                        getDaisyNumber(os.path.basename(filePath)))
                    nextStatus = 'Pending Archive'

                datastore.updateRecordWithDaisyStatus(nextStatus, record.id)
                if success == True:
                    #update status field in db and set to success
                    logging.debug('updating status successfully')
                    datastore.successfullyUpdatedDaisyStatus(record.id)
                else:
                    #put info in db that you couldn't update status in Daisy
                    logging.debug('not updating status successfully')
                    datastore.failedToUpdateDaisyStatus(record.id)

            else:
                sendAnalysisFailure(resultObject)

                if comments == None:
                    comments = ''

                #update Comments
                comments = stringMinusBWFAnalyzerInfo(comments)
                if comments != '':
                    comments += " "
                comments += BWFAnalyzerInfoForErrors(resultObject['errors'])
                success = setComments(
                    getDaisyNumber(os.path.basename(filePath)), comments)

                #update local datastore
                datastore.updateRecordWithComments(comments, record.id)

                if success == True:
                    #update comments field in db and set to success
                    logging.debug('updating comments successfully')
                    datastore.successfullyUpdatedDaisyComments(record.id)
                else:
                    #put info in db that you couldn't update Daisy
                    logging.debug('not updating comments successfully')
                    datastore.failedToUpdateDaisyComments(record.id)

                #update Status
                if status not in ['Being Made', 'Ordered', 'Pending Archive']:
                    #ok to update status
                    success = setStatusAsNeedsAttention(
                        getDaisyNumber(os.path.basename(filePath)))
                    datastore.updateRecordWithDaisyStatus(
                        'Needs Attention', record.id)
                    if success == True:
                        #update status field in db and set to success
                        logging.debug('updating status successfully')
                        datastore.successfullyUpdatedDaisyStatus(record.id)
                    else:
                        #put info in db that you couldn't update status in Daisy
                        logging.debug('not updating status successfully')
                        datastore.failedToUpdateDaisyStatus(record.id)
                else:
                    success = setStatusAsPendingArchive(
                        getDaisyNumber(os.path.basename(filePath)))
                    datastore.updateRecordWithDaisyStatus(
                        'Pending Archive', record.id)
                    if success == True:
                        #update status field in db and set to success
                        logging.debug('updating status successfully')
                        datastore.successfullyUpdatedDaisyStatus(record.id)
                    else:
                        #put info in db that you couldn't update status in Daisy
                        logging.debug('not updating status successfully')
                        datastore.failedToUpdateDaisyStatus(record.id)

            if datastore.updateRecordWithAnalysisData(result,
                                                      record.id) == False:
                info = 'Unable to save record %d %s' % (record.id, result)
                sendProcessFailureMessage({
                    'subject': 'Process Error Unable To Save Record',
                    'message': info
                })
                continue

            #update vendor info
            datastore.updateRecordWithVendor(vendor, record.id)

            dataTuple = retrieveDataForDANumber(os.path.basename(filePath),
                                                identifier)
            logging.debug('Data for {} After: {}'.format(
                os.path.basename(filePath), dataTuple))

            #now that we have saved the data, we are ready to move the file
            nextBox = configurationOptions().defaultPathStructure()['outBox']
            if resultObject['result'] != 'success':
                nextBox = configurationOptions().defaultPathStructure(
                )['failBox']
            nextBox = os.path.expanduser(nextBox)

            newPath = filePath

            try:
                newPath = pathAfterSafelyMovingFileToDestinationFolder(
                    filePath, nextBox)
            except Exception as e:
                logging.debug('Analyze File: Error moving file')
                info = '''This should not happen as pathAfterSafelyMovingFileToDestinationFolder should create a unique name that avoids any collisions, otherwise the file has been moved'''
                logging.debug(info)
                info = 'There was a problem moving the file into ' + nextBox + ' for: ' + os.path.basename(
                    filePath)
                info = info + '\n' + 'This will require manual intervention as the occurrence is unique.'
                sendProcessFailureMessage({
                    'subject': 'Process Error Moving File',
                    'message': info
                })
                continue

            logging.debug(
                'Analyze File: preparing to move file to final path...')
            datastore.updateRecordAsCompleteWithFinalPath(newPath, record.id)

    except Exception as e:
        info = 'Exception in analyzeBWFFile' + e.message
        logging.debug(info)
        sendProcessFailureMessage({'subject': 'Exception!', 'message': info})
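
#lockWithFile is referenced above but not shown. The calls made on it -- acquire(timeout=-1),
#i_am_locking(), release() -- match the lockfile package's FileLock API, so a minimal sketch
#might look like this (the lock path here is an assumption):
import lockfile

def lockWithFile(lockPath='/tmp/bwf_analyzer'):
    return lockfile.FileLock(lockPath)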
def analyzeBWFFile(dbPath, identifier = 1):
	
	logging = DefaultLogger()
	loopcount = 0
	datastore = DataStore(dbPath)

	try:

		while True:

			sleep(60+random.randint(1,10))

			if loopcount % 20 == 0:
				logging.debug('bwf analyzer loop {} is active...'.format(identifier))
			loopcount += 1

			if not os.path.exists(dbPath):
				logging.debug('Acquire File: can not find database at path')
				return
		
			record =  None

			#if daisy is not up then just wait until it is
			if isDaisyUp() == False:
				logging.debug('Daisy does not appear to be up')
				continue

			#get a lock on the file
			lock = lockWithFile()
			try:
				lock.acquire(timeout=-1)
				if lock.i_am_locking():  
					record = datastore.oneRecordReadyForProcessing()
					if record != None:
						logging.debug('process {} is acquiring the lock'.format(identifier))
						datastore.updateRecordAsInProcess(record.id)
				lock.release()
			except Exception as e:
				pass

			if record == None:
				continue

			filePath = record.fileName

			#lets check that is has a genuine Daisy Number
			if getDaisyNumber(os.path.basename(filePath)) == None:
				errorBox = configurationOptions().defaultPathStructure()['errorBox']
				errorBox = os.path.expanduser(errorBox)	
				sendProcessFailureMessage({'subject':'BWF Error: file added that has no DANumber', 'message':'A file, %s, was deposited that does not have a Daisy Number' % (os.path.basename(filePath))})
				
				#move to errorBox
				try:
					print "Moving file %s into %s" % (filePath, errorBox)
					newPath = pathAfterSafelyMovingFileToDestinationFolder(filePath, errorBox)
				except Exception as e:
					logging.debug('Analyze File: Error moving file')
					info = '''This should not happen as pathAfterSafelyMovingFileToDestinationFolder should create a unique name that avoids any collisions, otherwise the file has been moved'''
					logging.debug(info)
					info = 'There was a problem moving the file into into the errorBox for: ' + os.path.basename(filePath)
					info = info + '\n' + 'This will require manual intervention as the occurrence is unique.'
					sendProcessFailureMessage({'subject':'BWF Error', 'message':info})
					logging.debug(info)

				datastore.updateRecordAsNotHavingADaisyNumber(record.id)
				continue

			#lets look up metadata before we even proceed, if can't get the metadata we don't want to analyze files
			dataTuple = retrieveDataForDANumber(os.path.basename(filePath), identifier)
			logging.debug('Data for {} Before: {}'.format(os.path.basename(filePath), dataTuple))

			if dataTuple is None:
				#ok, let's send an email, rate-limited to at most one every four hours (a sketch of such a throttle follows encrypt below)
				result = 'Process Error: Daisy Information not Available for ' + os.path.basename(filePath)
				sendPeriodicFailureMessage(result)
				logging.debug('A Periodic Failure Message attempt was made.')
				continue

			result = None
			resultObject = None
			vendor = dataTuple[0]
			comments = dataTuple[1]
			status = dataTuple[2]
			
			#once we have the metadata, lets examine the file
			try:
				logging.debug('Will examine %s in loop %s' % (filePath, str(identifier)))
				resultObject = multiChannelBWFFileAnalysis(filePath)
				if resultObject is None:
					logging.debug('The analysis of the file %s is "None". This should not occur.' % (filePath))
					raise Exception('The analysis of the file %s is "None". This should not occur.' % (filePath))
				result = json.dumps(resultObject)
			except Exception as e:
				logging.debug('An exception occurred with %s in identifier %s.' % (filePath, str(identifier)))
				#mark as error
				datastore.updateRecordWithAnalysisError(record.id)
				errorBox = configurationOptions().defaultPathStructure()['errorBox']
				errorBox = os.path.expanduser(errorBox)
				
				#send email
				result = "Process Error: An Exception occurred when processing the file: %s. The file will be moved to %s" % (e.message, errorBox)
				logging.debug(result)
				sendProcessFailureMessage({'subject':'Process Error', 'message':result})

				#move to errorBox
				try:
					print "Moving file %s into %s" % (filePath, errorBox)
					logging.debug("Moving file %s into %s" % (filePath, errorBox))
					newPath = pathAfterSafelyMovingFileToDestinationFolder(filePath, errorBox)
				except Exception as e:
					logging.debug('Analyze File: Error moving file')
					info = '''This should not happen, as pathAfterSafelyMovingFileToDestinationFolder should create a unique name that avoids any collisions; otherwise the file has already been moved'''
					logging.debug(info)
					info = 'There was a problem moving the file into the errorBox for: ' + os.path.basename(filePath)
					info = info + '\n' + 'This will require manual intervention as the occurrence is unique.'
					sendProcessFailureMessage({'subject':'Process Error Moving File', 'message':info})
					logging.debug(info)
				continue

			info = 'PreExisting Data for the following file %s: %s %s %s' % (os.path.basename(filePath), comments, vendor, status)
			logging.debug(info)

			resultObject['vendor'] = vendor

			#The result object is not None as we would have bailed otherwise

			if resultObject['result'] == 'success':
				if comments == None:
					comments = ''

				#update Comments 
				comments = stringMinusBWFAnalyzerInfo(comments)
				if comments != '':
					comments += " "
				comments += BWFAnalyzerInfoForSuccess(os.path.basename(filePath))
				success = setComments(getDaisyNumber(os.path.basename(filePath)), comments)
				#update local datastore
				datastore.updateRecordWithComments(comments, record.id)

				#did we successfully update the comments?
				if success == True:
					#update comments field in db and set to success
					logging.debug('updated comments successfully')
					datastore.successfullyUpdatedDaisyComments(record.id)
				else:
					#record in the db that we couldn't update Daisy
					logging.debug('failed to update the comments')
					datastore.failedToUpdateDaisyComments(record.id)

				#update the status to Pending Fix, but only if the current status is
				#Needs Attention; otherwise we don't have any further knowledge of what is going on
				nextStatus = 'NO CHANGE'
				success = True
				if status == "Needs Attention":
					#ok to update status
					success = setStatusAsPendingFix(getDaisyNumber(os.path.basename(filePath)))
					nextStatus = 'Pending Fix'

				if status in ['Being Made', 'Ordered']:
					#ok to update status
					success = setStatusAsPendingArchive(getDaisyNumber(os.path.basename(filePath)))
					nextStatus = 'Pending Archive'
					
				datastore.updateRecordWithDaisyStatus(nextStatus, record.id)
				if success == True:
					#update status field in db and set to success
					logging.debug('updated status successfully')
					datastore.successfullyUpdatedDaisyStatus(record.id)
				else:
					#record in the db that we couldn't update the status in Daisy
					logging.debug('failed to update the status')
					datastore.failedToUpdateDaisyStatus(record.id)

			else:
				sendAnalysisFailure(resultObject)

				if comments == None:
					comments = ''

				#update Comments 
				comments = stringMinusBWFAnalyzerInfo(comments)
				if comments != '':
					comments += " "
				comments += BWFAnalyzerInfoForErrors(resultObject['errors'])
				success = setComments(getDaisyNumber(os.path.basename(filePath)), comments)
				
				#update local datastore
				datastore.updateRecordWithComments(comments, record.id)

				if success == True:
					#update comments field in db and set to success
					logging.debug('updated comments successfully')
					datastore.successfullyUpdatedDaisyComments(record.id)
				else:
					#record in the db that we couldn't update Daisy
					logging.debug('failed to update the comments')
					datastore.failedToUpdateDaisyComments(record.id)

				#update Status
				if status not in ['Being Made', 'Ordered', 'Pending Archive']:
					#ok to update status
					success = setStatusAsNeedsAttention(getDaisyNumber(os.path.basename(filePath)))
					datastore.updateRecordWithDaisyStatus('Needs Attention', record.id)
					if success == True:
						#update status field in db and set to success
						logging.debug('updated status successfully')
						datastore.successfullyUpdatedDaisyStatus(record.id)
					else:
						#record in the db that we couldn't update the status in Daisy
						logging.debug('failed to update the status')
						datastore.failedToUpdateDaisyStatus(record.id)
				else:
					success = setStatusAsPendingArchive(getDaisyNumber(os.path.basename(filePath)))
					datastore.updateRecordWithDaisyStatus('Pending Archive', record.id)
					if success == True:
						#update status field in db and set to success
						logging.debug('updated status successfully')
						datastore.successfullyUpdatedDaisyStatus(record.id)
					else:
						#record in the db that we couldn't update the status in Daisy
						logging.debug('failed to update the status')
						datastore.failedToUpdateDaisyStatus(record.id)

			if datastore.updateRecordWithAnalysisData(result, record.id) == False:
				info = 'Unable to save record %d %s' % (record.id, result) 
				sendProcessFailureMessage({'subject':'Process Error Unable To Save Record', 'message':info})
				continue

			#update vendor info
			datastore.updateRecordWithVendor(vendor, record.id)

			dataTuple = retrieveDataForDANumber(os.path.basename(filePath), identifier)
			logging.debug('Data for {} After: {}'.format(os.path.basename(filePath),dataTuple))

			#now that we have saved the data, we are ready to move the file
			nextBox = configurationOptions().defaultPathStructure()['outBox']
			if resultObject['result'] != 'success':
				nextBox = configurationOptions().defaultPathStructure()['failBox']
			nextBox = os.path.expanduser(nextBox)

			newPath = filePath

			try:
				newPath = pathAfterSafelyMovingFileToDestinationFolder(filePath, nextBox)
			except Exception as e:
				logging.debug('Analyze File: Error moving file')
				info = '''This should not happen, as pathAfterSafelyMovingFileToDestinationFolder should create a unique name that avoids any collisions; otherwise the file has already been moved'''
				logging.debug(info)
				info = 'There was a problem moving the file into ' + nextBox + ' for: ' + os.path.basename(filePath)
				info = info + '\n' + 'This will require manual intervention as the occurrence is unique.'
				sendProcessFailureMessage({'subject':'Process Error Moving File', 'message':info})
				continue

			logging.debug('Analyze File: preparing to move file to final path...')
			datastore.updateRecordAsCompleteWithFinalPath(newPath, record.id)

	except Exception as e:
		info = 'Exception in analyzeBWFFile: ' + e.message
		logging.debug(info)
		sendProcessFailureMessage({'subject':'Exception!', 'message':info})
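
#A minimal sketch of the record-claiming pattern used in analyzeBWFFile above,
#assuming lockWithFile() returns a lockfile.FileLock-style object (the lock path
#here is hypothetical). Several analyzer processes run concurrently, and the
#file lock makes "read one ready record and mark it in-process" atomic across them.
def sketch_claimOneRecord(datastore):
	from lockfile import FileLock

	record = None
	lock = FileLock('/tmp/bwf_analyzer.lock')  #hypothetical lock path
	try:
		lock.acquire(timeout=-1)  #a negative timeout fails immediately if the lock is held
		if lock.i_am_locking():
			record = datastore.oneRecordReadyForProcessing()
			if record is not None:
				datastore.updateRecordAsInProcess(record.id)
		lock.release()
	except Exception:
		pass  #another worker holds the lock; the caller simply retries on its next loop
	return record
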
def encrypt(dbPath):
	'''
	This process examines the database pointed to by dbPath. It
	looks for any records which have status 2 and have had a hash value calculated.
	'''
	logging = DefaultLogger()

	if not os.path.exists(dbPath):
		logging.debug('can\'t find database at path')
		return

	datastore = DataStore(dbPath)
	loopcount = 0

	while True:
		sleep(5)

		if loopcount % 10 == 0:
			logging.debug('Encryptor Process is alive')
		loopcount += 1

		data = datastore.recordsReadyToEncrypt()
		for record in data:
			logging.debug(record)

			key_id = record.id
			filePath = record.fileName	
			pathStructureName = record.pathStructureName

			if not os.path.exists(filePath):
				logging.debug('Encryptor: will update record status as the file no longer exists')
				datastore.updateRecordAsMissingWithID(key_id)
			else:
				options = configurationOptions()
				currentPathStructure = options.pathStructureWithName(pathStructureName)
				encryptionErrorPath = currentPathStructure['errorBox']
				encryptionInterimPath = currentPathStructure['interimBox']

				encryptedFilePath = os.path.join(encryptionInterimPath, os.path.basename(filePath))
				encryptionStart = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
				nextStatusValue = datastore.operationCompleteStatusCode()

				options.inputPath = filePath
				options.destinationPath = os.path.dirname(encryptedFilePath)

				##UPDATE THAT ENCRYPTION STARTS HERE
				datastore.updateRecordStatusWithOperationStart(encryptionStart, key_id)

				message = 'Encryptor: encrypting file ' + filePath
				logging.debug(message)

				#there is a bug with MediaSeal when encrypting an already encrypted file;
				#this check ensures MediaSeal doesn't blow away the file.
				returnCode = -7
				fileToEncrypt = None
				try:
					fileToEncrypt = open(filePath, 'rb')
					portalocker.lock(fileToEncrypt, portalocker.LOCK_SH)
					returnCode = singleShotEncryptor(options)
				except Exception as e:
					logging.debug('unable to lock file')

				if fileToEncrypt is not None:
					fileToEncrypt.close()

				message = 'Encryptor: encrypted file with return code ' +  str(returnCode)
				logging.debug(message)

				encryptionStop = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

				#try again should the connection be bad
				if returnCode == 2:
					sleep(5)
					returnCode = singleShotEncryptor(options)
					encryptionStop = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

				#as we are encrypting single files, we can leave this logic the same
				if returnCode != 0:
					info = "There was a problem encrypting " + filePath + ". Encountered Error Code: " + str(returnCode) + ". The file will be moved to the path's Error box: " + encryptionErrorPath 
					sendFailureEmail(info)

					nextStatusValue = datastore.operationFailedStatusCode()
					encryptionStart = datetime.datetime(2000,1,1)
					encryptionStop = datetime.datetime(2000,1,1)

					if os.path.abspath(os.path.dirname(filePath)) != os.path.abspath(encryptionErrorPath):
						logging.debug('moving file to error path')
						if os.path.exists(encryptionErrorPath):
							try:
								newPath = pathAfterSafelyMovingFileToDestinationFolder(filePath, encryptionErrorPath)
							except Exception as e:
								logging.debug('Encryptor: Error moving file')
								#only flag the move as failed when it actually failed
								nextStatusValue = datastore.errorMovingFileStatusCode()
						else:
							logging.debug('Encryptor: encryptionErrorPath doesn\'t exist')
							nextStatusValue = datastore.errorPathDoesntExistStatusCode()

				datastore.updateRecordStatusWithEncryptedFileNameAndStartAndEndTime(nextStatusValue, encryptedFilePath, encryptionStart, encryptionStop, key_id)
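
#A minimal sketch of a sendPeriodicFailureMessage-style throttle, assuming the
#real helper limits itself to one email per four hours as the comment in
#analyzeBWFFile suggests. The module-level timestamp and the window length are
#assumptions for illustration only.
_lastPeriodicFailureTime = None

def sketch_sendPeriodicFailureMessage(message):
	import datetime
	global _lastPeriodicFailureTime

	now = datetime.datetime.now()
	#drop the message unless four hours have passed since the last send
	if _lastPeriodicFailureTime is not None and now - _lastPeriodicFailureTime < datetime.timedelta(hours=4):
		return False

	_lastPeriodicFailureTime = now
	sendProcessFailureMessage({'subject': 'Periodic Process Error', 'message': message})
	return True
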
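#processRecordsReadyToBeHashed below leans on hashForFile(); here is a minimal
#sketch, assuming a chunked MD5 digest over an already-open file object. The
#real helper (and the digest algorithm it uses) is defined elsewhere, so treat
#this as illustrative only.
def sketch_hashForFile(openFile, chunkSize=1024 * 1024):
    import hashlib

    digest = hashlib.md5()
    openFile.seek(0)

    #read in fixed-size chunks so large media files are never pulled into memory at once
    chunk = openFile.read(chunkSize)
    while chunk:
        digest.update(chunk)
        chunk = openFile.read(chunkSize)

    return digest.hexdigest()
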
def processRecordsReadyToBeHashed(data, datastore):

    logging = DefaultLogger()

    for record in data:
        logging.debug(record)

        key_id = record.id
        sourceFilePath = record.fileName
        filePath = record.operationFileName
        recordOperationType = record.operationType
        pathStructureName = record.pathStructureName
        isBatch = record.isBatch
        batchName = record.batchName

        currentPathStructure = configurationOptions().pathStructureWithName(
            pathStructureName)
        finalPath = currentPathStructure['outBox']
        finalOriginalDestinationPath = currentPathStructure['originalBox']
        errorPath = currentPathStructure['errorBox']

        if not os.path.exists(filePath):
            # if the processed file doesn't exist, update the record and move the source to the error box
            # ADD LOGIC FOR BATCH PROCESSING
            logging.debug(
                'PostProcess: Will update record status as the encrypted file does not exist'
            )
            newPath = pathAfterSafelyMovingFileToDestinationFolder(
                sourceFilePath, errorPath)
            datastore.updateRecordAsMissingWithFileNameAndID(newPath, key_id)
            continue

        #CALCULATE HASH
        startTime = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        datastore.updateRecordWithReHashStart(startTime, key_id)
        hashString = 'NO_HASH'

        #only hash files being decrypted (hashForFile is sketched above)
        if recordOperationType == 'Decrypt':
            try:
                fileToHash = open(filePath, 'rb')
                logging.debug('PostProcess: locked file to calculate hash...')
                portalocker.lock(fileToHash, portalocker.LOCK_SH)
                hashString = hashForFile(fileToHash)
                logging.debug('PostProcess Hasher: unlocking file...')
                fileToHash.close()
            except Exception as e:
                hashString = 'HASH_GEN_ERROR'
        else:
            hashString = "NO_HASH_FOR_ENCRYPTED_FILES"

        endTime = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        #ONLY DECRYPTED FILES' HASH IS CHECKED
        didChecksumFail = False
        checkSumErrorString = None

        if recordOperationType == 'Decrypt':
            fileBaseName = os.path.basename(filePath)
            if isBatch:
                fileBaseName = os.path.basename(filePath).split(
                    (batchName + "_"))[1]
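                #e.g. with batchName 'JOB123', 'JOB123_tape01.wav'.split('JOB123_')
                #yields ['', 'tape01.wav'], so index [1] recovers the original name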

            daisyNumber = getDaisyNumber(fileBaseName)

            try:
                errorRecordStatus = 0
                if daisyNumber == None:
                    errorString = 'There was an error Decrypting the file: ' + fileBaseName + '.\n'
                    errorString = errorString + 'Unable to retrieve Daisy Number for ' + filePath + ' ' + batchName
                    logging.debug(errorString)
                    errorRecordStatus = datastore.daisyEntryNotFoundStatusCode()
                    raise Exception(errorString)

                originalChecksum = DaisyMetadataLookup(
                    daisyNumber).checksumForFile(fileBaseName)

                if originalChecksum == None:
                    errorString = 'There was an error Decrypting the file: ' + fileBaseName + '.\n'
                    errorString = errorString + 'Unable to retrieve Checksum for ' + filePath + ' ' + batchName
                    logging.debug(errorString)
                    errorRecordStatus = datastore.checksumLookupFailedStatusCode()
                    raise Exception(errorString)

                if originalChecksum.upper() != hashString.upper():
                    errorString = 'Checksums do not match for file ' + filePath + '\n'
                    errorString = errorString + ' ' + batchName + " expected the checksum: " + originalChecksum + '\n'
                    errorString = errorString + " but found this checksum instead:" + hashString
                    logging.debug(errorString)
                    errorRecordStatus = datastore.checksumComparisonFailedStatusCode()
                    raise Exception(errorString)

            except Exception as checksumException:
                #we have an error, so we must create a new folder in the error path;
                #if the file is non-batch, we move it there directly just below
                logging.debug(
                    'PostProcess: The checksum failed. Please see the appropriate Error Box'
                )
                checkSumErrorString = 'There was a checksum error.' + '\n' + checksumException.message
                didChecksumFail = True

            #If the file failed a checksum and is not a batch file, then move it to the error box
            if didChecksumFail == True and isBatch == False:
                errorPathInformation = ''
                try:
                    logging.debug(
                        'PostProcess: creating a Decrypted Checksum folder')
                    errorDestination = createSafeFolderInDestinationFolder(
                        errorPath, 'DECRYPTED_CHECKSUM_ERROR')
                    try:
                        info = 'Moving the file that errored into the folder at ' + errorDestination
                        logging.debug(info)
                        shutil.move(
                            filePath,
                            os.path.join(errorDestination, fileBaseName))
                        errorPathInformation = info
                    except Exception as e:
                        info = "PostProcess: " + e.message + ' an error occurred moving the file: ' + fileBaseName + ' to ' + errorDestination
                        logging.debug(info)
                except Exception as e:
                    info = 'PostProcess: An error occurred when moving the decrypted file in to the Error box'
                    logging.debug(info)

                #THEN MOVE THE ENCRYPTED FILE ASIDE TO THE ERROR BOX
                try:
                    info = 'Moving the source file into the error box at ' + errorPath
                    logging.debug(info)
                    newPath = pathAfterSafelyMovingFileToDestinationFolder(
                        sourceFilePath, errorPath)
                    errorPathInformation = errorPathInformation + '\n' + info
                except Exception as e:
                    info = "PostProcess: " + e.message + ' an error occurred moving the file: ' + sourceFilePath
                    logging.debug(info)

                datastore.updateRecordStatusWithID(errorRecordStatus, key_id)
                info = checksumException.message + '\n' + errorPathInformation
                logging.debug(info)
                sendFailureEmail(info)
                continue

        #Let's now address the batch decrypted files

        newPath = filePath
        success = False

        if isBatch == True and recordOperationType == 'Decrypt':
            #create the destination folder for the Archive Manager Job
            amRecord = datastore.recordWithNumberFromAMJobsTable(batchName)
            if amRecord is None:
                #This should not happen as we don't even allow for the logic to proceed to this point without
                #a valid Archive Manager Record
                info = 'An error occurred where no data was found for the Archive Manager job ' + batchName + '\n'
                info = info + 'This error should not happen. Please check ' + os.path.dirname(
                    filePath) + '\n'
                info = info + 'The files will need to be manually removed from the Decryption Queue.'
                logging.debug(info)
                sendFailureEmail(info)
                continue

            if didChecksumFail == True:
                #add checksum error string to archive manager job
                amRecord.errorString = amRecord.errorString + '\n' + checkSumErrorString
                datastore.updateArchiveManagerJobErrorString(
                    amRecord, amRecord.errorString)

            #create the new folder in interim where we will push all of the batch files
            destinationAMFolder = os.path.join(os.path.dirname(filePath),
                                               batchName)
            if not os.path.exists(destinationAMFolder):
                try:
                    os.mkdir(destinationAMFolder)
                except OSError as e:
                    pass

            #get the file name, strip leading archive manager number
            originalFileName = os.path.basename(filePath)
            if isBatch == True:
                originalFileName = os.path.basename(filePath).split(
                    (batchName + "_"))[1]

            #this is where we will move the interim file, a new folder with its original name
            proposedAMPath = os.path.join(destinationAMFolder,
                                          originalFileName)

            #at this point the file should be in a folder named after the batch
            try:
                newPath = pathAfterSafelyMovingFileToDestinationFile(
                    filePath, proposedAMPath)
            except Exception as e:
                info = 'There was an error moving a file at %s for Archive Manager job %s. This will need to be manually addressed.' % (
                    filePath, batchName)
                sendFailureEmail(info)
                continue

            if os.path.basename(originalFileName) != os.path.basename(newPath):
                #there was a collision; there really is no reason why this should happen, but let's account for it
                errorString = 'For some reason, there already exists a file in %s labeled %s' % (
                    destinationAMFolder, originalFileName) + '\n'
                amRecord.errorString = amRecord.errorString + '\n' + errorString
                datastore.updateArchiveManagerJobErrorString(
                    amRecord, amRecord.errorString)

            success = datastore.updateRecordWithFinalEncryptedPathAndHashForStartTimeAndEndTime(
                newPath, hashString, startTime, endTime, key_id)
            currentFiles = visibleFilesInFolder(destinationAMFolder)

            amPath = amRecord.amPath
            filesInJob = amRecord.allFilesInRecord()

            #are we finished? are all the files in place for the batch job?
            try:
                areAllFilesInPlace = True
                for nFile in filesInJob:
                    if nFile not in currentFiles:
                        areAllFilesInPlace = False

                if areAllFilesInPlace == False:
                    continue

                logging.debug('All files are in place')
                try:
                    #remove old source folder
                    logging.debug('PostProcess: removing original inbox')
                    shutil.rmtree(amPath)
                except OSError as e:
                    info = "PostProcess: " + e.message
                    logging.debug(info)
                    info = 'There was a problem removing the folder %s from the inbox after decrypting all of the files in the job.' % (
                        amPath)
                    sendFailureEmail(info)

                #refresh the record
                amRecord = datastore.recordWithNumberFromAMJobsTable(batchName)
                if amRecord is None:
                    #This should not happen as we don't even allow for the logic to proceed to this point without
                    #a valid Archive Manager Record
                    info = 'An error occurred where no data was found for the Archive Manager job ' + batchName + '\n'
                    info = info + 'This error should not happen. Please check ' + destinationAMFolder + '\n'
                    info = info + 'The files will need to be manually removed from the Decryption Queue.'
                    logging.debug(info)
                    sendFailureEmail(info)
                    continue

                #if there is an error, then redirect to the error box
                if amRecord.errorString != '':
                    finalPath = errorPath
                    #move the error files into a folder that indicates they are errors, it will live in the error box
                    try:
                        if datastore.updateArchiveManagerJobAsErrored(
                                amRecord) == True:
                            logging.debug(
                                'Job has finished, but there were some errors')
                            logging.debug('PostProcess: will send email')
                            info = 'Job %s has some errors! Please see the ErrorBox at %s' % (
                                batchName, errorPath)
                            info = info + '\n' + amRecord.errorString
                            sendFailureEmail(info)
                        else:
                            logging.debug('PostProcess: Error saving Job')

                        errDirname = os.path.dirname(destinationAMFolder)
                        errBasename = os.path.basename(
                            destinationAMFolder) + '_DECRYPTED_ERROR'
                        os.rename(destinationAMFolder,
                                  os.path.join(errDirname, errBasename))
                        destinationAMFolder = os.path.join(
                            errDirname, errBasename)
                        # shutil.move(destinationAMFolder, errorPath)
                        pathAfterSafelyMovingFolderToDestinationFolder(
                            destinationAMFolder, errorPath)

                    except Exception as e:
                        info = 'An error occurred when moving the errored files to %s.' % (
                            errorPath, )
                        logging.debug(info)
                        sendFailureEmail(info)
                else:
                    #No errors, move the files to the appropriate place
                    print "No Errors finalPath", finalPath
                    try:
                        logging.debug(
                            'PostProcess: moving archive manager folder to final destination'
                        )
                        if os.path.exists(
                                os.path.join(
                                    finalPath,
                                    os.path.basename(destinationAMFolder))):
                            logging.debug(
                                'PostProcess: collision moving to duplicate box'
                            )
                            altPath = pathAfterSafelyMovingFileToDestinationFolder(
                                destinationAMFolder, finalPath)
                        else:
                            shutil.move(destinationAMFolder, finalPath)

                        if datastore.updateArchiveManagerJobAsReadyToComplete(
                                amRecord) == True:
                            logging.debug(
                                'PostProcess: job is ready to complete')
                            logging.debug(
                                'PostProcess: moving files and sending email')
                            info = 'Job %s is complete! All of the files are decrypted and have appropriate matching checksums.' % (
                                batchName)
                            sendSuccessEmail(info)
                        else:
                            logging.debug('PostProcess: Error saving Job')

                    except OSError as e:
                        #again, I am accounting for this error; I just don't know why I would ever encounter a situation like this
                        info = 'There was a problem moving the folder %s to the outbox. You will have to move the file manually.' % (
                            destinationAMFolder)
                        info = info + " " + e.message
                        sendFailureEmail(info)
                        logging.debug(info)
                        continue

            except Exception as e:
                info = 'An error occurred. Please see check the Decryption Queue for job %s. See Error: %s' % (
                    batchName, e.message)
                logging.debug(info)
                sendFailureEmail(info)

        else:
            #LAST CASE FOR SINGLE MODE FILES LIKE ENCRYPTION AND SINGLE MODE DECRYPTION
            newPath = pathAfterSafelyMovingFileToDestinationFolder(
                filePath, finalPath)

            if not os.path.exists(newPath):
                logging.debug('PostProcess: Error moving file')
                continue

            logging.debug(
                'PostProcess: Will update record status with Hash string and times'
            )

            success = datastore.updateRecordWithFinalEncryptedPathAndHashForStartTimeAndEndTime(
                newPath, hashString, startTime, endTime, key_id)

            if success == True:
                # move original file to original box
                try:
                    newPath = pathAfterSafelyMovingFileToDestinationFolder(
                        sourceFilePath, finalOriginalDestinationPath)
                except Exception as e:
                    logging.debug(
                        'There was an error moving the file into place')
                    info = 'There was an error moving file %s into the outbox at %s' % (
                        sourceFilePath, finalOriginalDestinationPath)
                    sendFailureEmail(info)

                if configurationOptions().shouldDeleteOriginal == True:
                    try:
                        os.remove(newPath)
                    except OSError as e:
                        logging.debug('PostProcess: Unable to delete the file %s' % newPath)